From 546709cee90310e08c5481264c3ef20377052edf Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Thu, 21 May 2026 19:36:18 +1200 Subject: [PATCH 1/8] feat: optional `serde` support for the descriptor vocabulary (`serde` feature) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an off-by-default `serde` feature providing `Serialize`/`Deserialize` across the descriptor types. The wire shape mirrors the storage backends (sqlx / mongodb / async-graphql) so a serde value matches their column representation: - open codec/format enums serialize as their `as_str()` slug (no `{"Other": ...}` wrapper); closed FFmpeg-coded enums + `TrackDisposition` as their `to_u32()` integer. Both round-trips total. - plain structs derive serde directly; validated structs (`GeoLocation`, `Fingerprint`, `CoverArt`) deserialize through their checking constructors so invariant-violating values are rejected. - `lang::Language` (de)serializes as its canonical BCP-47 string. The feature is orthogonal to the capability tiers: no-alloc Copy types (colour / pixel-format / frame geometry / disposition) gain serde under bare `--features serde`; heap-tier types when `alloc`/`std` is also on (forwarding `serde` to `smol_str` / `bytes`). serde stays no-std (no `alloc`/`std` serde feature needed — borrowed-`&str` visitors). Centralised enum impls in `serde_impls.rs`; struct derives at their definition sites. Bumps to 0.1.1. --- CHANGELOG.md | 34 +++++ README.md | 8 ++ mediaframe/Cargo.toml | 21 ++- mediaframe/src/audio/cover_art.rs | 24 ++++ mediaframe/src/audio/fingerprint.rs | 24 ++++ mediaframe/src/audio/loudness.rs | 1 + mediaframe/src/audio/tags.rs | 1 + mediaframe/src/capture/device.rs | 1 + mediaframe/src/capture/geo.rs | 26 ++++ mediaframe/src/color.rs | 6 + mediaframe/src/frame/mod.rs | 5 + mediaframe/src/lang.rs | 29 ++++ mediaframe/src/lib.rs | 6 + mediaframe/src/serde_impls.rs | 202 ++++++++++++++++++++++++++++ 14 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 mediaframe/src/serde_impls.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index e149955..b4c3a12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,40 @@ All notable changes to this crate are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.1] May 21, 2026 + +### Added + +- **`serde` feature** — optional `serde::{Serialize, Deserialize}` for the + whole descriptor vocabulary, gated behind `--features serde` (off by + default). The wire shape mirrors what storage backends already use, so a + serde-`json` value matches their representation: + - **Open** codec / format enums (`codec::{Video,Audio,Subtitle}Codec`, + `container::Format`, `subtitle::Format`, + `audio::{ChannelLayout, SampleFormat, ContainerFormat}`) serialize as + their canonical `as_str()` slug — `VideoCodec::H264` ⇄ `"h264"`, + `Other("x265")` ⇄ `"x265"` (no `{"Other": …}` wrapper). + - **Closed** FFmpeg-coded enums (`color::{Matrix, Primaries, Transfer, + DynamicRange, ChromaLocation, DcpTargetGamut}`, + `pixel_format::PixelFormat`, `frame::{Rotation, FieldOrder, StereoMode}`, + `subtitle::TrackOrigin`, `audio::BitRateMode`) and + `disposition::TrackDisposition` serialize as their `to_u32()` integer. + Both round-trips are total (unknown slug → `Other`, unknown code → + `Unknown`). + - **Plain structs** (`color::Info` and its HDR/mastering sub-structs, + `frame::{Dimensions, Rect, Rational, SampleAspectRatio, FrameRate}`, + `audio::{Loudness, Tags, Device}`… ) derive serde directly. + - **Validated structs** (`capture::GeoLocation`, `audio::Fingerprint`, + `audio::CoverArt`) route deserialize through their checking + constructors, so out-of-range / invariant-violating values are rejected + rather than materialised. + - **`lang::Language`** serializes as its canonical BCP-47 string + (`"en-US"`, `"zh-Hant-TW"`, `"und"`). + - Works at every capability tier: the no-alloc Copy types gain serde + under bare `--features serde`; the heap-tier types (codecs, formats, + audio metadata, capture, language) when paired with `alloc` / `std` + (forwarding `serde` to `smol_str` / `bytes`). + ## [0.1.0] May 19, 2026 Initial `mediaframe` release — this crate is a **rename** of the diff --git a/README.md b/README.md index f222c34..2ca5a79 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,14 @@ can all speak to without agreeing on anything heavier. frame / HDR vocabulary so downstream proto schemas can extern-map `.mediaframe.v1` → `::mediaframe`. Off by default — enable with `--features buffa`. +- **`serde`** — optional `serde::{Serialize, Deserialize}` for the whole + descriptor vocabulary. Open codec / format enums serialize as their + `as_str()` slug, closed FFmpeg-coded enums as their `to_u32()` integer + (both round-trips total), `lang::Language` as its BCP-47 string; + validated structs (`GeoLocation` / `Fingerprint` / `CoverArt`) + deserialize through their checking constructors. Orthogonal to the + capability tiers (no-alloc Copy types included). Off by default — + enable with `--features serde`. - **`PixelSink`** + **`SourceFormat`** sealed traits re-exported at the crate root. diff --git a/mediaframe/Cargo.toml b/mediaframe/Cargo.toml index ae7b3f7..b463434 100644 --- a/mediaframe/Cargo.toml +++ b/mediaframe/Cargo.toml @@ -2,7 +2,7 @@ [package] name = "mediaframe" -version = "0.1.0" +version = "0.1.1" edition = "2024" license = "MIT OR Apache-2.0" repository = "https://github.com/Findit-AI/mediaframe" @@ -18,6 +18,16 @@ default = ["std"] alloc = ["dep:smol_str", "dep:icu_locale_core", "icu_locale_core/alloc", "dep:bytes", "derive_more/unwrap", "derive_more/try_unwrap"] std = ["alloc", "thiserror/default", "smol_str?/std", "bytes?/std"] buffa = ["dep:buffa", "mediatime/buffa", "alloc"] +# Optional `serde` (de)serialization for the descriptor vocabulary. +# Orthogonal to the capability tiers: the no-alloc Copy types (colour / +# pixel-format / frame geometry / disposition) gain serde under bare +# `--features serde`; the heap-tier types (codecs, container/subtitle +# formats, audio metadata, capture, language) gain it when `alloc`/`std` +# is also on, forwarding `serde` to `smol_str` / `bytes` for their string +# / byte fields. The wire shape mirrors the storage backends: open +# codec/format enums serialize as their `as_str()` slug, closed +# FFmpeg-coded enums as their `to_u32()` integer. +serde = ["dep:serde", "smol_str?/serde", "bytes?/serde"] frame = [ "yuv-planar", "yuv-semi-planar", "yuva", "yuv-packed", "yuv-444-packed", @@ -51,6 +61,15 @@ icu_locale_core = { version = "2", default-features = false, optional = true } bytes = { version = "1", default-features = false, optional = true } buffa = { version = "0.6", default-features = false, optional = true } smol_str = { version = "0.3", default-features = false, optional = true } +# `serde` is no-std by default (`default-features = false`); the `derive` +# feature pulls `serde_derive` for the struct/enum impls. No `alloc`/`std` +# serde feature is needed — string handling goes through borrowed-`&str` +# visitors and `smol_str` / `bytes` carry their own serde impls. +serde = { version = "1", default-features = false, optional = true, features = ["derive"] } + +[dev-dependencies] +# Concrete serde format for the `serde` round-trip tests (std-only). +serde_json = "1" [profile.bench] opt-level = 3 diff --git a/mediaframe/src/audio/cover_art.rs b/mediaframe/src/audio/cover_art.rs index afa6597..d68c8ed 100644 --- a/mediaframe/src/audio/cover_art.rs +++ b/mediaframe/src/audio/cover_art.rs @@ -13,12 +13,36 @@ use smol_str::SmolStr; /// large image clones in O(1) (refcount bump) rather than a deep copy. /// Both must be non-empty (an empty mime or empty payload is not a /// meaningful cover-art attachment); use [`CoverArt::try_new`]. +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(try_from = "CoverArtShadow") +)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CoverArt { mime: SmolStr, data: Bytes, } +/// Deserialization shadow for [`CoverArt`] — routes through +/// [`CoverArt::try_new`] so the non-empty `mime` / `data` invariants are +/// enforced on the way in, instead of being bypassed by a field-derived +/// `Deserialize`. +#[cfg(feature = "serde")] +#[derive(serde::Deserialize)] +struct CoverArtShadow { + mime: SmolStr, + data: Bytes, +} + +#[cfg(feature = "serde")] +impl core::convert::TryFrom for CoverArt { + type Error = CoverArtError; + fn try_from(s: CoverArtShadow) -> Result { + Self::try_new(s.mime, s.data) + } +} + impl Default for CoverArt { /// Synthetic `Default` — `mime: "application/octet-stream"`, /// `data: [0u8]`. The public constructor [`Self::try_new`] still diff --git a/mediaframe/src/audio/fingerprint.rs b/mediaframe/src/audio/fingerprint.rs index 1d62cc6..343ce90 100644 --- a/mediaframe/src/audio/fingerprint.rs +++ b/mediaframe/src/audio/fingerprint.rs @@ -14,12 +14,36 @@ use smol_str::SmolStr; /// fingerprint cannot be routed; empty `value` is **allowed** (some /// algorithms emit an empty fingerprint for silence / sub-second /// clips). +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(try_from = "FingerprintShadow") +)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Fingerprint { algorithm: SmolStr, value: Bytes, } +/// Deserialization shadow for [`Fingerprint`] — routes through +/// [`Fingerprint::try_new`] so the non-empty-`algorithm` invariant is +/// enforced on the way in, instead of being bypassed by a field-derived +/// `Deserialize`. +#[cfg(feature = "serde")] +#[derive(serde::Deserialize)] +struct FingerprintShadow { + algorithm: SmolStr, + value: Bytes, +} + +#[cfg(feature = "serde")] +impl core::convert::TryFrom for Fingerprint { + type Error = FingerprintError; + fn try_from(s: FingerprintShadow) -> Result { + Self::try_new(s.algorithm, s.value) + } +} + impl Default for Fingerprint { /// Synthetic `Default` — `algorithm: "default"`, `value: []`. The /// public constructor [`Self::try_new`] still rejects empty diff --git a/mediaframe/src/audio/loudness.rs b/mediaframe/src/audio/loudness.rs index 268dd75..2ed1b5d 100644 --- a/mediaframe/src/audio/loudness.rs +++ b/mediaframe/src/audio/loudness.rs @@ -23,6 +23,7 @@ /// /// `f32` storage precludes `Eq`/`Hash` (NaN ≠ NaN); the derives are /// limited to `Debug`/`Clone`/`Copy`/`PartialEq`. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, Copy, PartialEq)] pub struct Loudness { integrated_lufs: f32, diff --git a/mediaframe/src/audio/tags.rs b/mediaframe/src/audio/tags.rs index f871b54..51b8c8d 100644 --- a/mediaframe/src/audio/tags.rs +++ b/mediaframe/src/audio/tags.rs @@ -17,6 +17,7 @@ use smol_str::SmolStr; /// - Numeric fields use `Option` because `0` is a *valid* /// value (year `0` exists historically; "track 0" sometimes /// appears in test files), so the absent state must be distinct. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Tags { title: SmolStr, diff --git a/mediaframe/src/capture/device.rs b/mediaframe/src/capture/device.rs index 5ec2117..4e09460 100644 --- a/mediaframe/src/capture/device.rs +++ b/mediaframe/src/capture/device.rs @@ -21,6 +21,7 @@ use smol_str::SmolStr; /// sentinel for "absent" so callers never need `Option` /// (matches the codec / source-tagging convention elsewhere in this /// crate). Use [`Self::is_empty`] to detect the fully-absent state. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Device { make: SmolStr, diff --git a/mediaframe/src/capture/geo.rs b/mediaframe/src/capture/geo.rs index 137dc8b..afa7662 100644 --- a/mediaframe/src/capture/geo.rs +++ b/mediaframe/src/capture/geo.rs @@ -15,6 +15,11 @@ use smol_str::SmolStr; /// parse from an ISO 6709 string via [`Self::from_iso6709`] / /// `parse::()` (the [`core::str::FromStr`] impl). /// Serialise back via [`Self::to_iso6709`] / [`core::fmt::Display`]. +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(try_from = "GeoLocationShadow") +)] #[derive(Debug, Clone, Copy, PartialEq)] pub struct GeoLocation { lat: f64, @@ -22,6 +27,27 @@ pub struct GeoLocation { altitude: Option, } +/// Deserialization shadow for [`GeoLocation`] — routes through +/// [`GeoLocation::try_new`] so out-of-range coordinates are rejected and +/// a non-finite altitude is normalised to `None`, instead of being +/// materialised directly by a field-derived `Deserialize`. +#[cfg(feature = "serde")] +#[derive(serde::Deserialize)] +struct GeoLocationShadow { + lat: f64, + lon: f64, + #[serde(default)] + altitude: Option, +} + +#[cfg(feature = "serde")] +impl core::convert::TryFrom for GeoLocation { + type Error = GeoLocationError; + fn try_from(s: GeoLocationShadow) -> Result { + Self::try_new(s.lat, s.lon, s.altitude) + } +} + impl Default for GeoLocation { /// `(0.0, 0.0, None)` — "Null Island" with unknown altitude. This /// is a legal in-range coordinate, the conventional sentinel for diff --git a/mediaframe/src/color.rs b/mediaframe/src/color.rs index 8901de5..e9c7592 100644 --- a/mediaframe/src/color.rs +++ b/mediaframe/src/color.rs @@ -653,6 +653,7 @@ impl ChromaLocation { /// RAW backends populate from clip-level color science and leave /// `Unspecified` if absent. `Info::UNSPECIFIED` is the sensible /// default for RAW backends that don't carry per-frame color data. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Info { primaries: Primaries, @@ -918,6 +919,7 @@ impl DcpTargetGamut { /// This is clip / stream level (and frame-level when carried as /// frame side data); the per-frame [`Info`] enums are /// unchanged. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct ContentLightLevel { max_cll: u32, @@ -987,6 +989,7 @@ impl ContentLightLevel { /// losslessly** rather than being silently saturated (Codex /// adversarial-review F3). Validity is a separate concern from /// preservation — see [`HdrStaticMetadata`]. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct ChromaCoord { x: u32, @@ -1062,6 +1065,7 @@ impl ChromaCoord { /// - `max_luminance` / `min_luminance` are in units of **0.0001 /// cd/m²** (floating value = `raw / 10000.0`), matching FFmpeg's /// `n/10000` `AVRational` luminance encoding. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct MasteringDisplay { display_primaries: [ChromaCoord; 3], @@ -1185,6 +1189,7 @@ impl MasteringDisplay { /// stays per-frame closed-form enums only; HDR10 static metadata is /// clip / stream level and optional, so it lives in its own type. /// (Dynamic HDR — HDR10+ / Dolby Vision RPU — is out of scope here.) +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct HdrStaticMetadata { mastering: Option, @@ -1263,6 +1268,7 @@ impl HdrStaticMetadata { /// /// All fields default to `0` (`#[derive(Default)]`), matching an /// absent / unset configuration. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct DolbyVisionConfig { profile: u8, diff --git a/mediaframe/src/frame/mod.rs b/mediaframe/src/frame/mod.rs index 3c45e36..78d8198 100644 --- a/mediaframe/src/frame/mod.rs +++ b/mediaframe/src/frame/mod.rs @@ -300,6 +300,7 @@ impl UnsupportedBits { /// packing some adjacent crates use) covers every realistic /// resolution; the `u32` choice here keeps the public API plug- /// compatible with both adapter typings. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct Dimensions { width: u32, @@ -374,6 +375,7 @@ impl core::fmt::Display for Dimensions { /// /// Used for `VideoFrame::visible_rect` (FFmpeg crop / /// WebCodecs `visibleRect` / ProRes RAW `CleanAperture`). +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct Rect { x: u32, @@ -569,6 +571,7 @@ impl Rotation { /// truth for "exact ratio with a non-zero denominator". The fields /// are private; the entire public method API (and the `buffa` wire /// format) is unchanged, delegating to the inner `Rational`. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct SampleAspectRatio(Rational); @@ -690,6 +693,7 @@ impl From for SampleAspectRatio { /// frames-per-second) carry the domain meaning. A `0` numerator is a /// valid representable state (e.g. an "unknown" FFmpeg `AVRational` /// `0/1`) — see [`Self::is_zero`]. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Rational { num: u32, @@ -788,6 +792,7 @@ impl core::fmt::Display for Rational { /// /// The [`Default`] is `{ rate: Rational::default() (1/1), /// is_vfr: false }`. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct FrameRate { rate: Rational, diff --git a/mediaframe/src/lang.rs b/mediaframe/src/lang.rs index d185857..2bc2670 100644 --- a/mediaframe/src/lang.rs +++ b/mediaframe/src/lang.rs @@ -182,6 +182,35 @@ impl core::str::FromStr for Language { } } +/// Serializes as the canonical BCP-47 string (`to_bcp47`), e.g. `"en-US"` +/// / `"zh-Hant-TW"` / `"und"` — the same lossless text form used by the +/// storage backends. Not a derived impl: the `icu_locale_core` subtag +/// fields have no stable serde representation, and the string round-trip +/// keeps the wire shape engine-agnostic. +#[cfg(feature = "serde")] +impl serde::Serialize for Language { + fn serialize(&self, ser: S) -> Result { + ser.serialize_str(&self.to_bcp47()) + } +} + +#[cfg(feature = "serde")] +impl<'de> serde::Deserialize<'de> for Language { + fn deserialize>(de: D) -> Result { + struct V; + impl serde::de::Visitor<'_> for V { + type Value = Language; + fn expecting(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("a BCP-47 language tag string") + } + fn visit_str(self, v: &str) -> Result { + Language::from_bcp47(v).map_err(serde::de::Error::custom) + } + } + de.deserialize_str(V) + } +} + /// Errors returned by [`Language`] constructors / parsers. #[derive(Debug, Clone, PartialEq, Eq, Hash, thiserror::Error)] #[non_exhaustive] diff --git a/mediaframe/src/lib.rs b/mediaframe/src/lib.rs index aa0f7cf..839a9eb 100644 --- a/mediaframe/src/lib.rs +++ b/mediaframe/src/lib.rs @@ -56,6 +56,12 @@ pub mod frame; #[cfg_attr(docsrs, doc(cfg(any(feature = "std", feature = "alloc"))))] pub mod lang; pub mod pixel_format; +/// Centralised `serde` impls for the descriptor enums (the structs derive +/// serde at their definition sites). Open codec/format enums serialize as +/// their `as_str()` slug; closed FFmpeg-coded enums as their `to_u32()` +/// code — mirroring the storage backends. +#[cfg(feature = "serde")] +mod serde_impls; pub mod source; /// Subtitle-stream descriptor vocabulary — file / demuxer format /// ([`subtitle::Format`]) and track-origin axis diff --git a/mediaframe/src/serde_impls.rs b/mediaframe/src/serde_impls.rs new file mode 100644 index 0000000..1c7a3e1 --- /dev/null +++ b/mediaframe/src/serde_impls.rs @@ -0,0 +1,202 @@ +//! Centralised `serde` implementations for the descriptor enums +//! (`feature = "serde"`). +//! +//! The wire shape mirrors what the storage backends (sqlx / mongodb / +//! async-graphql) independently chose, so a serde-`json` column matches +//! their representation byte-for-byte: +//! +//! - **Open** codec / format enums (those with an `Other(SmolStr)` escape +//! arm and a total [`FromStr`](core::str::FromStr)) serialize as their +//! canonical `as_str()` slug — e.g. `VideoCodec::H264` ⇄ `"h264"`, +//! `Other("x265")` ⇄ `"x265"` (no `{"Other": …}` wrapper). +//! - **Closed** FFmpeg-coded enums (those with `to_u32()` / `from_u32()` +//! and no `FromStr`) serialize as their `u32` code — e.g. +//! `color::Matrix::Bt709` ⇄ `1`. +//! +//! Both round-trips are total: an unrecognised slug rides the `Other` +//! arm, an unrecognised code the `Unknown` arm. The plain data structs +//! (`color::Info`, `frame::Dimensions`, `audio::Tags`, …) derive serde +//! at their definition site; the validated structs +//! (`capture::GeoLocation`, `audio::Fingerprint`, `audio::CoverArt`) +//! route deserialize through their checking constructors there too. +//! `lang::Language` carries a bespoke BCP-47 string impl in its module. + +/// Implements `Serialize` / `Deserialize` for an *open* enum via its +/// canonical string slug (`as_str()` to serialize, [`FromStr`] to parse). +/// The `FromStr` impl is total (`Err = Infallible`) — unknown slugs ride +/// the enum's `Other` arm — but the deserializer surfaces any error as a +/// serde error for forward-compatibility. +/// +/// [`FromStr`]: core::str::FromStr +// All invocations are heap-tier (codecs / formats); unused under the +// no-alloc tier where only the coded enums exist. +#[allow(unused_macros)] +macro_rules! serde_via_str { + ($t:path) => { + impl serde::Serialize for $t { + #[inline] + fn serialize(&self, ser: S) -> Result { + ser.serialize_str(self.as_str()) + } + } + + impl<'de> serde::Deserialize<'de> for $t { + fn deserialize>(de: D) -> Result { + struct V; + impl serde::de::Visitor<'_> for V { + type Value = $t; + fn expecting(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str(concat!("a ", stringify!($t), " slug string")) + } + #[inline] + fn visit_str(self, v: &str) -> Result { + v.parse::<$t>().map_err(serde::de::Error::custom) + } + } + de.deserialize_str(V) + } + } + }; +} + +/// Implements `Serialize` / `Deserialize` for a *closed* FFmpeg-coded +/// enum via its `to_u32()` / `from_u32()` round-trip. Total in both +/// directions — an unrecognised code rides the enum's `Unknown` arm. +macro_rules! serde_via_code { + ($t:path) => { + impl serde::Serialize for $t { + #[inline] + fn serialize(&self, ser: S) -> Result { + ser.serialize_u32(self.to_u32()) + } + } + + impl<'de> serde::Deserialize<'de> for $t { + #[inline] + fn deserialize>(de: D) -> Result { + Ok(<$t>::from_u32(::deserialize( + de, + )?)) + } + } + }; +} + +// ── Closed FFmpeg-coded enums (available at every capability tier) ── +serde_via_code!(crate::color::Matrix); +serde_via_code!(crate::color::Primaries); +serde_via_code!(crate::color::Transfer); +serde_via_code!(crate::color::DynamicRange); +serde_via_code!(crate::color::ChromaLocation); +serde_via_code!(crate::color::DcpTargetGamut); +serde_via_code!(crate::pixel_format::PixelFormat); +serde_via_code!(crate::frame::Rotation); +serde_via_code!(crate::frame::FieldOrder); +serde_via_code!(crate::frame::StereoMode); +serde_via_code!(crate::disposition::TrackDisposition); + +// ── Open slug enums (heap-tier: codecs / formats carry `Other(SmolStr)`) ── +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::codec::VideoCodec); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::codec::AudioCodec); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::codec::SubtitleCodec); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::container::Format); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::subtitle::Format); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::audio::ChannelLayout); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::audio::SampleFormat); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_str!(crate::audio::ContainerFormat); + +// ── Closed coded enums that live in heap-tier modules ── +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_code!(crate::subtitle::TrackOrigin); +#[cfg(any(feature = "std", feature = "alloc"))] +serde_via_code!(crate::audio::BitRateMode); + +#[cfg(all(test, feature = "std"))] +mod tests { + use crate::{ + audio::{ChannelLayout, Fingerprint, Tags}, + capture::GeoLocation, + codec::VideoCodec, + color::{self, Matrix}, + disposition::TrackDisposition, + frame::{Dimensions, SampleAspectRatio}, + lang::Language, + }; + + fn round_trip(v: &T) -> T + where + T: serde::Serialize + serde::de::DeserializeOwned + PartialEq + core::fmt::Debug, + { + let json = serde_json::to_string(v).unwrap(); + let back: T = serde_json::from_str(&json).unwrap(); + assert_eq!(*v, back, "round-trip mismatch via {json}"); + back + } + + #[test] + fn open_enum_serializes_as_slug() { + assert_eq!( + serde_json::to_string(&VideoCodec::H264).unwrap(), + "\"h264\"" + ); + round_trip(&VideoCodec::H264); + // Unknown slug rides the `Other` arm losslessly. + let custom = VideoCodec::Other(smol_str::SmolStr::new("zzcodec")); + assert_eq!(serde_json::to_string(&custom).unwrap(), "\"zzcodec\""); + round_trip(&custom); + round_trip(&ChannelLayout::default()); + } + + #[test] + fn closed_enum_serializes_as_code() { + let json = serde_json::to_string(&Matrix::Bt709).unwrap(); + assert_eq!(json, Matrix::Bt709.to_u32().to_string()); + round_trip(&Matrix::Bt709); + // Unknown code rides the `Unknown` arm losslessly. + let unknown: Matrix = serde_json::from_str("250").unwrap(); + assert_eq!(unknown.to_u32(), 250); + } + + #[test] + fn structs_round_trip() { + round_trip(&color::Info::default()); + round_trip(&Dimensions::new(1920, 1080)); + round_trip(&SampleAspectRatio::new( + 40, + core::num::NonZeroU32::new(33).unwrap(), + )); + round_trip(&Tags::new().with_title("Song").with_year(2026)); + round_trip(&(TrackDisposition::DEFAULT | TrackDisposition::FORCED)); + } + + #[test] + fn language_round_trips_as_bcp47() { + let l = Language::from_bcp47("zh-Hant-TW").unwrap(); + assert_eq!(serde_json::to_string(&l).unwrap(), "\"zh-Hant-TW\""); + round_trip(&l); + round_trip(&Language::default()); + } + + #[test] + fn validated_structs_check_on_deserialize() { + let g = GeoLocation::try_new(48.8584, 2.2945, Some(330.0)).unwrap(); + round_trip(&g); + // Out-of-range latitude is rejected, not silently materialised. + assert!( + serde_json::from_str::(r#"{"lat":999.0,"lon":0.0,"altitude":null}"#).is_err() + ); + + let fp = Fingerprint::try_new("chromaprint", &b"\x01\x02\x03"[..]).unwrap(); + round_trip(&fp); + // Empty algorithm violates the invariant and must be rejected. + assert!(serde_json::from_str::(r#"{"algorithm":"","value":[1,2,3]}"#).is_err()); + } +} From 1552a78621347be04e7ee34c0c2f88f9bd50bb51 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Thu, 21 May 2026 20:21:03 +1200 Subject: [PATCH 2/8] refactor: group optional serde impls in anonymous `const` blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply the const-block convention for feature-gated trait impls: the validated structs (`Fingerprint`, `CoverArt`, `GeoLocation`) and `Language` now carry their `serde` impls in a single #[cfg(feature = "serde")] #[cfg_attr(docsrs, doc(cfg(feature = "serde")))] const _: () = { ... }; block instead of scattering `#[cfg(feature = "serde")]` across a module-level shadow struct + `TryFrom` impl + the type's derive. The validate-on-deserialize shadow is now private to the block (no longer pollutes the module namespace), one gate covers both directions, and the docs.rs feature badge is attached. Plain structs (single `cfg_attr(derive(...))`) and the centralised enum impls (already grouped in the gated `serde_impls` module) are unchanged — they are already single-gate groupings. Adds a CoverArt round-trip test (its Serialize is now hand-written). No behaviour change; 159 tests green. --- mediaframe/src/audio/cover_art.rs | 49 ++++++++++++++----------- mediaframe/src/audio/fingerprint.rs | 49 ++++++++++++++----------- mediaframe/src/capture/geo.rs | 55 +++++++++++++++++------------ mediaframe/src/lang.rs | 47 ++++++++++++------------ mediaframe/src/serde_impls.rs | 7 +++- 5 files changed, 122 insertions(+), 85 deletions(-) diff --git a/mediaframe/src/audio/cover_art.rs b/mediaframe/src/audio/cover_art.rs index d68c8ed..3a749bf 100644 --- a/mediaframe/src/audio/cover_art.rs +++ b/mediaframe/src/audio/cover_art.rs @@ -13,35 +13,44 @@ use smol_str::SmolStr; /// large image clones in O(1) (refcount bump) rather than a deep copy. /// Both must be non-empty (an empty mime or empty payload is not a /// meaningful cover-art attachment); use [`CoverArt::try_new`]. -#[cfg_attr( - feature = "serde", - derive(serde::Serialize, serde::Deserialize), - serde(try_from = "CoverArtShadow") -)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CoverArt { mime: SmolStr, data: Bytes, } -/// Deserialization shadow for [`CoverArt`] — routes through -/// [`CoverArt::try_new`] so the non-empty `mime` / `data` invariants are -/// enforced on the way in, instead of being bypassed by a field-derived -/// `Deserialize`. +// Optional `serde` impls grouped in one gated `const` block: a single +// `#[cfg]` covers both directions, and the validate-on-deserialize shadow +// stays private to the block (no module-namespace pollution). #[cfg(feature = "serde")] -#[derive(serde::Deserialize)] -struct CoverArtShadow { - mime: SmolStr, - data: Bytes, -} +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] +const _: () = { + use serde::{Deserialize, Deserializer, Serialize, Serializer, ser::SerializeStruct}; -#[cfg(feature = "serde")] -impl core::convert::TryFrom for CoverArt { - type Error = CoverArtError; - fn try_from(s: CoverArtShadow) -> Result { - Self::try_new(s.mime, s.data) + impl Serialize for CoverArt { + fn serialize(&self, ser: S) -> Result { + let mut st = ser.serialize_struct("CoverArt", 2)?; + st.serialize_field("mime", &self.mime)?; + st.serialize_field("data", &self.data)?; + st.end() + } } -} + + // Routes deserialize through `try_new` so the non-empty `mime` / `data` + // invariants hold instead of being bypassed by a field derive. + #[derive(Deserialize)] + struct Shadow { + mime: SmolStr, + data: Bytes, + } + + impl<'de> Deserialize<'de> for CoverArt { + fn deserialize>(de: D) -> Result { + let s = Shadow::deserialize(de)?; + CoverArt::try_new(s.mime, s.data).map_err(serde::de::Error::custom) + } + } +}; impl Default for CoverArt { /// Synthetic `Default` — `mime: "application/octet-stream"`, diff --git a/mediaframe/src/audio/fingerprint.rs b/mediaframe/src/audio/fingerprint.rs index 343ce90..d14c1c6 100644 --- a/mediaframe/src/audio/fingerprint.rs +++ b/mediaframe/src/audio/fingerprint.rs @@ -14,35 +14,44 @@ use smol_str::SmolStr; /// fingerprint cannot be routed; empty `value` is **allowed** (some /// algorithms emit an empty fingerprint for silence / sub-second /// clips). -#[cfg_attr( - feature = "serde", - derive(serde::Serialize, serde::Deserialize), - serde(try_from = "FingerprintShadow") -)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Fingerprint { algorithm: SmolStr, value: Bytes, } -/// Deserialization shadow for [`Fingerprint`] — routes through -/// [`Fingerprint::try_new`] so the non-empty-`algorithm` invariant is -/// enforced on the way in, instead of being bypassed by a field-derived -/// `Deserialize`. +// Optional `serde` impls grouped in one gated `const` block: a single +// `#[cfg]` covers both directions, and the validate-on-deserialize shadow +// stays private to the block (no module-namespace pollution). #[cfg(feature = "serde")] -#[derive(serde::Deserialize)] -struct FingerprintShadow { - algorithm: SmolStr, - value: Bytes, -} +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] +const _: () = { + use serde::{Deserialize, Deserializer, Serialize, Serializer, ser::SerializeStruct}; -#[cfg(feature = "serde")] -impl core::convert::TryFrom for Fingerprint { - type Error = FingerprintError; - fn try_from(s: FingerprintShadow) -> Result { - Self::try_new(s.algorithm, s.value) + impl Serialize for Fingerprint { + fn serialize(&self, ser: S) -> Result { + let mut st = ser.serialize_struct("Fingerprint", 2)?; + st.serialize_field("algorithm", &self.algorithm)?; + st.serialize_field("value", &self.value)?; + st.end() + } } -} + + // Routes deserialize through `try_new` so the non-empty-`algorithm` + // invariant holds instead of being bypassed by a field derive. + #[derive(Deserialize)] + struct Shadow { + algorithm: SmolStr, + value: Bytes, + } + + impl<'de> Deserialize<'de> for Fingerprint { + fn deserialize>(de: D) -> Result { + let s = Shadow::deserialize(de)?; + Fingerprint::try_new(s.algorithm, s.value).map_err(serde::de::Error::custom) + } + } +}; impl Default for Fingerprint { /// Synthetic `Default` — `algorithm: "default"`, `value: []`. The diff --git a/mediaframe/src/capture/geo.rs b/mediaframe/src/capture/geo.rs index afa7662..73eb96d 100644 --- a/mediaframe/src/capture/geo.rs +++ b/mediaframe/src/capture/geo.rs @@ -15,11 +15,6 @@ use smol_str::SmolStr; /// parse from an ISO 6709 string via [`Self::from_iso6709`] / /// `parse::()` (the [`core::str::FromStr`] impl). /// Serialise back via [`Self::to_iso6709`] / [`core::fmt::Display`]. -#[cfg_attr( - feature = "serde", - derive(serde::Serialize, serde::Deserialize), - serde(try_from = "GeoLocationShadow") -)] #[derive(Debug, Clone, Copy, PartialEq)] pub struct GeoLocation { lat: f64, @@ -27,26 +22,42 @@ pub struct GeoLocation { altitude: Option, } -/// Deserialization shadow for [`GeoLocation`] — routes through -/// [`GeoLocation::try_new`] so out-of-range coordinates are rejected and -/// a non-finite altitude is normalised to `None`, instead of being -/// materialised directly by a field-derived `Deserialize`. +// Optional `serde` impls grouped in one gated `const` block: a single +// `#[cfg]` covers both directions, and the validate-on-deserialize shadow +// stays private to the block (no module-namespace pollution). #[cfg(feature = "serde")] -#[derive(serde::Deserialize)] -struct GeoLocationShadow { - lat: f64, - lon: f64, - #[serde(default)] - altitude: Option, -} +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] +const _: () = { + use serde::{Deserialize, Deserializer, Serialize, Serializer, ser::SerializeStruct}; + + impl Serialize for GeoLocation { + fn serialize(&self, ser: S) -> Result { + let mut st = ser.serialize_struct("GeoLocation", 3)?; + st.serialize_field("lat", &self.lat)?; + st.serialize_field("lon", &self.lon)?; + st.serialize_field("altitude", &self.altitude)?; + st.end() + } + } -#[cfg(feature = "serde")] -impl core::convert::TryFrom for GeoLocation { - type Error = GeoLocationError; - fn try_from(s: GeoLocationShadow) -> Result { - Self::try_new(s.lat, s.lon, s.altitude) + // Routes deserialize through `try_new` so out-of-range coordinates are + // rejected and a non-finite altitude is normalised, instead of being + // materialised directly by a field derive. + #[derive(Deserialize)] + struct Shadow { + lat: f64, + lon: f64, + #[serde(default)] + altitude: Option, } -} + + impl<'de> Deserialize<'de> for GeoLocation { + fn deserialize>(de: D) -> Result { + let s = Shadow::deserialize(de)?; + GeoLocation::try_new(s.lat, s.lon, s.altitude).map_err(serde::de::Error::custom) + } + } +}; impl Default for GeoLocation { /// `(0.0, 0.0, None)` — "Null Island" with unknown altitude. This diff --git a/mediaframe/src/lang.rs b/mediaframe/src/lang.rs index 2bc2670..a6ae3fc 100644 --- a/mediaframe/src/lang.rs +++ b/mediaframe/src/lang.rs @@ -182,34 +182,37 @@ impl core::str::FromStr for Language { } } -/// Serializes as the canonical BCP-47 string (`to_bcp47`), e.g. `"en-US"` -/// / `"zh-Hant-TW"` / `"und"` — the same lossless text form used by the -/// storage backends. Not a derived impl: the `icu_locale_core` subtag -/// fields have no stable serde representation, and the string round-trip -/// keeps the wire shape engine-agnostic. +// Optional `serde` impls grouped in one gated `const` block: (de)serializes +// as the canonical BCP-47 string (`"en-US"`, `"zh-Hant-TW"`, `"und"`) — the +// same lossless text form the storage backends use. Not derived: the +// `icu_locale_core` subtag fields have no stable serde representation. #[cfg(feature = "serde")] -impl serde::Serialize for Language { - fn serialize(&self, ser: S) -> Result { - ser.serialize_str(&self.to_bcp47()) +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] +const _: () = { + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + + impl Serialize for Language { + fn serialize(&self, ser: S) -> Result { + ser.serialize_str(&self.to_bcp47()) + } } -} -#[cfg(feature = "serde")] -impl<'de> serde::Deserialize<'de> for Language { - fn deserialize>(de: D) -> Result { - struct V; - impl serde::de::Visitor<'_> for V { - type Value = Language; - fn expecting(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.write_str("a BCP-47 language tag string") - } - fn visit_str(self, v: &str) -> Result { - Language::from_bcp47(v).map_err(serde::de::Error::custom) + impl<'de> Deserialize<'de> for Language { + fn deserialize>(de: D) -> Result { + struct V; + impl serde::de::Visitor<'_> for V { + type Value = Language; + fn expecting(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("a BCP-47 language tag string") + } + fn visit_str(self, v: &str) -> Result { + Language::from_bcp47(v).map_err(serde::de::Error::custom) + } } + de.deserialize_str(V) } - de.deserialize_str(V) } -} +}; /// Errors returned by [`Language`] constructors / parsers. #[derive(Debug, Clone, PartialEq, Eq, Hash, thiserror::Error)] diff --git a/mediaframe/src/serde_impls.rs b/mediaframe/src/serde_impls.rs index 1c7a3e1..32852b5 100644 --- a/mediaframe/src/serde_impls.rs +++ b/mediaframe/src/serde_impls.rs @@ -122,7 +122,7 @@ serde_via_code!(crate::audio::BitRateMode); #[cfg(all(test, feature = "std"))] mod tests { use crate::{ - audio::{ChannelLayout, Fingerprint, Tags}, + audio::{ChannelLayout, CoverArt, Fingerprint, Tags}, capture::GeoLocation, codec::VideoCodec, color::{self, Matrix}, @@ -198,5 +198,10 @@ mod tests { round_trip(&fp); // Empty algorithm violates the invariant and must be rejected. assert!(serde_json::from_str::(r#"{"algorithm":"","value":[1,2,3]}"#).is_err()); + + let art = CoverArt::try_new("image/png", &b"\x89PNG"[..]).unwrap(); + round_trip(&art); + // Empty mime violates the invariant and must be rejected. + assert!(serde_json::from_str::(r#"{"mime":"","data":[1]}"#).is_err()); } } From 3a89a0734f9069bf8cf5af771c29d962f3e960da Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Fri, 22 May 2026 12:22:42 +1200 Subject: [PATCH 3/8] fix(serde, codex round 1): preserve `SampleFormat::Unknown(u32)` + reject corrupt codes on strictly-closed enums MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two codex findings on PR #7: 1. [high] `SampleFormat` round-trip lost `Unknown(u32)` values. The type has BOTH `Unknown(u32)` (lossless numeric escape) AND `Other(SmolStr)` (lossless string escape), but `serde_via_str!` routed everything through `as_str()`/`FromStr`. `Unknown(12345)` serialized to `"unknown"`, then deserialized via `FromStr` into `Other(SmolStr("unknown"))` — the original FFmpeg code was destroyed. Fix: bespoke serde impl (anonymous-const block, mirrors the project's feature-gated impl pattern): - serialize: `Unknown(v)` → numeric `v`; every other variant (named slugs + `Other`) → string slug via `as_str()`. - deserialize: `deserialize_any` accepts either a u32 (→ `from_u32`, preserving `Unknown`) or a string (→ `from_str`, preserving `Other`). - Includes `visit_u64`/`visit_i64` so formats that emit non-`u32` integers (ron / toml / yaml) work too; out-of-range values are rejected with a clear error. 2. [medium] `TrackOrigin` / `BitRateMode` silently canonicalised corrupt wire codes. Both are strictly-closed (no `Unknown` arm); their `from_u32` falls back to the default for any out-of-range code, so `serde_via_code!` would deserialize `999` as `TrackOrigin::Embedded` or `BitRateMode::Cbr` — adversarial input passes silently as valid. Fix: added `pub const fn try_from_u32(v: u32) -> Option` to each (strict counterpart to `from_u32`). New `serde_via_code_strict!` macro uses it and emits a serde error on `None`. The lossless `serde_via_code!` stays for enums that actually have an `Unknown(u32)` escape. Tests (in `serde_impls::tests`): - `sample_format_preserves_unknown_u32` — checks all three arms round-trip (named slug, `Other` slug, `Unknown(v)` for 12_345 / 0xDEADBEEF / u32::MAX) and that a numeric input matching a named variant's code canonicalises (matches `from_u32` contract). - `closed_coded_enums_reject_unknown_codes` — every variant round-trips, `999` and `3` are rejected as serde errors. Matrix (RUSTFLAGS=-Dwarnings cargo +1.95.0): check -p mediaframe ✓ check -p mediaframe --no-default-features --features alloc,serde ✓ check -p mediaframe --all-features ✓ test -p mediaframe --features serde --lib 161 pass (+2 new) --- mediaframe/src/audio/bit_rate_mode.rs | 14 ++ mediaframe/src/serde_impls.rs | 172 ++++++++++++++++++++++-- mediaframe/src/subtitle/track_origin.rs | 14 ++ 3 files changed, 192 insertions(+), 8 deletions(-) diff --git a/mediaframe/src/audio/bit_rate_mode.rs b/mediaframe/src/audio/bit_rate_mode.rs index a3ae62c..6568372 100644 --- a/mediaframe/src/audio/bit_rate_mode.rs +++ b/mediaframe/src/audio/bit_rate_mode.rs @@ -60,6 +60,20 @@ impl BitRateMode { _ => Self::Cbr, } } + + /// Strict counterpart to [`Self::from_u32`]: returns `None` for any code + /// outside the enumerated set, instead of silently mapping it to the + /// default. Used by the strict deserialize path so adversarial / corrupt + /// wire values fail loudly rather than masquerading as `Cbr`. + #[cfg_attr(not(tarpaulin), inline(always))] + pub const fn try_from_u32(v: u32) -> Option { + match v { + 0 => Some(Self::Cbr), + 1 => Some(Self::Vbr), + 2 => Some(Self::Abr), + _ => None, + } + } } #[cfg(test)] diff --git a/mediaframe/src/serde_impls.rs b/mediaframe/src/serde_impls.rs index 32852b5..7a27845 100644 --- a/mediaframe/src/serde_impls.rs +++ b/mediaframe/src/serde_impls.rs @@ -59,9 +59,10 @@ macro_rules! serde_via_str { }; } -/// Implements `Serialize` / `Deserialize` for a *closed* FFmpeg-coded -/// enum via its `to_u32()` / `from_u32()` round-trip. Total in both -/// directions — an unrecognised code rides the enum's `Unknown` arm. +/// Implements `Serialize` / `Deserialize` for a *closed* FFmpeg-coded enum +/// **whose `from_u32` is lossless** — i.e. it has an `Unknown(u32)` escape +/// arm so any code round-trips losslessly. Use this for enums where every +/// `u32` is meaningful wire data. macro_rules! serde_via_code { ($t:path) => { impl serde::Serialize for $t { @@ -82,6 +83,97 @@ macro_rules! serde_via_code { }; } +/// Implements `Serialize` / `Deserialize` for a **strictly-closed** +/// FFmpeg-coded enum — one with NO `Unknown(u32)` escape arm — via its +/// `to_u32()` / `try_from_u32()` pair. Adversarial / corrupt codes outside +/// the enumerated set are rejected as serde errors instead of silently +/// canonicalising to the default variant (which `from_u32` would do). +macro_rules! serde_via_code_strict { + ($t:path) => { + impl serde::Serialize for $t { + #[inline] + fn serialize(&self, ser: S) -> Result { + ser.serialize_u32(self.to_u32()) + } + } + + impl<'de> serde::Deserialize<'de> for $t { + #[inline] + fn deserialize>(de: D) -> Result { + let v = ::deserialize(de)?; + <$t>::try_from_u32(v).ok_or_else(|| { + serde::de::Error::custom(::std::format!( + "{}: unknown wire code {}", + stringify!($t), + v + )) + }) + } + } + }; +} + +/// Bespoke serde for [`SampleFormat`](crate::audio::SampleFormat) — it has +/// *both* `Unknown(u32)` (lossless numeric escape) **and** `Other(SmolStr)` +/// (lossless string escape). The generic `serde_via_str!` would route +/// `Unknown(12345)` through `as_str()` → `"unknown"` → `Other("unknown")`, +/// destroying the original code; the generic `serde_via_code!` would lose +/// the `Other` string variants. This impl preserves both: +/// +/// - **Serialize**: `Unknown(v)` → numeric `v`; every other variant +/// (named slugs + `Other`) → string slug. +/// - **Deserialize**: accept either a u32 (→ `from_u32`, preserving +/// `Unknown`) or a string (→ `from_str`, preserving `Other` and named +/// variants). +#[cfg(any(feature = "std", feature = "alloc"))] +const _: () = { + use crate::audio::SampleFormat; + use core::{fmt, str::FromStr}; + + impl serde::Serialize for SampleFormat { + fn serialize(&self, ser: S) -> Result { + match self { + SampleFormat::Unknown(v) => ser.serialize_u32(*v), + other => ser.serialize_str(other.as_str()), + } + } + } + + impl<'de> serde::Deserialize<'de> for SampleFormat { + fn deserialize>(de: D) -> Result { + struct V; + impl serde::de::Visitor<'_> for V { + type Value = SampleFormat; + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("a SampleFormat slug string or a u32 FFmpeg code") + } + fn visit_str(self, v: &str) -> Result { + // `FromStr` is `Infallible`; non-named slugs ride `Other(SmolStr)`. + SampleFormat::from_str(v).map_err(serde::de::Error::custom) + } + // Numeric inputs route through `from_u32` so `Unknown(v)` survives. + // Cover the integer width spread serde drivers actually produce — + // unsigned and signed, since some formats (e.g. ron, toml) prefer + // signed by default — and clamp/forward into u32. + fn visit_u32(self, v: u32) -> Result { + Ok(SampleFormat::from_u32(v)) + } + fn visit_u64(self, v: u64) -> Result { + u32::try_from(v) + .map(SampleFormat::from_u32) + .map_err(|_| serde::de::Error::custom("SampleFormat u32 code overflow")) + } + fn visit_i64(self, v: i64) -> Result { + u32::try_from(v) + .map(SampleFormat::from_u32) + .map_err(|_| serde::de::Error::custom("SampleFormat u32 code out of range")) + } + } + de.deserialize_any(V) + } + } +}; + // ── Closed FFmpeg-coded enums (available at every capability tier) ── serde_via_code!(crate::color::Matrix); serde_via_code!(crate::color::Primaries); @@ -108,16 +200,21 @@ serde_via_str!(crate::container::Format); serde_via_str!(crate::subtitle::Format); #[cfg(any(feature = "std", feature = "alloc"))] serde_via_str!(crate::audio::ChannelLayout); -#[cfg(any(feature = "std", feature = "alloc"))] -serde_via_str!(crate::audio::SampleFormat); +// `SampleFormat` has BOTH `Unknown(u32)` and `Other(SmolStr)` — the bespoke +// impl below (immediately after the macros) covers it; do NOT route it +// through `serde_via_str!` (would silently drop `Unknown(v)` codes through +// `as_str()` → `"unknown"` → `Other("unknown")`). #[cfg(any(feature = "std", feature = "alloc"))] serde_via_str!(crate::audio::ContainerFormat); -// ── Closed coded enums that live in heap-tier modules ── +// ── Strictly-closed coded enums (no `Unknown` escape) ── +// Use `serde_via_code_strict!` — adversarial / unknown wire codes are +// rejected as serde errors, not silently canonicalised to the default +// (which `from_u32` would do for `TrackOrigin::from_u32(999) == Embedded`). #[cfg(any(feature = "std", feature = "alloc"))] -serde_via_code!(crate::subtitle::TrackOrigin); +serde_via_code_strict!(crate::subtitle::TrackOrigin); #[cfg(any(feature = "std", feature = "alloc"))] -serde_via_code!(crate::audio::BitRateMode); +serde_via_code_strict!(crate::audio::BitRateMode); #[cfg(all(test, feature = "std"))] mod tests { @@ -204,4 +301,63 @@ mod tests { // Empty mime violates the invariant and must be rejected. assert!(serde_json::from_str::(r#"{"mime":"","data":[1]}"#).is_err()); } + + // ── Codex round 1 findings ── + + /// `SampleFormat` has both `Unknown(u32)` (lossless numeric escape) and + /// `Other(SmolStr)` (lossless string escape). Every round-trip must + /// preserve which arm a value came from — earlier the type rode the pure + /// string path, so `Unknown(12345)` → `"unknown"` → `Other("unknown")` + /// silently destroyed the FFmpeg code. + #[test] + fn sample_format_preserves_unknown_u32() { + use crate::audio::SampleFormat; + // Named variant — slug. + assert_eq!( + serde_json::to_string(&SampleFormat::S16).unwrap(), + "\"s16\"" + ); + round_trip(&SampleFormat::S16); + // `Other` slug variant — string. + let other = SampleFormat::Other(smol_str::SmolStr::new("custom")); + assert_eq!(serde_json::to_string(&other).unwrap(), "\"custom\""); + round_trip(&other); + // `Unknown(v)` — numeric, MUST stay `Unknown(v)` after round-trip. + for v in [12_345u32, 0xDEAD_BEEFu32, u32::MAX] { + let fmt = SampleFormat::Unknown(v); + assert_eq!(serde_json::to_string(&fmt).unwrap(), v.to_string()); + let back: SampleFormat = serde_json::from_str(&v.to_string()).unwrap(); + assert_eq!(back, fmt, "lost Unknown({v}) on round-trip"); + } + // A pure numeric input that happens to match a named variant's code + // *does* canonicalise to the named arm — that's `from_u32`'s contract. + let from_named_code: SampleFormat = serde_json::from_str("1").unwrap(); + assert_eq!(from_named_code, SampleFormat::S16); + } + + /// Strictly-closed coded enums (no `Unknown` arm) must REJECT unknown + /// wire codes instead of silently mapping them to the default. Previously + /// `from_u32(999)` quietly returned `Embedded` / `Cbr`, so corrupt input + /// looked like valid data on the consumer side. + #[test] + fn closed_coded_enums_reject_unknown_codes() { + use crate::{audio::BitRateMode, subtitle::TrackOrigin}; + + for o in [ + TrackOrigin::Embedded, + TrackOrigin::Sidecar, + TrackOrigin::External, + ] { + round_trip(&o); + } + for m in [BitRateMode::Cbr, BitRateMode::Vbr, BitRateMode::Abr] { + round_trip(&m); + } + + // Out-of-range codes are rejected — not canonicalised to the default. + assert!(serde_json::from_str::("999").is_err()); + assert!(serde_json::from_str::("3").is_err()); + assert!(serde_json::from_str::("999").is_err()); + assert!(serde_json::from_str::("3").is_err()); + } } diff --git a/mediaframe/src/subtitle/track_origin.rs b/mediaframe/src/subtitle/track_origin.rs index c924f37..305c125 100644 --- a/mediaframe/src/subtitle/track_origin.rs +++ b/mediaframe/src/subtitle/track_origin.rs @@ -72,6 +72,20 @@ impl TrackOrigin { _ => Self::Embedded, } } + + /// Strict counterpart to [`Self::from_u32`]: returns `None` for any code + /// outside the enumerated set, instead of silently mapping it to the + /// default. Used by the strict deserialize path so adversarial / corrupt + /// wire values fail loudly rather than masquerading as `Embedded`. + #[cfg_attr(not(tarpaulin), inline(always))] + pub const fn try_from_u32(v: u32) -> Option { + match v { + 0 => Some(Self::Embedded), + 1 => Some(Self::Sidecar), + 2 => Some(Self::External), + _ => None, + } + } } #[cfg(test)] From 8c3603f709bb473350f29cddb691cd0eb16fa004 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Fri, 22 May 2026 12:38:37 +1200 Subject: [PATCH 4/8] fix(serde, codex round 2): binary-format SampleFormat + `serde(default)` for sparse-deserializable structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two codex findings on round 2: 1. [medium] `SampleFormat` deserialize was JSON-only — `deserialize_any` doesn't work on non-self-describing formats (bincode/postcard). The crate advertised generic serde, so binary-driver users would silently fail to round-trip. Fix: branch on `Serializer::is_human_readable()` / `Deserializer:: is_human_readable()`: - Self-describing (JSON/YAML/RON/TOML): existing bare-value shape — `Unknown(v)` → number, named/Other → string, visitor uses `deserialize_any`. - Non-self-describing (bincode/postcard/etc): explicit 2-variant tagged `BinaryWire<'a>{ Code(u32), Slug(Cow<'a, str>) }`. The enum derive picks a compact discriminator; consumers don't need `deserialize_any`. New regression test `sample_format_postcard_binary_roundtrip` exercises the binary branch via postcard for all three arms (named / Other / Unknown). Added `postcard 1` to dev-dependencies (alloc-only — keeps the no-std build matrix unchanged). 2. [medium] Default-backed metadata structs rejected sparse JSON. `Tags` (and the related `Device` / `Loudness`) document empty/zero as the absent sentinel, but the serde derive treated every field as required. A JSON object containing only `{"title":"x"}` failed to decode, and any future field addition would break older persisted records. Fix: added `serde(default)` at the container level on each of `Tags`, `Device`, `Loudness` so missing fields route through the type-level `Default` impl. The other VOs (color/frame value objects) keep their strict-required shape — those fields are wire data, not absence sentinels, and silently defaulting them would mask data corruption. New regression test `sparse_json_uses_serde_default_on_default_backed_structs` asserts a one-field JSON deserializes correctly for all three structs. Matrix (RUSTFLAGS=-Dwarnings cargo +1.95.0): test -p mediaframe --features serde --lib 163 pass (+2 new) check -p mediaframe ✓ check -p mediaframe --features arbitrary,serde ✓ check -p mediaframe --no-default-features --features alloc,serde ✓ check -p mediaframe --all-features ✓ --- mediaframe/Cargo.toml | 5 +- mediaframe/src/audio/loudness.rs | 9 +- mediaframe/src/audio/tags.rs | 9 +- mediaframe/src/capture/device.rs | 9 +- mediaframe/src/serde_impls.rs | 162 ++++++++++++++++++++++++------- 5 files changed, 154 insertions(+), 40 deletions(-) diff --git a/mediaframe/Cargo.toml b/mediaframe/Cargo.toml index b463434..b46957f 100644 --- a/mediaframe/Cargo.toml +++ b/mediaframe/Cargo.toml @@ -68,8 +68,11 @@ smol_str = { version = "0.3", default-features = false, optional = true } serde = { version = "1", default-features = false, optional = true, features = ["derive"] } [dev-dependencies] -# Concrete serde format for the `serde` round-trip tests (std-only). +# Concrete serde formats for the `serde` round-trip tests (std-only). +# `serde_json` covers the human-readable path; `postcard` covers the +# non-self-describing binary path used by `SampleFormat`'s tagged wire. serde_json = "1" +postcard = { version = "1", default-features = false, features = ["alloc"] } [profile.bench] opt-level = 3 diff --git a/mediaframe/src/audio/loudness.rs b/mediaframe/src/audio/loudness.rs index 2ed1b5d..e739021 100644 --- a/mediaframe/src/audio/loudness.rs +++ b/mediaframe/src/audio/loudness.rs @@ -23,7 +23,14 @@ /// /// `f32` storage precludes `Eq`/`Hash` (NaN ≠ NaN); the derives are /// limited to `Debug`/`Clone`/`Copy`/`PartialEq`. -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +// `serde(default)` keeps sparse / older-schema JSON deserializable: missing +// fields fall back to the type-level `Default` impl — the all-zero +// "silent / fresh measurement" sentinel. +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(default) +)] #[derive(Debug, Clone, Copy, PartialEq)] pub struct Loudness { integrated_lufs: f32, diff --git a/mediaframe/src/audio/tags.rs b/mediaframe/src/audio/tags.rs index 51b8c8d..ba84fc4 100644 --- a/mediaframe/src/audio/tags.rs +++ b/mediaframe/src/audio/tags.rs @@ -17,7 +17,14 @@ use smol_str::SmolStr; /// - Numeric fields use `Option` because `0` is a *valid* /// value (year `0` exists historically; "track 0" sometimes /// appears in test files), so the absent state must be distinct. -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +// `serde(default)` keeps sparse / older-schema JSON deserializable: missing +// fields fall back to the type-level `Default` impl (`Tags::new()` — all +// fields absent / empty), matching the absent-vs-empty convention above. +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(default) +)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Tags { title: SmolStr, diff --git a/mediaframe/src/capture/device.rs b/mediaframe/src/capture/device.rs index 4e09460..a78de40 100644 --- a/mediaframe/src/capture/device.rs +++ b/mediaframe/src/capture/device.rs @@ -21,7 +21,14 @@ use smol_str::SmolStr; /// sentinel for "absent" so callers never need `Option` /// (matches the codec / source-tagging convention elsewhere in this /// crate). Use [`Self::is_empty`] to detect the fully-absent state. -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +// `serde(default)` keeps sparse / older-schema JSON deserializable: missing +// fields fall back to the type-level `Default` impl (`Device::new()` — both +// `make` and `model` empty), matching the empty-string-means-absent convention. +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(default) +)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Device { make: SmolStr, diff --git a/mediaframe/src/serde_impls.rs b/mediaframe/src/serde_impls.rs index 7a27845..b948022 100644 --- a/mediaframe/src/serde_impls.rs +++ b/mediaframe/src/serde_impls.rs @@ -118,58 +118,89 @@ macro_rules! serde_via_code_strict { /// (lossless string escape). The generic `serde_via_str!` would route /// `Unknown(12345)` through `as_str()` → `"unknown"` → `Other("unknown")`, /// destroying the original code; the generic `serde_via_code!` would lose -/// the `Other` string variants. This impl preserves both: +/// the `Other` string variants. /// -/// - **Serialize**: `Unknown(v)` → numeric `v`; every other variant -/// (named slugs + `Other`) → string slug. -/// - **Deserialize**: accept either a u32 (→ `from_u32`, preserving -/// `Unknown`) or a string (→ `from_str`, preserving `Other` and named -/// variants). +/// **Wire shape — branches on `Serializer::is_human_readable()`:** +/// +/// - **Self-describing (JSON / YAML / RON / TOML / etc.)** — bare value: +/// `Unknown(v)` → number; named slug + `Other` → string. The visitor +/// uses `deserialize_any` to choose the arm at decode time. +/// - **Non-self-describing (bincode / postcard / etc.)** — explicit +/// 2-variant tagged enum (`{Code(u32), Slug(String)}`), since +/// `deserialize_any` is not supported on these formats. Wire bytes are +/// compact and the variant tag drives reconstruction unambiguously. #[cfg(any(feature = "std", feature = "alloc"))] const _: () = { use crate::audio::SampleFormat; use core::{fmt, str::FromStr}; + // Tagged representation used only on non-self-describing formats. The + // derive picks a compact discriminant + payload; downstream binary serde + // drivers know exactly how to round-trip it without `deserialize_any`. + #[derive(serde::Serialize, serde::Deserialize)] + enum BinaryWire<'a> { + Code(u32), + Slug(::std::borrow::Cow<'a, str>), + } + impl serde::Serialize for SampleFormat { fn serialize(&self, ser: S) -> Result { - match self { - SampleFormat::Unknown(v) => ser.serialize_u32(*v), - other => ser.serialize_str(other.as_str()), + if ser.is_human_readable() { + // Bare value — current human-readable shape. + match self { + SampleFormat::Unknown(v) => ser.serialize_u32(*v), + other => ser.serialize_str(other.as_str()), + } + } else { + // Tagged wire for binary formats. + match self { + SampleFormat::Unknown(v) => BinaryWire::Code(*v).serialize(ser), + other => BinaryWire::Slug(::std::borrow::Cow::Borrowed(other.as_str())).serialize(ser), + } } } } impl<'de> serde::Deserialize<'de> for SampleFormat { fn deserialize>(de: D) -> Result { - struct V; - impl serde::de::Visitor<'_> for V { - type Value = SampleFormat; - fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("a SampleFormat slug string or a u32 FFmpeg code") - } - fn visit_str(self, v: &str) -> Result { - // `FromStr` is `Infallible`; non-named slugs ride `Other(SmolStr)`. - SampleFormat::from_str(v).map_err(serde::de::Error::custom) - } - // Numeric inputs route through `from_u32` so `Unknown(v)` survives. - // Cover the integer width spread serde drivers actually produce — - // unsigned and signed, since some formats (e.g. ron, toml) prefer - // signed by default — and clamp/forward into u32. - fn visit_u32(self, v: u32) -> Result { - Ok(SampleFormat::from_u32(v)) - } - fn visit_u64(self, v: u64) -> Result { - u32::try_from(v) - .map(SampleFormat::from_u32) - .map_err(|_| serde::de::Error::custom("SampleFormat u32 code overflow")) - } - fn visit_i64(self, v: i64) -> Result { - u32::try_from(v) - .map(SampleFormat::from_u32) - .map_err(|_| serde::de::Error::custom("SampleFormat u32 code out of range")) + if de.is_human_readable() { + // Self-describing: accept either a u32 OR a string via `deserialize_any`. + struct V; + impl serde::de::Visitor<'_> for V { + type Value = SampleFormat; + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("a SampleFormat slug string or a u32 FFmpeg code") + } + fn visit_str(self, v: &str) -> Result { + // `FromStr` is `Infallible`; non-named slugs ride `Other(SmolStr)`. + SampleFormat::from_str(v).map_err(serde::de::Error::custom) + } + // Numeric inputs route through `from_u32` so `Unknown(v)` survives. + // Cover the integer width spread serde drivers actually produce. + fn visit_u32(self, v: u32) -> Result { + Ok(SampleFormat::from_u32(v)) + } + fn visit_u64(self, v: u64) -> Result { + u32::try_from(v) + .map(SampleFormat::from_u32) + .map_err(|_| serde::de::Error::custom("SampleFormat u32 code overflow")) + } + fn visit_i64(self, v: i64) -> Result { + u32::try_from(v) + .map(SampleFormat::from_u32) + .map_err(|_| serde::de::Error::custom("SampleFormat u32 code out of range")) + } } + de.deserialize_any(V) + } else { + // Non-self-describing: drive the tagged wire enum, then convert. + let w = BinaryWire::deserialize(de)?; + Ok(match w { + BinaryWire::Code(v) => SampleFormat::from_u32(v), + // `FromStr` is `Infallible` for `SampleFormat`. + BinaryWire::Slug(s) => SampleFormat::from_str(&s).unwrap(), + }) } - de.deserialize_any(V) } } }; @@ -360,4 +391,63 @@ mod tests { assert!(serde_json::from_str::("999").is_err()); assert!(serde_json::from_str::("3").is_err()); } + + // ── Codex round 2 findings ── + + /// `SampleFormat`'s `deserialize_any` path only works on self-describing + /// formats. Non-self-describing binary formats (bincode/postcard) need + /// an explicit tagged wire; the impl branches on `is_human_readable()` + /// and serializes through a 2-variant `BinaryWire` enum. This test + /// exercises the binary branch via postcard. + #[test] + fn sample_format_postcard_binary_roundtrip() { + use crate::audio::SampleFormat; + + fn binary_round_trip(v: &SampleFormat) -> SampleFormat { + let bytes = postcard::to_allocvec(v).expect("postcard serialize"); + postcard::from_bytes::(&bytes).expect("postcard deserialize") + } + + // Named — `Slug` arm of the wire. + assert_eq!(binary_round_trip(&SampleFormat::S16), SampleFormat::S16); + // `Other` slug — also `Slug` arm. + let other = SampleFormat::Other(smol_str::SmolStr::new("custom")); + assert_eq!(binary_round_trip(&other), other); + // `Unknown(v)` — `Code` arm; the u32 must survive losslessly. + for v in [12_345u32, 0xDEAD_BEEFu32, u32::MAX] { + let fmt = SampleFormat::Unknown(v); + let back = binary_round_trip(&fmt); + assert_eq!(back, fmt, "lost Unknown({v}) on postcard round-trip"); + } + } + + /// Default-backed metadata structs must accept sparse JSON — missing + /// fields default rather than failing — so older / partial records + /// remain readable as the schema evolves. `serde(default)` at the + /// container level routes missing fields through `Default`. + #[test] + fn sparse_json_uses_serde_default_on_default_backed_structs() { + use crate::{ + audio::{Loudness, Tags}, + capture::Device, + }; + + // Tags: only `title` present; the rest fall back to absent sentinels. + let t: Tags = serde_json::from_str(r#"{"title":"hello"}"#).unwrap(); + let expected = Tags::new().with_title(smol_str::SmolStr::new("hello")); + assert_eq!(t, expected); + + // Tags: completely empty object → fully-default value (no missing-field error). + let empty: Tags = serde_json::from_str("{}").unwrap(); + assert_eq!(empty, Tags::default()); + + // Device: only `make` present. + let d: Device = serde_json::from_str(r#"{"make":"Apple"}"#).unwrap(); + let expected = Device::new().with_make(smol_str::SmolStr::new("Apple")); + assert_eq!(d, expected); + + // Loudness: partial measurement. + let l: Loudness = serde_json::from_str(r#"{"integrated_lufs":-23.0}"#).unwrap(); + assert_eq!(l, Loudness::new(-23.0, 0.0, 0.0, 0.0)); + } } From 96fb48cab0782e4ec4b3dfd6499847f7429b5624 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Fri, 22 May 2026 12:46:17 +1200 Subject: [PATCH 5/8] docs(serde, codex round 3): split lossless vs strict closed-enum wire contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex round 3 flagged docs/code mismatch: CHANGELOG + README + module doc all claimed "both round-trips total" and grouped `TrackOrigin` / `BitRateMode` with lossless `Unknown(u32)` enums — but round 2 routed those two through `serde_via_code_strict!`, which rejects unknown codes. A reader of the docs would expect forward-compat round-tripping; the impl actually fails on out-of-range input. Split the docs into three explicit buckets: 1. Open string enums (slug + `Other` escape). 2. Closed FFmpeg-coded with lossless `Unknown(u32)` arm. 3. Strictly-closed (`TrackOrigin`, `BitRateMode`) — reject unknown codes; `try_from_u32` backs the rejection. Module doc additionally calls out the `SampleFormat` bespoke (dual `Unknown(u32)` + `Other(SmolStr)`, human-readable vs binary branch). README + CHANGELOG bullet refactored to match. No code change (the rejection behavior is intentional per round 2). Test suite unchanged: 163 pass. --- CHANGELOG.md | 18 +++++++++++------ README.md | 12 +++++++---- mediaframe/src/serde_impls.rs | 38 +++++++++++++++++++++++++---------- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4c3a12..20471d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,13 +17,19 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). `audio::{ChannelLayout, SampleFormat, ContainerFormat}`) serialize as their canonical `as_str()` slug — `VideoCodec::H264` ⇄ `"h264"`, `Other("x265")` ⇄ `"x265"` (no `{"Other": …}` wrapper). - - **Closed** FFmpeg-coded enums (`color::{Matrix, Primaries, Transfer, - DynamicRange, ChromaLocation, DcpTargetGamut}`, - `pixel_format::PixelFormat`, `frame::{Rotation, FieldOrder, StereoMode}`, - `subtitle::TrackOrigin`, `audio::BitRateMode`) and + - **Closed FFmpeg-coded enums with a lossless `Unknown(u32)` escape** + (`color::{Matrix, Primaries, Transfer, DynamicRange, ChromaLocation, + DcpTargetGamut}`, `pixel_format::PixelFormat`, + `frame::{Rotation, FieldOrder, StereoMode}`) and `disposition::TrackDisposition` serialize as their `to_u32()` integer. - Both round-trips are total (unknown slug → `Other`, unknown code → - `Unknown`). + Round-trip is total — unknown codes deserialize to `Unknown(v)` / + unknown slugs to `Other`. + - **Strictly-closed coded enums (no `Unknown` arm)** — + `subtitle::TrackOrigin` (`Embedded`/`Sidecar`/`External`) and + `audio::BitRateMode` (`Cbr`/`Vbr`/`Abr`) — serialize as their `to_u32()` + integer but **reject unrecognised wire codes** as serde errors instead + of silently collapsing them to the default variant. Both expose a + `try_from_u32(v: u32) -> Option` method backing this behavior. - **Plain structs** (`color::Info` and its HDR/mastering sub-structs, `frame::{Dimensions, Rect, Rational, SampleAspectRatio, FrameRate}`, `audio::{Loudness, Tags, Device}`… ) derive serde directly. diff --git a/README.md b/README.md index 2ca5a79..3b3d741 100644 --- a/README.md +++ b/README.md @@ -77,10 +77,14 @@ can all speak to without agreeing on anything heavier. `--features buffa`. - **`serde`** — optional `serde::{Serialize, Deserialize}` for the whole descriptor vocabulary. Open codec / format enums serialize as their - `as_str()` slug, closed FFmpeg-coded enums as their `to_u32()` integer - (both round-trips total), `lang::Language` as its BCP-47 string; - validated structs (`GeoLocation` / `Fingerprint` / `CoverArt`) - deserialize through their checking constructors. Orthogonal to the + `as_str()` slug (unknown → `Other`); FFmpeg-coded enums with an + `Unknown(u32)` arm serialize as `to_u32()` and round-trip totally; + **strictly-closed coded enums** (`subtitle::TrackOrigin`, + `audio::BitRateMode`) also serialize as `to_u32()` but **reject** unknown + wire codes as serde errors rather than collapsing them to the default; + `lang::Language` as its BCP-47 string; validated structs (`GeoLocation` / + `Fingerprint` / `CoverArt`) deserialize through their checking + constructors. Orthogonal to the capability tiers (no-alloc Copy types included). Off by default — enable with `--features serde`. - **`PixelSink`** + **`SourceFormat`** sealed traits re-exported at diff --git a/mediaframe/src/serde_impls.rs b/mediaframe/src/serde_impls.rs index b948022..3ef90ae 100644 --- a/mediaframe/src/serde_impls.rs +++ b/mediaframe/src/serde_impls.rs @@ -8,18 +8,34 @@ //! - **Open** codec / format enums (those with an `Other(SmolStr)` escape //! arm and a total [`FromStr`](core::str::FromStr)) serialize as their //! canonical `as_str()` slug — e.g. `VideoCodec::H264` ⇄ `"h264"`, -//! `Other("x265")` ⇄ `"x265"` (no `{"Other": …}` wrapper). -//! - **Closed** FFmpeg-coded enums (those with `to_u32()` / `from_u32()` -//! and no `FromStr`) serialize as their `u32` code — e.g. -//! `color::Matrix::Bt709` ⇄ `1`. +//! `Other("x265")` ⇄ `"x265"` (no `{"Other": …}` wrapper). Round-trip +//! total: an unrecognised slug rides the `Other` arm. +//! - **Closed FFmpeg-coded enums with a lossless `Unknown(u32)` escape** +//! (color enums, pixel-format, frame coded enums, `TrackDisposition` +//! bitflags) serialize as their `u32` code — e.g. `color::Matrix::Bt709` +//! ⇄ `1`. Round-trip total: an unrecognised code rides the `Unknown` +//! arm. +//! - **Strictly-closed coded enums (no `Unknown` arm)** — +//! [`crate::subtitle::TrackOrigin`] and [`crate::audio::BitRateMode`] — +//! serialize as their `u32` code but **reject** unknown wire codes as +//! serde errors instead of silently collapsing them to the default +//! variant (which `from_u32` would do). This is intentional: a corrupt +//! or out-of-range value on the wire must fail loudly rather than +//! masquerade as `Embedded` / `Cbr`. The check is backed by each type's +//! `try_from_u32(v: u32) -> Option` method. +//! - **[`crate::audio::SampleFormat`]** — has BOTH `Unknown(u32)` and +//! `Other(SmolStr)` escape arms. Bespoke impl preserves both: on +//! self-describing formats (JSON/YAML/etc.) it emits a bare value +//! (number for `Unknown`, string for named / `Other`); on +//! non-self-describing binary formats (bincode/postcard) it routes +//! through an explicit `{Code(u32), Slug(Cow)}` tagged enum. //! -//! Both round-trips are total: an unrecognised slug rides the `Other` -//! arm, an unrecognised code the `Unknown` arm. The plain data structs -//! (`color::Info`, `frame::Dimensions`, `audio::Tags`, …) derive serde -//! at their definition site; the validated structs -//! (`capture::GeoLocation`, `audio::Fingerprint`, `audio::CoverArt`) -//! route deserialize through their checking constructors there too. -//! `lang::Language` carries a bespoke BCP-47 string impl in its module. +//! The plain data structs (`color::Info`, `frame::Dimensions`, +//! `audio::Tags`, …) derive serde at their definition site; the +//! validated structs (`capture::GeoLocation`, `audio::Fingerprint`, +//! `audio::CoverArt`) route deserialize through their checking +//! constructors there too. `lang::Language` carries a bespoke BCP-47 +//! string impl in its module. /// Implements `Serialize` / `Deserialize` for an *open* enum via its /// canonical string slug (`as_str()` to serialize, [`FromStr`] to parse). From 0653597a6f35efbce054b29457bf5822d870648d Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Fri, 22 May 2026 12:55:11 +1200 Subject: [PATCH 6/8] =?UTF-8?q?docs(serde,=20codex=20round=204):=20CHANGEL?= =?UTF-8?q?OG=20=E2=80=94=20dedicated=20SampleFormat=20bullet?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 4 caught the last docs/code mismatch: CHANGELOG's open-enum bullet still listed `audio::SampleFormat` alongside the slug-only enums, claiming it serializes as a canonical `as_str()` string. That's wrong — the bespoke impl emits `Unknown(v)` as a numeric code on human-readable formats and uses the `BinaryWire::{Code, Slug}` tagged enum on binary formats. Removed `SampleFormat` from the slug-only bullet; added a dedicated bullet documenting the dual-escape (`Unknown(u32)` + `Other(SmolStr)`) contract and the human-readable vs binary wire split — matching `serde_impls.rs` and the README. No code change. --- CHANGELOG.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20471d6..9ea6bcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,17 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). serde-`json` value matches their representation: - **Open** codec / format enums (`codec::{Video,Audio,Subtitle}Codec`, `container::Format`, `subtitle::Format`, - `audio::{ChannelLayout, SampleFormat, ContainerFormat}`) serialize as - their canonical `as_str()` slug — `VideoCodec::H264` ⇄ `"h264"`, - `Other("x265")` ⇄ `"x265"` (no `{"Other": …}` wrapper). + `audio::{ChannelLayout, ContainerFormat}`) serialize as their canonical + `as_str()` slug — `VideoCodec::H264` ⇄ `"h264"`, `Other("x265")` ⇄ + `"x265"` (no `{"Other": …}` wrapper). + - **`audio::SampleFormat`** — has BOTH an `Unknown(u32)` numeric escape + AND an `Other(SmolStr)` string escape, so it gets a bespoke impl rather + than the slug-only path. On **human-readable** formats (JSON / YAML / + …): named + `Other` values serialize as their `as_str()` string, + `Unknown(v)` as the bare numeric code `v`. On **non-human-readable** + binary formats (bincode / postcard / …): an explicit tagged + `{Code(u32), Slug(Cow)}` wire enum, since `deserialize_any` is + unavailable there. All three arms round-trip losslessly on both. - **Closed FFmpeg-coded enums with a lossless `Unknown(u32)` escape** (`color::{Matrix, Primaries, Transfer, DynamicRange, ChromaLocation, DcpTargetGamut}`, `pixel_format::PixelFormat`, From 8a7d1d149977951664963de9bed08621ee94c5cb Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Fri, 22 May 2026 13:02:40 +1200 Subject: [PATCH 7/8] docs(serde, codex round 5): per-type README serde table + drop bogus slug-fallback claim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two doc-contract nits from round 5: 1. README's `serde` bullet still gave broad "open enums → slug / coded enums → to_u32" rules that implicitly swept in `SampleFormat`. Replaced with an explicit per-type sub-list: open enums, Unknown-arm coded enums, strictly-closed coded enums, the `SampleFormat` bespoke, and `lang::Language` — each with its actual wire shape. 2. CHANGELOG's Unknown-arm coded-enum bullet claimed "unknown slugs → `Other`" — but those enums have no `Other` arm and accept only integers. Dropped the slug clause; the `Other`-fallback wording belongs only to the open string enums (where it already appears). No code change. --- CHANGELOG.md | 4 ++-- README.md | 30 +++++++++++++++++++----------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ea6bcb..da1e80b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,8 +30,8 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). DcpTargetGamut}`, `pixel_format::PixelFormat`, `frame::{Rotation, FieldOrder, StereoMode}`) and `disposition::TrackDisposition` serialize as their `to_u32()` integer. - Round-trip is total — unknown codes deserialize to `Unknown(v)` / - unknown slugs to `Other`. + Round-trip is total: an unrecognised *code* deserializes to `Unknown(v)`. + These accept only integers — there is no slug form. - **Strictly-closed coded enums (no `Unknown` arm)** — `subtitle::TrackOrigin` (`Embedded`/`Sidecar`/`External`) and `audio::BitRateMode` (`Cbr`/`Vbr`/`Abr`) — serialize as their `to_u32()` diff --git a/README.md b/README.md index 3b3d741..a4d3b0e 100644 --- a/README.md +++ b/README.md @@ -76,17 +76,25 @@ can all speak to without agreeing on anything heavier. `.mediaframe.v1` → `::mediaframe`. Off by default — enable with `--features buffa`. - **`serde`** — optional `serde::{Serialize, Deserialize}` for the whole - descriptor vocabulary. Open codec / format enums serialize as their - `as_str()` slug (unknown → `Other`); FFmpeg-coded enums with an - `Unknown(u32)` arm serialize as `to_u32()` and round-trip totally; - **strictly-closed coded enums** (`subtitle::TrackOrigin`, - `audio::BitRateMode`) also serialize as `to_u32()` but **reject** unknown - wire codes as serde errors rather than collapsing them to the default; - `lang::Language` as its BCP-47 string; validated structs (`GeoLocation` / - `Fingerprint` / `CoverArt`) deserialize through their checking - constructors. Orthogonal to the - capability tiers (no-alloc Copy types included). Off by default — - enable with `--features serde`. + descriptor vocabulary. Wire shape by type: + - Open codec / format enums (`codec::*`, `container::Format`, + `subtitle::Format`, `audio::{ChannelLayout, ContainerFormat}`) — the + `as_str()` slug, unknown slugs ride `Other`. + - FFmpeg-coded enums with an `Unknown(u32)` arm (colour, pixel-format, + frame coded enums, `TrackDisposition`) — the `to_u32()` integer; + unknown *codes* round-trip via `Unknown` (no slug form). + - Strictly-closed coded enums (`subtitle::TrackOrigin`, + `audio::BitRateMode`) — the `to_u32()` integer, but unknown codes are + **rejected** as serde errors rather than collapsing to the default. + - `audio::SampleFormat` (both `Unknown(u32)` and `Other(SmolStr)`) — + bespoke: human-readable formats emit a string for named/`Other` and a + number for `Unknown`; binary formats use a tagged `{Code, Slug}` wire. + - `lang::Language` — its BCP-47 string. Validated structs (`GeoLocation` + / `Fingerprint` / `CoverArt`) deserialize through their checking + constructors. + + Orthogonal to the capability tiers (no-alloc Copy types included). Off + by default — enable with `--features serde`. - **`PixelSink`** + **`SourceFormat`** sealed traits re-exported at the crate root. From f6fa95a9c5db525d4b6f46dcb8164d6bc64a8e79 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Fri, 22 May 2026 13:19:17 +1200 Subject: [PATCH 8/8] fix(serde): `#[allow(unused_macros)]` on `serde_via_code_strict!` for the no-alloc tier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI failed `--no-default-features --features serde` on test (ubuntu) + build/clippy (macos) with `error: unused macro definition: serde_via_code_strict`. `serde_via_code_strict!`'s only two invocations — `subtitle::TrackOrigin` and `audio::BitRateMode` — are both heap-tier, gated on `any(feature = "std", feature = "alloc")`. Under bare `--features serde` (the no-alloc capability tier) both are cfg'd out, leaving the macro defined-but-unused → `unused_macros` → `-D warnings` error. Added `#[allow(unused_macros)]`, exactly as the sibling `serde_via_str!` macro already carries for the same reason (its codec/format invocations are also heap-tier-only). --- mediaframe/src/serde_impls.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mediaframe/src/serde_impls.rs b/mediaframe/src/serde_impls.rs index 3ef90ae..6907790 100644 --- a/mediaframe/src/serde_impls.rs +++ b/mediaframe/src/serde_impls.rs @@ -104,6 +104,11 @@ macro_rules! serde_via_code { /// `to_u32()` / `try_from_u32()` pair. Adversarial / corrupt codes outside /// the enumerated set are rejected as serde errors instead of silently /// canonicalising to the default variant (which `from_u32` would do). +// Both invocations (`TrackOrigin` / `BitRateMode`) are heap-tier — gated on +// `any(feature = "std", feature = "alloc")`. Under bare `--features serde` +// (no-alloc tier) they are cfg'd out and the macro is unused; the `allow` +// silences the resulting `unused_macros` lint, exactly as for `serde_via_str!`. +#[allow(unused_macros)] macro_rules! serde_via_code_strict { ($t:path) => { impl serde::Serialize for $t {