From c83ff286e04c9c32165b00372e8befa0dcfa8687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Sch=C3=A4fer?= Date: Wed, 26 Jun 2024 18:26:13 +0200 Subject: [PATCH] xso-proc: add support for text codecs Text codecs allow customizing the conversion of data from/to XML, in particular in two scenarios: 1. When the type for which the behaviour is to be defined comes from a foreign crate, preventing the implementation of FromXmlText/IntoXmlText. 2. When there is not one obvious, or more than one sensible, way to convert a value to XML text and back. --- parsers/src/util/macro_tests.rs | 27 ++++++++++++ xso-proc/src/compound.rs | 4 +- xso-proc/src/field.rs | 46 ++++++++++++++------- xso-proc/src/meta.rs | 29 +++++++++++-- xso-proc/src/types.rs | 73 +++++++++++++++++++++++++++++++++ xso/src/from_xml_doc.md | 43 ++++++++++++++++--- xso/src/lib.rs | 5 ++- xso/src/text.rs | 57 +++++++++++++++++++++++++ 8 files changed, 258 insertions(+), 26 deletions(-) diff --git a/parsers/src/util/macro_tests.rs b/parsers/src/util/macro_tests.rs index ca480d6..c7b5f9e 100644 --- a/parsers/src/util/macro_tests.rs +++ b/parsers/src/util/macro_tests.rs @@ -464,3 +464,30 @@ fn fails_text_without_text_consumer_positive() { other => panic!("unexpected result: {:?}", other), } } + +#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)] +#[xml(namespace = NS1, name = "text")] +struct TextWithCodec { + #[xml(text(codec = xso::text::EmptyAsNone))] + text: std::option::Option, +} + +#[test] +fn text_with_codec_roundtrip_empty() { + #[allow(unused_imports)] + use std::{ + option::Option::{None, Some}, + result::Result::{Err, Ok}, + }; + roundtrip_full::(""); +} + +#[test] +fn text_with_codec_roundtrip_non_empty() { + #[allow(unused_imports)] + use std::{ + option::Option::{None, Some}, + result::Result::{Err, Ok}, + }; + roundtrip_full::("hello"); +} diff --git a/xso-proc/src/compound.rs b/xso-proc/src/compound.rs index 55f41da..128aa8c 100644 --- a/xso-proc/src/compound.rs +++ 
b/xso-proc/src/compound.rs @@ -280,9 +280,9 @@ impl Compound { State::new(state_name) .with_field(&bound_name, field.ty()) .with_impl(quote! { - ::core::option::Option::Some(::xso::exports::rxml::Event::Text( + #generator.map(|value| ::xso::exports::rxml::Event::Text( ::xso::exports::rxml::parser::EventMetrics::zero(), - #generator, + value, )) }), ); diff --git a/xso-proc/src/field.rs b/xso-proc/src/field.rs index 410abf7..e502613 100644 --- a/xso-proc/src/field.rs +++ b/xso-proc/src/field.rs @@ -17,6 +17,7 @@ use crate::meta::{Flag, NameRef, NamespaceRef, XmlFieldMeta}; use crate::scope::{FromEventsScope, IntoEventsScope}; use crate::types::{ default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty, + text_codec_decode_fn, text_codec_encode_fn, }; /// Code slices necessary for declaring and initializing a temporary variable @@ -98,7 +99,10 @@ enum FieldKind { }, /// The field maps to the character data of the element. - Text, + Text { + /// Optional codec to use + codec: Option, + }, } impl FieldKind { @@ -143,7 +147,7 @@ impl FieldKind { }) } - XmlFieldMeta::Text => Ok(Self::Text), + XmlFieldMeta::Text { codec } => Ok(Self::Text { codec }), } } } @@ -257,10 +261,21 @@ impl FieldDef { }) } - FieldKind::Text => { + FieldKind::Text { ref codec } => { let FromEventsScope { ref text, .. } = scope; let field_access = scope.access_field(&self.member); - let from_xml_text = from_xml_text_fn(self.ty.clone()); + let finalize = match codec { + Some(codec_ty) => { + let decode = text_codec_decode_fn(codec_ty.clone(), self.ty.clone()); + quote! { + #decode(#field_access)? + } + } + None => { + let from_xml_text = from_xml_text_fn(self.ty.clone()); + quote! { #from_xml_text(#field_access)? } + } + }; Ok(FieldBuilderPart::Text { value: FieldTempInit { @@ -270,9 +285,7 @@ impl FieldDef { collect: quote! { #field_access.push_str(#text.as_str()); }, - finalize: quote! { - #from_xml_text(#field_access)? 
- }, + finalize, }) } } @@ -318,14 +331,19 @@ impl FieldDef { }) } - FieldKind::Text => { - let into_xml_text = into_xml_text_fn(self.ty.clone()); + FieldKind::Text { ref codec } => { + let generator = match codec { + Some(codec_ty) => { + let encode = text_codec_encode_fn(codec_ty.clone(), self.ty.clone()); + quote! { #encode(#bound_name)? } + } + None => { + let into_xml_text = into_xml_text_fn(self.ty.clone()); + quote! { ::core::option::Option::Some(#into_xml_text(#bound_name)?) } + } + }; - Ok(FieldIteratorPart::Text { - generator: quote! { - #into_xml_text(#bound_name)? - }, - }) + Ok(FieldIteratorPart::Text { generator }) } } } diff --git a/xso-proc/src/meta.rs b/xso-proc/src/meta.rs index 51829d8..2a88020 100644 --- a/xso-proc/src/meta.rs +++ b/xso-proc/src/meta.rs @@ -312,7 +312,10 @@ pub(crate) enum XmlFieldMeta { }, /// `#[xml(text)]` - Text, + Text { + /// The path to the optional codec type. + codec: Option, + }, } impl XmlFieldMeta { @@ -393,8 +396,28 @@ impl XmlFieldMeta { } /// Parse a `#[xml(text)]` meta. 
- fn text_from_meta(_: ParseNestedMeta<'_>) -> Result { - Ok(Self::Text) + fn text_from_meta(meta: ParseNestedMeta<'_>) -> Result { + let mut codec: Option = None; + if meta.input.peek(Token![=]) { + Ok(Self::Text { + codec: Some(meta.value()?.parse()?), + }) + } else if meta.input.peek(syn::token::Paren) { + meta.parse_nested_meta(|meta| { + if meta.path.is_ident("codec") { + if codec.is_some() { + return Err(Error::new_spanned(meta.path, "duplicate `codec` key")); + } + codec = Some(meta.value()?.parse()?); + Ok(()) + } else { + Err(Error::new_spanned(meta.path, "unsupported key")) + } + })?; + Ok(Self::Text { codec }) + } else { + Ok(Self::Text { codec: None }) + } } /// Parse [`Self`] from a nestd meta, switching on the identifier diff --git a/xso-proc/src/types.rs b/xso-proc/src/types.rs index 8cc3f25..db43802 100644 --- a/xso-proc/src/types.rs +++ b/xso-proc/src/types.rs @@ -220,3 +220,76 @@ pub(crate) fn into_xml_text_fn(ty: Type) -> Expr { }, }) } + +/// Construct a [`syn::TypePath`] referring to +/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>` and return the +/// [`syn::Span`] of the `codec_ty` alongside it. 
+fn text_codec_of(codec_ty: Type, for_ty: Type) -> (Span, TypePath) { + let span = codec_ty.span(); + ( + span, + TypePath { + qself: Some(QSelf { + lt_token: syn::token::Lt { spans: [span] }, + ty: Box::new(codec_ty), + position: 2, + as_token: Some(syn::token::As { span }), + gt_token: syn::token::Gt { spans: [span] }, + }), + path: Path { + leading_colon: Some(syn::token::PathSep { + spans: [span, span], + }), + segments: [ + PathSegment { + ident: Ident::new("xso", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("TextCodec", span), + arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments { + colon2_token: Some(syn::token::PathSep { + spans: [span, span], + }), + lt_token: syn::token::Lt { spans: [span] }, + args: [GenericArgument::Type(for_ty)].into_iter().collect(), + gt_token: syn::token::Gt { spans: [span] }, + }), + }, + ] + .into_iter() + .collect(), + }, + }, + ) +} + +/// Construct a [`syn::Expr`] referring to +/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::encode`. +pub(crate) fn text_codec_encode_fn(codec_ty: Type, for_ty: Type) -> Expr { + let (span, mut ty) = text_codec_of(codec_ty, for_ty); + ty.path.segments.push(PathSegment { + ident: Ident::new("encode", span), + arguments: PathArguments::None, + }); + Expr::Path(ExprPath { + attrs: Vec::new(), + qself: ty.qself, + path: ty.path, + }) +} + +/// Construct a [`syn::Expr`] referring to +/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::decode`. 
+pub(crate) fn text_codec_decode_fn(codec_ty: Type, for_ty: Type) -> Expr { + let (span, mut ty) = text_codec_of(codec_ty, for_ty); + ty.path.segments.push(PathSegment { + ident: Ident::new("decode", span), + arguments: PathArguments::None, + }); + Expr::Path(ExprPath { + attrs: Vec::new(), + qself: ty.qself, + path: ty.path, + }) +} diff --git a/xso/src/from_xml_doc.md b/xso/src/from_xml_doc.md index 55e4d11..90073ba 100644 --- a/xso/src/from_xml_doc.md +++ b/xso/src/from_xml_doc.md @@ -34,6 +34,7 @@ such: - *path*: A Rust path, like `some_crate::foo::Bar`. Note that `foo` on its own is also a path. - *string literal*: A string literal, like `"hello world!"`. +- *type*: A Rust type. - flag: Has no value. The key's mere presence has relevance and it must not be followed by a `=` sign. @@ -137,14 +138,27 @@ assert_eq!(foo, Foo { #### `text` meta The `text` meta causes the field to be mapped to the text content of the -element. For `FromXml`, the field's type must implement [`FromXmlText`] and -for `IntoXml`, the field's type must implement [`IntoXmlText`]. +element. -The `text` meta supports no options or value. Only a single field per struct -may be annotated with `#[xml(text)]` at a time, to avoid parsing ambiguities. -This is also true if only `IntoXml` is derived on a field, for consistency. +| Key | Value type | Description | +| --- | --- | --- | +| `codec` | *type* | Optional [`TextCodec`] implementation which is used to encode or decode the field. | -##### Example +If `codec` is given, the given `codec` must implement +[`TextCodec<T>`][`TextCodec`] where `T` is the type of the field. + +If `codec` is *not* given, the field's type must implement [`FromXmlText`] for +`FromXml` and for `IntoXml`, the field's type must implement [`IntoXmlText`]. + +The `text` meta also supports a shorthand syntax, `#[xml(text = ..)]`, where +the value is treated as the value for the `codec` key (with optional prefix as +described above, and unnamespaced otherwise). 
+ +Only a single field per struct may be annotated with `#[xml(text)]` at a time, +to avoid parsing ambiguities. This is also true if only `IntoXml` is derived on +a field, for consistency. + +##### Example without codec ```rust # use xso::FromXml; @@ -160,3 +174,20 @@ assert_eq!(foo, Foo { a: "hello".to_string(), }); ``` + +##### Example with codec + +```rust +# use xso::FromXml; +#[derive(FromXml, Debug, PartialEq)] +#[xml(namespace = "urn:example", name = "foo")] +struct Foo { + #[xml(text = xso::text::EmptyAsNone)] + a: Option<String>, +}; + +let foo: Foo = xso::from_bytes(b"<foo xmlns='urn:example'/>").unwrap(); +assert_eq!(foo, Foo { + a: None, +}); +``` diff --git a/xso/src/lib.rs b/xso/src/lib.rs index 621c436..1326338 100644 --- a/xso/src/lib.rs +++ b/xso/src/lib.rs @@ -24,7 +24,7 @@ pub mod error; #[cfg(feature = "minidom")] #[cfg_attr(docsrs, doc(cfg(feature = "minidom")))] pub mod minidom_compat; -mod text; +pub mod text; #[doc(hidden)] pub mod exports { @@ -35,6 +35,9 @@ pub mod exports { use std::borrow::Cow; +#[doc(inline)] +pub use text::TextCodec; + #[doc = include_str!("from_xml_doc.md")] #[doc(inline)] #[cfg(feature = "macros")] diff --git a/xso/src/text.rs b/xso/src/text.rs index bdacd15..167502a 100644 --- a/xso/src/text.rs +++ b/xso/src/text.rs @@ -103,3 +103,60 @@ convert_via_fromstr_and_display! { #[cfg(feature = "jid")] jid::BareJid, } + +/// Represent a way to encode/decode text data into a Rust type. +/// +/// This trait can be used in scenarios where implementing [`FromXmlText`] +/// and/or [`IntoXmlText`] on a type is not feasible or sensible, such as the +/// following: +/// +/// 1. The type originates in a foreign crate, preventing the implementation +/// of foreign traits. +/// +/// 2. There is more than one way to convert a value to/from XML. +/// +/// The codec to use for a text can be specified in the attributes understood +/// by `FromXml` and `IntoXml` derive macros. See the documentation of the +/// [`FromXml`][`macro@crate::FromXml`] derive macro for details. 
+pub trait TextCodec<T> {
+    /// Decode a string value into the type.
+    fn decode(s: String) -> Result<T, Error>;
+
+    /// Encode the type as string value.
+    ///
+    /// If this returns `None`, the string value is not emitted at all.
+    fn encode(value: T) -> Result<Option<String>, Error>;
+}
+
+/// Text codec which does no transform.
+pub struct Plain;
+
+impl TextCodec<String> for Plain {
+    /// Return the text unchanged; never fails.
+    fn decode(s: String) -> Result<String, Error> {
+        Ok(s)
+    }
+
+    /// Return the string as `Some`, even if it is empty; never fails.
+    fn encode(value: String) -> Result<Option<String>, Error> {
+        Ok(Some(value))
+    }
+}
+
+/// Text codec which returns None instead of the empty string.
+///
+/// Encoding mirrors decoding: `None` and `Some` of an empty string both
+/// encode to `None`.
+pub struct EmptyAsNone;
+
+impl TextCodec<Option<String>> for EmptyAsNone {
+    /// Decode empty text as `None` and any other text as `Some`.
+    fn decode(s: String) -> Result<Option<String>, Error> {
+        Ok(if s.is_empty() { None } else { Some(s) })
+    }
+
+    /// Encode `None` and `Some` of an empty string as `None`.
+    fn encode(value: Option<String>) -> Result<Option<String>, Error> {
+        Ok(value.filter(|v| !v.is_empty()))
+    }
+}