xso-proc: add support for text codecs

Text codecs make it possible to customize the conversion of data from/to
XML text, in particular in two scenarios:

1. When the type for which the behaviour is to be defined comes from a
   foreign crate, preventing the implementation of
   FromXmlText/IntoXmlText.

2. When there is not one obvious, or more than one sensible, way to
   convert a value to XML text and back.
This commit is contained in:
Jonas Schäfer 2024-06-26 18:26:13 +02:00
parent 46584f05f9
commit c83ff286e0
8 changed files with 258 additions and 26 deletions

View file

@ -464,3 +464,30 @@ fn fails_text_without_text_consumer_positive() {
other => panic!("unexpected result: {:?}", other), other => panic!("unexpected result: {:?}", other),
} }
} }
/// Test fixture: an element named `text` in namespace `NS1` whose character
/// data is converted through the `xso::text::EmptyAsNone` text codec.
#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)]
#[xml(namespace = NS1, name = "text")]
struct TextWithCodec {
// Mapped to the element's text content; the `codec` key selects the codec
// type used for the conversion to and from `Option<String>`.
#[xml(text(codec = xso::text::EmptyAsNone))]
text: std::option::Option<String>,
}
/// Round-trips a `TextWithCodec` element which carries no character data.
#[test]
fn text_with_codec_roundtrip_empty() {
    // NOTE(review): presumably these local imports restore the standard
    // `Option`/`Result` variant names for the test body; confirm against
    // the module-level declarations of this test file.
    #[allow(unused_imports)]
    use std::option::Option::{None, Some};
    #[allow(unused_imports)]
    use std::result::Result::{Err, Ok};

    let document = "<text xmlns='urn:example:ns1'/>";
    roundtrip_full::<TextWithCodec>(document);
}
/// Round-trips a `TextWithCodec` element whose character data is `hello`.
#[test]
fn text_with_codec_roundtrip_non_empty() {
    // NOTE(review): presumably these local imports restore the standard
    // `Option`/`Result` variant names for the test body; confirm against
    // the module-level declarations of this test file.
    #[allow(unused_imports)]
    use std::option::Option::{None, Some};
    #[allow(unused_imports)]
    use std::result::Result::{Err, Ok};

    let document = "<text xmlns='urn:example:ns1'>hello</text>";
    roundtrip_full::<TextWithCodec>(document);
}

View file

@ -280,9 +280,9 @@ impl Compound {
State::new(state_name) State::new(state_name)
.with_field(&bound_name, field.ty()) .with_field(&bound_name, field.ty())
.with_impl(quote! { .with_impl(quote! {
::core::option::Option::Some(::xso::exports::rxml::Event::Text( #generator.map(|value| ::xso::exports::rxml::Event::Text(
::xso::exports::rxml::parser::EventMetrics::zero(), ::xso::exports::rxml::parser::EventMetrics::zero(),
#generator, value,
)) ))
}), }),
); );

View file

@ -17,6 +17,7 @@ use crate::meta::{Flag, NameRef, NamespaceRef, XmlFieldMeta};
use crate::scope::{FromEventsScope, IntoEventsScope}; use crate::scope::{FromEventsScope, IntoEventsScope};
use crate::types::{ use crate::types::{
default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty, default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty,
text_codec_decode_fn, text_codec_encode_fn,
}; };
/// Code slices necessary for declaring and initializing a temporary variable /// Code slices necessary for declaring and initializing a temporary variable
@ -98,7 +99,10 @@ enum FieldKind {
}, },
/// The field maps to the character data of the element. /// The field maps to the character data of the element.
Text, Text {
/// Optional codec to use
codec: Option<Type>,
},
} }
impl FieldKind { impl FieldKind {
@ -143,7 +147,7 @@ impl FieldKind {
}) })
} }
XmlFieldMeta::Text => Ok(Self::Text), XmlFieldMeta::Text { codec } => Ok(Self::Text { codec }),
} }
} }
} }
@ -257,10 +261,21 @@ impl FieldDef {
}) })
} }
FieldKind::Text => { FieldKind::Text { ref codec } => {
let FromEventsScope { ref text, .. } = scope; let FromEventsScope { ref text, .. } = scope;
let field_access = scope.access_field(&self.member); let field_access = scope.access_field(&self.member);
let finalize = match codec {
Some(codec_ty) => {
let decode = text_codec_decode_fn(codec_ty.clone(), self.ty.clone());
quote! {
#decode(#field_access)?
}
}
None => {
let from_xml_text = from_xml_text_fn(self.ty.clone()); let from_xml_text = from_xml_text_fn(self.ty.clone());
quote! { #from_xml_text(#field_access)? }
}
};
Ok(FieldBuilderPart::Text { Ok(FieldBuilderPart::Text {
value: FieldTempInit { value: FieldTempInit {
@ -270,9 +285,7 @@ impl FieldDef {
collect: quote! { collect: quote! {
#field_access.push_str(#text.as_str()); #field_access.push_str(#text.as_str());
}, },
finalize: quote! { finalize,
#from_xml_text(#field_access)?
},
}) })
} }
} }
@ -318,14 +331,19 @@ impl FieldDef {
}) })
} }
FieldKind::Text => { FieldKind::Text { ref codec } => {
let generator = match codec {
Some(codec_ty) => {
let encode = text_codec_encode_fn(codec_ty.clone(), self.ty.clone());
quote! { #encode(#bound_name)? }
}
None => {
let into_xml_text = into_xml_text_fn(self.ty.clone()); let into_xml_text = into_xml_text_fn(self.ty.clone());
quote! { ::core::option::Option::Some(#into_xml_text(#bound_name)?) }
}
};
Ok(FieldIteratorPart::Text { Ok(FieldIteratorPart::Text { generator })
generator: quote! {
#into_xml_text(#bound_name)?
},
})
} }
} }
} }

View file

@ -312,7 +312,10 @@ pub(crate) enum XmlFieldMeta {
}, },
/// `#[xml(text)]` /// `#[xml(text)]`
Text, Text {
/// The path to the optional codec type.
codec: Option<Type>,
},
} }
impl XmlFieldMeta { impl XmlFieldMeta {
@ -393,8 +396,28 @@ impl XmlFieldMeta {
} }
/// Parse a `#[xml(text)]` meta. /// Parse a `#[xml(text)]` meta.
fn text_from_meta(_: ParseNestedMeta<'_>) -> Result<Self> { fn text_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
Ok(Self::Text) let mut codec: Option<Type> = None;
if meta.input.peek(Token![=]) {
Ok(Self::Text {
codec: Some(meta.value()?.parse()?),
})
} else if meta.input.peek(syn::token::Paren) {
meta.parse_nested_meta(|meta| {
if meta.path.is_ident("codec") {
if codec.is_some() {
return Err(Error::new_spanned(meta.path, "duplicate `codec` key"));
}
codec = Some(meta.value()?.parse()?);
Ok(())
} else {
Err(Error::new_spanned(meta.path, "unsupported key"))
}
})?;
Ok(Self::Text { codec })
} else {
Ok(Self::Text { codec: None })
}
} }
/// Parse [`Self`] from a nested meta, switching on the identifier /// Parse [`Self`] from a nested meta, switching on the identifier

View file

@ -220,3 +220,76 @@ pub(crate) fn into_xml_text_fn(ty: Type) -> Expr {
}, },
}) })
} }
/// Build the qualified type path
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>`.
///
/// The [`syn::Span`] of `codec_ty` is returned alongside the path; every
/// token which is explicitly constructed here carries that span.
fn text_codec_of(codec_ty: Type, for_ty: Type) -> (Span, TypePath) {
    let span = codec_ty.span();

    // The `::<#for_ty>` generic arguments of the `TextCodec` segment.
    let codec_args = AngleBracketedGenericArguments {
        colon2_token: Some(syn::token::PathSep {
            spans: [span, span],
        }),
        lt_token: syn::token::Lt { spans: [span] },
        args: ::core::iter::once(GenericArgument::Type(for_ty)).collect(),
        gt_token: syn::token::Gt { spans: [span] },
    };

    // The trait path `::xso::TextCodec::<#for_ty>`, assembled segment by
    // segment.
    let mut trait_path = Path {
        leading_colon: Some(syn::token::PathSep {
            spans: [span, span],
        }),
        segments: Default::default(),
    };
    trait_path.segments.push(PathSegment {
        ident: Ident::new("xso", span),
        arguments: PathArguments::None,
    });
    trait_path.segments.push(PathSegment {
        ident: Ident::new("TextCodec", span),
        arguments: PathArguments::AngleBracketed(codec_args),
    });

    // `<#codec_ty as ...>`: `position` is the number of leading path
    // segments which belong to the trait, i.e. `xso` and `TextCodec`.
    let qself = QSelf {
        lt_token: syn::token::Lt { spans: [span] },
        ty: Box::new(codec_ty),
        position: 2,
        as_token: Some(syn::token::As { span }),
        gt_token: syn::token::Gt { spans: [span] },
    };

    let type_path = TypePath {
        qself: Some(qself),
        path: trait_path,
    };
    (span, type_path)
}
/// Construct a [`syn::Expr`] referring to
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::encode`.
pub(crate) fn text_codec_encode_fn(codec_ty: Type, for_ty: Type) -> Expr {
let (span, mut ty) = text_codec_of(codec_ty, for_ty);
ty.path.segments.push(PathSegment {
ident: Ident::new("encode", span),
arguments: PathArguments::None,
});
Expr::Path(ExprPath {
attrs: Vec::new(),
qself: ty.qself,
path: ty.path,
})
}
/// Construct a [`syn::Expr`] referring to
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::decode`.
pub(crate) fn text_codec_decode_fn(codec_ty: Type, for_ty: Type) -> Expr {
let (span, mut ty) = text_codec_of(codec_ty, for_ty);
ty.path.segments.push(PathSegment {
ident: Ident::new("decode", span),
arguments: PathArguments::None,
});
Expr::Path(ExprPath {
attrs: Vec::new(),
qself: ty.qself,
path: ty.path,
})
}

View file

@ -34,6 +34,7 @@ such:
- *path*: A Rust path, like `some_crate::foo::Bar`. Note that `foo` on its own - *path*: A Rust path, like `some_crate::foo::Bar`. Note that `foo` on its own
is also a path. is also a path.
- *string literal*: A string literal, like `"hello world!"`. - *string literal*: A string literal, like `"hello world!"`.
- *type*: A Rust type.
- flag: Has no value. The key's mere presence has relevance and it must not be - flag: Has no value. The key's mere presence has relevance and it must not be
followed by a `=` sign. followed by a `=` sign.
@ -137,14 +138,27 @@ assert_eq!(foo, Foo {
#### `text` meta #### `text` meta
The `text` meta causes the field to be mapped to the text content of the The `text` meta causes the field to be mapped to the text content of the
element. For `FromXml`, the field's type must implement [`FromXmlText`] and element.
for `IntoXml`, the field's type must implement [`IntoXmlText`].
The `text` meta supports no options or value. Only a single field per struct | Key | Value type | Description |
may be annotated with `#[xml(text)]` at a time, to avoid parsing ambiguities. | --- | --- | --- |
This is also true if only `IntoXml` is derived on a field, for consistency. | `codec` | *type* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
##### Example If `codec` is given, the given `codec` must implement
[`TextCodec<T>`][`TextCodec`] where `T` is the type of the field.
If `codec` is *not* given, the field's type must implement [`FromXmlText`] for
`FromXml` and for `IntoXml`, the field's type must implement [`IntoXmlText`].
The `text` meta also supports a shorthand syntax, `#[xml(text = ..)]`, where
the value is treated as the value for the `codec` key. That is,
`#[xml(text = T)]` is equivalent to `#[xml(text(codec = T))]`.
Only a single field per struct may be annotated with `#[xml(text)]` at a time,
to avoid parsing ambiguities. This is also true if only `IntoXml` is derived on
the struct, for consistency.
##### Example without codec
```rust ```rust
# use xso::FromXml; # use xso::FromXml;
@ -160,3 +174,20 @@ assert_eq!(foo, Foo {
a: "hello".to_string(), a: "hello".to_string(),
}); });
``` ```
##### Example with codec
```rust
# use xso::FromXml;
#[derive(FromXml, Debug, PartialEq)]
#[xml(namespace = "urn:example", name = "foo")]
struct Foo {
#[xml(text = xso::text::EmptyAsNone)]
a: Option<String>,
};
let foo: Foo = xso::from_bytes(b"<foo xmlns='urn:example'/>").unwrap();
assert_eq!(foo, Foo {
a: None,
});
```

View file

@ -24,7 +24,7 @@ pub mod error;
#[cfg(feature = "minidom")] #[cfg(feature = "minidom")]
#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))] #[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
pub mod minidom_compat; pub mod minidom_compat;
mod text; pub mod text;
#[doc(hidden)] #[doc(hidden)]
pub mod exports { pub mod exports {
@ -35,6 +35,9 @@ pub mod exports {
use std::borrow::Cow; use std::borrow::Cow;
#[doc(inline)]
pub use text::TextCodec;
#[doc = include_str!("from_xml_doc.md")] #[doc = include_str!("from_xml_doc.md")]
#[doc(inline)] #[doc(inline)]
#[cfg(feature = "macros")] #[cfg(feature = "macros")]

View file

@ -103,3 +103,60 @@ convert_via_fromstr_and_display! {
#[cfg(feature = "jid")] #[cfg(feature = "jid")]
jid::BareJid, jid::BareJid,
} }
/// Represent a way to encode/decode text data into a Rust type.
///
/// The type parameter `T` is the Rust type produced by
/// [`decode`][`Self::decode`] and consumed by [`encode`][`Self::encode`].
/// Both are associated functions without a `self` receiver, so no value of
/// the implementing type is needed to call them.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`IntoXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
/// of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `IntoXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
pub trait TextCodec<T> {
/// Decode a string value into the type.
///
/// Takes ownership of the text `s` and returns the decoded value, or an
/// [`Error`] if the implementation cannot convert the text.
fn decode(s: String) -> Result<T, Error>;
/// Encode the type as string value.
///
/// If this returns `None`, the string value is not emitted at all.
///
/// Takes ownership of `value`; an [`Error`] is returned if the
/// implementation cannot convert the value to text.
fn encode(value: T) -> Result<Option<String>, Error>;
}
/// Identity text codec: the text and the `String` value are the same.
pub struct Plain;

impl TextCodec<String> for Plain {
    /// Hand the text back unchanged; this never returns an error.
    fn decode(s: String) -> Result<String, Error> {
        Result::Ok(s)
    }

    /// Always yield the value as text to emit, even if it is empty; this
    /// never returns an error.
    fn encode(value: String) -> Result<Option<String>, Error> {
        let text = Option::Some(value);
        Result::Ok(text)
    }
}
/// Text codec which represents empty text as `None`.
///
/// Decoding maps the empty string to `None` and any other text to
/// `Some(text)`. Encoding is the inverse: `None` and `Some` of an empty
/// string both yield `None` (no text is emitted), any other value is
/// emitted as-is.
pub struct EmptyAsNone;

impl TextCodec<Option<String>> for EmptyAsNone {
    /// Decode `s`, returning `None` if it is empty. Never fails.
    fn decode(s: String) -> Result<Option<String>, Error> {
        Ok(Some(s).filter(|text| !text.is_empty()))
    }

    /// Encode `value`, returning `None` for both `None` and the empty
    /// string, so that no text is emitted in either case. Never fails.
    fn encode(value: Option<String>) -> Result<Option<String>, Error> {
        Ok(value.filter(|text| !text.is_empty()))
    }
}