xso-proc: add support for text codecs

Text codecs allow customizing the conversion of data from/to XML,
in particular in two scenarios:

1. When the type for which the behaviour is to be defined comes from a
   foreign crate, preventing the implementation of
   FromXmlText/IntoXmlText.

2. When there is no single obvious way, or more than one sensible way, to
   convert a value to XML text and back.
This commit is contained in:
Jonas Schäfer 2024-06-26 18:26:13 +02:00
parent 46584f05f9
commit c83ff286e0
8 changed files with 258 additions and 26 deletions

View file

@ -464,3 +464,30 @@ fn fails_text_without_text_consumer_positive() {
other => panic!("unexpected result: {:?}", other),
}
}
/// Test fixture: a `text` element in namespace `NS1` whose character data is
/// mapped to an optional string through the `EmptyAsNone` text codec.
#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)]
#[xml(namespace = NS1, name = "text")]
struct TextWithCodec {
// The codec, not the field type's own text conversion, handles this field.
#[xml(text(codec = xso::text::EmptyAsNone))]
text: std::option::Option<String>,
}
/// Feeds an empty `<text/>` element through `roundtrip_full` for
/// `TextWithCodec`.
#[test]
fn text_with_codec_roundtrip_empty() {
// NOTE(review): the std prelude names are re-imported explicitly here;
// presumably the enclosing module shadows them to test macro hygiene —
// confirm against the top of the test module.
#[allow(unused_imports)]
use std::{
option::Option::{None, Some},
result::Result::{Err, Ok},
};
// Self-closing element: there is no character data at all.
roundtrip_full::<TextWithCodec>("<text xmlns='urn:example:ns1'/>");
}
/// Feeds a `<text>` element containing `hello` through `roundtrip_full` for
/// `TextWithCodec`.
#[test]
fn text_with_codec_roundtrip_non_empty() {
// NOTE(review): the std prelude names are re-imported explicitly here;
// presumably the enclosing module shadows them to test macro hygiene —
// confirm against the top of the test module.
#[allow(unused_imports)]
use std::{
option::Option::{None, Some},
result::Result::{Err, Ok},
};
// Element with non-empty character data.
roundtrip_full::<TextWithCodec>("<text xmlns='urn:example:ns1'>hello</text>");
}

View file

@ -280,9 +280,9 @@ impl Compound {
State::new(state_name)
.with_field(&bound_name, field.ty())
.with_impl(quote! {
::core::option::Option::Some(::xso::exports::rxml::Event::Text(
#generator.map(|value| ::xso::exports::rxml::Event::Text(
::xso::exports::rxml::parser::EventMetrics::zero(),
#generator,
value,
))
}),
);

View file

@ -17,6 +17,7 @@ use crate::meta::{Flag, NameRef, NamespaceRef, XmlFieldMeta};
use crate::scope::{FromEventsScope, IntoEventsScope};
use crate::types::{
default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty,
text_codec_decode_fn, text_codec_encode_fn,
};
/// Code slices necessary for declaring and initializing a temporary variable
@ -98,7 +99,10 @@ enum FieldKind {
},
/// The field maps to the character data of the element.
Text,
Text {
/// Optional codec to use
codec: Option<Type>,
},
}
impl FieldKind {
@ -143,7 +147,7 @@ impl FieldKind {
})
}
XmlFieldMeta::Text => Ok(Self::Text),
XmlFieldMeta::Text { codec } => Ok(Self::Text { codec }),
}
}
}
@ -257,10 +261,21 @@ impl FieldDef {
})
}
FieldKind::Text => {
FieldKind::Text { ref codec } => {
let FromEventsScope { ref text, .. } = scope;
let field_access = scope.access_field(&self.member);
let from_xml_text = from_xml_text_fn(self.ty.clone());
let finalize = match codec {
Some(codec_ty) => {
let decode = text_codec_decode_fn(codec_ty.clone(), self.ty.clone());
quote! {
#decode(#field_access)?
}
}
None => {
let from_xml_text = from_xml_text_fn(self.ty.clone());
quote! { #from_xml_text(#field_access)? }
}
};
Ok(FieldBuilderPart::Text {
value: FieldTempInit {
@ -270,9 +285,7 @@ impl FieldDef {
collect: quote! {
#field_access.push_str(#text.as_str());
},
finalize: quote! {
#from_xml_text(#field_access)?
},
finalize,
})
}
}
@ -318,14 +331,19 @@ impl FieldDef {
})
}
FieldKind::Text => {
let into_xml_text = into_xml_text_fn(self.ty.clone());
FieldKind::Text { ref codec } => {
let generator = match codec {
Some(codec_ty) => {
let encode = text_codec_encode_fn(codec_ty.clone(), self.ty.clone());
quote! { #encode(#bound_name)? }
}
None => {
let into_xml_text = into_xml_text_fn(self.ty.clone());
quote! { ::core::option::Option::Some(#into_xml_text(#bound_name)?) }
}
};
Ok(FieldIteratorPart::Text {
generator: quote! {
#into_xml_text(#bound_name)?
},
})
Ok(FieldIteratorPart::Text { generator })
}
}
}

View file

@ -312,7 +312,10 @@ pub(crate) enum XmlFieldMeta {
},
/// `#[xml(text)]`
Text,
Text {
/// The path to the optional codec type.
codec: Option<Type>,
},
}
impl XmlFieldMeta {
@ -393,8 +396,28 @@ impl XmlFieldMeta {
}
/// Parse a `#[xml(text)]` meta.
fn text_from_meta(_: ParseNestedMeta<'_>) -> Result<Self> {
Ok(Self::Text)
fn text_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
let mut codec: Option<Type> = None;
if meta.input.peek(Token![=]) {
Ok(Self::Text {
codec: Some(meta.value()?.parse()?),
})
} else if meta.input.peek(syn::token::Paren) {
meta.parse_nested_meta(|meta| {
if meta.path.is_ident("codec") {
if codec.is_some() {
return Err(Error::new_spanned(meta.path, "duplicate `codec` key"));
}
codec = Some(meta.value()?.parse()?);
Ok(())
} else {
Err(Error::new_spanned(meta.path, "unsupported key"))
}
})?;
Ok(Self::Text { codec })
} else {
Ok(Self::Text { codec: None })
}
}
/// Parse [`Self`] from a nested meta, switching on the identifier

View file

@ -220,3 +220,76 @@ pub(crate) fn into_xml_text_fn(ty: Type) -> Expr {
},
})
}
/// Build the qualified type path
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>`.
///
/// The tokens constructed explicitly here all use the [`syn::Span`] of
/// `codec_ty`. That span is returned as the first tuple element so that
/// callers can append further path segments with the same span.
fn text_codec_of(codec_ty: Type, for_ty: Type) -> (Span, TypePath) {
    let span = codec_ty.span();

    // Small token factories, all anchored at the codec type's span.
    let path_sep = || syn::token::PathSep {
        spans: [span, span],
    };
    let lt = || syn::token::Lt { spans: [span] };
    let gt = || syn::token::Gt { spans: [span] };

    // `xso`
    let crate_segment = PathSegment {
        ident: Ident::new("xso", span),
        arguments: PathArguments::None,
    };
    // `TextCodec::<#for_ty>`
    let trait_segment = PathSegment {
        ident: Ident::new("TextCodec", span),
        arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments {
            colon2_token: Some(path_sep()),
            lt_token: lt(),
            args: [GenericArgument::Type(for_ty)].into_iter().collect(),
            gt_token: gt(),
        }),
    };

    // `<#codec_ty as ...>`
    let qself = QSelf {
        lt_token: lt(),
        ty: Box::new(codec_ty),
        // The first two path segments (`xso`, `TextCodec`) name the trait.
        position: 2,
        as_token: Some(syn::token::As { span }),
        gt_token: gt(),
    };
    let path = Path {
        leading_colon: Some(path_sep()),
        segments: [crate_segment, trait_segment].into_iter().collect(),
    };

    (
        span,
        TypePath {
            qself: Some(qself),
            path,
        },
    )
}
/// Construct a [`syn::Expr`] referring to
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::encode`.
pub(crate) fn text_codec_encode_fn(codec_ty: Type, for_ty: Type) -> Expr {
let (span, mut ty) = text_codec_of(codec_ty, for_ty);
ty.path.segments.push(PathSegment {
ident: Ident::new("encode", span),
arguments: PathArguments::None,
});
Expr::Path(ExprPath {
attrs: Vec::new(),
qself: ty.qself,
path: ty.path,
})
}
/// Construct a [`syn::Expr`] referring to
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::decode`.
pub(crate) fn text_codec_decode_fn(codec_ty: Type, for_ty: Type) -> Expr {
let (span, mut ty) = text_codec_of(codec_ty, for_ty);
ty.path.segments.push(PathSegment {
ident: Ident::new("decode", span),
arguments: PathArguments::None,
});
Expr::Path(ExprPath {
attrs: Vec::new(),
qself: ty.qself,
path: ty.path,
})
}

View file

@ -34,6 +34,7 @@ such:
- *path*: A Rust path, like `some_crate::foo::Bar`. Note that `foo` on its own
is also a path.
- *string literal*: A string literal, like `"hello world!"`.
- *type*: A Rust type.
- flag: Has no value. The key's mere presence has relevance and it must not be
followed by a `=` sign.
@ -137,14 +138,27 @@ assert_eq!(foo, Foo {
#### `text` meta
The `text` meta causes the field to be mapped to the text content of the
element. For `FromXml`, the field's type must implement [`FromXmlText`] and
for `IntoXml`, the field's type must implement [`IntoXmlText`].
element.
The `text` meta supports no options or value. Only a single field per struct
may be annotated with `#[xml(text)]` at a time, to avoid parsing ambiguities.
This is also true if only `IntoXml` is derived on a field, for consistency.
| Key | Value type | Description |
| --- | --- | --- |
| `codec` | *type* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
##### Example
If `codec` is given, the given `codec` must implement
[`TextCodec<T>`][`TextCodec`] where `T` is the type of the field.
If `codec` is *not* given, the field's type must implement [`FromXmlText`] for
`FromXml` and for `IntoXml`, the field's type must implement [`IntoXmlText`].
The `text` meta also supports a shorthand syntax, `#[xml(text = ..)]`, where
the value is treated as the value for the `codec` key, i.e. it must be a
*type* implementing [`TextCodec`] for the field's type.
Only a single field per struct may be annotated with `#[xml(text)]` at a time,
to avoid parsing ambiguities. This is also true if only `IntoXml` is derived on
a field, for consistency.
##### Example without codec
```rust
# use xso::FromXml;
@ -160,3 +174,20 @@ assert_eq!(foo, Foo {
a: "hello".to_string(),
});
```
##### Example with codec
```rust
# use xso::FromXml;
#[derive(FromXml, Debug, PartialEq)]
#[xml(namespace = "urn:example", name = "foo")]
struct Foo {
#[xml(text = xso::text::EmptyAsNone)]
a: Option<String>,
};
let foo: Foo = xso::from_bytes(b"<foo xmlns='urn:example'/>").unwrap();
assert_eq!(foo, Foo {
a: None,
});
```

View file

@ -24,7 +24,7 @@ pub mod error;
#[cfg(feature = "minidom")]
#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
pub mod minidom_compat;
mod text;
pub mod text;
#[doc(hidden)]
pub mod exports {
@ -35,6 +35,9 @@ pub mod exports {
use std::borrow::Cow;
#[doc(inline)]
pub use text::TextCodec;
#[doc = include_str!("from_xml_doc.md")]
#[doc(inline)]
#[cfg(feature = "macros")]

View file

@ -103,3 +103,60 @@ convert_via_fromstr_and_display! {
#[cfg(feature = "jid")]
jid::BareJid,
}
/// Represents a way to encode/decode XML text data to/from a Rust type `T`.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`IntoXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
///    of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML text.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `IntoXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
///
/// Both functions are associated functions without a receiver: a codec is
/// selected by its type, not by a value.
pub trait TextCodec<T> {
/// Decode a string value into the type.
///
/// `s` is the text to decode. Returns the decoded value, or an error if
/// the implementation rejects the text.
fn decode(s: String) -> Result<T, Error>;
/// Encode the type as string value.
///
/// If this returns `None`, the string value is not emitted at all.
fn encode(value: T) -> Result<Option<String>, Error>;
}
/// Text codec which does no transform.
///
/// The text is passed through unchanged in both directions.
pub struct Plain;
impl TextCodec<String> for Plain {
// Decoding is the identity: the text is the value.
fn decode(s: String) -> Result<String, Error> {
Ok(s)
}
// Always yields `Some`, even for the empty string.
fn encode(value: String) -> Result<Option<String>, Error> {
Ok(Some(value))
}
}
/// Text codec which returns `None` instead of the empty string.
///
/// - On decode, empty text becomes `None`; any other text becomes
///   `Some(text)`.
/// - On encode, both `None` and `Some("")` yield `None`, so no text is
///   emitted; any other string is emitted unchanged.
pub struct EmptyAsNone;

impl TextCodec<Option<String>> for EmptyAsNone {
    /// Map empty text to `None` and non-empty text to `Some(text)`.
    ///
    /// This implementation never returns an error.
    fn decode(s: String) -> Result<Option<String>, Error> {
        Ok(Some(s).filter(|text| !text.is_empty()))
    }

    /// Return the contained string unless it is absent or empty.
    ///
    /// This implementation never returns an error.
    fn encode(value: Option<String>) -> Result<Option<String>, Error> {
        Ok(value.filter(|text| !text.is_empty()))
    }
}