diff --git a/parsers/src/avatar.rs b/parsers/src/avatar.rs index 157e7a95..21780dba 100644 --- a/parsers/src/avatar.rs +++ b/parsers/src/avatar.rs @@ -5,7 +5,7 @@ // file, You can obtain one at http://mozilla.org/MPL/2.0/. use xso::{ - text::{Base64, StripWhitespace}, + text::{Base64, StripWhitespace, TextCodec}, AsXml, FromXml, }; @@ -58,7 +58,7 @@ pub struct Info { #[xml(namespace = ns::AVATAR_DATA, name = "data")] pub struct Data { /// Vector of bytes representing the avatar’s image. - #[xml(text(codec = Base64))] + #[xml(text(codec = Base64.filtered(StripWhitespace)))] pub data: Vec, } diff --git a/parsers/src/component.rs b/parsers/src/component.rs index 5e344b62..5aad5007 100644 --- a/parsers/src/component.rs +++ b/parsers/src/component.rs @@ -20,7 +20,7 @@ pub struct Handshake { /// /// If None, it is the successful reply from the server, the stream is now /// fully established and both sides can now exchange stanzas. - #[xml(text(codec = FixedHex<20>))] + #[xml(text(codec = FixedHex::<20>))] pub data: Option<[u8; 20]>, } diff --git a/parsers/src/vcard.rs b/parsers/src/vcard.rs index f216a056..c770a6b3 100644 --- a/parsers/src/vcard.rs +++ b/parsers/src/vcard.rs @@ -15,7 +15,7 @@ use xso::{ error::Error, - text::{Base64, StripWhitespace}, + text::{Base64, StripWhitespace, TextCodec}, AsXml, FromXml, }; @@ -50,7 +50,7 @@ pub struct Type { #[xml(namespace = ns::VCARD, name = "BINVAL")] pub struct Binval { /// The actual data. - #[xml(text(codec = Base64))] + #[xml(text(codec = Base64.filtered(StripWhitespace)))] pub data: Vec, } diff --git a/parsers/src/vcard_update.rs b/parsers/src/vcard_update.rs index 702d41f2..502a51ef 100644 --- a/parsers/src/vcard_update.rs +++ b/parsers/src/vcard_update.rs @@ -30,7 +30,7 @@ pub struct VCardUpdate { #[xml(namespace = ns::VCARD_UPDATE, name = "photo")] pub struct Photo { /// The SHA1 hash of the avatar. Empty when there is no photo. - #[xml(text(codec = FixedHex<20>))] + #[xml(text(codec = FixedHex::<20>))] pub data: Option<[u8; 20]>, } diff --git a/xso-proc/src/field.rs b/xso-proc/src/field.rs index 30ed89e2..e19dbab4 100644 --- a/xso-proc/src/field.rs +++ b/xso-proc/src/field.rs @@ -148,7 +148,7 @@ enum FieldKind { /// The field maps to the character data of the element. Text { /// Optional codec to use - codec: Option, + codec: Option, }, /// The field maps to a child @@ -321,10 +321,10 @@ impl FieldDef { let FromEventsScope { ref text, .. } = scope; let field_access = scope.access_field(&self.member); let finalize = match codec { - Some(codec_ty) => { - let decode = text_codec_decode_fn(codec_ty.clone(), self.ty.clone()); + Some(codec) => { + let decode = text_codec_decode_fn(self.ty.clone()); quote! { - #decode(#field_access)? + #decode(&#codec, #field_access)? } } None => { @@ -429,9 +429,9 @@ impl FieldDef { FieldKind::Text { ref codec } => { let generator = match codec { - Some(codec_ty) => { - let encode = text_codec_encode_fn(codec_ty.clone(), self.ty.clone()); - quote! { #encode(#bound_name)? } + Some(codec) => { + let encode = text_codec_encode_fn(self.ty.clone()); + quote! { #encode(&#codec, #bound_name)? } } None => { let as_xml_text = as_xml_text_fn(self.ty.clone()); diff --git a/xso-proc/src/meta.rs b/xso-proc/src/meta.rs index 8496d513..909ae271 100644 --- a/xso-proc/src/meta.rs +++ b/xso-proc/src/meta.rs @@ -408,7 +408,7 @@ pub(crate) enum XmlFieldMeta { /// `#[xml(text)]` Text { /// The path to the optional codec type. - codec: Option, + codec: Option, }, /// `#[xml(child)` @@ -497,7 +497,7 @@ impl XmlFieldMeta { /// Parse a `#[xml(text)]` meta. fn text_from_meta(meta: ParseNestedMeta<'_>) -> Result { - let mut codec: Option = None; + let mut codec: Option = None; if meta.input.peek(Token![=]) { Ok(Self::Text { codec: Some(meta.value()?.parse()?), diff --git a/xso-proc/src/types.rs b/xso-proc/src/types.rs index 692caaa8..851a9f67 100644 --- a/xso-proc/src/types.rs +++ b/xso-proc/src/types.rs @@ -298,76 +298,66 @@ pub(crate) fn as_xml_text_fn(ty: Type) -> Expr { }) } -/// Construct a [`syn::TypePath`] referring to -/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>` and return the -/// [`syn::Span`] of the `codec_ty` alongside it. -fn text_codec_of(codec_ty: Type, for_ty: Type) -> (Span, TypePath) { - let span = codec_ty.span(); +/// Construct a [`syn::Path`] referring to `::xso::TextCodec::<#for_ty>`, +/// returing the span of `for_ty` alongside it. +fn text_codec_of(for_ty: Type) -> (Span, Path) { + let span = for_ty.span(); ( span, - TypePath { - qself: Some(QSelf { - lt_token: syn::token::Lt { spans: [span] }, - ty: Box::new(codec_ty), - position: 2, - as_token: Some(syn::token::As { span }), - gt_token: syn::token::Gt { spans: [span] }, + Path { + leading_colon: Some(syn::token::PathSep { + spans: [span, span], }), - path: Path { - leading_colon: Some(syn::token::PathSep { - spans: [span, span], - }), - segments: [ - PathSegment { - ident: Ident::new("xso", span), - arguments: PathArguments::None, - }, - PathSegment { - ident: Ident::new("TextCodec", span), - arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments { - colon2_token: Some(syn::token::PathSep { - spans: [span, span], - }), - lt_token: syn::token::Lt { spans: [span] }, - args: [GenericArgument::Type(for_ty)].into_iter().collect(), - gt_token: syn::token::Gt { spans: [span] }, + segments: [ + PathSegment { + ident: Ident::new("xso", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("TextCodec", span), + arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments { + colon2_token: Some(syn::token::PathSep { + spans: [span, span], }), - }, - ] - .into_iter() - .collect(), - }, + lt_token: syn::token::Lt { spans: [span] }, + args: [GenericArgument::Type(for_ty)].into_iter().collect(), + gt_token: syn::token::Gt { spans: [span] }, + }), + }, + ] + .into_iter() + .collect(), }, ) } /// Construct a [`syn::Expr`] referring to -/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::encode`. -pub(crate) fn text_codec_encode_fn(codec_ty: Type, for_ty: Type) -> Expr { - let (span, mut ty) = text_codec_of(codec_ty, for_ty); - ty.path.segments.push(PathSegment { +/// `::xso::TextCodec::<#for_ty>::encode`. +pub(crate) fn text_codec_encode_fn(for_ty: Type) -> Expr { + let (span, mut path) = text_codec_of(for_ty); + path.segments.push(PathSegment { ident: Ident::new("encode", span), arguments: PathArguments::None, }); Expr::Path(ExprPath { attrs: Vec::new(), - qself: ty.qself, - path: ty.path, + qself: None, + path: path, }) } /// Construct a [`syn::Expr`] referring to -/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::decode`. -pub(crate) fn text_codec_decode_fn(codec_ty: Type, for_ty: Type) -> Expr { - let (span, mut ty) = text_codec_of(codec_ty, for_ty); - ty.path.segments.push(PathSegment { +/// `::xso::TextCodec::<#for_ty>::decode`. +pub(crate) fn text_codec_decode_fn(for_ty: Type) -> Expr { + let (span, mut path) = text_codec_of(for_ty); + path.segments.push(PathSegment { ident: Ident::new("decode", span), arguments: PathArguments::None, }); Expr::Path(ExprPath { attrs: Vec::new(), - qself: ty.qself, - path: ty.path, + qself: None, + path: path, }) } diff --git a/xso/ChangeLog b/xso/ChangeLog index f015e9de..33f83822 100644 --- a/xso/ChangeLog +++ b/xso/ChangeLog @@ -1,5 +1,22 @@ Version NEXT: 0000-00-00 Jonas Schäfer + * Breaking + - The methods of `TextCodec` now have `&self` receivers. This also + implies that `#[xml(text(codec = ..))]` now takes expressions instead + of type paths. + + Because all implementations provided by `xso` were in fact unit + structs, this should not change most invocations, with two exceptions: + + 1. The type argument of `Base64` was removed. Replace all + `Base64` references with `Base64.filtered(Foo)` to update + your code. + + 2. `FixedHex` is not a valid expression. You will have to update + your code to use `FixedHex::` instead. + + This change overall allows for more flexibility in the implementation + of text codecs. * Added - Support for child elements in derive macros. Child elements may also be wrapped in Option or Box. diff --git a/xso/src/from_xml_doc.md b/xso/src/from_xml_doc.md index 8b76ecd3..b221c6e1 100644 --- a/xso/src/from_xml_doc.md +++ b/xso/src/from_xml_doc.md @@ -35,6 +35,7 @@ such: is also a path. - *string literal*: A string literal, like `"hello world!"`. - *type*: A Rust type. +- *expression*: A Rust expression. - *ident*: A Rust identifier. - flag: Has no value. The key's mere presence has relevance and it must not be followed by a `=` sign. @@ -258,9 +259,9 @@ element. | Key | Value type | Description | | --- | --- | --- | -| `codec` | *type* | Optional [`TextCodec`] implementation which is used to encode or decode the field. | +| `codec` | *expression* | Optional [`TextCodec`] implementation which is used to encode or decode the field. | -If `codec` is given, the given `codec` must implement +If `codec` is given, the given `codec` value must implement [`TextCodec`][`TextCodec`] where `T` is the type of the field. If `codec` is *not* given, the field's type must implement [`FromXmlText`] for diff --git a/xso/src/text.rs b/xso/src/text.rs index 483b97b1..ad558bad 100644 --- a/xso/src/text.rs +++ b/xso/src/text.rs @@ -6,7 +6,6 @@ //! Module containing implementations for conversions to/from XML text. -#[cfg(feature = "base64")] use core::marker::PhantomData; use std::borrow::Cow; @@ -138,23 +137,67 @@ convert_via_fromstr_and_display! { /// [`FromXml`][`macro@crate::FromXml`] derive macro for details. pub trait TextCodec { /// Decode a string value into the type. - fn decode(s: String) -> Result; + fn decode(&self, s: String) -> Result; /// Encode the type as string value. /// /// If this returns `None`, the string value is not emitted at all. - fn encode(value: &T) -> Result>, Error>; + fn encode<'x>(&self, value: &'x T) -> Result>, Error>; + + /// Apply a filter to this codec. + /// + /// Filters preprocess strings before they are handed to the codec for + /// parsing, allowing to, for example, make the codec ignore irrelevant + /// content by stripping it. + // NOTE: The bound on T is needed because any given type A may implement + // TextCodec for any number of types. If we pass T down to the `Filtered` + // struct, rustc can do type inferrence on which `TextCodec` + // implementation the `filtered` method is supposed to have been called + // on. + fn filtered(self, filter: F) -> Filtered + where + // placing the bound here (instead of on the `TextCodec` trait + // itself) preserves object-safety of TextCodec. + Self: Sized, + { + Filtered { + filter, + codec: self, + bound: PhantomData, + } + } +} + +/// Wrapper struct to apply a filter to a codec. +/// +/// You can construct a value of this type via [`TextCodec::filtered`]. +// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too. +pub struct Filtered { + filter: F, + codec: C, + bound: PhantomData, +} + +impl> TextCodec for Filtered { + fn decode(&self, s: String) -> Result { + let s = self.filter.preprocess(s); + self.codec.decode(s) + } + + fn encode<'x>(&self, value: &'x T) -> Result>, Error> { + self.codec.encode(value) + } } /// Text codec which does no transform. pub struct Plain; impl TextCodec for Plain { - fn decode(s: String) -> Result { + fn decode(&self, s: String) -> Result { Ok(s) } - fn encode(value: &String) -> Result>, Error> { + fn encode<'x>(&self, value: &'x String) -> Result>, Error> { Ok(Some(Cow::Borrowed(value.as_str()))) } } @@ -163,7 +206,7 @@ impl TextCodec for Plain { pub struct EmptyAsNone; impl TextCodec> for EmptyAsNone { - fn decode(s: String) -> Result, Error> { + fn decode(&self, s: String) -> Result, Error> { if s.is_empty() { Ok(None) } else { @@ -171,7 +214,7 @@ impl TextCodec> for EmptyAsNone { } } - fn encode(value: &Option) -> Result>, Error> { + fn encode<'x>(&self, value: &'x Option) -> Result>, Error> { Ok(match value.as_ref() { Some(v) if !v.is_empty() => Some(Cow::Borrowed(v.as_str())), Some(_) | None => None, @@ -183,7 +226,7 @@ impl TextCodec> for EmptyAsNone { pub struct EmptyAsError; impl TextCodec for EmptyAsError { - fn decode(s: String) -> Result { + fn decode(&self, s: String) -> Result { if s.is_empty() { Err(Error::Other("Empty text node.")) } else { @@ -191,7 +234,7 @@ impl TextCodec for EmptyAsError { } } - fn encode(value: &String) -> Result>, Error> { + fn encode<'x>(&self, value: &'x String) -> Result>, Error> { if value.is_empty() { Err(Error::Other("Empty text node.")) } else { @@ -205,14 +248,14 @@ impl TextCodec for EmptyAsError { /// This may be used by codecs to allow to customize some of their behaviour. pub trait TextFilter { /// Process the incoming string and return the result of the processing. - fn preprocess(s: String) -> String; + fn preprocess(&self, s: String) -> String; } /// Text preprocessor which returns the input unchanged. pub struct NoFilter; impl TextFilter for NoFilter { - fn preprocess(s: String) -> String { + fn preprocess(&self, s: String) -> String { s } } @@ -221,7 +264,7 @@ impl TextFilter for NoFilter { pub struct StripWhitespace; impl TextFilter for StripWhitespace { - fn preprocess(s: String) -> String { + fn preprocess(&self, s: String) -> String { let s: String = s .chars() .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t') @@ -237,56 +280,54 @@ impl TextFilter for StripWhitespace { /// will make the implementation ignore any whitespace within the text. #[cfg(feature = "base64")] #[cfg_attr(docsrs, doc(cfg(feature = "base64")))] -pub struct Base64(PhantomData); +pub struct Base64; #[cfg(feature = "base64")] #[cfg_attr(docsrs, doc(cfg(feature = "base64")))] -impl TextCodec> for Base64 { - fn decode(s: String) -> Result, Error> { - let value = Filter::preprocess(s); +impl TextCodec> for Base64 { + fn decode(&self, s: String) -> Result, Error> { StandardBase64Engine - .decode(value.as_bytes()) + .decode(s.as_bytes()) .map_err(Error::text_parse_error) } - fn encode(value: &Vec) -> Result>, Error> { + fn encode<'x>(&self, value: &'x Vec) -> Result>, Error> { Ok(Some(Cow::Owned(StandardBase64Engine.encode(&value)))) } } #[cfg(feature = "base64")] #[cfg_attr(docsrs, doc(cfg(feature = "base64")))] -impl<'x, Filter: TextFilter> TextCodec> for Base64 { - fn decode(s: String) -> Result, Error> { - let value = Filter::preprocess(s); +impl<'x> TextCodec> for Base64 { + fn decode(&self, s: String) -> Result, Error> { StandardBase64Engine - .decode(value.as_bytes()) + .decode(s.as_bytes()) .map_err(Error::text_parse_error) .map(Cow::Owned) } - fn encode<'a>(value: &'a Cow<'x, [u8]>) -> Result>, Error> { + fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result>, Error> { Ok(Some(Cow::Owned(StandardBase64Engine.encode(&value)))) } } #[cfg(feature = "base64")] #[cfg_attr(docsrs, doc(cfg(feature = "base64")))] -impl TextCodec> for Base64 +impl TextCodec> for Base64 where - Base64: TextCodec, + Base64: TextCodec, { - fn decode(s: String) -> Result, Error> { + fn decode(&self, s: String) -> Result, Error> { if s.is_empty() { return Ok(None); } - Ok(Some(Self::decode(s)?)) + Ok(Some(self.decode(s)?)) } - fn encode(decoded: &Option) -> Result>, Error> { + fn encode<'x>(&self, decoded: &'x Option) -> Result>, Error> { decoded .as_ref() - .map(Self::encode) + .map(|x| self.encode(x)) .transpose() .map(Option::flatten) } @@ -298,7 +339,7 @@ where pub struct FixedHex; impl TextCodec<[u8; N]> for FixedHex { - fn decode(s: String) -> Result<[u8; N], Error> { + fn decode(&self, s: String) -> Result<[u8; N], Error> { if s.len() != 2 * N { return Err(Error::Other("Invalid length")); } @@ -312,7 +353,7 @@ impl TextCodec<[u8; N]> for FixedHex { Ok(bytes) } - fn encode(value: &[u8; N]) -> Result>, Error> { + fn encode<'x>(&self, value: &'x [u8; N]) -> Result>, Error> { let mut bytes = String::with_capacity(N * 2); for byte in value { bytes.extend(format!("{:02x}", byte).chars()); @@ -325,17 +366,17 @@ impl TextCodec> for FixedHex where FixedHex: TextCodec, { - fn decode(s: String) -> Result, Error> { + fn decode(&self, s: String) -> Result, Error> { if s.is_empty() { return Ok(None); } - Ok(Some(Self::decode(s)?)) + Ok(Some(self.decode(s)?)) } - fn encode(decoded: &Option) -> Result>, Error> { + fn encode<'x>(&self, decoded: &'x Option) -> Result>, Error> { decoded .as_ref() - .map(Self::encode) + .map(|x| self.encode(x)) .transpose() .map(Option::flatten) } @@ -345,7 +386,7 @@ where pub struct ColonSeparatedHex; impl TextCodec> for ColonSeparatedHex { - fn decode(s: String) -> Result, Error> { + fn decode(&self, s: String) -> Result, Error> { assert_eq!((s.len() + 1) % 3, 0); let mut bytes = Vec::with_capacity((s.len() + 1) / 3); for i in 0..(1 + s.len()) / 3 { @@ -359,7 +400,7 @@ impl TextCodec> for ColonSeparatedHex { Ok(bytes) } - fn encode(decoded: &Vec) -> Result>, Error> { + fn encode<'x>(&self, decoded: &'x Vec) -> Result>, Error> { // TODO: Super inefficient! let mut bytes = Vec::with_capacity(decoded.len()); for byte in decoded {