xso: use values instead of types for text codecs

This allows stateful or configurable codecs without having to express
all configuration in the type name itself. For example, we could have a
Base64 type with configurable Base64 engines without having to duplicate
the Base64 type itself.

(Note that the different engines in the Base64 crate are values, not
types.)
This commit is contained in:
Jonas Schäfer 2024-08-03 10:51:23 +02:00
parent b9fc15977b
commit 271c31c9d4
10 changed files with 150 additions and 101 deletions

View file

@ -5,7 +5,7 @@
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
use xso::{
text::{Base64, StripWhitespace},
text::{Base64, StripWhitespace, TextCodec},
AsXml, FromXml,
};
@ -58,7 +58,7 @@ pub struct Info {
#[xml(namespace = ns::AVATAR_DATA, name = "data")]
pub struct Data {
/// Vector of bytes representing the avatar's image.
#[xml(text(codec = Base64<StripWhitespace>))]
#[xml(text(codec = Base64.filtered(StripWhitespace)))]
pub data: Vec<u8>,
}

View file

@ -20,7 +20,7 @@ pub struct Handshake {
///
/// If None, it is the successful reply from the server, the stream is now
/// fully established and both sides can now exchange stanzas.
#[xml(text(codec = FixedHex<20>))]
#[xml(text(codec = FixedHex::<20>))]
pub data: Option<[u8; 20]>,
}

View file

@ -15,7 +15,7 @@
use xso::{
error::Error,
text::{Base64, StripWhitespace},
text::{Base64, StripWhitespace, TextCodec},
AsXml, FromXml,
};
@ -50,7 +50,7 @@ pub struct Type {
#[xml(namespace = ns::VCARD, name = "BINVAL")]
pub struct Binval {
/// The actual data.
#[xml(text(codec = Base64<StripWhitespace>))]
#[xml(text(codec = Base64.filtered(StripWhitespace)))]
pub data: Vec<u8>,
}

View file

@ -30,7 +30,7 @@ pub struct VCardUpdate {
#[xml(namespace = ns::VCARD_UPDATE, name = "photo")]
pub struct Photo {
/// The SHA1 hash of the avatar. Empty when there is no photo.
#[xml(text(codec = FixedHex<20>))]
#[xml(text(codec = FixedHex::<20>))]
pub data: Option<[u8; 20]>,
}

View file

@ -148,7 +148,7 @@ enum FieldKind {
/// The field maps to the character data of the element.
Text {
/// Optional codec to use
codec: Option<Type>,
codec: Option<Expr>,
},
/// The field maps to a child
@ -321,10 +321,10 @@ impl FieldDef {
let FromEventsScope { ref text, .. } = scope;
let field_access = scope.access_field(&self.member);
let finalize = match codec {
Some(codec_ty) => {
let decode = text_codec_decode_fn(codec_ty.clone(), self.ty.clone());
Some(codec) => {
let decode = text_codec_decode_fn(self.ty.clone());
quote! {
#decode(#field_access)?
#decode(&#codec, #field_access)?
}
}
None => {
@ -429,9 +429,9 @@ impl FieldDef {
FieldKind::Text { ref codec } => {
let generator = match codec {
Some(codec_ty) => {
let encode = text_codec_encode_fn(codec_ty.clone(), self.ty.clone());
quote! { #encode(#bound_name)? }
Some(codec) => {
let encode = text_codec_encode_fn(self.ty.clone());
quote! { #encode(&#codec, #bound_name)? }
}
None => {
let as_xml_text = as_xml_text_fn(self.ty.clone());

View file

@ -408,7 +408,7 @@ pub(crate) enum XmlFieldMeta {
/// `#[xml(text)]`
Text {
/// The optional codec expression.
codec: Option<Type>,
codec: Option<Expr>,
},
/// `#[xml(child)`
@ -497,7 +497,7 @@ impl XmlFieldMeta {
/// Parse a `#[xml(text)]` meta.
fn text_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
let mut codec: Option<Type> = None;
let mut codec: Option<Expr> = None;
if meta.input.peek(Token![=]) {
Ok(Self::Text {
codec: Some(meta.value()?.parse()?),

View file

@ -298,76 +298,66 @@ pub(crate) fn as_xml_text_fn(ty: Type) -> Expr {
})
}
/// Construct a [`syn::TypePath`] referring to
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>` and return the
/// [`syn::Span`] of the `codec_ty` alongside it.
fn text_codec_of(codec_ty: Type, for_ty: Type) -> (Span, TypePath) {
let span = codec_ty.span();
/// Construct a [`syn::Path`] referring to `::xso::TextCodec::<#for_ty>`,
/// returning the span of `for_ty` alongside it.
fn text_codec_of(for_ty: Type) -> (Span, Path) {
let span = for_ty.span();
(
span,
TypePath {
qself: Some(QSelf {
lt_token: syn::token::Lt { spans: [span] },
ty: Box::new(codec_ty),
position: 2,
as_token: Some(syn::token::As { span }),
gt_token: syn::token::Gt { spans: [span] },
Path {
leading_colon: Some(syn::token::PathSep {
spans: [span, span],
}),
path: Path {
leading_colon: Some(syn::token::PathSep {
spans: [span, span],
}),
segments: [
PathSegment {
ident: Ident::new("xso", span),
arguments: PathArguments::None,
},
PathSegment {
ident: Ident::new("TextCodec", span),
arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments {
colon2_token: Some(syn::token::PathSep {
spans: [span, span],
}),
lt_token: syn::token::Lt { spans: [span] },
args: [GenericArgument::Type(for_ty)].into_iter().collect(),
gt_token: syn::token::Gt { spans: [span] },
segments: [
PathSegment {
ident: Ident::new("xso", span),
arguments: PathArguments::None,
},
PathSegment {
ident: Ident::new("TextCodec", span),
arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments {
colon2_token: Some(syn::token::PathSep {
spans: [span, span],
}),
},
]
.into_iter()
.collect(),
},
lt_token: syn::token::Lt { spans: [span] },
args: [GenericArgument::Type(for_ty)].into_iter().collect(),
gt_token: syn::token::Gt { spans: [span] },
}),
},
]
.into_iter()
.collect(),
},
)
}
/// Construct a [`syn::Expr`] referring to
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::encode`.
pub(crate) fn text_codec_encode_fn(codec_ty: Type, for_ty: Type) -> Expr {
let (span, mut ty) = text_codec_of(codec_ty, for_ty);
ty.path.segments.push(PathSegment {
/// `::xso::TextCodec::<#for_ty>::encode`.
pub(crate) fn text_codec_encode_fn(for_ty: Type) -> Expr {
let (span, mut path) = text_codec_of(for_ty);
path.segments.push(PathSegment {
ident: Ident::new("encode", span),
arguments: PathArguments::None,
});
Expr::Path(ExprPath {
attrs: Vec::new(),
qself: ty.qself,
path: ty.path,
qself: None,
path: path,
})
}
/// Construct a [`syn::Expr`] referring to
/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::decode`.
pub(crate) fn text_codec_decode_fn(codec_ty: Type, for_ty: Type) -> Expr {
let (span, mut ty) = text_codec_of(codec_ty, for_ty);
ty.path.segments.push(PathSegment {
/// `::xso::TextCodec::<#for_ty>::decode`.
pub(crate) fn text_codec_decode_fn(for_ty: Type) -> Expr {
let (span, mut path) = text_codec_of(for_ty);
path.segments.push(PathSegment {
ident: Ident::new("decode", span),
arguments: PathArguments::None,
});
Expr::Path(ExprPath {
attrs: Vec::new(),
qself: ty.qself,
path: ty.path,
qself: None,
path: path,
})
}

View file

@ -1,5 +1,22 @@
Version NEXT:
0000-00-00 Jonas Schäfer <jonas@zombofant.net>
* Breaking
- The methods of `TextCodec<T>` now have `&self` receivers. This also
implies that `#[xml(text(codec = ..))]` now takes expressions instead
of type paths.
Because all implementations provided by `xso` were in fact unit
structs, this should not change most invocations, with two exceptions:
1. The type argument of `Base64` was removed. Replace all
`Base64<Foo>` references with `Base64.filtered(Foo)` to update
your code.
2. `FixedHex<N>` is not a valid expression. You will have to update
your code to use `FixedHex::<N>` instead.
This change overall allows for more flexibility in the implementation
of text codecs.
* Added
- Support for child elements in derive macros. Child elements may also
be wrapped in Option or Box.

View file

@ -35,6 +35,7 @@ such:
is also a path.
- *string literal*: A string literal, like `"hello world!"`.
- *type*: A Rust type.
- *expression*: A Rust expression.
- *ident*: A Rust identifier.
- flag: Has no value. The key's mere presence has relevance and it must not be
followed by a `=` sign.
@ -258,9 +259,9 @@ element.
| Key | Value type | Description |
| --- | --- | --- |
| `codec` | *type* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
| `codec` | *expression* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
If `codec` is given, the given `codec` must implement
If `codec` is given, the given `codec` value must implement
[`TextCodec<T>`][`TextCodec`] where `T` is the type of the field.
If `codec` is *not* given, the field's type must implement [`FromXmlText`] for

View file

@ -6,7 +6,6 @@
//! Module containing implementations for conversions to/from XML text.
#[cfg(feature = "base64")]
use core::marker::PhantomData;
use std::borrow::Cow;
@ -138,23 +137,67 @@ convert_via_fromstr_and_display! {
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
pub trait TextCodec<T> {
/// Decode a string value into the type.
fn decode(s: String) -> Result<T, Error>;
fn decode(&self, s: String) -> Result<T, Error>;
/// Encode the type as string value.
///
/// If this returns `None`, the string value is not emitted at all.
fn encode(value: &T) -> Result<Option<Cow<'_, str>>, Error>;
fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
/// Apply a filter to this codec.
///
/// Filters preprocess strings before they are handed to the codec for
/// parsing. This makes it possible to, for example, have the codec ignore
/// irrelevant content by stripping it first.
///
/// The returned [`Filtered`] wrapper applies the filter only when decoding;
/// encoding is passed through to the wrapped codec unchanged.
// NOTE: The bound on T is needed because any given type A may implement
// TextCodec for any number of types. If we pass T down to the `Filtered`
// struct, rustc can do type inference on which `TextCodec`
// implementation the `filtered` method is supposed to have been called
// on.
fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
where
// placing the bound here (instead of on the `TextCodec<T>` trait
// itself) preserves object-safety of TextCodec<T>.
Self: Sized,
{
Filtered {
filter,
codec: self,
bound: PhantomData,
}
}
}
/// Wrapper struct to apply a filter to a codec.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
pub struct Filtered<F, C, T> {
// Filter used to preprocess incoming text before it is decoded.
filter: F,
// The wrapped codec which performs the actual decoding and encoding.
codec: C,
// Zero-sized marker tying this wrapper to the value type `T`; no `T` is
// stored.
bound: PhantomData<T>,
}
// A filtered codec is itself a codec for the same value type `T` as the
// codec it wraps.
impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
// Run the text through the filter first, then hand the result to the
// wrapped codec for decoding.
fn decode(&self, s: String) -> Result<T, Error> {
let s = self.filter.preprocess(s);
self.codec.decode(s)
}
// Encoding is delegated to the wrapped codec as-is; the filter is not
// involved on this path.
fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
self.codec.encode(value)
}
}
/// Text codec which does no transform.
pub struct Plain;
impl TextCodec<String> for Plain {
fn decode(s: String) -> Result<String, Error> {
fn decode(&self, s: String) -> Result<String, Error> {
Ok(s)
}
fn encode(value: &String) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
Ok(Some(Cow::Borrowed(value.as_str())))
}
}
@ -163,7 +206,7 @@ impl TextCodec<String> for Plain {
pub struct EmptyAsNone;
impl TextCodec<Option<String>> for EmptyAsNone {
fn decode(s: String) -> Result<Option<String>, Error> {
fn decode(&self, s: String) -> Result<Option<String>, Error> {
if s.is_empty() {
Ok(None)
} else {
@ -171,7 +214,7 @@ impl TextCodec<Option<String>> for EmptyAsNone {
}
}
fn encode(value: &Option<String>) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, value: &'x Option<String>) -> Result<Option<Cow<'x, str>>, Error> {
Ok(match value.as_ref() {
Some(v) if !v.is_empty() => Some(Cow::Borrowed(v.as_str())),
Some(_) | None => None,
@ -183,7 +226,7 @@ impl TextCodec<Option<String>> for EmptyAsNone {
pub struct EmptyAsError;
impl TextCodec<String> for EmptyAsError {
fn decode(s: String) -> Result<String, Error> {
fn decode(&self, s: String) -> Result<String, Error> {
if s.is_empty() {
Err(Error::Other("Empty text node."))
} else {
@ -191,7 +234,7 @@ impl TextCodec<String> for EmptyAsError {
}
}
fn encode(value: &String) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
if value.is_empty() {
Err(Error::Other("Empty text node."))
} else {
@ -205,14 +248,14 @@ impl TextCodec<String> for EmptyAsError {
/// This may be used by codecs to allow to customize some of their behaviour.
pub trait TextFilter {
/// Process the incoming string and return the result of the processing.
fn preprocess(s: String) -> String;
fn preprocess(&self, s: String) -> String;
}
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;
impl TextFilter for NoFilter {
fn preprocess(s: String) -> String {
fn preprocess(&self, s: String) -> String {
s
}
}
@ -221,7 +264,7 @@ impl TextFilter for NoFilter {
pub struct StripWhitespace;
impl TextFilter for StripWhitespace {
fn preprocess(s: String) -> String {
fn preprocess(&self, s: String) -> String {
let s: String = s
.chars()
.filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
@ -237,56 +280,54 @@ impl TextFilter for StripWhitespace {
/// will make the implementation ignore any whitespace within the text.
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
pub struct Base64<Filter: TextFilter = NoFilter>(PhantomData<Filter>);
pub struct Base64;
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<Filter: TextFilter> TextCodec<Vec<u8>> for Base64<Filter> {
fn decode(s: String) -> Result<Vec<u8>, Error> {
let value = Filter::preprocess(s);
impl TextCodec<Vec<u8>> for Base64 {
fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
StandardBase64Engine
.decode(value.as_bytes())
.decode(s.as_bytes())
.map_err(Error::text_parse_error)
}
fn encode(value: &Vec<u8>) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
Ok(Some(Cow::Owned(StandardBase64Engine.encode(&value))))
}
}
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<'x, Filter: TextFilter> TextCodec<Cow<'x, [u8]>> for Base64<Filter> {
fn decode(s: String) -> Result<Cow<'x, [u8]>, Error> {
let value = Filter::preprocess(s);
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
StandardBase64Engine
.decode(value.as_bytes())
.decode(s.as_bytes())
.map_err(Error::text_parse_error)
.map(Cow::Owned)
}
fn encode<'a>(value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
Ok(Some(Cow::Owned(StandardBase64Engine.encode(&value))))
}
}
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<T, Filter: TextFilter> TextCodec<Option<T>> for Base64<Filter>
impl<T> TextCodec<Option<T>> for Base64
where
Base64<Filter>: TextCodec<T>,
Base64: TextCodec<T>,
{
fn decode(s: String) -> Result<Option<T>, Error> {
fn decode(&self, s: String) -> Result<Option<T>, Error> {
if s.is_empty() {
return Ok(None);
}
Ok(Some(Self::decode(s)?))
Ok(Some(self.decode(s)?))
}
fn encode(decoded: &Option<T>) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
decoded
.as_ref()
.map(Self::encode)
.map(|x| self.encode(x))
.transpose()
.map(Option::flatten)
}
@ -298,7 +339,7 @@ where
pub struct FixedHex<const N: usize>;
impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
fn decode(s: String) -> Result<[u8; N], Error> {
fn decode(&self, s: String) -> Result<[u8; N], Error> {
if s.len() != 2 * N {
return Err(Error::Other("Invalid length"));
}
@ -312,7 +353,7 @@ impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
Ok(bytes)
}
fn encode(value: &[u8; N]) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
let mut bytes = String::with_capacity(N * 2);
for byte in value {
bytes.extend(format!("{:02x}", byte).chars());
@ -325,17 +366,17 @@ impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
where
FixedHex<N>: TextCodec<T>,
{
fn decode(s: String) -> Result<Option<T>, Error> {
fn decode(&self, s: String) -> Result<Option<T>, Error> {
if s.is_empty() {
return Ok(None);
}
Ok(Some(Self::decode(s)?))
Ok(Some(self.decode(s)?))
}
fn encode(decoded: &Option<T>) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
decoded
.as_ref()
.map(Self::encode)
.map(|x| self.encode(x))
.transpose()
.map(Option::flatten)
}
@ -345,7 +386,7 @@ where
pub struct ColonSeparatedHex;
impl TextCodec<Vec<u8>> for ColonSeparatedHex {
fn decode(s: String) -> Result<Vec<u8>, Error> {
fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
assert_eq!((s.len() + 1) % 3, 0);
let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
for i in 0..(1 + s.len()) / 3 {
@ -359,7 +400,7 @@ impl TextCodec<Vec<u8>> for ColonSeparatedHex {
Ok(bytes)
}
fn encode(decoded: &Vec<u8>) -> Result<Option<Cow<'_, str>>, Error> {
fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
// TODO: Super inefficient!
let mut bytes = Vec::with_capacity(decoded.len());
for byte in decoded {