xso: implement catch-all for unknown elements

This commit is contained in:
Jonas Schäfer 2024-08-05 08:20:25 +02:00
parent 2103ef0191
commit 0361b5905b
10 changed files with 413 additions and 49 deletions

View file

@ -1551,3 +1551,22 @@ fn optional_attribute_optional_extract_double_option_roundtrip_absent_child() {
"<parent xmlns='urn:example:ns1'/>",
)
}
// Test fixture for the `#[xml(element)]` catch-all: a `<parent/>` element in
// namespace `NS1` whose single field receives the child elements as untyped
// `minidom::Element` values.
#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
#[xml(namespace = NS1, name = "parent")]
struct ElementCatchall {
// `n = ..` requests an arbitrary number of children to be collected.
#[xml(element(n = ..))]
children: Vec<::minidom::Element>,
}
/// Round-trips a `<parent/>` document whose children differ in name,
/// namespace and nesting depth through [`ElementCatchall`].
#[test]
fn element_catchall_roundtrip() {
    // Re-import the prelude names explicitly, as the other tests in this
    // file do.
    #[allow(unused_imports)]
    use std::{
        option::Option::{None, Some},
        result::Result::{Err, Ok},
    };
    let document = "<parent xmlns='urn:example:ns1'><child><deeper/></child><child xmlns='urn:example:ns2'/><more-children/><yet-another-child/><child/></parent>";
    roundtrip_full::<ElementCatchall>(document)
}

View file

@ -11,7 +11,7 @@ use quote::quote;
use syn::{spanned::Spanned, *};
use crate::error_message::ParentRef;
use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit};
use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit, NestedMatcher};
use crate::meta::NamespaceRef;
use crate::scope::{mangle_member, AsItemsScope, FromEventsScope};
use crate::state::{AsItemsSubmachine, FromEventsSubmachine, State};
@ -109,6 +109,7 @@ impl Compound {
let mut builder_data_init = TokenStream::default();
let mut output_cons = TokenStream::default();
let mut child_matchers = TokenStream::default();
let mut fallback_child_matcher = None;
let mut text_handler = None;
let mut extra_defs = TokenStream::default();
let is_tuple = !output_name.is_path();
@ -219,18 +220,44 @@ impl Compound {
#builder_field_name: #init,
});
child_matchers.extend(quote! {
let (name, attrs) = match #matcher {
::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(e),
::core::result::Result::Ok(#substate_data) => {
return ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name {
#builder_data_ident,
#substate_data,
}))
match matcher {
NestedMatcher::Selective(matcher) => {
child_matchers.extend(quote! {
let (name, attrs) = match #matcher {
::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(e),
::core::result::Result::Ok(#substate_data) => {
return ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name {
#builder_data_ident,
#substate_data,
}))
}
};
});
}
NestedMatcher::Fallback(matcher) => {
if let Some((span, _)) = fallback_child_matcher.as_ref() {
let mut err = Error::new(
field.span(),
"more than one field is attempting to consume all unmatched child elements"
);
err.combine(Error::new(
*span,
"the previous field collecting all unmatched child elements is here"
));
return Err(err);
}
};
});
let matcher = quote! {
::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name {
#builder_data_ident,
#substate_data: { #matcher },
}))
};
fallback_child_matcher = Some((field.span(), matcher));
}
}
if is_tuple {
output_cons.extend(quote! {
@ -278,6 +305,14 @@ impl Compound {
}
};
let child_fallback = match fallback_child_matcher {
Some((_, matcher)) => matcher,
None => quote! {
let _ = (name, attrs);
::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err))
},
};
states.push(State::new_with_builder(
default_state_ident.clone(),
builder_data_ident,
@ -292,8 +327,7 @@ impl Compound {
}
::xso::exports::rxml::Event::StartElement(_, name, attrs) => {
#child_matchers
let _ = (name, attrs);
::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err))
#child_fallback
}
::xso::exports::rxml::Event::Text(_, #text) => {
#text_handler

View file

@ -23,7 +23,7 @@ use crate::types::{
option_as_xml_ty, option_ty, ref_ty, ty_from_ident,
};
use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit};
use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit, NestedMatcher};
/// The field maps to a child
pub(super) struct ChildField {
@ -101,7 +101,7 @@ impl Field for ChildField {
init: quote! { ::core::option::Option::None },
ty: option_ty(ty.clone()),
},
matcher: quote! {
matcher: NestedMatcher::Selective(quote! {
match #matcher {
::core::result::Result::Ok(v) => if #field_access.is_some() {
::core::result::Result::Err(::xso::error::FromEventsError::Invalid(::xso::error::Error::Other(#duplicate_msg)))
@ -110,7 +110,7 @@ impl Field for ChildField {
},
::core::result::Result::Err(e) => ::core::result::Result::Err(e),
}
},
}),
builder,
collect: quote! {
#field_access = ::core::option::Option::Some(#fetch);
@ -132,7 +132,7 @@ impl Field for ChildField {
init: quote! { #ty_default() },
ty: ty.clone(),
},
matcher,
matcher: NestedMatcher::Selective(matcher),
builder,
collect: quote! {
#ty_extend(&mut #field_access, [#fetch]);

View file

@ -0,0 +1,110 @@
// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//! This module concerns the processing of untyped `minidom::Element`
//! children.
//!
//! In particular, it provides the `#[xml(element)]` implementation.
use proc_macro2::{Span, TokenStream};
use quote::quote;
use syn::*;
use crate::error_message::ParentRef;
use crate::scope::{AsItemsScope, FromEventsScope};
use crate::types::{
default_fn, element_ty, from_xml_builder_ty, into_iterator_into_iter_fn, into_iterator_iter_ty,
item_iter_ty, option_ty, ref_ty,
};
use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit, NestedMatcher};
/// Field kind implementing `#[xml(element)]`: child elements are kept as
/// untyped `minidom::Element` values inside a collection-typed field.
pub(super) struct ElementField;

impl Field for ElementField {
    /// Build the parsing half of the field.
    ///
    /// The temporary is initialised through the default function derived
    /// from the field type `ty`. The matcher is a
    /// [`NestedMatcher::Fallback`], which constructs the element builder
    /// straight from `name` and `attrs`. Each finished element is appended
    /// to the field via `Extend`.
    fn make_builder_part(
        &self,
        scope: &FromEventsScope,
        _container_name: &ParentRef,
        member: &Member,
        ty: &Type,
    ) -> Result<FieldBuilderPart> {
        let substate_result = &scope.substate_result;
        let field_access = scope.access_field(member);

        let elem_ty = element_ty(Span::call_site());
        let make_default = default_fn(ty.clone());
        let builder = from_xml_builder_ty(elem_ty.clone());

        let init = quote! { #make_default() };
        let matcher = NestedMatcher::Fallback(quote! {
            #builder::new(name, attrs)
        });
        let collect = quote! {
            <#ty as ::core::iter::Extend::<#elem_ty>>::extend(&mut #field_access, [#substate_result]);
        };
        let finalize = quote! {
            #field_access
        };

        Ok(FieldBuilderPart::Nested {
            extra_defs: TokenStream::default(),
            value: FieldTempInit {
                init,
                ty: ty.clone(),
            },
            matcher,
            builder,
            collect,
            finalize,
        })
    }

    /// Build the serialisation half of the field.
    ///
    /// The iterator state is a pair: the iterator over the borrowed
    /// collection, and an optional item iterator for the element that is
    /// currently being emitted. The generated loop drains the current item
    /// iterator and then advances to the next element, finishing with
    /// `Ok(None)` once the collection is exhausted.
    fn make_iterator_part(
        &self,
        scope: &AsItemsScope,
        _container_name: &ParentRef,
        bound_name: &Ident,
        _member: &Member,
        ty: &Type,
    ) -> Result<FieldIteratorPart> {
        let lifetime = &scope.lifetime;

        let elem_ty = element_ty(Span::call_site());
        let item_iter = item_iter_ty(elem_ty.clone(), lifetime.clone());

        // Both the iterator type and the `into_iter` function are derived
        // from the same borrowed form of the field type.
        let borrowed_ty = ref_ty(ty.clone(), lifetime.clone());
        let outer_iter = into_iterator_iter_ty(borrowed_ty.clone());
        let into_iter_fn = into_iterator_into_iter_fn(borrowed_ty);

        let state_ty = Type::Tuple(TypeTuple {
            paren_token: token::Paren::default(),
            elems: [outer_iter, option_ty(item_iter)].into_iter().collect(),
        });

        let init = quote! {
            (#into_iter_fn(#bound_name), ::core::option::Option::None)
        };
        let generator = quote! {
            loop {
                if let ::core::option::Option::Some(current) = #bound_name.1.as_mut() {
                    if let ::core::option::Option::Some(item) = current.next() {
                        break ::core::option::Option::Some(item).transpose();
                    }
                }
                if let ::core::option::Option::Some(item) = #bound_name.0.next() {
                    #bound_name.1 = ::core::option::Option::Some(
                        <#elem_ty as ::xso::AsXml>::as_xml_iter(item)?
                    );
                } else {
                    break ::core::result::Result::Ok(::core::option::Option::None)
                }
            }
        };

        Ok(FieldIteratorPart::Content {
            extra_defs: TokenStream::default(),
            value: FieldTempInit { init, ty: state_ty },
            generator,
        })
    }
}

View file

@ -18,10 +18,14 @@ use crate::scope::{AsItemsScope, FromEventsScope};
mod attribute;
mod child;
#[cfg(feature = "minidom")]
mod element;
mod text;
use self::attribute::AttributeField;
use self::child::{ChildField, ExtractDef};
#[cfg(feature = "minidom")]
use self::element::ElementField;
use self::text::TextField;
/// Code slices necessary for declaring and initializing a temporary variable
@ -34,6 +38,33 @@ pub(crate) struct FieldTempInit {
pub(crate) init: TokenStream,
}
/// Configure how a nested field builder selects child elements.
pub(crate) enum NestedMatcher {
/// Matches a specific child element fallibly.
///
/// On a mismatch, `name` and `attrs` are handed on so that the next
/// matcher can be tried.
Selective(
/// Expression which evaluates to `Result<T, FromEventsError>`,
/// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
///
/// `T` must be the type specified in the
/// [`FieldBuilderPart::Nested::builder`] field.
TokenStream,
),
#[cfg_attr(not(feature = "minidom"), allow(dead_code))]
/// Matches any child element not matched by another matcher.
///
/// Only a single field may use this variant, otherwise an error is
/// raised during execution of the proc macro.
Fallback(
/// Expression which evaluates to `T` (or `return`s an error),
/// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
///
/// `T` must be the type specified in the
/// [`FieldBuilderPart::Nested::builder`] field.
TokenStream,
),
}
/// Describe how a struct or enum variant's member is parsed from XML data.
///
/// This struct is returned from [`FieldDef::make_builder_part`] and
@ -73,12 +104,9 @@ pub(crate) enum FieldBuilderPart {
/// parsing.
value: FieldTempInit,
/// Expression which evaluates to `Result<T, FromEventsError>`,
/// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
///
/// `T` must be the type specified in the
/// [`Self::Nested::builder`] field.
matcher: TokenStream,
/// Configure child matching behaviour for this field. See
/// [`NestedMatcher`] for options.
matcher: NestedMatcher,
/// Type implementing `xso::FromEventsBuilder` which parses the child
/// element.
@ -343,6 +371,31 @@ fn new_field(
}),
}))
}
#[cfg(feature = "minidom")]
XmlFieldMeta::Element { span, amount } => {
match amount {
Some(AmountConstraint::Any(_)) => (),
Some(AmountConstraint::FixedSingle(span)) => {
return Err(Error::new(
span,
"only `n = ..` is supported for #[xml(element)]` currently",
))
}
None => return Err(Error::new(span, "`n` must be set to `..` currently")),
}
Ok(Box::new(ElementField))
}
#[cfg(not(feature = "minidom"))]
XmlFieldMeta::Element { span, amount } => {
let _ = amount;
Err(Error::new(
span,
"#[xml(element)] requires xso to be built with the \"minidom\" feature.",
))
}
}
}
@ -351,6 +404,9 @@ fn new_field(
/// See [`Compound`][`crate::compound::Compound`] for more information on
/// compounds in general.
pub(crate) struct FieldDef {
/// A span which refers to the field's definition.
span: Span,
/// The member identifying the field.
member: Member,
@ -388,6 +444,7 @@ impl FieldDef {
let ty = field.ty.clone();
Ok(Self {
span: field_span,
inner: new_field(meta, ident, &ty, container_namespace)?,
member,
ty,
@ -406,6 +463,7 @@ impl FieldDef {
) -> Result<Self> {
let span = meta.span();
Ok(Self {
span,
member: Member::Unnamed(Index { index, span }),
ty: ty.clone(),
inner: new_field(meta, None, ty, container_namespace)?,
@ -454,4 +512,9 @@ impl FieldDef {
pub(crate) fn is_text_field(&self) -> bool {
self.inner.captures_text()
}
/// Return a span which points at the field's definition.
///
/// Intended for attaching diagnostics to the field.
pub(crate) fn span(&self) -> Span {
self.span
}
}

View file

@ -686,6 +686,17 @@ pub(crate) enum XmlFieldMeta {
/// The `fields` nested meta.
fields: Vec<XmlFieldMeta>,
},
/// `#[xml(element)]`
Element {
/// The span of the `#[xml(element)]` meta from which this was parsed.
///
/// This is useful for error messages.
span: Span,
/// The `n` flag.
amount: Option<AmountConstraint>,
},
}
impl XmlFieldMeta {
@ -906,6 +917,26 @@ impl XmlFieldMeta {
})
}
/// Parse a `#[xml(element)]` meta.
fn element_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
let mut amount = None;
meta.parse_nested_meta(|meta| {
if meta.path.is_ident("n") {
if amount.is_some() {
return Err(Error::new_spanned(meta.path, "duplicate `n` key"));
}
amount = Some(meta.value()?.parse()?);
Ok(())
} else {
Err(Error::new_spanned(meta.path, "unsupported key"))
}
})?;
Ok(Self::Element {
span: meta.path.span(),
amount,
})
}
/// Parse [`Self`] from a nested meta, switching on the identifier
/// of that nested meta.
fn parse_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
@ -917,6 +948,8 @@ impl XmlFieldMeta {
Self::child_from_meta(meta)
} else if meta.path.is_ident("extract") {
Self::extract_from_meta(meta)
} else if meta.path.is_ident("element") {
Self::element_from_meta(meta)
} else {
Err(Error::new_spanned(meta.path, "unsupported field meta"))
}
@ -998,6 +1031,7 @@ impl XmlFieldMeta {
Self::Child { ref span, .. } => *span,
Self::Text { ref span, .. } => *span,
Self::Extract { ref span, .. } => *span,
Self::Element { ref span, .. } => *span,
}
}

View file

@ -783,3 +783,36 @@ pub(crate) fn option_as_xml_ty(inner_ty: Type) -> Type {
},
})
}
/// Construct a [`syn::Type`] referring to `::xso::exports::minidom::Element`.
///
/// Every identifier and the leading `::` carry the given `span`.
#[cfg(feature = "minidom")]
pub(crate) fn element_ty(span: Span) -> Type {
    // All segments are plain identifiers without generic arguments, so they
    // can be generated from their names.
    let segments = ["xso", "exports", "minidom", "Element"]
        .into_iter()
        .map(|name| PathSegment {
            ident: Ident::new(name, span),
            arguments: PathArguments::None,
        })
        .collect();
    let leading_colon = Some(syn::token::PathSep {
        spans: [span, span],
    });
    Type::Path(TypePath {
        qself: None,
        path: Path {
            leading_colon,
            segments,
        },
    })
}

View file

@ -20,6 +20,8 @@ Version NEXT:
- Support for deriving FromXml and AsXml on enums.
- Support for extracting data from child elements without intermediate
structs.
- Support for collecting all unknown children in a single field as a
collection of `minidom::Element`.
Version 0.1.2:
2024-07-26 Jonas Schäfer <jonas@zombofant.net>

View file

@ -150,6 +150,7 @@ The following mapping types are defined:
| --- | --- |
| [`attribute`](#attribute-meta) | Map the field to an XML attribute on the struct's element |
| [`child`](#child-meta) | Map the field to a child element |
| [`element`](#element-meta) | Map the field to a child element as [`minidom::Element`] |
| [`extract`](#extract-meta) | Map the field to contents of a child element of specified structure |
| [`text`](#text-meta) | Map the field to the text content of the struct's element |
@ -307,6 +308,63 @@ assert_eq!(parent, Parent {
});
```
#### `element` meta
The `element` meta causes the field to be mapped to child elements, stored as
a container containing [`minidom::Element`] instances.
This meta is only available if `xso` is being built with the `"minidom"`
feature.
The following keys can be used inside the `#[xml(element(..))]` meta:
| Key | Value type | Description |
| --- | --- | --- |
| `n` | `..` | Must be set to the value `..`. |
The `n` parameter will, in the future, support values other than `..`. In
order to provide a non-breaking path into that future, it must be set to the
value `..` right now, indicating that an arbitrary number of elements may be
collected by this meta.
The field's type must be a collection of `minidom::Element`. It must thus
implement
[`IntoIterator<Item = minidom::Element>`][`core::iter::IntoIterator`]. In
addition, the field's type must implement
[`Extend<minidom::Element>`][`core::iter::Extend`] to derive `FromXml` and the
field's reference type must implement
`IntoIterator<Item = &'_ minidom::Element>` to derive `AsXml`.
Fields with the `element` meta are deserialised with the lowest priority.
While other fields are processed in the order they are declared, `element`
fields may capture arbitrary child elements, so they are considered as the
last choice when no other field matched a given child element. In addition,
it is not allowed to have more than one field in any given struct with the
`#[xml(element)]` meta.
##### Example
```rust
# #[cfg(feature = "minidom")]
# {
# use xso::FromXml;
# use xso::exports::minidom;
#[derive(FromXml, Debug, PartialEq)]
#[xml(namespace = "urn:example", name = "parent")]
struct Parent {
#[xml(element(n = ..))]
misc: Vec<minidom::Element>,
}
let parent: Parent = xso::from_bytes(b"<parent
xmlns='urn:example'
><child-a/><child-b/><child-a/></parent>").unwrap();
assert_eq!(parent.misc[0].name(), "child-a");
assert_eq!(parent.misc[1].name(), "child-b");
assert_eq!(parent.misc[2].name(), "child-a");
# }
```
#### `extract` meta
The `extract` meta causes the field to be mapped to the *contents* of a child

View file

@ -309,6 +309,41 @@ pub struct ElementFromEvents {
nested: Option<Box<ElementFromEvents>>,
}
impl ElementFromEvents {
    /// Construct a new builder from an element header.
    ///
    /// The [`FromXml::from_events`] implementation on [`minidom::Element`]
    /// returns a `Result`. This constructor returns the builder directly
    /// and is contractually infallible, which may save callers an
    /// `unwrap()`.
    ///
    /// Attributes without a namespace are copied verbatim. Namespaced
    /// attributes receive an automatically chosen prefix, which is declared
    /// on the element the first time its namespace is seen.
    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self {
        let mut prefixes = SimpleNamespaces::new();
        let mut builder = Element::builder(qname.1, qname.0);
        for ((attr_ns, attr_name), value) in attrs.into_iter() {
            if attr_ns.is_none() {
                builder = builder.attr(attr_name, value);
                continue;
            }

            let (is_new, prefix) = prefixes.declare_with_auto_prefix(attr_ns.clone());
            let prefixed_name = prefix.with_suffix(&attr_name);
            if is_new {
                // NOTE(review): presumably this cannot fail because the
                // prefix is only declared when the tracker reports it as
                // new; confirm against minidom's builder API.
                builder = builder
                    .prefix(
                        Some(prefix.as_str().to_owned()),
                        attr_ns.as_str().to_owned(),
                    )
                    .unwrap();
            }
            builder = builder.attr(prefixed_name, value);
        }

        Self {
            inner: Some(builder.build()),
            nested: None,
        }
    }
}
impl FromEventsBuilder for ElementFromEvents {
type Output = minidom::Element;
@ -356,31 +391,7 @@ impl FromXml for Element {
qname: rxml::QName,
attrs: rxml::AttrMap,
) -> Result<Self::Builder, FromEventsError> {
let mut prefixes = SimpleNamespaces::new();
let mut builder = Element::builder(qname.1, qname.0);
for ((namespace, name), value) in attrs.into_iter() {
if namespace.is_none() {
builder = builder.attr(name, value);
} else {
let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
let name = prefix.with_suffix(&name);
if is_new {
builder = builder
.prefix(
Some(prefix.as_str().to_owned()),
namespace.as_str().to_owned(),
)
.unwrap();
}
builder = builder.attr(name, value);
}
}
let element = builder.build();
Ok(Self::Builder {
inner: Some(element),
nested: None,
})
Ok(Self::Builder::new(qname, attrs))
}
}