From 0361b5905b8a1d37bb8797280fa67ae1afbdb83b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Sch=C3=A4fer?= Date: Mon, 5 Aug 2024 08:20:25 +0200 Subject: [PATCH] xso: implement catch-all for unknown elements --- parsers/src/util/macro_tests.rs | 19 ++++++ xso-proc/src/compound.rs | 62 ++++++++++++++---- xso-proc/src/field/child.rs | 8 +-- xso-proc/src/field/element.rs | 110 ++++++++++++++++++++++++++++++++ xso-proc/src/field/mod.rs | 75 ++++++++++++++++++++-- xso-proc/src/meta.rs | 34 ++++++++++ xso-proc/src/types.rs | 33 ++++++++++ xso/ChangeLog | 2 + xso/src/from_xml_doc.md | 58 +++++++++++++++++ xso/src/minidom_compat.rs | 61 ++++++++++-------- 10 files changed, 413 insertions(+), 49 deletions(-) create mode 100644 xso-proc/src/field/element.rs diff --git a/parsers/src/util/macro_tests.rs b/parsers/src/util/macro_tests.rs index 8e457f7c..7a9ecd3d 100644 --- a/parsers/src/util/macro_tests.rs +++ b/parsers/src/util/macro_tests.rs @@ -1551,3 +1551,22 @@ fn optional_attribute_optional_extract_double_option_roundtrip_absent_child() { "", ) } + +#[derive(FromXml, AsXml, PartialEq, Debug, Clone)] +#[xml(namespace = NS1, name = "parent")] +struct ElementCatchall { + #[xml(element(n = ..))] + children: Vec<::minidom::Element>, +} + +#[test] +fn element_catchall_roundtrip() { + #[allow(unused_imports)] + use std::{ + option::Option::{None, Some}, + result::Result::{Err, Ok}, + }; + roundtrip_full::( + "", + ) +} diff --git a/xso-proc/src/compound.rs b/xso-proc/src/compound.rs index bf8ec8e1..1753a9d4 100644 --- a/xso-proc/src/compound.rs +++ b/xso-proc/src/compound.rs @@ -11,7 +11,7 @@ use quote::quote; use syn::{spanned::Spanned, *}; use crate::error_message::ParentRef; -use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit}; +use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit, NestedMatcher}; use crate::meta::NamespaceRef; use crate::scope::{mangle_member, AsItemsScope, FromEventsScope}; use 
crate::state::{AsItemsSubmachine, FromEventsSubmachine, State}; @@ -109,6 +109,7 @@ impl Compound { let mut builder_data_init = TokenStream::default(); let mut output_cons = TokenStream::default(); let mut child_matchers = TokenStream::default(); + let mut fallback_child_matcher = None; let mut text_handler = None; let mut extra_defs = TokenStream::default(); let is_tuple = !output_name.is_path(); @@ -219,18 +220,44 @@ impl Compound { #builder_field_name: #init, }); - child_matchers.extend(quote! { - let (name, attrs) = match #matcher { - ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs), - ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(e), - ::core::result::Result::Ok(#substate_data) => { - return ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name { - #builder_data_ident, - #substate_data, - })) + match matcher { + NestedMatcher::Selective(matcher) => { + child_matchers.extend(quote! { + let (name, attrs) = match #matcher { + ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs), + ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(e), + ::core::result::Result::Ok(#substate_data) => { + return ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name { + #builder_data_ident, + #substate_data, + })) + } + }; + }); + } + NestedMatcher::Fallback(matcher) => { + if let Some((span, _)) = fallback_child_matcher.as_ref() { + let mut err = Error::new( + field.span(), + "more than one field is attempting to consume all unmatched child elements" + ); + err.combine(Error::new( + *span, + "the previous field collecting all unmatched child elements is here" + )); + return Err(err); } - }; - }); + + let matcher = quote! 
{ + ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name { + #builder_data_ident, + #substate_data: { #matcher }, + })) + }; + + fallback_child_matcher = Some((field.span(), matcher)); + } + } if is_tuple { output_cons.extend(quote! { @@ -278,6 +305,14 @@ impl Compound { } }; + let child_fallback = match fallback_child_matcher { + Some((_, matcher)) => matcher, + None => quote! { + let _ = (name, attrs); + ::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err)) + }, + }; + states.push(State::new_with_builder( default_state_ident.clone(), builder_data_ident, @@ -292,8 +327,7 @@ impl Compound { } ::xso::exports::rxml::Event::StartElement(_, name, attrs) => { #child_matchers - let _ = (name, attrs); - ::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err)) + #child_fallback } ::xso::exports::rxml::Event::Text(_, #text) => { #text_handler diff --git a/xso-proc/src/field/child.rs b/xso-proc/src/field/child.rs index 5b37fb1c..9474926c 100644 --- a/xso-proc/src/field/child.rs +++ b/xso-proc/src/field/child.rs @@ -23,7 +23,7 @@ use crate::types::{ option_as_xml_ty, option_ty, ref_ty, ty_from_ident, }; -use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit}; +use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit, NestedMatcher}; /// The field maps to a child pub(super) struct ChildField { @@ -101,7 +101,7 @@ impl Field for ChildField { init: quote! { ::core::option::Option::None }, ty: option_ty(ty.clone()), }, - matcher: quote! { + matcher: NestedMatcher::Selective(quote! { match #matcher { ::core::result::Result::Ok(v) => if #field_access.is_some() { ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(::xso::error::Error::Other(#duplicate_msg))) @@ -110,7 +110,7 @@ impl Field for ChildField { }, ::core::result::Result::Err(e) => ::core::result::Result::Err(e), } - }, + }), builder, collect: quote! 
{ #field_access = ::core::option::Option::Some(#fetch); @@ -132,7 +132,7 @@ impl Field for ChildField { init: quote! { #ty_default() }, ty: ty.clone(), }, - matcher, + matcher: NestedMatcher::Selective(matcher), builder, collect: quote! { #ty_extend(&mut #field_access, [#fetch]); diff --git a/xso-proc/src/field/element.rs b/xso-proc/src/field/element.rs new file mode 100644 index 00000000..c4e376cc --- /dev/null +++ b/xso-proc/src/field/element.rs @@ -0,0 +1,110 @@ +// Copyright (c) 2024 Jonas Schäfer +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! This module concerns the processing of untyped `minidom::Element` +//! children. +//! +//! In particular, it provides the `#[xml(element)]` implementation. + +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::*; + +use crate::error_message::ParentRef; +use crate::scope::{AsItemsScope, FromEventsScope}; +use crate::types::{ + default_fn, element_ty, from_xml_builder_ty, into_iterator_into_iter_fn, into_iterator_iter_ty, + item_iter_ty, option_ty, ref_ty, +}; + +use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit, NestedMatcher}; + +pub(super) struct ElementField; + +impl Field for ElementField { + fn make_builder_part( + &self, + scope: &FromEventsScope, + _container_name: &ParentRef, + member: &Member, + ty: &Type, + ) -> Result { + let FromEventsScope { + ref substate_result, + .. + } = scope; + let field_access = scope.access_field(member); + + let element_ty = element_ty(Span::call_site()); + let default_fn = default_fn(ty.clone()); + let builder = from_xml_builder_ty(element_ty.clone()); + + Ok(FieldBuilderPart::Nested { + extra_defs: TokenStream::default(), + value: FieldTempInit { + init: quote! { #default_fn() }, + ty: ty.clone(), + }, + matcher: NestedMatcher::Fallback(quote! 
{ + #builder::new(name, attrs) + }), + builder, + collect: quote! { + <#ty as ::core::iter::Extend::<#element_ty>>::extend(&mut #field_access, [#substate_result]); + }, + finalize: quote! { + #field_access + }, + }) + } + + fn make_iterator_part( + &self, + scope: &AsItemsScope, + _container_name: &ParentRef, + bound_name: &Ident, + _member: &Member, + ty: &Type, + ) -> Result { + let AsItemsScope { ref lifetime, .. } = scope; + + let element_ty = element_ty(Span::call_site()); + let iter_ty = item_iter_ty(element_ty.clone(), lifetime.clone()); + let element_iter = into_iterator_iter_ty(ref_ty(ty.clone(), lifetime.clone())); + let into_iter = into_iterator_into_iter_fn(ref_ty(ty.clone(), lifetime.clone())); + + let state_ty = Type::Tuple(TypeTuple { + paren_token: token::Paren::default(), + elems: [element_iter, option_ty(iter_ty)].into_iter().collect(), + }); + + Ok(FieldIteratorPart::Content { + extra_defs: TokenStream::default(), + value: FieldTempInit { + init: quote! { + (#into_iter(#bound_name), ::core::option::Option::None) + }, + ty: state_ty, + }, + generator: quote! { + loop { + if let ::core::option::Option::Some(current) = #bound_name.1.as_mut() { + if let ::core::option::Option::Some(item) = current.next() { + break ::core::option::Option::Some(item).transpose(); + } + } + if let ::core::option::Option::Some(item) = #bound_name.0.next() { + #bound_name.1 = ::core::option::Option::Some( + <#element_ty as ::xso::AsXml>::as_xml_iter(item)? 
+ ); + } else { + break ::core::result::Result::Ok(::core::option::Option::None) + } + } + }, + }) + } +} diff --git a/xso-proc/src/field/mod.rs b/xso-proc/src/field/mod.rs index 5b4280e6..41a7f1e6 100644 --- a/xso-proc/src/field/mod.rs +++ b/xso-proc/src/field/mod.rs @@ -18,10 +18,14 @@ use crate::scope::{AsItemsScope, FromEventsScope}; mod attribute; mod child; +#[cfg(feature = "minidom")] +mod element; mod text; use self::attribute::AttributeField; use self::child::{ChildField, ExtractDef}; +#[cfg(feature = "minidom")] +use self::element::ElementField; use self::text::TextField; /// Code slices necessary for declaring and initializing a temporary variable @@ -34,6 +38,33 @@ pub(crate) struct FieldTempInit { pub(crate) init: TokenStream, } +/// Configure how a nested field builder selects child elements. +pub(crate) enum NestedMatcher { + /// Matches a specific child element fallibly. + Selective( + /// Expression which evaluates to `Result<T, FromEventsError>`, + /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`. + /// + /// `T` must be the type specified in the + /// [`FieldBuilderPart::Nested::builder`] field. + TokenStream, + ), + + #[cfg_attr(not(feature = "minidom"), allow(dead_code))] + /// Matches any child element not matched by another matcher. + /// + /// Only a single field may use this variant, otherwise an error is + /// raised during execution of the proc macro. + Fallback( + /// Expression which evaluates to `T` (or `return`s an error), + /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`. + /// + /// `T` must be the type specified in the + /// [`FieldBuilderPart::Nested::builder`] field. + TokenStream, + ), +} + /// Describe how a struct or enum variant's member is parsed from XML data. /// /// This struct is returned from [`FieldDef::make_builder_part`] and @@ -73,12 +104,9 @@ pub(crate) enum FieldBuilderPart { /// parsing. 
value: FieldTempInit, + - /// Expression which evaluates to `Result`, - /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`. - /// - /// `T` must be the type specified in the - /// [`Self::Nested::builder`] field. - matcher: TokenStream, + /// Configure child matching behaviour for this field. See + /// [`NestedMatcher`] for options. + matcher: NestedMatcher, /// Type implementing `xso::FromEventsBuilder` which parses the child /// element. @@ -343,6 +371,31 @@ fn new_field( }), })) } + + #[cfg(feature = "minidom")] + XmlFieldMeta::Element { span, amount } => { + match amount { + Some(AmountConstraint::Any(_)) => (), + Some(AmountConstraint::FixedSingle(span)) => { + return Err(Error::new( + span, + "only `n = ..` is supported for #[xml(element)]` currently", + )) + } + None => return Err(Error::new(span, "`n` must be set to `..` currently")), + } + + Ok(Box::new(ElementField)) + } + + #[cfg(not(feature = "minidom"))] + XmlFieldMeta::Element { span, amount } => { + let _ = amount; + Err(Error::new( + span, + "#[xml(element)] requires xso to be built with the \"minidom\" feature.", + )) + } } } @@ -351,6 +404,9 @@ fn new_field( /// See [`Compound`][`crate::compound::Compound`] for more information on /// compounds in general. pub(crate) struct FieldDef { + /// A span which refers to the field's definition. + span: Span, + /// The member identifying the field. member: Member, @@ -388,6 +444,7 @@ impl FieldDef { let ty = field.ty.clone(); Ok(Self { + span: field_span, inner: new_field(meta, ident, &ty, container_namespace)?, member, ty, @@ -406,6 +463,7 @@ impl FieldDef { ) -> Result { let span = meta.span(); Ok(Self { + span, member: Member::Unnamed(Index { index, span }), ty: ty.clone(), inner: new_field(meta, None, ty, container_namespace)?, @@ -454,4 +512,9 @@ impl FieldDef { pub(crate) fn is_text_field(&self) -> bool { self.inner.captures_text() } + + /// Return a span which points at the field's definition. 
+ pub(crate) fn span(&self) -> Span { + self.span + } } diff --git a/xso-proc/src/meta.rs b/xso-proc/src/meta.rs index ac160483..f64de026 100644 --- a/xso-proc/src/meta.rs +++ b/xso-proc/src/meta.rs @@ -686,6 +686,17 @@ pub(crate) enum XmlFieldMeta { /// The `fields` nested meta. fields: Vec, }, + + /// `#[xml(element)]` + Element { + /// The span of the `#[xml(element)]` meta from which this was parsed. + /// + /// This is useful for error messages. + span: Span, + + /// The `n` flag. + amount: Option, + }, } impl XmlFieldMeta { @@ -906,6 +917,26 @@ impl XmlFieldMeta { }) } + /// Parse a `#[xml(element)]` meta. + fn element_from_meta(meta: ParseNestedMeta<'_>) -> Result { + let mut amount = None; + meta.parse_nested_meta(|meta| { + if meta.path.is_ident("n") { + if amount.is_some() { + return Err(Error::new_spanned(meta.path, "duplicate `n` key")); + } + amount = Some(meta.value()?.parse()?); + Ok(()) + } else { + Err(Error::new_spanned(meta.path, "unsupported key")) + } + })?; + Ok(Self::Element { + span: meta.path.span(), + amount, + }) + } + /// Parse [`Self`] from a nestd meta, switching on the identifier /// of that nested meta. fn parse_from_meta(meta: ParseNestedMeta<'_>) -> Result { @@ -917,6 +948,8 @@ impl XmlFieldMeta { Self::child_from_meta(meta) } else if meta.path.is_ident("extract") { Self::extract_from_meta(meta) + } else if meta.path.is_ident("element") { + Self::element_from_meta(meta) } else { Err(Error::new_spanned(meta.path, "unsupported field meta")) } @@ -998,6 +1031,7 @@ impl XmlFieldMeta { Self::Child { ref span, .. } => *span, Self::Text { ref span, .. } => *span, Self::Extract { ref span, .. } => *span, + Self::Element { ref span, .. 
} => *span, } } diff --git a/xso-proc/src/types.rs b/xso-proc/src/types.rs index c9de610d..975afcfd 100644 --- a/xso-proc/src/types.rs +++ b/xso-proc/src/types.rs @@ -783,3 +783,36 @@ pub(crate) fn option_as_xml_ty(inner_ty: Type) -> Type { }, }) } + +/// Construct a [`syn::Type`] referring to `::xso::exports::minidom::Element`. +#[cfg(feature = "minidom")] +pub(crate) fn element_ty(span: Span) -> Type { + Type::Path(TypePath { + qself: None, + path: Path { + leading_colon: Some(syn::token::PathSep { + spans: [span, span], + }), + segments: [ + PathSegment { + ident: Ident::new("xso", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("exports", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("minidom", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("Element", span), + arguments: PathArguments::None, + }, + ] + .into_iter() + .collect(), + }, + }) +} diff --git a/xso/ChangeLog b/xso/ChangeLog index bcdb8e76..3f410583 100644 --- a/xso/ChangeLog +++ b/xso/ChangeLog @@ -20,6 +20,8 @@ Version NEXT: - Support for deriving FromXml and AsXml on enums. - Support for extracting data from child elements without intermediate structs. + - Support for collecting all unknown children in a single field as + a collection of `minidom::Element`. 
Version 0.1.2: 2024-07-26 Jonas Schäfer diff --git a/xso/src/from_xml_doc.md b/xso/src/from_xml_doc.md index fcffe574..23c5e787 100644 --- a/xso/src/from_xml_doc.md +++ b/xso/src/from_xml_doc.md @@ -150,6 +150,7 @@ The following mapping types are defined: | --- | --- | | [`attribute`](#attribute-meta) | Map the field to an XML attribute on the struct's element | | [`child`](#child-meta) | Map the field to a child element | +| [`element`](#element-meta) | Map the field to a child element as [`minidom::Element`] | | [`extract`](#extract-meta) | Map the field to contents of a child element of specified structure | | [`text`](#text-meta) | Map the field to the text content of the struct's element | @@ -307,6 +308,63 @@ assert_eq!(parent, Parent { }); ``` +#### `element` meta + +The `element` meta causes the field to be mapped to child elements, stored as +a container containing [`minidom::Element`] instances. + +This meta is only available if `xso` is being built with the `"minidom"` +feature. + +The following keys can be used inside the `#[xml(element(..))]` meta: + +| Key | Value type | Description | +| --- | --- | --- | +| `n` | `..` | Must be set to the value `..`. | + +The `n` parameter will, in the future, support values other than `..`. In +order to provide a non-breaking path into that future, it must be set to the +value `..` right now, indicating that an arbitrary number of elements may be +collected by this meta. + +The field's type must be a collection of `minidom::Element`. It must thus +implement +[`IntoIterator<Item = minidom::Element>`][`core::iter::IntoIterator`]. In +addition, the field's type must implement +[`Extend<minidom::Element>`][`core::iter::Extend`] to derive `FromXml` and the +field's reference type must implement +`IntoIterator<Item = &minidom::Element>` to derive `AsXml`. + +Fields with the `element` meta are deserialised with the lowest priority. 
+While other fields are processed in the order they are declared, `element` +fields may capture arbitrary child elements, so they are considered as the +last choice when no other field matched a given child element. In addition, +it is not allowed to have more than one field in any given struct with the +`#[xml(element)]` meta. + +##### Example + +```rust +# #[cfg(feature = "minidom")] +# { +# use xso::FromXml; +# use xso::exports::minidom; +#[derive(FromXml, Debug, PartialEq)] +#[xml(namespace = "urn:example", name = "parent")] +struct Parent { + #[xml(element(n = ..))] + misc: Vec<minidom::Element>, +} + +let parent: Parent = xso::from_bytes(b"<parent xmlns='urn:example'><child-a/><child-b/><child-a/></parent>").unwrap(); +assert_eq!(parent.misc[0].name(), "child-a"); +assert_eq!(parent.misc[1].name(), "child-b"); +assert_eq!(parent.misc[2].name(), "child-a"); +# } +``` + #### `extract` meta The `extract` meta causes the field to be mapped to the *contents* of a child diff --git a/xso/src/minidom_compat.rs b/xso/src/minidom_compat.rs index 6f491606..f70b6b68 100644 --- a/xso/src/minidom_compat.rs +++ b/xso/src/minidom_compat.rs @@ -309,6 +309,41 @@ pub struct ElementFromEvents { nested: Option>, } +impl ElementFromEvents { + /// Construct a new builder from an element header. + /// + /// Unlike the [`FromXml::from_events`] implementation on + /// [`minidom::Element`], this is contractually infallible. Using this may + /// thus save you an `unwrap()` call. 
+ pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self { + let mut prefixes = SimpleNamespaces::new(); + let mut builder = Element::builder(qname.1, qname.0); + for ((namespace, name), value) in attrs.into_iter() { + if namespace.is_none() { + builder = builder.attr(name, value); + } else { + let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone()); + let name = prefix.with_suffix(&name); + if is_new { + builder = builder + .prefix( + Some(prefix.as_str().to_owned()), + namespace.as_str().to_owned(), + ) + .unwrap(); + } + builder = builder.attr(name, value); + } + } + + let element = builder.build(); + Self { + inner: Some(element), + nested: None, + } + } +} + impl FromEventsBuilder for ElementFromEvents { type Output = minidom::Element; @@ -356,31 +391,7 @@ impl FromXml for Element { qname: rxml::QName, attrs: rxml::AttrMap, ) -> Result { - let mut prefixes = SimpleNamespaces::new(); - let mut builder = Element::builder(qname.1, qname.0); - for ((namespace, name), value) in attrs.into_iter() { - if namespace.is_none() { - builder = builder.attr(name, value); - } else { - let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone()); - let name = prefix.with_suffix(&name); - if is_new { - builder = builder - .prefix( - Some(prefix.as_str().to_owned()), - namespace.as_str().to_owned(), - ) - .unwrap(); - } - builder = builder.attr(name, value); - } - } - - let element = builder.build(); - Ok(Self::Builder { - inner: Some(element), - nested: None, - }) + Ok(Self::Builder::new(qname, attrs)) } }