From b0803f831b2eb8c3fa57e56a26f19ed5285f0f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Sch=C3=A4fer?= Date: Wed, 26 Jun 2024 17:54:36 +0200 Subject: [PATCH] xso-proc: add support for parsing text content --- parsers/src/util/macro_tests.rs | 34 +++++++++++++++ xso-proc/src/compound.rs | 76 ++++++++++++++++++++++++++++++--- xso-proc/src/field.rs | 70 +++++++++++++++++++++++++++--- xso-proc/src/meta.rs | 11 +++++ xso-proc/src/scope.rs | 30 +++++++++++++ xso-proc/src/types.rs | 65 ++++++++++++++++++++++++++++ xso/src/from_xml_doc.md | 26 +++++++++++ 7 files changed, 301 insertions(+), 11 deletions(-) diff --git a/parsers/src/util/macro_tests.rs b/parsers/src/util/macro_tests.rs index 6ddae995..986e9e6e 100644 --- a/parsers/src/util/macro_tests.rs +++ b/parsers/src/util/macro_tests.rs @@ -380,3 +380,37 @@ fn default_attribute_roundtrip_pp() { }; roundtrip_full::(""); } + +#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)] +#[xml(namespace = NS1, name = "text")] +struct TextString { + #[xml(text)] + text: String, +} + +#[test] +fn text_string_roundtrip() { + #[allow(unused_imports)] + use std::{ + option::Option::{None, Some}, + result::Result::{Err, Ok}, + }; + roundtrip_full::("hello world!"); +} + +#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)] +#[xml(namespace = NS1, name = "text")] +struct TextNonString { + #[xml(text)] + text: u32, +} + +#[test] +fn text_non_string_roundtrip() { + #[allow(unused_imports)] + use std::{ + option::Option::{None, Some}, + result::Result::{Err, Ok}, + }; + roundtrip_full::("123456"); +} diff --git a/xso-proc/src/compound.rs b/xso-proc/src/compound.rs index 42b51b36..b823a4d4 100644 --- a/xso-proc/src/compound.rs +++ b/xso-proc/src/compound.rs @@ -56,10 +56,14 @@ impl Compound { state_prefix: &str, ) -> Result { let scope = FromEventsScope::new(); - let FromEventsScope { ref attrs, .. } = scope; + let FromEventsScope { + ref attrs, + ref builder_data_ident, + ref text, + .. 
+ } = scope; let default_state_ident = quote::format_ident!("{}Default", state_prefix); - let builder_data_ident = quote::format_ident!("__data"); let builder_data_ty: Type = TypePath { qself: None, path: quote::format_ident!("{}Data{}", state_ty_ident, state_prefix).into(), @@ -70,6 +74,7 @@ impl Compound { let mut builder_data_def = TokenStream::default(); let mut builder_data_init = TokenStream::default(); let mut output_cons = TokenStream::default(); + let mut text_handler = None; for field in self.fields.iter() { let member = field.member(); @@ -92,9 +97,45 @@ impl Compound { #member: #builder_data_ident.#builder_field_name, }); } + + FieldBuilderPart::Text { + value: FieldTempInit { ty, init }, + collect, + finalize, + } => { + if text_handler.is_some() { + return Err(Error::new_spanned( + field.member(), + "more than one field attempts to collect text data", + )); + } + + builder_data_def.extend(quote! { + #builder_field_name: #ty, + }); + builder_data_init.extend(quote! { + #builder_field_name: #init, + }); + text_handler = Some(quote! { + #collect + ::core::result::Result::Ok(::std::ops::ControlFlow::Break( + Self::#default_state_ident { #builder_data_ident } + )) + }); + output_cons.extend(quote! { + #member: #finalize, + }); + } } } + let text_handler = match text_handler { + Some(v) => v, + None => quote! { + ::core::result::Result::Err(::xso::error::Error::Other("Unexpected text content".into())) + }, + }; + let unknown_attr_err = format!("Unknown attribute in {}.", output_name); let unknown_child_err = format!("Unknown child in {}.", output_name); @@ -121,8 +162,8 @@ impl Compound { ::xso::exports::rxml::Event::StartElement(..) => { ::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err)) } - ::xso::exports::rxml::Event::Text(..) 
=> { - ::core::result::Result::Err(::xso::error::Error::Other("Unexpected text content".into())) + ::xso::exports::rxml::Event::Text(_, #text) => { + #text_handler } // we ignore these: a correct parser only generates // them at document start, and there we want to indeed @@ -186,10 +227,11 @@ impl Compound { .with_field(&name_ident, &qname_ty(Span::call_site())), ); - for field in self.fields.iter() { + for (i, field) in self.fields.iter().enumerate() { let member = field.member(); let bound_name = mangle_member(member); let part = field.make_iterator_part(&scope, &bound_name)?; + let state_name = quote::format_ident!("{}Field{}", state_prefix, i); match part { FieldIteratorPart::Header { setter } => { @@ -202,6 +244,30 @@ impl Compound { }); states[0].add_field(&bound_name, field.ty()); } + + FieldIteratorPart::Text { generator } => { + // we have to make sure that we carry our data around in + // all the previous states. + for state in states.iter_mut() { + state.add_field(&bound_name, field.ty()); + } + states.push( + State::new(state_name) + .with_field(&bound_name, field.ty()) + .with_impl(quote! { + ::core::option::Option::Some(::xso::exports::rxml::Event::Text( + ::xso::exports::rxml::parser::EventMetrics::zero(), + #generator, + )) + }), + ); + destructure.extend(quote! { + #member: #bound_name, + }); + start_init.extend(quote! { + #bound_name, + }); + } } } diff --git a/xso-proc/src/field.rs b/xso-proc/src/field.rs index f84f8bd1..613a2924 100644 --- a/xso-proc/src/field.rs +++ b/xso-proc/src/field.rs @@ -6,7 +6,7 @@ //! 
Compound (struct or enum variant) field types -use proc_macro2::TokenStream; +use proc_macro2::{Span, TokenStream}; use quote::{quote, ToTokens}; use syn::{spanned::Spanned, *}; @@ -15,7 +15,9 @@ use rxml_validation::NcName; use crate::error_message::{self, ParentRef}; use crate::meta::{Flag, NameRef, NamespaceRef, XmlFieldMeta}; use crate::scope::{FromEventsScope, IntoEventsScope}; -use crate::types::{default_fn, from_xml_text_fn, into_optional_xml_text_fn}; +use crate::types::{ + default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty, +}; /// Code slices necessary for declaring and initializing a temporary variable /// for parsing purposes. @@ -40,6 +42,21 @@ pub(crate) enum FieldBuilderPart { /// element's start event. value: FieldTempInit, }, + + /// Parse a field from text events. + Text { + /// Expression and type which initializes a buffer to use during + /// parsing. + value: FieldTempInit, + + /// Statement which takes text and accumulates it into the temporary + /// value declared via `value`. + collect: TokenStream, + + /// Expression which evaluates to the field's type, consuming the + /// temporary value. + finalize: TokenStream, + }, } /// Describe how a struct or enum variant's member is converted to XML data. @@ -56,6 +73,13 @@ pub(crate) enum FieldIteratorPart { /// field's value. setter: TokenStream, }, + + /// The field is emitted as text event. + Text { + /// An expression which consumes the field's value and returns a + /// String, which is then emitted as text data. + generator: TokenStream, + }, } /// Specify how the field is mapped to XML. @@ -72,6 +96,9 @@ enum FieldKind { // attribute is absent. default_: Flag, }, + + /// The field maps to the character data of the element. 
+ Text, } impl FieldKind { @@ -115,6 +142,8 @@ impl FieldKind { default_, }) } + + XmlFieldMeta::Text => Ok(Self::Text), } } } @@ -215,7 +244,7 @@ impl FieldDef { } }; - return Ok(FieldBuilderPart::Init { + Ok(FieldBuilderPart::Init { value: FieldTempInit { init: quote! { match #attrs.remove(#xml_namespace, #xml_name).map(#from_xml_text).transpose()? { @@ -225,7 +254,26 @@ impl FieldDef { }, ty: self.ty.clone(), }, - }); + }) + } + + FieldKind::Text => { + let FromEventsScope { ref text, .. } = scope; + let field_access = scope.access_field(&self.member); + let from_xml_text = from_xml_text_fn(self.ty.clone()); + + Ok(FieldBuilderPart::Text { + value: FieldTempInit { + init: quote! { ::std::string::String::new() }, + ty: string_ty(Span::call_site()), + }, + collect: quote! { + #field_access.push_str(#text.as_str()); + }, + finalize: quote! { + #from_xml_text(#field_access)? + }, + }) } } } @@ -256,7 +304,7 @@ impl FieldDef { let into_optional_xml_text = into_optional_xml_text_fn(self.ty.clone()); - return Ok(FieldIteratorPart::Header { + Ok(FieldIteratorPart::Header { // This is a neat little trick: // Option::from(x) converts x to an Option *unless* it // already is an Option<_>. @@ -267,7 +315,17 @@ impl FieldDef { #bound_name, )); }, - }); + }) + } + + FieldKind::Text => { + let into_xml_text = into_xml_text_fn(self.ty.clone()); + + Ok(FieldIteratorPart::Text { + generator: quote! { + #into_xml_text(#bound_name)? + }, + }) } } } diff --git a/xso-proc/src/meta.rs b/xso-proc/src/meta.rs index 19cee15e..51829d81 100644 --- a/xso-proc/src/meta.rs +++ b/xso-proc/src/meta.rs @@ -294,6 +294,7 @@ fn parse_prefixed_name( /// Contents of an `#[xml(..)]` attribute on a struct or enum variant member. #[derive(Debug)] pub(crate) enum XmlFieldMeta { + /// `#[xml(attribute)]`, `#[xml(attribute = ..)]` or `#[xml(attribute(..))]` Attribute { /// The span of the `#[xml(attribute)]` meta from which this was parsed. 
/// @@ -309,6 +310,9 @@ pub(crate) enum XmlFieldMeta { /// The `default` flag. default_: Flag, }, + + /// `#[xml(text)]` + Text, } impl XmlFieldMeta { @@ -388,11 +392,18 @@ impl XmlFieldMeta { } } + /// Parse a `#[xml(text)]` meta. + fn text_from_meta(_: ParseNestedMeta<'_>) -> Result<Self> { + Ok(Self::Text) + } + /// Parse [`Self`] from a nestd meta, switching on the identifier /// of that nested meta. fn parse_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> { if meta.path.is_ident("attribute") { Self::attribute_from_meta(meta) + } else if meta.path.is_ident("text") { + Self::text_from_meta(meta) } else { Err(Error::new_spanned(meta.path, "unsupported field meta")) } diff --git a/xso-proc/src/scope.rs b/xso-proc/src/scope.rs index 2a2315c6..1a4fbdc9 100644 --- a/xso-proc/src/scope.rs +++ b/xso-proc/src/scope.rs @@ -29,6 +29,17 @@ pub(crate) struct FromEventsScope { /// Accesses the `AttrMap` from code in /// [`crate::field::FieldBuilderPart::Init`]. pub(crate) attrs: Ident, + + /// Accesses the `String` of a `rxml::Event::Text` event from code in + /// [`crate::field::FieldBuilderPart::Text`]. + pub(crate) text: Ident, + + /// Accesses the builder data during parsing. + /// + /// This should not be used directly outside [`crate::compound`]. Most of + /// the time, using [`Self::access_field`] is the correct way to access + /// the builder data. + pub(crate) builder_data_ident: Ident, } impl FromEventsScope { @@ -38,8 +49,27 @@ impl FromEventsScope { // well-known identifiers from scratch all the time. Self { attrs: Ident::new("attrs", Span::call_site()), + text: Ident::new("__xso_proc_macro_text_data", Span::call_site()), + builder_data_ident: Ident::new("__xso_proc_macro_builder_data", Span::call_site()), } } + + /// Generate an expression which accesses the temporary value for the + /// given `member` during parsing.
+ pub(crate) fn access_field(&self, member: &Member) -> Expr { + Expr::Field(ExprField { + attrs: Vec::new(), + base: Box::new(Expr::Path(ExprPath { + attrs: Vec::new(), + qself: None, + path: self.builder_data_ident.clone().into(), + })), + dot_token: syn::token::Dot { + spans: [Span::call_site()], + }, + member: Member::Named(mangle_member(member)), + }) + } } /// Container struct for various identifiers used throughout the generator diff --git a/xso-proc/src/types.rs b/xso-proc/src/types.rs index d2c76089..8cc3f253 100644 --- a/xso-proc/src/types.rs +++ b/xso-proc/src/types.rs @@ -155,3 +155,68 @@ pub(crate) fn default_fn(of_ty: Type) -> Expr { }, }) } + +/// Construct a [`syn::Type`] referring to `::std::string::String`. +pub(crate) fn string_ty(span: Span) -> Type { + Type::Path(TypePath { + qself: None, + path: Path { + leading_colon: Some(syn::token::PathSep { + spans: [span, span], + }), + segments: [ + PathSegment { + ident: Ident::new("std", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("string", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("String", span), + arguments: PathArguments::None, + }, + ] + .into_iter() + .collect(), + }, + }) +} + +/// Construct a [`syn::Expr`] referring to +/// `<#ty as ::xso::IntoXmlText>::into_xml_text`. 
+pub(crate) fn into_xml_text_fn(ty: Type) -> Expr { + let span = ty.span(); + Expr::Path(ExprPath { + attrs: Vec::new(), + qself: Some(QSelf { + lt_token: syn::token::Lt { spans: [span] }, + ty: Box::new(ty), + position: 2, + as_token: Some(syn::token::As { span }), + gt_token: syn::token::Gt { spans: [span] }, + }), + path: Path { + leading_colon: Some(syn::token::PathSep { + spans: [span, span], + }), + segments: [ + PathSegment { + ident: Ident::new("xso", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("IntoXmlText", span), + arguments: PathArguments::None, + }, + PathSegment { + ident: Ident::new("into_xml_text", span), + arguments: PathArguments::None, + }, + ] + .into_iter() + .collect(), + }, + }) +} diff --git a/xso/src/from_xml_doc.md b/xso/src/from_xml_doc.md index 283a8fac..8362f609 100644 --- a/xso/src/from_xml_doc.md +++ b/xso/src/from_xml_doc.md @@ -68,6 +68,7 @@ The following mapping types are defined: | Type | Description | | --- | --- | | [`attribute`](#attribute-meta) | Map the field to an XML attribute on the struct's element | +| [`text`](#text-meta) | Map the field to the text content of the struct's element | #### `attribute` meta @@ -132,3 +133,28 @@ assert_eq!(foo, Foo { e: "5".to_string(), }); ``` + +#### `text` meta + +The `text` meta causes the field to be mapped to the text content of the +element. For `FromXml`, the field's type must implement [`FromXmlText`] and +for `IntoXml`, the field's type must implement [`IntoXmlText`]. + +The `text` meta supports no options or value. + +##### Example + +```rust +# use xso::FromXml; +#[derive(FromXml, Debug, PartialEq)] +#[xml(namespace = "urn:example", name = "foo")] +struct Foo { + #[xml(text)] + a: String, +}; + +let foo: Foo = xso::from_bytes(b"<foo xmlns='urn:example'>hello</foo>").unwrap(); +assert_eq!(foo, Foo { + a: "hello".to_string(), +}); +```