xso-proc: add support for parsing text content

This commit is contained in:
Jonas Schäfer 2024-06-26 17:54:36 +02:00
parent 92e69cf59f
commit b0803f831b
7 changed files with 301 additions and 11 deletions

View file

@ -380,3 +380,37 @@ fn default_attribute_roundtrip_pp() {
};
roundtrip_full::<DefaultAttribute>("<attr xmlns='urn:example:ns1' foo='xyz' bar='16'/>");
}
/// Test fixture: a `text` element in namespace `NS1` whose text content is
/// mapped to a `String` field via `#[xml(text)]`.
#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)]
#[xml(namespace = NS1, name = "text")]
struct TextString {
/// The text content of the element.
#[xml(text)]
text: String,
}
/// Round-trips a `text` element with string content through [`TextString`].
#[test]
fn text_string_roundtrip() {
    // NOTE(review): presumably these re-imports exist because the enclosing
    // test module shadows the prelude names to check macro hygiene; confirm
    // against the top of the file.
    #[allow(unused_imports)]
    use std::option::Option::{None, Some};
    #[allow(unused_imports)]
    use std::result::Result::{Err, Ok};

    let document = "<text xmlns='urn:example:ns1'>hello world!</text>";
    roundtrip_full::<TextString>(document);
}
/// Test fixture: a `text` element in namespace `NS1` whose text content is
/// mapped to a non-`String` (`u32`) field via `#[xml(text)]`.
#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)]
#[xml(namespace = NS1, name = "text")]
struct TextNonString {
/// The text content of the element, converted to and from `u32`.
#[xml(text)]
text: u32,
}
/// Round-trips a `text` element with numeric content through
/// [`TextNonString`].
#[test]
fn text_non_string_roundtrip() {
    // NOTE(review): presumably these re-imports exist because the enclosing
    // test module shadows the prelude names to check macro hygiene; confirm
    // against the top of the file.
    #[allow(unused_imports)]
    use std::option::Option::{None, Some};
    #[allow(unused_imports)]
    use std::result::Result::{Err, Ok};

    let document = "<text xmlns='urn:example:ns1'>123456</text>";
    roundtrip_full::<TextNonString>(document);
}

View file

@ -56,10 +56,14 @@ impl Compound {
state_prefix: &str,
) -> Result<FromEventsSubmachine> {
let scope = FromEventsScope::new();
let FromEventsScope { ref attrs, .. } = scope;
let FromEventsScope {
ref attrs,
ref builder_data_ident,
ref text,
..
} = scope;
let default_state_ident = quote::format_ident!("{}Default", state_prefix);
let builder_data_ident = quote::format_ident!("__data");
let builder_data_ty: Type = TypePath {
qself: None,
path: quote::format_ident!("{}Data{}", state_ty_ident, state_prefix).into(),
@ -70,6 +74,7 @@ impl Compound {
let mut builder_data_def = TokenStream::default();
let mut builder_data_init = TokenStream::default();
let mut output_cons = TokenStream::default();
let mut text_handler = None;
for field in self.fields.iter() {
let member = field.member();
@ -92,9 +97,45 @@ impl Compound {
#member: #builder_data_ident.#builder_field_name,
});
}
FieldBuilderPart::Text {
value: FieldTempInit { ty, init },
collect,
finalize,
} => {
if text_handler.is_some() {
return Err(Error::new_spanned(
field.member(),
"more than one field attempts to collect text data",
));
}
builder_data_def.extend(quote! {
#builder_field_name: #ty,
});
builder_data_init.extend(quote! {
#builder_field_name: #init,
});
text_handler = Some(quote! {
#collect
::core::result::Result::Ok(::std::ops::ControlFlow::Break(
Self::#default_state_ident { #builder_data_ident }
))
});
output_cons.extend(quote! {
#member: #finalize,
});
}
}
}
let text_handler = match text_handler {
Some(v) => v,
None => quote! {
::core::result::Result::Err(::xso::error::Error::Other("Unexpected text content".into()))
},
};
let unknown_attr_err = format!("Unknown attribute in {}.", output_name);
let unknown_child_err = format!("Unknown child in {}.", output_name);
@ -121,8 +162,8 @@ impl Compound {
::xso::exports::rxml::Event::StartElement(..) => {
::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err))
}
::xso::exports::rxml::Event::Text(..) => {
::core::result::Result::Err(::xso::error::Error::Other("Unexpected text content".into()))
::xso::exports::rxml::Event::Text(_, #text) => {
#text_handler
}
// we ignore these: a correct parser only generates
// them at document start, and there we want to indeed
@ -186,10 +227,11 @@ impl Compound {
.with_field(&name_ident, &qname_ty(Span::call_site())),
);
for field in self.fields.iter() {
for (i, field) in self.fields.iter().enumerate() {
let member = field.member();
let bound_name = mangle_member(member);
let part = field.make_iterator_part(&scope, &bound_name)?;
let state_name = quote::format_ident!("{}Field{}", state_prefix, i);
match part {
FieldIteratorPart::Header { setter } => {
@ -202,6 +244,30 @@ impl Compound {
});
states[0].add_field(&bound_name, field.ty());
}
FieldIteratorPart::Text { generator } => {
// we have to make sure that we carry our data around in
// all the previous states.
for state in states.iter_mut() {
state.add_field(&bound_name, field.ty());
}
states.push(
State::new(state_name)
.with_field(&bound_name, field.ty())
.with_impl(quote! {
::core::option::Option::Some(::xso::exports::rxml::Event::Text(
::xso::exports::rxml::parser::EventMetrics::zero(),
#generator,
))
}),
);
destructure.extend(quote! {
#member: #bound_name,
});
start_init.extend(quote! {
#bound_name,
});
}
}
}

View file

@ -6,7 +6,7 @@
//! Compound (struct or enum variant) field types
use proc_macro2::TokenStream;
use proc_macro2::{Span, TokenStream};
use quote::{quote, ToTokens};
use syn::{spanned::Spanned, *};
@ -15,7 +15,9 @@ use rxml_validation::NcName;
use crate::error_message::{self, ParentRef};
use crate::meta::{Flag, NameRef, NamespaceRef, XmlFieldMeta};
use crate::scope::{FromEventsScope, IntoEventsScope};
use crate::types::{default_fn, from_xml_text_fn, into_optional_xml_text_fn};
use crate::types::{
default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty,
};
/// Code slices necessary for declaring and initializing a temporary variable
/// for parsing purposes.
@ -40,6 +42,21 @@ pub(crate) enum FieldBuilderPart {
/// element's start event.
value: FieldTempInit,
},
/// Parse a field from text events.
Text {
/// Expression and type which initializes a buffer to use during
/// parsing.
value: FieldTempInit,
/// Statement which takes text and accumulates it into the temporary
/// value declared via `value`.
collect: TokenStream,
/// Expression which evaluates to the field's type, consuming the
/// temporary value.
finalize: TokenStream,
},
}
/// Describe how a struct or enum variant's member is converted to XML data.
@ -56,6 +73,13 @@ pub(crate) enum FieldIteratorPart {
/// field's value.
setter: TokenStream,
},
/// The field is emitted as text event.
Text {
/// An expression which consumes the field's value and returns a
/// String, which is then emitted as text data.
generator: TokenStream,
},
}
/// Specify how the field is mapped to XML.
@ -72,6 +96,9 @@ enum FieldKind {
// attribute is absent.
default_: Flag,
},
/// The field maps to the character data of the element.
Text,
}
impl FieldKind {
@ -115,6 +142,8 @@ impl FieldKind {
default_,
})
}
XmlFieldMeta::Text => Ok(Self::Text),
}
}
}
@ -215,7 +244,7 @@ impl FieldDef {
}
};
return Ok(FieldBuilderPart::Init {
Ok(FieldBuilderPart::Init {
value: FieldTempInit {
init: quote! {
match #attrs.remove(#xml_namespace, #xml_name).map(#from_xml_text).transpose()? {
@ -225,7 +254,26 @@ impl FieldDef {
},
ty: self.ty.clone(),
},
});
})
}
FieldKind::Text => {
let FromEventsScope { ref text, .. } = scope;
let field_access = scope.access_field(&self.member);
let from_xml_text = from_xml_text_fn(self.ty.clone());
Ok(FieldBuilderPart::Text {
value: FieldTempInit {
init: quote! { ::std::string::String::new() },
ty: string_ty(Span::call_site()),
},
collect: quote! {
#field_access.push_str(#text.as_str());
},
finalize: quote! {
#from_xml_text(#field_access)?
},
})
}
}
}
@ -256,7 +304,7 @@ impl FieldDef {
let into_optional_xml_text = into_optional_xml_text_fn(self.ty.clone());
return Ok(FieldIteratorPart::Header {
Ok(FieldIteratorPart::Header {
// This is a neat little trick:
// Option::from(x) converts x to an Option<T> *unless* it
// already is an Option<_>.
@ -267,7 +315,17 @@ impl FieldDef {
#bound_name,
));
},
});
})
}
FieldKind::Text => {
let into_xml_text = into_xml_text_fn(self.ty.clone());
Ok(FieldIteratorPart::Text {
generator: quote! {
#into_xml_text(#bound_name)?
},
})
}
}
}

View file

@ -294,6 +294,7 @@ fn parse_prefixed_name(
/// Contents of an `#[xml(..)]` attribute on a struct or enum variant member.
#[derive(Debug)]
pub(crate) enum XmlFieldMeta {
/// `#[xml(attribute)]`, `#[xml(attribute = ..)]` or `#[xml(attribute(..))]`
Attribute {
/// The span of the `#[xml(attribute)]` meta from which this was parsed.
///
@ -309,6 +310,9 @@ pub(crate) enum XmlFieldMeta {
/// The `default` flag.
default_: Flag,
},
/// `#[xml(text)]`
Text,
}
impl XmlFieldMeta {
@ -388,11 +392,18 @@ impl XmlFieldMeta {
}
}
/// Parse a `#[xml(text)]` meta.
fn text_from_meta(_: ParseNestedMeta<'_>) -> Result<Self> {
Ok(Self::Text)
}
/// Parse [`Self`] from a nested meta, switching on the identifier
/// of that nested meta.
fn parse_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
if meta.path.is_ident("attribute") {
Self::attribute_from_meta(meta)
} else if meta.path.is_ident("text") {
Self::text_from_meta(meta)
} else {
Err(Error::new_spanned(meta.path, "unsupported field meta"))
}

View file

@ -29,6 +29,17 @@ pub(crate) struct FromEventsScope {
/// Accesses the `AttrMap` from code in
/// [`crate::field::FieldBuilderPart::Init`].
pub(crate) attrs: Ident,
/// Accesses the `String` of a `rxml::Event::Text` event from code in
/// [`crate::field::FieldBuilderPart::Text`].
pub(crate) text: Ident,
/// Accesses the builder data during parsing.
///
/// This should not be used directly outside [`crate::compound`]. Most of
/// the time, using [`Self::access_field`] is the correct way to access
/// the builder data.
pub(crate) builder_data_ident: Ident,
}
impl FromEventsScope {
@ -38,8 +49,27 @@ impl FromEventsScope {
// well-known identifiers from scratch all the time.
Self {
attrs: Ident::new("attrs", Span::call_site()),
text: Ident::new("__xso_proc_macro_text_data", Span::call_site()),
builder_data_ident: Ident::new("__xso_proc_macro_builder_data", Span::call_site()),
}
}
/// Build the expression `<builder data>.<mangled member>`, i.e. the access
/// to the temporary value which backs `member` while parsing.
///
/// The base of the expression is the identifier stored in
/// `self.builder_data_ident`; the field name is derived from `member` via
/// `mangle_member`.
pub(crate) fn access_field(&self, member: &Member) -> Expr {
    // The bare path expression naming the builder data binding.
    let builder_data = Expr::Path(ExprPath {
        attrs: Vec::new(),
        qself: None,
        path: self.builder_data_ident.clone().into(),
    });
    let dot_token = syn::token::Dot {
        spans: [Span::call_site()],
    };
    let mangled = Member::Named(mangle_member(member));

    Expr::Field(ExprField {
        attrs: Vec::new(),
        base: Box::new(builder_data),
        dot_token,
        member: mangled,
    })
}
}
/// Container struct for various identifiers used throughout the generator

View file

@ -155,3 +155,68 @@ pub(crate) fn default_fn(of_ty: Type) -> Expr {
},
})
}
/// Construct a [`syn::Type`] referring to `::std::string::String`.
///
/// Every token of the generated path (the leading `::` and all three
/// segments) carries the given `span`.
pub(crate) fn string_ty(span: Span) -> Type {
    // One argument-less path segment per name, in path order.
    let segments = ["std", "string", "String"]
        .into_iter()
        .map(|name| PathSegment {
            ident: Ident::new(name, span),
            arguments: PathArguments::None,
        })
        .collect();

    Type::Path(TypePath {
        qself: None,
        path: Path {
            leading_colon: Some(syn::token::PathSep {
                spans: [span, span],
            }),
            segments,
        },
    })
}
/// Construct a [`syn::Expr`] referring to
/// `<#ty as ::xso::IntoXmlText>::into_xml_text`.
///
/// All generated tokens carry the span of `ty`.
pub(crate) fn into_xml_text_fn(ty: Type) -> Expr {
    let span = ty.span();

    // One argument-less path segment per name, in path order.
    let segments = ["xso", "IntoXmlText", "into_xml_text"]
        .into_iter()
        .map(|name| PathSegment {
            ident: Ident::new(name, span),
            arguments: PathArguments::None,
        })
        .collect();

    // `position: 2` places the first two segments (`xso::IntoXmlText`)
    // inside the `<#ty as ..>` qualifier, leaving `into_xml_text` after it.
    let qualifier = QSelf {
        lt_token: syn::token::Lt { spans: [span] },
        ty: Box::new(ty),
        position: 2,
        as_token: Some(syn::token::As { span }),
        gt_token: syn::token::Gt { spans: [span] },
    };

    Expr::Path(ExprPath {
        attrs: Vec::new(),
        qself: Some(qualifier),
        path: Path {
            leading_colon: Some(syn::token::PathSep {
                spans: [span, span],
            }),
            segments,
        },
    })
}

View file

@ -68,6 +68,7 @@ The following mapping types are defined:
| Type | Description |
| --- | --- |
| [`attribute`](#attribute-meta) | Map the field to an XML attribute on the struct's element |
| [`text`](#text-meta) | Map the field to the text content of the struct's element |
#### `attribute` meta
@ -132,3 +133,28 @@ assert_eq!(foo, Foo {
e: "5".to_string(),
});
```
#### `text` meta
The `text` meta causes the field to be mapped to the text content of the
element. For `FromXml`, the field's type must implement [`FromXmlText`] and
for `IntoXml`, the field's type must implement [`IntoXmlText`].
The `text` meta accepts neither options nor a value.
##### Example
```rust
# use xso::FromXml;
#[derive(FromXml, Debug, PartialEq)]
#[xml(namespace = "urn:example", name = "foo")]
struct Foo {
#[xml(text)]
a: String,
};
let foo: Foo = xso::from_bytes(b"<foo xmlns='urn:example'>hello</foo>").unwrap();
assert_eq!(foo, Foo {
a: "hello".to_string(),
});
```