diff --git a/Cargo.toml b/Cargo.toml
index 80a8a026..6d20ee4d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ members = [ # alphabetically sorted
   "sasl",
   "tokio-xmpp",
   "xmpp",
+  "xso",
 ]
 resolver = "2"
 
diff --git a/minidom/src/element.rs b/minidom/src/element.rs
index b161a996..0bb6c0b7 100644
--- a/minidom/src/element.rs
+++ b/minidom/src/element.rs
@@ -427,6 +427,11 @@ impl Element {
         Ok(())
     }
 
+    /// Extracts all children into a collection.
+    pub fn take_nodes(&mut self) -> Vec<Node> {
+        self.children.drain(..).collect()
+    }
+
     /// Returns an iterator over references to every child node of this element.
     ///
     /// # Examples
diff --git a/xso/Cargo.toml b/xso/Cargo.toml
new file mode 100644
index 00000000..944a636f
--- /dev/null
+++ b/xso/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "xso"
+version = "0.0.2"
+edition = "2021"
+description = "XML Streamed Objects: similar to serde, but XML-native."
+homepage = "https://xmpp.rs"
+repository = "https://gitlab.com/xmpp-rs/xmpp-rs"
+keywords = ["xmpp", "xml", "serialization"]
+categories = ["encoding"]
+license = "MPL-2.0"
+
+[dependencies]
+rxml = { version = "0.11.0", default-features = false }
+minidom = { version = "^0.15" }
diff --git a/xso/src/error.rs b/xso/src/error.rs
new file mode 100644
index 00000000..5da472d4
--- /dev/null
+++ b/xso/src/error.rs
@@ -0,0 +1,106 @@
+/*!
+# Error types for XML parsing
+
+This module contains the error types used throughout the `xso` crate.
+*/
+// Copyright (c) 2024 Jonas Schäfer
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+use std::fmt;
+
+use rxml::error::XmlError;
+
+/// Error variants generated while parsing or serialising XML data.
+#[derive(Debug)]
+pub enum Error {
+    /// Invalid XML data encountered
+    XmlError(XmlError),
+
+    /// Attempt to parse text data failed with the provided nested error.
+    TextParseError(Box<dyn std::error::Error + Send + Sync + 'static>),
+
+    /// An element header did not match an expected element.
+    ///
+    /// This is only rarely generated: most of the time, a mismatch of element
+    /// types is reported as either an unexpected or a missing child element,
+    /// errors which are generally more specific.
+    TypeMismatch,
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Self::XmlError(ref e) => write!(f, "xml parse error: {}", e),
+            Self::TextParseError(ref e) => write!(f, "text parse error: {}", e),
+            Self::TypeMismatch => f.write_str("mismatch between expected and actual XML data"),
+        }
+    }
+}
+
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Self::XmlError(ref e) => Some(e),
+            Self::TextParseError(ref e) => Some(&**e),
+            _ => None,
+        }
+    }
+}
+
+impl From<rxml::error::XmlError> for Error {
+    fn from(other: rxml::error::XmlError) -> Error {
+        Error::XmlError(other)
+    }
+}
+
+impl From<rxml::strings::Error> for Error {
+    fn from(other: rxml::strings::Error) -> Error {
+        Error::XmlError(other.into())
+    }
+}
+
+/// Error returned from
+/// [`FromXml::from_events`][`crate::FromXml::from_events`].
+#[derive(Debug)]
+pub enum FromEventsError {
+    /// The `name` and/or `attrs` passed to `FromXml::from_events` did not
+    /// match the element's type.
+    Mismatch {
+        /// The `name` passed to `from_events`.
+        name: rxml::QName,
+
+        /// The `attrs` passed to `from_events`.
+        attrs: rxml::AttrMap,
+    },
+
+    /// The `name` and `attrs` passed to `FromXml::from_events` matched the
+    /// element's type, but the data was invalid. Details are in the inner
+    /// error.
+    Invalid(Error),
+}
+
+impl From<Error> for FromEventsError {
+    fn from(other: Error) -> Self {
+        Self::Invalid(other)
+    }
+}
+
+impl fmt::Display for FromEventsError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Self::Mismatch { .. } => f.write_str("element header did not match"),
+            Self::Invalid(ref e) => fmt::Display::fmt(e, f),
+        }
+    }
+}
+
+impl std::error::Error for FromEventsError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Self::Mismatch { .. } => None,
+            Self::Invalid(ref e) => Some(e),
+        }
+    }
+}
diff --git a/xso/src/lib.rs b/xso/src/lib.rs
new file mode 100644
index 00000000..c38f386d
--- /dev/null
+++ b/xso/src/lib.rs
@@ -0,0 +1,123 @@
+#![forbid(missing_docs, unsafe_code)]
+/*!
+# XML Streamed Objects -- serde-like parsing for XML
+
+This crate provides the traits for parsing XML data into Rust structs, and
+vice versa.
+
+While it is in 0.0.x versions, many features still need to be developed, but
+rest assured that there is a solid plan to get it fully usable for even
+advanced XML scenarios.
+
+XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
+use of this library in parsing XML streams like specified in RFC 6120.
+*/
+// Copyright (c) 2024 Jonas Schäfer
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+pub mod error;
+pub mod minidom_compat;
+
+/// Trait allowing to consume a struct and iterate its contents as
+/// serialisable [`rxml::Event`] items.
+pub trait IntoXml {
+    /// The iterator type.
+    type EventIter: Iterator<Item = Result<rxml::Event, self::error::Error>>;
+
+    /// Return an iterator which emits the contents of the struct or enum as
+    /// serialisable [`rxml::Event`] items.
+    fn into_event_iter(self) -> Result<Self::EventIter, self::error::Error>;
+}
+
+/// Trait for a temporary object allowing to construct a struct from
+/// [`rxml::Event`] items.
+///
+/// Objects of this type are generally constructed through
+/// [`FromXml::from_events`] and are used to build Rust structs or enums from
+/// XML data. The XML data must be fed as `rxml::Event` to the
+/// [`feed`][`Self::feed`] method.
+pub trait FromEventsBuilder {
+    /// The type which will be constructed by this builder.
+    type Output;
+
+    /// Feed another [`rxml::Event`] into the element construction
+    /// process.
+    ///
+    /// Once the construction process completes, `Ok(Some(_))` is returned.
+    /// When valid data has been fed but more events are needed to fully
+    /// construct the resulting struct, `Ok(None)` is returned.
+    ///
+    /// If the construction fails, `Err(_)` is returned. Errors are generally
+    /// fatal and the builder should be assumed to be broken at that point.
+    /// Feeding more events after an error may result in panics, errors or
+    /// inconsistent result data, though it may never result in unsound or
+    /// unsafe behaviour.
+    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
+}
+
+/// Trait allowing to construct a struct from a stream of
+/// [`rxml::Event`] items.
+///
+/// To use this, first call [`FromXml::from_events`] with the qualified
+/// name and the attributes of the corresponding
+/// [`rxml::Event::StartElement`] event. If the call succeeds, the
+/// returned builder object must be fed with the events representing the
+/// contents of the element, and then with the `EndElement` event.
+///
+/// The `StartElement` passed to `from_events` must not be passed to `feed`.
+///
+/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
+/// is considered a non-breaking change for any given implementation of this
+/// trait. Always refer to a type's builder type using fully-qualified
+/// notation, for example: `<T as xso::FromXml>::Builder`.
+pub trait FromXml {
+    /// A builder type used to construct the element.
+    ///
+    /// **Important:** Changing this type is considered a non-breaking change
+    /// for any given implementation of this trait. Always refer to a type's
+    /// builder type using fully-qualified notation, for example:
+    /// `<T as xso::FromXml>::Builder`.
+    type Builder: FromEventsBuilder<Output = Self>;
+
+    /// Attempt to initiate the streamed construction of this struct from XML.
+    ///
+    /// If the passed qualified `name` and `attrs` match the element's type,
+    /// the [`Self::Builder`] is returned and should be fed with XML events
+    /// by the caller.
+    ///
+    /// Otherwise, an appropriate error is returned.
+    fn from_events(
+        name: rxml::QName,
+        attrs: rxml::AttrMap,
+    ) -> Result<Self::Builder, self::error::FromEventsError>;
+}
+
+/// Attempt to transform a type implementing [`IntoXml`] into another
+/// type which implements [`FromXml`].
+pub fn transform<T: FromXml, F: IntoXml>(from: F) -> Result<T, self::error::Error> {
+    let mut iter = from.into_event_iter()?;
+    let (qname, attrs) = match iter.next() {
+        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
+        Some(Err(e)) => return Err(e),
+        _ => panic!("into_event_iter did not start with StartElement event!"),
+    };
+    let mut sink = match T::from_events(qname, attrs) {
+        Ok(v) => v,
+        Err(self::error::FromEventsError::Mismatch { .. }) => {
+            return Err(self::error::Error::TypeMismatch)
+        }
+        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
+    };
+    for event in iter {
+        let event = event?;
+        match sink.feed(event)? {
+            Some(v) => return Ok(v),
+            None => (),
+        }
+    }
+    Err(self::error::Error::XmlError(
+        rxml::error::XmlError::InvalidEof("during transform"),
+    ))
+}
diff --git a/xso/src/minidom_compat.rs b/xso/src/minidom_compat.rs
new file mode 100644
index 00000000..fffe2e2b
--- /dev/null
+++ b/xso/src/minidom_compat.rs
@@ -0,0 +1,248 @@
+//! Implementations of traits from this crate for minidom types
+// Copyright (c) 2024 Jonas Schäfer
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+use std::vec::IntoIter;
+
+use minidom::{Element, Node};
+
+use rxml::{
+    parser::EventMetrics,
+    writer::{SimpleNamespaces, TrackNamespace},
+    AttrMap, Event, Name, Namespace, NcName,
+};
+
+use crate::{
+    error::{Error, FromEventsError},
+    FromEventsBuilder, FromXml, IntoXml,
+};
+
+/// State machine for converting a minidom Element into rxml events.
+enum IntoEventsInner {
+    /// Element header: the element is still intact and we need to generate
+    /// the [`rxml::Event::StartElement`] event from the namespace, name, and
+    /// attributes.
+    Header(Element),
+
+    /// Content: The contents of the element are streamed as events.
+    Nodes {
+        /// Remaining child nodes (text and/or children) to emit.
+        remaining: IntoIter<Node>,
+
+        /// When emitting a child element, this is a nested [`IntoEvents`]
+        /// instance for that child element.
+        nested: Option<Box<IntoEvents>>,
+    },
+
+    /// End of iteration: this state generates an end-of-iterator state.
+    ///
+    /// Note that the [`rxml::Event::EndElement`] event for the element itself
+    /// is generated by the iterator already in the `Nodes` state, when
+    /// `nested` is None and `remaining` returns `None` from its `next()`
+    /// implementation.
+    Fin,
+}
+
+/// Create the parts for a [`rxml::Event::StartElement`] from a
+/// [`minidom::Element`].
+///
+/// Note that this copies the attribute data as well as namespace and name.
+/// This is due to limitations in the [`minidom::Element`] API.
+// NOTE to developers: The limitations are not fully trivial to overcome:
+// the attributes use a BTreeMap internally, which does not offer a `drain`
+// iterator.
+fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
+    let name = NcName::try_from(el.name())?;
+    let namespace = Namespace::from(el.ns());
+
+    let mut attrs = AttrMap::new();
+    for (name, value) in el.attrs() {
+        let name = Name::try_from(name)?;
+        let (prefix, name) = name.split_name()?;
+        let namespace = if let Some(prefix) = prefix {
+            if prefix == "xml" {
+                Namespace::XML
+            } else {
+                let ns = match el.prefixes.get(&Some(prefix.into())) {
+                    Some(v) => v,
+                    None => {
+                        panic!("undeclared xml namespace prefix in minidom::Element")
+                    }
+                };
+                Namespace::from(ns.to_owned())
+            }
+        } else {
+            Namespace::NONE
+        };
+
+        attrs.insert(namespace, name, value.to_owned());
+    }
+
+    Ok(((namespace, name), attrs))
+}
+
+impl IntoEventsInner {
+    fn next(&mut self) -> Result<Option<Event>, Error> {
+        match self {
+            IntoEventsInner::Header(ref mut el) => {
+                let (qname, attrs) = make_start_ev_parts(el)?;
+                let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
+
+                *self = IntoEventsInner::Nodes {
+                    remaining: el.take_nodes().into_iter(),
+                    nested: None,
+                };
+                return Ok(Some(event));
+            }
+            IntoEventsInner::Nodes {
+                ref mut nested,
+                ref mut remaining,
+            } => {
+                loop {
+                    if let Some(nested) = nested.as_mut() {
+                        if let Some(ev) = nested.next() {
+                            return Some(ev).transpose();
+                        }
+                    }
+                    match remaining.next() {
+                        Some(Node::Text(text)) => {
+                            return Ok(Some(Event::Text(EventMetrics::zero(), text)));
+                        }
+                        Some(Node::Element(el)) => {
+                            *nested = Some(Box::new(el.into_event_iter()?));
+                            // fallthrough to next loop iteration
+                        }
+                        None => {
+                            // end of element, switch state and emit EndElement
+                            *self = IntoEventsInner::Fin;
+                            return Ok(Some(Event::EndElement(EventMetrics::zero())));
+                        }
+                    }
+                }
+            }
+            IntoEventsInner::Fin => Ok(None),
+        }
+    }
+}
+
+/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
+///
+/// This can be constructed from the
+/// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`]
+/// implementation on [`minidom::Element`].
+pub struct IntoEvents(IntoEventsInner);
+
+impl Iterator for IntoEvents {
+    type Item = Result<Event, Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next().transpose()
+    }
+}
+
+impl IntoXml for Element {
+    type EventIter = IntoEvents;
+
+    fn into_event_iter(self) -> Result<Self::EventIter, Error> {
+        Ok(IntoEvents(IntoEventsInner::Header(self)))
+    }
+}
+
+/// Construct a [`minidom::Element`] from [`rxml::Event`]s
+///
+/// This can be constructed from the
+/// [`FromXml::from_events`][`crate::FromXml::from_events`]
+/// implementation on [`minidom::Element`].
+pub struct ElementFromEvents {
+    inner: Option<Element>,
+    nested: Option<Box<ElementFromEvents>>,
+}
+
+impl FromEventsBuilder for ElementFromEvents {
+    type Output = minidom::Element;
+
+    fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
+        let inner = self
+            .inner
+            .as_mut()
+            .expect("feed() called after it finished");
+        if let Some(nested) = self.nested.as_mut() {
+            match nested.feed(ev)? {
+                Some(v) => {
+                    inner.append_child(v);
+                    self.nested = None;
+                    return Ok(None);
+                }
+                None => return Ok(None),
+            }
+        }
+        match ev {
+            Event::XmlDeclaration(_, _) => Ok(None),
+            Event::StartElement(_, qname, attrs) => {
+                let nested = match Element::from_events(qname, attrs) {
+                    Ok(v) => v,
+                    Err(FromEventsError::Invalid(e)) => return Err(e),
+                    Err(FromEventsError::Mismatch { .. }) => {
+                        unreachable!("<Element as FromXml>::from_events should accept everything!")
+                    }
+                };
+                self.nested = Some(Box::new(nested));
+                Ok(None)
+            }
+            Event::Text(_, text) => {
+                inner.append_text_node(text);
+                Ok(None)
+            }
+            Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
+        }
+    }
+}
+
+impl FromXml for Element {
+    type Builder = ElementFromEvents;
+
+    fn from_events(
+        qname: rxml::QName,
+        attrs: rxml::AttrMap,
+    ) -> Result<Self::Builder, FromEventsError> {
+        let mut prefixes = SimpleNamespaces::new();
+        let mut builder = Element::builder(qname.1, qname.0);
+        for ((namespace, name), value) in attrs.into_iter() {
+            if namespace.is_none() {
+                builder = builder.attr(name, String::from(value));
+            } else {
+                let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
+                let name = prefix.with_suffix(&name);
+                if is_new {
+                    builder = builder
+                        .prefix(
+                            Some(prefix.as_str().to_owned()),
+                            namespace.as_str().to_owned(),
+                        )
+                        .unwrap();
+                }
+                builder = builder.attr(name, String::from(value));
+            }
+        }
+
+        let element = builder.build();
+        Ok(Self::Builder {
+            inner: Some(element),
+            nested: None,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn transform_element_is_equivalent() {
+        let el: Element = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>".parse().unwrap();
+        let transformed: Element = crate::transform(el.clone()).unwrap();
+        assert_eq!(el, transformed);
+    }
+}