xmpp-rs/xso/src/minidom_compat.rs
Jonas Schäfer 6ef8dbefa3 parsers: use Error type from xso
This is a large change and as such, it needs good motivation. Let me
remind you of the ultimate goal: we want a derive macro which allows us
to derive FromXml/IntoXml, and that derive macro should be usable from
`xmpp_parsers` and other crates.

For that, any code generated by the derive macro mustn't depend on any
code in the `xmpp_parsers` crate, because you cannot name the crate you
are in portably (`xmpp_parsers::..` wouldn't resolve within
`xmpp_parsers`, and `crate::..` would point at other crates if the macro
was used in other crates).

We also want to interoperate with code already implementing
`TryFrom<Element>` and `Into<Element>` on structs. This ultimately
requires that we have an error type which is shared by the two
implementations and that error type must be declared in the `xso` crate
to be usable by the macros.

Thus, we port the error type over to use the type declared in `xso`.

This changes the structure of the error type greatly; I do not think
that `xso` should have to know about all the different types we are
parsing there and they don't deserve special treatment. Wrapping them in
a `Box<dyn ..>` seems more appropriate.
2024-06-23 09:40:52 +02:00

317 lines
10 KiB
Rust

//! Implementations of traits from this crate for minidom types
// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
use std::marker::PhantomData;
use std::vec::IntoIter;
use minidom::{Element, Node};
use rxml::{
parser::EventMetrics,
writer::{SimpleNamespaces, TrackNamespace},
AttrMap, Event, Name, Namespace, NcName,
};
use crate::{
error::{Error, FromEventsError},
FromEventsBuilder, FromXml, IntoXml,
};
/// State machine for converting a minidom Element into rxml events.
///
/// The states are passed through in declaration order: `Header`, then
/// `Nodes`, then `Fin`.
enum IntoEventsInner {
/// Element header: the element is still intact and we need to generate
/// the [`rxml::Event::StartElement`] event from the namespace, name, and
/// attributes.
Header(Element),
/// Content: The contents of the element are streamed as events.
Nodes {
/// Remaining child nodes (text and/or children) to emit.
remaining: IntoIter<Node>,
/// When emitting a child element, this is a nested [`IntoEvents`]
/// instance for that child element.
nested: Option<Box<IntoEvents>>,
},
/// End of iteration: this state generates an end-of-iterator state.
///
/// Note that the [`rxml::Event::EndElement`] event for the element itself
/// is generated by the iterator already in the `Nodes` state, when
/// `nested` is None and `remaining` returns `None` from its `next()`
/// implementation.
Fin,
}
/// Build the qualified name and attribute map needed for a
/// [`rxml::Event::StartElement`] out of a [`minidom::Element`].
///
/// The attribute data, the namespace and the name are all copied out of
/// `el`; this is a consequence of limitations in the [`minidom::Element`]
/// API.
///
/// Attribute names without a prefix are placed in no namespace, the `xml`
/// prefix maps to the XML namespace, and any other prefix is resolved through
/// the prefixes declared on `el`.
///
/// # Errors
///
/// Returns an error if the element name is not a valid `NcName`, or if an
/// attribute name is not a valid `Name` or cannot be split into prefix and
/// local part.
///
/// # Panics
///
/// Panics if an attribute uses a prefix (other than `xml`) which is not
/// declared in `el.prefixes`.
// NOTE to developers: The limitations are not fully trivial to overcome:
// the attributes use a BTreeMap internally, which does not offer a `drain`
// iterator.
pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
    // Validate the element's own name first so that its error takes
    // precedence over any attribute-related failure.
    let name = NcName::try_from(el.name())?;
    let namespace = Namespace::from(el.ns());

    let mut attrs = AttrMap::new();
    for (raw_name, value) in el.attrs() {
        let full_name = Name::try_from(raw_name)?;
        let (prefix, local_name) = full_name.split_name()?;
        let attr_namespace = match prefix {
            // Unprefixed attributes never inherit the element's namespace.
            None => Namespace::NONE,
            // The `xml` prefix is implicitly bound and never declared.
            Some(prefix) if prefix == "xml" => Namespace::XML,
            Some(prefix) => match el.prefixes.get(&Some(prefix.into())) {
                Some(uri) => Namespace::from(uri.to_owned()),
                None => {
                    panic!("undeclared xml namespace prefix in minidom::Element")
                }
            },
        };
        attrs.insert(attr_namespace, local_name, value.to_owned());
    }

    Ok(((namespace, name), attrs))
}
impl IntoEventsInner {
    /// Advance the state machine by one step and return the next event.
    ///
    /// Returns `Ok(Some(event))` while there are events left to emit,
    /// `Ok(None)` once the `Fin` state has been reached, and `Err(_)` if the
    /// element header (or the header of a child element) could not be
    /// converted into a start event.
    ///
    /// When the header conversion fails, the state is left untouched, so a
    /// subsequent call attempts the same conversion again.
    fn next(&mut self) -> Result<Option<Event>, Error> {
        match self {
            IntoEventsInner::Header(el) => {
                let (qname, attrs) = make_start_ev_parts(el)?;
                let start = Event::StartElement(EventMetrics::zero(), qname, attrs);
                // The header has been emitted; only the child nodes are
                // still needed, so move them out of the element.
                let remaining = el.take_nodes().into_iter();
                *self = IntoEventsInner::Nodes {
                    remaining,
                    nested: None,
                };
                Ok(Some(start))
            }
            IntoEventsInner::Nodes { nested, remaining } => loop {
                if let Some(child) = nested.as_mut() {
                    if let Some(ev) = child.next() {
                        return ev.map(Some);
                    }
                    // The child element is fully emitted: release it right
                    // away instead of polling an exhausted iterator again
                    // for every following sibling node.
                    *nested = None;
                }
                match remaining.next() {
                    Some(Node::Text(text)) => {
                        return Ok(Some(Event::Text(EventMetrics::zero(), text)));
                    }
                    Some(Node::Element(el)) => {
                        *nested = Some(Box::new(el.into_event_iter()?));
                        // continue with the next loop iteration, which
                        // pulls the first event out of the new child
                    }
                    None => {
                        // end of element, switch state and emit EndElement
                        *self = IntoEventsInner::Fin;
                        return Ok(Some(Event::EndElement(EventMetrics::zero())));
                    }
                }
            },
            IntoEventsInner::Fin => Ok(None),
        }
    }
}
/// Iterator which turns a [`minidom::Element`] into a stream of
/// [`rxml::Event`]s.
///
/// Instances are obtained through the
/// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`]
/// implementation on [`minidom::Element`].
pub struct IntoEvents(IntoEventsInner);

impl Iterator for IntoEvents {
    type Item = Result<Event, Error>;

    /// Step the inner state machine and map its `Result<Option<_>, _>` onto
    /// the `Option<Result<_, _>>` shape required by [`Iterator`].
    fn next(&mut self) -> Option<Self::Item> {
        let IntoEvents(state) = self;
        match state.next() {
            Ok(Some(ev)) => Some(Ok(ev)),
            Ok(None) => None,
            Err(e) => Some(Err(e)),
        }
    }
}
impl IntoXml for Element {
    type EventIter = IntoEvents;

    /// Wrap the element in an [`IntoEvents`] iterator which starts out in the
    /// header state; no conversion work happens until the iterator is polled.
    fn into_event_iter(self) -> Result<Self::EventIter, Error> {
        let initial_state = IntoEventsInner::Header(self);
        Ok(IntoEvents(initial_state))
    }
}
/// Construct a [`minidom::Element`] from [`rxml::Event`]s
///
/// This can be constructed from the
/// [`FromXml::from_events`][`crate::FromXml::from_events`]
/// implementation on [`minidom::Element`].
pub struct ElementFromEvents {
/// The element under construction. It is taken out (leaving `None`) when
/// the closing [`rxml::Event::EndElement`] has been fed.
inner: Option<Element>,
/// Builder for the child element which is currently being received, if
/// any. While set, all events are forwarded to it.
nested: Option<Box<ElementFromEvents>>,
}
impl FromEventsBuilder for ElementFromEvents {
    type Output = minidom::Element;

    /// Feed one event into the element under construction.
    ///
    /// Returns `Ok(Some(element))` once the `EndElement` event belonging to
    /// this element has been received and `Ok(None)` while more events are
    /// needed.
    ///
    /// # Panics
    ///
    /// Panics if called again after the finished element has been returned.
    fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
        let current = self
            .inner
            .as_mut()
            .expect("feed() called after it finished");

        // While a child element is being built, every event belongs to it.
        if let Some(child_builder) = self.nested.as_mut() {
            if let Some(child) = child_builder.feed(ev)? {
                current.append_child(child);
                self.nested = None;
            }
            return Ok(None);
        }

        match ev {
            Event::XmlDeclaration(_, _) => {}
            Event::StartElement(_, qname, attrs) => {
                match Element::from_events(qname, attrs) {
                    Ok(child_builder) => self.nested = Some(Box::new(child_builder)),
                    Err(FromEventsError::Invalid(e)) => return Err(e),
                    Err(FromEventsError::Mismatch { .. }) => {
                        unreachable!("<Element as FromXml>::from_events should accept everything!")
                    }
                }
            }
            Event::Text(_, text) => {
                current.append_text_node(text);
            }
            Event::EndElement(_) => return Ok(Some(self.inner.take().unwrap())),
        }
        Ok(None)
    }
}
impl FromXml for Element {
type Builder = ElementFromEvents;
fn from_events(
qname: rxml::QName,
attrs: rxml::AttrMap,
) -> Result<Self::Builder, FromEventsError> {
let mut prefixes = SimpleNamespaces::new();
let mut builder = Element::builder(qname.1, qname.0);
for ((namespace, name), value) in attrs.into_iter() {
if namespace.is_none() {
builder = builder.attr(name, String::from(value));
} else {
let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
let name = prefix.with_suffix(&name);
if is_new {
builder = builder
.prefix(
Some(prefix.as_str().to_owned()),
namespace.as_str().to_owned(),
)
.unwrap();
}
builder = builder.attr(name, String::from(value));
}
}
let element = builder.build();
Ok(Self::Builder {
inner: Some(element),
nested: None,
})
}
}
/// Helper struct to streamingly parse a struct which implements conversion
/// from [`minidom::Element`].
///
/// Events are first assembled into a [`minidom::Element`] by the wrapped
/// [`ElementFromEvents`]; the finished element is then converted into `T`.
pub struct FromEventsViaElement<T> {
/// Builder which assembles the intermediate [`minidom::Element`].
inner: ElementFromEvents,
// needed here because we need to keep the type `T` around until
// `FromEventsBuilder` is done and it must always be the same type, so we
// have to nail it down in the struct's type, and to do that we need to
// bind it to a field. that's what PhantomData is for.
_phantom: PhantomData<T>,
}
impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
where
    Error: From<E>,
{
    /// Create a new streaming parser for `T`.
    ///
    /// `qname` and `attrs` are the parts of the start-element event which
    /// opens the element; they are handed to the [`minidom::Element`]
    /// builder, whose error (if any) is returned unchanged.
    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
        let inner = Element::from_events(qname, attrs)?;
        Ok(Self {
            inner,
            _phantom: PhantomData,
        })
    }
}
impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
where
    Error: From<E>,
{
    type Output = T;

    /// Forward the event to the inner element builder and, once the element
    /// is complete, convert it into `T`.
    ///
    /// Errors from the element builder are passed through; a failed
    /// conversion into `T` is turned into an [`Error`] via `From`.
    fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
        match self.inner.feed(ev)? {
            Some(element) => Ok(Some(T::try_from(element)?)),
            None => Ok(None),
        }
    }
}
/// Helper struct to stream a struct which implements conversion
/// to [`minidom::Element`].
///
/// The value is converted into a [`minidom::Element`] up front (in `new`);
/// the events of that element are then yielded one at a time.
pub struct IntoEventsViaElement {
/// Event iterator over the intermediate [`minidom::Element`].
inner: IntoEvents,
}
impl IntoEventsViaElement {
    /// Create a new event stream for `value`.
    ///
    /// The value is first converted into a [`minidom::Element`], which is
    /// then turned into its event iterator.
    ///
    /// # Errors
    ///
    /// Returns an error if the conversion into [`minidom::Element`] fails or
    /// if the event iterator cannot be created.
    pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
    where
        Error: From<E>,
        minidom::Element: TryFrom<T, Error = E>,
    {
        let element = minidom::Element::try_from(value)?;
        let inner = element.into_event_iter()?;
        Ok(Self { inner })
    }
}
impl Iterator for IntoEventsViaElement {
type Item = Result<Event, Error>;
/// Yield the next event by delegating to the wrapped [`IntoEvents`]
/// iterator.
fn next(&mut self) -> Option<Self::Item> {
self.inner.next()
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Passes an element with attributes, text content and a child in a
/// different namespace through `crate::transform` and asserts that the
/// result compares equal to the input.
#[test]
fn transform_element_is_equivalent() {
let el: Element = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>".parse().unwrap();
let transformed: Element = crate::transform(el.clone()).unwrap();
assert_eq!(el, transformed);
}
}