xmpp-rs/xso/src/minidom_compat.rs
Alvaro Parker 2ff89a9e42 fix typos
2024-09-16 18:29:44 -03:00

477 lines
16 KiB
Rust

//! Implementations of traits from this crate for minidom types
// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
use core::marker::PhantomData;
use std::borrow::Cow;
use std::vec::IntoIter;
use minidom::{Element, Node};
use rxml::{
parser::EventMetrics,
writer::{SimpleNamespaces, TrackNamespace},
AttrMap, Event, Name, NameStr, Namespace, NcName, NcNameStr,
};
use crate::{
error::{Error, FromEventsError},
rxml_util::{EventToItem, Item},
AsXml, FromEventsBuilder, FromXml,
};
/// State machine for converting a minidom Element into rxml events.
enum IntoEventsInner {
/// Element header: the element is still intact and we need to generate
/// the [`rxml::Event::StartElement`] event from the namespace, name, and
/// attributes.
Header(Element),
/// Content: The contents of the element are streamed as events.
Nodes {
/// Remaining child nodes (text and/or children) to emit.
remaining: IntoIter<Node>,
/// When emitting a child element, this is a nested [`IntoEvents`]
/// instance for that child element.
nested: Option<Box<IntoEvents>>,
},
/// End of iteration: this state generates an end-of-iterator state.
///
/// Note that the [`rxml::Event::EndElement`] event for the element itself
/// is generated by the iterator already in the `Nodes` state, when
/// `nested` is None and `remaining` returns `None` from its `next()`
/// implementation.
Fin,
}
/// Create the parts for a [`rxml::Event::StartElement`] from a
/// [`minidom::Element`].
///
/// Note that this copies the attribute data as well as namespace and name.
/// This is due to limitations in the [`minidom::Element`] API.
// NOTE to developers: The limitations are not fully trivial to overcome:
// the attributes use a BTreeMap internally, which does not offer a `drain`
// iterator.
pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
let name = NcName::try_from(el.name())?;
let namespace = Namespace::from(el.ns());
let mut attrs = AttrMap::new();
for (name, value) in el.attrs() {
let name = Name::try_from(name)?;
let (prefix, name) = name.split_name()?;
let namespace = if let Some(prefix) = prefix {
if prefix == "xml" {
Namespace::XML
} else {
let ns = match el.prefixes.get(&Some(prefix.into())) {
Some(v) => v,
None => {
panic!("undeclared xml namespace prefix in minidom::Element")
}
};
Namespace::from(ns.to_owned())
}
} else {
Namespace::NONE
};
attrs.insert(namespace, name, value.to_owned());
}
Ok(((namespace, name), attrs))
}
impl IntoEventsInner {
fn next(&mut self) -> Result<Option<Event>, Error> {
match self {
IntoEventsInner::Header(ref mut el) => {
let (qname, attrs) = make_start_ev_parts(el)?;
let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
*self = IntoEventsInner::Nodes {
remaining: el.take_nodes().into_iter(),
nested: None,
};
Ok(Some(event))
}
IntoEventsInner::Nodes {
ref mut nested,
ref mut remaining,
} => {
loop {
if let Some(nested) = nested.as_mut() {
if let Some(ev) = nested.next() {
return Some(ev).transpose();
}
}
match remaining.next() {
Some(Node::Text(text)) => {
return Ok(Some(Event::Text(EventMetrics::zero(), text)));
}
Some(Node::Element(el)) => {
*nested = Some(Box::new(IntoEvents::new(el)));
// fallthrough to next loop iteration
}
None => {
// end of element, switch state and emit EndElement
*self = IntoEventsInner::Fin;
return Ok(Some(Event::EndElement(EventMetrics::zero())));
}
}
}
}
IntoEventsInner::Fin => Ok(None),
}
}
}
/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
///
/// This can be constructed from the
/// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`]
/// implementation on [`minidom::Element`].
struct IntoEvents(IntoEventsInner);
impl IntoEvents {
fn new(el: Element) -> Self {
IntoEvents(IntoEventsInner::Header(el))
}
}
impl Iterator for IntoEvents {
type Item = Result<Event, Error>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next().transpose()
}
}
enum AsXmlState<'a> {
/// Element header: we need to generate the
/// [`Item::ElementHeadStart`] item from the namespace and name.
Header { element: &'a Element },
/// Element header: we now generate the attributes.
Attributes {
/// The element (needed for the contents later and to access the
/// prefix mapping).
element: &'a Element,
/// Attribute iterator.
attributes: minidom::element::Attrs<'a>,
},
/// Content: The contents of the element are streamed as events.
Nodes {
/// Remaining child nodes (text and/or children) to emit.
nodes: minidom::element::Nodes<'a>,
/// When emitting a child element, this is a nested [`IntoEvents`]
/// instance for that child element.
nested: Option<Box<ElementAsXml<'a>>>,
},
}
/// Convert a [`minidom::Element`] to [`Item`][`crate::rxml_util::Item`]s.
///
/// This can be constructed from the
/// [`AsXml::as_xml_iter`][`crate::AsXml::as_xml_iter`]
/// implementation on [`minidom::Element`].
pub struct ElementAsXml<'a>(Option<AsXmlState<'a>>);
impl<'a> Iterator for ElementAsXml<'a> {
type Item = Result<Item<'a>, Error>;
fn next(&mut self) -> Option<Self::Item> {
match self.0 {
None => None,
Some(AsXmlState::Header { ref element }) => {
let item = Item::ElementHeadStart(
Namespace::from(element.ns()),
Cow::Borrowed(match <&NcNameStr>::try_from(element.name()) {
Ok(v) => v,
Err(e) => {
self.0 = None;
return Some(Err(e.into()));
}
}),
);
self.0 = Some(AsXmlState::Attributes {
element,
attributes: element.attrs(),
});
Some(Ok(item))
}
Some(AsXmlState::Attributes {
ref mut attributes,
ref element,
}) => {
if let Some((name, value)) = attributes.next() {
let name = match <&NameStr>::try_from(name) {
Ok(v) => v,
Err(e) => {
self.0 = None;
return Some(Err(e.into()));
}
};
let (prefix, name) = match name.split_name() {
Ok(v) => v,
Err(e) => {
self.0 = None;
return Some(Err(e.into()));
}
};
let namespace = if let Some(prefix) = prefix {
if prefix == "xml" {
Namespace::XML
} else {
let ns = match element.prefixes.get(&Some(prefix.as_str().to_owned())) {
Some(v) => v,
None => {
panic!("undeclared xml namespace prefix in minidom::Element")
}
};
Namespace::from(ns.to_owned())
}
} else {
Namespace::NONE
};
Some(Ok(Item::Attribute(
namespace,
Cow::Borrowed(name),
Cow::Borrowed(value),
)))
} else {
self.0 = Some(AsXmlState::Nodes {
nodes: element.nodes(),
nested: None,
});
Some(Ok(Item::ElementHeadEnd))
}
}
Some(AsXmlState::Nodes {
ref mut nodes,
ref mut nested,
}) => {
if let Some(nested) = nested.as_mut() {
if let Some(next) = nested.next() {
return Some(next);
}
}
*nested = None;
match nodes.next() {
None => {
self.0 = None;
Some(Ok(Item::ElementFoot))
}
Some(minidom::Node::Text(ref text)) => {
Some(Ok(Item::Text(Cow::Borrowed(text))))
}
Some(minidom::Node::Element(ref element)) => {
let mut iter = match element.as_xml_iter() {
Ok(v) => v,
Err(e) => {
self.0 = None;
return Some(Err(e.into()));
}
};
let item = iter.next().unwrap();
*nested = Some(Box::new(iter));
Some(item)
}
}
}
}
}
}
impl AsXml for minidom::Element {
type ItemIter<'a> = ElementAsXml<'a>;
fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, Error> {
Ok(ElementAsXml(Some(AsXmlState::Header { element: self })))
}
}
/// Construct a [`minidom::Element`] from [`rxml::Event`]s
///
/// This can be constructed from the
/// [`FromXml::from_events`][`crate::FromXml::from_events`]
/// implementation on [`minidom::Element`].
pub struct ElementFromEvents {
inner: Option<Element>,
nested: Option<Box<ElementFromEvents>>,
}
impl ElementFromEvents {
/// Construct a new builder from an element header.
///
/// Unlike the [`FromXml::from_events`] implementation on
/// [`minidom::Element`], this is contractually infallible. Using this may
/// thus save you an `unwrap()` call.
pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self {
let mut prefixes = SimpleNamespaces::new();
let mut builder = Element::builder(qname.1, qname.0);
for ((namespace, name), value) in attrs.into_iter() {
if namespace.is_none() {
builder = builder.attr(name, value);
} else {
let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
let name = prefix.with_suffix(&name);
if is_new {
builder = builder
.prefix(
Some(prefix.as_str().to_owned()),
namespace.as_str().to_owned(),
)
.unwrap();
}
builder = builder.attr(name, value);
}
}
let element = builder.build();
Self {
inner: Some(element),
nested: None,
}
}
}
impl FromEventsBuilder for ElementFromEvents {
type Output = minidom::Element;
fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
let inner = self
.inner
.as_mut()
.expect("feed() called after it finished");
if let Some(nested) = self.nested.as_mut() {
match nested.feed(ev)? {
Some(v) => {
inner.append_child(v);
self.nested = None;
return Ok(None);
}
None => return Ok(None),
}
}
match ev {
Event::XmlDeclaration(_, _) => Ok(None),
Event::StartElement(_, qname, attrs) => {
let nested = match Element::from_events(qname, attrs) {
Ok(v) => v,
Err(FromEventsError::Invalid(e)) => return Err(e),
Err(FromEventsError::Mismatch { .. }) => {
unreachable!("<Element as FromXml>::from_events should accept everything!")
}
};
self.nested = Some(Box::new(nested));
Ok(None)
}
Event::Text(_, text) => {
inner.append_text_node(text);
Ok(None)
}
Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
}
}
}
impl FromXml for Element {
type Builder = ElementFromEvents;
fn from_events(
qname: rxml::QName,
attrs: rxml::AttrMap,
) -> Result<Self::Builder, FromEventsError> {
Ok(Self::Builder::new(qname, attrs))
}
}
/// Helper struct to streamingly parse a struct which implements conversion
/// from [`minidom::Element`].
pub struct FromEventsViaElement<T> {
inner: ElementFromEvents,
// needed here because we need to keep the type `T` around until
// `FromEventsBuilder` is done and it must always be the same type, so we
// have to nail it down in the struct's type, and to do that we need to
// bind it to a field. that's what PhantomData is for.
_phantom: PhantomData<T>,
}
impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
where
Error: From<E>,
{
/// Create a new streaming parser for `T`.
pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
Ok(Self {
_phantom: PhantomData,
inner: Element::from_events(qname, attrs)?,
})
}
}
impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
where
Error: From<E>,
{
type Output = T;
fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
match self.inner.feed(ev) {
Ok(Some(v)) => Ok(Some(v.try_into()?)),
Ok(None) => Ok(None),
Err(e) => Err(e),
}
}
}
/// Helper struct to stream a struct which implements conversion
/// to [`minidom::Element`].
pub struct AsItemsViaElement<'x> {
iter: EventToItem<IntoEvents>,
lifetime_binding: PhantomData<Item<'x>>,
}
impl<'x> AsItemsViaElement<'x> {
/// Create a new streaming parser for `T`.
pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
where
Error: From<E>,
minidom::Element: TryFrom<T, Error = E>,
{
let element: minidom::Element = value.try_into()?;
Ok(Self {
iter: EventToItem::new(IntoEvents::new(element)),
lifetime_binding: PhantomData,
})
}
}
impl<'x> Iterator for AsItemsViaElement<'x> {
type Item = Result<Item<'x>, Error>;
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(|x| x.map(Item::into_owned))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn transform_element_is_equivalent() {
let el: Element = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>".parse().unwrap();
let transformed: Element = crate::transform(el.clone()).unwrap();
assert_eq!(el, transformed);
}
}