//! Implementations of traits from this crate for minidom types // Copyright (c) 2024 Jonas Schäfer // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use core::marker::PhantomData; use std::borrow::Cow; use std::vec::IntoIter; use minidom::{Element, Node}; use rxml::{ parser::EventMetrics, writer::{SimpleNamespaces, TrackNamespace}, AttrMap, Event, Name, NameStr, Namespace, NcName, NcNameStr, }; use crate::{ error::{Error, FromEventsError}, rxml_util::{EventToItem, Item}, AsXml, FromEventsBuilder, FromXml, }; /// State machine for converting a minidom Element into rxml events. enum IntoEventsInner { /// Element header: the element is still intact and we need to generate /// the [`rxml::Event::StartElement`] event from the namespace, name, and /// attributes. Header(Element), /// Content: The contents of the element are streamed as events. Nodes { /// Remaining child nodes (text and/or children) to emit. remaining: IntoIter, /// When emitting a child element, this is a nested [`IntoEvents`] /// instance for that child element. nested: Option>, }, /// End of iteration: this state generates an end-of-iterator state. /// /// Note that the [`rxml::Event::EndElement`] event for the element itself /// is generated by the iterator already in the `Nodes` state, when /// `nested` is None and `remaining` returns `None` from its `next()` /// implementation. Fin, } /// Create the parts for a [`rxml::Event::StartElement`] from a /// [`minidom::Element`]. /// /// Note that this copies the attribute data as well as namespace and name. /// This is due to limitations in the [`minidom::Element`] API. // NOTE to developers: The limitations are not fully trivial to overcome: // the attributes use a BTreeMap internally, which does not offer a `drain` // iterator. pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> { let name = NcName::try_from(el.name())?; let namespace = Namespace::from(el.ns()); let mut attrs = AttrMap::new(); for (name, value) in el.attrs() { let name = Name::try_from(name)?; let (prefix, name) = name.split_name()?; let namespace = if let Some(prefix) = prefix { if prefix == "xml" { Namespace::XML } else { let ns = match el.prefixes.get(&Some(prefix.into())) { Some(v) => v, None => { panic!("undeclared xml namespace prefix in minidom::Element") } }; Namespace::from(ns.to_owned()) } } else { Namespace::NONE }; attrs.insert(namespace, name, value.to_owned()); } Ok(((namespace, name), attrs)) } impl IntoEventsInner { fn next(&mut self) -> Result, Error> { match self { IntoEventsInner::Header(ref mut el) => { let (qname, attrs) = make_start_ev_parts(el)?; let event = Event::StartElement(EventMetrics::zero(), qname, attrs); *self = IntoEventsInner::Nodes { remaining: el.take_nodes().into_iter(), nested: None, }; Ok(Some(event)) } IntoEventsInner::Nodes { ref mut nested, ref mut remaining, } => { loop { if let Some(nested) = nested.as_mut() { if let Some(ev) = nested.next() { return Some(ev).transpose(); } } match remaining.next() { Some(Node::Text(text)) => { return Ok(Some(Event::Text(EventMetrics::zero(), text))); } Some(Node::Element(el)) => { *nested = Some(Box::new(IntoEvents::new(el))); // fallthrough to next loop iteration } None => { // end of element, switch state and emit EndElement *self = IntoEventsInner::Fin; return Ok(Some(Event::EndElement(EventMetrics::zero()))); } } } } IntoEventsInner::Fin => Ok(None), } } } /// Convert a [`minidom::Element`] into [`rxml::Event`]s. /// /// This can be constructed from the /// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`] /// implementation on [`minidom::Element`]. struct IntoEvents(IntoEventsInner); impl IntoEvents { fn new(el: Element) -> Self { IntoEvents(IntoEventsInner::Header(el)) } } impl Iterator for IntoEvents { type Item = Result; fn next(&mut self) -> Option { self.0.next().transpose() } } enum AsXmlState<'a> { /// Element header: we need to generate the /// [`Item::ElementHeadStart`] item from the namespace and name. Header { element: &'a Element }, /// Element header: we now generate the attributes. Attributes { /// The element (needed for the contents later and to access the /// prefix mapping). element: &'a Element, /// Attribute iterator. attributes: minidom::element::Attrs<'a>, }, /// Content: The contents of the element are streamed as events. Nodes { /// Remaining child nodes (text and/or children) to emit. nodes: minidom::element::Nodes<'a>, /// When emitting a child element, this is a nested [`IntoEvents`] /// instance for that child element. nested: Option>>, }, } /// Convert a [`minidom::Element`] to [`Item`][`crate::rxml_util::Item`]s. /// /// This can be constructed from the /// [`AsXml::as_xml_iter`][`crate::AsXml::as_xml_iter`] /// implementation on [`minidom::Element`]. pub struct ElementAsXml<'a>(Option>); impl<'a> Iterator for ElementAsXml<'a> { type Item = Result, Error>; fn next(&mut self) -> Option { match self.0 { None => None, Some(AsXmlState::Header { ref element }) => { let item = Item::ElementHeadStart( Namespace::from(element.ns()), Cow::Borrowed(match <&NcNameStr>::try_from(element.name()) { Ok(v) => v, Err(e) => { self.0 = None; return Some(Err(e.into())); } }), ); self.0 = Some(AsXmlState::Attributes { element, attributes: element.attrs(), }); Some(Ok(item)) } Some(AsXmlState::Attributes { ref mut attributes, ref element, }) => { if let Some((name, value)) = attributes.next() { let name = match <&NameStr>::try_from(name) { Ok(v) => v, Err(e) => { self.0 = None; return Some(Err(e.into())); } }; let (prefix, name) = match name.split_name() { Ok(v) => v, Err(e) => { self.0 = None; return Some(Err(e.into())); } }; let namespace = if let Some(prefix) = prefix { if prefix == "xml" { Namespace::XML } else { let ns = match element.prefixes.get(&Some(prefix.as_str().to_owned())) { Some(v) => v, None => { panic!("undeclared xml namespace prefix in minidom::Element") } }; Namespace::from(ns.to_owned()) } } else { Namespace::NONE }; Some(Ok(Item::Attribute( namespace, Cow::Borrowed(name), Cow::Borrowed(value), ))) } else { self.0 = Some(AsXmlState::Nodes { nodes: element.nodes(), nested: None, }); Some(Ok(Item::ElementHeadEnd)) } } Some(AsXmlState::Nodes { ref mut nodes, ref mut nested, }) => { if let Some(nested) = nested.as_mut() { if let Some(next) = nested.next() { return Some(next); } } *nested = None; match nodes.next() { None => { self.0 = None; Some(Ok(Item::ElementFoot)) } Some(minidom::Node::Text(ref text)) => { Some(Ok(Item::Text(Cow::Borrowed(text)))) } Some(minidom::Node::Element(ref element)) => { let mut iter = match element.as_xml_iter() { Ok(v) => v, Err(e) => { self.0 = None; return Some(Err(e.into())); } }; let item = iter.next().unwrap(); *nested = Some(Box::new(iter)); Some(item) } } } } } } impl AsXml for minidom::Element { type ItemIter<'a> = ElementAsXml<'a>; fn as_xml_iter(&self) -> Result, Error> { Ok(ElementAsXml(Some(AsXmlState::Header { element: self }))) } } /// Construct a [`minidom::Element`] from [`rxml::Event`]s /// /// This can be constructed from the /// [`FromXml::from_events`][`crate::FromXml::from_events`] /// implementation on [`minidom::Element`]. pub struct ElementFromEvents { inner: Option, nested: Option>, } impl ElementFromEvents { /// Construct a new builder from an element header. /// /// Unlike the [`FromXml::from_events`] implementation on /// [`minidom::Element`], this is contractually infallible. Using this may /// thus save you an `unwrap()` call. pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self { let mut prefixes = SimpleNamespaces::new(); let mut builder = Element::builder(qname.1, qname.0); for ((namespace, name), value) in attrs.into_iter() { if namespace.is_none() { builder = builder.attr(name, value); } else { let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone()); let name = prefix.with_suffix(&name); if is_new { builder = builder .prefix( Some(prefix.as_str().to_owned()), namespace.as_str().to_owned(), ) .unwrap(); } builder = builder.attr(name, value); } } let element = builder.build(); Self { inner: Some(element), nested: None, } } } impl FromEventsBuilder for ElementFromEvents { type Output = minidom::Element; fn feed(&mut self, ev: Event) -> Result, Error> { let inner = self .inner .as_mut() .expect("feed() called after it finished"); if let Some(nested) = self.nested.as_mut() { match nested.feed(ev)? { Some(v) => { inner.append_child(v); self.nested = None; return Ok(None); } None => return Ok(None), } } match ev { Event::XmlDeclaration(_, _) => Ok(None), Event::StartElement(_, qname, attrs) => { let nested = match Element::from_events(qname, attrs) { Ok(v) => v, Err(FromEventsError::Invalid(e)) => return Err(e), Err(FromEventsError::Mismatch { .. }) => { unreachable!("::from_events should accept everything!") } }; self.nested = Some(Box::new(nested)); Ok(None) } Event::Text(_, text) => { inner.append_text_node(text); Ok(None) } Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())), } } } impl FromXml for Element { type Builder = ElementFromEvents; fn from_events( qname: rxml::QName, attrs: rxml::AttrMap, ) -> Result { Ok(Self::Builder::new(qname, attrs)) } } /// Helper struct to streamingly parse a struct which implements conversion /// from [`minidom::Element`]. pub struct FromEventsViaElement { inner: ElementFromEvents, // needed here because we need to keep the type `T` around until // `FromEventsBuilder` is done and it must always be the same type, so we // have to nail it down in the struct's type, and to do that we need to // bind it to a field. that's what PhantomData is for. _phantom: PhantomData, } impl> FromEventsViaElement where Error: From, { /// Create a new streaming parser for `T`. pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result { Ok(Self { _phantom: PhantomData, inner: Element::from_events(qname, attrs)?, }) } } impl> FromEventsBuilder for FromEventsViaElement where Error: From, { type Output = T; fn feed(&mut self, ev: Event) -> Result, Error> { match self.inner.feed(ev) { Ok(Some(v)) => Ok(Some(v.try_into()?)), Ok(None) => Ok(None), Err(e) => Err(e), } } } /// Helper struct to stream a struct which implements conversion /// to [`minidom::Element`]. pub struct AsItemsViaElement<'x> { iter: EventToItem, lifetime_binding: PhantomData>, } impl<'x> AsItemsViaElement<'x> { /// Create a new streaming parser for `T`. pub fn new(value: T) -> Result where Error: From, minidom::Element: TryFrom, { let element: minidom::Element = value.try_into()?; Ok(Self { iter: EventToItem::new(IntoEvents::new(element)), lifetime_binding: PhantomData, }) } } impl<'x> Iterator for AsItemsViaElement<'x> { type Item = Result, Error>; fn next(&mut self) -> Option { self.iter.next().map(|x| x.map(Item::into_owned)) } } #[cfg(test)] mod tests { use super::*; #[test] fn transform_element_is_equivalent() { let el: Element = "some text".parse().unwrap(); let transformed: Element = crate::transform(el.clone()).unwrap(); assert_eq!(el, transformed); } }