//! Implementations of traits from this crate for minidom types // Copyright (c) 2024 Jonas Schäfer // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use std::marker::PhantomData; use std::vec::IntoIter; use minidom::{Element, Node}; use rxml::{ parser::EventMetrics, writer::{SimpleNamespaces, TrackNamespace}, AttrMap, Event, Name, Namespace, NcName, }; use crate::{ error::{Error, FromEventsError}, FromEventsBuilder, FromXml, IntoXml, }; /// State machine for converting a minidom Element into rxml events. enum IntoEventsInner { /// Element header: the element is still intact and we need to generate /// the [`rxml::Event::StartElement`] event from the namespace, name, and /// attributes. Header(Element), /// Content: The contents of the element are streamed as events. Nodes { /// Remaining child nodes (text and/or children) to emit. remaining: IntoIter, /// When emitting a child element, this is a nested [`IntoEvents`] /// instance for that child element. nested: Option>, }, /// End of iteration: this state generates an end-of-iterator state. /// /// Note that the [`rxml::Event::EndElement`] event for the element itself /// is generated by the iterator alraedy in the `Nodes` state, when /// `nested` is None and `remaining` returns `None` from its `next()` /// implementation. Fin, } /// Create the parts for a [`rxml::Event::StartElement`] from a /// [`minidom::Element`]. /// /// Note that this copies the attribute data as well as namespace and name. /// This is due to limitations in the [`minidom::Element`] API. // NOTE to developers: The limitations are not fully trivial to overcome: // the attributes use a BTreeMap internally, which does not offer a `drain` // iterator. pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> { let name = NcName::try_from(el.name())?; let namespace = Namespace::from(el.ns()); let mut attrs = AttrMap::new(); for (name, value) in el.attrs() { let name = Name::try_from(name)?; let (prefix, name) = name.split_name()?; let namespace = if let Some(prefix) = prefix { if prefix == "xml" { Namespace::XML } else { let ns = match el.prefixes.get(&Some(prefix.into())) { Some(v) => v, None => { panic!("undeclared xml namespace prefix in minidom::Element") } }; Namespace::from(ns.to_owned()) } } else { Namespace::NONE }; attrs.insert(namespace, name, value.to_owned()); } Ok(((namespace, name), attrs)) } impl IntoEventsInner { fn next(&mut self) -> Result, Error> { match self { IntoEventsInner::Header(ref mut el) => { let (qname, attrs) = make_start_ev_parts(el)?; let event = Event::StartElement(EventMetrics::zero(), qname, attrs); *self = IntoEventsInner::Nodes { remaining: el.take_nodes().into_iter(), nested: None, }; return Ok(Some(event)); } IntoEventsInner::Nodes { ref mut nested, ref mut remaining, } => { loop { if let Some(nested) = nested.as_mut() { if let Some(ev) = nested.next() { return Some(ev).transpose(); } } match remaining.next() { Some(Node::Text(text)) => { return Ok(Some(Event::Text(EventMetrics::zero(), text))); } Some(Node::Element(el)) => { *nested = Some(Box::new(el.into_event_iter()?)); // fallthrough to next loop iteration } None => { // end of element, switch state and emit EndElement *self = IntoEventsInner::Fin; return Ok(Some(Event::EndElement(EventMetrics::zero()))); } } } } IntoEventsInner::Fin => Ok(None), } } } /// Convert a [`minidom::Element`] into [`rxml::Event`]s. /// /// This can be constructed from the /// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`] /// implementation on [`minidom::Element`]. pub struct IntoEvents(IntoEventsInner); impl Iterator for IntoEvents { type Item = Result; fn next(&mut self) -> Option { self.0.next().transpose() } } impl IntoXml for Element { type EventIter = IntoEvents; fn into_event_iter(self) -> Result { Ok(IntoEvents(IntoEventsInner::Header(self))) } } /// Construct a [`minidom::Element`] from [`rxml::Event`]s /// /// This can be constructed from the /// [`FromXml::from_events`][`crate::FromXml::from_events`] /// implementation on [`minidom::Element`]. pub struct ElementFromEvents { inner: Option, nested: Option>, } impl FromEventsBuilder for ElementFromEvents { type Output = minidom::Element; fn feed(&mut self, ev: Event) -> Result, Error> { let inner = self .inner .as_mut() .expect("feed() called after it finished"); if let Some(nested) = self.nested.as_mut() { match nested.feed(ev)? { Some(v) => { inner.append_child(v); self.nested = None; return Ok(None); } None => return Ok(None), } } match ev { Event::XmlDeclaration(_, _) => Ok(None), Event::StartElement(_, qname, attrs) => { let nested = match Element::from_events(qname, attrs) { Ok(v) => v, Err(FromEventsError::Invalid(e)) => return Err(e), Err(FromEventsError::Mismatch { .. }) => { unreachable!("::from_events should accept everything!") } }; self.nested = Some(Box::new(nested)); Ok(None) } Event::Text(_, text) => { inner.append_text_node(text); Ok(None) } Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())), } } } impl FromXml for Element { type Builder = ElementFromEvents; fn from_events( qname: rxml::QName, attrs: rxml::AttrMap, ) -> Result { let mut prefixes = SimpleNamespaces::new(); let mut builder = Element::builder(qname.1, qname.0); for ((namespace, name), value) in attrs.into_iter() { if namespace.is_none() { builder = builder.attr(name, String::from(value)); } else { let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone()); let name = prefix.with_suffix(&name); if is_new { builder = builder .prefix( Some(prefix.as_str().to_owned()), namespace.as_str().to_owned(), ) .unwrap(); } builder = builder.attr(name, String::from(value)); } } let element = builder.build(); Ok(Self::Builder { inner: Some(element), nested: None, }) } } /// Helper struct to streamingly parse a struct which implements conversion /// from [`minidom::Element`]. pub struct FromEventsViaElement { inner: ElementFromEvents, // needed here because we need to keep the type `T` around until // `FromEventsBuilder` is done and it must always be the same type, so we // have to nail it down in the struct's type, and to do that we need to // bind it to a field. that's what PhantomData is for. _phantom: PhantomData, } impl> FromEventsViaElement where Error: From, { /// Create a new streaming parser for `T`. pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result { Ok(Self { _phantom: PhantomData, inner: Element::from_events(qname, attrs)?, }) } } impl> FromEventsBuilder for FromEventsViaElement where Error: From, { type Output = T; fn feed(&mut self, ev: Event) -> Result, Error> { match self.inner.feed(ev) { Ok(Some(v)) => Ok(Some(v.try_into()?)), Ok(None) => Ok(None), Err(e) => Err(e), } } } /// Helper struct to stream a struct which implements conversion /// to [`minidom::Element`]. pub struct IntoEventsViaElement { inner: IntoEvents, } impl IntoEventsViaElement { /// Create a new streaming parser for `T`. pub fn new(value: T) -> Result where Error: From, minidom::Element: TryFrom, { let element: minidom::Element = value.try_into()?; Ok(Self { inner: element.into_event_iter()?, }) } } impl Iterator for IntoEventsViaElement { type Item = Result; fn next(&mut self) -> Option { self.inner.next() } } #[cfg(test)] mod tests { use super::*; #[test] fn transform_element_is_equivalent() { let el: Element = "some text".parse().unwrap(); let transformed: Element = crate::transform(el.clone()).unwrap(); assert_eq!(el, transformed); } }