From 2e21911c47b352b5382df683926ba137768902e5 Mon Sep 17 00:00:00 2001 From: Astro Date: Tue, 29 Mar 2022 03:13:01 +0200 Subject: [PATCH] minidom: use rxml's new RawParser --- minidom/Cargo.toml | 2 +- minidom/src/element.rs | 9 ++- minidom/src/error.rs | 7 --- minidom/src/parser.rs | 10 ++-- minidom/src/tests.rs | 8 ++- minidom/src/tree_builder.rs | 111 +++++++++++++++++++----------------- 6 files changed, 75 insertions(+), 72 deletions(-) diff --git a/minidom/Cargo.toml b/minidom/Cargo.toml index e108691..dd24a9e 100644 --- a/minidom/Cargo.toml +++ b/minidom/Cargo.toml @@ -22,4 +22,4 @@ gitlab = { repository = "xmpp-rs/xmpp-rs" } [dependencies] quick-xml = "0.22.0" -rxml = "0.5" +rxml = { git = "https://github.com/horazont/rxml.git" } diff --git a/minidom/src/element.rs b/minidom/src/element.rs index 9ae389d..9b9e57b 100644 --- a/minidom/src/element.rs +++ b/minidom/src/element.rs @@ -25,7 +25,7 @@ use std::io::{Cursor, BufRead, Write}; use std::borrow::Cow; use std::str; -use rxml::{EventRead, PullParser}; +use rxml::{EventRead, Lexer, PullDriver, RawParser}; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, Event}; use quick_xml::Writer as EventWriter; @@ -304,8 +304,8 @@ impl Element { /// Parse a document from a `Read`. pub fn from_reader(reader: R) -> Result { let mut tree_builder = TreeBuilder::new(); - let mut parser = PullParser::new(reader); - while let Some(event) = parser.read()? { + let mut driver = PullDriver::wrap(reader, Lexer::new(), RawParser::new()); + while let Some(event) = driver.read()? { tree_builder.process_event(event)?; if let Some(root) = tree_builder.root.take() { @@ -954,8 +954,7 @@ mod tests { #[test] fn parses_spectest_xml() { // From: https://gitlab.com/lumi/minidom-rs/issues/8 - let xml = r#" - + let xml = r#" "#; diff --git a/minidom/src/error.rs b/minidom/src/error.rs index d868c96..7bd15f3 100644 --- a/minidom/src/error.rs +++ b/minidom/src/error.rs @@ -29,9 +29,6 @@ pub enum Error { /// An error which is returned when the end of the document was reached prematurely. EndOfDocument, - /// An error which is returned when an element is closed when it shouldn't be - InvalidElementClosed, - /// An error which is returned when an element being serialized doesn't contain a prefix /// (be it None or Some(_)). InvalidPrefix, @@ -50,7 +47,6 @@ impl StdError for Error { Error::ParserError(e) => Some(e), Error::IoError(e) => Some(e), Error::EndOfDocument => None, - Error::InvalidElementClosed => None, Error::InvalidPrefix => None, Error::MissingNamespace => None, Error::DuplicatePrefix => None, @@ -67,9 +63,6 @@ impl std::fmt::Display for Error { Error::EndOfDocument => { write!(fmt, "the end of the document has been reached prematurely") } - Error::InvalidElementClosed => { - write!(fmt, "the XML is invalid, an element was wrongly closed") - } Error::InvalidPrefix => write!(fmt, "the prefix is invalid"), Error::MissingNamespace => write!(fmt, "the XML element is missing a namespace",), Error::DuplicatePrefix => write!(fmt, "the prefix is already defined"), diff --git a/minidom/src/parser.rs b/minidom/src/parser.rs index cda6a4c..1af2497 100644 --- a/minidom/src/parser.rs +++ b/minidom/src/parser.rs @@ -8,9 +8,9 @@ //! Provides a `Parser` type, which takes bytes and returns Elements. It also keeps a hold of //! ascendant elements to be able to handle namespaces properly. +use rxml::{PushDriver, RawParser}; use crate::element::Element; use crate::error::{Error, ParserError, Result}; -use crate::tokenizer::Tokenizer; use crate::tree_builder::TreeBuilder; use std::str; @@ -18,7 +18,7 @@ use std::str; /// Parser #[derive(Debug)] pub struct Parser { - tokenizer: Tokenizer, + driver: PushDriver, tree_builder: TreeBuilder, state: ParserState, } @@ -90,7 +90,7 @@ impl Parser { /// Creates a new Parser pub fn new() -> Parser { Parser { - tokenizer: Tokenizer::new(), + driver: PushDriver::default(), tree_builder: TreeBuilder::new(), state: ParserState::Empty, } @@ -98,7 +98,9 @@ impl Parser { /// Feed bytes to the parser. pub fn feed(&mut self, bytes: BytesMut) -> Result<()> { - self.buffer.borrow_mut().unsplit(bytes); + self.driver.feed(bytes); + bytes.clear(); + let state = match self.state { ParserState::Empty => { // TODO: Try splitting xml prolog and stream header diff --git a/minidom/src/tests.rs b/minidom/src/tests.rs index c76529c..56664b7 100644 --- a/minidom/src/tests.rs +++ b/minidom/src/tests.rs @@ -432,12 +432,16 @@ fn fail_comments() { #[test] fn xml_error() { match "".parse::() { - Err(crate::error::Error::InvalidElementClosed) => (), + Err(crate::error::Error::ParserError( + rxml::Error::NotWellFormed(rxml::error::WFError::ElementMismatch) + )) => (), err => panic!("No or wrong error: {:?}", err), } match "() { - Err(crate::error::Error::EndOfDocument) => (), + Err(crate::error::Error::ParserError( + rxml::Error::NotWellFormed(rxml::error::WFError::InvalidEof(_)) + )) => (), err => panic!("No or wrong error: {:?}", err), } } diff --git a/minidom/src/tree_builder.rs b/minidom/src/tree_builder.rs index 180f2c1..c49588b 100644 --- a/minidom/src/tree_builder.rs +++ b/minidom/src/tree_builder.rs @@ -2,13 +2,14 @@ //! SAX events to DOM tree conversion -use std::collections::{BTreeMap, HashMap}; -use rxml::{CData, Event, QName}; +use std::collections::BTreeMap; +use rxml::RawEvent; use crate::{Element, Error}; -use crate::prefixes::Prefixes; +use crate::prefixes::{Prefix, Prefixes}; /// Tree-building parser state pub struct TreeBuilder { + next_tag: Option<(Prefix, String, Prefixes, BTreeMap)>, /// Parsing stack stack: Vec, /// Namespace set stack by prefix @@ -21,6 +22,7 @@ impl TreeBuilder { /// Create a new one pub fn new() -> Self { TreeBuilder { + next_tag: None, stack: vec![], prefixes_stack: vec![], root: None, @@ -64,48 +66,6 @@ impl TreeBuilder { None } - fn process_start_tag(&mut self, (prefix, name): QName, attrs: HashMap) -> Result<(), Error> { - dbg!(&attrs); - let mut prefixes = Prefixes::default(); - let mut attributes = BTreeMap::new(); - for ((prefix, name), value) in attrs.into_iter() { - match (prefix, name) { - (None, xmlns) if xmlns == "xmlns" => { - prefixes.insert(None, value); - } - (Some(xmlns), prefix) if *xmlns == "xmlns" => { - prefixes.insert(Some(prefix.as_string()), value); - } - (Some(prefix), name) => { - attributes.insert(format!("{}:{}", prefix, name), value.as_string()); - } - (None, name) => { - attributes.insert(name.as_string(), value.as_string()); - } - } - } - dbg!(&prefixes); - self.prefixes_stack.push(prefixes.clone()); - dbg!(&attributes); - - let namespace = self.lookup_prefix( - &prefix.clone().map(|prefix| prefix.as_str().to_owned()) - ) - .ok_or(Error::MissingNamespace)? - .to_owned(); - let el = Element::new( - name.as_string(), - namespace, - Some(prefix.map(|prefix| prefix.as_str().to_owned())), - prefixes, - attributes, - vec![] - ); - self.stack.push(el); - - Ok(()) - } - fn process_end_tag(&mut self) -> Result<(), Error> { if let Some(el) = self.pop() { if self.depth() > 0 { @@ -127,19 +87,64 @@ impl TreeBuilder { } /// Process a Event that you got out of a Eventizer - pub fn process_event(&mut self, event: Event) -> Result<(), Error> { - dbg!(&event); + pub fn process_event(&mut self, event: RawEvent) -> Result<(), Error> { match event { - Event::XMLDeclaration(_, _) => {}, + RawEvent::XMLDeclaration(_, _) => {}, - Event::StartElement(_, name, attrs) => - self.process_start_tag(name, attrs)?, + RawEvent::ElementHeadOpen(_, (prefix, name)) => + self.next_tag = Some(( + prefix.map(|prefix| prefix.as_str().to_owned()), + name.as_str().to_owned(), + Prefixes::default(), + BTreeMap::new() + )), - Event::EndElement(_) => + RawEvent::Attribute(_, (prefix, name), value) => { + self.next_tag.as_mut() + .map(|(_, _, ref mut prefixes, ref mut attrs)| { + match (prefix, name) { + (None, xmlns) if xmlns == "xmlns" => { + prefixes.insert(None, value); + } + (Some(xmlns), prefix) if xmlns.as_str() == "xmlns" => { + prefixes.insert(Some(prefix.as_str().to_owned()), value); + } + (Some(prefix), name) => { + attrs.insert(format!("{}:{}", prefix, name), value.as_str().to_owned()); + } + (None, name) => { + attrs.insert(name.as_str().to_owned(), value.as_str().to_owned()); + } + } + }); + } + + RawEvent::ElementHeadClose(_) => { + if let Some((prefix, name, prefixes, attrs)) = self.next_tag.take() { + self.prefixes_stack.push(prefixes.clone()); + + let namespace = self.lookup_prefix( + &prefix.clone().map(|prefix| prefix.as_str().to_owned()) + ) + .ok_or(Error::MissingNamespace)? + .to_owned(); + let el = Element::new( + name.as_str().to_owned(), + namespace, + Some(prefix.map(|prefix| prefix.as_str().to_owned())), + prefixes, + attrs, + vec![] + ); + self.stack.push(el); + } + } + + RawEvent::ElementFoot(_) => self.process_end_tag()?, - Event::Text(_, text) => - self.process_text(text.as_string()), + RawEvent::Text(_, text) => + self.process_text(text.as_str().to_owned()), } Ok(())