minidom: use rxml's new RawParser

This commit is contained in:
Astro 2022-03-29 03:13:01 +02:00
parent 99190789cb
commit 2e21911c47
6 changed files with 75 additions and 72 deletions

View file

@ -22,4 +22,4 @@ gitlab = { repository = "xmpp-rs/xmpp-rs" }
[dependencies] [dependencies]
quick-xml = "0.22.0" quick-xml = "0.22.0"
rxml = "0.5" rxml = { git = "https://github.com/horazont/rxml.git" }

View file

@ -25,7 +25,7 @@ use std::io::{Cursor, BufRead, Write};
use std::borrow::Cow; use std::borrow::Cow;
use std::str; use std::str;
use rxml::{EventRead, PullParser}; use rxml::{EventRead, Lexer, PullDriver, RawParser};
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, Event}; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, Event};
use quick_xml::Writer as EventWriter; use quick_xml::Writer as EventWriter;
@ -304,8 +304,8 @@ impl Element {
/// Parse a document from a `BufRead`. /// Parse a document from a `BufRead`.
pub fn from_reader<R: BufRead>(reader: R) -> Result<Element> { pub fn from_reader<R: BufRead>(reader: R) -> Result<Element> {
let mut tree_builder = TreeBuilder::new(); let mut tree_builder = TreeBuilder::new();
let mut parser = PullParser::new(reader); let mut driver = PullDriver::wrap(reader, Lexer::new(), RawParser::new());
while let Some(event) = parser.read()? { while let Some(event) = driver.read()? {
tree_builder.process_event(event)?; tree_builder.process_event(event)?;
if let Some(root) = tree_builder.root.take() { if let Some(root) = tree_builder.root.take() {
@ -954,8 +954,7 @@ mod tests {
#[test] #[test]
fn parses_spectest_xml() { fn parses_spectest_xml() {
// From: https://gitlab.com/lumi/minidom-rs/issues/8 // From: https://gitlab.com/lumi/minidom-rs/issues/8
let xml = r#" let xml = r#"<rng:grammar xmlns:rng="http://relaxng.org/ns/structure/1.0">
<rng:grammar xmlns:rng="http://relaxng.org/ns/structure/1.0">
<rng:name xmlns:rng="http://relaxng.org/ns/structure/1.0"></rng:name> <rng:name xmlns:rng="http://relaxng.org/ns/structure/1.0"></rng:name>
</rng:grammar> </rng:grammar>
"#; "#;

View file

@ -29,9 +29,6 @@ pub enum Error {
/// An error which is returned when the end of the document was reached prematurely. /// An error which is returned when the end of the document was reached prematurely.
EndOfDocument, EndOfDocument,
/// An error which is returned when an element is closed when it shouldn't be
InvalidElementClosed,
/// An error which is returned when an element being serialized doesn't contain a prefix /// An error which is returned when an element being serialized doesn't contain a prefix
/// (be it None or Some(_)). /// (be it None or Some(_)).
InvalidPrefix, InvalidPrefix,
@ -50,7 +47,6 @@ impl StdError for Error {
Error::ParserError(e) => Some(e), Error::ParserError(e) => Some(e),
Error::IoError(e) => Some(e), Error::IoError(e) => Some(e),
Error::EndOfDocument => None, Error::EndOfDocument => None,
Error::InvalidElementClosed => None,
Error::InvalidPrefix => None, Error::InvalidPrefix => None,
Error::MissingNamespace => None, Error::MissingNamespace => None,
Error::DuplicatePrefix => None, Error::DuplicatePrefix => None,
@ -67,9 +63,6 @@ impl std::fmt::Display for Error {
Error::EndOfDocument => { Error::EndOfDocument => {
write!(fmt, "the end of the document has been reached prematurely") write!(fmt, "the end of the document has been reached prematurely")
} }
Error::InvalidElementClosed => {
write!(fmt, "the XML is invalid, an element was wrongly closed")
}
Error::InvalidPrefix => write!(fmt, "the prefix is invalid"), Error::InvalidPrefix => write!(fmt, "the prefix is invalid"),
Error::MissingNamespace => write!(fmt, "the XML element is missing a namespace",), Error::MissingNamespace => write!(fmt, "the XML element is missing a namespace",),
Error::DuplicatePrefix => write!(fmt, "the prefix is already defined"), Error::DuplicatePrefix => write!(fmt, "the prefix is already defined"),

View file

@ -8,9 +8,9 @@
//! Provides a `Parser` type, which takes bytes and returns Elements. It also keeps a hold of //! Provides a `Parser` type, which takes bytes and returns Elements. It also keeps a hold of
//! ascendant elements to be able to handle namespaces properly. //! ascendant elements to be able to handle namespaces properly.
use rxml::{PushDriver, RawParser};
use crate::element::Element; use crate::element::Element;
use crate::error::{Error, ParserError, Result}; use crate::error::{Error, ParserError, Result};
use crate::tokenizer::Tokenizer;
use crate::tree_builder::TreeBuilder; use crate::tree_builder::TreeBuilder;
use std::str; use std::str;
@ -18,7 +18,7 @@ use std::str;
/// Parser /// Parser
#[derive(Debug)] #[derive(Debug)]
pub struct Parser { pub struct Parser {
tokenizer: Tokenizer, driver: PushDriver<RawParser>,
tree_builder: TreeBuilder, tree_builder: TreeBuilder,
state: ParserState, state: ParserState,
} }
@ -90,7 +90,7 @@ impl Parser {
/// Creates a new Parser /// Creates a new Parser
pub fn new() -> Parser { pub fn new() -> Parser {
Parser { Parser {
tokenizer: Tokenizer::new(), driver: PushDriver::default(),
tree_builder: TreeBuilder::new(), tree_builder: TreeBuilder::new(),
state: ParserState::Empty, state: ParserState::Empty,
} }
@ -98,7 +98,9 @@ impl Parser {
/// Feed bytes to the parser. /// Feed bytes to the parser.
pub fn feed(&mut self, bytes: BytesMut) -> Result<()> { pub fn feed(&mut self, bytes: BytesMut) -> Result<()> {
self.buffer.borrow_mut().unsplit(bytes); self.driver.feed(bytes);
bytes.clear();
let state = match self.state { let state = match self.state {
ParserState::Empty => { ParserState::Empty => {
// TODO: Try splitting xml prolog and stream header // TODO: Try splitting xml prolog and stream header

View file

@ -432,12 +432,16 @@ fn fail_comments() {
#[test] #[test]
fn xml_error() { fn xml_error() {
match "<a xmlns='ns1'></b>".parse::<Element>() { match "<a xmlns='ns1'></b>".parse::<Element>() {
Err(crate::error::Error::InvalidElementClosed) => (), Err(crate::error::Error::ParserError(
rxml::Error::NotWellFormed(rxml::error::WFError::ElementMismatch)
)) => (),
err => panic!("No or wrong error: {:?}", err), err => panic!("No or wrong error: {:?}", err),
} }
match "<a xmlns='ns1'></".parse::<Element>() { match "<a xmlns='ns1'></".parse::<Element>() {
Err(crate::error::Error::EndOfDocument) => (), Err(crate::error::Error::ParserError(
rxml::Error::NotWellFormed(rxml::error::WFError::InvalidEof(_))
)) => (),
err => panic!("No or wrong error: {:?}", err), err => panic!("No or wrong error: {:?}", err),
} }
} }

View file

@ -2,13 +2,14 @@
//! SAX events to DOM tree conversion //! SAX events to DOM tree conversion
use std::collections::{BTreeMap, HashMap}; use std::collections::BTreeMap;
use rxml::{CData, Event, QName}; use rxml::RawEvent;
use crate::{Element, Error}; use crate::{Element, Error};
use crate::prefixes::Prefixes; use crate::prefixes::{Prefix, Prefixes};
/// Tree-building parser state /// Tree-building parser state
pub struct TreeBuilder { pub struct TreeBuilder {
next_tag: Option<(Prefix, String, Prefixes, BTreeMap<String, String>)>,
/// Parsing stack /// Parsing stack
stack: Vec<Element>, stack: Vec<Element>,
/// Namespace set stack by prefix /// Namespace set stack by prefix
@ -21,6 +22,7 @@ impl TreeBuilder {
/// Create a new one /// Create a new one
pub fn new() -> Self { pub fn new() -> Self {
TreeBuilder { TreeBuilder {
next_tag: None,
stack: vec![], stack: vec![],
prefixes_stack: vec![], prefixes_stack: vec![],
root: None, root: None,
@ -64,48 +66,6 @@ impl TreeBuilder {
None None
} }
fn process_start_tag(&mut self, (prefix, name): QName, attrs: HashMap<QName, CData>) -> Result<(), Error> {
dbg!(&attrs);
let mut prefixes = Prefixes::default();
let mut attributes = BTreeMap::new();
for ((prefix, name), value) in attrs.into_iter() {
match (prefix, name) {
(None, xmlns) if xmlns == "xmlns" => {
prefixes.insert(None, value);
}
(Some(xmlns), prefix) if *xmlns == "xmlns" => {
prefixes.insert(Some(prefix.as_string()), value);
}
(Some(prefix), name) => {
attributes.insert(format!("{}:{}", prefix, name), value.as_string());
}
(None, name) => {
attributes.insert(name.as_string(), value.as_string());
}
}
}
dbg!(&prefixes);
self.prefixes_stack.push(prefixes.clone());
dbg!(&attributes);
let namespace = self.lookup_prefix(
&prefix.clone().map(|prefix| prefix.as_str().to_owned())
)
.ok_or(Error::MissingNamespace)?
.to_owned();
let el = Element::new(
name.as_string(),
namespace,
Some(prefix.map(|prefix| prefix.as_str().to_owned())),
prefixes,
attributes,
vec![]
);
self.stack.push(el);
Ok(())
}
fn process_end_tag(&mut self) -> Result<(), Error> { fn process_end_tag(&mut self) -> Result<(), Error> {
if let Some(el) = self.pop() { if let Some(el) = self.pop() {
if self.depth() > 0 { if self.depth() > 0 {
@ -127,19 +87,64 @@ impl TreeBuilder {
} }
/// Process an Event that you got out of an Eventizer /// Process an Event that you got out of an Eventizer
pub fn process_event(&mut self, event: Event) -> Result<(), Error> { pub fn process_event(&mut self, event: RawEvent) -> Result<(), Error> {
dbg!(&event);
match event { match event {
Event::XMLDeclaration(_, _) => {}, RawEvent::XMLDeclaration(_, _) => {},
Event::StartElement(_, name, attrs) => RawEvent::ElementHeadOpen(_, (prefix, name)) =>
self.process_start_tag(name, attrs)?, self.next_tag = Some((
prefix.map(|prefix| prefix.as_str().to_owned()),
name.as_str().to_owned(),
Prefixes::default(),
BTreeMap::new()
)),
Event::EndElement(_) => RawEvent::Attribute(_, (prefix, name), value) => {
self.next_tag.as_mut()
.map(|(_, _, ref mut prefixes, ref mut attrs)| {
match (prefix, name) {
(None, xmlns) if xmlns == "xmlns" => {
prefixes.insert(None, value);
}
(Some(xmlns), prefix) if xmlns.as_str() == "xmlns" => {
prefixes.insert(Some(prefix.as_str().to_owned()), value);
}
(Some(prefix), name) => {
attrs.insert(format!("{}:{}", prefix, name), value.as_str().to_owned());
}
(None, name) => {
attrs.insert(name.as_str().to_owned(), value.as_str().to_owned());
}
}
});
}
RawEvent::ElementHeadClose(_) => {
if let Some((prefix, name, prefixes, attrs)) = self.next_tag.take() {
self.prefixes_stack.push(prefixes.clone());
let namespace = self.lookup_prefix(
&prefix.clone().map(|prefix| prefix.as_str().to_owned())
)
.ok_or(Error::MissingNamespace)?
.to_owned();
let el = Element::new(
name.as_str().to_owned(),
namespace,
Some(prefix.map(|prefix| prefix.as_str().to_owned())),
prefixes,
attrs,
vec![]
);
self.stack.push(el);
}
}
RawEvent::ElementFoot(_) =>
self.process_end_tag()?, self.process_end_tag()?,
Event::Text(_, text) => RawEvent::Text(_, text) =>
self.process_text(text.as_string()), self.process_text(text.as_str().to_owned()),
} }
Ok(()) Ok(())