Replace xml-rs by quick_xml

quick_xml is way faster than xml-rs

Here is an example with a quick atom parser:
    With xml-rs:
        test parse_factorio_atom ... bench:   3,295,678 ns/iter (+/- 165,851)
    With quick_xml:
        test parse_factorio_atom ... bench:     203,215 ns/iter (+/- 13,485)

Unfortunately I had to break the API for this change to happen.
* Element::from_reader now takes `R: BufRead` instead of `R: Read`
* Element::write_to now takes `W: io::Write` instead of `EventWriter<W: Write>`

This migration also allow us to have a write_to function which assumes
we're already in a given namespace (see `write_to_in_namespace`).
This commit is contained in:
Bastien Orivel 2017-06-07 22:40:53 +02:00
parent d6a9e6e9ea
commit 9cec9fce9b
5 changed files with 156 additions and 127 deletions

View file

@ -1,8 +1,8 @@
[package] [package]
name = "minidom" name = "minidom"
version = "0.4.3" version = "0.5.0"
authors = ["lumi <lumi@pew.im>", "Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>", "Bastien Orivel <eijebong+minidom@bananium.fr>"] authors = ["lumi <lumi@pew.im>", "Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>", "Bastien Orivel <eijebong+minidom@bananium.fr>"]
description = "A small, simple DOM implementation on top of xml-rs." description = "A small, simple DOM implementation on top of quick-xml"
homepage = "https://gitlab.com/lumi/minidom-rs" homepage = "https://gitlab.com/lumi/minidom-rs"
repository = "https://gitlab.com/lumi/minidom-rs" repository = "https://gitlab.com/lumi/minidom-rs"
documentation = "https://docs.rs/minidom" documentation = "https://docs.rs/minidom"
@ -14,5 +14,5 @@ license = "MIT"
gitlab = { repository = "lumi/minidom-rs" } gitlab = { repository = "lumi/minidom-rs" }
[dependencies] [dependencies]
xml-rs = "0.4.1" quick-xml = "0.7.3"
error-chain = "0.10.0" error-chain = "0.10.0"

View file

@ -1,18 +1,16 @@
//! Provides an `Element` type, which represents DOM nodes, and a builder to create them with. //! Provides an `Element` type, which represents DOM nodes, and a builder to create them with.
use std::io::prelude::*; use std::io:: Write;
use std::io::Cursor; use std::collections::{btree_map, BTreeMap};
use std::collections::BTreeMap;
use std::collections::btree_map;
use std::fmt; use std::str;
use error::{Error, ErrorKind, Result}; use error::{Error, ErrorKind, Result};
use xml::reader::{XmlEvent as ReaderEvent, EventReader}; use quick_xml::reader::Reader as EventReader;
use xml::writer::{XmlEvent as WriterEvent, EventWriter, EmitterConfig}; use quick_xml::events::{Event, BytesStart};
use xml::name::Name;
use xml::namespace::NS_NO_PREFIX; use std::io::BufRead;
use std::str::FromStr; use std::str::FromStr;
@ -69,9 +67,18 @@ impl Node {
Node::Text(ref s) => Some(s), Node::Text(ref s) => Some(s),
} }
} }
fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()>{
match *self {
Node::Element(ref elmt) => elmt.write_to_inner(writer, last_namespace)?,
Node::Text(ref s) => write!(writer, "{}", s)?,
} }
#[derive(Clone, PartialEq, Eq)] Ok(())
}
}
#[derive(Clone, PartialEq, Eq, Debug)]
/// A struct representing a DOM Element. /// A struct representing a DOM Element.
pub struct Element { pub struct Element {
name: String, name: String,
@ -82,26 +89,18 @@ pub struct Element {
impl<'a> From<&'a Element> for String { impl<'a> From<&'a Element> for String {
fn from(elem: &'a Element) -> String { fn from(elem: &'a Element) -> String {
let mut out = Vec::new(); let mut writer = Vec::new();
let config = EmitterConfig::new() elem.write_to(&mut writer).unwrap();
.write_document_declaration(false); String::from_utf8(writer).unwrap()
elem.write_to(&mut EventWriter::new_with_config(&mut out, config)).unwrap();
String::from_utf8(out).unwrap()
} }
} }
impl fmt::Debug for Element {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", String::from(self))?;
Ok(())
}
}
impl FromStr for Element { impl FromStr for Element {
type Err = Error; type Err = Error;
fn from_str(s: &str) -> Result<Element> { fn from_str(s: &str) -> Result<Element> {
let mut reader = EventReader::new(Cursor::new(s)); let mut reader = EventReader::from_str(s);
Element::from_reader(&mut reader) Element::from_reader(&mut reader)
} }
} }
@ -246,106 +245,105 @@ impl Element {
} }
/// Parse a document from an `EventReader`. /// Parse a document from an `EventReader`.
pub fn from_reader<R: Read>(reader: &mut EventReader<R>) -> Result<Element> { pub fn from_reader<R: BufRead>(reader: &mut EventReader<R>) -> Result<Element> {
loop { let mut buf = Vec::new();
let e = reader.next()?; let root: Element;
match e {
ReaderEvent::StartElement { name, attributes, namespace } => {
let attributes = attributes.into_iter()
.map(|o| {
(match o.name.prefix {
Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
None => o.name.local_name
},
o.value)
})
.collect();
let ns = if let Some(ref prefix) = name.prefix {
namespace.get(prefix)
}
else {
namespace.get(NS_NO_PREFIX)
}.map(|s| s.to_owned());
let mut root = Element::new(name.local_name, ns, attributes, Vec::new()); loop {
root.from_reader_inner(reader)?; let e = reader.read_event(&mut buf)?;
return Ok(root); match e {
Event::Empty(ref e) | Event::Start(ref e) => {
root = build_element(e)?; // FIXME: could be break build_element(e)? when break value is stable
break;
}, },
ReaderEvent::EndDocument => { Event::Eof => {
bail!(ErrorKind::EndOfDocument); bail!(ErrorKind::EndOfDocument);
}, },
_ => () // TODO: may need more errors _ => () // TODO: may need more errors
} }
} };
}
let mut stack = vec![root];
#[cfg_attr(feature = "cargo-clippy", allow(wrong_self_convention))]
fn from_reader_inner<R: Read>(&mut self, reader: &mut EventReader<R>) -> Result<()> {
loop { loop {
let e = reader.next()?; match reader.read_event(&mut buf)? {
match e { Event::Empty(ref e) => {
ReaderEvent::StartElement { name, attributes, namespace } => { let elem = build_element(e)?;
let attributes = attributes.into_iter() // Since there is no Event::End after, directly append it to the current node
.map(|o| { stack.last_mut().unwrap().append_child(elem);
(match o.name.prefix {
Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
None => o.name.local_name
}, },
o.value) Event::Start(ref e) => {
}) let elem = build_element(e)?;
.collect(); stack.push(elem);
let ns = if let Some(ref prefix) = name.prefix { },
namespace.get(prefix) Event::End(ref e) => {
if stack.len() <= 1 {
break;
}
let elem = stack.pop().unwrap();
if let Some(to) = stack.last_mut() {
if elem.name().as_bytes() != e.name() {
bail!(ErrorKind::InvalidElementClosed);
}
to.append_child(elem);
} }
else {
namespace.get(NS_NO_PREFIX)
}.map(|s| s.to_owned());
let elem = Element::new(name.local_name, ns, attributes, Vec::with_capacity(1));
let elem_ref = self.append_child(elem);
elem_ref.from_reader_inner(reader)?;
}, },
ReaderEvent::EndElement { .. } => { Event::Text(s) | Event::CData(s) => {
// TODO: may want to check whether we're closing the correct element let text = s.unescape_and_decode(reader)?;
return Ok(()); if text != "" {
let mut current_elem = stack.last_mut().unwrap();
current_elem.append_text_node(text);
}
}, },
ReaderEvent::Characters(s) | ReaderEvent::CData(s) => { Event::Eof => {
self.append_text_node(s); break;
},
ReaderEvent::EndDocument => {
bail!(ErrorKind::EndOfDocument);
}, },
_ => (), // TODO: may need to implement more _ => (), // TODO: may need to implement more
} }
} }
Ok(stack.pop().unwrap())
} }
/// Output a document to an `EventWriter`. /// Output a document to a `Writer`.
pub fn write_to<W: Write>(&self, writer: &mut EventWriter<W>) -> Result<()> { pub fn write_to<W: Write>(&self, writer: &mut W) -> Result<()> {
let name = if let Some(ref ns) = self.namespace { let mut last_namespace = None;
Name::qualified(&self.name, ns, None) write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
self.write_to_inner(writer, &mut last_namespace)
} }
else {
Name::local(&self.name) /// Output a document to a `Writer` assuming you're already in the provided namespace
}; pub fn write_to_in_namespace<W: Write>(&self, writer: &mut W, namespace: &str) -> Result<()> {
let mut start = WriterEvent::start_element(name); write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
self.write_to_inner(writer, &mut Some(namespace.to_owned()))
}
fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()> {
write!(writer, "<")?;
write!(writer, "{}", self.name)?;
if let Some(ref ns) = self.namespace { if let Some(ref ns) = self.namespace {
start = start.default_ns(ns.clone()); if *last_namespace != self.namespace {
write!(writer, " xmlns=\"{}\"", ns)?;
*last_namespace = Some(ns.clone());
} }
for attr in &self.attributes { // TODO: I think this could be done a lot more efficiently
start = start.attr(Name::local(attr.0), attr.1);
} }
writer.write(start)?;
for (key, value) in &self.attributes {
write!(writer, " {}=\"{}\"", key, value)?;
}
if self.children.is_empty() {
write!(writer, " />")?;
return Ok(())
}
write!(writer, ">")?;
for child in &self.children { for child in &self.children {
match *child { child.write_to_inner(writer, last_namespace)?;
Node::Element(ref e) => {
e.write_to(writer)?;
},
Node::Text(ref s) => {
writer.write(WriterEvent::characters(s))?;
},
} }
}
writer.write(WriterEvent::end_element())?; write!(writer, "</{}>", self.name)?;
Ok(()) Ok(())
} }
@ -354,7 +352,7 @@ impl Element {
/// # Examples /// # Examples
/// ///
/// ```rust /// ```rust
/// use minidom::{Element, Node}; /// use minidom::Element;
/// ///
/// let elem: Element = "<root>a<c1 />b<c2 />c</root>".parse().unwrap(); /// let elem: Element = "<root>a<c1 />b<c2 />c</root>".parse().unwrap();
/// ///
@ -592,6 +590,31 @@ impl Element {
} }
} }
fn build_element(event: &BytesStart) -> Result<Element> {
let mut attributes = event.attributes()
.map(|o| {
let o = o?;
let key = str::from_utf8(o.key)?.to_owned();
let value = str::from_utf8(o.value)?.to_owned();
Ok((key, value))
}
)
.collect::<Result<BTreeMap<String, String>>>()?;
let mut ns_key = None;
for (key, _) in &attributes {
if key == "xmlns" || key.starts_with("xmlns:") {
ns_key = Some(key.clone());
}
}
let ns = match ns_key {
None => None,
Some(key) => attributes.remove(&key),
};
let name = str::from_utf8(event.name())?.to_owned();
Ok(Element::new(name, ns, attributes, Vec::new()))
}
/// An iterator over references to child elements of an `Element`. /// An iterator over references to child elements of an `Element`.
pub struct Children<'a> { pub struct Children<'a> {
iter: slice::Iter<'a, Node>, iter: slice::Iter<'a, Node>,

View file

@ -1,30 +1,30 @@
//! Provides an error type for this crate. //! Provides an error type for this crate.
use std::io;
use std::convert::From; use std::convert::From;
use xml::writer::Error as WriterError;
use xml::reader::Error as ReaderError;
error_chain! { error_chain! {
foreign_links { foreign_links {
XmlWriterError(WriterError) XmlError(::quick_xml::errors::Error)
/// An error with writing an XML event, from xml::writer::EventWriter. /// An error from quick_xml.
; ;
XmlReaderError(ReaderError) Utf8Error(::std::str::Utf8Error)
/// An error with reading an XML event, from xml::reader::EventReader. /// An UTF-8 conversion error.
; ;
IoError(io::Error) IoError(::std::io::Error)
/// An I/O error, from std::io. /// An I/O error, from std::io.
; ;
} }
errors { errors {
/// En error which is returned when the end of the document was reached prematurely. /// An error which is returned when the end of the document was reached prematurely.
EndOfDocument { EndOfDocument {
description("the end of the document has been reached prematurely") description("the end of the document has been reached prematurely")
display("the end of the document has been reached prematurely") display("the end of the document has been reached prematurely")
} }
/// An error which is returned when an element is closed when it shouldn't be
InvalidElementClosed {
description("The XML is invalid, an element was wrongly closed")
display("the XML is invalid, an element was wrongly closed")
}
} }
} }

View file

@ -64,7 +64,7 @@
//! minidom = "*" //! minidom = "*"
//! ``` //! ```
extern crate xml; extern crate quick_xml;
#[macro_use] extern crate error_chain; #[macro_use] extern crate error_chain;
pub mod error; pub mod error;

View file

@ -1,9 +1,6 @@
use std::io::Cursor;
use std::iter::Iterator; use std::iter::Iterator;
use xml::reader::EventReader; use quick_xml::reader::Reader;
use xml::writer::EventWriter;
use element::Element; use element::Element;
@ -32,19 +29,18 @@ fn build_test_tree() -> Element {
#[test] #[test]
fn reader_works() { fn reader_works() {
let mut reader = EventReader::new(Cursor::new(TEST_STRING)); let mut reader = Reader::from_str(TEST_STRING);
assert_eq!(Element::from_reader(&mut reader).unwrap(), build_test_tree()); assert_eq!(Element::from_reader(&mut reader).unwrap(), build_test_tree());
} }
#[test] #[test]
fn writer_works() { fn writer_works() {
let root = build_test_tree(); let root = build_test_tree();
let mut out = Vec::new(); let mut writer = Vec::new();
{ {
let mut writer = EventWriter::new(&mut out);
root.write_to(&mut writer).unwrap(); root.write_to(&mut writer).unwrap();
} }
assert_eq!(String::from_utf8(out).unwrap(), TEST_STRING); assert_eq!(String::from_utf8(writer).unwrap(), TEST_STRING);
} }
#[test] #[test]
@ -110,8 +106,18 @@ fn two_elements_with_same_arguments_different_order_are_equal() {
#[test] #[test]
fn namespace_attributes_works() { fn namespace_attributes_works() {
let mut reader = EventReader::new(Cursor::new(TEST_STRING)); let mut reader = Reader::from_str(TEST_STRING);
let root = Element::from_reader(&mut reader).unwrap(); let root = Element::from_reader(&mut reader).unwrap();
assert_eq!("en", root.attr("xml:lang").unwrap()); assert_eq!("en", root.attr("xml:lang").unwrap());
assert_eq!("fr", root.get_child("child", "child_ns").unwrap().attr("xml:lang").unwrap()); assert_eq!("fr", root.get_child("child", "child_ns").unwrap().attr("xml:lang").unwrap());
} }
#[test]
fn wrongly_closed_elements_error() {
let elem1 = "<a></b>".parse::<Element>();
assert!(elem1.is_err());
let elem1 = "<a></c></a>".parse::<Element>();
assert!(elem1.is_err());
let elem1 = "<a><c><d/></c></a>".parse::<Element>();
assert!(elem1.is_ok());
}