Replace xml-rs by quick_xml

quick_xml is way faster than xml-rs

Here is an example with a quick atom parser:
    With xml-rs:
        test parse_factorio_atom ... bench:   3,295,678 ns/iter (+/- 165,851)
    With quick_xml:
        test parse_factorio_atom ... bench:     203,215 ns/iter (+/- 13,485)

Unfortunately I had to break the API for this change to happen.
* Element::from_reader now takes `R: BufRead` instead of `R: Read`
* Element::write_to now takes `W: io::Write` instead of `EventWriter<W: Write>`

This migration also allow us to have a write_to function which assumes
we're already in a given namespace (see `write_to_in_namespace`).
This commit is contained in:
Bastien Orivel 2017-06-07 22:40:53 +02:00
parent d6a9e6e9ea
commit 9cec9fce9b
5 changed files with 156 additions and 127 deletions

View file

@ -1,8 +1,8 @@
[package]
name = "minidom"
version = "0.4.3"
version = "0.5.0"
authors = ["lumi <lumi@pew.im>", "Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>", "Bastien Orivel <eijebong+minidom@bananium.fr>"]
description = "A small, simple DOM implementation on top of xml-rs."
description = "A small, simple DOM implementation on top of quick-xml"
homepage = "https://gitlab.com/lumi/minidom-rs"
repository = "https://gitlab.com/lumi/minidom-rs"
documentation = "https://docs.rs/minidom"
@ -14,5 +14,5 @@ license = "MIT"
gitlab = { repository = "lumi/minidom-rs" }
[dependencies]
xml-rs = "0.4.1"
quick-xml = "0.7.3"
error-chain = "0.10.0"

View file

@ -1,18 +1,16 @@
//! Provides an `Element` type, which represents DOM nodes, and a builder to create them with.
use std::io::prelude::*;
use std::io::Cursor;
use std::collections::BTreeMap;
use std::collections::btree_map;
use std::io:: Write;
use std::collections::{btree_map, BTreeMap};
use std::fmt;
use std::str;
use error::{Error, ErrorKind, Result};
use xml::reader::{XmlEvent as ReaderEvent, EventReader};
use xml::writer::{XmlEvent as WriterEvent, EventWriter, EmitterConfig};
use xml::name::Name;
use xml::namespace::NS_NO_PREFIX;
use quick_xml::reader::Reader as EventReader;
use quick_xml::events::{Event, BytesStart};
use std::io::BufRead;
use std::str::FromStr;
@ -69,9 +67,18 @@ impl Node {
Node::Text(ref s) => Some(s),
}
}
fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()>{
match *self {
Node::Element(ref elmt) => elmt.write_to_inner(writer, last_namespace)?,
Node::Text(ref s) => write!(writer, "{}", s)?,
}
Ok(())
}
}
#[derive(Clone, PartialEq, Eq)]
#[derive(Clone, PartialEq, Eq, Debug)]
/// A struct representing a DOM Element.
pub struct Element {
name: String,
@ -82,26 +89,18 @@ pub struct Element {
impl<'a> From<&'a Element> for String {
fn from(elem: &'a Element) -> String {
let mut out = Vec::new();
let config = EmitterConfig::new()
.write_document_declaration(false);
elem.write_to(&mut EventWriter::new_with_config(&mut out, config)).unwrap();
String::from_utf8(out).unwrap()
let mut writer = Vec::new();
elem.write_to(&mut writer).unwrap();
String::from_utf8(writer).unwrap()
}
}
impl fmt::Debug for Element {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", String::from(self))?;
Ok(())
}
}
impl FromStr for Element {
type Err = Error;
fn from_str(s: &str) -> Result<Element> {
let mut reader = EventReader::new(Cursor::new(s));
let mut reader = EventReader::from_str(s);
Element::from_reader(&mut reader)
}
}
@ -246,106 +245,105 @@ impl Element {
}
/// Parse a document from an `EventReader`.
pub fn from_reader<R: Read>(reader: &mut EventReader<R>) -> Result<Element> {
loop {
let e = reader.next()?;
match e {
ReaderEvent::StartElement { name, attributes, namespace } => {
let attributes = attributes.into_iter()
.map(|o| {
(match o.name.prefix {
Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
None => o.name.local_name
},
o.value)
})
.collect();
let ns = if let Some(ref prefix) = name.prefix {
namespace.get(prefix)
}
else {
namespace.get(NS_NO_PREFIX)
}.map(|s| s.to_owned());
pub fn from_reader<R: BufRead>(reader: &mut EventReader<R>) -> Result<Element> {
let mut buf = Vec::new();
let root: Element;
let mut root = Element::new(name.local_name, ns, attributes, Vec::new());
root.from_reader_inner(reader)?;
return Ok(root);
loop {
let e = reader.read_event(&mut buf)?;
match e {
Event::Empty(ref e) | Event::Start(ref e) => {
root = build_element(e)?; // FIXME: could be break build_element(e)? when break value is stable
break;
},
ReaderEvent::EndDocument => {
Event::Eof => {
bail!(ErrorKind::EndOfDocument);
},
_ => () // TODO: may need more errors
}
}
}
};
let mut stack = vec![root];
#[cfg_attr(feature = "cargo-clippy", allow(wrong_self_convention))]
fn from_reader_inner<R: Read>(&mut self, reader: &mut EventReader<R>) -> Result<()> {
loop {
let e = reader.next()?;
match e {
ReaderEvent::StartElement { name, attributes, namespace } => {
let attributes = attributes.into_iter()
.map(|o| {
(match o.name.prefix {
Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
None => o.name.local_name
match reader.read_event(&mut buf)? {
Event::Empty(ref e) => {
let elem = build_element(e)?;
// Since there is no Event::End after, directly append it to the current node
stack.last_mut().unwrap().append_child(elem);
},
o.value)
})
.collect();
let ns = if let Some(ref prefix) = name.prefix {
namespace.get(prefix)
Event::Start(ref e) => {
let elem = build_element(e)?;
stack.push(elem);
},
Event::End(ref e) => {
if stack.len() <= 1 {
break;
}
let elem = stack.pop().unwrap();
if let Some(to) = stack.last_mut() {
if elem.name().as_bytes() != e.name() {
bail!(ErrorKind::InvalidElementClosed);
}
to.append_child(elem);
}
else {
namespace.get(NS_NO_PREFIX)
}.map(|s| s.to_owned());
let elem = Element::new(name.local_name, ns, attributes, Vec::with_capacity(1));
let elem_ref = self.append_child(elem);
elem_ref.from_reader_inner(reader)?;
},
ReaderEvent::EndElement { .. } => {
// TODO: may want to check whether we're closing the correct element
return Ok(());
Event::Text(s) | Event::CData(s) => {
let text = s.unescape_and_decode(reader)?;
if text != "" {
let mut current_elem = stack.last_mut().unwrap();
current_elem.append_text_node(text);
}
},
ReaderEvent::Characters(s) | ReaderEvent::CData(s) => {
self.append_text_node(s);
},
ReaderEvent::EndDocument => {
bail!(ErrorKind::EndOfDocument);
Event::Eof => {
break;
},
_ => (), // TODO: may need to implement more
}
}
Ok(stack.pop().unwrap())
}
/// Output a document to an `EventWriter`.
pub fn write_to<W: Write>(&self, writer: &mut EventWriter<W>) -> Result<()> {
let name = if let Some(ref ns) = self.namespace {
Name::qualified(&self.name, ns, None)
/// Output a document to a `Writer`.
pub fn write_to<W: Write>(&self, writer: &mut W) -> Result<()> {
let mut last_namespace = None;
write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
self.write_to_inner(writer, &mut last_namespace)
}
else {
Name::local(&self.name)
};
let mut start = WriterEvent::start_element(name);
/// Output a document to a `Writer` assuming you're already in the provided namespace
pub fn write_to_in_namespace<W: Write>(&self, writer: &mut W, namespace: &str) -> Result<()> {
write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
self.write_to_inner(writer, &mut Some(namespace.to_owned()))
}
fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()> {
write!(writer, "<")?;
write!(writer, "{}", self.name)?;
if let Some(ref ns) = self.namespace {
start = start.default_ns(ns.clone());
if *last_namespace != self.namespace {
write!(writer, " xmlns=\"{}\"", ns)?;
*last_namespace = Some(ns.clone());
}
for attr in &self.attributes { // TODO: I think this could be done a lot more efficiently
start = start.attr(Name::local(attr.0), attr.1);
}
writer.write(start)?;
for (key, value) in &self.attributes {
write!(writer, " {}=\"{}\"", key, value)?;
}
if self.children.is_empty() {
write!(writer, " />")?;
return Ok(())
}
write!(writer, ">")?;
for child in &self.children {
match *child {
Node::Element(ref e) => {
e.write_to(writer)?;
},
Node::Text(ref s) => {
writer.write(WriterEvent::characters(s))?;
},
child.write_to_inner(writer, last_namespace)?;
}
}
writer.write(WriterEvent::end_element())?;
write!(writer, "</{}>", self.name)?;
Ok(())
}
@ -354,7 +352,7 @@ impl Element {
/// # Examples
///
/// ```rust
/// use minidom::{Element, Node};
/// use minidom::Element;
///
/// let elem: Element = "<root>a<c1 />b<c2 />c</root>".parse().unwrap();
///
@ -592,6 +590,31 @@ impl Element {
}
}
fn build_element(event: &BytesStart) -> Result<Element> {
let mut attributes = event.attributes()
.map(|o| {
let o = o?;
let key = str::from_utf8(o.key)?.to_owned();
let value = str::from_utf8(o.value)?.to_owned();
Ok((key, value))
}
)
.collect::<Result<BTreeMap<String, String>>>()?;
let mut ns_key = None;
for (key, _) in &attributes {
if key == "xmlns" || key.starts_with("xmlns:") {
ns_key = Some(key.clone());
}
}
let ns = match ns_key {
None => None,
Some(key) => attributes.remove(&key),
};
let name = str::from_utf8(event.name())?.to_owned();
Ok(Element::new(name, ns, attributes, Vec::new()))
}
/// An iterator over references to child elements of an `Element`.
pub struct Children<'a> {
iter: slice::Iter<'a, Node>,

View file

@ -1,30 +1,30 @@
//! Provides an error type for this crate.
use std::io;
use std::convert::From;
use xml::writer::Error as WriterError;
use xml::reader::Error as ReaderError;
error_chain! {
foreign_links {
XmlWriterError(WriterError)
/// An error with writing an XML event, from xml::writer::EventWriter.
XmlError(::quick_xml::errors::Error)
/// An error from quick_xml.
;
XmlReaderError(ReaderError)
/// An error with reading an XML event, from xml::reader::EventReader.
Utf8Error(::std::str::Utf8Error)
/// An UTF-8 conversion error.
;
IoError(io::Error)
IoError(::std::io::Error)
/// An I/O error, from std::io.
;
}
errors {
/// En error which is returned when the end of the document was reached prematurely.
/// An error which is returned when the end of the document was reached prematurely.
EndOfDocument {
description("the end of the document has been reached prematurely")
display("the end of the document has been reached prematurely")
}
/// An error which is returned when an element is closed when it shouldn't be
InvalidElementClosed {
description("The XML is invalid, an element was wrongly closed")
display("the XML is invalid, an element was wrongly closed")
}
}
}

View file

@ -64,7 +64,7 @@
//! minidom = "*"
//! ```
extern crate xml;
extern crate quick_xml;
#[macro_use] extern crate error_chain;
pub mod error;

View file

@ -1,9 +1,6 @@
use std::io::Cursor;
use std::iter::Iterator;
use xml::reader::EventReader;
use xml::writer::EventWriter;
use quick_xml::reader::Reader;
use element::Element;
@ -32,19 +29,18 @@ fn build_test_tree() -> Element {
#[test]
fn reader_works() {
let mut reader = EventReader::new(Cursor::new(TEST_STRING));
let mut reader = Reader::from_str(TEST_STRING);
assert_eq!(Element::from_reader(&mut reader).unwrap(), build_test_tree());
}
#[test]
fn writer_works() {
let root = build_test_tree();
let mut out = Vec::new();
let mut writer = Vec::new();
{
let mut writer = EventWriter::new(&mut out);
root.write_to(&mut writer).unwrap();
}
assert_eq!(String::from_utf8(out).unwrap(), TEST_STRING);
assert_eq!(String::from_utf8(writer).unwrap(), TEST_STRING);
}
#[test]
@ -110,8 +106,18 @@ fn two_elements_with_same_arguments_different_order_are_equal() {
#[test]
fn namespace_attributes_works() {
let mut reader = EventReader::new(Cursor::new(TEST_STRING));
let mut reader = Reader::from_str(TEST_STRING);
let root = Element::from_reader(&mut reader).unwrap();
assert_eq!("en", root.attr("xml:lang").unwrap());
assert_eq!("fr", root.get_child("child", "child_ns").unwrap().attr("xml:lang").unwrap());
}
#[test]
fn wrongly_closed_elements_error() {
let elem1 = "<a></b>".parse::<Element>();
assert!(elem1.is_err());
let elem1 = "<a></c></a>".parse::<Element>();
assert!(elem1.is_err());
let elem1 = "<a><c><d/></c></a>".parse::<Element>();
assert!(elem1.is_ok());
}