mirror of
https://gitlab.com/xmpp-rs/xmpp-rs.git
synced 2024-07-12 22:21:53 +00:00
Replace xml-rs by quick_xml
quick_xml is way faster than xml-rs. Here is an example with a quick atom parser. With xml-rs: test parse_factorio_atom ... bench: 3,295,678 ns/iter (+/- 165,851). With quick_xml: test parse_factorio_atom ... bench: 203,215 ns/iter (+/- 13,485). Unfortunately, I had to break the API for this change to happen: * Element::from_reader now takes `R: BufRead` instead of `R: Read`; * Element::write_to now takes `W: io::Write` instead of `EventWriter<W: Write>`. This migration also allows us to have a write_to function which assumes we're already in a given namespace (see `write_to_in_namespace`).
This commit is contained in:
parent
d6a9e6e9ea
commit
9cec9fce9b
5 changed files with 156 additions and 127 deletions
|
@ -1,8 +1,8 @@
|
||||||
[package]
|
[package]
|
||||||
name = "minidom"
|
name = "minidom"
|
||||||
version = "0.4.3"
|
version = "0.5.0"
|
||||||
authors = ["lumi <lumi@pew.im>", "Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>", "Bastien Orivel <eijebong+minidom@bananium.fr>"]
|
authors = ["lumi <lumi@pew.im>", "Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>", "Bastien Orivel <eijebong+minidom@bananium.fr>"]
|
||||||
description = "A small, simple DOM implementation on top of xml-rs."
|
description = "A small, simple DOM implementation on top of quick-xml"
|
||||||
homepage = "https://gitlab.com/lumi/minidom-rs"
|
homepage = "https://gitlab.com/lumi/minidom-rs"
|
||||||
repository = "https://gitlab.com/lumi/minidom-rs"
|
repository = "https://gitlab.com/lumi/minidom-rs"
|
||||||
documentation = "https://docs.rs/minidom"
|
documentation = "https://docs.rs/minidom"
|
||||||
|
@ -14,5 +14,5 @@ license = "MIT"
|
||||||
gitlab = { repository = "lumi/minidom-rs" }
|
gitlab = { repository = "lumi/minidom-rs" }
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
xml-rs = "0.4.1"
|
quick-xml = "0.7.3"
|
||||||
error-chain = "0.10.0"
|
error-chain = "0.10.0"
|
||||||
|
|
221
src/element.rs
221
src/element.rs
|
@ -1,18 +1,16 @@
|
||||||
//! Provides an `Element` type, which represents DOM nodes, and a builder to create them with.
|
//! Provides an `Element` type, which represents DOM nodes, and a builder to create them with.
|
||||||
|
|
||||||
use std::io::prelude::*;
|
use std::io:: Write;
|
||||||
use std::io::Cursor;
|
use std::collections::{btree_map, BTreeMap};
|
||||||
use std::collections::BTreeMap;
|
|
||||||
use std::collections::btree_map;
|
|
||||||
|
|
||||||
use std::fmt;
|
use std::str;
|
||||||
|
|
||||||
use error::{Error, ErrorKind, Result};
|
use error::{Error, ErrorKind, Result};
|
||||||
|
|
||||||
use xml::reader::{XmlEvent as ReaderEvent, EventReader};
|
use quick_xml::reader::Reader as EventReader;
|
||||||
use xml::writer::{XmlEvent as WriterEvent, EventWriter, EmitterConfig};
|
use quick_xml::events::{Event, BytesStart};
|
||||||
use xml::name::Name;
|
|
||||||
use xml::namespace::NS_NO_PREFIX;
|
use std::io::BufRead;
|
||||||
|
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
@ -69,9 +67,18 @@ impl Node {
|
||||||
Node::Text(ref s) => Some(s),
|
Node::Text(ref s) => Some(s),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()>{
|
||||||
|
match *self {
|
||||||
|
Node::Element(ref elmt) => elmt.write_to_inner(writer, last_namespace)?,
|
||||||
|
Node::Text(ref s) => write!(writer, "{}", s)?,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq)]
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
/// A struct representing a DOM Element.
|
/// A struct representing a DOM Element.
|
||||||
pub struct Element {
|
pub struct Element {
|
||||||
name: String,
|
name: String,
|
||||||
|
@ -82,26 +89,18 @@ pub struct Element {
|
||||||
|
|
||||||
impl<'a> From<&'a Element> for String {
|
impl<'a> From<&'a Element> for String {
|
||||||
fn from(elem: &'a Element) -> String {
|
fn from(elem: &'a Element) -> String {
|
||||||
let mut out = Vec::new();
|
let mut writer = Vec::new();
|
||||||
let config = EmitterConfig::new()
|
elem.write_to(&mut writer).unwrap();
|
||||||
.write_document_declaration(false);
|
String::from_utf8(writer).unwrap()
|
||||||
elem.write_to(&mut EventWriter::new_with_config(&mut out, config)).unwrap();
|
|
||||||
String::from_utf8(out).unwrap()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for Element {
|
|
||||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
write!(fmt, "{}", String::from(self))?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FromStr for Element {
|
impl FromStr for Element {
|
||||||
type Err = Error;
|
type Err = Error;
|
||||||
|
|
||||||
fn from_str(s: &str) -> Result<Element> {
|
fn from_str(s: &str) -> Result<Element> {
|
||||||
let mut reader = EventReader::new(Cursor::new(s));
|
let mut reader = EventReader::from_str(s);
|
||||||
Element::from_reader(&mut reader)
|
Element::from_reader(&mut reader)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -246,106 +245,105 @@ impl Element {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a document from an `EventReader`.
|
/// Parse a document from an `EventReader`.
|
||||||
pub fn from_reader<R: Read>(reader: &mut EventReader<R>) -> Result<Element> {
|
pub fn from_reader<R: BufRead>(reader: &mut EventReader<R>) -> Result<Element> {
|
||||||
loop {
|
let mut buf = Vec::new();
|
||||||
let e = reader.next()?;
|
let root: Element;
|
||||||
match e {
|
|
||||||
ReaderEvent::StartElement { name, attributes, namespace } => {
|
|
||||||
let attributes = attributes.into_iter()
|
|
||||||
.map(|o| {
|
|
||||||
(match o.name.prefix {
|
|
||||||
Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
|
|
||||||
None => o.name.local_name
|
|
||||||
},
|
|
||||||
o.value)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
let ns = if let Some(ref prefix) = name.prefix {
|
|
||||||
namespace.get(prefix)
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
namespace.get(NS_NO_PREFIX)
|
|
||||||
}.map(|s| s.to_owned());
|
|
||||||
|
|
||||||
let mut root = Element::new(name.local_name, ns, attributes, Vec::new());
|
loop {
|
||||||
root.from_reader_inner(reader)?;
|
let e = reader.read_event(&mut buf)?;
|
||||||
return Ok(root);
|
match e {
|
||||||
|
Event::Empty(ref e) | Event::Start(ref e) => {
|
||||||
|
root = build_element(e)?; // FIXME: could be break build_element(e)? when break value is stable
|
||||||
|
break;
|
||||||
},
|
},
|
||||||
ReaderEvent::EndDocument => {
|
Event::Eof => {
|
||||||
bail!(ErrorKind::EndOfDocument);
|
bail!(ErrorKind::EndOfDocument);
|
||||||
},
|
},
|
||||||
_ => () // TODO: may need more errors
|
_ => () // TODO: may need more errors
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
}
|
|
||||||
|
let mut stack = vec![root];
|
||||||
|
|
||||||
#[cfg_attr(feature = "cargo-clippy", allow(wrong_self_convention))]
|
|
||||||
fn from_reader_inner<R: Read>(&mut self, reader: &mut EventReader<R>) -> Result<()> {
|
|
||||||
loop {
|
loop {
|
||||||
let e = reader.next()?;
|
match reader.read_event(&mut buf)? {
|
||||||
match e {
|
Event::Empty(ref e) => {
|
||||||
ReaderEvent::StartElement { name, attributes, namespace } => {
|
let elem = build_element(e)?;
|
||||||
let attributes = attributes.into_iter()
|
// Since there is no Event::End after, directly append it to the current node
|
||||||
.map(|o| {
|
stack.last_mut().unwrap().append_child(elem);
|
||||||
(match o.name.prefix {
|
|
||||||
Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
|
|
||||||
None => o.name.local_name
|
|
||||||
},
|
},
|
||||||
o.value)
|
Event::Start(ref e) => {
|
||||||
})
|
let elem = build_element(e)?;
|
||||||
.collect();
|
stack.push(elem);
|
||||||
let ns = if let Some(ref prefix) = name.prefix {
|
},
|
||||||
namespace.get(prefix)
|
Event::End(ref e) => {
|
||||||
|
if stack.len() <= 1 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let elem = stack.pop().unwrap();
|
||||||
|
if let Some(to) = stack.last_mut() {
|
||||||
|
if elem.name().as_bytes() != e.name() {
|
||||||
|
bail!(ErrorKind::InvalidElementClosed);
|
||||||
|
}
|
||||||
|
to.append_child(elem);
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
namespace.get(NS_NO_PREFIX)
|
|
||||||
}.map(|s| s.to_owned());
|
|
||||||
let elem = Element::new(name.local_name, ns, attributes, Vec::with_capacity(1));
|
|
||||||
let elem_ref = self.append_child(elem);
|
|
||||||
elem_ref.from_reader_inner(reader)?;
|
|
||||||
},
|
},
|
||||||
ReaderEvent::EndElement { .. } => {
|
Event::Text(s) | Event::CData(s) => {
|
||||||
// TODO: may want to check whether we're closing the correct element
|
let text = s.unescape_and_decode(reader)?;
|
||||||
return Ok(());
|
if text != "" {
|
||||||
|
let mut current_elem = stack.last_mut().unwrap();
|
||||||
|
current_elem.append_text_node(text);
|
||||||
|
}
|
||||||
},
|
},
|
||||||
ReaderEvent::Characters(s) | ReaderEvent::CData(s) => {
|
Event::Eof => {
|
||||||
self.append_text_node(s);
|
break;
|
||||||
},
|
|
||||||
ReaderEvent::EndDocument => {
|
|
||||||
bail!(ErrorKind::EndOfDocument);
|
|
||||||
},
|
},
|
||||||
_ => (), // TODO: may need to implement more
|
_ => (), // TODO: may need to implement more
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(stack.pop().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Output a document to an `EventWriter`.
|
/// Output a document to a `Writer`.
|
||||||
pub fn write_to<W: Write>(&self, writer: &mut EventWriter<W>) -> Result<()> {
|
pub fn write_to<W: Write>(&self, writer: &mut W) -> Result<()> {
|
||||||
let name = if let Some(ref ns) = self.namespace {
|
let mut last_namespace = None;
|
||||||
Name::qualified(&self.name, ns, None)
|
write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
|
||||||
|
self.write_to_inner(writer, &mut last_namespace)
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
Name::local(&self.name)
|
/// Output a document to a `Writer` assuming you're already in the provided namespace
|
||||||
};
|
pub fn write_to_in_namespace<W: Write>(&self, writer: &mut W, namespace: &str) -> Result<()> {
|
||||||
let mut start = WriterEvent::start_element(name);
|
write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
|
||||||
|
self.write_to_inner(writer, &mut Some(namespace.to_owned()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()> {
|
||||||
|
write!(writer, "<")?;
|
||||||
|
write!(writer, "{}", self.name)?;
|
||||||
|
|
||||||
if let Some(ref ns) = self.namespace {
|
if let Some(ref ns) = self.namespace {
|
||||||
start = start.default_ns(ns.clone());
|
if *last_namespace != self.namespace {
|
||||||
|
write!(writer, " xmlns=\"{}\"", ns)?;
|
||||||
|
*last_namespace = Some(ns.clone());
|
||||||
}
|
}
|
||||||
for attr in &self.attributes { // TODO: I think this could be done a lot more efficiently
|
|
||||||
start = start.attr(Name::local(attr.0), attr.1);
|
|
||||||
}
|
}
|
||||||
writer.write(start)?;
|
|
||||||
|
for (key, value) in &self.attributes {
|
||||||
|
write!(writer, " {}=\"{}\"", key, value)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.children.is_empty() {
|
||||||
|
write!(writer, " />")?;
|
||||||
|
return Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
write!(writer, ">")?;
|
||||||
|
|
||||||
for child in &self.children {
|
for child in &self.children {
|
||||||
match *child {
|
child.write_to_inner(writer, last_namespace)?;
|
||||||
Node::Element(ref e) => {
|
|
||||||
e.write_to(writer)?;
|
|
||||||
},
|
|
||||||
Node::Text(ref s) => {
|
|
||||||
writer.write(WriterEvent::characters(s))?;
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
writer.write(WriterEvent::end_element())?;
|
write!(writer, "</{}>", self.name)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -354,7 +352,7 @@ impl Element {
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
/// ```rust
|
/// ```rust
|
||||||
/// use minidom::{Element, Node};
|
/// use minidom::Element;
|
||||||
///
|
///
|
||||||
/// let elem: Element = "<root>a<c1 />b<c2 />c</root>".parse().unwrap();
|
/// let elem: Element = "<root>a<c1 />b<c2 />c</root>".parse().unwrap();
|
||||||
///
|
///
|
||||||
|
@ -592,6 +590,31 @@ impl Element {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_element(event: &BytesStart) -> Result<Element> {
|
||||||
|
let mut attributes = event.attributes()
|
||||||
|
.map(|o| {
|
||||||
|
let o = o?;
|
||||||
|
let key = str::from_utf8(o.key)?.to_owned();
|
||||||
|
let value = str::from_utf8(o.value)?.to_owned();
|
||||||
|
Ok((key, value))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
.collect::<Result<BTreeMap<String, String>>>()?;
|
||||||
|
let mut ns_key = None;
|
||||||
|
for (key, _) in &attributes {
|
||||||
|
if key == "xmlns" || key.starts_with("xmlns:") {
|
||||||
|
ns_key = Some(key.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let ns = match ns_key {
|
||||||
|
None => None,
|
||||||
|
Some(key) => attributes.remove(&key),
|
||||||
|
};
|
||||||
|
let name = str::from_utf8(event.name())?.to_owned();
|
||||||
|
Ok(Element::new(name, ns, attributes, Vec::new()))
|
||||||
|
}
|
||||||
|
|
||||||
/// An iterator over references to child elements of an `Element`.
|
/// An iterator over references to child elements of an `Element`.
|
||||||
pub struct Children<'a> {
|
pub struct Children<'a> {
|
||||||
iter: slice::Iter<'a, Node>,
|
iter: slice::Iter<'a, Node>,
|
||||||
|
|
22
src/error.rs
22
src/error.rs
|
@ -1,30 +1,30 @@
|
||||||
//! Provides an error type for this crate.
|
//! Provides an error type for this crate.
|
||||||
|
|
||||||
use std::io;
|
|
||||||
|
|
||||||
use std::convert::From;
|
use std::convert::From;
|
||||||
|
|
||||||
use xml::writer::Error as WriterError;
|
|
||||||
use xml::reader::Error as ReaderError;
|
|
||||||
|
|
||||||
error_chain! {
|
error_chain! {
|
||||||
foreign_links {
|
foreign_links {
|
||||||
XmlWriterError(WriterError)
|
XmlError(::quick_xml::errors::Error)
|
||||||
/// An error with writing an XML event, from xml::writer::EventWriter.
|
/// An error from quick_xml.
|
||||||
;
|
;
|
||||||
XmlReaderError(ReaderError)
|
Utf8Error(::std::str::Utf8Error)
|
||||||
/// An error with reading an XML event, from xml::reader::EventReader.
|
/// An UTF-8 conversion error.
|
||||||
;
|
;
|
||||||
IoError(io::Error)
|
IoError(::std::io::Error)
|
||||||
/// An I/O error, from std::io.
|
/// An I/O error, from std::io.
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
errors {
|
errors {
|
||||||
/// En error which is returned when the end of the document was reached prematurely.
|
/// An error which is returned when the end of the document was reached prematurely.
|
||||||
EndOfDocument {
|
EndOfDocument {
|
||||||
description("the end of the document has been reached prematurely")
|
description("the end of the document has been reached prematurely")
|
||||||
display("the end of the document has been reached prematurely")
|
display("the end of the document has been reached prematurely")
|
||||||
}
|
}
|
||||||
|
/// An error which is returned when an element is closed when it shouldn't be
|
||||||
|
InvalidElementClosed {
|
||||||
|
description("The XML is invalid, an element was wrongly closed")
|
||||||
|
display("the XML is invalid, an element was wrongly closed")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,7 +64,7 @@
|
||||||
//! minidom = "*"
|
//! minidom = "*"
|
||||||
//! ```
|
//! ```
|
||||||
|
|
||||||
extern crate xml;
|
extern crate quick_xml;
|
||||||
#[macro_use] extern crate error_chain;
|
#[macro_use] extern crate error_chain;
|
||||||
|
|
||||||
pub mod error;
|
pub mod error;
|
||||||
|
|
24
src/tests.rs
24
src/tests.rs
|
@ -1,9 +1,6 @@
|
||||||
use std::io::Cursor;
|
|
||||||
|
|
||||||
use std::iter::Iterator;
|
use std::iter::Iterator;
|
||||||
|
|
||||||
use xml::reader::EventReader;
|
use quick_xml::reader::Reader;
|
||||||
use xml::writer::EventWriter;
|
|
||||||
|
|
||||||
use element::Element;
|
use element::Element;
|
||||||
|
|
||||||
|
@ -32,19 +29,18 @@ fn build_test_tree() -> Element {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn reader_works() {
|
fn reader_works() {
|
||||||
let mut reader = EventReader::new(Cursor::new(TEST_STRING));
|
let mut reader = Reader::from_str(TEST_STRING);
|
||||||
assert_eq!(Element::from_reader(&mut reader).unwrap(), build_test_tree());
|
assert_eq!(Element::from_reader(&mut reader).unwrap(), build_test_tree());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn writer_works() {
|
fn writer_works() {
|
||||||
let root = build_test_tree();
|
let root = build_test_tree();
|
||||||
let mut out = Vec::new();
|
let mut writer = Vec::new();
|
||||||
{
|
{
|
||||||
let mut writer = EventWriter::new(&mut out);
|
|
||||||
root.write_to(&mut writer).unwrap();
|
root.write_to(&mut writer).unwrap();
|
||||||
}
|
}
|
||||||
assert_eq!(String::from_utf8(out).unwrap(), TEST_STRING);
|
assert_eq!(String::from_utf8(writer).unwrap(), TEST_STRING);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -110,8 +106,18 @@ fn two_elements_with_same_arguments_different_order_are_equal() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn namespace_attributes_works() {
|
fn namespace_attributes_works() {
|
||||||
let mut reader = EventReader::new(Cursor::new(TEST_STRING));
|
let mut reader = Reader::from_str(TEST_STRING);
|
||||||
let root = Element::from_reader(&mut reader).unwrap();
|
let root = Element::from_reader(&mut reader).unwrap();
|
||||||
assert_eq!("en", root.attr("xml:lang").unwrap());
|
assert_eq!("en", root.attr("xml:lang").unwrap());
|
||||||
assert_eq!("fr", root.get_child("child", "child_ns").unwrap().attr("xml:lang").unwrap());
|
assert_eq!("fr", root.get_child("child", "child_ns").unwrap().attr("xml:lang").unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn wrongly_closed_elements_error() {
|
||||||
|
let elem1 = "<a></b>".parse::<Element>();
|
||||||
|
assert!(elem1.is_err());
|
||||||
|
let elem1 = "<a></c></a>".parse::<Element>();
|
||||||
|
assert!(elem1.is_err());
|
||||||
|
let elem1 = "<a><c><d/></c></a>".parse::<Element>();
|
||||||
|
assert!(elem1.is_ok());
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue