mirror of
https://gitlab.com/xmpp-rs/xmpp-rs.git
synced 2024-07-12 22:21:53 +00:00
minidom: add tree_builder
This commit is contained in:
parent
4e5630d03c
commit
1887fdd1b5
7 changed files with 187 additions and 203 deletions
|
@ -17,19 +17,18 @@ use crate::error::{Error, Result};
|
||||||
use crate::namespaces::NSChoice;
|
use crate::namespaces::NSChoice;
|
||||||
use crate::node::Node;
|
use crate::node::Node;
|
||||||
use crate::prefixes::{Namespace, Prefix, Prefixes};
|
use crate::prefixes::{Namespace, Prefix, Prefixes};
|
||||||
|
use crate::tokenizer::Tokenizer;
|
||||||
|
use crate::tree_builder::TreeBuilder;
|
||||||
|
|
||||||
use std::collections::{btree_map, BTreeMap};
|
use std::collections::{btree_map, BTreeMap};
|
||||||
use std::io::Write;
|
use std::io::{Cursor, Read, Write};
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, Event};
|
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, Event};
|
||||||
use quick_xml::Reader as EventReader;
|
|
||||||
use quick_xml::Writer as EventWriter;
|
use quick_xml::Writer as EventWriter;
|
||||||
|
|
||||||
use std::io::BufRead;
|
|
||||||
|
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use std::slice;
|
use std::slice;
|
||||||
|
@ -102,7 +101,7 @@ impl FromStr for Element {
|
||||||
type Err = Error;
|
type Err = Error;
|
||||||
|
|
||||||
fn from_str(s: &str) -> Result<Element> {
|
fn from_str(s: &str) -> Result<Element> {
|
||||||
let mut reader = EventReader::from_str(s);
|
let mut reader = Cursor::new(s);
|
||||||
Element::from_reader(&mut reader)
|
Element::from_reader(&mut reader)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -128,7 +127,7 @@ fn ensure_no_prefix<S: AsRef<str>>(s: &S) -> Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Element {
|
impl Element {
|
||||||
fn new<P: Into<Prefixes>>(
|
pub(crate) fn new<P: Into<Prefixes>>(
|
||||||
name: String,
|
name: String,
|
||||||
namespace: String,
|
namespace: String,
|
||||||
prefix: Option<Prefix>,
|
prefix: Option<Prefix>,
|
||||||
|
@ -310,123 +309,28 @@ impl Element {
|
||||||
namespace.into().compare(self.namespace.as_ref())
|
namespace.into().compare(self.namespace.as_ref())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a document from an `EventReader`.
|
/// Parse a document from a `Read`.
|
||||||
pub fn from_reader<R: BufRead>(reader: &mut EventReader<R>) -> Result<Element> {
|
pub fn from_reader<R: Read>(mut reader: R) -> Result<Element> {
|
||||||
let mut buf = Vec::new();
|
const CHUNK_SIZE: usize = 65536;
|
||||||
|
|
||||||
let mut prefixes = BTreeMap::new();
|
|
||||||
let root: Element = loop {
|
|
||||||
let e = reader.read_event(&mut buf)?;
|
|
||||||
match e {
|
|
||||||
Event::Empty(ref e) | Event::Start(ref e) => {
|
|
||||||
break build_element(reader, e, &mut prefixes)?;
|
|
||||||
}
|
|
||||||
Event::Eof => {
|
|
||||||
return Err(Error::EndOfDocument);
|
|
||||||
}
|
|
||||||
Event::Comment { .. } => {
|
|
||||||
return Err(Error::NoComments);
|
|
||||||
}
|
|
||||||
Event::Text { .. }
|
|
||||||
| Event::End { .. }
|
|
||||||
| Event::CData { .. }
|
|
||||||
| Event::Decl { .. }
|
|
||||||
| Event::PI { .. }
|
|
||||||
| Event::DocType { .. } => (), // TODO: may need more errors
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut stack = vec![root];
|
|
||||||
let mut prefix_stack = vec![prefixes];
|
|
||||||
|
|
||||||
|
let mut buf = [0; CHUNK_SIZE];
|
||||||
|
let mut tokenizer = Tokenizer::new();
|
||||||
|
let mut tree_builder = TreeBuilder::new();
|
||||||
loop {
|
loop {
|
||||||
match reader.read_event(&mut buf)? {
|
let len = reader.read(&mut buf)?;
|
||||||
Event::Empty(ref e) => {
|
if len == 0 {
|
||||||
let mut prefixes = prefix_stack.last().unwrap().clone();
|
break;
|
||||||
let elem = build_element(reader, e, &mut prefixes)?;
|
}
|
||||||
// Since there is no Event::End after, directly append it to the current node
|
tokenizer.push(&buf[0..len]);
|
||||||
stack.last_mut().unwrap().append_child(elem);
|
while let Some(token) = tokenizer.pull()? {
|
||||||
|
tree_builder.process_token(token);
|
||||||
|
|
||||||
|
if let Some(root) = tree_builder.root.take() {
|
||||||
|
return Ok(root);
|
||||||
}
|
}
|
||||||
Event::Start(ref e) => {
|
|
||||||
let mut prefixes = prefix_stack.last().unwrap().clone();
|
|
||||||
let elem = build_element(reader, e, &mut prefixes)?;
|
|
||||||
stack.push(elem);
|
|
||||||
prefix_stack.push(prefixes);
|
|
||||||
}
|
|
||||||
Event::End(ref e) => {
|
|
||||||
if stack.len() <= 1 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
let prefixes = prefix_stack.pop().unwrap();
|
|
||||||
let elem = stack.pop().unwrap();
|
|
||||||
if let Some(to) = stack.last_mut() {
|
|
||||||
// TODO: check whether this is correct, we are comparing &[u8]s, not &strs
|
|
||||||
let elem_name = e.name();
|
|
||||||
let mut split_iter = elem_name.splitn(2, |u| *u == 0x3A);
|
|
||||||
let possible_prefix = split_iter.next().unwrap(); // Can't be empty.
|
|
||||||
let opening_prefix = {
|
|
||||||
let mut tmp: Option<Option<String>> = None;
|
|
||||||
for (prefix, ns) in prefixes {
|
|
||||||
if ns == elem.namespace {
|
|
||||||
tmp = Some(prefix.clone());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match tmp {
|
|
||||||
Some(prefix) => prefix,
|
|
||||||
None => return Err(Error::InvalidPrefix),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
match split_iter.next() {
|
|
||||||
// There is a prefix on the closing tag
|
|
||||||
Some(name) => {
|
|
||||||
// Does the closing prefix match the opening prefix?
|
|
||||||
match opening_prefix {
|
|
||||||
Some(prefix) if possible_prefix == prefix.as_bytes() => (),
|
|
||||||
_ => return Err(Error::InvalidElementClosed),
|
|
||||||
}
|
|
||||||
// Does the closing tag name match the opening tag name?
|
|
||||||
if name != elem.name().as_bytes() {
|
|
||||||
return Err(Error::InvalidElementClosed);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// There was no prefix on the closing tag
|
|
||||||
None => {
|
|
||||||
// Is there a prefix on the opening tag?
|
|
||||||
if opening_prefix.is_some() {
|
|
||||||
return Err(Error::InvalidElementClosed);
|
|
||||||
}
|
|
||||||
// Does the opening tag name match the closing one?
|
|
||||||
if possible_prefix != elem.name().as_bytes() {
|
|
||||||
return Err(Error::InvalidElementClosed);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
to.append_child(elem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Event::Text(s) => {
|
|
||||||
let text = s.unescape_and_decode(reader)?;
|
|
||||||
if !text.is_empty() {
|
|
||||||
let current_elem = stack.last_mut().unwrap();
|
|
||||||
current_elem.append_text_node(text);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Event::CData(s) => {
|
|
||||||
let text = s.unescape_and_decode(&reader)?;
|
|
||||||
if !text.is_empty() {
|
|
||||||
let current_elem = stack.last_mut().unwrap();
|
|
||||||
current_elem.append_text_node(text);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Event::Eof => {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Event::Comment(_) => return Err(Error::NoComments),
|
|
||||||
Event::Decl { .. } | Event::PI { .. } | Event::DocType { .. } => (),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(stack.pop().unwrap())
|
Err(Error::EndOfDocument)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Output a document to a `Writer`.
|
/// Output a document to a `Writer`.
|
||||||
|
@ -824,68 +728,6 @@ impl Element {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn split_element_name<S: AsRef<str>>(s: S) -> Result<(Option<String>, String)> {
|
|
||||||
let name_parts = s.as_ref().split(':').collect::<Vec<&str>>();
|
|
||||||
match name_parts.len() {
|
|
||||||
2 => Ok((Some(name_parts[0].to_owned()), name_parts[1].to_owned())),
|
|
||||||
1 => Ok((None, name_parts[0].to_owned())),
|
|
||||||
_ => Err(Error::InvalidElement),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_element<R: BufRead>(
|
|
||||||
reader: &EventReader<R>,
|
|
||||||
event: &BytesStart,
|
|
||||||
prefixes: &mut BTreeMap<Prefix, Namespace>,
|
|
||||||
) -> Result<Element> {
|
|
||||||
let (prefix, name) = split_element_name(str::from_utf8(event.name())?)?;
|
|
||||||
let mut local_prefixes = BTreeMap::new();
|
|
||||||
|
|
||||||
let attributes = event
|
|
||||||
.attributes()
|
|
||||||
.map(|o| {
|
|
||||||
let o = o?;
|
|
||||||
let key = str::from_utf8(o.key)?.to_owned();
|
|
||||||
let value = o.unescape_and_decode_value(reader)?;
|
|
||||||
Ok((key, value))
|
|
||||||
})
|
|
||||||
.filter(|o| match *o {
|
|
||||||
Ok((ref key, ref value)) if key == "xmlns" => {
|
|
||||||
local_prefixes.insert(None, value.clone());
|
|
||||||
prefixes.insert(None, value.clone());
|
|
||||||
false
|
|
||||||
}
|
|
||||||
Ok((ref key, ref value)) if key.starts_with("xmlns:") => {
|
|
||||||
local_prefixes.insert(Some(key[6..].to_owned()), value.to_owned());
|
|
||||||
prefixes.insert(Some(key[6..].to_owned()), value.to_owned());
|
|
||||||
false
|
|
||||||
}
|
|
||||||
_ => true,
|
|
||||||
})
|
|
||||||
.collect::<Result<BTreeMap<String, String>>>()?;
|
|
||||||
|
|
||||||
let namespace: &String = {
|
|
||||||
if let Some(namespace) = local_prefixes.get(&prefix) {
|
|
||||||
namespace
|
|
||||||
} else if let Some(namespace) = prefixes.get(&prefix) {
|
|
||||||
namespace
|
|
||||||
} else {
|
|
||||||
return Err(Error::MissingNamespace);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(Element::new(
|
|
||||||
name,
|
|
||||||
namespace.clone(),
|
|
||||||
// Note that this will always be Some(_) as we can't distinguish between the None case and
|
|
||||||
// Some(None). At least we make sure the prefix has a namespace associated.
|
|
||||||
Some(prefix),
|
|
||||||
local_prefixes,
|
|
||||||
attributes,
|
|
||||||
Vec::new(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An iterator over references to child elements of an `Element`.
|
/// An iterator over references to child elements of an `Element`.
|
||||||
pub struct Children<'a> {
|
pub struct Children<'a> {
|
||||||
iter: slice::Iter<'a, Node>,
|
iter: slice::Iter<'a, Node>,
|
||||||
|
@ -1068,7 +910,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_from_reader_simple() {
|
fn test_from_reader_simple() {
|
||||||
let xml = "<foo xmlns='ns1'></foo>";
|
let xml = "<foo xmlns='ns1'></foo>";
|
||||||
let mut reader = EventReader::from_str(xml);
|
let mut reader = Cursor::new(xml);
|
||||||
let elem = Element::from_reader(&mut reader);
|
let elem = Element::from_reader(&mut reader);
|
||||||
|
|
||||||
let elem2 = Element::builder("foo", "ns1").build();
|
let elem2 = Element::builder("foo", "ns1").build();
|
||||||
|
@ -1079,7 +921,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_from_reader_nested() {
|
fn test_from_reader_nested() {
|
||||||
let xml = "<foo xmlns='ns1'><bar xmlns='ns1' baz='qxx' /></foo>";
|
let xml = "<foo xmlns='ns1'><bar xmlns='ns1' baz='qxx' /></foo>";
|
||||||
let mut reader = EventReader::from_str(xml);
|
let mut reader = Cursor::new(xml);
|
||||||
let elem = Element::from_reader(&mut reader);
|
let elem = Element::from_reader(&mut reader);
|
||||||
|
|
||||||
let nested = Element::builder("bar", "ns1").attr("baz", "qxx").build();
|
let nested = Element::builder("bar", "ns1").attr("baz", "qxx").build();
|
||||||
|
@ -1091,7 +933,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_from_reader_with_prefix() {
|
fn test_from_reader_with_prefix() {
|
||||||
let xml = "<foo xmlns='ns1'><prefix:bar xmlns:prefix='ns1' baz='qxx' /></foo>";
|
let xml = "<foo xmlns='ns1'><prefix:bar xmlns:prefix='ns1' baz='qxx' /></foo>";
|
||||||
let mut reader = EventReader::from_str(xml);
|
let mut reader = Cursor::new(xml);
|
||||||
let elem = Element::from_reader(&mut reader);
|
let elem = Element::from_reader(&mut reader);
|
||||||
|
|
||||||
let nested = Element::builder("bar", "ns1").attr("baz", "qxx").build();
|
let nested = Element::builder("bar", "ns1").attr("baz", "qxx").build();
|
||||||
|
@ -1103,7 +945,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_from_reader_split_prefix() {
|
fn test_from_reader_split_prefix() {
|
||||||
let xml = "<foo:bar xmlns:foo='ns1'/>";
|
let xml = "<foo:bar xmlns:foo='ns1'/>";
|
||||||
let mut reader = EventReader::from_str(xml);
|
let mut reader = Cursor::new(xml);
|
||||||
let elem = Element::from_reader(&mut reader).unwrap();
|
let elem = Element::from_reader(&mut reader).unwrap();
|
||||||
|
|
||||||
assert_eq!(elem.name(), String::from("bar"));
|
assert_eq!(elem.name(), String::from("bar"));
|
||||||
|
@ -1123,14 +965,14 @@ mod tests {
|
||||||
<rng:name xmlns:rng="http://relaxng.org/ns/structure/1.0"></rng:name>
|
<rng:name xmlns:rng="http://relaxng.org/ns/structure/1.0"></rng:name>
|
||||||
</rng:grammar>
|
</rng:grammar>
|
||||||
"#;
|
"#;
|
||||||
let mut reader = EventReader::from_str(xml);
|
let mut reader = Cursor::new(xml);
|
||||||
let _ = Element::from_reader(&mut reader).unwrap();
|
let _ = Element::from_reader(&mut reader).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn does_not_unescape_cdata() {
|
fn does_not_unescape_cdata() {
|
||||||
let xml = "<test xmlns='test'><![CDATA['>blah<blah>]]></test>";
|
let xml = "<test xmlns='test'><![CDATA['>blah<blah>]]></test>";
|
||||||
let mut reader = EventReader::from_str(xml);
|
let mut reader = Cursor::new(xml);
|
||||||
let elem = Element::from_reader(&mut reader).unwrap();
|
let elem = Element::from_reader(&mut reader).unwrap();
|
||||||
assert_eq!(elem.text(), "'>blah<blah>");
|
assert_eq!(elem.text(), "'>blah<blah>");
|
||||||
}
|
}
|
||||||
|
@ -1138,17 +980,17 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_compare_all_ns() {
|
fn test_compare_all_ns() {
|
||||||
let xml = "<foo xmlns='foo' xmlns:bar='baz'><bar:meh xmlns:bar='baz' /></foo>";
|
let xml = "<foo xmlns='foo' xmlns:bar='baz'><bar:meh xmlns:bar='baz' /></foo>";
|
||||||
let mut reader = EventReader::from_str(xml);
|
let mut reader = Cursor::new(xml);
|
||||||
let elem = Element::from_reader(&mut reader).unwrap();
|
let elem = Element::from_reader(&mut reader).unwrap();
|
||||||
|
|
||||||
let elem2 = elem.clone();
|
let elem2 = elem.clone();
|
||||||
|
|
||||||
let xml3 = "<foo xmlns='foo'><bar:meh xmlns:bar='baz'/></foo>";
|
let xml3 = "<foo xmlns='foo'><bar:meh xmlns:bar='baz'/></foo>";
|
||||||
let mut reader3 = EventReader::from_str(xml3);
|
let mut reader3 = Cursor::new(xml3);
|
||||||
let elem3 = Element::from_reader(&mut reader3).unwrap();
|
let elem3 = Element::from_reader(&mut reader3).unwrap();
|
||||||
|
|
||||||
let xml4 = "<prefix:foo xmlns:prefix='foo'><bar:meh xmlns:bar='baz'/></prefix:foo>";
|
let xml4 = "<prefix:foo xmlns:prefix='foo'><bar:meh xmlns:bar='baz'/></prefix:foo>";
|
||||||
let mut reader4 = EventReader::from_str(xml4);
|
let mut reader4 = Cursor::new(xml4);
|
||||||
let elem4 = Element::from_reader(&mut reader4).unwrap();
|
let elem4 = Element::from_reader(&mut reader4).unwrap();
|
||||||
|
|
||||||
assert_eq!(elem, elem2);
|
assert_eq!(elem, elem2);
|
||||||
|
|
|
@ -20,6 +20,9 @@ pub enum Error {
|
||||||
/// An error from quick_xml.
|
/// An error from quick_xml.
|
||||||
XmlError(::quick_xml::Error),
|
XmlError(::quick_xml::Error),
|
||||||
|
|
||||||
|
/// Error from the Tokenizer
|
||||||
|
TokenizerError(crate::tokenizer::TokenizerError),
|
||||||
|
|
||||||
/// An UTF-8 conversion error.
|
/// An UTF-8 conversion error.
|
||||||
Utf8Error(::std::str::Utf8Error),
|
Utf8Error(::std::str::Utf8Error),
|
||||||
|
|
||||||
|
@ -53,6 +56,7 @@ impl StdError for Error {
|
||||||
fn cause(&self) -> Option<&dyn StdError> {
|
fn cause(&self) -> Option<&dyn StdError> {
|
||||||
match self {
|
match self {
|
||||||
Error::XmlError(e) => Some(e),
|
Error::XmlError(e) => Some(e),
|
||||||
|
Error::TokenizerError(e) => Some(e),
|
||||||
Error::Utf8Error(e) => Some(e),
|
Error::Utf8Error(e) => Some(e),
|
||||||
Error::IoError(e) => Some(e),
|
Error::IoError(e) => Some(e),
|
||||||
Error::EndOfDocument => None,
|
Error::EndOfDocument => None,
|
||||||
|
@ -70,6 +74,7 @@ impl std::fmt::Display for Error {
|
||||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Error::XmlError(e) => write!(fmt, "XML error: {}", e),
|
Error::XmlError(e) => write!(fmt, "XML error: {}", e),
|
||||||
|
Error::TokenizerError(e) => write!(fmt, "XML tokenizer error: {}", e),
|
||||||
Error::Utf8Error(e) => write!(fmt, "UTF-8 error: {}", e),
|
Error::Utf8Error(e) => write!(fmt, "UTF-8 error: {}", e),
|
||||||
Error::IoError(e) => write!(fmt, "IO error: {}", e),
|
Error::IoError(e) => write!(fmt, "IO error: {}", e),
|
||||||
Error::EndOfDocument => {
|
Error::EndOfDocument => {
|
||||||
|
@ -96,6 +101,12 @@ impl From<::quick_xml::Error> for Error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wraps a tokenizer failure in `Error::TokenizerError`, which lets `?`
/// be applied to tokenizer results in functions returning this crate's `Error`.
impl From<crate::tokenizer::TokenizerError> for Error {
    fn from(tokenizer_err: crate::tokenizer::TokenizerError) -> Self {
        Self::TokenizerError(tokenizer_err)
    }
}
|
||||||
|
|
||||||
impl From<::std::str::Utf8Error> for Error {
|
impl From<::std::str::Utf8Error> for Error {
|
||||||
fn from(err: ::std::str::Utf8Error) -> Error {
|
fn from(err: ::std::str::Utf8Error) -> Error {
|
||||||
Error::Utf8Error(err)
|
Error::Utf8Error(err)
|
||||||
|
|
|
@ -85,6 +85,7 @@ pub mod node;
|
||||||
mod prefixes;
|
mod prefixes;
|
||||||
pub mod token;
|
pub mod token;
|
||||||
pub mod tokenizer;
|
pub mod tokenizer;
|
||||||
|
pub mod tree_builder;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
|
@ -10,16 +10,16 @@
|
||||||
|
|
||||||
use crate::element::Element;
|
use crate::element::Element;
|
||||||
use crate::error::{Error, ParserError, Result};
|
use crate::error::{Error, ParserError, Result};
|
||||||
|
use crate::tokenizer::Tokenizer;
|
||||||
|
use crate::tree_builder::TreeBuilder;
|
||||||
|
|
||||||
use bytes::BytesMut;
|
|
||||||
use quick_xml::Reader as EventReader;
|
|
||||||
use std::cell::RefCell;
|
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
/// Parser
|
/// Parser
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Parser {
|
pub struct Parser {
|
||||||
buffer: RefCell<BytesMut>,
|
tokenizer: Tokenizer,
|
||||||
|
tree_builder: TreeBuilder,
|
||||||
state: ParserState,
|
state: ParserState,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,7 +90,8 @@ impl Parser {
|
||||||
/// Creates a new Parser
|
/// Creates a new Parser
|
||||||
pub fn new() -> Parser {
|
pub fn new() -> Parser {
|
||||||
Parser {
|
Parser {
|
||||||
buffer: RefCell::new(BytesMut::new()),
|
tokenizer: Tokenizer::new(),
|
||||||
|
tree_builder: TreeBuilder::new(),
|
||||||
state: ParserState::Empty,
|
state: ParserState::Empty,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,11 +10,11 @@
|
||||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
use std::io::Cursor;
|
||||||
|
|
||||||
use crate::element::Element;
|
use crate::element::Element;
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
|
|
||||||
use quick_xml::Reader;
|
|
||||||
|
|
||||||
const TEST_STRING: &'static str = r#"<root xmlns="root_ns" a="b" xml:lang="en">meow<child c="d"/><child xmlns="child_ns" d="e" xml:lang="fr"/>nya</root>"#;
|
const TEST_STRING: &'static str = r#"<root xmlns="root_ns" a="b" xml:lang="en">meow<child c="d"/><child xmlns="child_ns" d="e" xml:lang="fr"/>nya</root>"#;
|
||||||
|
|
||||||
fn build_test_tree() -> Element {
|
fn build_test_tree() -> Element {
|
||||||
|
@ -36,7 +36,7 @@ fn build_test_tree() -> Element {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn reader_works() {
|
fn reader_works() {
|
||||||
let mut reader = Reader::from_str(TEST_STRING);
|
let mut reader = Cursor::new(TEST_STRING);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Element::from_reader(&mut reader).unwrap(),
|
Element::from_reader(&mut reader).unwrap(),
|
||||||
build_test_tree()
|
build_test_tree()
|
||||||
|
@ -348,7 +348,7 @@ fn two_elements_with_same_arguments_different_order_are_equal() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn namespace_attributes_works() {
|
fn namespace_attributes_works() {
|
||||||
let mut reader = Reader::from_str(TEST_STRING);
|
let mut reader = Cursor::new(TEST_STRING);
|
||||||
let root = Element::from_reader(&mut reader).unwrap();
|
let root = Element::from_reader(&mut reader).unwrap();
|
||||||
assert_eq!("en", root.attr("xml:lang").unwrap());
|
assert_eq!("en", root.attr("xml:lang").unwrap());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
|
@ -4,7 +4,7 @@ use bytes::BytesMut;
|
||||||
use super::Token;
|
use super::Token;
|
||||||
|
|
||||||
/// `Result::Err` type returned from `Tokenizer`
|
/// `Result::Err` type returned from `Tokenizer`
|
||||||
pub type TokenizerError = nom::error::Error<()>;
|
pub type TokenizerError = nom::error::Error<String>;
|
||||||
|
|
||||||
/// Streaming tokenizer (SAX parser)
|
/// Streaming tokenizer (SAX parser)
|
||||||
pub struct Tokenizer {
|
pub struct Tokenizer {
|
||||||
|
@ -33,9 +33,11 @@ impl Tokenizer {
|
||||||
pub fn pull(&mut self) -> Result<Option<Token>, TokenizerError> {
|
pub fn pull(&mut self) -> Result<Option<Token>, TokenizerError> {
|
||||||
/// cannot return an error with location info that points to
|
/// cannot return an error with location info that points to
|
||||||
/// our buffer that we still want to mutate
|
/// our buffer that we still want to mutate
|
||||||
fn erase_location<T>(e: nom::error::Error<T>) -> TokenizerError {
|
fn with_input_to_owned(e: nom::error::Error<&[u8]>) -> TokenizerError {
|
||||||
nom::error::Error {
|
nom::error::Error {
|
||||||
input: (),
|
input: std::str::from_utf8(e.input)
|
||||||
|
.unwrap_or("invalud UTF-8")
|
||||||
|
.to_owned(),
|
||||||
code: e.code,
|
code: e.code,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,9 +48,9 @@ impl Tokenizer {
|
||||||
Result::Err(nom::Err::Incomplete(_)) =>
|
Result::Err(nom::Err::Incomplete(_)) =>
|
||||||
None,
|
None,
|
||||||
Result::Err(nom::Err::Error(e)) =>
|
Result::Err(nom::Err::Error(e)) =>
|
||||||
return Err(erase_location(e)),
|
return Err(with_input_to_owned(e)),
|
||||||
Result::Err(nom::Err::Failure(e)) =>
|
Result::Err(nom::Err::Failure(e)) =>
|
||||||
return Err(erase_location(e)),
|
return Err(with_input_to_owned(e)),
|
||||||
} };
|
} };
|
||||||
match result {
|
match result {
|
||||||
Some((s_len, token)) => {
|
Some((s_len, token)) => {
|
||||||
|
|
127
minidom/src/tree_builder.rs
Normal file
127
minidom/src/tree_builder.rs
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
//! SAX events to DOM tree conversion
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use crate::Element;
|
||||||
|
use crate::prefixes::Prefixes;
|
||||||
|
use crate::token::{Attribute, LocalName, Token};
|
||||||
|
|
||||||
|
/// Tree-building parser state
|
||||||
|
pub struct TreeBuilder {
|
||||||
|
/// Parsing stack
|
||||||
|
stack: Vec<Element>,
|
||||||
|
/// Namespace set stack by prefix
|
||||||
|
prefixes_stack: Vec<Prefixes>,
|
||||||
|
/// Document root element if finished
|
||||||
|
pub root: Option<Element>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TreeBuilder {
|
||||||
|
/// Create a new one
|
||||||
|
pub fn new() -> Self {
|
||||||
|
TreeBuilder {
|
||||||
|
stack: vec![],
|
||||||
|
prefixes_stack: vec![],
|
||||||
|
root: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stack depth
|
||||||
|
pub fn depth(&self) -> usize {
|
||||||
|
self.stack.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pop the top-most element from the stack
|
||||||
|
pub fn pop(&mut self) -> Option<Element> {
|
||||||
|
self.prefixes_stack.pop();
|
||||||
|
self.stack.pop()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lookup XML namespace declaration for given prefix (or no prefix)
|
||||||
|
fn lookup_prefix(&self, prefix: &Option<String>) -> Option<&str> {
|
||||||
|
for nss in self.prefixes_stack.iter().rev() {
|
||||||
|
if let Some(ns) = nss.get(prefix) {
|
||||||
|
return Some(ns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_start_tag(&mut self, name: LocalName, attrs: Vec<Attribute>) {
|
||||||
|
let mut prefixes = Prefixes::default();
|
||||||
|
let mut attributes = BTreeMap::new();
|
||||||
|
for attr in attrs.into_iter() {
|
||||||
|
match (attr.name.prefix, attr.name.name) {
|
||||||
|
(None, xmlns) if xmlns == "xmlns" => {
|
||||||
|
prefixes.insert(None, attr.value);
|
||||||
|
}
|
||||||
|
(Some(xmlns), prefix) if xmlns == "xmlns" => {
|
||||||
|
prefixes.insert(Some(prefix), attr.value);
|
||||||
|
}
|
||||||
|
(Some(prefix), name) => {
|
||||||
|
attributes.insert(format!("{}:{}", prefix, name), attr.value);
|
||||||
|
}
|
||||||
|
(None, name) => {
|
||||||
|
attributes.insert(name, attr.value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.prefixes_stack.push(prefixes.clone());
|
||||||
|
|
||||||
|
let el = Element::new(
|
||||||
|
name.name,
|
||||||
|
self.lookup_prefix(&name.prefix).unwrap_or("").to_owned(),
|
||||||
|
Some(name.prefix),
|
||||||
|
prefixes,
|
||||||
|
attributes,
|
||||||
|
vec![]
|
||||||
|
);
|
||||||
|
self.stack.push(el);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_end_tag(&mut self) {
|
||||||
|
if let Some(el) = self.pop() {
|
||||||
|
if self.depth() > 0 {
|
||||||
|
let top = self.stack.len() - 1;
|
||||||
|
self.stack[top].append_child(el);
|
||||||
|
} else {
|
||||||
|
self.root = Some(el);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_text(&mut self, text: String) {
|
||||||
|
if self.depth() > 0 {
|
||||||
|
let top = self.stack.len() - 1;
|
||||||
|
self.stack[top].append_text_node(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Process a Token that you got out of a Tokenizer
|
||||||
|
pub fn process_token(&mut self, token: Token) {
|
||||||
|
match token {
|
||||||
|
Token::XmlDecl { .. } => {},
|
||||||
|
|
||||||
|
Token::StartTag {
|
||||||
|
name,
|
||||||
|
attrs,
|
||||||
|
self_closing: false,
|
||||||
|
} => self.process_start_tag(name, attrs),
|
||||||
|
|
||||||
|
Token::StartTag {
|
||||||
|
name,
|
||||||
|
attrs,
|
||||||
|
self_closing: true,
|
||||||
|
} => {
|
||||||
|
self.process_start_tag(name, attrs);
|
||||||
|
self.process_end_tag();
|
||||||
|
}
|
||||||
|
|
||||||
|
Token::EndTag { .. } =>
|
||||||
|
self.process_end_tag(),
|
||||||
|
|
||||||
|
Token::Text(text) =>
|
||||||
|
self.process_text(text),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue