minidom: add more error reporting to tokenizer and tree_builder to fix tests

This commit is contained in:
Astro 2022-03-23 23:36:44 +01:00
parent 1887fdd1b5
commit c1e661dd61
6 changed files with 33 additions and 45 deletions

View file

@ -83,7 +83,7 @@ pub struct Element {
namespace: String, namespace: String,
/// This is only used when deserializing. If you have to use a custom prefix use /// This is only used when deserializing. If you have to use a custom prefix use
/// `ElementBuilder::prefix`. /// `ElementBuilder::prefix`.
prefix: Option<Prefix>, pub(crate) prefix: Option<Prefix>,
prefixes: Prefixes, prefixes: Prefixes,
attributes: BTreeMap<String, String>, attributes: BTreeMap<String, String>,
children: Vec<Node>, children: Vec<Node>,
@ -119,13 +119,6 @@ impl PartialEq for Element {
} }
} }
fn ensure_no_prefix<S: AsRef<str>>(s: &S) -> Result<()> {
match s.as_ref().split(':').count() {
1 => Ok(()),
_ => Err(Error::InvalidElement),
}
}
impl Element { impl Element {
pub(crate) fn new<P: Into<Prefixes>>( pub(crate) fn new<P: Into<Prefixes>>(
name: String, name: String,
@ -135,8 +128,6 @@ impl Element {
attributes: BTreeMap<String, String>, attributes: BTreeMap<String, String>,
children: Vec<Node>, children: Vec<Node>,
) -> Element { ) -> Element {
ensure_no_prefix(&name).unwrap();
// TODO: Return Result<Element> instead.
Element { Element {
name, name,
namespace, namespace,
@ -323,7 +314,7 @@ impl Element {
} }
tokenizer.push(&buf[0..len]); tokenizer.push(&buf[0..len]);
while let Some(token) = tokenizer.pull()? { while let Some(token) = tokenizer.pull()? {
tree_builder.process_token(token); tree_builder.process_token(token)?;
if let Some(root) = tree_builder.root.take() { if let Some(root) = tree_builder.root.take() {
return Ok(root); return Ok(root);

View file

@ -35,9 +35,6 @@ pub enum Error {
/// An error which is returned when an element is closed when it shouldn't be /// An error which is returned when an element is closed when it shouldn't be
InvalidElementClosed, InvalidElementClosed,
/// An error which is returned when an element's name contains more colons than permitted
InvalidElement,
/// An error which is returned when an element being serialized doesn't contain a prefix /// An error which is returned when an element being serialized doesn't contain a prefix
/// (be it None or Some(_)). /// (be it None or Some(_)).
InvalidPrefix, InvalidPrefix,
@ -45,9 +42,6 @@ pub enum Error {
/// An error which is returned when an element doesn't contain a namespace /// An error which is returned when an element doesn't contain a namespace
MissingNamespace, MissingNamespace,
/// An error which is returned when a comment is to be parsed by minidom
NoComments,
/// An error which is returned when a prefix is defined twice
DuplicatePrefix, DuplicatePrefix,
} }
@ -61,10 +55,8 @@ impl StdError for Error {
Error::IoError(e) => Some(e), Error::IoError(e) => Some(e),
Error::EndOfDocument => None, Error::EndOfDocument => None,
Error::InvalidElementClosed => None, Error::InvalidElementClosed => None,
Error::InvalidElement => None,
Error::InvalidPrefix => None, Error::InvalidPrefix => None,
Error::MissingNamespace => None, Error::MissingNamespace => None,
Error::NoComments => None,
Error::DuplicatePrefix => None, Error::DuplicatePrefix => None,
} }
} }
@ -83,13 +75,8 @@ impl std::fmt::Display for Error {
Error::InvalidElementClosed => { Error::InvalidElementClosed => {
write!(fmt, "the XML is invalid, an element was wrongly closed") write!(fmt, "the XML is invalid, an element was wrongly closed")
} }
Error::InvalidElement => write!(fmt, "the XML element is invalid"),
Error::InvalidPrefix => write!(fmt, "the prefix is invalid"), Error::InvalidPrefix => write!(fmt, "the prefix is invalid"),
Error::MissingNamespace => write!(fmt, "the XML element is missing a namespace",), Error::MissingNamespace => write!(fmt, "the XML element is missing a namespace",),
Error::NoComments => write!(
fmt,
"a comment has been found even though comments are forbidden"
),
Error::DuplicatePrefix => write!(fmt, "the prefix is already defined"), Error::DuplicatePrefix => write!(fmt, "the prefix is already defined"),
} }
} }

View file

@ -424,7 +424,7 @@ fn namespace_inherited_prefixed2() {
fn fail_comments() { fn fail_comments() {
let elem: Result<Element, Error> = "<foo xmlns='ns1'><!-- bar --></foo>".parse(); let elem: Result<Element, Error> = "<foo xmlns='ns1'><!-- bar --></foo>".parse();
match elem { match elem {
Err(Error::NoComments) => (), Err(_) => (),
_ => panic!(), _ => panic!(),
}; };
} }
@ -432,20 +432,12 @@ fn fail_comments() {
#[test] #[test]
fn xml_error() { fn xml_error() {
match "<a xmlns='ns1'></b>".parse::<Element>() { match "<a xmlns='ns1'></b>".parse::<Element>() {
Err(crate::error::Error::XmlError(_)) => (), Err(crate::error::Error::InvalidElementClosed) => (),
err => panic!("No or wrong error: {:?}", err), err => panic!("No or wrong error: {:?}", err),
} }
match "<a xmlns='ns1'></".parse::<Element>() { match "<a xmlns='ns1'></".parse::<Element>() {
Err(crate::error::Error::XmlError(_)) => (), Err(crate::error::Error::EndOfDocument) => (),
err => panic!("No or wrong error: {:?}", err),
}
}
#[test]
fn invalid_element_error() {
match "<a:b:c>".parse::<Element>() {
Err(crate::error::Error::InvalidElement) => (),
err => panic!("No or wrong error: {:?}", err), err => panic!("No or wrong error: {:?}", err),
} }
} }

View file

@ -77,6 +77,7 @@ impl Token {
alt(( alt((
Self::parse_tag, Self::parse_tag,
|s| { |s| {
let (s, _) = not(peek(char('<')))(s)?;
let (s, text) = Self::parse_text('<', s)?; let (s, text) = Self::parse_text('<', s)?;
Ok((s, Token::Text(text))) Ok((s, Token::Text(text)))
}, },

View file

@ -1,3 +1,5 @@
// Copyright (c) 2022 Astro <astro@spaceboyz.net>
//! Streaming tokenizer (SAX parser) //! Streaming tokenizer (SAX parser)
use bytes::BytesMut; use bytes::BytesMut;

View file

@ -1,7 +1,9 @@
// Copyright (c) 2022 Astro <astro@spaceboyz.net>
//! SAX events to DOM tree conversion //! SAX events to DOM tree conversion
use std::collections::BTreeMap; use std::collections::BTreeMap;
use crate::Element; use crate::{Element, Error};
use crate::prefixes::Prefixes; use crate::prefixes::Prefixes;
use crate::token::{Attribute, LocalName, Token}; use crate::token::{Attribute, LocalName, Token};
@ -47,7 +49,7 @@ impl TreeBuilder {
None None
} }
fn process_start_tag(&mut self, name: LocalName, attrs: Vec<Attribute>) { fn process_start_tag(&mut self, name: LocalName, attrs: Vec<Attribute>) -> Result<(), Error> {
let mut prefixes = Prefixes::default(); let mut prefixes = Prefixes::default();
let mut attributes = BTreeMap::new(); let mut attributes = BTreeMap::new();
for attr in attrs.into_iter() { for attr in attrs.into_iter() {
@ -68,19 +70,28 @@ impl TreeBuilder {
} }
self.prefixes_stack.push(prefixes.clone()); self.prefixes_stack.push(prefixes.clone());
let namespace = self.lookup_prefix(&name.prefix)
.ok_or(Error::MissingNamespace)?
.to_owned();
let el = Element::new( let el = Element::new(
name.name, name.name,
self.lookup_prefix(&name.prefix).unwrap_or("").to_owned(), namespace,
Some(name.prefix), Some(name.prefix),
prefixes, prefixes,
attributes, attributes,
vec![] vec![]
); );
self.stack.push(el); self.stack.push(el);
Ok(())
} }
fn process_end_tag(&mut self) { fn process_end_tag(&mut self, name: LocalName) -> Result<(), Error> {
if let Some(el) = self.pop() { if let Some(el) = self.pop() {
if el.name() != name.name || el.prefix != Some(name.prefix) {
return Err(Error::InvalidElementClosed);
}
if self.depth() > 0 { if self.depth() > 0 {
let top = self.stack.len() - 1; let top = self.stack.len() - 1;
self.stack[top].append_child(el); self.stack[top].append_child(el);
@ -88,6 +99,8 @@ impl TreeBuilder {
self.root = Some(el); self.root = Some(el);
} }
} }
Ok(())
} }
fn process_text(&mut self, text: String) { fn process_text(&mut self, text: String) {
@ -98,7 +111,7 @@ impl TreeBuilder {
} }
/// Process a Token that you got out of a Tokenizer /// Process a Token that you got out of a Tokenizer
pub fn process_token(&mut self, token: Token) { pub fn process_token(&mut self, token: Token) -> Result<(), Error> {
match token { match token {
Token::XmlDecl { .. } => {}, Token::XmlDecl { .. } => {},
@ -106,22 +119,24 @@ impl TreeBuilder {
name, name,
attrs, attrs,
self_closing: false, self_closing: false,
} => self.process_start_tag(name, attrs), } => self.process_start_tag(name, attrs)?,
Token::StartTag { Token::StartTag {
name, name,
attrs, attrs,
self_closing: true, self_closing: true,
} => { } => {
self.process_start_tag(name, attrs); self.process_start_tag(name.clone(), attrs)?;
self.process_end_tag(); self.process_end_tag(name)?;
} }
Token::EndTag { .. } => Token::EndTag { name } =>
self.process_end_tag(), self.process_end_tag(name)?,
Token::Text(text) => Token::Text(text) =>
self.process_text(text), self.process_text(text),
} }
Ok(())
} }
} }