minidom: add more error reporting to tokenizer and tree_builder to fix tests

This commit is contained in:
Astro 2022-03-23 23:36:44 +01:00
parent 1887fdd1b5
commit c1e661dd61
6 changed files with 33 additions and 45 deletions

View file

@ -83,7 +83,7 @@ pub struct Element {
namespace: String,
/// This is only used when deserializing. If you have to use a custom prefix, use
/// `ElementBuilder::prefix` instead.
prefix: Option<Prefix>,
pub(crate) prefix: Option<Prefix>,
prefixes: Prefixes,
attributes: BTreeMap<String, String>,
children: Vec<Node>,
@ -119,13 +119,6 @@ impl PartialEq for Element {
}
}
/// Rejects strings that contain a `:`.
///
/// Splits `s` on `:` and counts the resulting pieces. Returns `Ok(())` when
/// there is exactly one piece (no colon present) and
/// `Err(Error::InvalidElement)` for any other count.
fn ensure_no_prefix<S: AsRef<str>>(s: &S) -> Result<()> {
match s.as_ref().split(':').count() {
// A single piece means the string was not split, i.e. it has no colon.
1 => Ok(()),
_ => Err(Error::InvalidElement),
}
}
impl Element {
pub(crate) fn new<P: Into<Prefixes>>(
name: String,
@ -135,8 +128,6 @@ impl Element {
attributes: BTreeMap<String, String>,
children: Vec<Node>,
) -> Element {
ensure_no_prefix(&name).unwrap();
// TODO: Return Result<Element> instead.
Element {
name,
namespace,
@ -323,7 +314,7 @@ impl Element {
}
tokenizer.push(&buf[0..len]);
while let Some(token) = tokenizer.pull()? {
tree_builder.process_token(token);
tree_builder.process_token(token)?;
if let Some(root) = tree_builder.root.take() {
return Ok(root);

View file

@ -35,9 +35,6 @@ pub enum Error {
/// An error which is returned when an element is closed when it shouldn't be
InvalidElementClosed,
/// An error which is returned when an element's name contains more colons than permitted
InvalidElement,
/// An error which is returned when an element being serialized doesn't contain a prefix
/// (be it None or Some(_)).
InvalidPrefix,
@ -45,9 +42,6 @@ pub enum Error {
/// An error which is returned when an element doesn't contain a namespace
MissingNamespace,
/// An error which is returned when a comment is to be parsed by minidom
NoComments,
/// An error which is returned when a prefix is defined twice
DuplicatePrefix,
}
@ -61,10 +55,8 @@ impl StdError for Error {
Error::IoError(e) => Some(e),
Error::EndOfDocument => None,
Error::InvalidElementClosed => None,
Error::InvalidElement => None,
Error::InvalidPrefix => None,
Error::MissingNamespace => None,
Error::NoComments => None,
Error::DuplicatePrefix => None,
}
}
@ -83,13 +75,8 @@ impl std::fmt::Display for Error {
Error::InvalidElementClosed => {
write!(fmt, "the XML is invalid, an element was wrongly closed")
}
Error::InvalidElement => write!(fmt, "the XML element is invalid"),
Error::InvalidPrefix => write!(fmt, "the prefix is invalid"),
Error::MissingNamespace => write!(fmt, "the XML element is missing a namespace",),
Error::NoComments => write!(
fmt,
"a comment has been found even though comments are forbidden"
),
Error::DuplicatePrefix => write!(fmt, "the prefix is already defined"),
}
}

View file

@ -424,7 +424,7 @@ fn namespace_inherited_prefixed2() {
fn fail_comments() {
let elem: Result<Element, Error> = "<foo xmlns='ns1'><!-- bar --></foo>".parse();
match elem {
Err(Error::NoComments) => (),
Err(_) => (),
_ => panic!(),
};
}
@ -432,20 +432,12 @@ fn fail_comments() {
#[test]
fn xml_error() {
match "<a xmlns='ns1'></b>".parse::<Element>() {
Err(crate::error::Error::XmlError(_)) => (),
Err(crate::error::Error::InvalidElementClosed) => (),
err => panic!("No or wrong error: {:?}", err),
}
match "<a xmlns='ns1'></".parse::<Element>() {
Err(crate::error::Error::XmlError(_)) => (),
err => panic!("No or wrong error: {:?}", err),
}
}
#[test]
fn invalid_element_error() {
match "<a:b:c>".parse::<Element>() {
Err(crate::error::Error::InvalidElement) => (),
Err(crate::error::Error::EndOfDocument) => (),
err => panic!("No or wrong error: {:?}", err),
}
}

View file

@ -77,6 +77,7 @@ impl Token {
alt((
Self::parse_tag,
|s| {
let (s, _) = not(peek(char('<')))(s)?;
let (s, text) = Self::parse_text('<', s)?;
Ok((s, Token::Text(text)))
},

View file

@ -1,3 +1,5 @@
// Copyright (c) 2022 Astro <astro@spaceboyz.net>
//! Streaming tokenizer (SAX parser)
use bytes::BytesMut;

View file

@ -1,7 +1,9 @@
// Copyright (c) 2022 Astro <astro@spaceboyz.net>
//! SAX events to DOM tree conversion
use std::collections::BTreeMap;
use crate::Element;
use crate::{Element, Error};
use crate::prefixes::Prefixes;
use crate::token::{Attribute, LocalName, Token};
@ -47,7 +49,7 @@ impl TreeBuilder {
None
}
fn process_start_tag(&mut self, name: LocalName, attrs: Vec<Attribute>) {
fn process_start_tag(&mut self, name: LocalName, attrs: Vec<Attribute>) -> Result<(), Error> {
let mut prefixes = Prefixes::default();
let mut attributes = BTreeMap::new();
for attr in attrs.into_iter() {
@ -68,19 +70,28 @@ impl TreeBuilder {
}
self.prefixes_stack.push(prefixes.clone());
let namespace = self.lookup_prefix(&name.prefix)
.ok_or(Error::MissingNamespace)?
.to_owned();
let el = Element::new(
name.name,
self.lookup_prefix(&name.prefix).unwrap_or("").to_owned(),
namespace,
Some(name.prefix),
prefixes,
attributes,
vec![]
);
self.stack.push(el);
Ok(())
}
fn process_end_tag(&mut self) {
fn process_end_tag(&mut self, name: LocalName) -> Result<(), Error> {
if let Some(el) = self.pop() {
if el.name() != name.name || el.prefix != Some(name.prefix) {
return Err(Error::InvalidElementClosed);
}
if self.depth() > 0 {
let top = self.stack.len() - 1;
self.stack[top].append_child(el);
@ -88,6 +99,8 @@ impl TreeBuilder {
self.root = Some(el);
}
}
Ok(())
}
fn process_text(&mut self, text: String) {
@ -98,7 +111,7 @@ impl TreeBuilder {
}
/// Process a Token that you got out of a Tokenizer
pub fn process_token(&mut self, token: Token) {
pub fn process_token(&mut self, token: Token) -> Result<(), Error> {
match token {
Token::XmlDecl { .. } => {},
@ -106,22 +119,24 @@ impl TreeBuilder {
name,
attrs,
self_closing: false,
} => self.process_start_tag(name, attrs),
} => self.process_start_tag(name, attrs)?,
Token::StartTag {
name,
attrs,
self_closing: true,
} => {
self.process_start_tag(name, attrs);
self.process_end_tag();
self.process_start_tag(name.clone(), attrs)?;
self.process_end_tag(name)?;
}
Token::EndTag { .. } =>
self.process_end_tag(),
Token::EndTag { name } =>
self.process_end_tag(name)?,
Token::Text(text) =>
self.process_text(text),
}
Ok(())
}
}