From c1e661dd61078a07e83cd74f22399beeb1edb67f Mon Sep 17 00:00:00 2001 From: Astro Date: Wed, 23 Mar 2022 23:36:44 +0100 Subject: [PATCH] minidom: add more error reporting to tokenizer and tree_builder to fix tests --- minidom/src/element.rs | 13 ++----------- minidom/src/error.rs | 13 ------------- minidom/src/tests.rs | 14 +++----------- minidom/src/token.rs | 1 + minidom/src/tokenizer.rs | 2 ++ minidom/src/tree_builder.rs | 35 +++++++++++++++++++++++++---------- 6 files changed, 33 insertions(+), 45 deletions(-) diff --git a/minidom/src/element.rs b/minidom/src/element.rs index 0d6ce36..bb54233 100644 --- a/minidom/src/element.rs +++ b/minidom/src/element.rs @@ -83,7 +83,7 @@ pub struct Element { namespace: String, /// This is only used when deserializing. If you have to use a custom prefix use /// `ElementBuilder::prefix`. - prefix: Option, + pub(crate) prefix: Option, prefixes: Prefixes, attributes: BTreeMap, children: Vec, @@ -119,13 +119,6 @@ impl PartialEq for Element { } } -fn ensure_no_prefix>(s: &S) -> Result<()> { - match s.as_ref().split(':').count() { - 1 => Ok(()), - _ => Err(Error::InvalidElement), - } -} - impl Element { pub(crate) fn new>( name: String, @@ -135,8 +128,6 @@ impl Element { attributes: BTreeMap, children: Vec, ) -> Element { - ensure_no_prefix(&name).unwrap(); - // TODO: Return Result instead. Element { name, namespace, @@ -323,7 +314,7 @@ impl Element { } tokenizer.push(&buf[0..len]); while let Some(token) = tokenizer.pull()? { - tree_builder.process_token(token); + tree_builder.process_token(token)?; if let Some(root) = tree_builder.root.take() { return Ok(root); diff --git a/minidom/src/error.rs b/minidom/src/error.rs index 05158de..fb1496a 100644 --- a/minidom/src/error.rs +++ b/minidom/src/error.rs @@ -35,9 +35,6 @@ pub enum Error { /// An error which is returned when an element is closed when it shouldn't be InvalidElementClosed, - /// An error which is returned when an elemet's name contains more colons than permitted - InvalidElement, - /// An error which is returned when an element being serialized doesn't contain a prefix /// (be it None or Some(_)). InvalidPrefix, @@ -45,9 +42,6 @@ pub enum Error { /// An error which is returned when an element doesn't contain a namespace MissingNamespace, - /// An error which is returned when a comment is to be parsed by minidom - NoComments, - /// An error which is returned when a prefixed is defined twice DuplicatePrefix, } @@ -61,10 +55,8 @@ impl StdError for Error { Error::IoError(e) => Some(e), Error::EndOfDocument => None, Error::InvalidElementClosed => None, - Error::InvalidElement => None, Error::InvalidPrefix => None, Error::MissingNamespace => None, - Error::NoComments => None, Error::DuplicatePrefix => None, } } @@ -83,13 +75,8 @@ impl std::fmt::Display for Error { Error::InvalidElementClosed => { write!(fmt, "the XML is invalid, an element was wrongly closed") } - Error::InvalidElement => write!(fmt, "the XML element is invalid"), Error::InvalidPrefix => write!(fmt, "the prefix is invalid"), Error::MissingNamespace => write!(fmt, "the XML element is missing a namespace",), - Error::NoComments => write!( - fmt, - "a comment has been found even though comments are forbidden" - ), Error::DuplicatePrefix => write!(fmt, "the prefix is already defined"), } } diff --git a/minidom/src/tests.rs b/minidom/src/tests.rs index ce43ecb..c76529c 100644 --- a/minidom/src/tests.rs +++ b/minidom/src/tests.rs @@ -424,7 +424,7 @@ fn namespace_inherited_prefixed2() { fn fail_comments() { let elem: Result = "".parse(); match elem { - Err(Error::NoComments) => (), + Err(_) => (), _ => panic!(), }; } @@ -432,20 +432,12 @@ fn fail_comments() { #[test] fn xml_error() { match "".parse::() { - Err(crate::error::Error::XmlError(_)) => (), + Err(crate::error::Error::InvalidElementClosed) => (), err => panic!("No or wrong error: {:?}", err), } match "() { - Err(crate::error::Error::XmlError(_)) => (), - err => panic!("No or wrong error: {:?}", err), - } -} - -#[test] -fn invalid_element_error() { - match "".parse::() { - Err(crate::error::Error::InvalidElement) => (), + Err(crate::error::Error::EndOfDocument) => (), err => panic!("No or wrong error: {:?}", err), } } diff --git a/minidom/src/token.rs b/minidom/src/token.rs index 462724c..2f79e6a 100644 --- a/minidom/src/token.rs +++ b/minidom/src/token.rs @@ -77,6 +77,7 @@ impl Token { alt(( Self::parse_tag, |s| { + let (s, _) = not(peek(char('<')))(s)?; let (s, text) = Self::parse_text('<', s)?; Ok((s, Token::Text(text))) }, diff --git a/minidom/src/tokenizer.rs b/minidom/src/tokenizer.rs index cf67f89..d787484 100644 --- a/minidom/src/tokenizer.rs +++ b/minidom/src/tokenizer.rs @@ -1,3 +1,5 @@ +// Copyright (c) 2022 Astro + //! Streaming tokenizer (SAX parser) use bytes::BytesMut; diff --git a/minidom/src/tree_builder.rs b/minidom/src/tree_builder.rs index 7787d92..406a600 100644 --- a/minidom/src/tree_builder.rs +++ b/minidom/src/tree_builder.rs @@ -1,7 +1,9 @@ +// Copyright (c) 2022 Astro + //! SAX events to DOM tree conversion use std::collections::BTreeMap; -use crate::Element; +use crate::{Element, Error}; use crate::prefixes::Prefixes; use crate::token::{Attribute, LocalName, Token}; @@ -47,7 +49,7 @@ impl TreeBuilder { None } - fn process_start_tag(&mut self, name: LocalName, attrs: Vec) { + fn process_start_tag(&mut self, name: LocalName, attrs: Vec) -> Result<(), Error> { let mut prefixes = Prefixes::default(); let mut attributes = BTreeMap::new(); for attr in attrs.into_iter() { @@ -68,19 +70,28 @@ impl TreeBuilder { } self.prefixes_stack.push(prefixes.clone()); + let namespace = self.lookup_prefix(&name.prefix) + .ok_or(Error::MissingNamespace)? + .to_owned(); let el = Element::new( name.name, - self.lookup_prefix(&name.prefix).unwrap_or("").to_owned(), + namespace, Some(name.prefix), prefixes, attributes, vec![] ); self.stack.push(el); + + Ok(()) } - fn process_end_tag(&mut self) { + fn process_end_tag(&mut self, name: LocalName) -> Result<(), Error> { if let Some(el) = self.pop() { + if el.name() != name.name || el.prefix != Some(name.prefix) { + return Err(Error::InvalidElementClosed); + } + if self.depth() > 0 { let top = self.stack.len() - 1; self.stack[top].append_child(el); @@ -88,6 +99,8 @@ impl TreeBuilder { self.root = Some(el); } } + + Ok(()) } fn process_text(&mut self, text: String) { @@ -98,7 +111,7 @@ impl TreeBuilder { } /// Process a Token that you got out of a Tokenizer - pub fn process_token(&mut self, token: Token) { + pub fn process_token(&mut self, token: Token) -> Result<(), Error> { match token { Token::XmlDecl { .. } => {}, @@ -106,22 +119,24 @@ impl TreeBuilder { name, attrs, self_closing: false, - } => self.process_start_tag(name, attrs), + } => self.process_start_tag(name, attrs)?, Token::StartTag { name, attrs, self_closing: true, } => { - self.process_start_tag(name, attrs); - self.process_end_tag(); + self.process_start_tag(name.clone(), attrs)?; + self.process_end_tag(name)?; } - Token::EndTag { .. } => - self.process_end_tag(), + Token::EndTag { name } => + self.process_end_tag(name)?, Token::Text(text) => self.process_text(text), } + + Ok(()) } }