diff --git a/minidom/src/lib.rs b/minidom/src/lib.rs index 5717ada..65033e0 100644 --- a/minidom/src/lib.rs +++ b/minidom/src/lib.rs @@ -96,4 +96,4 @@ pub use error::{Error, Result}; pub use namespaces::NSChoice; pub use node::Node; pub use token::Token; -pub use tokenizer::{Tokenizer, TokenizerError, tokenize}; +pub use tokenizer::{tokenize, Tokenizer, TokenizerError}; diff --git a/minidom/src/token.rs b/minidom/src/token.rs index 6408508..011a6fa 100644 --- a/minidom/src/token.rs +++ b/minidom/src/token.rs @@ -1,15 +1,18 @@ //! Parsed XML token -use std::borrow::Cow; use nom::{ branch::alt, bytes::streaming::{tag, take_while1}, - character::{is_space, streaming::{char, digit1, one_of, space0, space1}}, + character::{ + is_space, + streaming::{char, digit1, one_of, space0, space1}, + }, combinator::{not, peek, value}, multi::many0, number::streaming::hex_u32, IResult, }; +use std::borrow::Cow; /// Attribute name with prefix #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -23,16 +26,14 @@ pub struct LocalName { impl From<&str> for LocalName { fn from(s: &str) -> Self { match s.split_once(':') { - Some((prefix, name)) => - LocalName { - prefix: Some(prefix.to_owned()), - name: name.to_owned(), - }, - None => - LocalName { - prefix: None, - name: s.to_owned(), - }, + Some((prefix, name)) => LocalName { + prefix: Some(prefix.to_owned()), + name: name.to_owned(), + }, + None => LocalName { + prefix: None, + name: s.to_owned(), + }, } } } @@ -75,97 +76,109 @@ pub enum Token { impl Token { /// Parse one token pub fn parse(s: &[u8]) -> IResult<&[u8], Token> { - alt(( - Self::parse_tag, - |s| { - let (s, _) = not(peek(char('<')))(s)?; - let (s, text) = Self::parse_text('<', s)?; - Ok((s, Token::Text(text.into_owned()))) - }, - ))(s) + alt((Self::parse_tag, |s| { + let (s, _) = not(peek(char('<')))(s)?; + let (s, text) = Self::parse_text('<', s)?; + Ok((s, Token::Text(text.into_owned()))) + }))(s) } fn parse_tag(s: &[u8]) -> IResult<&[u8], Token> { let (s, _) = tag("<")(s)?; - alt((|s| -> IResult<&[u8], Token> { - // CDATA - let (s, _) = tag("![CDATA[")(s)?; - let mut end = None; - for i in 0..s.len() - 2 { - if &s[i..i + 3] == b"]]>" { - end = Some(i); - break + alt(( + |s| -> IResult<&[u8], Token> { + // CDATA + let (s, _) = tag("![CDATA[")(s)?; + let mut end = None; + for i in 0..s.len() - 2 { + if &s[i..i + 3] == b"]]>" { + end = Some(i); + break; + } } - } - if let Some(end) = end { - let text = Self::str_from_utf8(&s[..end])?; - Ok((&s[end + 3..], Token::Text(text.to_string()))) - } else { - Err(nom::Err::Incomplete(nom::Needed::Unknown)) - } - }, |s| { - // XmlDecl - let (s, _) = tag("?xml")(s)?; - let (s, _) = space1(s)?; + if let Some(end) = end { + let text = Self::str_from_utf8(&s[..end])?; + Ok((&s[end + 3..], Token::Text(text.to_string()))) + } else { + Err(nom::Err::Incomplete(nom::Needed::Unknown)) + } + }, + |s| { + // XmlDecl + let (s, _) = tag("?xml")(s)?; + let (s, _) = space1(s)?; + + let (s, attrs) = many0(|s| { + let (s, (name, value)) = Self::parse_attr(s)?; + let (s, _) = space0(s)?; + Ok((s, (name, value))) + })(s)?; - let (s, attrs) = many0(|s| { - let (s, (name, value)) = Self::parse_attr(s)?; let (s, _) = space0(s)?; - Ok((s, (name, value))) - })(s)?; - - let (s, _) = space0(s)?; - let (s, _) = tag("?>")(s)?; - Ok((s, Token::XmlDecl { - attrs: attrs.into_iter() - .map(|(name, value)| Attribute { - name: name.into(), - value: value.into_owned(), - }) - .collect(), - })) - }, |s| { - // EndTag - let (s, _) = tag("/")(s)?; - let (s, _) = space0(s)?; - let (s, name) = take_while1(|b| !(is_space(b) || b == b'>'))(s)?; - let (s, _) = space0(s)?; - let (s, _) = tag(">")(s)?; - let name = Self::str_from_utf8(name)?; - Ok((s, Token::EndTag { name: name.into() })) - }, |s| { - // StartTag - let (s, _) = space0(s)?; - let (s, name) = take_while1(|b| !(is_space(b) || b == b'>' || b == b'/'))(s)?; - let (s, _) = space0(s)?; - let (s, attrs) = many0(|s| { - let (s, (name, value)) = Self::parse_attr(s)?; - let (s, _) = space0(s)?; - Ok((s, (name, value))) - })(s)?; - - let (s, self_closing) = alt((|s| { + let (s, _) = tag("?>")(s)?; + Ok(( + s, + Token::XmlDecl { + attrs: attrs + .into_iter() + .map(|(name, value)| Attribute { + name: name.into(), + value: value.into_owned(), + }) + .collect(), + }, + )) + }, + |s| { + // EndTag let (s, _) = tag("/")(s)?; let (s, _) = space0(s)?; + let (s, name) = take_while1(|b| !(is_space(b) || b == b'>'))(s)?; + let (s, _) = space0(s)?; let (s, _) = tag(">")(s)?; - Ok((s, true)) - }, |s| { - let (s, _) = tag(">")(s)?; - Ok((s, false)) - }))(s)?; + let name = Self::str_from_utf8(name)?; + Ok((s, Token::EndTag { name: name.into() })) + }, + |s| { + // StartTag + let (s, _) = space0(s)?; + let (s, name) = take_while1(|b| !(is_space(b) || b == b'>' || b == b'/'))(s)?; + let (s, _) = space0(s)?; + let (s, attrs) = many0(|s| { + let (s, (name, value)) = Self::parse_attr(s)?; + let (s, _) = space0(s)?; + Ok((s, (name, value))) + })(s)?; - Ok((s, Token::StartTag { - name: Self::str_from_utf8(name)? - .into(), - attrs: attrs.into_iter() - .map(|(name, value)| Attribute { - name: name.into(), - value: value.into_owned(), - }) - .collect(), - self_closing, - })) - }))(s) + let (s, self_closing) = alt(( + |s| { + let (s, _) = tag("/")(s)?; + let (s, _) = space0(s)?; + let (s, _) = tag(">")(s)?; + Ok((s, true)) + }, + |s| { + let (s, _) = tag(">")(s)?; + Ok((s, false)) + }, + ))(s)?; + + Ok(( + s, + Token::StartTag { + name: Self::str_from_utf8(name)?.into(), + attrs: attrs + .into_iter() + .map(|(name, value)| Attribute { + name: name.into(), + value: value.into_owned(), + }) + .collect(), + self_closing, + }, + )) + }, + ))(s) } fn parse_attr(s: &[u8]) -> IResult<&[u8], (&str, Cow)> { @@ -182,54 +195,51 @@ impl Token { } fn parse_text(until: char, s: &[u8]) -> IResult<&[u8], Cow> { - let (s, results) = many0( - alt( - (|s| { - let (s, _) = tag("&#")(s)?; - let (s, num) = digit1(s)?; - let (s, _) = char(';')(s)?; - let num: u32 = Self::str_from_utf8(num)? - .parse() - .map_err(|_| nom::Err::Failure(nom::error::Error::new(s, nom::error::ErrorKind::Fail)))?; - if let Some(c) = std::char::from_u32(num) { - Ok((s, Cow::from(format!("{}", c)))) - } else { - Ok((s, Cow::from(format!("")))) - } - }, |s| { - let (s, _) = tag("&#x")(s)?; - let (s, num) = hex_u32(s)?; - let (s, _) = char(';')(s)?; - if let Some(c) = std::char::from_u32(num) { - Ok((s, Cow::from(format!("{}", c)))) - } else { - Ok((s, Cow::from(format!("")))) - } - }, |s| { - let (s, _) = char('&')(s)?; - let (s, c) = alt(( - value('&', tag("amp")), - value('<', tag("lt")), - value('>', tag("gt")), - value('"', tag("quot")), - value('\'', tag("apos")), - ))(s)?; - let (s, _) = char(';')(s)?; + let (s, results) = many0(alt(( + |s| { + let (s, _) = tag("&#")(s)?; + let (s, num) = digit1(s)?; + let (s, _) = char(';')(s)?; + let num: u32 = Self::str_from_utf8(num)?.parse().map_err(|_| { + nom::Err::Failure(nom::error::Error::new(s, nom::error::ErrorKind::Fail)) + })?; + if let Some(c) = std::char::from_u32(num) { Ok((s, Cow::from(format!("{}", c)))) - }, |s| { - let (s, _) = not(peek(char(until)))(s)?; - let (s, text) = take_while1(|b| - b != until as u8 && - b != b'&' && - b != b'<' && - b != b'>' - )(s)?; - let text = Self::str_from_utf8(text)?; - let text = Self::normalize_newlines(text); - Ok((s, text)) - }) - ) - )(s)?; + } else { + Ok((s, Cow::from(format!("")))) + } + }, + |s| { + let (s, _) = tag("&#x")(s)?; + let (s, num) = hex_u32(s)?; + let (s, _) = char(';')(s)?; + if let Some(c) = std::char::from_u32(num) { + Ok((s, Cow::from(format!("{}", c)))) + } else { + Ok((s, Cow::from(format!("")))) + } + }, + |s| { + let (s, _) = char('&')(s)?; + let (s, c) = alt(( + value('&', tag("amp")), + value('<', tag("lt")), + value('>', tag("gt")), + value('"', tag("quot")), + value('\'', tag("apos")), + ))(s)?; + let (s, _) = char(';')(s)?; + Ok((s, Cow::from(format!("{}", c)))) + }, + |s| { + let (s, _) = not(peek(char(until)))(s)?; + let (s, text) = + take_while1(|b| b != until as u8 && b != b'&' && b != b'<' && b != b'>')(s)?; + let text = Self::str_from_utf8(text)?; + let text = Self::normalize_newlines(text); + Ok((s, text)) + }, + )))(s)?; if results.len() == 1 { Ok((s, results.into_iter().next().unwrap())) @@ -329,11 +339,14 @@ mod tests { #[test] fn test_tag() { assert_eq!( - Ok((&b""[..], Token::StartTag { - name: "foobar".into(), - attrs: vec![], - self_closing: false, - })), + Ok(( + &b""[..], + Token::StartTag { + name: "foobar".into(), + attrs: vec![], + self_closing: false, + } + )), Token::parse(b"") ); } @@ -341,15 +354,14 @@ mod tests { #[test] fn test_attrs() { assert_eq!( - Ok((&b""[..], Token::StartTag { - name: "a".into(), - attrs: vec![ - attr("a", "2'3"), - attr("b", "4\"2"), - attr("c", ""), - ], - self_closing: false, - })), + Ok(( + &b""[..], + Token::StartTag { + name: "a".into(), + attrs: vec![attr("a", "2'3"), attr("b", "4\"2"), attr("c", ""),], + self_closing: false, + } + )), Token::parse(b"") ); } @@ -357,15 +369,14 @@ mod tests { #[test] fn test_attrs_normalized() { assert_eq!( - Ok((&b""[..], Token::StartTag { - name: "a".into(), - attrs: vec![ - attr("a", "x y"), - attr("b", " "), - attr("c", "a b"), - ], - self_closing: false, - })), + Ok(( + &b""[..], + Token::StartTag { + name: "a".into(), + attrs: vec![attr("a", "x y"), attr("b", " "), attr("c", "a b"),], + self_closing: false, + } + )), Token::parse(b"") ); } @@ -373,13 +384,14 @@ mod tests { #[test] fn test_attrs_entities() { assert_eq!( - Ok((&b""[..], Token::StartTag { - name: "a".into(), - attrs: vec![ - attr("a", "<3"), - ], - self_closing: false, - })), + Ok(( + &b""[..], + Token::StartTag { + name: "a".into(), + attrs: vec![attr("a", "<3"),], + self_closing: false, + } + )), Token::parse(b"") ); } @@ -387,11 +399,14 @@ mod tests { #[test] fn test_self_closing_tag() { assert_eq!( - Ok((&b""[..], Token::StartTag { - name: "foobar".into(), - attrs: vec![], - self_closing: true, - })), + Ok(( + &b""[..], + Token::StartTag { + name: "foobar".into(), + attrs: vec![], + self_closing: true, + } + )), Token::parse(b"") ); } @@ -399,9 +414,12 @@ mod tests { #[test] fn test_end_tag() { assert_eq!( - Ok((&b""[..], Token::EndTag { - name: "foobar".into(), - })), + Ok(( + &b""[..], + Token::EndTag { + name: "foobar".into(), + } + )), Token::parse(b"") ); } @@ -409,14 +427,17 @@ mod tests { #[test] fn test_element_prefix() { assert_eq!( - Ok((&b""[..], Token::StartTag { - name: LocalName { - name: "z".to_owned(), - prefix: Some("x".to_owned()), - }, - attrs: vec![], - self_closing: true, - })), + Ok(( + &b""[..], + Token::StartTag { + name: LocalName { + name: "z".to_owned(), + prefix: Some("x".to_owned()), + }, + attrs: vec![], + self_closing: true, + } + )), Token::parse(b"") ); } @@ -424,17 +445,20 @@ mod tests { #[test] fn test_attr_prefix() { assert_eq!( - Ok((&b""[..], Token::StartTag { - name: "a".into(), - attrs: vec![Attribute { - name: LocalName { - name: "abc".to_owned(), - prefix: Some("xyz".to_owned()), - }, - value: "".to_owned(), - }], - self_closing: false, - })), + Ok(( + &b""[..], + Token::StartTag { + name: "a".into(), + attrs: vec![Attribute { + name: LocalName { + name: "abc".to_owned(), + prefix: Some("xyz".to_owned()), + }, + value: "".to_owned(), + }], + self_closing: false, + } + )), Token::parse(b"") ); } @@ -442,21 +466,27 @@ mod tests { #[test] fn test_xml_decl() { assert_eq!( - Ok((&b""[..], Token::XmlDecl { - attrs: vec![Attribute { - name: LocalName { - name: "version".to_owned(), - prefix: None, - }, - value: "1.0".to_owned(), - }, Attribute { - name: LocalName { - name: "encoding".to_owned(), - prefix: None, - }, - value: "UTF-8".to_owned(), - }], - })), + Ok(( + &b""[..], + Token::XmlDecl { + attrs: vec![ + Attribute { + name: LocalName { + name: "version".to_owned(), + prefix: None, + }, + value: "1.0".to_owned(), + }, + Attribute { + name: LocalName { + name: "encoding".to_owned(), + prefix: None, + }, + value: "UTF-8".to_owned(), + } + ], + } + )), Token::parse(b"") ); } diff --git a/minidom/src/tokenizer.rs b/minidom/src/tokenizer.rs index 7f3be48..fa4afe5 100644 --- a/minidom/src/tokenizer.rs +++ b/minidom/src/tokenizer.rs @@ -2,8 +2,8 @@ //! Streaming tokenizer (SAX parser) -use bytes::BytesMut; use super::{Error, Token}; +use bytes::BytesMut; /// `Result::Err` type returned from `Tokenizer` pub type TokenizerError = nom::error::Error; @@ -55,26 +55,23 @@ pub fn tokenize(buffer: &mut BytesMut) -> Result, Error> { } } - let result: Option<(usize, Token)> = { match Token::parse(&buffer) { - Ok((s, token)) => - Some((s.len(), token)), - Result::Err(nom::Err::Incomplete(_)) => - None, - Result::Err(nom::Err::Error(e)) => - return Err(with_input_to_owned(e).into()), - Result::Err(nom::Err::Failure(e)) => - return Err(with_input_to_owned(e).into()), - } }; + let result: Option<(usize, Token)> = { + match Token::parse(&buffer) { + Ok((s, token)) => Some((s.len(), token)), + Result::Err(nom::Err::Incomplete(_)) => None, + Result::Err(nom::Err::Error(e)) => return Err(with_input_to_owned(e).into()), + Result::Err(nom::Err::Failure(e)) => return Err(with_input_to_owned(e).into()), + } + }; match result { Some((s_len, token)) => { let _ = buffer.split_to(buffer.len() - s_len); Ok(Some(token)) } - None => Ok(None) + None => Ok(None), } } - #[cfg(test)] mod tests { use super::*; @@ -101,20 +98,21 @@ mod tests { let buf = b"quux"; for chunk_size in 1..=buf.len() { - assert_eq!(vec![ - Token::StartTag { - name: "foo".into(), - attrs: vec![Attribute { - name: "bar".into(), - value: "baz".to_owned(), - }], - self_closing: false, - }, - Token::Text("quux".to_owned()), - Token::EndTag { - name: "foo".into(), - }, - ], run(chunk_size, buf)); + assert_eq!( + vec![ + Token::StartTag { + name: "foo".into(), + attrs: vec![Attribute { + name: "bar".into(), + value: "baz".to_owned(), + }], + self_closing: false, + }, + Token::Text("quux".to_owned()), + Token::EndTag { name: "foo".into() }, + ], + run(chunk_size, buf) + ); } } } diff --git a/minidom/src/tree_builder.rs b/minidom/src/tree_builder.rs index 6166bdf..549aa99 100644 --- a/minidom/src/tree_builder.rs +++ b/minidom/src/tree_builder.rs @@ -2,10 +2,10 @@ //! SAX events to DOM tree conversion -use std::collections::BTreeMap; -use crate::{Element, Error}; use crate::prefixes::Prefixes; use crate::token::{Attribute, LocalName, Token}; +use crate::{Element, Error}; +use std::collections::BTreeMap; /// Tree-building parser state pub struct TreeBuilder { @@ -85,7 +85,8 @@ impl TreeBuilder { } self.prefixes_stack.push(prefixes.clone()); - let namespace = self.lookup_prefix(&name.prefix) + let namespace = self + .lookup_prefix(&name.prefix) .ok_or(Error::MissingNamespace)? .to_owned(); let el = Element::new( @@ -94,7 +95,7 @@ impl TreeBuilder { Some(name.prefix), prefixes, attributes, - vec![] + vec![], ); self.stack.push(el); @@ -128,7 +129,7 @@ impl TreeBuilder { /// Process a Token that you got out of a Tokenizer pub fn process_token(&mut self, token: Token) -> Result<(), Error> { match token { - Token::XmlDecl { .. } => {}, + Token::XmlDecl { .. } => {} Token::StartTag { name, @@ -145,11 +146,9 @@ impl TreeBuilder { self.process_end_tag(name)?; } - Token::EndTag { name } => - self.process_end_tag(name)?, + Token::EndTag { name } => self.process_end_tag(name)?, - Token::Text(text) => - self.process_text(text), + Token::Text(text) => self.process_text(text), } Ok(()) diff --git a/tokio-xmpp/src/client/async_client.rs b/tokio-xmpp/src/client/async_client.rs index bf6cd0e..dc62993 100644 --- a/tokio-xmpp/src/client/async_client.rs +++ b/tokio-xmpp/src/client/async_client.rs @@ -203,30 +203,28 @@ impl Stream for Client { self.poll_next(cx) } ClientState::Disconnected => Poll::Ready(None), - ClientState::Connecting(mut connect) => { - match Pin::new(&mut connect).poll(cx) { - Poll::Ready(Ok(Ok(stream))) => { - let bound_jid = stream.jid.clone(); - self.state = ClientState::Connected(stream); - Poll::Ready(Some(Event::Online { - bound_jid, - resumed: false, - })) - } - Poll::Ready(Ok(Err(e))) => { - self.state = ClientState::Disconnected; - return Poll::Ready(Some(Event::Disconnected(e.into()))); - } - Poll::Ready(Err(e)) => { - self.state = ClientState::Disconnected; - panic!("connect task: {}", e); - } - Poll::Pending => { - self.state = ClientState::Connecting(connect); - Poll::Pending - } + ClientState::Connecting(mut connect) => match Pin::new(&mut connect).poll(cx) { + Poll::Ready(Ok(Ok(stream))) => { + let bound_jid = stream.jid.clone(); + self.state = ClientState::Connected(stream); + Poll::Ready(Some(Event::Online { + bound_jid, + resumed: false, + })) } - } + Poll::Ready(Ok(Err(e))) => { + self.state = ClientState::Disconnected; + return Poll::Ready(Some(Event::Disconnected(e.into()))); + } + Poll::Ready(Err(e)) => { + self.state = ClientState::Disconnected; + panic!("connect task: {}", e); + } + Poll::Pending => { + self.state = ClientState::Connecting(connect); + Poll::Pending + } + }, ClientState::Connected(mut stream) => { // Poll sink match Pin::new(&mut stream).poll_ready(cx) { diff --git a/tokio-xmpp/src/xmpp_codec.rs b/tokio-xmpp/src/xmpp_codec.rs index b990897..f82f078 100644 --- a/tokio-xmpp/src/xmpp_codec.rs +++ b/tokio-xmpp/src/xmpp_codec.rs @@ -1,7 +1,9 @@ //! XML stream parser for XMPP +use crate::Error; use bytes::{BufMut, BytesMut}; use log::debug; +use minidom::{tokenize, tree_builder::TreeBuilder}; use std; use std::collections::HashMap; use std::default::Default; @@ -9,8 +11,6 @@ use std::fmt::Write; use std::io; use tokio_util::codec::{Decoder, Encoder}; use xmpp_parsers::Element; -use minidom::{tokenize, tree_builder::TreeBuilder}; -use crate::Error; /// Anything that can be sent or received on an XMPP/XML stream #[derive(Debug, Clone, PartialEq, Eq)] @@ -60,20 +60,23 @@ impl Decoder for XMPPCodec { self.stanza_builder.process_token(token)?; let has_stream_root = self.stanza_builder.depth() > 0; - if ! had_stream_root && has_stream_root { + if !had_stream_root && has_stream_root { let root = self.stanza_builder.top().unwrap(); - let attrs = root.attrs() - .map(|(name, value)| (name.to_owned(), value.to_owned())) - .chain( - root.prefixes.declared_prefixes() - .iter() - .map(|(prefix, namespace)| ( - prefix.as_ref().map(|prefix| format!("xmlns:{}", prefix)) - .unwrap_or_else(|| "xmlns".to_owned()), - namespace.clone() - )) - ) - .collect(); + let attrs = + root.attrs() + .map(|(name, value)| (name.to_owned(), value.to_owned())) + .chain(root.prefixes.declared_prefixes().iter().map( + |(prefix, namespace)| { + ( + prefix + .as_ref() + .map(|prefix| format!("xmlns:{}", prefix)) + .unwrap_or_else(|| "xmlns".to_owned()), + namespace.clone(), + ) + }, + )) + .collect(); return Ok(Some(Packet::StreamStart(attrs))); } else if self.stanza_builder.depth() == 1 { if let Some(stanza) = self.stanza_builder.unshift_child() {