From 04579bcde40b5b50bf8501eec3377253cfbe34f3 Mon Sep 17 00:00:00 2001
From: Astro
Date: Fri, 25 Mar 2022 01:37:56 +0100
Subject: [PATCH] minidom: allow tokenizer to be used with a preexisting
 BytesMut

---
 minidom/src/lib.rs           |  2 +-
 minidom/src/tokenizer.rs     | 62 +++++++++++++++++++++---------------
 tokio-xmpp/src/xmpp_codec.rs | 17 ++--------
 3 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/minidom/src/lib.rs b/minidom/src/lib.rs
index beea39a..5717ada 100644
--- a/minidom/src/lib.rs
+++ b/minidom/src/lib.rs
@@ -96,4 +96,4 @@ pub use error::{Error, Result};
 pub use namespaces::NSChoice;
 pub use node::Node;
 pub use token::Token;
-pub use tokenizer::{Tokenizer, TokenizerError};
+pub use tokenizer::{Tokenizer, TokenizerError, tokenize};
diff --git a/minidom/src/tokenizer.rs b/minidom/src/tokenizer.rs
index e0f13d1..7f3be48 100644
--- a/minidom/src/tokenizer.rs
+++ b/minidom/src/tokenizer.rs
@@ -33,34 +33,44 @@ impl Tokenizer {
 
     /// Parse the next document fragment
     pub fn pull(&mut self) -> Result<Option<Token>, Error> {
-        /// cannot return an error with location info that points to
-        /// our buffer that we still want to mutate
-        fn with_input_to_owned(e: nom::error::Error<&[u8]>) -> TokenizerError {
-            nom::error::Error {
-                input: std::str::from_utf8(e.input)
-                    .unwrap_or("invalud UTF-8")
-                    .to_owned(),
-                code: e.code,
-            }
+        tokenize(&mut self.buffer)
+    }
+}
+
+/// Parse the next document fragment
+///
+/// By using this function directly instead of constructing a
+/// Tokenizer, you can work with a preexisting BytesMut instance that
+/// has been passed by tokio_util::codec::Decoder::decode() for
+/// example.
+pub fn tokenize(buffer: &mut BytesMut) -> Result<Option<Token>, Error> {
+    /// cannot return an error with location info that points to
+    /// our buffer that we still want to mutate
+    fn with_input_to_owned(e: nom::error::Error<&[u8]>) -> TokenizerError {
+        nom::error::Error {
+            input: std::str::from_utf8(e.input)
+                .unwrap_or("invalud UTF-8")
+                .to_owned(),
+            code: e.code,
         }
-
-        let result: Option<(usize, Token)> = { match Token::parse(&self.buffer) {
-            Ok((s, token)) =>
-                Some((s.len(), token)),
-            Result::Err(nom::Err::Incomplete(_)) =>
-                None,
-            Result::Err(nom::Err::Error(e)) =>
-                return Err(with_input_to_owned(e).into()),
-            Result::Err(nom::Err::Failure(e)) =>
-                return Err(with_input_to_owned(e).into()),
-        } };
-        match result {
-            Some((s_len, token)) => {
-                let _ = self.buffer.split_to(self.buffer.len() - s_len);
-                Ok(Some(token))
-            }
-            None => Ok(None)
+    }
+
+    let result: Option<(usize, Token)> = { match Token::parse(&buffer) {
+        Ok((s, token)) =>
+            Some((s.len(), token)),
+        Result::Err(nom::Err::Incomplete(_)) =>
+            None,
+        Result::Err(nom::Err::Error(e)) =>
+            return Err(with_input_to_owned(e).into()),
+        Result::Err(nom::Err::Failure(e)) =>
+            return Err(with_input_to_owned(e).into()),
+    } };
+    match result {
+        Some((s_len, token)) => {
+            let _ = buffer.split_to(buffer.len() - s_len);
+            Ok(Some(token))
         }
+        None => Ok(None)
     }
 }
 
diff --git a/tokio-xmpp/src/xmpp_codec.rs b/tokio-xmpp/src/xmpp_codec.rs
index fcd6b08..b990897 100644
--- a/tokio-xmpp/src/xmpp_codec.rs
+++ b/tokio-xmpp/src/xmpp_codec.rs
@@ -9,7 +9,7 @@ use std::fmt::Write;
 use std::io;
 use tokio_util::codec::{Decoder, Encoder};
 use xmpp_parsers::Element;
-use minidom::{Tokenizer, tree_builder::TreeBuilder};
+use minidom::{tokenize, tree_builder::TreeBuilder};
 use crate::Error;
 
 /// Anything that can be sent or received on an XMPP/XML stream
@@ -30,18 +30,15 @@ pub struct XMPPCodec {
     /// Outgoing
     ns: Option<String>,
     /// Incoming
-    tokenizer: Tokenizer,
     stanza_builder: TreeBuilder,
 }
 
 impl XMPPCodec {
     /// Constructor
     pub fn new() -> Self {
-        let tokenizer = Tokenizer::new();
         let stanza_builder = TreeBuilder::new();
         XMPPCodec {
             ns: None,
-            tokenizer,
             stanza_builder,
         }
     }
@@ -58,10 +55,7 @@ impl Decoder for XMPPCodec {
     type Error = Error;
 
     fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
-        self.tokenizer.push(buf);
-        buf.clear();
-
-        while let Some(token) = self.tokenizer.pull()? {
+        while let Some(token) = tokenize(buf)? {
             let had_stream_root = self.stanza_builder.depth() > 0;
             self.stanza_builder.process_token(token)?;
             let has_stream_root = self.stanza_builder.depth() > 0;
@@ -218,7 +212,6 @@ mod tests {
             Ok(Some(Packet::StreamStart(_))) => true,
             _ => false,
         });
-        b.clear();
         b.put_slice(b"</stream:stream>");
         let r = c.decode(&mut b);
         assert!(match r {
@@ -238,7 +231,6 @@
             _ => false,
         });
 
-        b.clear();
         b.put_slice("<test>ß</test".as_bytes());
         let r = c.decode(&mut b);
         assert!(match r {
@@ -246,7 +238,6 @@
             _ => false,
         });
 
-        b.clear();
         b.put_slice(b">");
         let r = c.decode(&mut b);
         assert!(match r {
@@ -266,7 +257,6 @@
             _ => false,
         });
 
-        b.clear();
         b.put(&b"<test>\xc3"[..]);
         let r = c.decode(&mut b);
         assert!(match r {
@@ -274,7 +264,6 @@
             _ => false,
         });
 
-        b.clear();
         b.put(&b"\x9f</test>"[..]);
         let r = c.decode(&mut b);
         assert!(match r {
@@ -295,7 +284,6 @@
             _ => false,
         });
 
-        b.clear();
         b.put_slice(b"Test status");
         let r = c.decode(&mut b);
         assert!(match r {
@@ -345,7 +333,6 @@
             _ => false,
         });
 
-        b.clear();
         b.put_slice(b"Foo");
         let r = c.decode(&mut b);
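
A minimal sketch (not part of the patch) of the call pattern the new
tokenize() enables: a tokio_util Decoder that parses straight out of the
BytesMut it is handed, with no intermediate buffer. TokenDecoder and
CodecError are hypothetical names for illustration only; tokio_util's
Decoder trait requires its error type to implement From<io::Error>,
hence the small error enum.

    use bytes::BytesMut;
    use minidom::{tokenize, Token};
    use std::io;
    use tokio_util::codec::Decoder;

    /// Hypothetical error type; tokio_util's Decoder requires
    /// Self::Error: From<io::Error>.
    #[derive(Debug)]
    enum CodecError {
        Io(io::Error),
        Parse(minidom::Error),
    }

    impl From<io::Error> for CodecError {
        fn from(e: io::Error) -> Self {
            CodecError::Io(e)
        }
    }

    impl From<minidom::Error> for CodecError {
        fn from(e: minidom::Error) -> Self {
            CodecError::Parse(e)
        }
    }

    /// Hypothetical decoder that keeps no buffer of its own: it pulls
    /// tokens directly out of the BytesMut the codec machinery hands it.
    struct TokenDecoder;

    impl Decoder for TokenDecoder {
        type Item = Token;
        type Error = CodecError;

        fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<Token>, CodecError> {
            // tokenize() splits the parsed prefix off `buf` in place and
            // returns Ok(None) when more bytes are needed, so the old
            // push()/clear() copy into a Tokenizer-owned buffer goes away.
            Ok(tokenize(buf)?)
        }
    }

This mirrors the xmpp_codec.rs change above: decode() now hands its
BytesMut straight to tokenize() instead of first copying the bytes into
the codec's own Tokenizer.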