minidom: allow tokenizer to be used with a preexisting BytesMut

This commit is contained in:
Astro 2022-03-25 01:37:56 +01:00
parent fb29b24a1f
commit 910e587003
3 changed files with 39 additions and 42 deletions

View file

@ -96,4 +96,4 @@ pub use error::{Error, Result};
pub use namespaces::NSChoice; pub use namespaces::NSChoice;
pub use node::Node; pub use node::Node;
pub use token::Token; pub use token::Token;
pub use tokenizer::{Tokenizer, TokenizerError}; pub use tokenizer::{Tokenizer, TokenizerError, tokenize};

View file

@ -33,34 +33,44 @@ impl Tokenizer {
/// Parse the next document fragment /// Parse the next document fragment
pub fn pull(&mut self) -> Result<Option<Token>, Error> { pub fn pull(&mut self) -> Result<Option<Token>, Error> {
/// cannot return an error with location info that points to tokenize(&mut self.buffer)
/// our buffer that we still want to mutate }
fn with_input_to_owned(e: nom::error::Error<&[u8]>) -> TokenizerError { }
nom::error::Error {
input: std::str::from_utf8(e.input) /// Parse the next document fragment
.unwrap_or("invalid UTF-8") ///
.to_owned(), /// By using this function directly instead of constructing a
code: e.code, /// Tokenizer, you can work with a preexisting BytesMut instance that
} /// has been passed by tokio_util::codec::Decoder::decode() for
/// example.
pub fn tokenize(buffer: &mut BytesMut) -> Result<Option<Token>, Error> {
/// cannot return an error with location info that points to
/// our buffer that we still want to mutate
fn with_input_to_owned(e: nom::error::Error<&[u8]>) -> TokenizerError {
nom::error::Error {
input: std::str::from_utf8(e.input)
.unwrap_or("invalid UTF-8")
.to_owned(),
code: e.code,
} }
}
let result: Option<(usize, Token)> = { match Token::parse(&self.buffer) {
Ok((s, token)) => let result: Option<(usize, Token)> = { match Token::parse(&buffer) {
Some((s.len(), token)), Ok((s, token)) =>
Result::Err(nom::Err::Incomplete(_)) => Some((s.len(), token)),
None, Result::Err(nom::Err::Incomplete(_)) =>
Result::Err(nom::Err::Error(e)) => None,
return Err(with_input_to_owned(e).into()), Result::Err(nom::Err::Error(e)) =>
Result::Err(nom::Err::Failure(e)) => return Err(with_input_to_owned(e).into()),
return Err(with_input_to_owned(e).into()), Result::Err(nom::Err::Failure(e)) =>
} }; return Err(with_input_to_owned(e).into()),
match result { } };
Some((s_len, token)) => { match result {
let _ = self.buffer.split_to(self.buffer.len() - s_len); Some((s_len, token)) => {
Ok(Some(token)) let _ = buffer.split_to(buffer.len() - s_len);
} Ok(Some(token))
None => Ok(None)
} }
None => Ok(None)
} }
} }

View file

@ -9,7 +9,7 @@ use std::fmt::Write;
use std::io; use std::io;
use tokio_util::codec::{Decoder, Encoder}; use tokio_util::codec::{Decoder, Encoder};
use xmpp_parsers::Element; use xmpp_parsers::Element;
use minidom::{Tokenizer, tree_builder::TreeBuilder}; use minidom::{tokenize, tree_builder::TreeBuilder};
use crate::Error; use crate::Error;
/// Anything that can be sent or received on an XMPP/XML stream /// Anything that can be sent or received on an XMPP/XML stream
@ -30,18 +30,15 @@ pub struct XMPPCodec {
/// Outgoing /// Outgoing
ns: Option<String>, ns: Option<String>,
/// Incoming /// Incoming
tokenizer: Tokenizer,
stanza_builder: TreeBuilder, stanza_builder: TreeBuilder,
} }
impl XMPPCodec { impl XMPPCodec {
/// Constructor /// Constructor
pub fn new() -> Self { pub fn new() -> Self {
let tokenizer = Tokenizer::new();
let stanza_builder = TreeBuilder::new(); let stanza_builder = TreeBuilder::new();
XMPPCodec { XMPPCodec {
ns: None, ns: None,
tokenizer,
stanza_builder, stanza_builder,
} }
} }
@ -58,10 +55,7 @@ impl Decoder for XMPPCodec {
type Error = Error; type Error = Error;
fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> { fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
self.tokenizer.push(buf); while let Some(token) = tokenize(buf)? {
buf.clear();
while let Some(token) = self.tokenizer.pull()? {
let had_stream_root = self.stanza_builder.depth() > 0; let had_stream_root = self.stanza_builder.depth() > 0;
self.stanza_builder.process_token(token)?; self.stanza_builder.process_token(token)?;
let has_stream_root = self.stanza_builder.depth() > 0; let has_stream_root = self.stanza_builder.depth() > 0;
@ -218,7 +212,6 @@ mod tests {
Ok(Some(Packet::StreamStart(_))) => true, Ok(Some(Packet::StreamStart(_))) => true,
_ => false, _ => false,
}); });
b.clear();
b.put_slice(b"</stream:stream>"); b.put_slice(b"</stream:stream>");
let r = c.decode(&mut b); let r = c.decode(&mut b);
assert!(match r { assert!(match r {
@ -238,7 +231,6 @@ mod tests {
_ => false, _ => false,
}); });
b.clear();
b.put_slice("<test>ß</test".as_bytes()); b.put_slice("<test>ß</test".as_bytes());
let r = c.decode(&mut b); let r = c.decode(&mut b);
assert!(match r { assert!(match r {
@ -246,7 +238,6 @@ mod tests {
_ => false, _ => false,
}); });
b.clear();
b.put_slice(b">"); b.put_slice(b">");
let r = c.decode(&mut b); let r = c.decode(&mut b);
assert!(match r { assert!(match r {
@ -266,7 +257,6 @@ mod tests {
_ => false, _ => false,
}); });
b.clear();
b.put(&b"<test>\xc3"[..]); b.put(&b"<test>\xc3"[..]);
let r = c.decode(&mut b); let r = c.decode(&mut b);
assert!(match r { assert!(match r {
@ -274,7 +264,6 @@ mod tests {
_ => false, _ => false,
}); });
b.clear();
b.put(&b"\x9f</test>"[..]); b.put(&b"\x9f</test>"[..]);
let r = c.decode(&mut b); let r = c.decode(&mut b);
assert!(match r { assert!(match r {
@ -295,7 +284,6 @@ mod tests {
_ => false, _ => false,
}); });
b.clear();
b.put_slice(b"<status xml:lang='en'>Test status</status>"); b.put_slice(b"<status xml:lang='en'>Test status</status>");
let r = c.decode(&mut b); let r = c.decode(&mut b);
assert!(match r { assert!(match r {
@ -345,7 +333,6 @@ mod tests {
_ => false, _ => false,
}); });
b.clear();
b.put_slice(b"<message "); b.put_slice(b"<message ");
b.put_slice(b"type='chat'><body>Foo</body></message>"); b.put_slice(b"type='chat'><body>Foo</body></message>");
let r = c.decode(&mut b); let r = c.decode(&mut b);