minidom: allow tokenizer to be used with a preexisting BytesMut

This commit is contained in:
Astro 2022-03-25 01:37:56 +01:00
parent 81ffb2a9f1
commit 04579bcde4
3 changed files with 39 additions and 42 deletions

View file

@ -96,4 +96,4 @@ pub use error::{Error, Result};
pub use namespaces::NSChoice;
pub use node::Node;
pub use token::Token;
pub use tokenizer::{Tokenizer, TokenizerError};
pub use tokenizer::{Tokenizer, TokenizerError, tokenize};

View file

@ -33,34 +33,44 @@ impl Tokenizer {
/// Parse the next document fragment
pub fn pull(&mut self) -> Result<Option<Token>, Error> {
/// cannot return an error with location info that points to
/// our buffer that we still want to mutate
fn with_input_to_owned(e: nom::error::Error<&[u8]>) -> TokenizerError {
nom::error::Error {
input: std::str::from_utf8(e.input)
.unwrap_or("invalud UTF-8")
.to_owned(),
code: e.code,
}
tokenize(&mut self.buffer)
}
}
/// Parse the next document fragment
///
/// By calling this function directly instead of constructing a
/// Tokenizer, you can work with a preexisting BytesMut instance, for
/// example the one that is passed to
/// tokio_util::codec::Decoder::decode().
pub fn tokenize(buffer: &mut BytesMut) -> Result<Option<Token>, Error> {
/// Converts a nom error into one that owns its input. We cannot
/// return an error whose location info points into our buffer,
/// because we still want to mutate that buffer.
fn with_input_to_owned(e: nom::error::Error<&[u8]>) -> TokenizerError {
nom::error::Error {
input: std::str::from_utf8(e.input)
.unwrap_or("invalud UTF-8")
.to_owned(),
code: e.code,
}
let result: Option<(usize, Token)> = { match Token::parse(&self.buffer) {
Ok((s, token)) =>
Some((s.len(), token)),
Result::Err(nom::Err::Incomplete(_)) =>
None,
Result::Err(nom::Err::Error(e)) =>
return Err(with_input_to_owned(e).into()),
Result::Err(nom::Err::Failure(e)) =>
return Err(with_input_to_owned(e).into()),
} };
match result {
Some((s_len, token)) => {
let _ = self.buffer.split_to(self.buffer.len() - s_len);
Ok(Some(token))
}
None => Ok(None)
}
let result: Option<(usize, Token)> = { match Token::parse(&buffer) {
Ok((s, token)) =>
Some((s.len(), token)),
Result::Err(nom::Err::Incomplete(_)) =>
None,
Result::Err(nom::Err::Error(e)) =>
return Err(with_input_to_owned(e).into()),
Result::Err(nom::Err::Failure(e)) =>
return Err(with_input_to_owned(e).into()),
} };
match result {
Some((s_len, token)) => {
let _ = buffer.split_to(buffer.len() - s_len);
Ok(Some(token))
}
None => Ok(None)
}
}

View file

@ -9,7 +9,7 @@ use std::fmt::Write;
use std::io;
use tokio_util::codec::{Decoder, Encoder};
use xmpp_parsers::Element;
use minidom::{Tokenizer, tree_builder::TreeBuilder};
use minidom::{tokenize, tree_builder::TreeBuilder};
use crate::Error;
/// Anything that can be sent or received on an XMPP/XML stream
@ -30,18 +30,15 @@ pub struct XMPPCodec {
/// Outgoing
ns: Option<String>,
/// Incoming
tokenizer: Tokenizer,
stanza_builder: TreeBuilder,
}
impl XMPPCodec {
/// Constructor
pub fn new() -> Self {
let tokenizer = Tokenizer::new();
let stanza_builder = TreeBuilder::new();
XMPPCodec {
ns: None,
tokenizer,
stanza_builder,
}
}
@ -58,10 +55,7 @@ impl Decoder for XMPPCodec {
type Error = Error;
fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
self.tokenizer.push(buf);
buf.clear();
while let Some(token) = self.tokenizer.pull()? {
while let Some(token) = tokenize(buf)? {
let had_stream_root = self.stanza_builder.depth() > 0;
self.stanza_builder.process_token(token)?;
let has_stream_root = self.stanza_builder.depth() > 0;
@ -218,7 +212,6 @@ mod tests {
Ok(Some(Packet::StreamStart(_))) => true,
_ => false,
});
b.clear();
b.put_slice(b"</stream:stream>");
let r = c.decode(&mut b);
assert!(match r {
@ -238,7 +231,6 @@ mod tests {
_ => false,
});
b.clear();
b.put_slice("<test>ß</test".as_bytes());
let r = c.decode(&mut b);
assert!(match r {
@ -246,7 +238,6 @@ mod tests {
_ => false,
});
b.clear();
b.put_slice(b">");
let r = c.decode(&mut b);
assert!(match r {
@ -266,7 +257,6 @@ mod tests {
_ => false,
});
b.clear();
b.put(&b"<test>\xc3"[..]);
let r = c.decode(&mut b);
assert!(match r {
@ -274,7 +264,6 @@ mod tests {
_ => false,
});
b.clear();
b.put(&b"\x9f</test>"[..]);
let r = c.decode(&mut b);
assert!(match r {
@ -295,7 +284,6 @@ mod tests {
_ => false,
});
b.clear();
b.put_slice(b"<status xml:lang='en'>Test status</status>");
let r = c.decode(&mut b);
assert!(match r {
@ -345,7 +333,6 @@ mod tests {
_ => false,
});
b.clear();
b.put_slice(b"<message ");
b.put_slice(b"type='chat'><body>Foo</body></message>");
let r = c.decode(&mut b);