xmpp-rs/tokio-xmpp/src/xmpp_codec.rs

532 lines
17 KiB
Rust
Raw Normal View History

2018-08-02 17:58:19 +00:00
//! XML stream parser for XMPP
2018-12-18 18:04:31 +00:00
use crate::{ParseError, ParserError};
use bytes::{BufMut, BytesMut};
2020-03-18 20:11:21 +00:00
use log::{debug, error};
2017-06-01 22:42:57 +00:00
use std;
use std::borrow::Cow;
2018-12-18 18:04:31 +00:00
use std::collections::vec_deque::VecDeque;
use std::collections::HashMap;
use std::default::Default;
2018-12-18 18:04:31 +00:00
use std::fmt::Write;
use std::io;
use std::iter::FromIterator;
2018-09-06 15:46:06 +00:00
use std::str::from_utf8;
2020-03-05 00:25:24 +00:00
use std::sync::Arc;
use std::sync::Mutex;
use tokio_util::codec::{Decoder, Encoder};
use xml5ever::buffer_queue::BufferQueue;
2017-07-18 18:12:17 +00:00
use xml5ever::interface::Attribute;
2018-12-18 18:04:31 +00:00
use xml5ever::tokenizer::{Tag, TagKind, Token, TokenSink, XmlTokenizer};
use xmpp_parsers::Element;
2017-06-01 22:42:57 +00:00
2018-08-02 17:58:19 +00:00
/// Anything that can be sent or received on an XMPP/XML stream
2018-12-20 19:39:01 +00:00
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Packet {
2018-08-02 17:58:19 +00:00
/// `<stream:stream>` start tag
StreamStart(HashMap<String, String>),
2018-08-02 17:58:19 +00:00
/// A complete stanza or nonza
Stanza(Element),
2018-08-02 17:58:19 +00:00
/// Plain text (think whitespace keep-alive)
Text(String),
2018-08-02 17:58:19 +00:00
/// `</stream:stream>` closing tag
StreamEnd,
}
2017-06-01 22:42:57 +00:00
/// Entry of the queue shared between `ParserSink` and `XMPPCodec`: either a
/// parsed `Packet` or the `ParserError` reported while parsing.
type QueueItem = Result<Packet, ParserError>;
2018-08-02 17:58:19 +00:00
/// Parser state
struct ParserSink {
2017-07-18 18:12:17 +00:00
// Ready stanzas, shared with XMPPCodec
2020-03-05 00:25:24 +00:00
queue: Arc<Mutex<VecDeque<QueueItem>>>,
// Parsing stack
stack: Vec<Element>,
2017-07-18 18:12:17 +00:00
ns_stack: Vec<HashMap<Option<String>, String>>,
2017-06-01 22:42:57 +00:00
}
impl ParserSink {
2020-03-05 00:25:24 +00:00
pub fn new(queue: Arc<Mutex<VecDeque<QueueItem>>>) -> Self {
ParserSink {
queue,
stack: vec![],
2017-07-18 18:12:17 +00:00
ns_stack: vec![],
2017-06-01 22:42:57 +00:00
}
}
2017-06-01 22:42:57 +00:00
fn push_queue(&self, pkt: Packet) {
2020-03-05 00:25:24 +00:00
self.queue.lock().unwrap().push_back(Ok(pkt));
}
fn push_queue_error(&self, e: ParserError) {
2020-03-05 00:25:24 +00:00
self.queue.lock().unwrap().push_back(Err(e));
2017-06-01 22:42:57 +00:00
}
2018-08-02 17:58:19 +00:00
/// Lookup XML namespace declaration for given prefix (or no prefix)
2017-07-18 18:12:17 +00:00
fn lookup_ns(&self, prefix: &Option<String>) -> Option<&str> {
for nss in self.ns_stack.iter().rev() {
2017-07-20 22:19:08 +00:00
if let Some(ns) = nss.get(prefix) {
return Some(ns);
2017-07-18 18:12:17 +00:00
}
}
None
}
fn handle_start_tag(&mut self, tag: Tag) {
2017-07-18 18:12:17 +00:00
let mut nss = HashMap::new();
2018-12-18 18:04:31 +00:00
let is_prefix_xmlns = |attr: &Attribute| {
attr.name
.prefix
.as_ref()
.map(|prefix| prefix.eq_str_ignore_ascii_case("xmlns"))
.unwrap_or(false)
};
2017-07-18 18:12:17 +00:00
for attr in &tag.attrs {
match attr.name.local.as_ref() {
"xmlns" => {
nss.insert(None, attr.value.as_ref().to_owned());
2018-12-18 18:04:31 +00:00
}
2017-07-18 18:12:17 +00:00
prefix if is_prefix_xmlns(attr) => {
2018-12-18 18:04:31 +00:00
nss.insert(Some(prefix.to_owned()), attr.value.as_ref().to_owned());
}
2017-07-18 18:12:17 +00:00
_ => (),
}
}
self.ns_stack.push(nss);
let el = {
let el_ns = self
.lookup_ns(&tag.name.prefix.map(|prefix| prefix.as_ref().to_owned()))
.unwrap();
let mut el_builder = Element::builder(tag.name.local.as_ref(), el_ns);
2017-07-18 18:12:17 +00:00
for attr in &tag.attrs {
match attr.name.local.as_ref() {
"xmlns" => (),
_ if is_prefix_xmlns(attr) => (),
_ => {
2019-01-13 20:05:19 +00:00
let attr_name = if let Some(ref prefix) = attr.name.prefix {
Cow::Owned(format!("{}:{}", prefix, attr.name.local))
} else {
2019-01-13 20:05:19 +00:00
Cow::Borrowed(attr.name.local.as_ref())
};
el_builder = el_builder.attr(attr_name, attr.value.as_ref());
2018-12-18 18:04:31 +00:00
}
2017-07-18 18:12:17 +00:00
}
}
el_builder.build()
};
if self.stack.is_empty() {
2018-12-18 18:04:31 +00:00
let attrs = HashMap::from_iter(tag.attrs.iter().map(|attr| {
(
attr.name.local.as_ref().to_owned(),
attr.value.as_ref().to_owned(),
)
}));
2017-07-18 18:12:17 +00:00
self.push_queue(Packet::StreamStart(attrs));
}
self.stack.push(el);
}
fn handle_end_tag(&mut self) {
let el = self.stack.pop().unwrap();
2017-07-18 18:12:17 +00:00
self.ns_stack.pop();
match self.stack.len() {
// </stream:stream>
2018-12-18 18:04:31 +00:00
0 => self.push_queue(Packet::StreamEnd),
// </stanza>
2018-12-18 18:04:31 +00:00
1 => self.push_queue(Packet::Stanza(el)),
len => {
let parent = &mut self.stack[len - 1];
parent.append_child(el);
2018-12-18 18:04:31 +00:00
}
}
2017-06-01 22:42:57 +00:00
}
}
impl TokenSink for ParserSink {
fn process_token(&mut self, token: Token) {
match token {
Token::TagToken(tag) => match tag.kind {
2018-12-18 18:04:31 +00:00
TagKind::StartTag => self.handle_start_tag(tag),
TagKind::EndTag => self.handle_end_tag(),
TagKind::EmptyTag => {
self.handle_start_tag(tag);
self.handle_end_tag();
2018-12-18 18:04:31 +00:00
}
TagKind::ShortTag => self.push_queue_error(ParserError::ShortTag),
},
2018-12-18 18:04:31 +00:00
Token::CharacterTokens(tendril) => match self.stack.len() {
0 | 1 => self.push_queue(Packet::Text(tendril.into())),
len => {
let el = &mut self.stack[len - 1];
el.append_text_node(tendril);
}
},
Token::EOFToken => self.push_queue(Packet::StreamEnd),
Token::ParseError(s) => {
2018-09-06 15:46:06 +00:00
self.push_queue_error(ParserError::Parse(ParseError(s)));
2018-12-18 18:04:31 +00:00
}
_ => (),
}
}
// fn end(&mut self) {
// }
2017-06-01 22:42:57 +00:00
}
2018-08-02 17:58:19 +00:00
/// Stateful encoder/decoder for a bytestream from/to XMPP `Packet`
2017-06-01 22:42:57 +00:00
pub struct XMPPCodec {
2017-07-18 18:12:17 +00:00
/// Outgoing
ns: Option<String>,
/// Incoming
parser: XmlTokenizer<ParserSink>,
2017-07-18 18:12:17 +00:00
/// For handling incoming truncated utf8
// TODO: optimize using tendrils?
buf: Vec<u8>,
2017-07-18 18:12:17 +00:00
/// Shared with ParserSink
2020-03-05 00:25:24 +00:00
queue: Arc<Mutex<VecDeque<QueueItem>>>,
2017-06-01 22:42:57 +00:00
}
impl XMPPCodec {
2018-08-02 17:58:19 +00:00
/// Constructor
2017-06-01 22:42:57 +00:00
pub fn new() -> Self {
2020-03-05 00:25:24 +00:00
let queue = Arc::new(Mutex::new(VecDeque::new()));
let sink = ParserSink::new(queue.clone());
// TODO: configure parser?
let parser = XmlTokenizer::new(sink, Default::default());
2017-06-01 22:42:57 +00:00
XMPPCodec {
2017-07-18 18:12:17 +00:00
ns: None,
parser,
queue,
buf: vec![],
2017-06-01 22:42:57 +00:00
}
}
}
2017-07-20 22:19:08 +00:00
impl Default for XMPPCodec {
fn default() -> Self {
Self::new()
}
}
2017-06-04 00:05:08 +00:00
impl Decoder for XMPPCodec {
type Item = Packet;
type Error = ParserError;
2017-06-01 22:42:57 +00:00
2017-06-04 00:05:08 +00:00
fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
let buf1: Box<dyn AsRef<[u8]>> = if !self.buf.is_empty() && !buf.is_empty() {
2018-12-18 18:04:31 +00:00
let mut prefix = std::mem::replace(&mut self.buf, vec![]);
2020-03-05 00:25:24 +00:00
prefix.extend_from_slice(&buf.split_to(buf.len()));
2018-12-18 18:04:31 +00:00
Box::new(prefix)
} else {
2020-03-05 00:25:24 +00:00
Box::new(buf.split_to(buf.len()))
2018-12-18 18:04:31 +00:00
};
let buf1 = buf1.as_ref().as_ref();
match from_utf8(buf1) {
Ok(s) => {
debug!("<< {:?}", s);
2018-12-18 18:04:31 +00:00
if !s.is_empty() {
2019-06-12 14:44:13 +00:00
let mut buffer_queue = BufferQueue::new();
let tendril = FromIterator::from_iter(s.chars());
2019-06-12 14:44:13 +00:00
buffer_queue.push_back(tendril);
self.parser.feed(&mut buffer_queue);
2017-06-19 00:34:16 +00:00
}
2018-12-18 18:04:31 +00:00
}
// Remedies for truncated utf8
Err(e) if e.valid_up_to() >= buf1.len() - 3 => {
// Prepare all the valid data
let mut b = BytesMut::with_capacity(e.valid_up_to());
b.put(&buf1[0..e.valid_up_to()]);
// Retry
let result = self.decode(&mut b);
// Keep the tail back in
self.buf.extend_from_slice(&buf1[e.valid_up_to()..]);
return result;
2018-12-18 18:04:31 +00:00
}
Err(e) => {
2020-05-29 22:14:32 +00:00
error!(
"error {} at {}/{} in {:?}",
e,
e.valid_up_to(),
buf1.len(),
buf1
);
return Err(ParserError::Utf8(e));
2018-12-18 18:04:31 +00:00
}
2017-06-01 22:42:57 +00:00
}
2020-03-05 00:25:24 +00:00
match self.queue.lock().unwrap().pop_front() {
None => Ok(None),
2018-12-18 18:04:31 +00:00
Some(result) => result.map(|pkt| Some(pkt)),
}
2017-06-01 22:42:57 +00:00
}
fn decode_eof(&mut self, buf: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
2017-06-04 00:05:08 +00:00
self.decode(buf)
}
}
2020-06-21 23:32:01 +00:00
impl Encoder<Packet> for XMPPCodec {
type Error = io::Error;
2017-06-04 00:05:08 +00:00
2020-06-21 23:32:01 +00:00
fn encode(&mut self, item: Packet, dst: &mut BytesMut) -> Result<(), Self::Error> {
2017-07-18 20:12:00 +00:00
let remaining = dst.capacity() - dst.len();
let max_stanza_size: usize = 2usize.pow(16);
if remaining < max_stanza_size {
dst.reserve(max_stanza_size - remaining);
}
fn to_io_err<E: Into<Box<dyn std::error::Error + Send + Sync>>>(e: E) -> io::Error {
io::Error::new(io::ErrorKind::InvalidInput, e)
}
2017-06-04 00:05:08 +00:00
match item {
Packet::StreamStart(start_attrs) => {
let mut buf = String::new();
write!(buf, "<stream:stream").map_err(to_io_err)?;
2017-07-20 22:19:08 +00:00
for (name, value) in start_attrs {
write!(buf, " {}=\"{}\"", escape(&name), escape(&value)).map_err(to_io_err)?;
2017-07-18 18:12:17 +00:00
if name == "xmlns" {
self.ns = Some(value);
}
}
write!(buf, ">\n").map_err(to_io_err)?;
debug!(">> {:?}", buf);
write!(dst, "{}", buf).map_err(to_io_err)
2018-12-18 18:04:31 +00:00
}
Packet::Stanza(stanza) => stanza
.write_to(&mut WriteBytes::new(dst))
.and_then(|_| {
debug!(">> {:?}", dst);
Ok(())
})
.map_err(|e| to_io_err(format!("{}", e))),
Packet::Text(text) => write_text(&text, dst)
.and_then(|_| {
debug!(">> {:?}", dst);
Ok(())
})
.map_err(to_io_err),
2020-05-29 22:14:32 +00:00
Packet::StreamEnd => write!(dst, "</stream:stream>\n").map_err(to_io_err),
2017-06-01 22:42:57 +00:00
}
}
}
2018-08-02 17:58:19 +00:00
/// Write XML-escaped text string
2017-07-18 18:12:17 +00:00
pub fn write_text<W: Write>(text: &str, writer: &mut W) -> Result<(), std::fmt::Error> {
2017-08-14 01:56:08 +00:00
write!(writer, "{}", escape(text))
2017-07-18 18:12:17 +00:00
}
2017-07-20 22:19:08 +00:00
/// Replace the XML special characters `&`, `<`, `>`, `'` and `"` by their
/// predefined entities; every other character is copied unchanged.
///
/// Copied from `RustyXML` for now
pub fn escape(input: &str) -> String {
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut escaped, ch| {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '\'' => escaped.push_str("&apos;"),
                '"' => escaped.push_str("&quot;"),
                other => escaped.push(other),
            }
            escaped
        })
}
2017-08-14 01:56:08 +00:00
/// Adapter exposing a `BytesMut` through `std::io::Write`.
///
/// `BytesMut` implements only `std::fmt::Write`, not `std::io::Write`; the
/// latter trait is required for minidom's `Element::write_to_inner()`.
struct WriteBytes<'a> {
    /// Buffer that receives every written byte
    dst: &'a mut BytesMut,
}
impl<'a> WriteBytes<'a> {
fn new(dst: &'a mut BytesMut) -> Self {
WriteBytes { dst }
}
}
impl<'a> std::io::Write for WriteBytes<'a> {
fn write(&mut self, buf: &[u8]) -> std::result::Result<usize, std::io::Error> {
self.dst.put_slice(buf);
Ok(buf.len())
}
fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use bytes::BytesMut;
#[test]
fn test_stream_start() {
let mut c = XMPPCodec::new();
let mut b = BytesMut::with_capacity(1024);
2020-03-05 00:25:24 +00:00
b.put_slice(b"<?xml version='1.0'?><stream:stream xmlns:stream='http://etherx.jabber.org/streams' version='1.0' xmlns='jabber:client'>");
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::StreamStart(_))) => true,
_ => false,
});
}
2019-01-26 20:07:15 +00:00
#[test]
fn test_stream_end() {
let mut c = XMPPCodec::new();
let mut b = BytesMut::with_capacity(1024);
2020-03-05 00:25:24 +00:00
b.put_slice(b"<?xml version='1.0'?><stream:stream xmlns:stream='http://etherx.jabber.org/streams' version='1.0' xmlns='jabber:client'>");
2019-01-26 20:07:15 +00:00
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::StreamStart(_))) => true,
_ => false,
});
b.clear();
2020-03-05 00:25:24 +00:00
b.put_slice(b"</stream:stream>");
2019-01-26 20:07:15 +00:00
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::StreamEnd)) => true,
_ => false,
});
}
#[test]
fn test_truncated_stanza() {
let mut c = XMPPCodec::new();
let mut b = BytesMut::with_capacity(1024);
2020-03-05 00:25:24 +00:00
b.put_slice(b"<?xml version='1.0'?><stream:stream xmlns:stream='http://etherx.jabber.org/streams' version='1.0' xmlns='jabber:client'>");
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::StreamStart(_))) => true,
_ => false,
});
b.clear();
2020-03-05 00:25:24 +00:00
b.put_slice("<test>ß</test".as_bytes());
let r = c.decode(&mut b);
assert!(match r {
Ok(None) => true,
_ => false,
});
b.clear();
2020-03-05 00:25:24 +00:00
b.put_slice(b">");
let r = c.decode(&mut b);
assert!(match r {
2018-12-18 18:04:31 +00:00
Ok(Some(Packet::Stanza(ref el))) if el.name() == "test" && el.text() == "ß" => true,
_ => false,
});
}
#[test]
fn test_truncated_utf8() {
let mut c = XMPPCodec::new();
let mut b = BytesMut::with_capacity(1024);
2020-03-05 00:25:24 +00:00
b.put_slice(b"<?xml version='1.0'?><stream:stream xmlns:stream='http://etherx.jabber.org/streams' version='1.0' xmlns='jabber:client'>");
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::StreamStart(_))) => true,
_ => false,
});
b.clear();
b.put(&b"<test>\xc3"[..]);
let r = c.decode(&mut b);
assert!(match r {
Ok(None) => true,
_ => false,
});
b.clear();
b.put(&b"\x9f</test>"[..]);
let r = c.decode(&mut b);
assert!(match r {
2018-12-18 18:04:31 +00:00
Ok(Some(Packet::Stanza(ref el))) if el.name() == "test" && el.text() == "ß" => true,
_ => false,
});
}
2017-07-18 20:12:00 +00:00
/// test case for https://gitlab.com/xmpp-rs/tokio-xmpp/issues/3
#[test]
fn test_atrribute_prefix() {
let mut c = XMPPCodec::new();
let mut b = BytesMut::with_capacity(1024);
2020-03-05 00:25:24 +00:00
b.put_slice(b"<?xml version='1.0'?><stream:stream xmlns:stream='http://etherx.jabber.org/streams' version='1.0' xmlns='jabber:client'>");
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::StreamStart(_))) => true,
_ => false,
});
b.clear();
2020-03-05 00:25:24 +00:00
b.put_slice(b"<status xml:lang='en'>Test status</status>");
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::Stanza(ref el)))
if el.name() == "status"
&& el.text() == "Test status"
&& el.attr("xml:lang").map_or(false, |a| a == "en") =>
true,
_ => false,
});
}
2017-07-18 20:12:00 +00:00
/// By default, encode() only get's a BytesMut that has 8kb space reserved.
#[test]
fn test_large_stanza() {
2020-03-05 00:25:24 +00:00
use futures::{executor::block_on, sink::SinkExt};
2018-12-18 18:04:31 +00:00
use std::io::Cursor;
2020-03-05 00:25:24 +00:00
use tokio_util::codec::FramedWrite;
let mut framed = FramedWrite::new(Cursor::new(vec![]), XMPPCodec::new());
2017-07-18 20:12:00 +00:00
let mut text = "".to_owned();
for _ in 0..2usize.pow(15) {
text = text + "A";
}
let stanza = Element::builder("message", "jabber:client")
.append(
Element::builder("body", "jabber:client")
.append(text.as_ref())
.build(),
)
2017-07-18 20:12:00 +00:00
.build();
2020-03-05 00:25:24 +00:00
block_on(framed.send(Packet::Stanza(stanza))).expect("send");
2018-12-18 18:04:31 +00:00
assert_eq!(
framed.get_ref().get_ref(),
&("<message xmlns=\"jabber:client\"><body>".to_owned() + &text + "</body></message>")
.as_bytes()
2018-12-18 18:04:31 +00:00
);
2017-07-18 20:12:00 +00:00
}
#[test]
fn test_cut_out_stanza() {
let mut c = XMPPCodec::new();
let mut b = BytesMut::with_capacity(1024);
2020-03-05 00:25:24 +00:00
b.put_slice(b"<?xml version='1.0'?><stream:stream xmlns:stream='http://etherx.jabber.org/streams' version='1.0' xmlns='jabber:client'>");
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::StreamStart(_))) => true,
_ => false,
});
b.clear();
2020-03-05 00:25:24 +00:00
b.put_slice(b"<message ");
b.put_slice(b"type='chat'><body>Foo</body></message>");
let r = c.decode(&mut b);
assert!(match r {
Ok(Some(Packet::Stanza(_))) => true,
_ => false,
});
}
}