use std; use std::default::Default; use std::iter::FromIterator; use std::cell::RefCell; use std::rc::Rc; use std::fmt::Write; use std::str::from_utf8; use std::io::{Error, ErrorKind}; use std::collections::HashMap; use std::collections::vec_deque::VecDeque; use tokio_io::codec::{Encoder, Decoder}; use minidom::Element; use xml5ever::tokenizer::{XmlTokenizer, TokenSink, Token, Tag, TagKind}; use bytes::*; // const NS_XMLNS: &'static str = "http://www.w3.org/2000/xmlns/"; #[derive(Debug)] pub enum Packet { Error(Box), StreamStart(HashMap), Stanza(Element), Text(String), StreamEnd, } struct ParserSink { // Ready stanzas queue: Rc>>, // Parsing stack stack: Vec, } impl ParserSink { pub fn new(queue: Rc>>) -> Self { ParserSink { queue, stack: vec![], } } fn push_queue(&self, pkt: Packet) { println!("push: {:?}", pkt); self.queue.borrow_mut().push_back(pkt); } fn handle_start_tag(&mut self, tag: Tag) { let el = tag_to_element(&tag); self.stack.push(el); } fn handle_end_tag(&mut self) { let el = self.stack.pop().unwrap(); match self.stack.len() { // 0 => self.push_queue(Packet::StreamEnd), // 1 => self.push_queue(Packet::Stanza(el)), len => { let parent = &mut self.stack[len - 1]; parent.append_child(el); }, } } } fn tag_to_element(tag: &Tag) -> Element { let el_builder = Element::builder(tag.name.local.as_ref()) .ns(tag.name.ns.as_ref()); let el_builder = tag.attrs.iter().fold( el_builder, |el_builder, attr| el_builder.attr( attr.name.local.as_ref(), attr.value.as_ref() ) ); el_builder.build() } impl TokenSink for ParserSink { fn process_token(&mut self, token: Token) { println!("Token: {:?}", token); match token { Token::TagToken(tag) => match tag.kind { TagKind::StartTag => self.handle_start_tag(tag), TagKind::EndTag => self.handle_end_tag(), TagKind::EmptyTag => { self.handle_start_tag(tag); self.handle_end_tag(); }, TagKind::ShortTag => self.push_queue(Packet::Error(Box::new(Error::new(ErrorKind::InvalidInput, "ShortTag")))), }, Token::CharacterTokens(tendril) => match self.stack.len() { 0 | 1 => self.push_queue(Packet::Text(tendril.into())), len => { let el = &mut self.stack[len - 1]; el.append_text_node(tendril); }, }, Token::EOFToken => self.push_queue(Packet::StreamEnd), Token::ParseError(s) => { println!("ParseError: {:?}", s); self.push_queue(Packet::Error(Box::new(Error::new(ErrorKind::InvalidInput, (*s).to_owned())))) }, _ => (), } } // fn end(&mut self) { // } } pub struct XMPPCodec { parser: XmlTokenizer, // For handling truncated utf8 buf: Vec, queue: Rc>>, } impl XMPPCodec { pub fn new() -> Self { let queue = Rc::new(RefCell::new((VecDeque::new()))); let sink = ParserSink::new(queue.clone()); // TODO: configure parser? let parser = XmlTokenizer::new(sink, Default::default()); XMPPCodec { parser, queue, buf: vec![], } } } impl Decoder for XMPPCodec { type Item = Packet; type Error = Error; fn decode(&mut self, buf: &mut BytesMut) -> Result, Self::Error> { println!("decode {} bytes", buf.len()); let buf1: Box> = if self.buf.len() > 0 && buf.len() > 0 { let mut prefix = std::mem::replace(&mut self.buf, vec![]); prefix.extend_from_slice(buf.take().as_ref()); Box::new(prefix) } else { Box::new(buf.take()) }; let buf1 = buf1.as_ref().as_ref(); match from_utf8(buf1) { Ok(s) => { if s.len() > 0 { println!("<< {}", s); let tendril = FromIterator::from_iter(s.chars()); self.parser.feed(tendril); } }, // Remedies for truncated utf8 Err(e) if e.valid_up_to() >= buf1.len() - 3 => { // Prepare all the valid data let mut b = BytesMut::with_capacity(e.valid_up_to()); b.put(&buf1[0..e.valid_up_to()]); // Retry let result = self.decode(&mut b); // Keep the tail back in self.buf.extend_from_slice(&buf1[e.valid_up_to()..]); return result; }, Err(e) => { println!("error {} at {}/{} in {:?}", e, e.valid_up_to(), buf1.len(), buf1); return Err(Error::new(ErrorKind::InvalidInput, e)); }, } let result = self.queue.borrow_mut().pop_front(); Ok(result) } fn decode_eof(&mut self, buf: &mut BytesMut) -> Result, Self::Error> { self.decode(buf) } } impl Encoder for XMPPCodec { type Item = Packet; type Error = Error; fn encode(&mut self, item: Self::Item, dst: &mut BytesMut) -> Result<(), Self::Error> { println!("encode {:?}", item); match item { Packet::StreamStart(start_attrs) => { let mut buf = String::new(); write!(buf, "\n").unwrap(); print!(">> {}", buf); write!(dst, "{}", buf) .map_err(|_| Error::from(ErrorKind::InvalidInput)) }, Packet::Stanza(stanza) => { println!(">> {:?}", stanza); let mut root_ns = None; // TODO stanza.write_to_inner(&mut dst.clone().writer(), &mut root_ns) .map_err(|_| Error::from(ErrorKind::InvalidInput)) }, Packet::Text(text) => { let escaped = escape(&text); println!(">> {}", escaped); write!(dst, "{}", escaped) .map_err(|_| Error::from(ErrorKind::InvalidInput)) }, // TODO: Implement all _ => Ok(()) } } } /// Copied from RustyXML for now pub fn escape(input: &str) -> String { let mut result = String::with_capacity(input.len()); for c in input.chars() { match c { '&' => result.push_str("&"), '<' => result.push_str("<"), '>' => result.push_str(">"), '\'' => result.push_str("'"), '"' => result.push_str("""), o => result.push(o) } } result } #[cfg(test)] mod tests { use super::*; use bytes::BytesMut; #[test] fn test_stream_start() { let mut c = XMPPCodec::new(); let mut b = BytesMut::with_capacity(1024); b.put(r""); let r = c.decode(&mut b); assert!(match r { Ok(Some(Packet::StreamStart(_))) => true, _ => false, }); } #[test] fn test_truncated_stanza() { let mut c = XMPPCodec::new(); let mut b = BytesMut::with_capacity(1024); b.put(r""); let r = c.decode(&mut b); assert!(match r { Ok(Some(Packet::StreamStart(_))) => true, _ => false, }); b.clear(); b.put(r"ß true, _ => false, }); b.clear(); b.put(r">"); let r = c.decode(&mut b); assert!(match r { Ok(Some(Packet::Stanza(ref el))) if el.name() == "test" && el.text() == "ß" => true, _ => false, }); } #[test] fn test_truncated_utf8() { let mut c = XMPPCodec::new(); let mut b = BytesMut::with_capacity(1024); b.put(r""); let r = c.decode(&mut b); assert!(match r { Ok(Some(Packet::StreamStart(_))) => true, _ => false, }); b.clear(); b.put(&b"\xc3"[..]); let r = c.decode(&mut b); assert!(match r { Ok(None) => true, _ => false, }); b.clear(); b.put(&b"\x9f"[..]); let r = c.decode(&mut b); assert!(match r { Ok(Some(Packet::Stanza(ref el))) if el.name() == "test" && el.text() == "ß" => true, _ => false, }); } }