// Copyright (c) 2019 Emmanuel Gil Peyrot // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use crate::message::MessagePayload; use crate::ns; use crate::util::error::Error; use minidom::{Element, Node}; use std::collections::HashMap; use std::convert::TryFrom; // TODO: Use a proper lang type. type Lang = String; /// Container for formatted text. #[derive(Debug, Clone)] pub struct XhtmlIm { /// Map of language to body element. bodies: HashMap, } impl XhtmlIm { /// Serialise formatted text to HTML. pub fn to_html(self) -> String { let mut html = Vec::new(); // TODO: use the best language instead. for (lang, body) in self.bodies { if lang.is_empty() { assert!(body.xml_lang.is_none()); } else { assert_eq!(Some(lang), body.xml_lang); } for tag in body.children { html.push(tag.to_html()); } break; } html.concat() } /// Removes all unknown elements. fn flatten(self) -> XhtmlIm { let mut bodies = HashMap::new(); for (lang, body) in self.bodies { let children = body.children.into_iter().fold(vec![], |mut acc, child| { match child { Child::Tag(Tag::Unknown(children)) => acc.extend(children), any => acc.push(any), } acc }); let body = Body { children, ..body }; bodies.insert(lang, body); } XhtmlIm { bodies } } } impl MessagePayload for XhtmlIm {} impl TryFrom for XhtmlIm { type Error = Error; fn try_from(elem: Element) -> Result { check_self!(elem, "html", XHTML_IM); check_no_attributes!(elem, "html"); let mut bodies = HashMap::new(); for child in elem.children() { if child.is("body", ns::XHTML) { let child = child.clone(); let lang = match child.attr("xml:lang") { Some(lang) => lang, None => "", } .to_string(); let body = Body::try_from(child)?; match bodies.insert(lang, body) { None => (), Some(_) => { return Err(Error::ParseError( "Two identical language bodies found in XHTML-IM.", )) } } } else { return Err(Error::ParseError("Unknown element in XHTML-IM.")); } } Ok(XhtmlIm { bodies }.flatten()) } } impl From for Element { fn from(wrapper: XhtmlIm) -> Element { Element::builder("html") .ns(ns::XHTML_IM) .append_all(wrapper.bodies.into_iter().map(|(lang, body)| { if lang.is_empty() { assert!(body.xml_lang.is_none()); } else { assert_eq!(Some(lang), body.xml_lang); } Element::from(body) })) .build() } } #[derive(Debug, Clone)] enum Child { Tag(Tag), Text(String), } impl Child { fn to_html(self) -> String { match self { Child::Tag(tag) => tag.to_html(), Child::Text(text) => text, } } } #[derive(Debug, Clone)] struct Property { key: String, value: String, } type Css = Vec; fn get_style_string(style: Css) -> Option { let mut result = vec![]; for Property { key, value } in style { result.push(format!("{}: {}", key, value)); } if result.is_empty() { return None; } Some(result.join("; ")) } #[derive(Debug, Clone)] struct Body { style: Css, xml_lang: Option, children: Vec, } impl TryFrom for Body { type Error = Error; fn try_from(elem: Element) -> Result { let mut children = vec![]; for child in elem.nodes() { match child { Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)), Node::Text(text) => children.push(Child::Text(text.clone())), } } Ok(Body { style: parse_css(elem.attr("style")), xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()), children, }) } } impl From for Element { fn from(body: Body) -> Element { Element::builder("body") .ns(ns::XHTML) .attr("style", get_style_string(body.style)) .attr("xml:lang", body.xml_lang) .append_all(children_to_nodes(body.children)) .build() } } #[derive(Debug, Clone)] enum Tag { A { href: Option, style: Css, type_: Option, children: Vec, }, Blockquote { style: Css, children: Vec, }, Br, Cite { style: Css, children: Vec, }, Em { children: Vec, }, Img { src: Option, alt: Option, }, // TODO: height, width, style Li { style: Css, children: Vec, }, Ol { style: Css, children: Vec, }, P { style: Css, children: Vec, }, Span { style: Css, children: Vec, }, Strong { children: Vec, }, Ul { style: Css, children: Vec, }, Unknown(Vec), } impl Tag { fn to_html(self) -> String { match self { Tag::A { href, style, type_, children, } => { let href = write_attr(href, "href"); let style = write_attr(get_style_string(style), "style"); let type_ = write_attr(type_, "type"); format!( "{}", href, style, type_, children_to_html(children) ) } Tag::Blockquote { style, children } => { let style = write_attr(get_style_string(style), "style"); format!( "{}", style, children_to_html(children) ) } Tag::Br => String::from("
"), Tag::Cite { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::Em { children } => format!("{}", children_to_html(children)), Tag::Img { src, alt } => { let src = write_attr(src, "src"); let alt = write_attr(alt, "alt"); format!("", src, alt) } Tag::Li { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::Ol { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::P { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}

", style, children_to_html(children)) } Tag::Span { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::Strong { children } => format!("{}", children_to_html(children)), Tag::Ul { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::Unknown(_) => { panic!("No unknown element should be present in XHTML-IM after parsing.") } } } } impl TryFrom for Tag { type Error = Error; fn try_from(elem: Element) -> Result { let mut children = vec![]; for child in elem.nodes() { match child { Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)), Node::Text(text) => children.push(Child::Text(text.clone())), } } Ok(match elem.name() { "a" => Tag::A { href: elem.attr("href").map(|href| href.to_string()), style: parse_css(elem.attr("style")), type_: elem.attr("type").map(|type_| type_.to_string()), children, }, "blockquote" => Tag::Blockquote { style: parse_css(elem.attr("style")), children, }, "br" => Tag::Br, "cite" => Tag::Cite { style: parse_css(elem.attr("style")), children, }, "em" => Tag::Em { children }, "img" => Tag::Img { src: elem.attr("src").map(|src| src.to_string()), alt: elem.attr("alt").map(|alt| alt.to_string()), }, "li" => Tag::Li { style: parse_css(elem.attr("style")), children, }, "ol" => Tag::Ol { style: parse_css(elem.attr("style")), children, }, "p" => Tag::P { style: parse_css(elem.attr("style")), children, }, "span" => Tag::Span { style: parse_css(elem.attr("style")), children, }, "strong" => Tag::Strong { children }, "ul" => Tag::Ul { style: parse_css(elem.attr("style")), children, }, _ => Tag::Unknown(children), }) } } impl From for Element { fn from(tag: Tag) -> Element { let (name, attrs, children) = match tag { Tag::A { href, style, type_, children, } => ( "a", { let mut attrs = vec![]; if let Some(href) = href { attrs.push(("href", href)); } if let Some(style) = get_style_string(style) { attrs.push(("style", style)); } if let Some(type_) = type_ { attrs.push(("type", type_)); } attrs }, children, ), Tag::Blockquote { style, children } => ( "blockquote", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children, ), Tag::Br => ("br", vec![], vec![]), Tag::Cite { style, children } => ( "cite", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children, ), Tag::Em { children } => ("em", vec![], children), Tag::Img { src, alt } => { let mut attrs = vec![]; if let Some(src) = src { attrs.push(("src", src)); } if let Some(alt) = alt { attrs.push(("alt", alt)); } ("img", attrs, vec![]) } Tag::Li { style, children } => ( "li", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children, ), Tag::Ol { style, children } => ( "ol", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children, ), Tag::P { style, children } => ( "p", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children, ), Tag::Span { style, children } => ( "span", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children, ), Tag::Strong { children } => ("strong", vec![], children), Tag::Ul { style, children } => ( "ul", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children, ), Tag::Unknown(_) => { panic!("No unknown element should be present in XHTML-IM after parsing.") } }; let mut builder = Element::builder(name) .ns(ns::XHTML) .append_all(children_to_nodes(children)); for (key, value) in attrs { builder = builder.attr(key, value); } builder.build() } } fn children_to_nodes(children: Vec) -> impl IntoIterator { children.into_iter().map(|child| match child { Child::Tag(tag) => Node::Element(Element::from(tag)), Child::Text(text) => Node::Text(text), }) } fn children_to_html(children: Vec) -> String { children .into_iter() .map(|child| child.to_html()) .collect::>() .concat() } fn write_attr(attr: Option, name: &str) -> String { match attr { Some(attr) => format!(" {}='{}'", name, attr), None => String::new(), } } fn parse_css(style: Option<&str>) -> Css { let mut properties = vec![]; if let Some(style) = style { // TODO: make that parser a bit more resilient to things. for part in style.split(";") { let mut part = part .splitn(2, ":") .map(|a| a.to_string()) .collect::>(); let key = part.pop().unwrap(); let value = part.pop().unwrap(); properties.push(Property { key, value }); } } properties } #[cfg(test)] mod tests { use super::*; #[cfg(target_pointer_width = "32")] #[test] #[ignore] fn test_size() { assert_size!(XhtmlIm, 0); assert_size!(Child, 0); assert_size!(Tag, 0); } #[cfg(target_pointer_width = "64")] #[test] fn test_size() { assert_size!(XhtmlIm, 56); assert_size!(Child, 112); assert_size!(Tag, 104); } #[test] fn test_empty() { let elem: Element = "" .parse() .unwrap(); let xhtml = XhtmlIm::try_from(elem).unwrap(); assert_eq!(xhtml.bodies.len(), 0); let elem: Element = "" .parse() .unwrap(); let xhtml = XhtmlIm::try_from(elem).unwrap(); assert_eq!(xhtml.bodies.len(), 1); let elem: Element = "" .parse() .unwrap(); let xhtml = XhtmlIm::try_from(elem).unwrap(); assert_eq!(xhtml.bodies.len(), 2); } #[test] fn invalid_two_same_langs() { let elem: Element = "" .parse() .unwrap(); let error = XhtmlIm::try_from(elem).unwrap_err(); let message = match error { Error::ParseError(string) => string, _ => panic!(), }; assert_eq!(message, "Two identical language bodies found in XHTML-IM."); } #[test] fn test_tag() { let elem: Element = "" .parse() .unwrap(); let body = Body::try_from(elem).unwrap(); assert_eq!(body.children.len(), 0); let elem: Element = "

Hello world!

" .parse() .unwrap(); let mut body = Body::try_from(elem).unwrap(); assert_eq!(body.style.len(), 0); assert_eq!(body.xml_lang, None); assert_eq!(body.children.len(), 1); let p = match body.children.pop() { Some(Child::Tag(tag)) => tag, _ => panic!(), }; let mut children = match p { Tag::P { style, children } => { assert_eq!(style.len(), 0); assert_eq!(children.len(), 1); children } _ => panic!(), }; let text = match children.pop() { Some(Child::Text(text)) => text, _ => panic!(), }; assert_eq!(text, "Hello world!"); } #[test] fn test_unknown_element() { let elem: Element = "Hello world!" .parse() .unwrap(); let parsed = XhtmlIm::try_from(elem).unwrap(); let parsed2 = parsed.clone(); let html = parsed.to_html(); assert_eq!(html, "Hello world!"); let elem = Element::from(parsed2); assert_eq!(String::from(&elem), "Hello world!"); } #[test] fn test_generate_html() { let elem: Element = "

Hello world!

" .parse() .unwrap(); let xhtml_im = XhtmlIm::try_from(elem).unwrap(); let html = xhtml_im.to_html(); assert_eq!(html, "

Hello world!

"); let elem: Element = "

Hello world!

" .parse() .unwrap(); let xhtml_im = XhtmlIm::try_from(elem).unwrap(); let html = xhtml_im.to_html(); assert_eq!(html, "

Hello world!

"); } #[test] fn generate_tree() { let world = "world".to_string(); Body { style: vec![], xml_lang: Some("en".to_string()), children: vec![Child::Tag(Tag::P { style: vec![], children: vec![ Child::Text("Hello ".to_string()), Child::Tag(Tag::Strong { children: vec![Child::Text(world)], }), Child::Text("!".to_string()), ], })], }; } }