// Copyright (c) 2019 Emmanuel Gil Peyrot // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use crate::util::error::Error; use crate::message::MessagePayload; use crate::ns; use minidom::{Element, Node}; use std::convert::TryFrom; use std::collections::HashMap; // TODO: Use a proper lang type. type Lang = String; /// Container for formatted text. #[derive(Debug, Clone)] pub struct XhtmlIm { /// Map of language to body element. bodies: HashMap, } impl XhtmlIm { /// Serialise formatted text to HTML. pub fn to_html(self) -> String { let mut html = Vec::new(); // TODO: use the best language instead. for (lang, body) in self.bodies { if lang.is_empty() { assert!(body.xml_lang.is_none()); } else { assert_eq!(Some(lang), body.xml_lang); } for tag in body.children { html.push(tag.to_html()); } break; } html.concat() } /// Removes all unknown elements. fn flatten(self) -> XhtmlIm { let mut bodies = HashMap::new(); for (lang, body) in self.bodies { let children = body.children.into_iter().fold(vec![], |mut acc, child| { match child { Child::Tag(Tag::Unknown(children)) => acc.extend(children), any => acc.push(any), } acc }); let body = Body { children, ..body }; bodies.insert(lang, body); } XhtmlIm { bodies, } } } impl MessagePayload for XhtmlIm {} impl TryFrom for XhtmlIm { type Error = Error; fn try_from(elem: Element) -> Result { check_self!(elem, "html", XHTML_IM); check_no_attributes!(elem, "html"); let mut bodies = HashMap::new(); for child in elem.children() { if child.is("body", ns::XHTML) { let child = child.clone(); let lang = match child.attr("xml:lang") { Some(lang) => lang, None => "", }.to_string(); let body = Body::try_from(child)?; match bodies.insert(lang, body) { None => (), Some(_) => return Err(Error::ParseError("Two identical language bodies found in XHTML-IM.")) } } else { return Err(Error::ParseError("Unknown element in XHTML-IM.")); } } Ok(XhtmlIm { bodies }.flatten()) } } impl From for Element { fn from(wrapper: XhtmlIm) -> Element { Element::builder("html") .ns(ns::XHTML_IM) .append(wrapper.bodies.into_iter().map(|(ref lang, ref body)| { if lang.is_empty() { assert!(body.xml_lang.is_none()); } else { assert_eq!(Some(lang), body.xml_lang.as_ref()); } Element::from(body.clone()) }).collect::>()) .build() } } #[derive(Debug, Clone)] enum Child { Tag(Tag), Text(String), } impl Child { fn to_html(self) -> String { match self { Child::Tag(tag) => tag.to_html(), Child::Text(text) => text, } } } #[derive(Debug, Clone)] struct Property { key: String, value: String, } type Css = Vec; fn get_style_string(style: Css) -> Option { let mut result = vec![]; for Property { key, value } in style { result.push(format!("{}: {}", key, value)); } if result.is_empty() { return None; } Some(result.join("; ")) } #[derive(Debug, Clone)] struct Body { style: Css, xml_lang: Option, children: Vec, } impl TryFrom for Body { type Error = Error; fn try_from(elem: Element) -> Result { let mut children = vec![]; for child in elem.nodes() { match child { Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)), Node::Text(text) => children.push(Child::Text(text.clone())), Node::Comment(_) => unimplemented!() // XXX: remove! } } Ok(Body { style: parse_css(elem.attr("style")), xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()), children }) } } impl From for Element { fn from(body: Body) -> Element { Element::builder("body") .ns(ns::XHTML) .attr("style", get_style_string(body.style)) .attr("xml:lang", body.xml_lang) .append(children_to_nodes(body.children)) .build() } } #[derive(Debug, Clone)] enum Tag { A { href: Option, style: Css, type_: Option, children: Vec }, Blockquote { style: Css, children: Vec }, Br, Cite { style: Css, children: Vec }, Em { children: Vec }, Img { src: Option, alt: Option }, // TODO: height, width, style Li { style: Css, children: Vec }, Ol { style: Css, children: Vec }, P { style: Css, children: Vec }, Span { style: Css, children: Vec }, Strong { children: Vec }, Ul { style: Css, children: Vec }, Unknown(Vec), } impl Tag { fn to_html(self) -> String { match self { Tag::A { href, style, type_, children } => { let href = write_attr(href, "href"); let style = write_attr(get_style_string(style), "style"); let type_ = write_attr(type_, "type"); format!("{}", href, style, type_, children_to_html(children)) }, Tag::Blockquote { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) }, Tag::Br => String::from("
"), Tag::Cite { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) }, Tag::Em { children } => format!("{}", children_to_html(children)), Tag::Img { src, alt } => { let src = write_attr(src, "src"); let alt = write_attr(alt, "alt"); format!("", src, alt) } Tag::Li { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::Ol { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::P { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}

", style, children_to_html(children)) } Tag::Span { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::Strong { children } => format!("{}", children_to_html(children)), Tag::Ul { style, children } => { let style = write_attr(get_style_string(style), "style"); format!("{}", style, children_to_html(children)) } Tag::Unknown(children) => children_to_html(children), } } } impl TryFrom for Tag { type Error = Error; fn try_from(elem: Element) -> Result { let mut children = vec![]; for child in elem.nodes() { match child { Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)), Node::Text(text) => children.push(Child::Text(text.clone())), Node::Comment(_) => unimplemented!() // XXX: remove! } } Ok(match elem.name() { "a" => Tag::A { href: elem.attr("href").map(|href| href.to_string()), style: parse_css(elem.attr("style")), type_: elem.attr("type").map(|type_| type_.to_string()), children }, "blockquote" => Tag::Blockquote { style: parse_css(elem.attr("style")), children }, "br" => Tag::Br, "cite" => Tag::Cite { style: parse_css(elem.attr("style")), children }, "em" => Tag::Em { children }, "img" => Tag::Img { src: elem.attr("src").map(|src| src.to_string()), alt: elem.attr("alt").map(|alt| alt.to_string()) }, "li" => Tag::Li { style: parse_css(elem.attr("style")), children }, "ol" => Tag::Ol { style: parse_css(elem.attr("style")), children }, "p" => Tag::P { style: parse_css(elem.attr("style")), children }, "span" => Tag::Span { style: parse_css(elem.attr("style")), children }, "strong" => Tag::Strong { children }, "ul" => Tag::Ul { style: parse_css(elem.attr("style")), children }, _ => Tag::Unknown(children), }) } } impl From for Element { fn from(tag: Tag) -> Element { let (name, attrs, children) = match tag { Tag::A { href, style, type_, children } => ("a", { let mut attrs = vec![]; if let Some(href) = href { attrs.push(("href", href)); } if let Some(style) = get_style_string(style) { attrs.push(("style", style)); } if let Some(type_) = type_ { attrs.push(("type", type_)); } attrs }, children), Tag::Blockquote { style, children } => ("blockquote", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children), Tag::Br => ("br", vec![], vec![]), Tag::Cite { style, children } => ("cite", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children), Tag::Em { children } => ("em", vec![], children), Tag::Img { src, alt } => { let mut attrs = vec![]; if let Some(src) = src { attrs.push(("src", src)); } if let Some(alt) = alt { attrs.push(("alt", alt)); } ("img", attrs, vec![]) }, Tag::Li { style, children } => ("li", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children), Tag::Ol { style, children } => ("ol", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children), Tag::P { style, children } => ("p", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children), Tag::Span { style, children } => ("span", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children), Tag::Strong { children } => ("strong", vec![], children), Tag::Ul { style, children } => ("ul", match get_style_string(style) { Some(style) => vec![("style", style)], None => vec![], }, children), Tag::Unknown(children) => return Element::builder("unknown").ns(ns::XHTML).append(children_to_nodes(children)).build(), }; let mut builder = Element::builder(name) .ns(ns::XHTML) .append(children_to_nodes(children)); for (key, value) in attrs { builder = builder.attr(key, value); } builder.build() } } fn children_to_nodes(children: Vec) -> Vec { children.into_iter().map(|child| match child { Child::Tag(tag) => Node::Element(Element::from(tag)), Child::Text(text) => Node::Text(text), }).collect::>() } fn children_to_html(children: Vec) -> String { children.into_iter().map(|child| child.to_html()).collect::>().concat() } fn write_attr(attr: Option, name: &str) -> String { match attr { Some(attr) => format!(" {}='{}'", name, attr), None => String::new(), } } fn parse_css(style: Option<&str>) -> Css { let mut properties = vec![]; if let Some(style) = style { // TODO: make that parser a bit more resilient to things. for part in style.split(";") { let mut part = part.splitn(2, ":").map(|a| a.to_string()).collect::>(); let key = part.pop().unwrap(); let value = part.pop().unwrap(); properties.push(Property { key, value }); } } properties } #[cfg(test)] mod tests { use super::*; #[cfg(target_pointer_width = "32")] #[test] #[ignore] fn test_size() { assert_size!(XhtmlIm, 0); assert_size!(Child, 0); assert_size!(Tag, 0); } #[cfg(target_pointer_width = "64")] #[test] fn test_size() { assert_size!(XhtmlIm, 56); assert_size!(Child, 112); assert_size!(Tag, 104); } #[test] fn test_empty() { let elem: Element = "" .parse() .unwrap(); let xhtml = XhtmlIm::try_from(elem).unwrap(); assert_eq!(xhtml.bodies.len(), 0); let elem: Element = "" .parse() .unwrap(); let xhtml = XhtmlIm::try_from(elem).unwrap(); assert_eq!(xhtml.bodies.len(), 1); let elem: Element = "" .parse() .unwrap(); let xhtml = XhtmlIm::try_from(elem).unwrap(); assert_eq!(xhtml.bodies.len(), 2); } #[test] fn invalid_two_same_langs() { let elem: Element = "" .parse() .unwrap(); let error = XhtmlIm::try_from(elem).unwrap_err(); let message = match error { Error::ParseError(string) => string, _ => panic!(), }; assert_eq!(message, "Two identical language bodies found in XHTML-IM."); } #[test] fn test_tag() { let elem: Element = "" .parse() .unwrap(); let body = Body::try_from(elem).unwrap(); assert_eq!(body.children.len(), 0); let elem: Element = "

Hello world!

" .parse() .unwrap(); let mut body = Body::try_from(elem).unwrap(); assert_eq!(body.style.len(), 0); assert_eq!(body.xml_lang, None); assert_eq!(body.children.len(), 1); let p = match body.children.pop() { Some(Child::Tag(tag)) => tag, _ => panic!(), }; let mut children = match p { Tag::P { style, children } => { assert_eq!(style.len(), 0); assert_eq!(children.len(), 1); children }, _ => panic!(), }; let text = match children.pop() { Some(Child::Text(text)) => text, _ => panic!(), }; assert_eq!(text, "Hello world!"); } #[test] fn test_unknown_element() { let elem: Element = "Hello world!" .parse() .unwrap(); let parsed = XhtmlIm::try_from(elem).unwrap(); let parsed2 = parsed.clone(); let html = parsed.to_html(); assert_eq!(html, "Hello world!"); let elem = Element::from(parsed2); assert_eq!(String::from(&elem), "Hello world!"); } #[test] fn test_generate_html() { let elem: Element = "

Hello world!

" .parse() .unwrap(); let xhtml_im = XhtmlIm::try_from(elem).unwrap(); let html = xhtml_im.to_html(); assert_eq!(html, "

Hello world!

"); let elem: Element = "

Hello world!

" .parse() .unwrap(); let xhtml_im = XhtmlIm::try_from(elem).unwrap(); let html = xhtml_im.to_html(); assert_eq!(html, "

Hello world!

"); } #[test] fn generate_tree() { let world = "world".to_string(); Body { style: vec![], xml_lang: Some("en".to_string()), children: vec![ Child::Tag(Tag::P { style: vec![], children: vec![ Child::Text("Hello ".to_string()), Child::Tag(Tag::Strong { children: vec![ Child::Text(world), ] }), Child::Text("!".to_string()), ] }), ] }; } }