diff --git a/minidom/src/token.rs b/minidom/src/token.rs
index 092849e..9699c50 100644
--- a/minidom/src/token.rs
+++ b/minidom/src/token.rs
@@ -176,6 +176,7 @@ impl Token {
         let (s, _) = space0(s)?;
         let (s, delim) = one_of("'\"")(s)?;
         let (s, value) = Self::parse_text(delim, s)?;
+        let value = Self::normalize_attribute_value(value);
         let (s, _) = char(delim)(s)?;
         Ok((s, (name, value)))
     }
@@ -249,6 +250,28 @@ impl Token {
         }
         s
     }
+
+    /// Normalizes whitespace in an attribute value, following
+    /// https://www.w3.org/TR/2008/REC-xml-20081126/#AVNormalize
+    ///
+    /// Every literal tab or line feed becomes a single space; a value that
+    /// contains neither is returned as-is.
+    ///
+    /// assumes normalize_newlines() already done, so `\r` is not handled here
+    ///
+    /// NOTE(review): the spec keeps whitespace that came from a character
+    /// reference (e.g. `&#9;`) unchanged; confirm whether such references
+    /// have already been expanded in the value passed in.
+    fn normalize_attribute_value(s: Cow<str>) -> Cow<str> {
+        // A `&[char]` pattern matches any of its characters, so one scan
+        // decides whether a rewritten copy is needed at all.
+        const WHITESPACE: &[char] = &['\t', '\n'];
+        if s.contains(WHITESPACE) {
+            Cow::Owned(s.replace(WHITESPACE, " "))
+        } else {
+            s
+        }
+    }
 }
 
 #[cfg(test)]
@@ -338,6 +361,24 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_attrs_normalized() {
+        // NOTE(review): the input literal was lost from this patch as
+        // received; it is reconstructed here to match the expected attributes.
+        assert_eq!(
+            Ok((&b""[..], Token::StartTag {
+                name: "a".into(),
+                attrs: vec![
+                    attr("a", "x y"),
+                    attr("b", " "),
+                    attr("c", "a b"),
+                ],
+                self_closing: false,
+            })),
+            Token::parse(b"<a a='x\ty' b='\n' c='a\nb'>")
+        );
+    }
+
     #[test]
     fn test_attrs_entities() {
         assert_eq!(