From e34a9e7d940b3d1c3a2a4f4567fb708442e2e94a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maxime=20=E2=80=9Cpep=E2=80=9D=20Buquet?= Date: Sat, 21 Oct 2023 23:52:40 +0200 Subject: [PATCH] ScanElement: Add test cases for text nodes filtering; rework comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maxime “pep” Buquet --- README.md | 3 + src/element.rs | 154 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 134 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index c41f993..4ed3825 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,9 @@ has no special treatment for this. Be sure to include namespaces in your Namespaced attributes aren't yet handled by minidom so `scansion:strict` also isn't treated in any special way. +Text nodes that aren't the unique child of an element and containing only +whitespace are considered insignificant, and removed from the comparison. + # Reporting bugs Everything that's supported by upstream should be supported by this library, diff --git a/src/element.rs b/src/element.rs index 7675653..2f41548 100644 --- a/src/element.rs +++ b/src/element.rs @@ -89,10 +89,9 @@ impl PartialEq for ScanNode { } } +// Text nodes that aren't the unique child of an element and containing only whitespace are +// considered insignificant, and removed from the comparison. fn filter_whitespace_nodes(nodes: Vec) -> Vec { - // Tags with mixed significant text and children tags aren't valid in XMPP, so we know we - // can remove these before comparing. - // XXX: ^^^^^ This isn't true. let filter_nodes = |(prev_type, mut acc): (Option, Vec), node| { let type_ = match node { Node::Text(_) => NodeType::Text, @@ -153,9 +152,9 @@ impl ScanNodes { } } -/// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we can -/// remove them. Text leaves are compared as is. When comparing strictly, elements must be exactly the -/// same. +/// When comparing strictly, there must be the same number of elements and these elements must be +/// exactly the same. Text nodes that aren't the unique child of an element and containing only +/// white-space are considered insignificant, and removed from the comparison. impl PartialEq> for ScanNodes { fn eq(&self, other: &Vec) -> bool { let filtered_self = filter_whitespace_nodes(self.nodes.clone()) @@ -168,9 +167,9 @@ impl PartialEq> for ScanNodes { } } -/// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we can -/// remove them. Text leaves are compared as is. When doing non-strict comparison, the target -/// element must have all attributes and children of the test element but it can have more. +/// When doing non-strict comparison, the target element must have all attributes and children of +/// the test element but it can have more. Text nodes that aren't the unique child of an element +/// and containing only white-space are considered insignificant, and removed from the comparison. impl PartialEq> for ScanNodes { fn eq(&self, other: &Vec) -> bool { let filtered_other = filter_whitespace_nodes(other.clone()); @@ -433,27 +432,24 @@ mod tests { } #[test] - fn compare_element_non_strict_whitespace_success() { + fn compare_element_non_strict_whitespace_failure() { let elem1: Element = "\n\t" .parse() .unwrap(); let elem2: Element = "".parse().unwrap(); let scan1 = ScanElement::new(elem1); - assert_eq!(scan1, elem2); - } - - #[test] - fn compare_element_non_strict_whitespace_failure() { - let elem1: Element = "\n\tfoo" - .parse() - .unwrap(); - let elem2: Element = "\n\tfoo\t" - .parse() - .unwrap(); - let scan1 = ScanElement::new(elem1); - assert_ne!(scan1, elem2); + + let elem3: Element = "\n\tfoo" + .parse() + .unwrap(); + let elem4: Element = "\n\tfoo\t" + .parse() + .unwrap(); + let scan2 = ScanElement::new(elem3); + + assert_ne!(scan2, elem4); } #[test] @@ -703,4 +699,116 @@ mod tests { assert_eq!(scan1, elem2); } + + #[test] + fn non_significant_text_nodes_are_filtered_non_strict() { + let elem1: Element = "\t" + .parse() + .unwrap(); + let elem2: Element = "\t\n" + .parse() + .unwrap(); + let scan1 = ScanElement::new(elem1); + + assert_eq!(scan1, elem2); + + let elem3: Element = "\n" + .parse() + .unwrap(); + let elem4: Element = "\t" + .parse() + .unwrap(); + let scan2 = ScanElement::new(elem3); + + assert_eq!(scan2, elem4); + + let elem5: Element = " \n" + .parse() + .unwrap(); + let elem6: Element = "\t" + .parse() + .unwrap(); + let scan3 = ScanElement::new(elem5); + + assert_eq!(scan3, elem6); + } + + #[test] + fn non_significant_text_nodes_are_filtered_strict() { + let elem1: Element = "\t" + .parse() + .unwrap(); + let elem2: Element = "\t\n" + .parse() + .unwrap(); + let scan1 = ScanElement::new(elem1); + + assert_eq!(scan1, elem2); + + let elem3: Element = "\n" + .parse() + .unwrap(); + let elem4: Element = "\t" + .parse() + .unwrap(); + let scan2 = ScanElement::new(elem3); + + assert_eq!(scan2, elem4); + + let elem5: Element = " \n" + .parse() + .unwrap(); + let elem6: Element = "\t" + .parse() + .unwrap(); + let scan3 = ScanElement::new(elem5); + + assert_eq!(scan3, elem6); + } + + #[test] + fn significant_text_nodes_arent_filtered_non_strict() { + let elem1: Element = "abc" + .parse() + .unwrap(); + let elem2: Element = " abc " + .parse() + .unwrap(); + let scan1 = ScanElement::new(elem1); + + assert_ne!(scan1, elem2); + + let elem3: Element = " " + .parse() + .unwrap(); + let elem4: Element = "" + .parse() + .unwrap(); + let scan2 = ScanElement::new(elem3); + + assert_ne!(scan2, elem4); + } + + #[test] + fn significant_text_nodes_arent_filtered_strict() { + let elem1: Element = "abc" + .parse() + .unwrap(); + let elem2: Element = " abc " + .parse() + .unwrap(); + let scan1 = ScanElement::new(elem1); + + assert_ne!(scan1, elem2); + + let elem3: Element = " " + .parse() + .unwrap(); + let elem4: Element = "" + .parse() + .unwrap(); + let scan2 = ScanElement::new(elem3); + + assert_ne!(scan2, elem4); + } }