ScanElement: Add test cases for text nodes filtering; rework comments

Signed-off-by: Maxime “pep” Buquet <pep@bouah.net>
2023-10-21 23:52:40 +02:00 · 2023-10-21 23:52:40 +02:00 · e34a9e7d94
commit e34a9e7d94
parent 84784e2adb
2 changed files with 134 additions and 23 deletions
--- a/README.md
+++ b/README.md
@ -71,6 +71,9 @@ has no special treatment for this. Be sure to include namespaces in your
 Namespaced attributes aren't yet handled by minidom so `scansion:strict`
 also isn't treated in any special way.
 Text nodes that contain only whitespace and aren't the unique child of an
 element are considered insignificant and are removed from the comparison.
 # Reporting bugs
 Everything that's supported by upstream should be supported by this library,
--- a/src/element.rs
+++ b/src/element.rs
@ -89,10 +89,9 @@ impl PartialEq<Node> for ScanNode {
 	}
 }
 // Text nodes that contain only whitespace and aren't the unique child of an element are
 // considered insignificant and are removed from the comparison.
 fn filter_whitespace_nodes(nodes: Vec<Node>) -> Vec<Node> {
 	// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we
 	// can remove these before comparing.
 	// XXX: ^^^^^ This isn't true.
 	let filter_nodes = |(prev_type, mut acc): (Option<NodeType>, Vec<Node>), node| {
 		let type_ = match node {
 			Node::Text(_) => NodeType::Text,
@ -153,9 +152,9 @@ impl ScanNodes<StrictComparison> {
 	}
 }
-/// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we can
+/// When comparing strictly, there must be the same number of elements and these elements must be
-/// remove them. Text leaves are compared as is. When comparing strictly, elements must be exactly the
+/// exactly the same. Text nodes that aren't the unique child of an element and containing only
-/// same.
+/// white-space are considered insignificant, and removed from the comparison.
 impl PartialEq<Vec<Node>> for ScanNodes<StrictComparison> {
 	fn eq(&self, other: &Vec<Node>) -> bool {
 		let filtered_self = filter_whitespace_nodes(self.nodes.clone())
@ -168,9 +167,9 @@ impl PartialEq<Vec<Node>> for ScanNodes<StrictComparison> {
 	}
 }
-/// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we can
+/// When doing non-strict comparison, the target element must have all attributes and children of
-/// remove them. Text leaves are compared as is. When doing non-strict comparison, the target
+/// the test element but it can have more. Text nodes that aren't the unique child of an element
-/// element must have all attributes and children of the test element but it can have more.
+/// and containing only white-space are considered insignificant, and removed from the comparison.
 impl PartialEq<Vec<Node>> for ScanNodes<NonStrictComparison> {
 	fn eq(&self, other: &Vec<Node>) -> bool {
 		let filtered_other = filter_whitespace_nodes(other.clone());
@ -433,27 +432,24 @@ mod tests {
 	}
 	#[test]
-	fn compare_element_non_strict_whitespace_success() {
+	fn compare_element_non_strict_whitespace_failure() {
 		let elem1: Element = "<presence xmlns='foo'>\n\t<foo/></presence>"
 			.parse()
 			.unwrap();
 		let elem2: Element = "<presence xmlns='foo'><foo/></presence>".parse().unwrap();
 		let scan1 = ScanElement::new(elem1);
 		assert_eq!(scan1, elem2);
 	}
 	// Text that is the only child of an element is compared as is: "\n\tfoo" on the test side and
 	// "\n\tfoo\t" on the target side differ by a trailing tab, so the comparison must fail.
 	#[test]
 	fn compare_element_non_strict_whitespace_failure() {
 		let elem1: Element = "<presence scansion:strict='false' xmlns='foo'>\n\tfoo</presence>"
 			.parse()
 			.unwrap();
 		let elem2: Element = "<presence xmlns='foo'>\n\tfoo\t</presence>"
 			.parse()
 			.unwrap();
 		let scan1 = ScanElement::new(elem1);
 		assert_ne!(scan1, elem2);
 		// NOTE(review): this second pair uses exactly the same two inputs as the pair above, so
 		// it exercises nothing new -- confirm whether a different case was intended.
 		let elem3: Element = "<presence scansion:strict='false' xmlns='foo'>\n\tfoo</presence>"
 			.parse()
 			.unwrap();
 		let elem4: Element = "<presence xmlns='foo'>\n\tfoo\t</presence>"
 			.parse()
 			.unwrap();
 		let scan2 = ScanElement::new(elem3);
 		assert_ne!(scan2, elem4);
 	}
 	#[test]
@ -703,4 +699,116 @@ mod tests {
 		assert_eq!(scan1, elem2);
 	}
 	// Whitespace-only text nodes sitting next to a child element must not affect equality.
 	// Every test element here carries `scansion:strict='false'`; the targets do not.
 	#[test]
 	fn non_significant_text_nodes_are_filtered_non_strict() {
 		// Leading whitespace on both sides, but with different content ("\t" vs "\t\n").
 		let elem1: Element = "<presence scansion:strict='false' xmlns='foo'>\t<foo/></presence>"
 			.parse()
 			.unwrap();
 		let elem2: Element = "<presence xmlns='foo'>\t\n<foo/></presence>"
 			.parse()
 			.unwrap();
 		let scan1 = ScanElement::new(elem1);
 		assert_eq!(scan1, elem2);

 		// Trailing whitespace on both sides, but with different content ("\n" vs "\t").
 		let elem3: Element = "<presence scansion:strict='false' xmlns='foo'><foo/>\n</presence>"
 			.parse()
 			.unwrap();
 		let elem4: Element = "<presence xmlns='foo'><foo/>\t</presence>"
 			.parse()
 			.unwrap();
 		let scan2 = ScanElement::new(elem3);
 		assert_eq!(scan2, elem4);

 		// Whitespace on both ends of the test element, only trailing whitespace in the target.
 		let elem5: Element = "<presence scansion:strict='false' xmlns='foo'>  <foo/>\n</presence>"
 			.parse()
 			.unwrap();
 		let elem6: Element = "<presence xmlns='foo'><foo/>\t</presence>"
 			.parse()
 			.unwrap();
 		let scan3 = ScanElement::new(elem5);
 		assert_eq!(scan3, elem6);
 	}
 	// Same three whitespace layouts as the non-strict variant of this test, but every test
 	// element carries `scansion:strict='true'`. Equality is still expected in each case.
 	#[test]
 	fn non_significant_text_nodes_are_filtered_strict() {
 		// Leading whitespace on both sides, but with different content ("\t" vs "\t\n").
 		let elem1: Element = "<presence scansion:strict='true' xmlns='foo'>\t<foo/></presence>"
 			.parse()
 			.unwrap();
 		let elem2: Element = "<presence xmlns='foo'>\t\n<foo/></presence>"
 			.parse()
 			.unwrap();
 		let scan1 = ScanElement::new(elem1);
 		assert_eq!(scan1, elem2);

 		// Trailing whitespace on both sides, but with different content ("\n" vs "\t").
 		let elem3: Element = "<presence scansion:strict='true' xmlns='foo'><foo/>\n</presence>"
 			.parse()
 			.unwrap();
 		let elem4: Element = "<presence xmlns='foo'><foo/>\t</presence>"
 			.parse()
 			.unwrap();
 		let scan2 = ScanElement::new(elem3);
 		assert_eq!(scan2, elem4);

 		// Whitespace on both ends of the test element, only trailing whitespace in the target.
 		let elem5: Element = "<presence scansion:strict='true' xmlns='foo'>  <foo/>\n</presence>"
 			.parse()
 			.unwrap();
 		let elem6: Element = "<presence xmlns='foo'><foo/>\t</presence>"
 			.parse()
 			.unwrap();
 		let scan3 = ScanElement::new(elem5);
 		assert_eq!(scan3, elem6);
 	}
 	// A text node that is the only child of its element must be compared verbatim, even when it
 	// is (or is surrounded by) whitespace. Test elements carry `scansion:strict='false'`.
 	#[test]
 	fn significant_text_nodes_arent_filtered_non_strict() {
 		// "abc" vs " abc ": the surrounding spaces are part of the sole text child, so the
 		// elements must differ.
 		let elem1: Element = "<presence scansion:strict='false' xmlns='foo'>abc</presence>"
 			.parse()
 			.unwrap();
 		let elem2: Element = "<presence xmlns='foo'> abc </presence>"
 			.parse()
 			.unwrap();
 		let scan1 = ScanElement::new(elem1);
 		assert_ne!(scan1, elem2);

 		// A whitespace-only text node that is the unique child is not dropped, so it must not
 		// compare equal to an element with no children at all.
 		let elem3: Element = "<presence scansion:strict='false' xmlns='foo'>  </presence>"
 			.parse()
 			.unwrap();
 		let elem4: Element = "<presence xmlns='foo'></presence>"
 			.parse()
 			.unwrap();
 		let scan2 = ScanElement::new(elem3);
 		assert_ne!(scan2, elem4);
 	}
 	// Same two cases as the non-strict variant of this test, but every test element carries
 	// `scansion:strict='true'`. Inequality is still expected in each case.
 	#[test]
 	fn significant_text_nodes_arent_filtered_strict() {
 		// "abc" vs " abc ": the surrounding spaces are part of the sole text child, so the
 		// elements must differ.
 		let elem1: Element = "<presence scansion:strict='true' xmlns='foo'>abc</presence>"
 			.parse()
 			.unwrap();
 		let elem2: Element = "<presence xmlns='foo'> abc </presence>"
 			.parse()
 			.unwrap();
 		let scan1 = ScanElement::new(elem1);
 		assert_ne!(scan1, elem2);

 		// A whitespace-only text node that is the unique child is not dropped, so it must not
 		// compare equal to an element with no children at all.
 		let elem3: Element = "<presence scansion:strict='true' xmlns='foo'>  </presence>"
 			.parse()
 			.unwrap();
 		let elem4: Element = "<presence xmlns='foo'></presence>"
 			.parse()
 			.unwrap();
 		let scan2 = ScanElement::new(elem3);
 		assert_ne!(scan2, elem4);
 	}
 }