ScanElement: Add test cases for text nodes filtering; rework comments

Signed-off-by: Maxime “pep” Buquet <pep@bouah.net>
This commit is contained in:
Maxime “pep” Buquet 2023-10-21 23:52:40 +02:00
parent 84784e2adb
commit e34a9e7d94
Signed by: pep
GPG key ID: DEDA74AEECA9D0F2
2 changed files with 134 additions and 23 deletions

View file

@ -71,6 +71,9 @@ has no special treatment for this. Be sure to include namespaces in your
Namespaced attributes aren't yet handled by minidom so `scansion:strict` Namespaced attributes aren't yet handled by minidom so `scansion:strict`
also isn't treated in any special way. also isn't treated in any special way.
Text nodes that contain only whitespace and aren't the unique child of an
element are considered insignificant and are removed from the comparison.
# Reporting bugs # Reporting bugs
Everything that's supported by upstream should be supported by this library, Everything that's supported by upstream should be supported by this library,

View file

@ -89,10 +89,9 @@ impl PartialEq<Node> for ScanNode {
} }
} }
// Text nodes that contain only whitespace and aren't the unique child of an element are
// considered insignificant and are removed from the comparison.
fn filter_whitespace_nodes(nodes: Vec<Node>) -> Vec<Node> { fn filter_whitespace_nodes(nodes: Vec<Node>) -> Vec<Node> {
// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we
// can remove these before comparing.
// XXX: ^^^^^ This isn't true.
let filter_nodes = |(prev_type, mut acc): (Option<NodeType>, Vec<Node>), node| { let filter_nodes = |(prev_type, mut acc): (Option<NodeType>, Vec<Node>), node| {
let type_ = match node { let type_ = match node {
Node::Text(_) => NodeType::Text, Node::Text(_) => NodeType::Text,
@ -153,9 +152,9 @@ impl ScanNodes<StrictComparison> {
} }
} }
/// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we can /// When comparing strictly, there must be the same number of elements and these elements must be
/// remove them. Text leaves are compared as is. When comparing strictly, elements must be exactly the /// exactly the same. Text nodes that aren't the unique child of an element and containing only
/// same. /// white-space are considered insignificant, and removed from the comparison.
impl PartialEq<Vec<Node>> for ScanNodes<StrictComparison> { impl PartialEq<Vec<Node>> for ScanNodes<StrictComparison> {
fn eq(&self, other: &Vec<Node>) -> bool { fn eq(&self, other: &Vec<Node>) -> bool {
let filtered_self = filter_whitespace_nodes(self.nodes.clone()) let filtered_self = filter_whitespace_nodes(self.nodes.clone())
@ -168,9 +167,9 @@ impl PartialEq<Vec<Node>> for ScanNodes<StrictComparison> {
} }
} }
/// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we can /// When doing non-strict comparison, the target element must have all attributes and children of
/// remove them. Text leaves are compared as is. When doing non-strict comparison, the target /// the test element but it can have more. Text nodes that aren't the unique child of an element
/// element must have all attributes and children of the test element but it can have more. /// and containing only white-space are considered insignificant, and removed from the comparison.
impl PartialEq<Vec<Node>> for ScanNodes<NonStrictComparison> { impl PartialEq<Vec<Node>> for ScanNodes<NonStrictComparison> {
fn eq(&self, other: &Vec<Node>) -> bool { fn eq(&self, other: &Vec<Node>) -> bool {
let filtered_other = filter_whitespace_nodes(other.clone()); let filtered_other = filter_whitespace_nodes(other.clone());
@ -433,27 +432,24 @@ mod tests {
} }
#[test] #[test]
fn compare_element_non_strict_whitespace_success() { fn compare_element_non_strict_whitespace_failure() {
let elem1: Element = "<presence xmlns='foo'>\n\t<foo/></presence>" let elem1: Element = "<presence xmlns='foo'>\n\t<foo/></presence>"
.parse() .parse()
.unwrap(); .unwrap();
let elem2: Element = "<presence xmlns='foo'><foo/></presence>".parse().unwrap(); let elem2: Element = "<presence xmlns='foo'><foo/></presence>".parse().unwrap();
let scan1 = ScanElement::new(elem1); let scan1 = ScanElement::new(elem1);
assert_eq!(scan1, elem2);
}
#[test]
fn compare_element_non_strict_whitespace_failure() {
let elem1: Element = "<presence scansion:strict='false' xmlns='foo'>\n\tfoo</presence>"
.parse()
.unwrap();
let elem2: Element = "<presence xmlns='foo'>\n\tfoo\t</presence>"
.parse()
.unwrap();
let scan1 = ScanElement::new(elem1);
assert_ne!(scan1, elem2); assert_ne!(scan1, elem2);
let elem3: Element = "<presence scansion:strict='false' xmlns='foo'>\n\tfoo</presence>"
.parse()
.unwrap();
let elem4: Element = "<presence xmlns='foo'>\n\tfoo\t</presence>"
.parse()
.unwrap();
let scan2 = ScanElement::new(elem3);
assert_ne!(scan2, elem4);
} }
#[test] #[test]
@ -703,4 +699,116 @@ mod tests {
assert_eq!(scan1, elem2); assert_eq!(scan1, elem2);
} }
/// Whitespace-only text nodes that sit next to a child element must not influence a
/// non-strict comparison, whatever whitespace they hold and whichever side they are on.
#[test]
fn non_significant_text_nodes_are_filtered_non_strict() {
    // Each entry is (pattern XML, candidate XML); every pair is expected to compare equal.
    let cases = [
        // Whitespace before the child on both sides, with different content.
        (
            "<presence scansion:strict='false' xmlns='foo'>\t<foo/></presence>",
            "<presence xmlns='foo'>\t\n<foo/></presence>",
        ),
        // Whitespace after the child on both sides, with different content.
        (
            "<presence scansion:strict='false' xmlns='foo'><foo/>\n</presence>",
            "<presence xmlns='foo'><foo/>\t</presence>",
        ),
        // Whitespace around the child in the pattern, only after it in the candidate.
        (
            "<presence scansion:strict='false' xmlns='foo'> <foo/>\n</presence>",
            "<presence xmlns='foo'><foo/>\t</presence>",
        ),
    ];

    for &(pattern_xml, candidate_xml) in cases.iter() {
        let pattern: Element = pattern_xml.parse().unwrap();
        let candidate: Element = candidate_xml.parse().unwrap();
        assert_eq!(ScanElement::new(pattern), candidate);
    }
}
/// Same scenarios as the non-strict variant, but with `scansion:strict='true'` on the
/// pattern: whitespace-only text nodes next to a child element are still ignored.
#[test]
fn non_significant_text_nodes_are_filtered_strict() {
    // Each entry is (pattern XML, candidate XML); every pair is expected to compare equal.
    let cases = [
        // Whitespace before the child on both sides, with different content.
        (
            "<presence scansion:strict='true' xmlns='foo'>\t<foo/></presence>",
            "<presence xmlns='foo'>\t\n<foo/></presence>",
        ),
        // Whitespace after the child on both sides, with different content.
        (
            "<presence scansion:strict='true' xmlns='foo'><foo/>\n</presence>",
            "<presence xmlns='foo'><foo/>\t</presence>",
        ),
        // Whitespace around the child in the pattern, only after it in the candidate.
        (
            "<presence scansion:strict='true' xmlns='foo'> <foo/>\n</presence>",
            "<presence xmlns='foo'><foo/>\t</presence>",
        ),
    ];

    for &(pattern_xml, candidate_xml) in cases.iter() {
        let pattern: Element = pattern_xml.parse().unwrap();
        let candidate: Element = candidate_xml.parse().unwrap();
        assert_eq!(ScanElement::new(pattern), candidate);
    }
}
/// A text node that is the only child of its element is significant and is compared as is
/// in a non-strict comparison, even when it consists solely of whitespace.
#[test]
fn significant_text_nodes_arent_filtered_non_strict() {
    // Each entry is (pattern XML, candidate XML); every pair is expected to compare unequal.
    let cases = [
        // Surrounding whitespace is part of the text and makes it differ.
        (
            "<presence scansion:strict='false' xmlns='foo'>abc</presence>",
            "<presence xmlns='foo'> abc </presence>",
        ),
        // A lone whitespace text node is not the same as having no children.
        (
            "<presence scansion:strict='false' xmlns='foo'> </presence>",
            "<presence xmlns='foo'></presence>",
        ),
    ];

    for &(pattern_xml, candidate_xml) in cases.iter() {
        let pattern: Element = pattern_xml.parse().unwrap();
        let candidate: Element = candidate_xml.parse().unwrap();
        assert_ne!(ScanElement::new(pattern), candidate);
    }
}
/// Same scenarios as the non-strict variant, but with `scansion:strict='true'` on the
/// pattern: a text node that is the only child of its element is compared as is.
#[test]
fn significant_text_nodes_arent_filtered_strict() {
    // Each entry is (pattern XML, candidate XML); every pair is expected to compare unequal.
    let cases = [
        // Surrounding whitespace is part of the text and makes it differ.
        (
            "<presence scansion:strict='true' xmlns='foo'>abc</presence>",
            "<presence xmlns='foo'> abc </presence>",
        ),
        // A lone whitespace text node is not the same as having no children.
        (
            "<presence scansion:strict='true' xmlns='foo'> </presence>",
            "<presence xmlns='foo'></presence>",
        ),
    ];

    for &(pattern_xml, candidate_xml) in cases.iter() {
        let pattern: Element = pattern_xml.parse().unwrap();
        let candidate: Element = candidate_xml.parse().unwrap();
        assert_ne!(ScanElement::new(pattern), candidate);
    }
}
} }