Implement scansion:strict

Signed-off-by: Maxime “pep” Buquet <pep@bouah.net>
This commit is contained in:
Maxime “pep” Buquet 2023-04-15 19:08:25 +02:00
parent e9c6d32445
commit 974a942f5a
Signed by: pep
GPG key ID: DEDA74AEECA9D0F2
5 changed files with 516 additions and 10 deletions

View file

@ -9,11 +9,9 @@ description = "Parser for the Scansion DSL"
[dependencies] [dependencies]
nom = "7.1" nom = "7.1"
jid = "0.9" jid = "0.9"
minidom = "0.15.1"
nom_locate = "4.0.0" nom_locate = "4.0.0"
[dev-dependencies]
pretty_assertions = "1.3"
# [patch.crates-io] # [patch.crates-io]
# jid = { path = "../xmpp-rs/jid" } # jid = { path = "../xmpp-rs/jid" }
# minidom = { path = "../xmpp-rs/minidom" } # minidom = { path = "../xmpp-rs/minidom" }

509
src/element.rs Normal file
View file

@ -0,0 +1,509 @@
// Copyright (c) 2023-2099 Crate Authors
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//! Scansion uses the `scansion:strict` XML attribute to change the way elements are being
//! compared.
//! <https://matthewwild.co.uk/projects/scansion/character-classes/client/#matching-rules>
//!
//! Non-strict mode:
//! - All attributes in the expected stanza must be present in the received stanza. Additional
//! attributes in the received stanza are ignored.
//! - All tags in the expected stanza must be present in the received stanza. Additional tags are
//! ignored. Order is ignored.
//!
//! Strict mode:
//! - All attributes in the expected stanza must be in the received stanza, and vice-versa.
//! Additional attributes in the received stanza are not allowed.
//! - All tags in the expected stanza must be in the received stanza, additional tags are not
//! allowed. Order must be the same.
//!
//! For both modes:
//! - if an attribute value is `{scansion:any}` in the expected stanza, the attribute must be
//! present in the received stanza, but the value is ignored. This is useful for e.g. id attributes
//! or other attributes that may vary unpredictably.
//!
//! By default tags in the default namespace are matched using the non-strict rules, but tags with
//! their own namespace are matched using the strict rules. You can override the matching behaviour
//! for any tag by adding a `scansion:strict` attribute with a value of `true` or `false`.
//!
//!
//! ```conf
//! # By default this would match any message, by ignoring extra payloads. However we enable strict
//! # mode to ensure that it only matches a completely empty message stanza:
//!
//! Louise receives:
//! <message scansion:strict="true"/>
//! ```
use std::ops::Deref;
use std::fmt::Debug;
use std::marker::PhantomData;
use minidom::{Element, Node};
/// Namespace used for client entities. Elements in this namespace are compared non-strictly
/// unless a `scansion:strict` attribute says otherwise.
pub static DEFAULT_NS: &str = "jabber:client";
/// Namespace used for scansion attributes.
pub static SCANSION_NS: &str = "https://matthewwild.co.uk/projects/scansion";
/// Strict comparison marker: a zero-sized type used as the type parameter of `ScanNodes` to
/// select the strict node-list comparison.
#[derive(Debug)]
pub struct StrictComparison;
/// Non-strict comparison marker: a zero-sized type used as the type parameter of `ScanNodes` to
/// select the non-strict node-list comparison.
#[derive(Debug)]
pub struct NonStrictComparison;
/// Whitespace policy to be used during comparison of text nodes.
// NOTE(review): nothing else in this module references `Space` yet — confirm it is still needed.
#[derive(Debug, Clone, PartialEq)]
pub enum Space {
/// Keep spaces during comparison
Keep,
/// Dismiss spaces during comparison
Dismiss,
}
/// Coarse classification of a node, used by `filter_whitespace_nodes` to detect transitions
/// between text and tags in a list of children.
#[derive(Debug, Clone, PartialEq)]
enum NodeType {
/// A text node.
Text,
/// Any node that is not a text node.
Tag,
}
/// Wrapper around a single node so that it can be compared against another node using this
/// module's comparison rules instead of the node's own equality.
#[derive(Debug, Clone)]
struct ScanNode {
    pub node: Node,
}

impl ScanNode {
    /// Wraps `node` for comparison.
    fn new(node: Node) -> Self {
        Self { node }
    }
}
/// Compares the wrapped (expected) node with a received node.
///
/// - Two text nodes are equal when their content is identical.
/// - Two elements are compared through [`ScanElement`], which applies the `scansion:strict`
///   rules.
/// - A text node never equals an element.
impl PartialEq<Node> for ScanNode {
    fn eq(&self, other: &Node) -> bool {
        match (&self.node, other) {
            (Node::Text(expected), Node::Text(received)) => expected == received,
            (Node::Element(expected), Node::Element(received)) => {
                ScanElement::new(expected) == received
            }
            // Mismatched node kinds.
            _ => false,
        }
    }
}
/// Removes text nodes that are mixed with tags from a list of sibling nodes.
///
/// Tags with mixed significant text and children tags aren't valid in XMPP, so text found next
/// to a tag is treated as insignificant and dropped before comparing.
///
/// The list is processed in two passes:
/// 1. A fold over the nodes that drops a text node following a tag, and removes the previously
///    kept node when a tag follows a text node.
/// 2. If more than one node remains, text nodes that are empty once trimmed are removed. A lone
///    remaining node is returned untouched, so that a possibly significant whitespace-only text
///    leaf is preserved.
fn filter_whitespace_nodes(nodes: Vec<Node>) -> Vec<Node> {
    let drop_mixed_text = |(prev_type, mut acc): (Option<NodeType>, Vec<Node>), node: Node| {
        let type_ = match node {
            Node::Text(_) => NodeType::Text,
            _ => NodeType::Tag,
        };
        if let Some(prev_type) = prev_type {
            if type_ != prev_type {
                if type_ == NodeType::Text {
                    // Text right after a tag: skip it and keep tracking the tag.
                    return (Some(prev_type), acc);
                }
                // Tag right after text: the text kept just before is discarded.
                acc.pop();
            }
        }
        acc.push(node);
        (Some(type_), acc)
    };

    let nodes = nodes
        .into_iter()
        .fold((None::<NodeType>, Vec::new()), drop_mixed_text)
        .1;

    // Don't remove possibly significant whitespace text leaves
    if nodes.len() == 1 {
        return nodes;
    }

    nodes
        .into_iter()
        .filter(|node| !matches!(node, Node::Text(text) if text.trim().is_empty()))
        .collect()
}
/// A list of sibling nodes whose comparison mode is chosen at the type level through the marker
/// parameter `T` ([`StrictComparison`] or [`NonStrictComparison`]).
#[derive(Debug)]
struct ScanNodes<T: Debug> {
    pub nodes: Vec<Node>,
    _strict: PhantomData<T>,
}

impl ScanNodes<NonStrictComparison> {
    /// Builds a node list that is compared non-strictly.
    fn new(nodes: Vec<Node>) -> Self {
        ScanNodes {
            nodes,
            _strict: PhantomData,
        }
    }
}

impl ScanNodes<StrictComparison> {
    /// Builds a node list that is compared strictly.
    fn new_strict(nodes: Vec<Node>) -> Self {
        ScanNodes {
            nodes,
            _strict: PhantomData,
        }
    }
}
/// Tags with mixed significant text and children tags aren't valid in XMPP, so we know we can
/// remove them. Text leaves are compared as is. When comparing strictly, elements must be exactly the
/// same.
impl PartialEq<Vec<Node>> for ScanNodes<StrictComparison> {
fn eq(&self, other: &Vec<Node>) -> bool {
let filtered_self = filter_whitespace_nodes(self.nodes.clone())
.into_iter()
.map(ScanNode::new)
.collect::<Vec<_>>();
let filtered_other = filter_whitespace_nodes(other.clone());
filtered_self == filtered_other
}
}
/// Non-strict comparison of two node lists.
///
/// Both lists first go through `filter_whitespace_nodes`, since text mixed with tags isn't valid
/// in XMPP. Every remaining node of `self` (the expected list) must then have at least one
/// matching node, according to [`ScanNode`], somewhere in `other`. Additional nodes in `other`
/// and the order of nodes are ignored.
impl PartialEq<Vec<Node>> for ScanNodes<NonStrictComparison> {
    fn eq(&self, other: &Vec<Node>) -> bool {
        let received = filter_whitespace_nodes(other.clone());
        filter_whitespace_nodes(self.nodes.clone())
            .into_iter()
            .map(ScanNode::new)
            .all(|expected| received.iter().any(|candidate| expected == *candidate))
    }
}
/// Custom Element implementation reimplementing `PartialEq`.
///
/// Comparison between elements needs to take into account the `scansion:strict` attribute which
/// changes the way the comparison is done.
/// Also uses the custom ScanNode implementation.
#[derive(Debug, Clone)]
pub struct ScanElement<'a> {
// Borrowed element acting as the "expected" side of a comparison.
elem: &'a Element,
}
/// Gives transparent read access to the wrapped [`Element`].
impl<'a> Deref for ScanElement<'a> {
    type Target = Element;

    fn deref(&self) -> &Element {
        self.elem
    }
}
impl<'a> ScanElement<'a> {
pub fn new(elem: &'a Element) -> ScanElement {
Self { elem }
}
}
impl<'a> PartialEq<&Element> for ScanElement<'a> {
fn eq(&self, other: &&Element) -> bool {
let self_ns = self.elem.ns();
if self.elem.name() == other.name() &&
self_ns == other.ns() {
println!("FOO0: {{{}}}{}", self.elem.ns(), self.elem.name());
for attr in self.elem.attrs() {
println!("FOO1: attr: {:?}", attr);
}
let strict_attr = self.elem.attr("scansion:strict");
// Force true if scansion:strict is set or if a tag isn't in the default ns.
let strict = if let Some(val) = strict_attr {
val == "true" || val == "1"
} else {
self_ns != DEFAULT_NS
};
println!("FOO: Strict: {:?}", strict);
for (attr, val) in self.elem.attrs() {
println!("FOO: Attr: {} / {}", attr, val);
match (attr, other.attr(attr)) {
(attr, _) if attr == "scansion:strict" => continue,
(_, None) => return false,
(_, Some(oval)) if val != oval => return false,
foo => println!("FOO: {:?}", foo),
}
}
let onodes = other.nodes().cloned().collect::<Vec<_>>();
println!("FOO-2: foo2 {:?}", onodes);
// Compare attributes count
if strict {
let count = self.elem.attrs().into_iter().count();
let ocount = other.attrs().into_iter().count();
match strict_attr {
None if count != ocount => return false,
Some(_) if count != ocount + 1 => return false,
_ => (),
}
let nodes = ScanNodes::new_strict(self.elem.nodes().cloned().collect());
println!("FOO-1: foo1 {:?}", nodes);
nodes == onodes
} else {
let nodes = ScanNodes::<NonStrictComparison>::new(self.elem.nodes().cloned().collect());
println!("FOO-1: foo1 {:?}", nodes);
nodes == onodes
}
} else {
false
}
}
}
// Unit tests for node-list and element comparison. In every test the left-hand side is the
// expected value (wrapped in ScanNodes / ScanElement) and the right-hand side is the received one.
#[cfg(test)]
mod tests {
use super::*;
use std::str::FromStr;
// Strict list comparison: identical element lists match, while two lone whitespace-only text
// nodes with different content do not.
#[test]
fn compare_nodes_simple() {
let text1 = Node::Text(String::from("\t\t"));
let text2 = Node::Text(String::from("\t"));
let elem1 = Node::Element(Element::from_str("<foo xmlns='bar'/>").unwrap());
assert_eq!(
ScanNodes::new_strict(vec![elem1.clone()]),
vec![elem1.clone()],
);
assert_ne!(
ScanNodes::new_strict(vec![text1.clone()]),
vec![text2.clone()],
);
}
// Strict list comparison: whitespace text placed around or between elements is expected to be
// ignored, while the number of elements still matters and a lone text leaf is compared verbatim.
#[test]
fn compare_nodes_mixed() {
let text1 = Node::Text(String::from("\t\t"));
let text2 = Node::Text(String::from("\t"));
let elem1 = Node::Element(Element::from_str("<foo xmlns='bar'/>").unwrap());
assert_eq!(
ScanNodes::new_strict(vec![elem1.clone()]),
vec![elem1.clone()],
);
assert_eq!(
ScanNodes::new_strict(vec![text1.clone(), elem1.clone(), text2.clone()]),
vec![elem1.clone()],
);
assert_eq!(
ScanNodes::new_strict(vec![text1.clone(), elem1.clone()]),
vec![elem1.clone(), text2.clone()],
);
assert_eq!(
ScanNodes::new_strict(vec![elem1.clone(), text1.clone(), elem1.clone()]),
vec![elem1.clone(), elem1.clone()],
);
assert_ne!(
ScanNodes::new_strict(vec![elem1.clone(), text1.clone(), elem1.clone()]),
vec![elem1.clone()],
);
assert_ne!(
ScanNodes::new_strict(vec![Node::Text(String::from("\n\tfoo\n"))]),
vec![Node::Text(String::from("\n\tfoo"))],
);
}
// Whitespace before a child element is expected not to affect element comparison.
// NOTE(review): xmlns='foo' is not DEFAULT_NS and no scansion:strict is set, so the comparison
// code selects strict mode here despite the test name — confirm intent.
#[test]
fn compare_element_non_strict_whitespace_success() {
let elem1: Element = "<presence xmlns='foo'>\n\t<foo/></presence>".parse().unwrap();
let elem2: Element = "<presence xmlns='foo'><foo/></presence>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_eq!(scan1, &elem2);
}
// Text leaves that differ only by trailing whitespace are expected to be different.
#[test]
fn compare_element_non_strict_whitespace_failure() {
let elem1: Element = "<presence scansion:strict='false' xmlns='foo'>\n\tfoo</presence>".parse().unwrap();
let elem2: Element = "<presence xmlns='foo'>\n\tfoo\t</presence>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_ne!(scan1, &elem2);
}
// Strict mode: an element equals itself, and the expected element's scansion:strict attribute
// is expected not to count as an attribute mismatch.
#[test]
fn compare_element_strict_attributes_success() {
let elem1: Element = "<presence xmlns='foo'/>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_eq!(scan1, &elem1);
let elem2: Element = "<presence scansion:strict='true' xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc'/>
</presence>".parse().unwrap();
println!("BAR0: {:?}", elem2);
let elem3: Element = "<presence xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc'/>
</presence>".parse().unwrap();
println!("BAR1: {:?}", elem3);
let scan2 = ScanElement::new(&elem2);
assert_eq!(scan2, &elem3);
}
// An expected attribute (`foo`) that is missing on the received element must fail.
#[test]
fn compare_element_strict_attributes_failure() {
let elem1: Element = "<presence xmlns='foo' foo='bar'/>".parse().unwrap();
let elem2: Element = "<presence xmlns='foo'/>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_ne!(scan1, &elem2);
}
// Strict mode: identical children (including nested ones) are expected to match.
#[test]
fn compare_element_strict_nodes_success() {
let elem1: Element = "<presence scansion:strict='true' xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc' />
<x xmlns='vcard-temp:x:update'>
<photo/>
</x>
</presence>".parse().unwrap();
// The same, minus 'scansion:strict'
let elem2: Element = "<presence xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc' />
<x xmlns='vcard-temp:x:update'>
<photo/>
</x>
</presence>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_eq!(scan1, &elem2);
}
// Strict mode: the nested <photo/> is missing on the received side, so the elements are
// expected to differ.
#[test]
fn compare_element_strict_nodes_failure() {
let elem1: Element = "<presence scansion:strict='true' xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc' />
<x xmlns='vcard-temp:x:update'>
<photo/>
</x>
</presence>".parse().unwrap();
println!("BAR0: {:?}", elem1);
let elem2: Element = "<presence scansion:strict='true' xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc' />
<x xmlns='vcard-temp:x:update'/>
</presence>".parse().unwrap();
println!("BAR1: {:?}", elem2);
let scan1 = ScanElement::new(&elem1);
assert_ne!(scan1, &elem2);
}
// Non-strict mode: additional attributes on the received element (and on a child that opts
// out of strictness with scansion:strict='false') are expected to be ignored.
#[test]
fn compare_element_non_strict_attributes_success() {
let elem1: Element = "<presence scansion:strict='false' xmlns='foo'/>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_eq!(scan1, &elem1);
let elem2: Element = "<presence xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc' scansion:strict='false' />
</presence>".parse().unwrap();
let elem3: Element = "<presence xmlns='jabber:client' foo='bar'>
<x xmlns='http://jabber.org/protocol/muc' baz='qxx' />
</presence>".parse().unwrap();
let scan2 = ScanElement::new(&elem2);
assert_eq!(scan2, &elem3);
}
// Non-strict mode: attributes of the expected element must still be present on the received
// element.
#[test]
fn compare_element_non_strict_attributes_failure() {
let elem1: Element = "<presence scansion:strict='false' foo='bar' xmlns='foo'/>".parse().unwrap();
let elem2: Element = "<presence xmlns='foo' />".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_ne!(scan1, &elem2);
let elem2: Element = "<presence xmlns='jabber:client' foo='bar'>
<x xmlns='http://jabber.org/protocol/muc'/>
</presence>".parse().unwrap();
let elem3: Element = "<presence xmlns='jabber:client'>
<x xmlns='http://jabber.org/protocol/muc'/>
</presence>".parse().unwrap();
let scan2 = ScanElement::new(&elem2);
assert_ne!(scan2, &elem3);
}
// Non-strict mode: additional child elements on the received element are expected to be ignored.
#[test]
fn compare_element_non_strict_elem_success() {
let elem1: Element = "<presence scansion:strict='false' xmlns='foo'/>".parse().unwrap();
let elem2: Element = "<presence xmlns='foo'><foo/></presence>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_eq!(scan1, &elem2);
// 'jabber:client' is non strict by default
let elem3: Element = "<presence xmlns='jabber:client'/>".parse().unwrap();
let elem4: Element = "<presence xmlns='jabber:client'><foo/></presence>".parse().unwrap();
let scan3 = ScanElement::new(&elem3);
assert_eq!(scan3, &elem4);
}
// Non-strict mode: a child whose text content differs is expected to make the elements differ.
#[test]
fn compare_element_non_strict_elem_failure() {
let elem2: Element = "<message scansion:strict='false' xmlns='jabber:client'>
<body>foo</body>
</message>".parse().unwrap();
let elem3: Element = "<message xmlns='jabber:client'>
<body>bar</body>
</message>".parse().unwrap();
let scan2 = ScanElement::new(&elem2);
assert_ne!(scan2, &elem3);
}
// An extra attribute on a grandchild is expected to be rejected when an ancestor is strict.
// NOTE(review): both elements carry scansion:strict here, so the attribute count check of the
// top-level comparison may already be what makes them differ — verify that this test really
// exercises propagation of strictness to descendants.
#[test]
fn compare_element_propagate_strictness() {
let elem1: Element = "<message scansion:strict='true' xmlns='jabber:client'>
<foo><bar /></foo>
</message>".parse().unwrap();
let elem2: Element = "<message scansion:strict='true' xmlns='jabber:client'>
<foo><bar baz='qxx' /></foo>
</message>".parse().unwrap();
let scan1 = ScanElement::new(&elem1);
assert_ne!(scan1, &elem2);
}
}

View file

@ -4,11 +4,10 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this // License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/. // file, You can obtain one at http://mozilla.org/MPL/2.0/.
mod parsers; pub mod element;
mod types; pub mod parsers;
pub mod types;
pub static DEFAULT_NS: &str = "jabber:client";
pub static SCANSION_NS: &str = "https://matthewwild.co.uk/projects/scansion";
pub use element::ScanElement;
pub use parsers::parse_spec; pub use parsers::parse_spec;
pub use types::{Action, Client, Metadata, Spec}; pub use types::{Action, Client, Metadata, Spec};

View file

@ -282,7 +282,6 @@ pub fn parse_spec(i: &str) -> Result<Spec, Token> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use pretty_assertions::assert_eq;
fn get_client(name: &str) -> Client { fn get_client(name: &str) -> Client {
Client::new( Client::new(

View file

@ -4,9 +4,10 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this // License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/. // file, You can obtain one at http://mozilla.org/MPL/2.0/.
use jid::Jid;
use std::collections::HashMap; use std::collections::HashMap;
use jid::Jid;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct Metadata { pub struct Metadata {
pub title: String, pub title: String,