Example: Add jxmpp Jid corpus parser
Signed-off-by: Maxime “pep” Buquet <pep@bouah.net>
This commit is contained in:
parent
216d9c4a8d
commit
1ad3459968
2 changed files with 254 additions and 0 deletions
|
@ -23,5 +23,13 @@ icu = { version = "0.1", optional = true }
|
|||
minidom = { version = "0.15", optional = true }
|
||||
serde = { version = "1.0", features = ["derive"], optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
jid = { version = "*", features = ["stringprep"] }
|
||||
nom = "7.1.3"
|
||||
|
||||
[features]
|
||||
stringprep = ["icu"]
|
||||
|
||||
[[example]]
|
||||
name = "corpus"
|
||||
required-features = ["stringprep"]
|
||||
|
|
246
jid/examples/corpus.rs
Normal file
246
jid/examples/corpus.rs
Normal file
|
@ -0,0 +1,246 @@
|
|||
// Copyright (c) 2023 Maxime “pep” Buquet <pep@bouah.net>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Affero General Public License as published by the
|
||||
// Free Software Foundation, either version 3 of the License, or (at your
|
||||
// option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
|
||||
// for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! Tests with a corpus file respecting the following formats:
|
||||
//! For valid JIDs: <https://github.com/igniterealtime/jxmpp/blob/master/jxmpp-strings-testframework/src/main/resources/xmpp-strings/jids/valid/main>
|
||||
//! For invalid JIDs: <https://github.com/igniterealtime/jxmpp/blob/master/jxmpp-strings-testframework/src/main/resources/xmpp-strings/jids/invalid/main>
|
||||
|
||||
use std::env::args;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
|
||||
use jid::{Jid, FullJid, BareJid};
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_while},
|
||||
combinator::opt,
|
||||
multi::many0,
|
||||
sequence::tuple,
|
||||
IResult,
|
||||
};
|
||||
|
||||
/// String slice borrowed from the corpus text; used for the JID strings
/// carried by `EntryOrComment` and stored in `ValidCorpus::input`.
pub type Input<'a> = &'a str;
|
||||
/// Expected result for a valid corpus entry, as three slices borrowed from
/// the corpus text: `(node, domain, resource)`.
pub type Output<'a> = (&'a str, &'a str, &'a str);
|
||||
|
||||
enum EntryOrComment<'a> {
|
||||
ValidJid((Input<'a>, Output<'a>)),
|
||||
InvalidJid(Option<Input<'a>>),
|
||||
Comment(&'a str),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ValidCorpus<'a> {
|
||||
pub input: Vec<Input<'a>>,
|
||||
pub output: Vec<Output<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> ValidCorpus<'a> {
|
||||
pub fn into_inner(self) -> (Vec<Input<'a>>, Vec<Output<'a>>) {
|
||||
(self.input, self.output)
|
||||
}
|
||||
}
|
||||
|
||||
/// Entries collected from an invalid-JID corpus.
///
/// Derives the same traits as a plain data holder needs (`Debug` in
/// particular) so parse results containing it can be formatted in error
/// messages.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct InvalidCorpus<'a> {
    /// Raw JID strings in corpus order; `None` when no text was captured for
    /// an entry.
    pub input: Vec<Option<&'a str>>,
}
|
||||
|
||||
// Valid Jid parsing
|
||||
|
||||
fn parse_noctrlchr(i: &str) -> IResult<&str, &str> {
|
||||
Ok(take_while(|c| c != '\u{000A}' && c != '\u{001E}' && c != '\u{001F}')(i)?)
|
||||
}
|
||||
|
||||
fn parse_commentline(i: &str) -> IResult<&str, EntryOrComment> {
|
||||
let (i, (comment, _)) = tuple((
|
||||
opt(parse_noctrlchr),
|
||||
tag("\n"),
|
||||
))(i)?;
|
||||
let comment = if let Some(comment) = comment {
|
||||
comment
|
||||
} else {
|
||||
""
|
||||
};
|
||||
Ok((i, EntryOrComment::Comment(comment)))
|
||||
}
|
||||
|
||||
fn parse_jidheader(i: &str) -> IResult<&str, ()> {
|
||||
let (i, _) = tag("jid:\n")(i)?;
|
||||
Ok((i, ()))
|
||||
}
|
||||
|
||||
fn parse_unnormalized_jid(i: &str) -> IResult<&str, &str> {
|
||||
let (i, (jid, _)) = tuple((parse_noctrlchr, tag("\u{001E}\n")))(i)?;
|
||||
Ok((i, jid))
|
||||
}
|
||||
|
||||
fn parse_normalized_jid(i: &str) -> IResult<&str, (&str, &str, &str)> {
|
||||
let (i, (node, _)) = tuple((parse_noctrlchr, tag("\u{001F}")))(i)?;
|
||||
println!("FOO5: {node:?}");
|
||||
let (i, (domain, _)) = tuple((parse_noctrlchr, tag("\u{001F}")))(i)?;
|
||||
println!("FOO6: {domain:?}\n{i:?}");
|
||||
let (i, (resource, _)) = tuple((parse_noctrlchr, tag("\u{001E}\n")))(i)?;
|
||||
println!("FOO7: {resource:?}");
|
||||
/*
|
||||
let (i, (node, _, domain, _, resource, _)) = tuple((
|
||||
parse_noctrlchr, tag("\u{001F}"),
|
||||
parse_noctrlchr, tag("\u{001F}"),
|
||||
parse_noctrlchr, tag("\u{001E}\n"),
|
||||
))(i)?;
|
||||
*/
|
||||
Ok((i, (node, domain, resource)))
|
||||
}
|
||||
|
||||
fn parse_valid_jid_entry(i: &str) -> IResult<&str, EntryOrComment> {
|
||||
let (i, header) = parse_jidheader(i)?;
|
||||
println!("FOO1: {header:?}");
|
||||
let (i, input) = parse_unnormalized_jid(i)?;
|
||||
println!("FOO2: {input:?}");
|
||||
let (i, output) = parse_normalized_jid(i)?;
|
||||
println!("FOO3: {output:?}");
|
||||
/*
|
||||
let (i, (header, input, output)) = tuple((
|
||||
parse_jidheader,
|
||||
parse_unnormalized_jid,
|
||||
parse_normalized_jid,
|
||||
))(i)?;
|
||||
*/
|
||||
Ok((i, EntryOrComment::ValidJid((input, output))))
|
||||
}
|
||||
|
||||
fn parse_valid_entry(i: &str) -> IResult<&str, Option<EntryOrComment>> {
|
||||
let (i, opt_entry) = opt(alt((parse_valid_jid_entry, parse_commentline)))(i)?;
|
||||
Ok((i, opt_entry))
|
||||
}
|
||||
|
||||
pub fn parse_valid_corpus(i: &str) -> IResult<&str, ValidCorpus> {
|
||||
let mut corp = ValidCorpus { input: vec![], output: vec![] };
|
||||
let (i, entries) = many0(parse_valid_entry)(i)?;
|
||||
for entry in entries {
|
||||
match entry {
|
||||
Some(EntryOrComment::ValidJid((input, output))) => {
|
||||
corp.input.push(input);
|
||||
corp.output.push(output);
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
Ok((i, corp))
|
||||
}
|
||||
|
||||
// Invalid Jid parsing
|
||||
|
||||
fn parse_norschr(i: &str) -> IResult<&str, &str> {
|
||||
Ok(take_while(|c| c != '\u{000A}' && c != '\u{001E}' && c != '\u{001F}')(i)?)
|
||||
}
|
||||
|
||||
fn parse_invalid_jid_header(i: &str) -> IResult<&str, ()> {
|
||||
let (i, _) = tag("invalid jid:\n")(i)?;
|
||||
Ok((i, ()))
|
||||
}
|
||||
|
||||
fn parse_invalid_jid(i: &str) -> IResult<&str, Option<&str>> {
|
||||
let (i, (jid, _)) = tuple((opt(parse_norschr), tag("\u{001E}\n")))(i)?;
|
||||
Ok((i, jid))
|
||||
}
|
||||
fn parse_invalid_jid_entry(i: &str) -> IResult<&str, EntryOrComment> {
|
||||
let (i, (_, jid)) = tuple((
|
||||
parse_invalid_jid_header,
|
||||
parse_invalid_jid,
|
||||
))(i)?;
|
||||
Ok((i, EntryOrComment::InvalidJid(jid)))
|
||||
}
|
||||
|
||||
fn parse_invalid_entry(i: &str) -> IResult<&str, Option<EntryOrComment>> {
|
||||
let (i, opt_entry) = opt(alt((parse_invalid_jid_entry, parse_commentline)))(i)?;
|
||||
Ok((i, opt_entry))
|
||||
}
|
||||
|
||||
pub fn parse_invalid_corpus(i: &str) -> IResult<&str, InvalidCorpus> {
|
||||
let mut corp = InvalidCorpus { input: vec![] };
|
||||
let (i, entries) = many0(parse_invalid_entry)(i)?;
|
||||
for entry in entries {
|
||||
match entry {
|
||||
Some(EntryOrComment::InvalidJid(input)) => {
|
||||
corp.input.push(input);
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
Ok((i, corp))
|
||||
}
|
||||
|
||||
fn main() -> io::Result<()> {
|
||||
let args: Vec<String> = args().collect();
|
||||
if args.len() != 3 {
|
||||
println!("Usage: {} <valid-file-path> <invalid-file-path>", args[0]);
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"Invalid argument count",
|
||||
));
|
||||
}
|
||||
|
||||
let valid_path = Path::new(&args[1]);
|
||||
let invalid_path = Path::new(&args[2]);
|
||||
if !valid_path.exists() || !invalid_path.exists() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"At least one specified file doesn't exist",
|
||||
));
|
||||
}
|
||||
|
||||
let mut file = File::open(valid_path)?;
|
||||
let mut buf = String::new();
|
||||
file.read_to_string(&mut buf)?;
|
||||
|
||||
let parsed_corpus = parse_valid_corpus(&buf);
|
||||
if let Ok((_, corpus)) = parsed_corpus {
|
||||
let (inputs, outputs) = corpus.into_inner();
|
||||
let iter = inputs.into_iter().zip(outputs.into_iter());
|
||||
for (input, (onode, odomain, oresource)) in iter {
|
||||
println!("INPUT: {:?}", input);
|
||||
let mut success = true;
|
||||
match Jid::from_str(input) {
|
||||
Ok(Jid::Full(FullJid { node, domain, resource })) => {
|
||||
if !node.as_ref().map(|s| s == onode).unwrap_or_else(|| onode.len() == 0) ||
|
||||
domain != odomain ||
|
||||
resource != oresource {
|
||||
success = false;
|
||||
}
|
||||
},
|
||||
Ok(Jid::Bare(BareJid { node, domain })) => {
|
||||
if !node.as_ref().map(|s| s == onode).unwrap_or_else(|| onode.len() == 0) ||
|
||||
domain != odomain {
|
||||
success = false;
|
||||
}
|
||||
},
|
||||
_ => success = false,
|
||||
}
|
||||
if success {
|
||||
println!(": \x1b[32m OK\x1b[0m\n");
|
||||
} else {
|
||||
println!(": \x1b[31mERR\x1b[0m\n");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("Couldn't parse valid corpus file: {parsed_corpus:?}"),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in a new issue