6f304d197d
stringprep can apply transformations to a JID; the best-known one is lowercasing the nodepart and domainpart, but it does much more than that. It is extremely common to have to validate already-normalised JIDs, though, and since https://github.com/sfackler/rust-stringprep/pull/4 the stringprep crate detects exactly this case, by returning Cow::Borrowed() for common ASCII-only inputs. This commit reduces the time spent by a further 15% to 58% when that stringprep improvement is already in use, on top of the 89.5% to 98.5% reduction brought by the improvement itself (and costs +1.3% in total when the JID isn’t normalised yet). For instance, my own full JID parses in 1.83 µs before these changes, in 132 ns with just the stringprep optimisation, and in 46 ns with this commit as well, on an i7-8700K.
173 lines
5.8 KiB
Rust
173 lines
5.8 KiB
Rust
// Copyright (c) 2023 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
#![deny(missing_docs)]
|
|
|
|
//! Provides a type for Jabber IDs.
//!
//! For usage, check the documentation on the `Jid` struct.
|
|
|
|
use crate::Error;
|
|
use core::num::NonZeroU16;
|
|
use memchr::memchr;
|
|
use std::borrow::Cow;
|
|
use std::str::FromStr;
|
|
use stringprep::{nameprep, nodeprep, resourceprep};
|
|
|
|
/// Validates the length of one normalised JID part.
///
/// `len` is the length to check; the callers in this file pass `str::len()`, so it is
/// a byte count. The two error values are supplied by the caller so the same check can
/// be reused for every part.
///
/// # Errors
///
/// Returns `error_empty` when `len` is zero and `error_too_long` when `len` is
/// greater than 1023.
fn length_check(len: usize, error_empty: Error, error_too_long: Error) -> Result<(), Error> {
    // Largest accepted length of a single part (presumably the 1023-octet per-part
    // limit of the XMPP address format, RFC 7622).
    const MAX_PART_LEN: usize = 1023;

    match len {
        0 => Err(error_empty),
        n if n > MAX_PART_LEN => Err(error_too_long),
        _ => Ok(()),
    }
}
|
|
|
|
/// Internal representation of a parsed Jabber ID.
///
/// The whole JID is kept as one normalised string; the parts are recovered by slicing
/// it at the recorded separator offsets instead of storing one string per part.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct InnerJid {
    /// The complete JID after normalisation.
    pub(crate) normalized: String,
    /// Byte offset of the `@` separator in `normalized`, or `None` when the JID has no
    /// node part.
    pub(crate) at: Option<NonZeroU16>,
    /// Byte offset of the `/` separator in `normalized`, or `None` when the JID has no
    /// resource part.
    pub(crate) slash: Option<NonZeroU16>,
}
|
|
|
|
impl InnerJid {
|
|
pub(crate) fn new(unnormalized: &str) -> Result<InnerJid, Error> {
|
|
let bytes = unnormalized.as_bytes();
|
|
let mut orig_at = memchr(b'@', bytes);
|
|
let mut orig_slash = memchr(b'/', bytes);
|
|
if orig_at.is_some() && orig_slash.is_some() && orig_at > orig_slash {
|
|
// This is part of the resource, not a node@domain separator.
|
|
orig_at = None;
|
|
}
|
|
|
|
let normalized = match (orig_at, orig_slash) {
|
|
(Some(at), Some(slash)) => {
|
|
let node = nodeprep(&unnormalized[..at]).map_err(|_| Error::NodePrep)?;
|
|
length_check(node.len(), Error::NodeEmpty, Error::NodeTooLong)?;
|
|
|
|
let domain = nameprep(&unnormalized[at + 1..slash]).map_err(|_| Error::NamePrep)?;
|
|
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
|
|
|
|
let resource =
|
|
resourceprep(&unnormalized[slash + 1..]).map_err(|_| Error::ResourcePrep)?;
|
|
length_check(resource.len(), Error::ResourceEmpty, Error::ResourceTooLong)?;
|
|
|
|
orig_at = Some(node.len());
|
|
orig_slash = Some(node.len() + domain.len() + 1);
|
|
match (node, domain, resource) {
|
|
(Cow::Borrowed(_), Cow::Borrowed(_), Cow::Borrowed(_)) => {
|
|
unnormalized.to_string()
|
|
}
|
|
(node, domain, resource) => format!("{node}@{domain}/{resource}"),
|
|
}
|
|
}
|
|
(Some(at), None) => {
|
|
let node = nodeprep(&unnormalized[..at]).map_err(|_| Error::NodePrep)?;
|
|
length_check(node.len(), Error::NodeEmpty, Error::NodeTooLong)?;
|
|
|
|
let domain = nameprep(&unnormalized[at + 1..]).map_err(|_| Error::NamePrep)?;
|
|
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
|
|
|
|
orig_at = Some(node.len());
|
|
match (node, domain) {
|
|
(Cow::Borrowed(_), Cow::Borrowed(_)) => unnormalized.to_string(),
|
|
(node, domain) => format!("{node}@{domain}"),
|
|
}
|
|
}
|
|
(None, Some(slash)) => {
|
|
let domain = nameprep(&unnormalized[..slash]).map_err(|_| Error::NamePrep)?;
|
|
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
|
|
|
|
let resource =
|
|
resourceprep(&unnormalized[slash + 1..]).map_err(|_| Error::ResourcePrep)?;
|
|
length_check(resource.len(), Error::ResourceEmpty, Error::ResourceTooLong)?;
|
|
|
|
orig_slash = Some(domain.len());
|
|
match (domain, resource) {
|
|
(Cow::Borrowed(_), Cow::Borrowed(_)) => unnormalized.to_string(),
|
|
(domain, resource) => format!("{domain}/{resource}"),
|
|
}
|
|
}
|
|
(None, None) => {
|
|
let domain = nameprep(unnormalized).map_err(|_| Error::NamePrep)?;
|
|
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
|
|
|
|
domain.into_owned()
|
|
}
|
|
};
|
|
|
|
Ok(InnerJid {
|
|
normalized,
|
|
at: orig_at.and_then(|x| NonZeroU16::new(x as u16)),
|
|
slash: orig_slash.and_then(|x| NonZeroU16::new(x as u16)),
|
|
})
|
|
}
|
|
|
|
pub(crate) fn node(&self) -> Option<&str> {
|
|
self.at.and_then(|at| {
|
|
let at = u16::from(at) as usize;
|
|
Some(&self.normalized[..at])
|
|
})
|
|
}
|
|
|
|
pub(crate) fn domain(&self) -> &str {
|
|
match (self.at, self.slash) {
|
|
(Some(at), Some(slash)) => {
|
|
let at = u16::from(at) as usize;
|
|
let slash = u16::from(slash) as usize;
|
|
&self.normalized[at + 1..slash]
|
|
}
|
|
(Some(at), None) => {
|
|
let at = u16::from(at) as usize;
|
|
&self.normalized[at + 1..]
|
|
}
|
|
(None, Some(slash)) => {
|
|
let slash = u16::from(slash) as usize;
|
|
&self.normalized[..slash]
|
|
}
|
|
(None, None) => &self.normalized,
|
|
}
|
|
}
|
|
|
|
pub(crate) fn resource(&self) -> Option<&str> {
|
|
self.slash.and_then(|slash| {
|
|
let slash = u16::from(slash) as usize;
|
|
Some(&self.normalized[slash + 1..])
|
|
})
|
|
}
|
|
}
|
|
|
|
impl FromStr for InnerJid {
|
|
type Err = Error;
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
InnerJid::new(s)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem::size_of;

    // The two `test_size` variants pin the in-memory size of `InnerJid` for each pointer
    // width, so that an accidental growth of the struct is caught.

    /// On 32-bit targets `InnerJid` must occupy 16 bytes.
    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_size() {
        assert_eq!(size_of::<InnerJid>(), 16);
    }

    /// On 64-bit targets `InnerJid` must occupy 32 bytes.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_eq!(size_of::<InnerJid>(), 32);
    }
}
|