xmpp-rs/jid/src/inner.rs
Emmanuel Gil Peyrot 6f304d197d jid: Optimise for no-transform JIDs
stringprep can make transformations to a JID; the most well-known one is
making the nodepart and domainpart lowercase, but it does much more than
that.

It is extremely common to have to validate already-normalised JIDs
though, and since https://github.com/sfackler/rust-stringprep/pull/4
this is exactly what the stringprep crate does, by returning
Cow::Borrowed() for common ASCII-only cases.

This commit further reduces the time spent by an additional 15% to 58% when
already using this stringprep improvement, on top of the 89.5% to 98.5%
reduction brought by that improvement (and adds +1.3% in total when
the JID isn’t normalised yet).

For instance, my own full JID parses in 1.83 µs before these changes,
132 ns with just the stringprep optimisation, and 46 ns with also this
commit, on an i7-8700K.
2023-07-16 19:50:12 +02:00

173 lines
5.8 KiB
Rust

// Copyright (c) 2023 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
#![deny(missing_docs)]
//! Provides a type for Jabber IDs.
//!
//! For usage, check the documentation on the `Jid` struct.
use crate::Error;
use core::num::NonZeroU16;
use memchr::memchr;
use std::borrow::Cow;
use std::str::FromStr;
use stringprep::{nameprep, nodeprep, resourceprep};
/// Validates the byte length of a single, already normalized JID part.
///
/// Returns `error_empty` when `len` is zero, `error_too_long` when `len` is
/// above 1023, and `Ok(())` for every length in between. The caller chooses
/// both error values so the same check can serve every part of a JID.
fn length_check(len: usize, error_empty: Error, error_too_long: Error) -> Result<(), Error> {
    match len {
        0 => Err(error_empty),
        1..=1023 => Ok(()),
        _ => Err(error_too_long),
    }
}
/// Internal representation of a JID: one normalized string plus the byte
/// offsets of its separators, from which the individual parts are sliced.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct InnerJid {
/// The complete JID text. When built through `InnerJid::new`, every part has
/// gone through its stringprep profile before being stored here.
pub(crate) normalized: String,
/// Byte offset of the `@` separator inside `normalized`, or `None` when the
/// JID has no node. `InnerJid::new` never stores a zero offset, because it
/// rejects an empty node.
pub(crate) at: Option<NonZeroU16>,
/// Byte offset of the `/` separator inside `normalized`, or `None` when the
/// JID has no resource. `InnerJid::new` never stores a zero offset, because
/// it rejects an empty domain.
pub(crate) slash: Option<NonZeroU16>,
}
impl InnerJid {
pub(crate) fn new(unnormalized: &str) -> Result<InnerJid, Error> {
let bytes = unnormalized.as_bytes();
let mut orig_at = memchr(b'@', bytes);
let mut orig_slash = memchr(b'/', bytes);
if orig_at.is_some() && orig_slash.is_some() && orig_at > orig_slash {
// This is part of the resource, not a node@domain separator.
orig_at = None;
}
let normalized = match (orig_at, orig_slash) {
(Some(at), Some(slash)) => {
let node = nodeprep(&unnormalized[..at]).map_err(|_| Error::NodePrep)?;
length_check(node.len(), Error::NodeEmpty, Error::NodeTooLong)?;
let domain = nameprep(&unnormalized[at + 1..slash]).map_err(|_| Error::NamePrep)?;
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
let resource =
resourceprep(&unnormalized[slash + 1..]).map_err(|_| Error::ResourcePrep)?;
length_check(resource.len(), Error::ResourceEmpty, Error::ResourceTooLong)?;
orig_at = Some(node.len());
orig_slash = Some(node.len() + domain.len() + 1);
match (node, domain, resource) {
(Cow::Borrowed(_), Cow::Borrowed(_), Cow::Borrowed(_)) => {
unnormalized.to_string()
}
(node, domain, resource) => format!("{node}@{domain}/{resource}"),
}
}
(Some(at), None) => {
let node = nodeprep(&unnormalized[..at]).map_err(|_| Error::NodePrep)?;
length_check(node.len(), Error::NodeEmpty, Error::NodeTooLong)?;
let domain = nameprep(&unnormalized[at + 1..]).map_err(|_| Error::NamePrep)?;
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
orig_at = Some(node.len());
match (node, domain) {
(Cow::Borrowed(_), Cow::Borrowed(_)) => unnormalized.to_string(),
(node, domain) => format!("{node}@{domain}"),
}
}
(None, Some(slash)) => {
let domain = nameprep(&unnormalized[..slash]).map_err(|_| Error::NamePrep)?;
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
let resource =
resourceprep(&unnormalized[slash + 1..]).map_err(|_| Error::ResourcePrep)?;
length_check(resource.len(), Error::ResourceEmpty, Error::ResourceTooLong)?;
orig_slash = Some(domain.len());
match (domain, resource) {
(Cow::Borrowed(_), Cow::Borrowed(_)) => unnormalized.to_string(),
(domain, resource) => format!("{domain}/{resource}"),
}
}
(None, None) => {
let domain = nameprep(unnormalized).map_err(|_| Error::NamePrep)?;
length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?;
domain.into_owned()
}
};
Ok(InnerJid {
normalized,
at: orig_at.and_then(|x| NonZeroU16::new(x as u16)),
slash: orig_slash.and_then(|x| NonZeroU16::new(x as u16)),
})
}
pub(crate) fn node(&self) -> Option<&str> {
self.at.and_then(|at| {
let at = u16::from(at) as usize;
Some(&self.normalized[..at])
})
}
pub(crate) fn domain(&self) -> &str {
match (self.at, self.slash) {
(Some(at), Some(slash)) => {
let at = u16::from(at) as usize;
let slash = u16::from(slash) as usize;
&self.normalized[at + 1..slash]
}
(Some(at), None) => {
let at = u16::from(at) as usize;
&self.normalized[at + 1..]
}
(None, Some(slash)) => {
let slash = u16::from(slash) as usize;
&self.normalized[..slash]
}
(None, None) => &self.normalized,
}
}
pub(crate) fn resource(&self) -> Option<&str> {
self.slash.and_then(|slash| {
let slash = u16::from(slash) as usize;
Some(&self.normalized[slash + 1..])
})
}
}
impl FromStr for InnerJid {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
InnerJid::new(s)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem::size_of;

    /// Expected size of `InnerJid`, in bytes, on targets with 32-bit pointers.
    #[cfg(target_pointer_width = "32")]
    const EXPECTED_SIZE: usize = 16;

    /// Expected size of `InnerJid`, in bytes, on targets with 64-bit pointers.
    #[cfg(target_pointer_width = "64")]
    const EXPECTED_SIZE: usize = 32;

    // Pins the memory footprint of `InnerJid` so that an accidental growth of
    // the struct gets noticed. Other pointer widths have no expectation.
    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
    #[test]
    fn test_size() {
        assert_eq!(size_of::<InnerJid>(), EXPECTED_SIZE);
    }
}