From 6f304d197d77142d5d7adfb2ddb7fd13921ce0ee Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sat, 15 Jul 2023 19:25:14 +0200 Subject: [PATCH] jid: Optimise for no-transform JIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stringprep can make transformations to a JID; the most well-known one is making the nodepart and domainpart lowercase, but it does much more than that. It is extremely common to have to validate already-normalised JIDs though, and since https://github.com/sfackler/rust-stringprep/pull/4 the stringprep crate optimises exactly this case, by returning Cow::Borrowed() for common ASCII-only cases. This commit reduces the time spent by a further 15%..58% when already using this stringprep improvement, on top of the 89.5%..98.5% reduction brought by that improvement (and +1.3% total when the JID isn’t normalised yet). For instance, my own full JID parses in 1.83 µs before these changes, 132 ns with just the stringprep optimisation, and 46 ns with this commit as well, on an i7-8700K. --- jid/CHANGELOG.md | 1 + jid/Cargo.toml | 2 +- jid/src/inner.rs | 18 +++++++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/jid/CHANGELOG.md b/jid/CHANGELOG.md index 111cb8c..ef233e9 100644 --- a/jid/CHANGELOG.md +++ b/jid/CHANGELOG.md @@ -8,6 +8,7 @@ Unreleased * Additions - Parsing invalid JIDs with stringprep feature no longer results in panic, returning Error with NodePrep, NamePrep or ResourcePrep variant instead (#84) + - Parsing already-normalized JIDs with stringprep is about 20 times faster. 
- JID parts are now typed as NodePart, DomainPart and ResourcePart ; once part into those types, JID operations cannot fail - BareJid::with_resource appends a ResourcePart to a BareJid to produce a FullJid (#204) diff --git a/jid/Cargo.toml b/jid/Cargo.toml index 5758c64..63dea0d 100644 --- a/jid/Cargo.toml +++ b/jid/Cargo.toml @@ -22,4 +22,4 @@ gitlab = { repository = "xmpp-rs/xmpp-rs" } memchr = "2.5" minidom = { version = "0.15", optional = true } serde = { version = "1.0", features = ["derive"], optional = true } -stringprep = "0.1.2" +stringprep = "0.1.3" diff --git a/jid/src/inner.rs b/jid/src/inner.rs index e124b42..644e4c2 100644 --- a/jid/src/inner.rs +++ b/jid/src/inner.rs @@ -13,6 +13,7 @@ use crate::Error; use core::num::NonZeroU16; use memchr::memchr; +use std::borrow::Cow; use std::str::FromStr; use stringprep::{nameprep, nodeprep, resourceprep}; @@ -57,7 +58,12 @@ impl InnerJid { orig_at = Some(node.len()); orig_slash = Some(node.len() + domain.len() + 1); - format!("{node}@{domain}/{resource}") + match (node, domain, resource) { + (Cow::Borrowed(_), Cow::Borrowed(_), Cow::Borrowed(_)) => { + unnormalized.to_string() + } + (node, domain, resource) => format!("{node}@{domain}/{resource}"), + } } (Some(at), None) => { let node = nodeprep(&unnormalized[..at]).map_err(|_| Error::NodePrep)?; @@ -67,7 +73,10 @@ impl InnerJid { length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?; orig_at = Some(node.len()); - format!("{node}@{domain}") + match (node, domain) { + (Cow::Borrowed(_), Cow::Borrowed(_)) => unnormalized.to_string(), + (node, domain) => format!("{node}@{domain}"), + } } (None, Some(slash)) => { let domain = nameprep(&unnormalized[..slash]).map_err(|_| Error::NamePrep)?; @@ -78,7 +87,10 @@ impl InnerJid { length_check(resource.len(), Error::ResourceEmpty, Error::ResourceTooLong)?; orig_slash = Some(domain.len()); - format!("{domain}/{resource}") + match (domain, resource) { + (Cow::Borrowed(_), Cow::Borrowed(_)) => 
unnormalized.to_string(), + (domain, resource) => format!("{domain}/{resource}"), + } } (None, None) => { let domain = nameprep(unnormalized).map_err(|_| Error::NamePrep)?;