From 6eb25755a3d02b5abcf4e5b14832ce3a1fed6ce6 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Mon, 2 Dec 2019 02:57:20 +0100 Subject: [PATCH] WIP: Add ICU bindings for stringprep, idna2008 and spoof checker. --- Cargo.toml | 2 + icu/Cargo.toml | 12 ++++ icu/build.rs | 5 ++ icu/src/bindings.c | 54 +++++++++++++++ icu/src/bindings.rs | 149 ++++++++++++++++++++++++++++++++++++++++++ icu/src/error.rs | 67 +++++++++++++++++++ icu/src/idna2008.rs | 69 +++++++++++++++++++ icu/src/lib.rs | 141 +++++++++++++++++++++++++++++++++++++++ icu/src/spoof.rs | 52 +++++++++++++++ icu/src/stringprep.rs | 88 +++++++++++++++++++++++++ jid/Cargo.toml | 1 + jid/src/lib.rs | 33 +++++++++- 12 files changed, 671 insertions(+), 2 deletions(-) create mode 100644 icu/Cargo.toml create mode 100644 icu/build.rs create mode 100644 icu/src/bindings.c create mode 100644 icu/src/bindings.rs create mode 100644 icu/src/error.rs create mode 100644 icu/src/idna2008.rs create mode 100644 icu/src/lib.rs create mode 100644 icu/src/spoof.rs create mode 100644 icu/src/stringprep.rs diff --git a/Cargo.toml b/Cargo.toml index a5c75e3f..7c4bd093 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] members = [ # alphabetically sorted + "icu", "jid", "minidom", "parsers", @@ -8,6 +9,7 @@ members = [ # alphabetically sorted ] [patch.crates-io] +icu = { path = "icu" } jid = { path = "jid" } minidom = { path = "minidom" } tokio-xmpp = { path = "tokio-xmpp" } diff --git a/icu/Cargo.toml b/icu/Cargo.toml new file mode 100644 index 00000000..119bc5b1 --- /dev/null +++ b/icu/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "icu" +version = "0.1.0" +authors = ["Emmanuel Gil Peyrot "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[build-dependencies] +cc = "1" diff --git a/icu/build.rs b/icu/build.rs new file mode 100644 index 00000000..b1f9e02c --- /dev/null +++ b/icu/build.rs @@ -0,0 +1,5 @@ +fn main() { + 
cc::Build::new().file("src/bindings.c").compile("bindings"); + println!("cargo:rustc-link-lib=dylib=icuuc"); + println!("cargo:rustc-link-lib=dylib=icui18n"); +} diff --git a/icu/src/bindings.c b/icu/src/bindings.c new file mode 100644 index 00000000..b9fc55a7 --- /dev/null +++ b/icu/src/bindings.c @@ -0,0 +1,54 @@ +// This file is a stupid wrapper to avoid the automated suffixing libicu is +// doing in unicode/urename.h. +// +// By default it will suffix each of its symbols with "_65" (with 65 being the +// soname), which completely messes with Rust’s binding ability. + +#include +#include +#include +#include +#include +#include +#include +#include + +const char* icu_error_code_to_name(UErrorCode code) { + return u_errorName(code); +} + +UIDNA* icu_idna_open(uint32_t options, UErrorCode* pErrorCode) { + return uidna_openUTS46(options, pErrorCode); +} + +int32_t icu_idna_name_to_ascii(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) { + return uidna_nameToASCII_UTF8(idna, name, length, dest, capacity, pInfo, pErrorCode); +} + +int32_t icu_idna_name_to_unicode(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) { + return uidna_nameToUnicodeUTF8(idna, name, length, dest, capacity, pInfo, pErrorCode); +} + +UStringPrepProfile* icu_stringprep_open(UStringPrepProfileType type, UErrorCode* status) { + return usprep_openByType(type, status); +} + +int32_t icu_stringprep_prepare(const UStringPrepProfile* prep, const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UParseError* parseError, UErrorCode* status) { + return usprep_prepare(prep, src, srcLength, dest, destCapacity, options, parseError, status); +} + +void icu_trace_set_level(UTraceLevel traceLevel) { + utrace_setLevel(traceLevel); +} + +USpoofChecker* icu_spoof_open(UErrorCode* status) { + return uspoof_open(status); +} + +void 
icu_spoof_set_checks(USpoofChecker* sc, int32_t checks, UErrorCode* status) { + uspoof_setChecks(sc, checks, status); +} + +int32_t icu_spoof_get_skeleton(USpoofChecker* sc, uint32_t type, const char* id, int32_t length, char* dest, int32_t destCapacity, UErrorCode* status) { + return uspoof_getSkeletonUTF8(sc, type, id, length, dest, destCapacity, status); +} diff --git a/icu/src/bindings.rs b/icu/src/bindings.rs new file mode 100644 index 00000000..e55ea100 --- /dev/null +++ b/icu/src/bindings.rs @@ -0,0 +1,149 @@ +//! Crate wrapping what we need from ICU’s C API for JIDs. +//! +//! See http://site.icu-project.org/ + +use std::os::raw::c_char; + +// From unicode/umachine.h +pub(crate) type UChar = u16; + +// From unicode/utypes.h +pub(crate) type UErrorCode = u32; +pub(crate) const U_ZERO_ERROR: UErrorCode = 0; + +pub(crate) type UStringPrepProfile = u32; +type UParseError = u32; + +// From unicode/usprep.h +pub(crate) const USPREP_DEFAULT: i32 = 0; +pub(crate) const USPREP_ALLOW_UNASSIGNED: i32 = 1; + +pub(crate) type UStringPrepProfileType = u32; +pub(crate) const USPREP_RFC3491_NAMEPREP: UStringPrepProfileType = 0; +pub(crate) const USPREP_RFC3920_NODEPREP: UStringPrepProfileType = 7; +pub(crate) const USPREP_RFC3920_RESOURCEPREP: UStringPrepProfileType = 8; +pub(crate) const USPREP_RFC4013_SASLPREP: UStringPrepProfileType = 10; + +// From unicode/utrace.h +type UTraceLevel = i32; +pub(crate) const UTRACE_VERBOSE: UTraceLevel = 9; + +// From unicode/uidna.h +#[repr(C)] +pub(crate) struct UIDNA { + _unused: [u8; 0], +} +type UBool = i8; + +#[repr(C)] +pub(crate) struct UIDNAInfo { + size: i16, + is_transitional_different: UBool, + reserved_b3: UBool, + errors: u32, + reserved_i2: i32, + reserved_i3: i32, +} + +impl UIDNAInfo { + pub(crate) fn new() -> UIDNAInfo { + assert_eq!(std::mem::size_of::(), 16); + UIDNAInfo { + size: std::mem::size_of::() as i16, + is_transitional_different: false as UBool, + reserved_b3: false as UBool, + errors: 0, + reserved_i2: 0, + 
reserved_i3: 0, + } + } + + // TODO: Return a String instead, or a custom error type, this is a bitflag (defined in + // uidna.h) where multiple errors can be accumulated. + pub(crate) fn get_errors(&self) -> u32 { + self.errors + } +} + +pub(crate) const UIDNA_DEFAULT: u32 = 0; +pub(crate) const UIDNA_USE_STD3_RULES: u32 = 2; + +pub(crate) type UIdnaFunction = unsafe extern "C" fn( + *const UIDNA, + *const u8, + i32, + *mut u8, + i32, + *mut UIDNAInfo, + *mut u32, +) -> i32; + +// From unicode/uspoof.h +#[repr(C)] +pub(crate) struct USpoofChecker { + _unused: [u8; 0], +} +pub(crate) const USPOOF_CONFUSABLE: i32 = 7; + +#[link(name = "bindings")] +extern "C" { + // From unicode/utypes.h + pub(crate) fn icu_error_code_to_name(code: UErrorCode) -> *const c_char; + + // From unicode/usprep.h + pub(crate) fn icu_stringprep_open( + type_: UStringPrepProfileType, + status: *mut UErrorCode, + ) -> *mut UStringPrepProfile; + pub(crate) fn icu_stringprep_prepare( + prep: *const UStringPrepProfile, + src: *const UChar, + srcLength: i32, + dest: *mut UChar, + destCapacity: i32, + options: i32, + parseError: *mut UParseError, + status: *mut UErrorCode, + ) -> i32; + + // From unicode/utrace.h + pub(crate) fn icu_trace_set_level(traceLevel: UTraceLevel); + + // From unicode/uidna.h + pub(crate) fn icu_idna_open(options: u32, pErrorCode: *mut UErrorCode) -> *mut UIDNA; + pub(crate) fn icu_idna_name_to_ascii( + idna: *const UIDNA, + name: *const u8, + length: i32, + dest: *mut u8, + capacity: i32, + pInfo: *mut UIDNAInfo, + pErrorCode: *mut UErrorCode, + ) -> i32; + pub(crate) fn icu_idna_name_to_unicode( + idna: *const UIDNA, + name: *const u8, + length: i32, + dest: *mut u8, + capacity: i32, + pInfo: *mut UIDNAInfo, + pErrorCode: *mut UErrorCode, + ) -> i32; + + // From unicode/uspoof.h + pub(crate) fn icu_spoof_open(status: *mut UErrorCode) -> *mut USpoofChecker; + pub(crate) fn icu_spoof_set_checks( + sc: *mut USpoofChecker, + checks: i32, + status: *mut UErrorCode, + ); + 
pub(crate) fn icu_spoof_get_skeleton( + sc: *const USpoofChecker, + type_: u32, + id: *const u8, + length: i32, + dest: *mut u8, + destCapacity: i32, + status: *mut UErrorCode, + ) -> i32; +} diff --git a/icu/src/error.rs b/icu/src/error.rs new file mode 100644 index 00000000..555ccfe8 --- /dev/null +++ b/icu/src/error.rs @@ -0,0 +1,67 @@ +//! Crate wrapping what we need from ICU’s C API for JIDs. +//! +//! See http://site.icu-project.org/ + +use crate::bindings::{icu_error_code_to_name, UErrorCode}; +use std::ffi::CStr; + +/// Errors this library can produce. +#[derive(Debug)] +pub enum Error { + /// An error produced by one of the ICU functions. + Icu(String), + + /// An error produced by one of the IDNA2008 ICU functions. + Idna(u32), + + /// Some ICU function didn’t produce a valid UTF-8 string, should never happen. + Utf8(std::string::FromUtf8Error), + + /// Some ICU function didn’t produce a valid UTF-16 string, should never happen. + Utf16(std::char::DecodeUtf16Error), + + /// Some string was too long for its profile in JID. + TooLong, +} + +impl PartialEq for Error { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Error::Icu(s1), Error::Icu(s2)) => s1 == s2, + (Error::Idna(s1), Error::Idna(s2)) => s1 == s2, + // TODO: compare by something here? 
+ (Error::Utf8(_s1), Error::Utf8(_s2)) => true, + (Error::Utf16(_s1), Error::Utf16(_s2)) => true, + (Error::TooLong, Error::TooLong) => true, + _ => false, + } + } +} + +impl Eq for Error {} + +impl Error { + pub(crate) fn from_icu_code(err: UErrorCode) -> Error { + let ptr = unsafe { icu_error_code_to_name(err) }; + let c_str = unsafe { CStr::from_ptr(ptr) }; + Error::Icu(c_str.to_string_lossy().into_owned()) + } +} + +impl From for Error { + fn from(err: UErrorCode) -> Error { + Error::from_icu_code(err) + } +} + +impl From for Error { + fn from(err: std::string::FromUtf8Error) -> Error { + Error::Utf8(err) + } +} + +impl From for Error { + fn from(err: std::char::DecodeUtf16Error) -> Error { + Error::Utf16(err) + } +} diff --git a/icu/src/idna2008.rs b/icu/src/idna2008.rs new file mode 100644 index 00000000..117730c2 --- /dev/null +++ b/icu/src/idna2008.rs @@ -0,0 +1,69 @@ +//! Crate wrapping what we need from ICU’s C API for JIDs. +//! +//! See http://site.icu-project.org/ + +use crate::bindings::{ + icu_idna_name_to_ascii, icu_idna_name_to_unicode, icu_idna_open, UErrorCode, UIDNAInfo, + UIdnaFunction, UIDNA, U_ZERO_ERROR, +}; +use crate::error::Error; + +/// TODO: IDNA2008 support. +pub struct Idna { + inner: *mut UIDNA, +} + +impl Idna { + /// Create a new Idna struct. + pub fn new(options: u32) -> Result { + let mut err: UErrorCode = U_ZERO_ERROR; + let inner = unsafe { icu_idna_open(options, &mut err) }; + match err { + U_ZERO_ERROR => Ok(Idna { inner }), + err => Err(err), + } + } + + /// Converts a whole domain name into its ASCII form for DNS lookup. + pub fn to_ascii(&self, input: &str) -> Result { + self.idna(input, icu_idna_name_to_ascii) + } + + /// Converts a whole domain name into its Unicode form for human-readable display. 
+ pub fn to_unicode(&self, input: &str) -> Result { + self.idna(input, icu_idna_name_to_unicode) + } + + fn idna(&self, input: &str, function: UIdnaFunction) -> Result { + if input.len() > 255 { + return Err(Error::TooLong); + } + + let mut err: UErrorCode = U_ZERO_ERROR; + let mut dest: Vec = vec![0u8; 256]; + let mut info = UIDNAInfo::new(); + let len = unsafe { + function( + self.inner, + input.as_ptr(), + input.len() as i32, + dest.as_mut_ptr(), + dest.len() as i32, + &mut info, + &mut err, + ) + }; + if err != U_ZERO_ERROR { + return Err(Error::from_icu_code(err)); + } + let errors = info.get_errors(); + if errors != 0 { + return Err(Error::Idna(errors)); + } + if len > 255 { + return Err(Error::TooLong); + } + dest.truncate(len as usize); + Ok(String::from_utf8(dest)?) + } +} diff --git a/icu/src/lib.rs b/icu/src/lib.rs new file mode 100644 index 00000000..9f301af4 --- /dev/null +++ b/icu/src/lib.rs @@ -0,0 +1,141 @@ +//! Crate wrapping what we need from ICU’s C API for JIDs. +//! +//! See http://site.icu-project.org/ + +#![deny(missing_docs)] + +mod bindings; +mod error; +mod idna2008; +mod spoof; +mod stringprep; + +use crate::bindings::{ + icu_trace_set_level, UIDNA_DEFAULT, UIDNA_USE_STD3_RULES, USPOOF_CONFUSABLE, + USPREP_RFC3491_NAMEPREP, USPREP_RFC3920_NODEPREP, USPREP_RFC3920_RESOURCEPREP, + USPREP_RFC4013_SASLPREP, UTRACE_VERBOSE, +}; +pub use crate::error::Error; +pub use crate::idna2008::Idna; +pub use crate::spoof::SpoofChecker; +pub use crate::stringprep::Stringprep; + +/// How unassigned codepoints should be handled. +pub enum Strict { + /// All codepoints should be assigned, otherwise an error will be emitted. + True, + + /// Codepoints can be unassigned. + AllowUnassigned, +} + +/// Main struct of this module, exposing the needed ICU functions to JID. +pub struct Icu { + /// Perform stringprep using the Nameprep profile. + /// + /// See [RFC3491](https://tools.ietf.org/html/rfc3491). 
+ pub nameprep: Stringprep, + + /// Perform stringprep using the Nodeprep profile. + /// + /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A). + pub nodeprep: Stringprep, + + /// Perform stringprep using the Resourceprep profile. + /// + /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A). + pub resourceprep: Stringprep, + + /// Perform stringprep using the Saslprep profile. + /// + /// See [RFC4013](https://tools.ietf.org/html/rfc4013). + pub saslprep: Stringprep, + + /// IDNA2008 support. + /// + /// See [RFC5891](https://tools.ietf.org/html/rfc5891). + pub idna2008: Idna, + + /// Spoof checker TODO: better doc. + pub spoofchecker: SpoofChecker, +} + +impl Icu { + /// Create a new ICU struct, initialising stringprep profiles, IDNA2008, as well as a spoof + /// checker. + pub fn new() -> Result { + unsafe { icu_trace_set_level(UTRACE_VERBOSE) }; + + let nameprep = Stringprep::new(USPREP_RFC3491_NAMEPREP)?; + let nodeprep = Stringprep::new(USPREP_RFC3920_NODEPREP)?; + let resourceprep = Stringprep::new(USPREP_RFC3920_RESOURCEPREP)?; + let saslprep = Stringprep::new(USPREP_RFC4013_SASLPREP)?; + + let mut options = UIDNA_DEFAULT; + options |= UIDNA_USE_STD3_RULES; + let idna2008 = Idna::new(options)?; + + let spoofchecker = SpoofChecker::new(USPOOF_CONFUSABLE)?; + + Ok(Icu { + nameprep, + nodeprep, + resourceprep, + saslprep, + idna2008, + spoofchecker, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn nameprep() { + let name = "Link"; + let icu = Icu::new().unwrap(); + let name = icu.nodeprep.stringprep(name, Strict::True).unwrap(); + assert_eq!(name, "link"); + } + + #[test] + fn resourceprep() { + let name = "Test™"; + let icu = Icu::new().unwrap(); + let name = icu + .resourceprep + .stringprep(name, Strict::AllowUnassigned) + .unwrap(); + assert_eq!(name, "TestTM"); + } + + #[test] + fn idna() { + let name = "☃.coM"; + let icu = Icu::new().unwrap(); + let name = 
icu.idna2008.to_ascii(name).unwrap(); + assert_eq!(name, "xn--n3h.com"); + + let name = "xn--N3H.com"; + let icu = Icu::new().unwrap(); + let name = icu.idna2008.to_unicode(name).unwrap(); + assert_eq!(name, "☃.com"); + } + + #[test] + fn spoof() { + // Non-breakable and narrow non-breakable spaces spoofing. + let name = "foo bar baz"; + let icu = Icu::new().unwrap(); + let name = icu.spoofchecker.get_skeleton(name).unwrap(); + assert_eq!(name, "foo bar baz"); + + // Cyrillic spoofing. + let name = "Неllо wоrld"; + let icu = Icu::new().unwrap(); + let name = icu.spoofchecker.get_skeleton(name).unwrap(); + assert_eq!(name, "Hello world"); + } +} diff --git a/icu/src/spoof.rs b/icu/src/spoof.rs new file mode 100644 index 00000000..d0346ab0 --- /dev/null +++ b/icu/src/spoof.rs @@ -0,0 +1,52 @@ +//! Crate wrapping what we need from ICU’s C API for JIDs. +//! +//! See http://site.icu-project.org/ + +use crate::bindings::{ + icu_spoof_get_skeleton, icu_spoof_open, icu_spoof_set_checks, UErrorCode, USpoofChecker, + U_ZERO_ERROR, +}; +use crate::error::Error; + +/// TODO: spoof checker. +pub struct SpoofChecker { + inner: *mut USpoofChecker, +} + +impl SpoofChecker { + /// Create a new SpoofChecker. + pub fn new(checks: i32) -> Result { + let mut err: UErrorCode = U_ZERO_ERROR; + let inner = unsafe { icu_spoof_open(&mut err) }; + if err != U_ZERO_ERROR { + return Err(err); + } + unsafe { icu_spoof_set_checks(inner, checks, &mut err) }; + if err != U_ZERO_ERROR { + return Err(err); + } + Ok(SpoofChecker { inner }) + } + + /// Transform a string into a skeleton for matching it with other potentially similar strings. 
+ pub fn get_skeleton(&self, input: &str) -> Result { + let mut err: UErrorCode = U_ZERO_ERROR; + let mut dest: Vec = vec![0u8; 256]; + let len = unsafe { + icu_spoof_get_skeleton( + self.inner, + 0, + input.as_ptr(), + input.len() as i32, + dest.as_mut_ptr(), + dest.len() as i32, + &mut err, + ) + }; + if err != U_ZERO_ERROR { + return Err(Error::from_icu_code(err)); + } + dest.truncate(len as usize); + Ok(String::from_utf8(dest)?) + } +} diff --git a/icu/src/stringprep.rs b/icu/src/stringprep.rs new file mode 100644 index 00000000..8020b637 --- /dev/null +++ b/icu/src/stringprep.rs @@ -0,0 +1,88 @@ +//! Crate wrapping what we need from ICU’s C API for JIDs. +//! +//! See http://site.icu-project.org/ + +use crate::bindings::{ + icu_stringprep_open, icu_stringprep_prepare, UChar, UErrorCode, UStringPrepProfile, + UStringPrepProfileType, USPREP_ALLOW_UNASSIGNED, USPREP_DEFAULT, U_ZERO_ERROR, +}; +use crate::error::Error; +use crate::Strict; +use std::ptr::null_mut; + +/// Struct representing a given stringprep profile. +pub struct Stringprep { + inner: *mut UStringPrepProfile, +} + +impl Stringprep { + /// Create a new Stringprep struct for the given profile. + pub(crate) fn new(profile: UStringPrepProfileType) -> Result { + let mut err: UErrorCode = U_ZERO_ERROR; + let inner = unsafe { icu_stringprep_open(profile, &mut err) }; + match err { + U_ZERO_ERROR => Ok(Stringprep { inner }), + err => Err(err), + } + } + + /// Perform a stringprep operation using this profile. + /// + /// # Errors + /// Fails if the input or output is too long, or if ICU returns an error or invalid UTF-16. + pub fn stringprep(&self, input: &str, strict: Strict) -> Result { + if input.len() > 1023 { + return Err(Error::TooLong); + } + + // ICU works on UTF-16 data, so convert it first. + let unprepped: Vec = input.encode_utf16().collect(); + + // Now do the actual stringprep operation. 
+ let mut prepped: Vec = vec![0u16; 1024]; + let flags = match strict { + Strict::True => USPREP_DEFAULT, + Strict::AllowUnassigned => USPREP_ALLOW_UNASSIGNED, + }; + self.prepare(&unprepped, &mut prepped, flags)?; + + // And then convert it back to UTF-8. + let output = std::char::decode_utf16(prepped.into_iter()) + //.map(Result::unwrap) + .try_fold(Vec::new(), |mut acc, c| match c { + Ok(c) => { + acc.push(c); + Ok(acc) + } + Err(err) => Err(err), + })?; + let output: String = output.into_iter().collect(); + + if output.len() > 1023 { + return Err(Error::TooLong); + } + + Ok(output) + } + + fn prepare(&self, input: &[UChar], buf: &mut Vec, flags: i32) -> Result<(), UErrorCode> { + let mut err: UErrorCode = U_ZERO_ERROR; + let prepped_len = unsafe { + icu_stringprep_prepare( + self.inner, + input.as_ptr(), + input.len() as i32, + buf.as_mut_ptr(), + buf.len() as i32, + flags, + null_mut(), + &mut err, + ) + }; + if err != U_ZERO_ERROR { + return Err(err); + } + buf.truncate(prepped_len as usize); + Ok(()) + } +} diff --git a/jid/Cargo.toml b/jid/Cargo.toml index 16dfbedb..a8601d0f 100644 --- a/jid/Cargo.toml +++ b/jid/Cargo.toml @@ -19,5 +19,6 @@ edition = "2018" gitlab = { repository = "xmpp-rs/xmpp-rs" } [dependencies] +icu = { version = "0.1", optional = true } minidom = { version = "0.15", optional = true } serde = { version = "1.0", features = ["derive"], optional = true } diff --git a/jid/src/lib.rs b/jid/src/lib.rs index 5b4f4869..ab6b62b7 100644 --- a/jid/src/lib.rs +++ b/jid/src/lib.rs @@ -22,8 +22,11 @@ use std::str::FromStr; #[cfg(feature = "serde")] use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; +#[cfg(feature = "icu")] +use icu::{Icu, Strict}; + /// An error that signifies that a `Jid` cannot be parsed from a string. 
-#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] pub enum JidParseError { /// Happens when there is no domain, that is either the string is empty, /// starts with a /, or contains the @/ sequence. @@ -37,6 +40,10 @@ pub enum JidParseError { /// Happens when the resource is empty, that is the string ends with a /. EmptyResource, + + #[cfg(feature = "icu")] + /// Happens when ICU rejects the nodepart, domain or resource during normalisation. + IcuError(icu::Error), } impl StdError for JidParseError {} @@ -51,6 +58,8 @@ impl fmt::Display for JidParseError { JidParseError::NoResource => "no resource found in this full JID", JidParseError::EmptyNode => "nodepart empty despite the presence of a @", JidParseError::EmptyResource => "resource empty despite the presence of a /", + #[cfg(feature = "icu")] + JidParseError::IcuError(_err) => "ICU failed to normalise part of this JID", } ) } @@ -388,7 +397,19 @@ fn _from_str(s: &str) -> Result { } else if let ParserState::Resource = state { return Err(JidParseError::EmptyResource); } - Ok((node, domain.ok_or(JidParseError::NoDomain)?, resource)) + let domain = domain.ok_or(JidParseError::NoDomain)?; + #[cfg(feature = "icu")] + let (node, domain, resource) = { + let icu = Icu::new().unwrap(); + let node = node.map(|node| icu.nodeprep.stringprep(&node, Strict::AllowUnassigned)); + let node = node.transpose().map_err(JidParseError::IcuError)?; + let domain = icu.idna2008.to_unicode(&domain); + let domain = domain.map_err(JidParseError::IcuError)?; + let resource = resource.map(|r| icu.resourceprep.stringprep(&r, Strict::AllowUnassigned)); + let resource = resource.transpose().map_err(JidParseError::IcuError)?; + (node, domain, resource) + }; + Ok((node, domain, resource)) } impl FromStr for FullJid { @@ -905,4 +926,12 @@ mod tests { .build(); assert_eq!(elem.attr("from"), Some(String::from(bare).as_ref())); } + + #[cfg(feature = "icu")] + #[test] + fn icu_jid() { + let full = FullJid::from_str("Test@☃.coM/Test™").unwrap(); + let equiv = FullJid::new("test", "☃.com", "TestTM"); + assert_eq!(full, equiv); + } }