diff --git a/Cargo.toml b/Cargo.toml index 7c4bd093..a5c75e3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,5 @@ [workspace] members = [ # alphabetically sorted - "icu", "jid", "minidom", "parsers", @@ -9,7 +8,6 @@ members = [ # alphabetically sorted ] [patch.crates-io] -icu = { path = "icu" } jid = { path = "jid" } minidom = { path = "minidom" } tokio-xmpp = { path = "tokio-xmpp" } diff --git a/icu/Cargo.toml b/icu/Cargo.toml deleted file mode 100644 index 119bc5b1..00000000 --- a/icu/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "icu" -version = "0.1.0" -authors = ["Emmanuel Gil Peyrot "] -edition = "2018" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] - -[build-dependencies] -cc = "1" diff --git a/icu/build.rs b/icu/build.rs deleted file mode 100644 index b1f9e02c..00000000 --- a/icu/build.rs +++ /dev/null @@ -1,5 +0,0 @@ -fn main() { - cc::Build::new().file("src/bindings.c").compile("bindings"); - println!("cargo:rustc-link-lib=dylib=icuuc"); - println!("cargo:rustc-link-lib=dylib=icui18n"); -} diff --git a/icu/src/bindings.c b/icu/src/bindings.c deleted file mode 100644 index b9fc55a7..00000000 --- a/icu/src/bindings.c +++ /dev/null @@ -1,54 +0,0 @@ -// This file is a stupid wrapper to avoid the automated suffixing libicu is -// doing in unicode/urename.h. -// -// By default it will suffix each of its symbols with "_65" (with 65 being the -// soname), which completely messes with Rust’s binding ability. - -#include -#include -#include -#include -#include -#include -#include -#include - -const char* icu_error_code_to_name(UErrorCode code) { - return u_errorName(code); -} - -UIDNA* icu_idna_open(uint32_t options, UErrorCode* pErrorCode) { - return uidna_openUTS46(options, pErrorCode); -} - -int32_t icu_idna_name_to_ascii(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) { - return uidna_nameToASCII_UTF8(idna, name, length, dest, capacity, pInfo, pErrorCode); -} - -int32_t icu_idna_name_to_unicode(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) { - return uidna_nameToUnicodeUTF8(idna, name, length, dest, capacity, pInfo, pErrorCode); -} - -UStringPrepProfile* icu_stringprep_open(UStringPrepProfileType type, UErrorCode* status) { - return usprep_openByType(type, status); -} - -int32_t icu_stringprep_prepare(const UStringPrepProfile* prep, const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UParseError* parseError, UErrorCode* status) { - return usprep_prepare(prep, src, srcLength, dest, destCapacity, options, parseError, status); -} - -void icu_trace_set_level(UTraceLevel traceLevel) { - utrace_setLevel(traceLevel); -} - -USpoofChecker* icu_spoof_open(UErrorCode* status) { - return uspoof_open(status); -} - -void icu_spoof_set_checks(USpoofChecker* sc, int32_t checks, UErrorCode* status) { - uspoof_setChecks(sc, checks, status); -} - -int32_t icu_spoof_get_skeleton(USpoofChecker* sc, uint32_t type, const char* id, int32_t length, char* dest, int32_t destCapacity, UErrorCode* status) { - return uspoof_getSkeletonUTF8(sc, type, id, length, dest, destCapacity, status); -} diff --git a/icu/src/bindings.rs b/icu/src/bindings.rs deleted file mode 100644 index d306e442..00000000 --- a/icu/src/bindings.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! Crate wrapping what we need from ICU’s C API for JIDs. -//! -//! See - -use std::os::raw::c_char; - -// From unicode/umachine.h -pub(crate) type UChar = u16; - -// From unicode/utypes.h -pub(crate) type UErrorCode = u32; -pub(crate) const U_ZERO_ERROR: UErrorCode = 0; - -pub(crate) type UStringPrepProfile = u32; -type UParseError = u32; - -// From unicode/usprep.h -pub(crate) const USPREP_DEFAULT: i32 = 0; -pub(crate) const USPREP_ALLOW_UNASSIGNED: i32 = 1; - -pub(crate) type UStringPrepProfileType = u32; -pub(crate) const USPREP_RFC3491_NAMEPREP: UStringPrepProfileType = 0; -pub(crate) const USPREP_RFC3920_NODEPREP: UStringPrepProfileType = 7; -pub(crate) const USPREP_RFC3920_RESOURCEPREP: UStringPrepProfileType = 8; -pub(crate) const USPREP_RFC4013_SASLPREP: UStringPrepProfileType = 10; - -// From unicode/utrace.h -type UTraceLevel = i32; -pub(crate) const UTRACE_VERBOSE: UTraceLevel = 9; - -// From unicode/uidna.h -#[repr(C)] -pub(crate) struct UIDNA { - _unused: [u8; 0], -} -type UBool = i8; - -#[repr(C)] -pub(crate) struct UIDNAInfo { - size: i16, - is_transitional_different: UBool, - reserved_b3: UBool, - errors: u32, - reserved_i2: i32, - reserved_i3: i32, -} - -impl UIDNAInfo { - pub(crate) fn new() -> UIDNAInfo { - assert_eq!(std::mem::size_of::(), 16); - UIDNAInfo { - size: std::mem::size_of::() as i16, - is_transitional_different: false as UBool, - reserved_b3: false as UBool, - errors: 0, - reserved_i2: 0, - reserved_i3: 0, - } - } - - // TODO: Return a String instead, or a custom error type, this is a bitflag (defined in - // uidna.h) where multiple errors can be accumulated. - pub(crate) fn get_errors(&self) -> u32 { - self.errors - } -} - -pub(crate) const UIDNA_DEFAULT: u32 = 0; -pub(crate) const UIDNA_USE_STD3_RULES: u32 = 2; - -pub(crate) type UIdnaFunction = unsafe extern "C" fn( - *const UIDNA, - *const u8, - i32, - *mut u8, - i32, - *mut UIDNAInfo, - *mut u32, -) -> i32; - -// From unicode/uspoof.h -#[repr(C)] -pub(crate) struct USpoofChecker { - _unused: [u8; 0], -} -pub(crate) const USPOOF_CONFUSABLE: i32 = 7; - -#[link(name = "bindings")] -extern "C" { - // From unicode/ustring.h - pub(crate) fn icu_error_code_to_name(code: UErrorCode) -> *const c_char; - - // From unicode/usprep.h - pub(crate) fn icu_stringprep_open( - type_: UStringPrepProfileType, - status: *mut UErrorCode, - ) -> *mut UStringPrepProfile; - pub(crate) fn icu_stringprep_prepare( - prep: *const UStringPrepProfile, - src: *const UChar, - srcLength: i32, - dest: *mut UChar, - destCapacity: i32, - options: i32, - parseError: *mut UParseError, - status: *mut UErrorCode, - ) -> i32; - - // From unicode/utrace.h - pub(crate) fn icu_trace_set_level(traceLevel: UTraceLevel); - - // From unicode/uidna.h - pub(crate) fn icu_idna_open(options: u32, pErrorCode: *mut UErrorCode) -> *mut UIDNA; - pub(crate) fn icu_idna_name_to_ascii( - idna: *const UIDNA, - name: *const u8, - length: i32, - dest: *mut u8, - capacity: i32, - pInfo: *mut UIDNAInfo, - pErrorCode: *mut UErrorCode, - ) -> i32; - pub(crate) fn icu_idna_name_to_unicode( - idna: *const UIDNA, - name: *const u8, - length: i32, - dest: *mut u8, - capacity: i32, - pInfo: *mut UIDNAInfo, - pErrorCode: *mut UErrorCode, - ) -> i32; - - // From unicode/uspoof.h - pub(crate) fn icu_spoof_open(status: *mut UErrorCode) -> *mut USpoofChecker; - pub(crate) fn icu_spoof_set_checks( - sc: *mut USpoofChecker, - checks: i32, - status: *mut UErrorCode, - ); - pub(crate) fn icu_spoof_get_skeleton( - sc: *const USpoofChecker, - type_: u32, - id: *const u8, - length: i32, - dest: *mut u8, - destCapacity: i32, - status: *mut UErrorCode, - ) -> i32; -} diff --git a/icu/src/error.rs b/icu/src/error.rs deleted file mode 100644 index af7f42e7..00000000 --- a/icu/src/error.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Crate wrapping what we need from ICU’s C API for JIDs. -//! -//! See - -use crate::bindings::{icu_error_code_to_name, UErrorCode}; -use std::ffi::CStr; - -/// Errors this library can produce. -#[derive(Debug, PartialEq, Eq)] -pub enum Error { - /// An error produced by one of the ICU functions. - Icu(String), - - /// An error produced by one of the IDNA2008 ICU functions. - Idna(u32), - - /// Some ICU function didn’t produce a valid UTF-8 string, should never happen. - Utf8(std::string::FromUtf8Error), - - /// Some ICU function didn’t produce a valid UTF-8 string, should never happen. - Utf16(std::char::DecodeUtf16Error), - - /// Some string was too long for its profile in JID. - TooLong, -} - -impl Error { - pub(crate) fn from_icu_code(err: UErrorCode) -> Error { - let ptr = unsafe { icu_error_code_to_name(err) }; - let c_str = unsafe { CStr::from_ptr(ptr) }; - Error::Icu(c_str.to_string_lossy().into_owned()) - } -} - -impl From for Error { - fn from(err: UErrorCode) -> Error { - Error::from_icu_code(err) - } -} - -impl From for Error { - fn from(err: std::string::FromUtf8Error) -> Error { - Error::Utf8(err) - } -} - -impl From for Error { - fn from(err: std::char::DecodeUtf16Error) -> Error { - Error::Utf16(err) - } -} diff --git a/icu/src/idna2008.rs b/icu/src/idna2008.rs deleted file mode 100644 index 04798b59..00000000 --- a/icu/src/idna2008.rs +++ /dev/null @@ -1,69 +0,0 @@ -//! Crate wrapping what we need from ICU’s C API for JIDs. -//! -//! See - -use crate::bindings::{ - icu_idna_name_to_ascii, icu_idna_name_to_unicode, icu_idna_open, UErrorCode, UIDNAInfo, - UIdnaFunction, UIDNA, U_ZERO_ERROR, -}; -use crate::error::Error; - -/// TODO: IDNA2008 support. -pub struct Idna { - inner: *mut UIDNA, -} - -impl Idna { - /// Create a new Idna struct. - pub fn new(options: u32) -> Result { - let mut err: UErrorCode = U_ZERO_ERROR; - let inner = unsafe { icu_idna_open(options, &mut err) }; - match err { - U_ZERO_ERROR => Ok(Idna { inner }), - err => Err(err), - } - } - - /// Converts a whole domain name into its ASCII form for DNS lookup. - pub fn to_ascii(&self, input: &str) -> Result { - self.idna(input, icu_idna_name_to_ascii) - } - - /// Converts a whole domain name into its Unicode form for human-readable display. - pub fn to_unicode(&self, input: &str) -> Result { - self.idna(input, icu_idna_name_to_unicode) - } - - fn idna(&self, input: &str, function: UIdnaFunction) -> Result { - if input.len() > 255 { - return Err(Error::TooLong); - } - - let mut err: UErrorCode = U_ZERO_ERROR; - let mut dest: Vec = vec![0u8; 256]; - let mut info = UIDNAInfo::new(); - let len = unsafe { - function( - self.inner, - input.as_ptr(), - input.len() as i32, - dest.as_mut_ptr(), - dest.len() as i32, - &mut info, - &mut err, - ) - }; - if err != U_ZERO_ERROR { - return Err(Error::from_icu_code(err)); - } - let errors = info.get_errors(); - if errors != 0 { - return Err(Error::Idna(errors)); - } - if len > 255 { - return Err(Error::TooLong); - } - dest.truncate(len as usize); - Ok(String::from_utf8(dest)?) - } -} diff --git a/icu/src/lib.rs b/icu/src/lib.rs deleted file mode 100644 index 31afb5e0..00000000 --- a/icu/src/lib.rs +++ /dev/null @@ -1,154 +0,0 @@ -//! Crate wrapping what we need from ICU’s C API for JIDs. -//! -//! See - -#![deny(missing_docs)] - -mod bindings; -mod error; -mod idna2008; -mod spoof; -mod stringprep; - -use crate::bindings::{ - icu_trace_set_level, UIDNA_DEFAULT, UIDNA_USE_STD3_RULES, USPOOF_CONFUSABLE, - USPREP_RFC3491_NAMEPREP, USPREP_RFC3920_NODEPREP, USPREP_RFC3920_RESOURCEPREP, - USPREP_RFC4013_SASLPREP, UTRACE_VERBOSE, -}; -pub use crate::error::Error; -pub use crate::idna2008::Idna; -pub use crate::spoof::SpoofChecker; -use crate::stringprep::Stringprep; - -/// How unassigned codepoints should be handled. -pub enum Strict { - /// All codepoints should be assigned, otherwise an error will be emitted. - True, - - /// Codepoints can be unassigned. - AllowUnassigned, -} - -/// Main struct of this module, exposing the needed ICU functions to JID. -pub struct Icu { - nameprep: Stringprep, - nodeprep: Stringprep, - resourceprep: Stringprep, - saslprep: Stringprep, - - /// IDNA2008 support. - /// - /// See [RFC5891](https://tools.ietf.org/html/rfc5891). - pub idna2008: Idna, - - /// Spoof checker TODO: better doc. - pub spoofchecker: SpoofChecker, -} - -impl Icu { - /// Create a new ICU struct, initialising stringprep profiles, IDNA2008, as well as a spoof - /// checker. - pub fn new() -> Result { - unsafe { icu_trace_set_level(UTRACE_VERBOSE) }; - - let nameprep = Stringprep::new(USPREP_RFC3491_NAMEPREP)?; - let nodeprep = Stringprep::new(USPREP_RFC3920_NODEPREP)?; - let resourceprep = Stringprep::new(USPREP_RFC3920_RESOURCEPREP)?; - let saslprep = Stringprep::new(USPREP_RFC4013_SASLPREP)?; - - let mut options = UIDNA_DEFAULT; - options |= UIDNA_USE_STD3_RULES; - let idna2008 = Idna::new(options)?; - - let spoofchecker = SpoofChecker::new(USPOOF_CONFUSABLE)?; - - Ok(Icu { - nameprep, - nodeprep, - resourceprep, - saslprep, - idna2008, - spoofchecker, - }) - } - - /// Perform stringprep using the Nameprep profile. - /// - /// See [RFC3491](https://tools.ietf.org/html/rfc3491). - pub fn nameprep(&self, string: &str, strict: Strict) -> Result { - self.nameprep.stringprep(string, strict) - } - - /// Perform stringprep using the Nodeprep profile. - /// - /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A). - pub fn nodeprep(&self, string: &str, strict: Strict) -> Result { - self.nodeprep.stringprep(string, strict) - } - - /// Perform stringprep using the Resourceprep profile. - /// - /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A). - pub fn resourceprep(&self, string: &str, strict: Strict) -> Result { - self.resourceprep.stringprep(string, strict) - } - - /// Perform stringprep using the Saslprep profile. - /// - /// See [RFC4013](https://tools.ietf.org/html/rfc4013). - pub fn saslprep(&self, string: &str, strict: Strict) -> Result { - self.saslprep.stringprep(string, strict) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn nameprep() { - let name = "Link"; - let icu = Icu::new().unwrap(); - let name = icu.nodeprep.stringprep(name, Strict::True).unwrap(); - assert_eq!(name, "link"); - } - - #[test] - fn resourceprep() { - let name = "Test™"; - let icu = Icu::new().unwrap(); - let name = icu - .resourceprep - .stringprep(name, Strict::AllowUnassigned) - .unwrap(); - assert_eq!(name, "TestTM"); - } - - #[test] - fn idna() { - let name = "☃.coM"; - let icu = Icu::new().unwrap(); - let name = icu.idna2008.to_ascii(name).unwrap(); - assert_eq!(name, "xn--n3h.com"); - - let name = "xn--N3H.com"; - let icu = Icu::new().unwrap(); - let name = icu.idna2008.to_unicode(name).unwrap(); - assert_eq!(name, "☃.com"); - } - - #[test] - fn spoof() { - // Non-breakable and narrow non-breakable spaces spoofing. - let name = "foo bar baz"; - let icu = Icu::new().unwrap(); - let name = icu.spoofchecker.get_skeleton(name).unwrap(); - assert_eq!(name, "foo bar baz"); - - // Cyrillic spoofing. - let name = "Неllо wоrld"; - let icu = Icu::new().unwrap(); - let name = icu.spoofchecker.get_skeleton(name).unwrap(); - assert_eq!(name, "Hello world"); - } -} diff --git a/icu/src/spoof.rs b/icu/src/spoof.rs deleted file mode 100644 index 80d22d40..00000000 --- a/icu/src/spoof.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! Crate wrapping what we need from ICU’s C API for JIDs. -//! -//! See - -use crate::bindings::{ - icu_spoof_get_skeleton, icu_spoof_open, icu_spoof_set_checks, UErrorCode, USpoofChecker, - U_ZERO_ERROR, -}; -use crate::error::Error; - -/// TODO: spoof checker. -pub struct SpoofChecker { - inner: *mut USpoofChecker, -} - -impl SpoofChecker { - /// Create a new SpoofChecker. - pub fn new(checks: i32) -> Result { - let mut err: UErrorCode = U_ZERO_ERROR; - let inner = unsafe { icu_spoof_open(&mut err) }; - if err != U_ZERO_ERROR { - return Err(err); - } - unsafe { icu_spoof_set_checks(inner, checks, &mut err) }; - if err != U_ZERO_ERROR { - return Err(err); - } - Ok(SpoofChecker { inner }) - } - - /// Transform a string into a skeleton for matching it with other potentially similar strings. - pub fn get_skeleton(&self, input: &str) -> Result { - let mut err: UErrorCode = U_ZERO_ERROR; - let mut dest: Vec = vec![0u8; 256]; - let len = unsafe { - icu_spoof_get_skeleton( - self.inner, - 0, - input.as_ptr(), - input.len() as i32, - dest.as_mut_ptr(), - dest.len() as i32, - &mut err, - ) - }; - if err != U_ZERO_ERROR { - return Err(Error::from_icu_code(err)); - } - dest.truncate(len as usize); - Ok(String::from_utf8(dest)?) - } -} diff --git a/icu/src/stringprep.rs b/icu/src/stringprep.rs deleted file mode 100644 index ee6b150b..00000000 --- a/icu/src/stringprep.rs +++ /dev/null @@ -1,88 +0,0 @@ -//! Crate wrapping what we need from ICU’s C API for JIDs. -//! -//! See - -use crate::bindings::{ - icu_stringprep_open, icu_stringprep_prepare, UChar, UErrorCode, UStringPrepProfile, - UStringPrepProfileType, USPREP_ALLOW_UNASSIGNED, USPREP_DEFAULT, U_ZERO_ERROR, -}; -use crate::error::Error; -use crate::Strict; -use std::ptr::null_mut; - -/// Struct representing a given stringprep profile. -pub(crate) struct Stringprep { - inner: *mut UStringPrepProfile, -} - -impl Stringprep { - /// Create a new Stringprep struct for the given profile. - pub(crate) fn new(profile: UStringPrepProfileType) -> Result { - let mut err: UErrorCode = U_ZERO_ERROR; - let inner = unsafe { icu_stringprep_open(profile, &mut err) }; - match err { - U_ZERO_ERROR => Ok(Stringprep { inner }), - err => Err(err), - } - } - - /// Perform a stringprep operation using this profile. - /// - /// # Panics - /// Panics if ICU doesn’t return a valid UTF-16 string, which should never happen. - pub(crate) fn stringprep(&self, input: &str, strict: Strict) -> Result { - if input.len() > 1023 { - return Err(Error::TooLong); - } - - // ICU works on UTF-16 data, so convert it first. - let unprepped: Vec = input.encode_utf16().collect(); - - // Now do the actual stringprep operation. - let mut prepped: Vec = vec![0u16; 1024]; - let flags = match strict { - Strict::True => USPREP_DEFAULT, - Strict::AllowUnassigned => USPREP_ALLOW_UNASSIGNED, - }; - self.prepare(&unprepped, &mut prepped, flags)?; - - // And then convert it back to UTF-8. - let output = std::char::decode_utf16(prepped.into_iter()) - //.map(Result::unwrap) - .try_fold(Vec::new(), |mut acc, c| match c { - Ok(c) => { - acc.push(c); - Ok(acc) - } - Err(err) => Err(err), - })?; - let output: String = output.into_iter().collect(); - - if output.len() > 1023 { - return Err(Error::TooLong); - } - - Ok(output) - } - - fn prepare(&self, input: &[UChar], buf: &mut Vec, flags: i32) -> Result<(), UErrorCode> { - let mut err: UErrorCode = U_ZERO_ERROR; - let prepped_len = unsafe { - icu_stringprep_prepare( - self.inner, - input.as_ptr(), - input.len() as i32, - buf.as_mut_ptr(), - buf.len() as i32, - flags, - null_mut(), - &mut err, - ) - }; - if err != U_ZERO_ERROR { - return Err(err); - } - buf.truncate(prepped_len as usize); - Ok(()) - } -}