WIP: Add ICU bindings for stringprep, idna2008 and spoof checker.
This commit is contained in:
parent
a425a039ca
commit
6eb25755a3
12 changed files with 671 additions and 2 deletions
|
@ -1,5 +1,6 @@
|
|||
[workspace]
|
||||
members = [ # alphabetically sorted
|
||||
"icu",
|
||||
"jid",
|
||||
"minidom",
|
||||
"parsers",
|
||||
|
@ -8,6 +9,7 @@ members = [ # alphabetically sorted
|
|||
]
|
||||
|
||||
[patch.crates-io]
|
||||
icu = { path = "icu" }
|
||||
jid = { path = "jid" }
|
||||
minidom = { path = "minidom" }
|
||||
tokio-xmpp = { path = "tokio-xmpp" }
|
||||
|
|
12
icu/Cargo.toml
Normal file
12
icu/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "icu"
|
||||
version = "0.1.0"
|
||||
authors = ["Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1"
|
5
icu/build.rs
Normal file
5
icu/build.rs
Normal file
|
@ -0,0 +1,5 @@
|
|||
fn main() {
|
||||
cc::Build::new().file("src/bindings.c").compile("bindings");
|
||||
println!("cargo:rustc-link-lib=dylib=icuuc");
|
||||
println!("cargo:rustc-link-lib=dylib=icui18n");
|
||||
}
|
54
icu/src/bindings.c
Normal file
54
icu/src/bindings.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
// This file is a stupid wrapper to avoid the automated suffixing libicu is
|
||||
// doing in unicode/urename.h.
|
||||
//
|
||||
// By default it will suffix each of its symbols with "_65" (with 65 being the
|
||||
// soname), which completely messes with Rust’s binding ability.
|
||||
|
||||
#include <unicode/umachine.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <unicode/usprep.h>
|
||||
#include <unicode/utrace.h>
|
||||
#include <unicode/uidna.h>
|
||||
#include <unicode/uspoof.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <string.h>
|
||||
|
||||
const char* icu_error_code_to_name(UErrorCode code) {
|
||||
return u_errorName(code);
|
||||
}
|
||||
|
||||
UIDNA* icu_idna_open(uint32_t options, UErrorCode* pErrorCode) {
|
||||
return uidna_openUTS46(options, pErrorCode);
|
||||
}
|
||||
|
||||
int32_t icu_idna_name_to_ascii(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
|
||||
return uidna_nameToASCII_UTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
|
||||
}
|
||||
|
||||
int32_t icu_idna_name_to_unicode(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
|
||||
return uidna_nameToUnicodeUTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
|
||||
}
|
||||
|
||||
UStringPrepProfile* icu_stringprep_open(UStringPrepProfileType type, UErrorCode* status) {
|
||||
return usprep_openByType(type, status);
|
||||
}
|
||||
|
||||
int32_t icu_stringprep_prepare(const UStringPrepProfile* prep, const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UParseError* parseError, UErrorCode* status) {
|
||||
return usprep_prepare(prep, src, srcLength, dest, destCapacity, options, parseError, status);
|
||||
}
|
||||
|
||||
void icu_trace_set_level(UTraceLevel traceLevel) {
|
||||
utrace_setLevel(traceLevel);
|
||||
}
|
||||
|
||||
USpoofChecker* icu_spoof_open(UErrorCode* status) {
|
||||
return uspoof_open(status);
|
||||
}
|
||||
|
||||
void icu_spoof_set_checks(USpoofChecker* sc, int32_t checks, UErrorCode* status) {
|
||||
uspoof_setChecks(sc, checks, status);
|
||||
}
|
||||
|
||||
int32_t icu_spoof_get_skeleton(USpoofChecker* sc, uint32_t type, const char* id, int32_t length, char* dest, int32_t destCapacity, UErrorCode* status) {
|
||||
return uspoof_getSkeletonUTF8(sc, type, id, length, dest, destCapacity, status);
|
||||
}
|
149
icu/src/bindings.rs
Normal file
149
icu/src/bindings.rs
Normal file
|
@ -0,0 +1,149 @@
|
|||
//! Crate wrapping what we need from ICU’s C API for JIDs.
|
||||
//!
|
||||
//! See http://site.icu-project.org/
|
||||
|
||||
use std::os::raw::c_char;
|
||||
|
||||
// From unicode/umachine.h

/// A single UTF-16 code unit, the element type of the `UChar*` buffers passed to ICU.
pub(crate) type UChar = u16;

// From unicode/utypes.h

/// ICU status code.
///
/// NOTE(review): the C `UErrorCode` enum also contains negative (warning) values; with this
/// unsigned alias they show up as values above `i32::MAX`, so "is it a failure?" must be
/// decided with a signed comparison rather than `!= U_ZERO_ERROR`.
pub(crate) type UErrorCode = u32;
/// Status value meaning "no error, no warning".
pub(crate) const U_ZERO_ERROR: UErrorCode = 0;

// From unicode/usprep.h

/// Opaque stringprep profile handle; only ever used behind a raw pointer.
#[repr(C)]
pub(crate) struct UStringPrepProfile {
    _unused: [u8; 0],
}

// From unicode/parseerr.h

/// Length of the context arrays in [`UParseError`] (`U_PARSE_CONTEXT_LEN`).
const U_PARSE_CONTEXT_LEN: usize = 16;

/// Mirror of ICU's `UParseError`, so that a non-null pointer handed to ICU points at enough
/// storage. The visible callers only ever pass a null pointer.
// The fields exist for layout only and are never read from Rust.
#[allow(dead_code)]
#[repr(C)]
pub(crate) struct UParseError {
    line: i32,
    offset: i32,
    pre_context: [UChar; U_PARSE_CONTEXT_LEN],
    post_context: [UChar; U_PARSE_CONTEXT_LEN],
}

// From unicode/usprep.h

/// Stringprep option: default behaviour (no flags set).
pub(crate) const USPREP_DEFAULT: i32 = 0;
/// Stringprep option: accept unassigned code points.
pub(crate) const USPREP_ALLOW_UNASSIGNED: i32 = 1;

/// Selector for one of ICU's built-in stringprep profiles.
pub(crate) type UStringPrepProfileType = u32;
pub(crate) const USPREP_RFC3491_NAMEPREP: UStringPrepProfileType = 0;
pub(crate) const USPREP_RFC3920_NODEPREP: UStringPrepProfileType = 7;
pub(crate) const USPREP_RFC3920_RESOURCEPREP: UStringPrepProfileType = 8;
pub(crate) const USPREP_RFC4013_SASLPREP: UStringPrepProfileType = 10;

// From unicode/utrace.h

/// ICU trace verbosity level.
type UTraceLevel = i32;
pub(crate) const UTRACE_VERBOSE: UTraceLevel = 9;
|
||||
|
||||
// From unicode/uidna.h

/// Opaque IDNA handle; zero-sized on the Rust side and only used behind raw pointers.
#[repr(C)]
pub(crate) struct UIDNA {
    _unused: [u8; 0],
}

/// Boolean representation used by the fields of [`UIDNAInfo`].
type UBool = i8;

/// Output structure filled in by the IDNA conversion functions.
#[repr(C)]
pub(crate) struct UIDNAInfo {
    size: i16,
    is_transitional_different: UBool,
    reserved_b3: UBool,
    errors: u32,
    reserved_i2: i32,
    reserved_i3: i32,
}

impl UIDNAInfo {
    /// Builds a zeroed info structure whose `size` field holds the structure's own size.
    ///
    /// # Panics
    /// Panics if the Rust layout of the structure is not 16 bytes.
    pub(crate) fn new() -> UIDNAInfo {
        let byte_size = std::mem::size_of::<UIDNAInfo>();
        assert_eq!(byte_size, 16);

        let unset = false as UBool;
        UIDNAInfo {
            size: byte_size as i16,
            is_transitional_different: unset,
            reserved_b3: unset,
            errors: 0,
            reserved_i2: 0,
            reserved_i3: 0,
        }
    }

    // TODO: Return a String instead, or a custom error type, this is a bitflag (defined in
    // uidna.h) where multiple errors can be accumulated.
    /// Returns the raw error bit set stored in the structure.
    pub(crate) fn get_errors(&self) -> u32 {
        let UIDNAInfo { errors, .. } = self;
        *errors
    }
}
|
||||
|
||||
pub(crate) const UIDNA_DEFAULT: u32 = 0;
|
||||
pub(crate) const UIDNA_USE_STD3_RULES: u32 = 2;
|
||||
|
||||
pub(crate) type UIdnaFunction = unsafe extern "C" fn(
|
||||
*const UIDNA,
|
||||
*const u8,
|
||||
i32,
|
||||
*mut u8,
|
||||
i32,
|
||||
*mut UIDNAInfo,
|
||||
*mut u32,
|
||||
) -> i32;
|
||||
|
||||
// From unicode/uspoof.h
|
||||
#[repr(C)]
|
||||
pub(crate) struct USpoofChecker {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
pub(crate) const USPOOF_CONFUSABLE: i32 = 7;
|
||||
|
||||
#[link(name = "bindings")]
|
||||
extern "C" {
|
||||
// From unicode/ustring.h
|
||||
pub(crate) fn icu_error_code_to_name(code: UErrorCode) -> *const c_char;
|
||||
|
||||
// From unicode/usprep.h
|
||||
pub(crate) fn icu_stringprep_open(
|
||||
type_: UStringPrepProfileType,
|
||||
status: *mut UErrorCode,
|
||||
) -> *mut UStringPrepProfile;
|
||||
pub(crate) fn icu_stringprep_prepare(
|
||||
prep: *const UStringPrepProfile,
|
||||
src: *const UChar,
|
||||
srcLength: i32,
|
||||
dest: *mut UChar,
|
||||
destCapacity: i32,
|
||||
options: i32,
|
||||
parseError: *mut UParseError,
|
||||
status: *mut UErrorCode,
|
||||
) -> i32;
|
||||
|
||||
// From unicode/utrace.h
|
||||
pub(crate) fn icu_trace_set_level(traceLevel: UTraceLevel);
|
||||
|
||||
// From unicode/uidna.h
|
||||
pub(crate) fn icu_idna_open(options: u32, pErrorCode: *mut UErrorCode) -> *mut UIDNA;
|
||||
pub(crate) fn icu_idna_name_to_ascii(
|
||||
idna: *const UIDNA,
|
||||
name: *const u8,
|
||||
length: i32,
|
||||
dest: *mut u8,
|
||||
capacity: i32,
|
||||
pInfo: *mut UIDNAInfo,
|
||||
pErrorCode: *mut UErrorCode,
|
||||
) -> i32;
|
||||
pub(crate) fn icu_idna_name_to_unicode(
|
||||
idna: *const UIDNA,
|
||||
name: *const u8,
|
||||
length: i32,
|
||||
dest: *mut u8,
|
||||
capacity: i32,
|
||||
pInfo: *mut UIDNAInfo,
|
||||
pErrorCode: *mut UErrorCode,
|
||||
) -> i32;
|
||||
|
||||
// From unicode/uspoof.h
|
||||
pub(crate) fn icu_spoof_open(status: *mut UErrorCode) -> *mut USpoofChecker;
|
||||
pub(crate) fn icu_spoof_set_checks(
|
||||
sc: *mut USpoofChecker,
|
||||
checks: i32,
|
||||
status: *mut UErrorCode,
|
||||
);
|
||||
pub(crate) fn icu_spoof_get_skeleton(
|
||||
sc: *const USpoofChecker,
|
||||
type_: u32,
|
||||
id: *const u8,
|
||||
length: i32,
|
||||
dest: *mut u8,
|
||||
destCapacity: i32,
|
||||
status: *mut UErrorCode,
|
||||
) -> i32;
|
||||
}
|
67
icu/src/error.rs
Normal file
67
icu/src/error.rs
Normal file
|
@ -0,0 +1,67 @@
|
|||
//! Crate wrapping what we need from ICU’s C API for JIDs.
|
||||
//!
|
||||
//! See http://site.icu-project.org/
|
||||
|
||||
use crate::bindings::{icu_error_code_to_name, UErrorCode};
|
||||
use std::ffi::CStr;
|
||||
|
||||
/// Errors this library can produce.
///
/// Equality is structural: two errors are equal only if they are the same variant and carry
/// equal payloads (including the wrapped UTF-8/UTF-16 decoding errors).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// An error produced by one of the ICU functions; the payload is ICU's name for the
    /// status code.
    Icu(String),

    /// An error produced by one of the IDNA2008 ICU functions; the payload is the raw error
    /// bit set reported by ICU.
    Idna(u32),

    /// Some ICU function didn’t produce a valid UTF-8 string, should never happen.
    Utf8(std::string::FromUtf8Error),

    /// Some ICU function didn’t produce a valid UTF-16 string, should never happen.
    Utf16(std::char::DecodeUtf16Error),

    /// Some string was too long for its profile in JID.
    TooLong,
}

impl std::fmt::Display for Error {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Icu(name) => write!(fmt, "ICU error: {}", name),
            Error::Idna(flags) => write!(fmt, "IDNA error (flags {:#x})", flags),
            Error::Utf8(_) => write!(fmt, "ICU returned invalid UTF-8"),
            Error::Utf16(_) => write!(fmt, "ICU returned invalid UTF-16"),
            Error::TooLong => write!(fmt, "string too long for its profile"),
        }
    }
}

impl std::error::Error for Error {
    /// Exposes the wrapped decoding error, if any, as the underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Utf8(err) => Some(err),
            Error::Utf16(err) => Some(err),
            Error::Icu(_) | Error::Idna(_) | Error::TooLong => None,
        }
    }
}
|
||||
|
||||
impl Error {
|
||||
pub(crate) fn from_icu_code(err: UErrorCode) -> Error {
|
||||
let ptr = unsafe { icu_error_code_to_name(err) };
|
||||
let c_str = unsafe { CStr::from_ptr(ptr) };
|
||||
Error::Icu(c_str.to_string_lossy().into_owned())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<UErrorCode> for Error {
|
||||
fn from(err: UErrorCode) -> Error {
|
||||
Error::from_icu_code(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::string::FromUtf8Error> for Error {
|
||||
fn from(err: std::string::FromUtf8Error) -> Error {
|
||||
Error::Utf8(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::char::DecodeUtf16Error> for Error {
|
||||
fn from(err: std::char::DecodeUtf16Error) -> Error {
|
||||
Error::Utf16(err)
|
||||
}
|
||||
}
|
69
icu/src/idna2008.rs
Normal file
69
icu/src/idna2008.rs
Normal file
|
@ -0,0 +1,69 @@
|
|||
//! Crate wrapping what we need from ICU’s C API for JIDs.
|
||||
//!
|
||||
//! See http://site.icu-project.org/
|
||||
|
||||
use crate::bindings::{
|
||||
icu_idna_name_to_ascii, icu_idna_name_to_unicode, icu_idna_open, UErrorCode, UIDNAInfo,
|
||||
UIdnaFunction, UIDNA, U_ZERO_ERROR,
|
||||
};
|
||||
use crate::error::Error;
|
||||
|
||||
/// TODO: IDNA2008 support.
|
||||
pub struct Idna {
|
||||
inner: *mut UIDNA,
|
||||
}
|
||||
|
||||
impl Idna {
|
||||
/// Create a new Idna struct.
|
||||
pub fn new(options: u32) -> Result<Idna, UErrorCode> {
|
||||
let mut err: UErrorCode = U_ZERO_ERROR;
|
||||
let inner = unsafe { icu_idna_open(options, &mut err) };
|
||||
match err {
|
||||
U_ZERO_ERROR => Ok(Idna { inner }),
|
||||
err => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a whole domain name into its ASCII form for DNS lookup.
|
||||
pub fn to_ascii(&self, input: &str) -> Result<String, Error> {
|
||||
self.idna(input, icu_idna_name_to_ascii)
|
||||
}
|
||||
|
||||
/// Converts a whole domain name into its Unicode form for human-readable display.
|
||||
pub fn to_unicode(&self, input: &str) -> Result<String, Error> {
|
||||
self.idna(input, icu_idna_name_to_unicode)
|
||||
}
|
||||
|
||||
fn idna(&self, input: &str, function: UIdnaFunction) -> Result<String, Error> {
|
||||
if input.len() > 255 {
|
||||
return Err(Error::TooLong);
|
||||
}
|
||||
|
||||
let mut err: UErrorCode = U_ZERO_ERROR;
|
||||
let mut dest: Vec<u8> = vec![0u8; 256];
|
||||
let mut info = UIDNAInfo::new();
|
||||
let len = unsafe {
|
||||
function(
|
||||
self.inner,
|
||||
input.as_ptr(),
|
||||
input.len() as i32,
|
||||
dest.as_mut_ptr(),
|
||||
dest.len() as i32,
|
||||
&mut info,
|
||||
&mut err,
|
||||
)
|
||||
};
|
||||
if err != U_ZERO_ERROR {
|
||||
return Err(Error::from_icu_code(err));
|
||||
}
|
||||
let errors = info.get_errors();
|
||||
if errors != 0 {
|
||||
return Err(Error::Idna(errors));
|
||||
}
|
||||
if len > 255 {
|
||||
return Err(Error::TooLong);
|
||||
}
|
||||
dest.truncate(len as usize);
|
||||
Ok(String::from_utf8(dest)?)
|
||||
}
|
||||
}
|
141
icu/src/lib.rs
Normal file
141
icu/src/lib.rs
Normal file
|
@ -0,0 +1,141 @@
|
|||
//! Crate wrapping what we need from ICU’s C API for JIDs.
|
||||
//!
|
||||
//! See http://site.icu-project.org/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
mod bindings;
|
||||
mod error;
|
||||
mod idna2008;
|
||||
mod spoof;
|
||||
mod stringprep;
|
||||
|
||||
use crate::bindings::{
|
||||
icu_trace_set_level, UIDNA_DEFAULT, UIDNA_USE_STD3_RULES, USPOOF_CONFUSABLE,
|
||||
USPREP_RFC3491_NAMEPREP, USPREP_RFC3920_NODEPREP, USPREP_RFC3920_RESOURCEPREP,
|
||||
USPREP_RFC4013_SASLPREP, UTRACE_VERBOSE,
|
||||
};
|
||||
pub use crate::error::Error;
|
||||
pub use crate::idna2008::Idna;
|
||||
pub use crate::spoof::SpoofChecker;
|
||||
pub use crate::stringprep::Stringprep;
|
||||
|
||||
/// How unassigned codepoints should be handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Strict {
    /// All codepoints should be assigned, otherwise an error will be emitted.
    True,

    /// Codepoints can be unassigned.
    AllowUnassigned,
}
|
||||
|
||||
/// Main struct of this module, exposing the needed ICU functions to JID.
|
||||
pub struct Icu {
|
||||
/// Perform stringprep using the Nameprep profile.
|
||||
///
|
||||
/// See [RFC3491](https://tools.ietf.org/html/rfc3491).
|
||||
pub nameprep: Stringprep,
|
||||
|
||||
/// Perform stringprep using the Nodeprep profile.
|
||||
///
|
||||
/// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
|
||||
pub nodeprep: Stringprep,
|
||||
|
||||
/// Perform stringprep using the Resourceprep profile.
|
||||
///
|
||||
/// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
|
||||
pub resourceprep: Stringprep,
|
||||
|
||||
/// Perform stringprep using the Saslprep profile.
|
||||
///
|
||||
/// See [RFC4013](https://tools.ietf.org/html/rfc4013).
|
||||
pub saslprep: Stringprep,
|
||||
|
||||
/// IDNA2008 support.
|
||||
///
|
||||
/// See [RFC5891](https://tools.ietf.org/html/rfc5891).
|
||||
pub idna2008: Idna,
|
||||
|
||||
/// Spoof checker TODO: better doc.
|
||||
pub spoofchecker: SpoofChecker,
|
||||
}
|
||||
|
||||
impl Icu {
|
||||
/// Create a new ICU struct, initialising stringprep profiles, IDNA2008, as well as a spoof
|
||||
/// checker.
|
||||
pub fn new() -> Result<Icu, Error> {
|
||||
unsafe { icu_trace_set_level(UTRACE_VERBOSE) };
|
||||
|
||||
let nameprep = Stringprep::new(USPREP_RFC3491_NAMEPREP)?;
|
||||
let nodeprep = Stringprep::new(USPREP_RFC3920_NODEPREP)?;
|
||||
let resourceprep = Stringprep::new(USPREP_RFC3920_RESOURCEPREP)?;
|
||||
let saslprep = Stringprep::new(USPREP_RFC4013_SASLPREP)?;
|
||||
|
||||
let mut options = UIDNA_DEFAULT;
|
||||
options |= UIDNA_USE_STD3_RULES;
|
||||
let idna2008 = Idna::new(options)?;
|
||||
|
||||
let spoofchecker = SpoofChecker::new(USPOOF_CONFUSABLE)?;
|
||||
|
||||
Ok(Icu {
|
||||
nameprep,
|
||||
nodeprep,
|
||||
resourceprep,
|
||||
saslprep,
|
||||
idna2008,
|
||||
spoofchecker,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// Named after the profile it actually exercises: the call below goes through `icu.nodeprep`.
#[test]
fn nodeprep() {
    let name = "Link";
    let icu = Icu::new().unwrap();
    let name = icu.nodeprep.stringprep(name, Strict::True).unwrap();
    assert_eq!(name, "link");
}
|
||||
|
||||
#[test]
|
||||
fn resourceprep() {
|
||||
let name = "Test™";
|
||||
let icu = Icu::new().unwrap();
|
||||
let name = icu
|
||||
.resourceprep
|
||||
.stringprep(name, Strict::AllowUnassigned)
|
||||
.unwrap();
|
||||
assert_eq!(name, "TestTM");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn idna() {
|
||||
let name = "☃.coM";
|
||||
let icu = Icu::new().unwrap();
|
||||
let name = icu.idna2008.to_ascii(name).unwrap();
|
||||
assert_eq!(name, "xn--n3h.com");
|
||||
|
||||
let name = "xn--N3H.com";
|
||||
let icu = Icu::new().unwrap();
|
||||
let name = icu.idna2008.to_unicode(name).unwrap();
|
||||
assert_eq!(name, "☃.com");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spoof() {
|
||||
// Non-breakable and narrow non-breakable spaces spoofing.
|
||||
let name = "foo bar baz";
|
||||
let icu = Icu::new().unwrap();
|
||||
let name = icu.spoofchecker.get_skeleton(name).unwrap();
|
||||
assert_eq!(name, "foo bar baz");
|
||||
|
||||
// Cyrillic spoofing.
|
||||
let name = "Неllо wоrld";
|
||||
let icu = Icu::new().unwrap();
|
||||
let name = icu.spoofchecker.get_skeleton(name).unwrap();
|
||||
assert_eq!(name, "Hello world");
|
||||
}
|
||||
}
|
52
icu/src/spoof.rs
Normal file
52
icu/src/spoof.rs
Normal file
|
@ -0,0 +1,52 @@
|
|||
//! Crate wrapping what we need from ICU’s C API for JIDs.
|
||||
//!
|
||||
//! See http://site.icu-project.org/
|
||||
|
||||
use crate::bindings::{
|
||||
icu_spoof_get_skeleton, icu_spoof_open, icu_spoof_set_checks, UErrorCode, USpoofChecker,
|
||||
U_ZERO_ERROR,
|
||||
};
|
||||
use crate::error::Error;
|
||||
|
||||
/// TODO: spoof checker.
|
||||
pub struct SpoofChecker {
|
||||
inner: *mut USpoofChecker,
|
||||
}
|
||||
|
||||
impl SpoofChecker {
|
||||
/// Create a new SpoofChecker.
|
||||
pub fn new(checks: i32) -> Result<SpoofChecker, UErrorCode> {
|
||||
let mut err: UErrorCode = U_ZERO_ERROR;
|
||||
let inner = unsafe { icu_spoof_open(&mut err) };
|
||||
if err != U_ZERO_ERROR {
|
||||
return Err(err);
|
||||
}
|
||||
unsafe { icu_spoof_set_checks(inner, checks, &mut err) };
|
||||
if err != U_ZERO_ERROR {
|
||||
return Err(err);
|
||||
}
|
||||
Ok(SpoofChecker { inner })
|
||||
}
|
||||
|
||||
/// Transform a string into a skeleton for matching it with other potentially similar strings.
|
||||
pub fn get_skeleton(&self, input: &str) -> Result<String, Error> {
|
||||
let mut err: UErrorCode = U_ZERO_ERROR;
|
||||
let mut dest: Vec<u8> = vec![0u8; 256];
|
||||
let len = unsafe {
|
||||
icu_spoof_get_skeleton(
|
||||
self.inner,
|
||||
0,
|
||||
input.as_ptr(),
|
||||
input.len() as i32,
|
||||
dest.as_mut_ptr(),
|
||||
dest.len() as i32,
|
||||
&mut err,
|
||||
)
|
||||
};
|
||||
if err != U_ZERO_ERROR {
|
||||
return Err(Error::from_icu_code(err));
|
||||
}
|
||||
dest.truncate(len as usize);
|
||||
Ok(String::from_utf8(dest)?)
|
||||
}
|
||||
}
|
88
icu/src/stringprep.rs
Normal file
88
icu/src/stringprep.rs
Normal file
|
@ -0,0 +1,88 @@
|
|||
//! Crate wrapping what we need from ICU’s C API for JIDs.
|
||||
//!
|
||||
//! See http://site.icu-project.org/
|
||||
|
||||
use crate::bindings::{
|
||||
icu_stringprep_open, icu_stringprep_prepare, UChar, UErrorCode, UStringPrepProfile,
|
||||
UStringPrepProfileType, USPREP_ALLOW_UNASSIGNED, USPREP_DEFAULT, U_ZERO_ERROR,
|
||||
};
|
||||
use crate::error::Error;
|
||||
use crate::Strict;
|
||||
use std::ptr::null_mut;
|
||||
|
||||
/// Struct representing a given stringprep profile.
|
||||
pub struct Stringprep {
|
||||
inner: *mut UStringPrepProfile,
|
||||
}
|
||||
|
||||
impl Stringprep {
|
||||
/// Create a new Stringprep struct for the given profile.
|
||||
pub(crate) fn new(profile: UStringPrepProfileType) -> Result<Stringprep, UErrorCode> {
|
||||
let mut err: UErrorCode = U_ZERO_ERROR;
|
||||
let inner = unsafe { icu_stringprep_open(profile, &mut err) };
|
||||
match err {
|
||||
U_ZERO_ERROR => Ok(Stringprep { inner }),
|
||||
err => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform a stringprep operation using this profile.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if ICU doesn’t return a valid UTF-16 string, which should never happen.
|
||||
pub fn stringprep(&self, input: &str, strict: Strict) -> Result<String, Error> {
|
||||
if input.len() > 1023 {
|
||||
return Err(Error::TooLong);
|
||||
}
|
||||
|
||||
// ICU works on UTF-16 data, so convert it first.
|
||||
let unprepped: Vec<UChar> = input.encode_utf16().collect();
|
||||
|
||||
// Now do the actual stringprep operation.
|
||||
let mut prepped: Vec<UChar> = vec![0u16; 1024];
|
||||
let flags = match strict {
|
||||
Strict::True => USPREP_DEFAULT,
|
||||
Strict::AllowUnassigned => USPREP_ALLOW_UNASSIGNED,
|
||||
};
|
||||
self.prepare(&unprepped, &mut prepped, flags)?;
|
||||
|
||||
// And then convert it back to UTF-8.
|
||||
let output = std::char::decode_utf16(prepped.into_iter())
|
||||
//.map(Result::unwrap)
|
||||
.try_fold(Vec::new(), |mut acc, c| match c {
|
||||
Ok(c) => {
|
||||
acc.push(c);
|
||||
Ok(acc)
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
})?;
|
||||
let output: String = output.into_iter().collect();
|
||||
|
||||
if output.len() > 1023 {
|
||||
return Err(Error::TooLong);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn prepare(&self, input: &[UChar], buf: &mut Vec<UChar>, flags: i32) -> Result<(), UErrorCode> {
|
||||
let mut err: UErrorCode = U_ZERO_ERROR;
|
||||
let prepped_len = unsafe {
|
||||
icu_stringprep_prepare(
|
||||
self.inner,
|
||||
input.as_ptr(),
|
||||
input.len() as i32,
|
||||
buf.as_mut_ptr(),
|
||||
buf.len() as i32,
|
||||
flags,
|
||||
null_mut(),
|
||||
&mut err,
|
||||
)
|
||||
};
|
||||
if err != U_ZERO_ERROR {
|
||||
return Err(err);
|
||||
}
|
||||
buf.truncate(prepped_len as usize);
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -19,5 +19,6 @@ edition = "2018"
|
|||
gitlab = { repository = "xmpp-rs/xmpp-rs" }
|
||||
|
||||
[dependencies]
|
||||
icu = { version = "0.1", optional = true }
|
||||
minidom = { version = "0.15", optional = true }
|
||||
serde = { version = "1.0", features = ["derive"], optional = true }
|
||||
|
|
|
@ -22,8 +22,11 @@ use std::str::FromStr;
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
#[cfg(feature = "icu")]
|
||||
use icu::{Icu, Strict};
|
||||
|
||||
/// An error that signifies that a `Jid` cannot be parsed from a string.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum JidParseError {
|
||||
/// Happens when there is no domain, that is either the string is empty,
|
||||
/// starts with a /, or contains the @/ sequence.
|
||||
|
@ -37,6 +40,10 @@ pub enum JidParseError {
|
|||
|
||||
/// Happens when the resource is empty, that is the string ends with a /.
|
||||
EmptyResource,
|
||||
|
||||
#[cfg(feature = "icu")]
|
||||
/// TODO
|
||||
IcuError(icu::Error),
|
||||
}
|
||||
|
||||
impl StdError for JidParseError {}
|
||||
|
@ -51,6 +58,8 @@ impl fmt::Display for JidParseError {
|
|||
JidParseError::NoResource => "no resource found in this full JID",
|
||||
JidParseError::EmptyNode => "nodepart empty despite the presence of a @",
|
||||
JidParseError::EmptyResource => "resource empty despite the presence of a /",
|
||||
#[cfg(feature = "icu")]
|
||||
JidParseError::IcuError(_err) => "TODO",
|
||||
}
|
||||
)
|
||||
}
|
||||
|
@ -388,7 +397,19 @@ fn _from_str(s: &str) -> Result<StringJid, JidParseError> {
|
|||
} else if let ParserState::Resource = state {
|
||||
return Err(JidParseError::EmptyResource);
|
||||
}
|
||||
Ok((node, domain.ok_or(JidParseError::NoDomain)?, resource))
|
||||
let domain = domain.ok_or(JidParseError::NoDomain)?;
|
||||
#[cfg(feature = "icu")]
|
||||
let (node, domain, resource) = {
|
||||
let icu = Icu::new().unwrap();
|
||||
let node = node.map(|node| icu.nodeprep(&node, Strict::AllowUnassigned).unwrap());
|
||||
let domain = icu.idna2008.to_unicode(&domain).unwrap();
|
||||
let resource = resource.map(|resource| {
|
||||
icu.resourceprep(&resource, Strict::AllowUnassigned)
|
||||
.unwrap()
|
||||
});
|
||||
(node, domain, resource)
|
||||
};
|
||||
Ok((node, domain, resource))
|
||||
}
|
||||
|
||||
impl FromStr for FullJid {
|
||||
|
@ -905,4 +926,12 @@ mod tests {
|
|||
.build();
|
||||
assert_eq!(elem.attr("from"), Some(String::from(bare).as_ref()));
|
||||
}
|
||||
|
||||
#[cfg(feature = "icu")]
|
||||
#[test]
|
||||
fn icu_jid() {
|
||||
let full = FullJid::from_str("Test@☃.coM/Test™").unwrap();
|
||||
let equiv = FullJid::new("test", "☃.com", "TestTM");
|
||||
assert_eq!(full, equiv);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue