79 lines
2.6 KiB
Python
79 lines
2.6 KiB
Python
|
from __future__ import unicode_literals
|
||
|
|
||
|
import sys
|
||
|
import stringprep
|
||
|
import unicodedata
|
||
|
|
||
|
|
||
|
def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the SASLprep
    profile of stringprep.

    The steps applied are, in order: character mapping, NFKC
    normalization, the bidirectional check, and the prohibited
    character check.

    :param text: The text string to process. On Python 2 a byte ``str``
                 is also accepted and is decoded as US-ASCII first.
    :param strict: If ``True``, prevent the use of unassigned code points.
    :returns: The mapped and NFKC-normalized string.
    :raises UnicodeError: If the result contains an unassigned code point
                          (only when ``strict`` is true), a prohibited
                          character, or violates the bidirectional rules.
    """
    if sys.version_info < (3, 0):
        if isinstance(text, str):
            text = text.decode('us-ascii')

    # Mapping:
    #
    #  -  non-ASCII space characters [StringPrep, C.1.2] that can be
    #     mapped to SPACE (U+0020), and
    #
    #  -  the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #     that can be mapped to nothing.
    mapped = []
    for char in text:
        if stringprep.in_table_c12(char):
            mapped.append(' ')
        elif not stringprep.in_table_b1(char):
            mapped.append(char)

    # Normalization using form KC
    text = unicodedata.normalize('NFKC', ''.join(mapped))

    # Prohibited output tables, checked in this order for every character.
    prohibited = (
        (stringprep.in_table_c12, 'In table C.1.2'),
        (stringprep.in_table_c21, 'In table C.2.1'),
        (stringprep.in_table_c22, 'In table C.2.2'),
        (stringprep.in_table_c3, 'In table C.3'),
        (stringprep.in_table_c4, 'In table C.4'),
        (stringprep.in_table_c5, 'In table C.5'),
        (stringprep.in_table_c6, 'In table C.6'),
        (stringprep.in_table_c7, 'In table C.7'),
        (stringprep.in_table_c8, 'In table C.8'),
        (stringprep.in_table_c9, 'In table C.9'),
    )

    # Check for bidirectional string: a string that starts with a
    # RandALCat character [StringPrep, D.1] must also end with one.
    first_is_randal = bool(text) and stringprep.in_table_d1(text[0])
    if first_is_randal and not stringprep.in_table_d1(text[-1]):
        raise UnicodeError('Section 6.3 [end]')

    for char in text:
        # Check for prohibited characters
        if strict and stringprep.in_table_a1(char):
            raise UnicodeError('Unassigned Codepoint')
        for in_table, message in prohibited:
            if in_table(char):
                raise UnicodeError(message)

        if first_is_randal:
            # A RandALCat string must not contain any LCat character
            # [StringPrep, D.2].
            if stringprep.in_table_d2(char):
                raise UnicodeError('Section 6.2')
        elif stringprep.in_table_d1(char):
            # The string does not start with a RandALCat character, so it
            # must not contain one anywhere -- including the last position.
            raise UnicodeError('Section 6.3')

    return text
|