poezio/poezio/poopt.py
2018-07-22 17:31:25 +02:00

185 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright 2017 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
#
# This file is part of Poezio.
#
# Poezio is free software: you can redistribute it and/or modify
# it under the terms of the zlib license. See the COPYING file.
'''This is a template module just for instruction. And poopt.'''
from typing import List, Tuple
# CFFI codepath.
from cffi import FFI
# Set up a CFFI binding so that the C wcwidth() function can be called from
# python through the `libc` object below.
ffi = FFI()
# Declare the C signature exposed on the loaded library. wchar_t is declared
# here as a long, so callers pass the character's integer code point.
ffi.cdef("""
typedef long wchar_t;
int wcwidth(wchar_t c);
""")
# NOTE(review): dlopen(None) is expected to give access to the symbols of the
# standard C library (see the CFFI documentation) — confirm on non-Unix
# platforms.
libc = ffi.dlopen(None)
# Cython codepath.
#cdef extern from "wchar.h":
# ctypedef Py_UCS4 wchar_t
# int wcwidth(wchar_t c)
# Just checking if the return value is -1. In some (all?) implementations,
# wcwidth("😆") returns -1 while it should return 2. In these cases, we
# return 1 instead because this is by far the most probable real value.
# Since the string is received from python, and the unicode character is
# extracted with mbrtowc(), and supposing these two components are not
# bugged, and since poezio's code should never pass '\t', '\n' or their
# friends, a return value of -1 from wcwidth() is considered to be a bug in
# wcwidth() (until proven otherwise). xwcwidth() is here to work around
# this bug.
def xwcwidth(c: str) -> int:
    '''xwcwidth(c)

    Return the number of columns needed to display the single character c,
    as reported by the C library's wcwidth(), except that a result of -1 is
    replaced by 1 for every character other than '\\x19'.'''
    width = libc.wcwidth(ord(c))
    # Trust wcwidth() unless it reports -1; '\x19' is the only character
    # for which a -1 result is passed through unchanged.
    if width != -1 or c == '\x19':
        return width
    return 1
# cut_text: takes a string and returns a list of two-int tuples.
#
# Each two-int tuple is a line, represented by its starting position and
# its ending position (where it should be cut). Note that these positions
# are calculated using the positions of the python string characters, not
# the individual bytes.
#
# For example,
# cut_text("vivent les réfrigérateurs", 6)
# will return [(0, 6), (6, 10), (11, 17), (17, 23), (23, 25)], meaning that
# the lines are
# "vivent", " les", "réfrig", "érateu" and "rs"
def cut_text(string: str, width: int) -> List[Tuple[int, int]]:
    '''cut_text(text, width)

    Split a string into display lines taking at most width columns each.

    Return a list of two-tuples, one per line: the first int is the
    starting position of the line in the python string and the second is
    its end (exclusive), so string[start:end] is the text of the line.
    Positions are indexes of python characters, not bytes.

    A line ends right after an explicit newline (the newline is part of the
    line's range), or when the next character would not fit in width
    columns. In the latter case the line is cut on the last space seen in
    it if there is one (the space belongs to neither line), and just before
    the current character otherwise.

    The last tuple always ends at len(string); an empty string yields the
    single empty line [(0, 0)].'''
    # The list of tuples that we return
    retlist = []
    # The start position (in the python string) of the line being built
    start_pos = 0
    # The position of the last space seen in the current line, or -1 if
    # there is none. This is used to cut on spaces instead of cutting
    # inside words, whenever possible.
    last_space = -1
    # The number of columns taken by chars between start_pos and last_space
    cols_until_space = 0
    # Number of columns taken to display the current line so far
    columns = 0
    # True while we are inside a poezio formatting marker
    in_special_character = False
    for spos, wc in enumerate(string):
        # Special case to skip poezio special characters that are contained
        # in the python string, but should not be counted as chars because
        # they will not be displayed. Those are the formatting chars (to
        # insert colors or things like that in the string)
        if in_special_character:
            # Skip everything until the end of this format marker, without
            # increasing the number of columns of the current line, because
            # these chars are not printed.
            if wc in ('u', 'a', 'i', 'b', 'o', '}'):
                in_special_character = False
            continue
        if wc == '\x19':
            in_special_character = True
            continue
        # This is one condition to end the line: an explicit \n is found
        if wc == '\n':
            # Use a dedicated variable rather than modifying the loop
            # index, so that a trailing newline cannot push the end of the
            # last line past the end of the string.
            line_end = spos + 1
            retlist.append((start_pos, line_end))
            # And then initiate a new line
            start_pos = line_end
            last_space = -1
            columns = 0
            continue
        # Get the number of columns needed to display this character
        cols = xwcwidth(wc)
        # This is the second condition to end the line: we have consumed
        # enough columns to fill a whole line
        if columns + cols > width:
            if last_space != -1:
                # If possible, cut on a space. The chars located after that
                # space already belong to the new line, so keep their
                # columns and drop those up to and including the space.
                retlist.append((start_pos, last_space))
                start_pos = last_space + 1
                last_space = -1
                columns -= (cols_until_space + 1)
            else:
                # Otherwise, cut in the middle of a word
                retlist.append((start_pos, spos))
                start_pos = spos
                columns = 0
        # We save the position of the last space seen in this line, and the
        # number of columns we have until now. This helps us keep track of
        # the columns to count when we will use that space as a cutting
        # point, later
        if wc == ' ':
            last_space = spos
            cols_until_space = columns
        # Add the char's columns to the line's columns
        columns += cols
    # We are at the end of the string, append the last (unfinished) line.
    # Using len(string) also covers the empty string, for which the loop
    # body never runs.
    retlist.append((start_pos, len(string)))
    return retlist
# wcswidth: An emulation of the POSIX wcswidth(3) function using xwcwidth.
def wcswidth(string: str) -> int:
    '''wcswidth(s)

    Return the number of columns needed to represent the string s, computed
    as the sum of xwcwidth() over each of its characters. An empty string
    takes 0 columns.'''
    return sum(xwcwidth(char) for char in string)
# cut_by_columns: takes a python string and a number of columns, returns a
# python string truncated to take at most that many columns
# For example cut_by_columns("エメルカ", n) will return:
# - n == 5 -> "エメ" (which takes only 4 columns since we can't cut the
# next character in half)
# - n == 2 -> "エ"
# - n == 1 -> ""
# - n == 42 -> "エメルカ"
# - etc
def cut_by_columns(string: str, limit: int) -> str:
    '''cut_by_columns(string, limit)

    Return the longest prefix of string that can be displayed in at most
    limit columns, never splitting a character.'''
    used = 0
    for index, char in enumerate(string):
        # The budget is exactly consumed: stop without measuring the next
        # character.
        if used == limit:
            return string[:index]
        char_cols = xwcwidth(char)
        # This character would overflow the budget, so it is left out.
        if used + char_cols > limit:
            return string[:index]
        used += char_cols
    # Every character fits
    return string