From f8c0d127509e1d0f41681cf151b56d41974cacad Mon Sep 17 00:00:00 2001 From: Florent Le Coz Date: Tue, 25 Jun 2013 01:08:16 +0200 Subject: [PATCH] Workaround for a bug (?) where wcwidth returns -1 for valid printable chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example 😆 --- src/pooptmodule.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/pooptmodule.c b/src/pooptmodule.c index 7593d377..b45f49a0 100644 --- a/src/pooptmodule.c +++ b/src/pooptmodule.c @@ -17,6 +17,22 @@ python functions that are too slow. */ PyObject *ErrorObject; +/** + * Internal functions + */ +/* Just checking if the return value is -1. In some (all?) implementations, + wcwidth("😆") returns -1 while it should return 1. In these cases, we + return 1 instead because this is by far the most probable real value. As + for \n, \t and their friends, they are not supposed to be passed in this + function, ever. */ +int xwcwidth(wchar_t c) +{ + const int res = wcwidth(c); + if (res == -1) + return 1; + return res; +} + /*** The module functions ***/ @@ -114,8 +130,6 @@ static PyObject* poopt_cut_text(PyObject* self, PyObject* args) } buffer += consumed; - /* Get the number of columns needed to display this character. May be 0, 1 or 2 */ - const size_t cols = wcwidth(wc); /* This is one condition to end the line: an explicit \n is found */ if (wc == (wchar_t)'\n') @@ -130,6 +144,9 @@ static PyObject* poopt_cut_text(PyObject* self, PyObject* args) continue ; } + /* Get the number of columns needed to display this character. May be 0, 1 or 2 */ + const size_t cols = xwcwidth(wc); + /* This is the second condition to end the line: we have consumed * enough characters to fill a whole line */ if (columns + cols > width) @@ -172,7 +189,7 @@ static PyObject* poopt_cut_text(PyObject* self, PyObject* args) } /** - wcwidth: An emulation of the POSIX wcswidth(3) function using wcwidth and mbrtowc. + wcswidth: An emulation of the POSIX wcswidth(3) function using wcwidth and mbrtowc. */ PyDoc_STRVAR(poopt_wcswidth_doc, "wcswidth(s)\n\n\nThe wcswidth() function returns the number of columns needed to represent the wide-character string pointed to by s. Raise UnicodeError if an invalid unicode value is passed"); static PyObject* poopt_wcswidth(PyObject* self, PyObject* args) @@ -202,7 +219,7 @@ static PyObject* poopt_wcswidth(PyObject* self, PyObject* args) return NULL; } string += consumed; - res += wcwidth(wc); + res += xwcwidth(wc); } return Py_BuildValue("i", res); }