From 7fd9d1fad48eaece46bcc226b38154ca7a0d21f0 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Thu, 2 Apr 2020 13:54:59 +0200 Subject: [PATCH 1/2] Use try/except block to properly catch and handle the exception Now no more warnings --- pandas/_libs/hashtable_class_helper.pxi.in | 4 ---- pandas/_libs/tslibs/util.pxd | 9 ++++----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 3ce3bc519b311..ea1714fc914d3 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -12,9 +12,6 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in from pandas._libs.tslibs.util cimport get_c_string from pandas._libs.missing cimport C_NA -cdef extern from "Python.h": - void PyErr_Clear() - {{py: # name, dtype, c_type @@ -794,7 +791,6 @@ cdef class StringHashTable(HashTable): # if ignore_na is False, we also stringify NaN/None/etc. v = get_c_string(val) if v == NULL: - PyErr_Clear() v = get_c_string(repr(val)) vecs[i] = v diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index e7f6b3334eb65..a199a5abeda1f 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -238,11 +238,10 @@ cdef inline const char* get_c_string_buf_and_size(str py_string, ------- buf : const char* """ - cdef: - const char *buf - - buf = PyUnicode_AsUTF8AndSize(py_string, length) - return buf + try: + return PyUnicode_AsUTF8AndSize(py_string, length) + except UnicodeEncodeError: + return NULL cdef inline const char* get_c_string(str py_string): From 41ad12d6db0b5eb98f1e1f7a4467be87ea40597f Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 19 Apr 2020 15:25:36 +0200 Subject: [PATCH 2/2] Handle exception one level higher instead --- pandas/_libs/hashtable_class_helper.pxi.in | 5 +++-- pandas/_libs/tslibs/util.pxd | 9 +++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index ea1714fc914d3..c251c92cb072a 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -789,8 +789,9 @@ cdef class StringHashTable(HashTable): labels[i] = na_sentinel else: # if ignore_na is False, we also stringify NaN/None/etc. - v = get_c_string(val) - if v == NULL: + try: + v = get_c_string(val) + except UnicodeEncodeError: v = get_c_string(repr(val)) vecs[i] = v diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index a199a5abeda1f..cc98781dc73cf 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -219,7 +219,7 @@ cdef inline bint is_nan(object val): cdef inline const char* get_c_string_buf_and_size(str py_string, - Py_ssize_t *length): + Py_ssize_t *length) except NULL: """ Extract internal char* buffer of unicode or bytes object `py_string` with getting length of this internal buffer saved in `length`. @@ -238,11 +238,8 @@ cdef inline const char* get_c_string_buf_and_size(str py_string, ------- buf : const char* """ - try: - return PyUnicode_AsUTF8AndSize(py_string, length) - except UnicodeEncodeError: - return NULL + return PyUnicode_AsUTF8AndSize(py_string, length) -cdef inline const char* get_c_string(str py_string): +cdef inline const char* get_c_string(str py_string) except NULL: return get_c_string_buf_and_size(py_string, NULL)