From e52e37a1946730b068f05a4dee8f065b55717edd Mon Sep 17 00:00:00 2001
From: Kendall Masse
Date: Sat, 8 Feb 2020 03:11:33 -0500
Subject: [PATCH 1/2] BUG: Fixed encoding of pd.NA with to_json (#31748)

---
 doc/source/whatsnew/v1.0.2.rst            |  5 +--
 pandas/_libs/src/ujson/python/objToJSON.c | 12 +++++++
 pandas/tests/io/json/test_pandas.py       | 41 +++++++++++++++++++++++
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index 94dc1e0c007ca..70aaaa6d0a60d 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -25,8 +25,9 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 
--
--
+**I/O**
+
+- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 5c5b80648aed1..5cb782a0051af 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -54,6 +54,7 @@ static PyTypeObject *cls_dataframe;
 static PyTypeObject *cls_series;
 static PyTypeObject *cls_index;
 static PyTypeObject *cls_nat;
+static PyTypeObject *cls_na;
 PyObject *cls_timedelta;
 
 npy_int64 get_nat(void) { return NPY_MIN_INT64; }
@@ -151,6 +152,7 @@ int PdBlock_iterNext(JSOBJ, JSONTypeContext *);
 void *initObjToJSON(void) {
     PyObject *mod_pandas;
     PyObject *mod_nattype;
+    PyObject *mod_natype;
     PyObject *mod_decimal = PyImport_ImportModule("decimal");
     type_decimal =
         (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal");
@@ -176,6 +178,12 @@ void *initObjToJSON(void) {
         Py_DECREF(mod_nattype);
     }
 
+    mod_natype = PyImport_ImportModule("pandas._libs.missing");
+    if (mod_natype) {
+        cls_na = (PyTypeObject *)PyObject_GetAttrString(mod_natype, "NAType");
+        Py_DECREF(mod_natype);
+    }
+
     /* Initialise numpy API */
     import_array();
     // GH 31463
@@ -1909,6 +1917,10 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
                      "%R (0d array) is not JSON serializable at the moment",
                      obj);
         goto INVALID;
+    } else if (PyObject_TypeCheck(obj, cls_na)) {
+        PRINTMARK();
+        tc->type = JT_NULL;
+        return;
     }
 
 ISITERABLE:
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index bb873c71e8a35..f2d35bfb3b5ae 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1640,3 +1640,44 @@ def test_deprecate_numpy_argument_read_json(self):
         with tm.assert_produces_warning(FutureWarning):
             result = read_json(expected.to_json(), numpy=True)
             tm.assert_frame_equal(result, expected)
+
+    def test_frame_int_overflow(self):
+        # GH 30320
+        encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
+        expected = DataFrame({"col": ["31900441201190696999", "Text"]})
+        result = read_json(encoded_json)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "dataframe,expected",
+        [
+            (
+                pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}),
+                '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,'
+                '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}',
+            )
+        ],
+    )
+    def test_json_multiindex(self, dataframe, expected):
+        series = dataframe.stack()
+        result = series.to_json(orient="index")
+        assert result == expected
+
+    def test_to_s3(self, s3_resource):
+        # GH 28375
+        mock_bucket_name, target_file = "pandas-test", "test.json"
+        df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
+        df.to_json(f"s3://{mock_bucket_name}/{target_file}")
+        assert target_file in (
+            obj.key for obj in s3_resource.Bucket("pandas-test").objects.all()
+        )
+
+    def test_json_pandas_na(self):
+        # GH 31615
+        result = pd.DataFrame([[pd.NA]]).to_json()
+        assert result == '{"0":{"0":null}}'
+
+    def test_json_pandas_nulls(self, nulls_fixture):
+        # GH 31615
+        result = pd.DataFrame([[nulls_fixture]]).to_json()
+        assert result == '{"0":{"0":null}}'

From 9f8e652d4cc6a66c8bfbd005fe9afb738488a59b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 8 Feb 2020 09:16:53 +0100
Subject: [PATCH 2/2] remove added test

---
 pandas/tests/io/json/test_pandas.py | 31 -----------------------------
 1 file changed, 31 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index f2d35bfb3b5ae..4b7936d3159a4 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1641,37 +1641,6 @@ def test_deprecate_numpy_argument_read_json(self):
             result = read_json(expected.to_json(), numpy=True)
             tm.assert_frame_equal(result, expected)
 
-    def test_frame_int_overflow(self):
-        # GH 30320
-        encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
-        expected = DataFrame({"col": ["31900441201190696999", "Text"]})
-        result = read_json(encoded_json)
-        tm.assert_frame_equal(result, expected)
-
-    @pytest.mark.parametrize(
-        "dataframe,expected",
-        [
-            (
-                pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}),
-                '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,'
-                '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}',
-            )
-        ],
-    )
-    def test_json_multiindex(self, dataframe, expected):
-        series = dataframe.stack()
-        result = series.to_json(orient="index")
-        assert result == expected
-
-    def test_to_s3(self, s3_resource):
-        # GH 28375
-        mock_bucket_name, target_file = "pandas-test", "test.json"
-        df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
-        df.to_json(f"s3://{mock_bucket_name}/{target_file}")
-        assert target_file in (
-            obj.key for obj in s3_resource.Bucket("pandas-test").objects.all()
-        )
-
     def test_json_pandas_na(self):
         # GH 31615
         result = pd.DataFrame([[pd.NA]]).to_json()