From f8618b8d86cbe7ec7925cd695f386063531e787f Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Wed, 29 Nov 2017 18:22:25 +0000 Subject: [PATCH 1/4] ENH: Add index parameter to to_json --- doc/source/whatsnew/v0.22.0.txt | 1 + pandas/core/generic.py | 13 ++++- pandas/io/json/json.py | 58 +++++++++++++++++++--- pandas/tests/io/json/test_pandas.py | 77 +++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 32b548e5f32f1..f3dac20758441 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -136,6 +136,7 @@ Other Enhancements - :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). - :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) +- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) .. _whatsnew_0220.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ea4a645927d7b..93b4b6a10dea7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1603,7 +1603,8 @@ def _repr_latex_(self): def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', - default_handler=None, lines=False, compression=None): + default_handler=None, lines=False, compression=None, + index=True): """ Convert the object to a JSON string. @@ -1671,6 +1672,13 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, .. versionadded:: 0.21.0 + index : boolean, default True + Whether to include the index values in the JSON string. A + ValueError will be thrown if index is False when orient is not + 'split' or 'table'. + + .. 
versionadded:: 0.22.0 + Returns ------- same type as input object with filtered info axis @@ -1723,7 +1731,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=double_precision, force_ascii=force_ascii, date_unit=date_unit, default_handler=default_handler, - lines=lines, compression=compression) + lines=lines, compression=compression, + index=index) def to_hdf(self, path_or_buf, key, **kwargs): """Write the contained data to an HDF5 file using HDFStore. diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 21736673350d8..abfbd7d6ea962 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -28,7 +28,12 @@ # interface to/from def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', - default_handler=None, lines=False, compression=None): + default_handler=None, lines=False, compression=None, + index=True): + + if not index and orient not in ['split', 'table']: + raise ValueError("'index=False' is only valid when 'orient' is " + "'split' or 'table'") path_or_buf = _stringify_path(path_or_buf) if lines and orient != 'records': @@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', s = writer( obj, orient=orient, date_format=date_format, double_precision=double_precision, ensure_ascii=force_ascii, - date_unit=date_unit, default_handler=default_handler).write() + date_unit=date_unit, default_handler=default_handler, + index=index).write() if lines: s = _convert_to_line_delimits(s) @@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', class Writer(object): def __init__(self, obj, orient, date_format, double_precision, - ensure_ascii, date_unit, default_handler=None): + ensure_ascii, date_unit, index, default_handler=None): self.obj = obj if orient is None: @@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision, self.ensure_ascii = ensure_ascii self.date_unit = date_unit 
self.default_handler = default_handler + self.index = index self.is_copy = None self._format_axes() @@ -108,6 +115,19 @@ def _format_axes(self): raise ValueError("Series index must be unique for orient=" "'{orient}'".format(orient=self.orient)) + def write(self): + if not self.index and self.orient == 'split': + self.obj = {"name": self.obj.name, "data": self.obj.values} + return dumps( + self.obj, + orient=self.orient, + double_precision=self.double_precision, + ensure_ascii=self.ensure_ascii, + date_unit=self.date_unit, + iso_dates=self.date_format == 'iso', + default_handler=self.default_handler + ) + class FrameWriter(Writer): _default_orient = 'columns' @@ -123,12 +143,26 @@ def _format_axes(self): raise ValueError("DataFrame columns must be unique for orient=" "'{orient}'.".format(orient=self.orient)) + def write(self): + if not self.index and self.orient == 'split': + self.obj = self.obj.to_dict(orient='split') + del self.obj["index"] + return dumps( + self.obj, + orient=self.orient, + double_precision=self.double_precision, + ensure_ascii=self.ensure_ascii, + date_unit=self.date_unit, + iso_dates=self.date_format == 'iso', + default_handler=self.default_handler + ) + class JSONTableWriter(FrameWriter): _default_orient = 'records' def __init__(self, obj, orient, date_format, double_precision, - ensure_ascii, date_unit, default_handler=None): + ensure_ascii, date_unit, index, default_handler=None): """ Adds a `schema` attribut with the Table Schema, resets the index (can't do in caller, because the schema inference needs @@ -137,7 +171,7 @@ def __init__(self, obj, orient, date_format, double_precision, """ super(JSONTableWriter, self).__init__( obj, orient, date_format, double_precision, ensure_ascii, - date_unit, default_handler=default_handler) + date_unit, index, default_handler=default_handler) if date_format != 'iso': msg = ("Trying to write with `orient='table'` and " @@ -146,7 +180,7 @@ def __init__(self, obj, orient, date_format, double_precision, 
.format(fmt=date_format)) raise ValueError(msg) - self.schema = build_table_schema(obj) + self.schema = build_table_schema(obj, index=self.index) # NotImplementd on a column MultiIndex if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): @@ -173,7 +207,17 @@ def __init__(self, obj, orient, date_format, double_precision, self.orient = 'records' def write(self): - data = super(JSONTableWriter, self).write() + if not self.index: + self.obj = self.obj.drop('index', axis=1) + data = dumps( + self.obj, + orient=self.orient, + double_precision=self.double_precision, + ensure_ascii=self.ensure_ascii, + date_unit=self.date_unit, + iso_dates=self.date_format == 'iso', + default_handler=self.default_handler + ) serialized = '{{"schema": {schema}, "data": {data}}}'.format( schema=dumps(self.schema), data=data) return serialized diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index fe447534efdc7..95c5cdc401fef 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1147,3 +1147,80 @@ def test_data_frame_size_after_to_json(self): size_after = df.memory_usage(index=True, deep=True).sum() assert size_before == size_after + + def test_index_false_to_json(self): + # GH 17394 + # Testing index parameter in to_json + import json + df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b']) + + result = df.to_json(orient='split', index=False) + result = json.loads(result) + + expected = { + 'columns': ['a', 'b'], + 'data': [[1, 2], [4, 5]] + } + + assert result == expected + + result = df.to_json(orient='table', index=False) + result = json.loads(result) + + schema = { + 'fields': [{'name': 'a', 'type': 'integer'}, + {'name': 'b', 'type': 'integer'}], + 'pandas_version': '0.20.0' + } + + expected = { + 'schema': schema, + 'data': [{'a': 1, 'b': 2}, {'a': 4, 'b': 5}] + } + + assert result == expected + + s = pd.Series([1, 2, 3], name='A') + + result = s.to_json(orient='split', index=False) + result = 
json.loads(result) + + expected = { + 'name': 'A', + 'data': [1, 2, 3] + } + + assert result == expected + + result = s.to_json(orient='table', index=False) + result = json.loads(result) + + fields = [{'name': 'A', 'type': 'integer'}] + + schema = { + 'fields': fields, + 'pandas_version': '0.20.0' + } + + expected = { + 'schema': schema, + 'data': [{'A': 1}, {'A': 2}, {'A': 3}] + } + + assert result == expected + + @pytest.mark.parametrize('orient', [ + ('records'), + ('index'), + ('columns'), + ('values'), + ]) + def test_index_false_error_to_json(self, orient): + # GH 17394 + # Testing error message from to_json with index=False + df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b']) + + with tm.assert_raises_regex(ValueError, "'index=False' is only " + "valid when 'orient' is " + "'split' or 'table'"): + df.to_json(orient=orient, index=False) From 90c1a323e902accc4cf0a683bd1e824d7725c576 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 2 Dec 2017 15:55:41 +0000 Subject: [PATCH 2/4] Use super() in derived class and add comments to tests --- pandas/io/json/json.py | 43 +++++++---------------------- pandas/tests/io/json/test_pandas.py | 7 ++++- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index abfbd7d6ea962..5663ac3c81c1a 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -55,7 +55,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', obj, orient=orient, date_format=date_format, double_precision=double_precision, ensure_ascii=force_ascii, date_unit=date_unit, default_handler=default_handler, - index=index).write() + index=index)._write() if lines: s = _convert_to_line_delimits(s) @@ -95,7 +95,7 @@ def __init__(self, obj, orient, date_format, double_precision, def _format_axes(self): raise AbstractMethodError(self) - def write(self): + def _write(self): return dumps( self.obj, orient=self.orient, @@ -115,18 +115,10 @@ def _format_axes(self): raise 
ValueError("Series index must be unique for orient=" "'{orient}'".format(orient=self.orient)) - def write(self): + def _write(self): if not self.index and self.orient == 'split': self.obj = {"name": self.obj.name, "data": self.obj.values} - return dumps( - self.obj, - orient=self.orient, - double_precision=self.double_precision, - ensure_ascii=self.ensure_ascii, - date_unit=self.date_unit, - iso_dates=self.date_format == 'iso', - default_handler=self.default_handler - ) + return super(SeriesWriter, self)._write() class FrameWriter(Writer): @@ -143,19 +135,11 @@ def _format_axes(self): raise ValueError("DataFrame columns must be unique for orient=" "'{orient}'.".format(orient=self.orient)) - def write(self): + def _write(self): if not self.index and self.orient == 'split': self.obj = self.obj.to_dict(orient='split') del self.obj["index"] - return dumps( - self.obj, - orient=self.orient, - double_precision=self.double_precision, - ensure_ascii=self.ensure_ascii, - date_unit=self.date_unit, - iso_dates=self.date_format == 'iso', - default_handler=self.default_handler - ) + return super(FrameWriter, self)._write() class JSONTableWriter(FrameWriter): @@ -205,21 +189,14 @@ def __init__(self, obj, orient, date_format, double_precision, self.obj = obj.reset_index() self.date_format = 'iso' self.orient = 'records' + self.index = index - def write(self): + def _write(self): if not self.index: self.obj = self.obj.drop('index', axis=1) - data = dumps( - self.obj, - orient=self.orient, - double_precision=self.double_precision, - ensure_ascii=self.ensure_ascii, - date_unit=self.date_unit, - iso_dates=self.date_format == 'iso', - default_handler=self.default_handler - ) + data = super(JSONTableWriter, self)._write() serialized = '{{"schema": {schema}, "data": {data}}}'.format( - schema=dumps(self.schema), data=data) + schema=dumps(self.schema), data=data) return serialized diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 
95c5cdc401fef..f925327a7bc38 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -9,6 +9,7 @@ read_json, compat) from datetime import timedelta import pandas as pd +import json from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network, @@ -1151,7 +1152,8 @@ def test_data_frame_size_after_to_json(self): def test_index_false_to_json(self): # GH 17394 # Testing index parameter in to_json - import json + + # Testing DataFrame.to_json(orient='split', index=False) df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b']) result = df.to_json(orient='split', index=False) @@ -1164,6 +1166,7 @@ def test_index_false_to_json(self): assert result == expected + # Testing DataFrame.to_json(orient='table', index=False) result = df.to_json(orient='table', index=False) result = json.loads(result) @@ -1180,6 +1183,7 @@ def test_index_false_to_json(self): assert result == expected + # Testing Series.to_json(orient='split', index=False) s = pd.Series([1, 2, 3], name='A') result = s.to_json(orient='split', index=False) @@ -1192,6 +1196,7 @@ def test_index_false_to_json(self): assert result == expected + # Testing Series.to_json(orient='table', index=False) result = s.to_json(orient='table', index=False) result = json.loads(result) From 1a3e41ac29a7038fd41546cbbacc6166a80cf71d Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Mon, 4 Dec 2017 10:26:13 +0000 Subject: [PATCH 3/4] rewriting .write and ._write --- pandas/io/json/json.py | 61 +++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 5663ac3c81c1a..fe62d9566233d 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -55,7 +55,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', obj, orient=orient, date_format=date_format, double_precision=double_precision, ensure_ascii=force_ascii, date_unit=date_unit, 
default_handler=default_handler, - index=index)._write() + index=index).write() if lines: s = _convert_to_line_delimits(s) @@ -95,15 +95,21 @@ def __init__(self, obj, orient, date_format, double_precision, def _format_axes(self): raise AbstractMethodError(self) - def _write(self): + def write(self): + return self._write(self.obj, self.orient, self.double_precision, + self.ensure_ascii, self.date_unit, + self.date_format == 'iso', self.default_handler) + + def _write(self, obj, orient, double_precision, ensure_ascii, + date_unit, iso_dates, default_handler): return dumps( - self.obj, - orient=self.orient, - double_precision=self.double_precision, - ensure_ascii=self.ensure_ascii, - date_unit=self.date_unit, - iso_dates=self.date_format == 'iso', - default_handler=self.default_handler + obj, + orient=orient, + double_precision=double_precision, + ensure_ascii=ensure_ascii, + date_unit=date_unit, + iso_dates=iso_dates, + default_handler=default_handler ) @@ -115,10 +121,14 @@ def _format_axes(self): raise ValueError("Series index must be unique for orient=" "'{orient}'".format(orient=self.orient)) - def _write(self): - if not self.index and self.orient == 'split': - self.obj = {"name": self.obj.name, "data": self.obj.values} - return super(SeriesWriter, self)._write() + def _write(self, obj, orient, double_precision, ensure_ascii, + date_unit, iso_dates, default_handler): + if not self.index and orient == 'split': + obj = {"name": obj.name, "data": obj.values} + return super(SeriesWriter, self)._write(obj, orient, + double_precision, + ensure_ascii, date_unit, + iso_dates, default_handler) class FrameWriter(Writer): @@ -135,11 +145,15 @@ def _format_axes(self): raise ValueError("DataFrame columns must be unique for orient=" "'{orient}'.".format(orient=self.orient)) - def _write(self): - if not self.index and self.orient == 'split': - self.obj = self.obj.to_dict(orient='split') - del self.obj["index"] - return super(FrameWriter, self)._write() + def _write(self, obj, 
orient, double_precision, ensure_ascii, + date_unit, iso_dates, default_handler): + if not self.index and orient == 'split': + obj = obj.to_dict(orient='split') + del obj["index"] + return super(FrameWriter, self)._write(obj, orient, + double_precision, + ensure_ascii, date_unit, + iso_dates, default_handler) class JSONTableWriter(FrameWriter): @@ -191,10 +205,15 @@ def __init__(self, obj, orient, date_format, double_precision, self.orient = 'records' self.index = index - def _write(self): + def _write(self, obj, orient, double_precision, ensure_ascii, + date_unit, iso_dates, default_handler): if not self.index: - self.obj = self.obj.drop('index', axis=1) - data = super(JSONTableWriter, self)._write() + obj = obj.drop('index', axis=1) + data = super(JSONTableWriter, self)._write(obj, orient, + double_precision, + ensure_ascii, date_unit, + iso_dates, + default_handler) serialized = '{{"schema": {schema}, "data": {data}}}'.format( schema=dumps(self.schema), data=data) return serialized From b204d093f20c056c0e12578152df0215e7bcc491 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Fri, 8 Dec 2017 13:05:08 +0000 Subject: [PATCH 4/4] parametrize tests and allow index parameter to handle MultiIndex and index name --- pandas/core/generic.py | 6 +- pandas/io/json/json.py | 8 ++- pandas/tests/io/json/test_pandas.py | 90 +++++++++++------------------ 3 files changed, 43 insertions(+), 61 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 93b4b6a10dea7..79ba18140c651 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1673,9 +1673,9 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, .. versionadded:: 0.21.0 index : boolean, default True - Whether to include the index values in the JSON string. A - ValueError will be thrown if index is False when orient is not - 'split' or 'table'. + Whether to include the index values in the JSON string. 
Not + including the index (``index=False``) is only supported when + orient is 'split' or 'table'. .. versionadded:: 0.22.0 diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index fe62d9566233d..0e0aae0506809 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -200,15 +200,17 @@ def __init__(self, obj, orient, date_format, double_precision, if is_period_dtype(obj.index): obj.index = obj.index.to_timestamp() - self.obj = obj.reset_index() + # exclude index from obj if index=False + if not self.index: + self.obj = obj.reset_index(drop=True) + else: + self.obj = obj.reset_index(drop=False) self.date_format = 'iso' self.orient = 'records' self.index = index def _write(self, obj, orient, double_precision, ensure_ascii, date_unit, iso_dates, default_handler): - if not self.index: - obj = obj.drop('index', axis=1) data = super(JSONTableWriter, self)._write(obj, orient, double_precision, ensure_ascii, date_unit, diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index f925327a7bc38..7cf3d6cd7b612 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1149,80 +1149,60 @@ def test_data_frame_size_after_to_json(self): assert size_before == size_after - def test_index_false_to_json(self): + @pytest.mark.parametrize('data, expected', [ + (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']), + {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), + (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'), + {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), + (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'], + index=[['a', 'b'], ['c', 'd']]), + {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), + (Series([1, 2, 3], name='A'), + {'name': 'A', 'data': [1, 2, 3]}), + (Series([1, 2, 3], name='A').rename_axis('foo'), + {'name': 'A', 'data': [1, 2, 3]}), + (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]), + {'name': 'A', 'data': [1, 2]}), + ]) + def 
test_index_false_to_json_split(self, data, expected): # GH 17394 - # Testing index parameter in to_json - - # Testing DataFrame.to_json(orient='split', index=False) - df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b']) - - result = df.to_json(orient='split', index=False) - result = json.loads(result) - - expected = { - 'columns': ['a', 'b'], - 'data': [[1, 2], [4, 5]] - } - - assert result == expected + # Testing index=False in to_json with orient='split' - # Testing DataFrame.to_json(orient='table', index=False) - result = df.to_json(orient='table', index=False) + result = data.to_json(orient='split', index=False) result = json.loads(result) - schema = { - 'fields': [{'name': 'a', 'type': 'integer'}, - {'name': 'b', 'type': 'integer'}], - 'pandas_version': '0.20.0' - } - - expected = { - 'schema': schema, - 'data': [{'a': 1, 'b': 2}, {'a': 4, 'b': 5}] - } - assert result == expected - # Testing Series.to_json(orient='split', index=False) - s = pd.Series([1, 2, 3], name='A') - - result = s.to_json(orient='split', index=False) - result = json.loads(result) - - expected = { - 'name': 'A', - 'data': [1, 2, 3] - } - - assert result == expected + @pytest.mark.parametrize('data', [ + (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])), + (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')), + (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'], + index=[['a', 'b'], ['c', 'd']])), + (Series([1, 2, 3], name='A')), + (Series([1, 2, 3], name='A').rename_axis('foo')), + (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])), + ]) + def test_index_false_to_json_table(self, data): + # GH 17394 + # Testing index=False in to_json with orient='table' - # Testing Series.to_json(orient='table', index=False) - result = s.to_json(orient='table', index=False) + result = data.to_json(orient='table', index=False) result = json.loads(result) - fields = [{'name': 'A', 'type': 'integer'}] - - schema = { - 'fields': fields, - 'pandas_version': '0.20.0' - } - expected = { - 
'schema': schema, - 'data': [{'A': 1}, {'A': 2}, {'A': 3}] + 'schema': pd.io.json.build_table_schema(data, index=False), + 'data': DataFrame(data).to_dict(orient='records') } assert result == expected @pytest.mark.parametrize('orient', [ - ('records'), - ('index'), - ('columns'), - ('values'), + 'records', 'index', 'columns', 'values' ]) def test_index_false_error_to_json(self, orient): # GH 17394 # Testing error message from to_json with index=False + df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b']) with tm.assert_raises_regex(ValueError, "'index=False' is only "