Skip to content

BUG: pd.read_json throwing error on bytes input #46935

@galipremsagar

Description

@galipremsagar

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

In [1]: buffer = (
   ...:         b'{"amount": 100, "name": "Alice"}\n'
   ...:         b'{"amount": 200, "name": "Bob"}\n'
   ...:         b'{"amount": 300, "name": "Charlie"}\n'
   ...:         b'{"amount": 400, "name": "Dennis"}\n'
   ...:     )

In [2]: import pandas as pd

In [3]: pd.read_json(buffer)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [3], in <cell line: 1>()
----> 1 pd.read_json(buffer)

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/util/_decorators.py:207, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
    205     else:
    206         kwargs[new_arg_name] = new_arg_value
--> 207 return func(*args, **kwargs)

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/io/json/_json.py:588, in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options)
    585 if convert_axes is None and orient != "table":
    586     convert_axes = True
--> 588 json_reader = JsonReader(
    589     path_or_buf,
    590     orient=orient,
    591     typ=typ,
    592     dtype=dtype,
    593     convert_axes=convert_axes,
    594     convert_dates=convert_dates,
    595     keep_default_dates=keep_default_dates,
    596     numpy=numpy,
    597     precise_float=precise_float,
    598     date_unit=date_unit,
    599     encoding=encoding,
    600     lines=lines,
    601     chunksize=chunksize,
    602     compression=compression,
    603     nrows=nrows,
    604     storage_options=storage_options,
    605     encoding_errors=encoding_errors,
    606 )
    608 if chunksize:
    609     return json_reader

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/io/json/_json.py:673, in JsonReader.__init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression, nrows, storage_options, encoding_errors)
    670     if not self.lines:
    671         raise ValueError("nrows can only be passed if lines=True")
--> 673 data = self._get_data_from_filepath(filepath_or_buffer)
    674 self.data = self._preprocess_data(data)

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/io/json/_json.py:710, in JsonReader._get_data_from_filepath(self, filepath_or_buffer)
    703 filepath_or_buffer = stringify_path(filepath_or_buffer)
    704 if (
    705     not isinstance(filepath_or_buffer, str)
    706     or is_url(filepath_or_buffer)
    707     or is_fsspec_url(filepath_or_buffer)
    708     or file_exists(filepath_or_buffer)
    709 ):
--> 710     self.handles = get_handle(
    711         filepath_or_buffer,
    712         "r",
    713         encoding=self.encoding,
    714         compression=self.compression,
    715         storage_options=self.storage_options,
    716         errors=self.encoding_errors,
    717     )
    718     filepath_or_buffer = self.handles.handle
    720 return filepath_or_buffer

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/io/common.py:826, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    821     is_wrapped = not (
    822         isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close
    823     )
    825 if "r" in ioargs.mode and not hasattr(handle, "read"):
--> 826     raise TypeError(
    827         "Expected file path name or file-like object, "
    828         f"got {type(ioargs.filepath_or_buffer)} type"
    829     )
    831 handles.reverse()  # close the most recently added buffer first
    832 if ioargs.should_close:

TypeError: Expected file path name or file-like object, got <class 'bytes'> type

Issue Description

Up to the 1.3.x releases, passing a bytes input to pd.read_json parsed the input and returned a DataFrame. In version 1.4.2 the same call raises a TypeError.

Expected Behavior

The same behavior as in previous versions, as shown below. Or is this an intentional breaking change that was not explicitly called out in the changelog?

Out[11]: 
   amount     name
0     100    Alice
1     200      Bob
2     300  Charlie
3     400   Dennis

Installed Versions

In [4]: pd.show_versions()

AssertionError Traceback (most recent call last)
Input In [4], in <cell line: 1>()
----> 1 pd.show_versions()

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/util/_print_versions.py:109, in show_versions(as_json)
94 """
95 Provide useful information, important for bug reports.
96
(...)
106 * If True, outputs info in JSON format to the console.
107 """
108 sys_info = _get_sys_info()
--> 109 deps = _get_dependency_info()
111 if as_json:
112 j = {"system": sys_info, "dependencies": deps}

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/util/_print_versions.py:88, in _get_dependency_info()
86 result: dict[str, JSONSerializable] = {}
87 for modname in deps:
---> 88 mod = import_optional_dependency(modname, errors="ignore")
89 result[modname] = get_version(mod) if mod else None
90 return result

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/pandas/compat/_optional.py:138, in import_optional_dependency(name, extra, errors, min_version)
133 msg = (
134 f"Missing optional dependency '{install_name}'. {extra} "
135 f"Use pip or conda to install {install_name}."
136 )
137 try:
--> 138 module = importlib.import_module(name)
139 except ImportError:
140 if errors == "raise":

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/importlib/__init__.py:127, in import_module(name, package)
125 break
126 level += 1
--> 127 return _bootstrap._gcd_import(name[level:], package, level)

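File <frozen importlib._bootstrap>:1014, in _gcd_import(name, package, level)

File <frozen importlib._bootstrap>:991, in _find_and_load(name, import_)

File <frozen importlib._bootstrap>:975, in _find_and_load_unlocked(name, import_)

File <frozen importlib._bootstrap>:671, in _load_unlocked(spec)

File <frozen importlib._bootstrap_external>:843, in exec_module(self, module)

File <frozen importlib._bootstrap>:219, in _call_with_frames_removed(f, *args, **kwds)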

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/setuptools/__init__.py:8, in <module>
5 import re
6 import warnings
----> 8 import _distutils_hack.override # noqa: F401
10 import distutils.core
11 from distutils.errors import DistutilsOptionError

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/_distutils_hack/override.py:1, in <module>
----> 1 __import__('_distutils_hack').do_override()

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/_distutils_hack/__init__.py:72, in do_override()
70 if enabled():
71 warn_distutils_present()
---> 72 ensure_local_distutils()

File /nvme/0/pgali/envs/cudfdev/lib/python3.8/site-packages/_distutils_hack/__init__.py:59, in ensure_local_distutils()
57 # check that submodules load as expected
58 core = importlib.import_module('distutils.core')
---> 59 assert '_distutils' in core.__file__, core.__file__
60 assert 'setuptools._distutils.log' not in sys.modules

AssertionError: /nvme/0/pgali/envs/cudfdev/lib/python3.8/distutils/core.py

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug · IO JSON (read_json, to_json, json_normalize) · Regression (Functionality that used to work in a prior pandas version)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions