-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
BUG: use EA.astype in ExtensionBlock.to_native_types #28841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
b50c581
890fe82
ded3d00
330f455
ef65a96
ba55396
1663b3d
a5ba129
61eaf92
f517e1a
10defeb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .array import ListArray, ListDtype, make_data | ||
|
||
__all__ = ["ListArray", "ListDtype", "make_data"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
"""Test extension array for storing nested data in a pandas container. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think there should be a newline after the opening `"""`. |
||
|
||
The ListArray stores an ndarray of lists. | ||
""" | ||
import numbers | ||
import random | ||
import string | ||
|
||
import numpy as np | ||
|
||
from pandas.core.dtypes.base import ExtensionDtype | ||
|
||
import pandas as pd | ||
from pandas.core.arrays import ExtensionArray | ||
|
||
|
||
class ListDtype(ExtensionDtype):
    """Extension dtype whose scalar type is the built-in ``list``."""

    type = list
    name = "list"
    # Missing entries are represented by NaN.
    na_value = np.nan

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return ListArray

    @classmethod
    def construct_from_string(cls, string):
        """
        Build an instance of this dtype from its string name.

        Parameters
        ----------
        string : str
            Must equal ``cls.name`` (``"list"``).

        Returns
        -------
        ListDtype

        Raises
        ------
        TypeError
            If ``string`` is not a ``str`` or does not match ``cls.name``.
        """
        # Guard first: comparing an arbitrary object (e.g. an ndarray) with
        # ``==`` may not yield a plain bool.
        if not isinstance(string, str):
            raise TypeError(
                "'construct_from_string' expects a string, got {}".format(
                    type(string)
                )
            )
        if string == cls.name:
            return cls()
        raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
|
||
|
||
class ListArray(ExtensionArray):
    """Extension array backed by an object ndarray whose elements are lists.

    Missing entries are stored as NaN (``ListDtype.na_value``).
    """

    dtype = ListDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False):
        """
        Wrap ``values`` after validating each element.

        Parameters
        ----------
        values : numpy.ndarray
            Every element must be a ``list`` or a missing value.
        dtype, copy
            Accepted for signature compatibility; not used by this method.

        Raises
        ------
        TypeError
            If ``values`` is not an ndarray or holds a non-list, non-missing
            element.
        """
        if not isinstance(values, np.ndarray):
            raise TypeError("Need to pass a numpy array as values")
        for val in values:
            if not isinstance(val, self.dtype.type) and not pd.isna(val):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Build a ListArray from a sized sequence of lists / missing values."""
        data = np.empty(len(scalars), dtype=object)
        # Fill slot by slot. A bulk ``data[:] = scalars`` lets NumPy interpret
        # equal-length lists as a 2-D block, which cannot be broadcast into
        # this 1-D buffer.
        for i, val in enumerate(scalars):
            data[i] = val
        return cls(data)

    def __getitem__(self, item):
        """Return the stored element for an integer, else a new ListArray."""
        if isinstance(item, numbers.Integral):
            return self.data[item]
        else:
            # slice, list-like, mask
            return type(self)(self.data[item])

    def __len__(self) -> int:
        return len(self.data)

    def isna(self):
        """Return a boolean ndarray that is True where the element is NaN."""
        return np.array(
            [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool
        )

    def take(self, indexer, allow_fill=False, fill_value=None):
        """
        Select elements by position.

        Parameters
        ----------
        indexer : sequence of int
        allow_fill : bool, default False
            If True, ``-1`` in ``indexer`` yields ``fill_value`` and any value
            below ``-1`` is rejected. If False, positions are passed to the
            underlying ndarray unchanged.
        fill_value : object, optional
            Defaults to ``self.dtype.na_value`` when ``allow_fill`` is True.

        Returns
        -------
        ListArray

        Raises
        ------
        ValueError
            If ``allow_fill`` is True and ``indexer`` has a value below ``-1``.
        IndexError
            If a position is out of bounds.
        """
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check
            if (indexer < -1).any():
                raise ValueError(
                    "Invalid value in 'indexer'. With allow_fill=True all "
                    "values must be >= -1"
                )
            try:
                output = [
                    self.data[loc] if loc != -1 else fill_value for loc in indexer
                ]
            except IndexError as err:
                raise IndexError(msg) from err
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError as err:
                raise IndexError(msg) from err

        return self._from_sequence(output)

    def copy(self):
        """Return a new ListArray with its own buffer (list elements are shared)."""
        # ``self.data[:]`` would only be a view onto the same buffer, so
        # writes to the "copy" would show up in the original.
        return type(self)(self.data.copy())

    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        Returns ``self`` (or a copy when ``copy`` is True) if ``dtype`` is this
        array's own dtype; otherwise returns a NumPy array.
        """
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        elif pd.api.types.is_string_dtype(dtype) and not pd.api.types.is_object_dtype(
            dtype
        ):
            # numpy has problems with astype(str) for nested elements
            return np.array([str(x) for x in self.data], dtype=dtype)
        return np.array(self.data, dtype=dtype, copy=copy)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate the underlying buffers of several ListArrays."""
        data = np.concatenate([x.data for x in to_concat])
        return cls(data)
|
||
|
||
def make_data():
    """Return a length-100 object ndarray of random lists of ASCII letters.

    Each list has between 0 and 10 single-character strings.
    """
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    # NOTE(review): the TODO above mentions dicts but this builds lists; it
    # looks carried over from elsewhere - confirm before acting on it.
    data = np.empty(100, dtype=object)
    # Assign one slot at a time so NumPy never tries to treat the lists as a
    # 2-D block (which would happen if they all had the same length).
    for i in range(len(data)):
        data[i] = [
            random.choice(string.ascii_letters) for _ in range(random.randint(0, 10))
        ]
    return data
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import pytest | ||
|
||
import pandas as pd | ||
|
||
from .array import ListArray, ListDtype, make_data | ||
|
||
|
||
@pytest.fixture
def dtype():
    """Return a ``ListDtype`` instance."""
    return ListDtype()
|
||
|
||
@pytest.fixture
def data():
    """Length-100 ListArray for semantics test."""
    values = make_data()

    # Regenerate until the first two lists differ in length.
    while len(values[0]) == len(values[1]):
        values = make_data()

    return ListArray(values)
|
||
|
||
def test_to_csv(data):
    """Check that ``Series.to_csv`` output contains the first list's ``str``."""
    # https://github.com/pandas-dev/pandas/issues/28840
    # array with list-likes fail when doing astype(str) on the numpy array
    # which was done in to_native_types
    s = pd.Series(data)
    res = s.to_csv()
    assert str(data[0]) in res
Uh oh!
There was an error while loading. Please reload this page.