Skip to content

TST[string]: update expecteds for using_string_dtype to fix xfails #61727

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -444,13 +440,12 @@ def test_constructor_str_unknown(self):
with pytest.raises(ValueError, match="Unknown dtype"):
Categorical([1, 2], dtype="foo")

@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings"
)
def test_constructor_np_strs(self):
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
assert all(isinstance(x, np.str_) for x in cat.categories)
# We can't pass all-strings because the constructor would cast
# those to StringDtype post-PDEP14
cat = Categorical(["1", "0", "1", 2], [np.str_("0"), np.str_("1"), 2])
assert all(isinstance(x, (np.str_, int)) for x in cat.categories)

def test_constructor_from_categorical_with_dtype(self):
dtype = CategoricalDtype(["a", "b", "c"], ordered=True)
Expand Down
27 changes: 19 additions & 8 deletions pandas/tests/arrays/categorical/test_repr.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
Categorical,
Expand Down Expand Up @@ -77,17 +74,19 @@ def test_print_none_width(self):
with option_context("display.width", None):
assert exp == repr(a)

@pytest.mark.skipif(
using_string_dtype(),
reason="Change once infer_string is set to True by default",
)
def test_unicode_print(self):
def test_unicode_print(self, using_infer_string):
c = Categorical(["aaaaa", "bb", "cccc"] * 20)
expected = """\
['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc']
Length: 60
Categories (3, object): ['aaaaa', 'bb', 'cccc']"""

if using_infer_string:
expected = expected.replace(
"(3, object): ['aaaaa', 'bb', 'cccc']",
"(3, str): [aaaaa, bb, cccc]",
)

assert repr(c) == expected

c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
Expand All @@ -96,6 +95,12 @@ def test_unicode_print(self):
Length: 60
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501

if using_infer_string:
expected = expected.replace(
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
"(3, str): [ああああ, いいいいい, ううううううう]",
)

assert repr(c) == expected

# unicode option should not affect Categorical, as it doesn't care
Expand All @@ -106,6 +111,12 @@ def test_unicode_print(self):
Length: 60
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501

if using_infer_string:
expected = expected.replace(
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
"(3, str): [ああああ, いいいいい, ううううううう]",
)

assert repr(c) == expected

def test_categorical_repr(self):
Expand Down
12 changes: 2 additions & 10 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -745,10 +743,7 @@ def test_astype_tz_object_conversion(self, tz):
result = result.astype({"tz": "datetime64[ns, Europe/London]"})
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) GH#60639")
def test_astype_dt64_to_string(
self, frame_or_series, tz_naive_fixture, using_infer_string
):
def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture):
# GH#41409
tz = tz_naive_fixture

Expand All @@ -766,10 +761,7 @@ def test_astype_dt64_to_string(
item = result.iloc[0]
if frame_or_series is DataFrame:
item = item.iloc[0]
if using_infer_string:
assert item is np.nan
else:
assert item is pd.NA
assert item is pd.NA

# For non-NA values, we should match what we get for non-EA str
alt = obj.astype(str)
Expand Down
12 changes: 5 additions & 7 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -76,10 +74,7 @@ def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper):


class TestGroupBy:
# TODO(infer_string) resample sum introduces 0's
# https://github.com/pandas-dev/pandas/issues/60229
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_groupby_with_timegrouper(self):
def test_groupby_with_timegrouper(self, using_infer_string):
# GH 4161
# TimeGrouper requires a sorted index
# also verifies that the resultant index has the correct name
Expand Down Expand Up @@ -116,8 +111,11 @@ def test_groupby_with_timegrouper(self):
{"Buyer": 0, "Quantity": 0},
index=exp_dti,
)
# Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"
# Cast to object/str to avoid implicit cast when setting
# entry to "CarlCarlCarl"
expected = expected.astype({"Buyer": object})
if using_infer_string:
expected = expected.astype({"Buyer": "str"})
expected.iloc[0, 0] = "CarlCarlCarl"
expected.iloc[6, 0] = "CarlCarl"
expected.iloc[18, 0] = "Joe"
Expand Down
17 changes: 11 additions & 6 deletions pandas/tests/indexes/base_class/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype
import pandas._config.config as cf

from pandas import Index
Expand All @@ -16,7 +15,6 @@ def test_repr_is_valid_construction_code(self):
res = eval(repr(idx))
tm.assert_index_equal(res, idx)

@pytest.mark.xfail(using_string_dtype(), reason="repr different")
@pytest.mark.parametrize(
"index,expected",
[
Expand Down Expand Up @@ -77,11 +75,13 @@ def test_repr_is_valid_construction_code(self):
),
],
)
def test_string_index_repr(self, index, expected):
def test_string_index_repr(self, index, expected, using_infer_string):
result = repr(index)
if using_infer_string:
expected = expected.replace("dtype='object'", "dtype='str'")

assert result == expected

@pytest.mark.xfail(using_string_dtype(), reason="repr different")
@pytest.mark.parametrize(
"index,expected",
[
Expand Down Expand Up @@ -121,11 +121,16 @@ def test_string_index_repr(self, index, expected):
),
],
)
def test_string_index_repr_with_unicode_option(self, index, expected):
def test_string_index_repr_with_unicode_option(
self, index, expected, using_infer_string
):
# Enable Unicode option -----------------------------------------
with cf.option_context("display.unicode.east_asian_width", True):
result = repr(index)
assert result == expected

if using_infer_string:
expected = expected.replace("dtype='object'", "dtype='str'")
assert result == expected

def test_repr_summary(self):
with cf.option_context("display.max_seq_items", 10):
Expand Down
44 changes: 36 additions & 8 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -1395,8 +1393,7 @@ def test_unicode_name_in_footer(self):
sf = fmt.SeriesFormatter(s, name="\u05e2\u05d1\u05e8\u05d9\u05ea")
sf._get_footer() # should not raise exception

@pytest.mark.xfail(using_string_dtype(), reason="Fixup when arrow is default")
def test_east_asian_unicode_series(self):
def test_east_asian_unicode_series(self, using_infer_string):
# not aligned properly because of east asian width

# unicode index
Expand All @@ -1409,6 +1406,8 @@ def test_east_asian_unicode_series(self):
"ええええ D\ndtype: object",
]
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode values
Expand All @@ -1422,7 +1421,8 @@ def test_east_asian_unicode_series(self):
"dtype: object",
]
)

if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# both
Expand All @@ -1439,7 +1439,8 @@ def test_east_asian_unicode_series(self):
"dtype: object",
]
)

if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode footer
Expand All @@ -1452,6 +1453,8 @@ def test_east_asian_unicode_series(self):
"ああ あ\nいいいい いい\nう ううう\n"
"えええ ええええ\nName: おおおおおおお, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# MultiIndex
Expand Down Expand Up @@ -1495,6 +1498,8 @@ def test_east_asian_unicode_series(self):
"3 ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

s.index = ["ああ", "いいいい", "う", "えええ"]
Expand All @@ -1503,6 +1508,8 @@ def test_east_asian_unicode_series(self):
"えええ ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# Enable Unicode option -----------------------------------------
Expand All @@ -1516,6 +1523,8 @@ def test_east_asian_unicode_series(self):
"あ a\nいい bb\nううう CCC\n"
"ええええ D\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode values
Expand All @@ -1527,6 +1536,8 @@ def test_east_asian_unicode_series(self):
"a あ\nbb いい\nc ううう\n"
"ddd ええええ\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected
# both
s = Series(
Expand All @@ -1539,6 +1550,8 @@ def test_east_asian_unicode_series(self):
"う ううう\n"
"えええ ええええ\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode footer
Expand All @@ -1554,6 +1567,8 @@ def test_east_asian_unicode_series(self):
"えええ ええええ\n"
"Name: おおおおおおお, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# MultiIndex
Expand Down Expand Up @@ -1599,6 +1614,8 @@ def test_east_asian_unicode_series(self):
"3 ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

s.index = ["ああ", "いいいい", "う", "えええ"]
Expand All @@ -1608,6 +1625,8 @@ def test_east_asian_unicode_series(self):
"えええ ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# ambiguous unicode
Expand All @@ -1621,6 +1640,8 @@ def test_east_asian_unicode_series(self):
"¡¡ ううう\n"
"えええ ええええ\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

def test_float_trim_zeros(self):
Expand Down Expand Up @@ -1770,27 +1791,34 @@ def chck_ncols(self, s):
ncolsizes = len({len(line.strip()) for line in lines})
assert ncolsizes == 1

@pytest.mark.xfail(using_string_dtype(), reason="change when arrow is default")
def test_format_explicit(self):
def test_format_explicit(self, using_infer_string):
test_sers = gen_series_formatting()
with option_context("display.max_rows", 4, "display.show_dimensions", False):
res = repr(test_sers["onel"])
exp = "0 a\n1 a\n ..\n98 a\n99 a\ndtype: object"
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res
res = repr(test_sers["twol"])
exp = "0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype: object"
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res
res = repr(test_sers["asc"])
exp = (
"0 a\n1 ab\n ... \n4 abcde\n5 "
"abcdef\ndtype: object"
)
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res
res = repr(test_sers["desc"])
exp = (
"5 abcdef\n4 abcde\n ... \n1 ab\n0 "
"a\ndtype: object"
)
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res

def test_ncols(self):
Expand Down
Loading