From dd19a1d7a450aa5211de5a3e01cdc114500f60cc Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 11 Jun 2020 21:55:43 +0100 Subject: [PATCH 1/3] BUG: format pd.NA --- doc/source/whatsnew/v1.1.0.rst | 2 ++ pandas/_libs/missing.pyx | 6 ++++++ pandas/tests/scalar/test_na_scalar.py | 10 ++++++++++ 3 files changed, 18 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 92f7c0f6b59a3..9199739bbfbe9 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -908,6 +908,8 @@ Missing - Clarified documentation on interpolate with method =akima. The ``der`` parameter must be scalar or None (:issue:`33426`) - :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns lead to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`) - Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is no independing of the type of column names (:issue:`33956`) +- :class:`NA` will now always work when passed into a format string. Previously a ``ValueError`` was raised if any format parameters were supplied to the format string. 
+ For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``""`` (:issue:`xxxxx`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 6d4d1e95fe8c3..fdd06fe631b97 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -349,6 +349,12 @@ class NAType(C_NAType): def __repr__(self) -> str: return "" + def __format__(self, format_spec) -> str: + try: + return self.__repr__().__format__(format_spec) + except ValueError: + return self.__repr__() + def __bool__(self): raise TypeError("boolean value of NA is ambiguous") diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index a0e3f8984fbe4..44e620154f424 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -22,6 +22,16 @@ def test_repr(): assert str(NA) == "" +def test_format(): + assert format(NA) == "" + assert format(NA, ">10") == " " + assert format(NA, "xxx") == "" # accept arbitrary format strings + + assert "{}".format(NA) == "" + assert "{:>10}".format(NA) == " " + assert "{:xxx}".format(NA) == "" # accept arbitrary format strings + + def test_truthiness(): msg = "boolean value of NA is ambiguous" From 9c973762968bad4f6d24fd005a420a89c501aaa3 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 12 Jun 2020 23:12:01 +0100 Subject: [PATCH 2/3] changes --- doc/source/whatsnew/v1.1.0.rst | 3 +-- pandas/_libs/missing.pyx | 6 ++++++ pandas/tests/scalar/test_na_scalar.py | 12 ++++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9199739bbfbe9..2a112a9f141e1 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -908,8 +908,7 @@ Missing - Clarified documentation on interpolate with method =akima. 
The ``der`` parameter must be scalar or None (:issue:`33426`) - :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns lead to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`) - Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is no independing of the type of column names (:issue:`33956`) -- :class:`NA` will now always work when passed into a format string. Previously a ``ValueError`` was raised if any format parameters were supplied to the format string. - For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``""`` (:issue:`xxxxx`) +- Passing :class:`NA` into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``""`` (:issue:`34740`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index fdd06fe631b97..d117a65409cb4 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -350,6 +350,12 @@ class NAType(C_NAType): return "" def __format__(self, format_spec) -> str: + # accept same format_spec as np.nan + try: + format(np.nan, format_spec) + except ValueError: + raise ValueError("Invalid format specifier") + # if a string format_spec use it, else just return the repr try: return self.__repr__().__format__(format_spec) except ValueError: diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index 44e620154f424..d11648527f600 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -23,13 +23,21 @@ def test_repr(): def test_format(): + # GH-34740 assert format(NA) == 
"" assert format(NA, ">10") == " " - assert format(NA, "xxx") == "" # accept arbitrary format strings assert "{}".format(NA) == "" assert "{:>10}".format(NA) == " " - assert "{:xxx}".format(NA) == "" # accept arbitrary format strings + + +def test_format_raises(): + # GH-34740 + with pytest.raises(ValueError, match="Invalid format specifier"): + format(NA, "xxx") + + with pytest.raises(ValueError, match="Invalid format specifier"): + "{:xxx}".format(NA) def test_truthiness(): From c3f236df177582ba8c9ea330aa03284be4a878ed Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 13 Jun 2020 13:42:36 +0100 Subject: [PATCH 3/3] make NA.format__ more permissive --- pandas/_libs/missing.pyx | 6 ------ pandas/tests/scalar/test_na_scalar.py | 11 ++--------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index d117a65409cb4..fdd06fe631b97 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -350,12 +350,6 @@ class NAType(C_NAType): return "" def __format__(self, format_spec) -> str: - # accept same format_spec as np.nan - try: - format(np.nan, format_spec) - except ValueError: - raise ValueError("Invalid format specifier") - # if a string format_spec use it, else just return the repr try: return self.__repr__().__format__(format_spec) except ValueError: diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index d11648527f600..dc5eb15348c1b 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -26,18 +26,11 @@ def test_format(): # GH-34740 assert format(NA) == "" assert format(NA, ">10") == " " + assert format(NA, "xxx") == "" # NA is flexible, accept any format spec assert "{}".format(NA) == "" assert "{:>10}".format(NA) == " " - - -def test_format_raises(): - # GH-34740 - with pytest.raises(ValueError, match="Invalid format specifier"): - format(NA, "xxx") - - with pytest.raises(ValueError, match="Invalid format 
specifier"): - "{:xxx}".format(NA) + assert "{:xxx}".format(NA) == "" def test_truthiness():