From c586522ad7aef5d24b7914d60046de32810231bd Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 24 May 2021 22:17:41 +0200 Subject: [PATCH 01/80] add future warning --- pandas/core/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6d7c803685255..ce54a12a49a33 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3260,6 +3260,12 @@ def to_latex( \bottomrule \end{{tabular}} """ + warnings.warn( + "this method is deprecated in favour of `Styler.to_latex()`", + FutureWarning, + stacklevel=2, + ) + # Get defaults from the pandas config if self.ndim == 1: self = self.to_frame() From e03e01476a32a08f5e466572642abf5ab918a0e4 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 16 Aug 2021 19:30:12 +0200 Subject: [PATCH 02/80] build format_index mechanics --- pandas/io/formats/style.py | 44 ++------- pandas/io/formats/style_render.py | 103 +++++++++++++++++++- pandas/io/formats/templates/html_table.tpl | 4 +- pandas/tests/io/formats/style/test_style.py | 2 + 4 files changed, 112 insertions(+), 41 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index a72de753d6a8a..9481e2898c1e8 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -32,7 +32,6 @@ import pandas as pd from pandas import ( - Index, IndexSlice, RangeIndex, ) @@ -57,6 +56,7 @@ Tooltips, maybe_convert_css_to_tuples, non_reducing_slice, + refactor_levels, ) try: @@ -1074,6 +1074,8 @@ def _copy(self, deepcopy: bool = False) -> Styler: ] deep = [ # nested lists or dicts "_display_funcs", + "_display_funcs_index", + "_display_funcs_columns", "hidden_rows", "hidden_columns", "ctx", @@ -1262,7 +1264,7 @@ def _apply_index( f"`axis` must be one of 0, 1, 'index', 'columns', got {axis}" ) - levels_ = _refactor_levels(level, obj) + levels_ = refactor_levels(level, obj) data = DataFrame(obj.to_list()).loc[:, levels_] if method == "apply": @@ -2052,7 +2054,7 
@@ def hide_index( raise ValueError("`subset` and `level` cannot be passed simultaneously") if subset is None: - levels_ = _refactor_levels(level, self.index) + levels_ = refactor_levels(level, self.index) self.hide_index_ = [ True if lev in levels_ else False for lev in range(self.index.nlevels) ] @@ -2164,7 +2166,7 @@ def hide_columns( raise ValueError("`subset` and `level` cannot be passed simultaneously") if subset is None: - levels_ = _refactor_levels(level, self.columns) + levels_ = refactor_levels(level, self.columns) self.hide_columns_ = [ True if lev in levels_ else False for lev in range(self.columns.nlevels) ] @@ -3358,37 +3360,3 @@ def css_calc(x, left: float, right: float, align: str): index=data.index, columns=data.columns, ) - - -def _refactor_levels( - level: Level | list[Level] | None, - obj: Index, -) -> list[Level]: - """ - Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``. - - Parameters - ---------- - level : int, str, list - Original ``level`` arg supplied to above methods. 
- obj: - Either ``self.index`` or ``self.columns`` - - Returns - ------- - list : refactored arg with a list of levels to hide - """ - if level is None: - levels_: list[Level] = list(range(obj.nlevels)) - elif isinstance(level, int): - levels_ = [level] - elif isinstance(level, str): - levels_ = [obj._get_level_number(level)] - elif isinstance(level, list): - levels_ = [ - obj._get_level_number(lev) if not isinstance(lev, int) else lev - for lev in level - ] - else: - raise ValueError("`level` must be of type `int`, `str` or list of such") - return levels_ diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index aa58b3abbd06c..b381e52ab7b82 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -22,6 +22,7 @@ from pandas._config import get_option from pandas._libs import lib +from pandas._typing import Level from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.generic import ABCSeries @@ -108,7 +109,13 @@ def __init__( self._todo: list[tuple[Callable, tuple, dict]] = [] self.tooltips: Tooltips | None = None def_precision = get_option("display.precision") - self._display_funcs: DefaultDict[ # maps (row, col) -> formatting function + self._display_funcs: DefaultDict[ # maps (row, col) -> format func + tuple[int, int], Callable[[Any], str] + ] = defaultdict(lambda: partial(_default_formatter, precision=def_precision)) + self._display_funcs_index: DefaultDict[ # maps (row, level) -> format func + tuple[int, int], Callable[[Any], str] + ] = defaultdict(lambda: partial(_default_formatter, precision=def_precision)) + self._display_funcs_columns: DefaultDict[ # maps (level, col) -> format func tuple[int, int], Callable[[Any], str] ] = defaultdict(lambda: partial(_default_formatter, precision=def_precision)) @@ -346,6 +353,7 @@ def _translate_header( f"{col_heading_class} level{r} col{c}", value, _is_visible(c, r, col_lengths), + display_value=self._display_funcs_columns[(r, 
c)](value), attributes=( f'colspan="{col_lengths.get((r, c), 0)}"' if col_lengths.get((r, c), 0) > 1 @@ -502,6 +510,7 @@ def _translate_body( f"{row_heading_class} level{c} row{r}", value, (_is_visible(r, c, idx_lengths) and not self.hide_index_[c]), + display_value=self._display_funcs_index[(r, c)](value), attributes=( f'rowspan="{idx_lengths.get((c, r), 0)}"' if idx_lengths.get((c, r), 0) > 1 @@ -791,6 +800,64 @@ def format( return self + def format_index( + self, + formatter: ExtFormatter | None = None, + axis: int | str = 0, + level: Level | list[Level] | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + ) -> StylerRenderer: + r""" """ + if axis == 0: + display_funcs_, obj = self._display_funcs_index, self.index + elif axis == 1: + display_funcs_, obj = self._display_funcs_columns, self.columns + + levels_ = refactor_levels(level, obj) + + if all( + ( + formatter is None, + precision is None, + decimal == ".", + thousands is None, + na_rep is None, + escape is None, + ) + ): + display_funcs_.clear() + return self # clear the formatter / revert to default and avoid looping + + if not isinstance(formatter, dict): + formatter = {level: formatter for level in levels_} + else: + formatter = { + obj._get_level_number(level): formatter_ + for level, formatter_ in formatter.items() + } + + for level in set(formatter.keys()).union(levels_): + format_func = _maybe_wrap_formatter( + formatter.get(level), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + ) + + for i in range(len(obj)): + if axis == 0: + display_funcs_[(i, level)] = format_func + else: + display_funcs_[(level, i)] = format_func + + return self + def _element( html_element: str, @@ -1113,6 +1180,40 @@ def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: return style +def refactor_levels( + level: Level | list[Level] | None, + obj: Index, 
+) -> list[Level]: + """ + Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``. + + Parameters + ---------- + level : int, str, list + Original ``level`` arg supplied to above methods. + obj: + Either ``self.index`` or ``self.columns`` + + Returns + ------- + list : refactored arg with a list of levels to hide + """ + if level is None: + levels_: list[Level] = list(range(obj.nlevels)) + elif isinstance(level, int): + levels_ = [level] + elif isinstance(level, str): + levels_ = [obj._get_level_number(level)] + elif isinstance(level, list): + levels_ = [ + obj._get_level_number(lev) if not isinstance(lev, int) else lev + for lev in level + ] + else: + raise ValueError("`level` must be of type `int`, `str` or list of such") + return levels_ + + class Tooltips: """ An extension to ``Styler`` that allows for and manipulates tooltips on hover diff --git a/pandas/io/formats/templates/html_table.tpl b/pandas/io/formats/templates/html_table.tpl index 3e3a40b9fdaa6..8cf3ed00fc991 100644 --- a/pandas/io/formats/templates/html_table.tpl +++ b/pandas/io/formats/templates/html_table.tpl @@ -21,13 +21,13 @@ {% if exclude_styles %} {% for c in r %} {% if c.is_visible != False %} - <{{c.type}} {{c.attributes}}>{{c.value}} + <{{c.type}} {{c.attributes}}>{{c.display_value}} {% endif %} {% endfor %} {% else %} {% for c in r %} {% if c.is_visible != False %} - <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.value}} + <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}} {% endif %} {% endfor %} {% endif %} diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 6cc4b889d369a..0276afe4fbc3e 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -51,6 +51,8 @@ def mi_styler_comp(mi_styler): 
mi_styler.hide_index([("i0", "i1_a")]) mi_styler.set_table_attributes('class="box"') mi_styler.format(na_rep="MISSING", precision=3) + mi_styler.format_index(precision=2, axis=0) + mi_styler.format_index(precision=4, axis=1) mi_styler.highlight_max(axis=None) mi_styler.applymap_index(lambda x: "color: white;", axis=0) mi_styler.applymap_index(lambda x: "color: black;", axis=1) From fb862791288c69e833ce0ba4465dd511c7ee9764 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 17 Aug 2021 18:02:19 +0200 Subject: [PATCH 03/80] test index formatter display_value, and clearing --- pandas/tests/io/formats/style/test_format.py | 42 +++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 299643028c141..eda0e2eeecd34 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -34,6 +34,28 @@ def test_display_format(styler): assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("columns", [True, False]) +def test_display_format_index(styler, index, columns): + exp_index = ["x", "y"] + if index: + styler.format_index(lambda v: v.upper(), axis=0) + exp_index = ["X", "Y"] + + exp_columns = ["A", "B"] + if columns: + styler.format_index(lambda v: v.lower(), axis=1) + exp_columns = ["a", "b"] + + ctx = styler._translate(True, True) + + for r, row in enumerate(ctx["body"]): + assert row[0]["display_value"] == exp_index[r] + + for c, col in enumerate(ctx["head"][1:]): + assert col["display_value"] == exp_columns[c] + + def test_format_dict(styler): ctx = styler.format({"A": "{:0.1f}", "B": "{0:.2%}"})._translate(True, True) assert ctx["body"][0][1]["display_value"] == "0.0" @@ -90,12 +112,20 @@ def test_format_non_numeric_na(): assert ctx["body"][1][2]["display_value"] == "-" -def test_format_clear(styler): - 
assert (0, 0) not in styler._display_funcs # using default - styler.format("{:.2f") - assert (0, 0) in styler._display_funcs # formatter is specified - styler.format() - assert (0, 0) not in styler._display_funcs # formatter cleared to default +@pytest.mark.parametrize( + "func, attr, kwargs", + [ + ("format", "_display_funcs", {}), + ("format_index", "_display_funcs_index", {"axis": 0}), + ("format_index", "_display_funcs_columns", {"axis": 1}), + ], +) +def test_format_clear(styler, func, attr, kwargs): + assert (0, 0) not in getattr(styler, attr) # using default + getattr(styler, func)("{:.2f}", **kwargs) + assert (0, 0) in getattr(styler, attr) # formatter is specified + getattr(styler, func)(**kwargs) + assert (0, 0) not in getattr(styler, attr) # formatter cleared to default @pytest.mark.parametrize( From 846e5a21e885f6bfb5a304a461d475338d0f3a35 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 17 Aug 2021 19:16:52 +0200 Subject: [PATCH 04/80] prelim doc string --- pandas/io/formats/style_render.py | 130 +++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index a1d3dd8d30848..79bb6f8853fcb 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -811,7 +811,135 @@ def format_index( thousands: str | None = None, escape: str | None = None, ) -> StylerRenderer: - r""" """ + r""" + Format the text display value of index labels or column headers. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + formatter : str, callable, dict or None + Object to define how values are displayed. See notes. + axis : int, str + Whether to apply the formatter to the index or column headers. + level : int, str, list + The level(s) over which to apply the generic formatter. + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied. 
+ precision : int, optional + Floating point precision to use for display purposes, if not determined by + the specified ``formatter``. + decimal : str, default "." + Character used as decimal separator for floats, complex and integers + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. + + Returns + ------- + self : Styler + + Notes + ----- + This method assigns a formatting function, ``formatter``, to each level label + in the DataFrame's index or column headers. If ``formatter`` is ``None``, + then the default formatter is used. + If a callable then that function should take a label value as input and return + a displayable representation, such as a string. If ``formatter`` is + given as a string this is assumed to be a valid Python format specification + and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given, + keys should correspond to MultiIndex level numbers or names, and values should + be string or callable, as above. + + The default formatter currently expresses floats and complex numbers with the + pandas display precision unless using the ``precision`` argument here. The + default formatter does not adjust the representation of missing values unless + the ``na_rep`` argument is used. + + The ``level`` argument defines which levels of a MultiIndex to apply the + method to. If the ``formatter`` argument is given in dict form but does + not include all levels within the level argument then these unspecified levels + will have the default formatter applied. 
Any levels in the formatter dict + specifically excluded from the level argument will raise a ``KeyError``. + + When using a ``formatter`` string the dtypes must be compatible, otherwise a + `ValueError` will be raised. + + Examples + -------- + Using ``na_rep`` and ``precision`` with the default ``formatter`` + + >>> df = pd.DataFrame([[np.nan, 1.0, 'A'], [2.0, np.nan, 3.0]]) + >>> df.style.format(na_rep='MISS', precision=3) # doctest: +SKIP + 0 1 2 + 0 MISS 1.000 A + 1 2.000 MISS 3.000 + + Using a ``formatter`` specification on consistent column dtypes + + >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) # doctest: +SKIP + 0 1 2 + 0 MISS 1.00 A + 1 2.00 MISS 3.000000 + + Using the default ``formatter`` for unspecified columns + + >>> df.style.format({0: '{:.2f}', 1: '£ {:.1f}'}, na_rep='MISS', precision=1) + ... # doctest: +SKIP + 0 1 2 + 0 MISS £ 1.0 A + 1 2.00 MISS 3.0 + + Multiple ``na_rep`` or ``precision`` specifications under the default + ``formatter``. + + >>> df.style.format(na_rep='MISS', precision=1, subset=[0]) + ... .format(na_rep='PASS', precision=2, subset=[1, 2]) # doctest: +SKIP + 0 1 2 + 0 MISS 1.00 A + 1 2.0 PASS 3.00 + + Using a callable ``formatter`` function. + + >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' + >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS') + ... # doctest: +SKIP + 0 1 2 + 0 MISS 1.0000 STRING + 1 2.0 MISS FLOAT + + Using a ``formatter`` with HTML ``escape`` and ``na_rep``. + + >>> df = pd.DataFrame([['
', '"A&B"', None]]) + >>> s = df.style.format( + ... '{0}', escape="html", na_rep="NA" + ... ) + >>> s.to_html() # doctest: +SKIP + ... + <div></div> + "A&B" + NA + ... + + Using a ``formatter`` with LaTeX ``escape``. + + >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]]) + >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex() + ... # doctest: +SKIP + \begin{tabular}{ll} + {} & {0} \\ + 0 & \textbf{123} \\ + 1 & \textbf{\textasciitilde \space \textasciicircum } \\ + 2 & \textbf{\$\%\#} \\ + \end{tabular} + """ if axis == 0: display_funcs_, obj = self._display_funcs_index, self.index elif axis == 1: From 7e9400ad594cdcac895c0f0ad18b8aa62a737d1f Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 18 Aug 2021 18:14:55 +0200 Subject: [PATCH 05/80] format_index docs --- pandas/io/formats/style_render.py | 82 +++++++++++++------------------ 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 79bb6f8853fcb..6f52b5801b614 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -867,7 +867,7 @@ def format_index( method to. If the ``formatter`` argument is given in dict form but does not include all levels within the level argument then these unspecified levels will have the default formatter applied. Any levels in the formatter dict - specifically excluded from the level argument will raise a ``KeyError``. + specifically excluded from the level argument will be ignored. When using a ``formatter`` string the dtypes must be compatible, otherwise a `ValueError` will be raised. 
@@ -876,80 +876,66 @@ def format_index( -------- Using ``na_rep`` and ``precision`` with the default ``formatter`` - >>> df = pd.DataFrame([[np.nan, 1.0, 'A'], [2.0, np.nan, 3.0]]) - >>> df.style.format(na_rep='MISS', precision=3) # doctest: +SKIP - 0 1 2 - 0 MISS 1.000 A - 1 2.000 MISS 3.000 + >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0]]) + >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP + 2.000 MISS 4.000 + 0 1 2 3 - Using a ``formatter`` specification on consistent column dtypes + Using a ``formatter`` specification on consistent dtypes in a level - >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) # doctest: +SKIP - 0 1 2 - 0 MISS 1.00 A - 1 2.00 MISS 3.000000 + >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP + 2.00 MISS 4.00 + 0 1 2 3 - Using the default ``formatter`` for unspecified columns + Using the default ``formatter`` for unspecified levels - >>> df.style.format({0: '{:.2f}', 1: '£ {:.1f}'}, na_rep='MISS', precision=1) + >>> df = pd.DataFrame([[1, 2, 3]], + ... columns=pd.MultiIndex.from_arrays([["a", "a", "b"],[2, np.nan, 4]])) + >>> df.style.format_index({0: lambda v: upper(v)}, axis=1, precision=1) ... # doctest: +SKIP - 0 1 2 - 0 MISS £ 1.0 A - 1 2.00 MISS 3.0 - - Multiple ``na_rep`` or ``precision`` specifications under the default - ``formatter``. - - >>> df.style.format(na_rep='MISS', precision=1, subset=[0]) - ... .format(na_rep='PASS', precision=2, subset=[1, 2]) # doctest: +SKIP - 0 1 2 - 0 MISS 1.00 A - 1 2.0 PASS 3.00 + A B + 2.0 nan 4.0 + 0 1 2 3 Using a callable ``formatter`` function. >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' - >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS') + >>> df.style.format_index(func, axis=1, na_rep='MISS') ... # doctest: +SKIP - 0 1 2 - 0 MISS 1.0000 STRING - 1 2.0 MISS FLOAT + STRING STRING + FLOAT MISS FLOAT + 0 1 2 3 Using a ``formatter`` with HTML ``escape`` and ``na_rep``. 
- >>> df = pd.DataFrame([['
', '"A&B"', None]]) - >>> s = df.style.format( - ... '{0}', escape="html", na_rep="NA" - ... ) - >>> s.to_html() # doctest: +SKIP - ... - <div></div> - "A&B" - NA + >>> df = pd.DataFrame([[1, 2, 3]], columns=['"A"', 'A&B', None]) + >>> s = df.style.format_index('$ {0}', axis=1, escape="html", na_rep="NA") + $ "A" + $ A&B + NA ... Using a ``formatter`` with LaTeX ``escape``. - >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]]) - >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex() + >>> df = pd.DataFrame([[1, 2, 3]], columns=["123", "~", "$%#"]) + >>> df.style.format_index("\\textbf{{{}}}", escape="latex", axis=1).to_latex() ... # doctest: +SKIP - \begin{tabular}{ll} - {} & {0} \\ - 0 & \textbf{123} \\ - 1 & \textbf{\textasciitilde \space \textasciicircum } \\ - 2 & \textbf{\$\%\#} \\ + \begin{tabular}{lrrr} + {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\ + 0 & 1 & 2 & 3 \\ \end{tabular} """ + axis = self.data._get_axis_number(axis) if axis == 0: display_funcs_, obj = self._display_funcs_index, self.index - elif axis == 1: + else: display_funcs_, obj = self._display_funcs_columns, self.columns - levels_ = refactor_levels(level, obj) if all( ( formatter is None, + level is None, precision is None, decimal == ".", thousands is None, @@ -968,7 +954,7 @@ def format_index( for level, formatter_ in formatter.items() } - for level in set(formatter.keys()).union(levels_): + for level in levels_: format_func = _maybe_wrap_formatter( formatter.get(level), na_rep=na_rep, From 87a6c88b2feccdb67b3f18b553ba713a62e43283 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 19 Aug 2021 21:15:03 +0200 Subject: [PATCH 06/80] refactor for perf --- pandas/io/formats/style_render.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 62dfcfd5662b9..5873903ec302f 100644 --- a/pandas/io/formats/style_render.py +++ 
b/pandas/io/formats/style_render.py @@ -954,9 +954,9 @@ def format_index( for level, formatter_ in formatter.items() } - for level in levels_: + for lvl in levels_: format_func = _maybe_wrap_formatter( - formatter.get(level), + formatter.get(lvl), na_rep=na_rep, precision=precision, decimal=decimal, @@ -964,11 +964,8 @@ def format_index( escape=escape, ) - for i in range(len(obj)): - if axis == 0: - display_funcs_[(i, level)] = format_func - else: - display_funcs_[(level, i)] = format_func + for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: + display_funcs_[idx] = format_func return self From 9c969adeacd2f55a404bc18605f589b519ab415d Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 20 Aug 2021 07:25:37 +0200 Subject: [PATCH 07/80] add test --- pandas/tests/io/formats/style/test_format.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index eda0e2eeecd34..590b8ec700661 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -62,6 +62,12 @@ def test_format_dict(styler): assert ctx["body"][0][2]["display_value"] == "-60.90%" +def test_format_index_dict(styler): + ctx = styler.format_index({0: lambda v: v.upper()})._translate(True, True) + for i, val in enumerate(["X", "Y"]): + assert ctx["body"][i][0]["display_value"] == val + + def test_format_string(styler): ctx = styler.format("{:.2f}")._translate(True, True) assert ctx["body"][0][1]["display_value"] == "0.00" From 26f390647ce79d3ebfefddaeb91001a181e4fd4c Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 21 Aug 2021 11:03:15 +0200 Subject: [PATCH 08/80] add tests: escape --- pandas/tests/io/formats/style/test_format.py | 22 +++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 
590b8ec700661..13d1fb2c72cb8 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -2,6 +2,7 @@ import pytest from pandas import ( + NA, DataFrame, IndexSlice, NaT, @@ -39,13 +40,13 @@ def test_display_format(styler): def test_display_format_index(styler, index, columns): exp_index = ["x", "y"] if index: - styler.format_index(lambda v: v.upper(), axis=0) + styler.format_index(lambda v: v.upper(), axis=0) # test callable exp_index = ["X", "Y"] exp_columns = ["A", "B"] if columns: - styler.format_index(lambda v: v.lower(), axis=1) - exp_columns = ["a", "b"] + styler.format_index("*{}*", axis=1) # test string + exp_columns = ["*A*", "*B*"] ctx = styler._translate(True, True) @@ -103,6 +104,14 @@ def test_format_with_na_rep(): assert ctx["body"][1][2]["display_value"] == "120.00%" +def test_format_index_with_na_rep(): + df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA]) + ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "A" + for i in [2, 3, 4, 5]: + assert ctx["head"][0][i]["display_value"] == "--" + + def test_format_non_numeric_na(): # GH 21527 28358 df = DataFrame( @@ -159,6 +168,13 @@ def test_format_escape_html(escape, exp): expected = f'&{exp}&' assert expected in s.to_html() + # also test format_index() + styler = Styler(DataFrame(columns=[chars]), uuid_len=0) + styler.format_index("&{0}&", escape=None, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&" + styler.format_index("&{0}&", escape=escape, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&" + def test_format_escape_na_rep(): # tests the na_rep is not escaped From ec404189cf08f2219a5462bb927d5a8bc0c196da Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 21 Aug 2021 11:09:35 +0200 Subject: [PATCH 09/80] add tests: escape na_rep --- 
pandas/tests/io/formats/style/test_format.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 13d1fb2c72cb8..f5ae822b475c4 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -185,6 +185,14 @@ def test_format_escape_na_rep(): assert ex in s.to_html() assert expected2 in s.to_html() + # also test for format_index() + df = DataFrame(columns=['<>&"', None]) + styler = Styler(df, uuid_len=0) + styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1) + ctx = styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "X&<>&">X" + assert ctx["head"][0][2]["display_value"] == "&" + def test_format_escape_floats(styler): # test given formatter for number format is not impacted by escape From 0de5397741c26ff72448bd74a902c31170b91b35 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 21 Aug 2021 11:16:56 +0200 Subject: [PATCH 10/80] add tests: raises --- pandas/tests/io/formats/style/test_format.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index f5ae822b475c4..ba6c364482ad1 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -206,9 +206,10 @@ def test_format_escape_floats(styler): @pytest.mark.parametrize("formatter", [5, True, [2.0]]) -def test_format_raises(styler, formatter): +@pytest.mark.parametrize("func", ["format", "format_index"]) +def test_format_raises(styler, formatter, func): with pytest.raises(TypeError, match="expected str or callable"): - styler.format(formatter) + getattr(styler, func)(formatter) def test_format_with_precision(): From 6fe8285993a48e6cda1032e80630a17af3f4b4d5 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 21 Aug 2021 13:00:15 +0200 Subject: 
[PATCH 11/80] test decimal and thousands --- pandas/tests/io/formats/style/test_format.py | 36 +++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index ba6c364482ad1..ee13322f1253a 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -271,41 +271,43 @@ def test_format_subset(): @pytest.mark.parametrize("formatter", [None, "{:,.1f}"]) @pytest.mark.parametrize("decimal", [".", "*"]) @pytest.mark.parametrize("precision", [None, 2]) -def test_format_thousands(formatter, decimal, precision): - s = DataFrame([[1000000.123456789]]).style # test float - result = s.format( +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_thousands(formatter, decimal, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float thousands="_", formatter=formatter, decimal=decimal, precision=precision )._translate(True, True) - assert "1_000_000" in result["body"][0][1]["display_value"] + assert "1_000_000" in result["body"][0][col]["display_value"] - s = DataFrame([[1000000]]).style # test int - result = s.format( + styler = DataFrame([[1000000]], index=[1000000]).style + result = getattr(styler, func)( # testing int thousands="_", formatter=formatter, decimal=decimal, precision=precision )._translate(True, True) - assert "1_000_000" in result["body"][0][1]["display_value"] + assert "1_000_000" in result["body"][0][col]["display_value"] - s = DataFrame([[1 + 1000000.123456789j]]).style # test complex - result = s.format( + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex thousands="_", formatter=formatter, decimal=decimal, precision=precision )._translate(True, True) - assert "1_000_000" in 
result["body"][0][1]["display_value"] + assert "1_000_000" in result["body"][0][col]["display_value"] @pytest.mark.parametrize("formatter", [None, "{:,.4f}"]) @pytest.mark.parametrize("thousands", [None, ",", "*"]) @pytest.mark.parametrize("precision", [None, 4]) -def test_format_decimal(formatter, thousands, precision): - s = DataFrame([[1000000.123456789]]).style # test float - result = s.format( +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_decimal(formatter, thousands, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float decimal="_", formatter=formatter, thousands=thousands, precision=precision )._translate(True, True) - assert "000_123" in result["body"][0][1]["display_value"] + assert "000_123" in result["body"][0][col]["display_value"] - s = DataFrame([[1 + 1000000.123456789j]]).style # test complex - result = s.format( + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex decimal="_", formatter=formatter, thousands=thousands, precision=precision )._translate(True, True) - assert "000_123" in result["body"][0][1]["display_value"] + assert "000_123" in result["body"][0][col]["display_value"] def test_str_escape_error(): From 6b61e31dab66b6cbd6003cce8638bc6bf13221a2 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 21 Aug 2021 17:15:50 +0200 Subject: [PATCH 12/80] test precision --- pandas/tests/io/formats/style/test_format.py | 44 ++++++++++---------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index ee13322f1253a..bcfbd0e12051e 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -212,28 +212,30 @@ def test_format_raises(styler, formatter, func): 
getattr(styler, func)(formatter) -def test_format_with_precision(): +@pytest.mark.parametrize( + "precision, expected", + [ + (1, ["1.0", "2.0", "3.2", "4.6"]), + (2, ["1.00", "2.01", "3.21", "4.57"]), + (3, ["1.000", "2.009", "3.212", "4.566"]), + ], +) +def test_format_with_precision(precision, expected): # Issue #13257 - df = DataFrame(data=[[1.0, 2.0090], [3.2121, 4.566]], columns=["a", "b"]) - s = Styler(df) - - ctx = s.format(precision=1)._translate(True, True) - assert ctx["body"][0][1]["display_value"] == "1.0" - assert ctx["body"][0][2]["display_value"] == "2.0" - assert ctx["body"][1][1]["display_value"] == "3.2" - assert ctx["body"][1][2]["display_value"] == "4.6" - - ctx = s.format(precision=2)._translate(True, True) - assert ctx["body"][0][1]["display_value"] == "1.00" - assert ctx["body"][0][2]["display_value"] == "2.01" - assert ctx["body"][1][1]["display_value"] == "3.21" - assert ctx["body"][1][2]["display_value"] == "4.57" - - ctx = s.format(precision=3)._translate(True, True) - assert ctx["body"][0][1]["display_value"] == "1.000" - assert ctx["body"][0][2]["display_value"] == "2.009" - assert ctx["body"][1][1]["display_value"] == "3.212" - assert ctx["body"][1][2]["display_value"] == "4.566" + df = DataFrame([[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566]) + styler = Styler(df) + styler.format(precision=precision) + styler.format_index(precision=precision, axis=1) + + ctx = styler._translate(True, True) + for col, exp in enumerate(expected): + assert ctx["body"][0][col + 1]["display_value"] == exp # format test + assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test + + +def test_format_index_level(): + # TODO + pass def test_format_subset(): From 666e460e3f2c8fa4e22b4a926af82f8e694d17e1 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 30 Aug 2021 18:53:41 +0200 Subject: [PATCH 13/80] whats new --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index be647e344f270..37822fc8d3dcd 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -70,7 +70,7 @@ Styler :class:`.Styler` has been further developed in 1.4.0. The following enhancements have been made: - - Styling of indexing has been added, with :meth:`.Styler.apply_index` and :meth:`.Styler.applymap_index`. These mirror the signature of the methods already used to style data values, and work with both HTML and LaTeX format (:issue:`41893`). + - Styling and formatting of indexes has been added, with :meth:`.Styler.apply_index`, :meth:`.Styler.applymap_index` and :meth:`.Styler.format_index`. These mirror the signature of the methods already used to style and format data values, and work with both HTML and LaTeX format (:issue:`41893`, :issue:`43101`). - :meth:`.Styler.bar` introduces additional arguments to control alignment and display (:issue:`26070`, :issue:`36419`), and it also validates the input arguments ``width`` and ``height`` (:issue:`42511`). - :meth:`.Styler.to_latex` introduces keyword argument ``environment``, which also allows a specific "longtable" entry through a separate jinja2 template (:issue:`41866`). 
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index`` and ``sparse_columns`` (:issue:`41946`) From 49bb7316346486b64c5f1893525fa1bc404547a1 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 30 Aug 2021 19:22:19 +0200 Subject: [PATCH 14/80] level tests --- pandas/tests/io/formats/style/test_format.py | 37 ++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index bcfbd0e12051e..ddb9e8dc9d7f3 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -5,6 +5,7 @@ NA, DataFrame, IndexSlice, + MultiIndex, NaT, Timestamp, ) @@ -233,9 +234,39 @@ def test_format_with_precision(precision, expected): assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test -def test_format_index_level(): - # TODO - pass +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize( + "level, expected", + [ + (0, ["X", "X", "_", "_"]), # level int + ("zero", ["X", "X", "_", "_"]), # level name + (1, ["_", "_", "X", "X"]), # other level int + ("one", ["_", "_", "X", "X"]), # other level name + ([0, 1], ["X", "X", "X", "X"]), # both levels + ([0, "zero"], ["X", "X", "_", "_"]), # level int and name simultaneous + ([0, "one"], ["X", "X", "X", "X"]), # both levels as int and name + (["one", "zero"], ["X", "X", "X", "X"]), # both level names, reversed + ], +) +def test_format_index_level(axis, level, expected): + midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"]) + df = DataFrame([[1, 2], [3, 4]]) + if axis == 0: + df.index = midx + else: + df.columns = midx + + styler = df.style.format_index(lambda v: "X", level=level, axis=axis) + ctx = styler._translate(True, True) + + if axis == 0: # compare index + result = [ctx["body"][s][0]["display_value"] for s in range(2)] + result += [ctx["body"][s][1]["display_value"] for s in range(2)] + else: 
# compare columns + result = [ctx["head"][0][s + 1]["display_value"] for s in range(2)] + result += [ctx["head"][1][s + 1]["display_value"] for s in range(2)] + + assert expected == result def test_format_subset(): From 044cd052093723b4a9a9d22c4f52806d7f8983a3 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 30 Aug 2021 20:46:42 +0200 Subject: [PATCH 15/80] user guide --- doc/source/user_guide/style.ipynb | 54 +++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 4de54c5d9471c..75d86e67466db 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -150,15 +150,14 @@ "\n", "### Formatting Values\n", "\n", - "Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value. To control the display value, the text is printed in each cell, and we can use the [.format()][formatfunc] method to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table or for individual columns. \n", + "Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value, in both data values and index or column headers. To control the display value, the text is printed in each cell as a string, and we can use the [.format()][formatfunc] and [.format_index()][formatfuncindex] methods to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table, or index, or for individual columns, or MultiIndex levels. 
\n", "\n", - "Additionally, the format function has a **precision** argument to specifically help formatting floats, as well as **decimal** and **thousands** separators to support other locales, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML or safe-LaTeX. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):`\n", - "\n", - "Here is an example of using the multiple options to control the formatting generally and with specific column formatters.\n", + "Additionally, the format function has a **precision** argument to specifically help formatting floats, as well as **decimal** and **thousands** separators to support other locales, an **na_rep** argument to display missing data, and an **escape** argument to help display safe-HTML or safe-LaTeX. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):` \n", "\n", "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n", "[format]: https://docs.python.org/3/library/string.html#format-specification-mini-language\n", - "[formatfunc]: ../reference/api/pandas.io.formats.style.Styler.format.rst" + "[formatfunc]: ../reference/api/pandas.io.formats.style.Styler.format.rst\n", + "[formatfuncindex]: ../reference/api/pandas.io.formats.style.Styler.format_index.rst" ] }, { @@ -173,6 +172,49 @@ " })" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using Styler to manipulate the display is a useful feature because maintaining the indexing and data values for other purposes gives greater control. You do not have to overwrite your DataFrame to display it how you like. Here is an example of using the formatting functions whilst still relying on the underlying data for indexing and calculations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n", + " index=pd.date_range(start=\"2021-01-01\", periods=10),\n", + " columns=[\"Tokyo\", \"Beijing\"])\n", + "\n", + "def rain_condition(v): \n", + " if v < 1.5:\n", + " return \"dry\"\n", + " elif v < 2.75:\n", + " return \"wet\"\n", + " return \"very wet\"\n", + "\n", + "def make_pretty(styler):\n", + " styler.set_caption(\"Rainfall Level\")\n", + " styler.format(rain_condition)\n", + " styler.format_index(lambda v: v.strftime(\"%A\"))\n", + " styler.background_gradient(axis=None, vmin=1, vmax=5)\n", + " return styler\n", + "\n", + "weather_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "weather_df.loc[\"2021-01-04\":\"2021-01-08\"].style.pipe(make_pretty)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -187,7 +229,7 @@ "\n", "Hiding does not change the integer arrangement of CSS classes, e.g. 
hiding the first two columns of a DataFrame means the column class indexing will start at `col2`, since `col0` and `col1` are simply ignored.\n", "\n", - "We can update our `Styler` object to hide some data and format the values.\n", + "We can update our `Styler` object from before to hide some data and format the values.\n", "\n", "[hideidx]: ../reference/api/pandas.io.formats.style.Styler.hide_index.rst\n", "[hidecols]: ../reference/api/pandas.io.formats.style.Styler.hide_columns.rst" From 8fc497da77a10416d65b8ef868d2906ab56bc64e Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 31 Aug 2021 12:52:18 +0200 Subject: [PATCH 16/80] typing fix --- pandas/io/formats/style_render.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 5873903ec302f..92dbaf9c47ab2 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1294,7 +1294,7 @@ def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: def refactor_levels( level: Level | list[Level] | None, obj: Index, -) -> list[Level]: +) -> list[int]: """ Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``. 
@@ -1310,7 +1310,7 @@ def refactor_levels( list : refactored arg with a list of levels to hide """ if level is None: - levels_: list[Level] = list(range(obj.nlevels)) + levels_: list[int] = list(range(obj.nlevels)) elif isinstance(level, int): levels_ = [level] elif isinstance(level, str): From 8fb9519bcc97bdd77a7be97120577381534de6fb Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 31 Aug 2021 13:04:03 +0200 Subject: [PATCH 17/80] user guide refactor --- doc/source/user_guide/style.ipynb | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 75d86e67466db..7b7df83274620 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -190,17 +190,17 @@ " columns=[\"Tokyo\", \"Beijing\"])\n", "\n", "def rain_condition(v): \n", - " if v < 1.5:\n", - " return \"dry\"\n", + " if v < 1.75:\n", + " return \"Dry\"\n", " elif v < 2.75:\n", - " return \"wet\"\n", - " return \"very wet\"\n", + " return \"Rain\"\n", + " return \"Heavy Rain\"\n", "\n", "def make_pretty(styler):\n", - " styler.set_caption(\"Rainfall Level\")\n", + " styler.set_caption(\"Weather Conditions\")\n", " styler.format(rain_condition)\n", " styler.format_index(lambda v: v.strftime(\"%A\"))\n", - " styler.background_gradient(axis=None, vmin=1, vmax=5)\n", + " styler.background_gradient(axis=None, vmin=1, vmax=5, cmap=\"YlGnBu\")\n", " return styler\n", "\n", "weather_df" @@ -2016,7 +2016,6 @@ } ], "metadata": { - "celltoolbar": "Edit Metadata", "kernelspec": { "display_name": "Python 3", "language": "python", From df7548c4c2039b1563cc703680e06e1baa27ce59 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 1 Sep 2021 08:53:34 +0200 Subject: [PATCH 18/80] input to axis --- pandas/io/formats/style_render.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 
f1b0d6922cd5a..f252d4cb0a130 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -833,7 +833,7 @@ def format_index( ---------- formatter : str, callable, dict or None Object to define how values are displayed. See notes. - axis : int, str + axis : {0, "index", 1, "columns"} Whether to apply the formatter to the index or column headers. level : int, str, list The level(s) over which to apply the generic formatter. From 5091d49b7dc74c02e6f286fd29ed38d60c69ae5b Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 1 Sep 2021 10:41:09 +0200 Subject: [PATCH 19/80] reformating --- pandas/core/generic.py | 190 ++++++++++++++++++++++++----------------- 1 file changed, 111 insertions(+), 79 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fac3d47647cf9..9f5cd8f76d147 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3115,27 +3115,31 @@ class (index) object 'bird' 'bird' 'mammal' 'mammal' def to_latex( self, buf=None, - columns=None, - col_space=None, + *, + encoding=None, header=True, index=True, - na_rep="NaN", - formatters=None, - float_format=None, - sparsify=None, + columns=None, + formatter=None, + na_rep=None, + precision=None, + decimal=".", + thousands=None, + escape=False, index_names=True, bold_rows=False, column_format=None, longtable=None, - escape=None, - encoding=None, - decimal=".", multicolumn=None, multicolumn_format=None, multirow=None, caption=None, label=None, position=None, + col_space=None, + formatters=None, + float_format=None, + sparsify=None, ): r""" Render object to a LaTeX tabular, longtable, or nested table/tabular. @@ -3150,35 +3154,77 @@ def to_latex( .. versionchanged:: 1.2.0 Added position argument, changed meaning of caption argument. + .. versionchanged:: 1.4.0 + Now uses ``Styler.to_latex`` implementation via jinja2 templating. + Parameters ---------- buf : str, Path or StringIO-like, optional, default None Buffer to write to. 
If None, the output is returned as a string. + encoding : str, optional + Character encoding setting for file output, defaults to "utf-8" if None. + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index`` value. + + .. versionadded:: 1.4.0 + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each + column. Defaults to ``pandas.options.styler.sparse.columns`` value. + .. versionadded:: 1.4.0 + + header : bool + Whether to print column headers. + + .. versionchanged:: 1.4.0 + index : bool + Whether to print index labels. columns : list of label, optional The subset of columns to write. Writes all columns by default. - col_space : int, optional - The minimum width of each column. - header : bool or list of str, default True - Write out the column names. If a list of strings is given, - it is assumed to be aliases for the column names. - index : bool, default True - Write row names (index). - na_rep : str, default 'NaN' - Missing data representation. - formatters : list of functions or dict of {{str: function}}, optional - Formatter functions to apply to columns' elements by position or - name. The result of each function must be a unicode string. - List must be of length equal to the number of columns. - float_format : one-parameter function or str, optional, default None - Formatter for floating point numbers. For example - ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will - both result in 0.1234 being formatted as 0.12. - sparsify : bool, optional - Set to False for a DataFrame with a hierarchical index to print - every multiindex key at each row. By default, the value will be - read from the config module. 
+ + formatter : str, callable, dict, optional + Object to define how values are displayed. See notes for ``Styler.format`` + + .. versionadded:: 1.4.0 + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied. + + .. versionchanged:: 1.4.0 + precision : int, optional + Floating point precision to use for display purposes, if not determined by + the specified ``formatter``. + + .. versionadded:: 1.4.0 + decimal : str, default "." + Character used as decimal separator for floats, complex and integers + + .. versionadded:: 1.4.0 + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers + + .. versionadded:: 1.4.0 + escape : bool, + Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{{``, ``}}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. + + .. versionchanged:: 1.4.0 + formatters : list, tuple or dict of one-param. functions, optional + + .. deprecated:: 1.4.0 + Use ``formatter`` instead, which is passed to ``Styler.format``. + float_format : one-parameter function, optional, default None + + .. deprecated:: 1.4.0 + Deprecated in favour of using arguments native to ``Styler.format`` index_names : bool, default True - Prints the names of the indexes. + + .. deprecated:: 1.4.0 + Remove the names of indexes before rendering. bold_rows : bool, default False Make the row labels bold in the output. column_format : str, optional @@ -3194,11 +3240,7 @@ def to_latex( By default, the value will be read from the pandas config module. When set to False prevents from escaping latex special characters in column names. - encoding : str, optional - A string representing the encoding to use in the output file, - defaults to 'utf-8'. - decimal : str, default '.' - Character recognized as decimal separator, e.g. ',' in Europe. 
+ multicolumn : bool, default True Use \multicolumn to enhance MultiIndex columns. The default will be read from the config module. @@ -3220,7 +3262,6 @@ def to_latex( .. versionchanged:: 1.2.0 Optionally allow caption to be a tuple ``(full_caption, short_caption)``. - label : str, optional The LaTeX label to be placed inside ``\label{{}}`` in the output. This is used with ``\ref{{}}`` in the main ``.tex`` file. @@ -3231,6 +3272,15 @@ def to_latex( ``\begin{{}}`` in the output. .. versionadded:: 1.2.0 + sparsify : bool + + .. deprecated:: 1.4.0 + Use ``sparse_index`` and ``sparse_columns`` instead. + col_space : int, optional + + .. deprecated:: 1.4.0 + Adding LaTeX styling commands renders spacing not applicable. + {returns} See Also -------- @@ -3253,53 +3303,35 @@ def to_latex( \bottomrule \end{{tabular}} """ - warnings.warn( - "this method is deprecated in favour of `Styler.to_latex()`", - FutureWarning, - stacklevel=2, - ) - - # Get defaults from the pandas config - if self.ndim == 1: - self = self.to_frame() - if longtable is None: - longtable = config.get_option("display.latex.longtable") - if escape is None: - escape = config.get_option("display.latex.escape") - if multicolumn is None: - multicolumn = config.get_option("display.latex.multicolumn") - if multicolumn_format is None: - multicolumn_format = config.get_option("display.latex.multicolumn_format") - if multirow is None: - multirow = config.get_option("display.latex.multirow") - - self = cast("DataFrame", self) - formatter = DataFrameFormatter( - self, - columns=columns, - col_space=col_space, + # warnings.warn( + # "this method is deprecated in favour of `Styler.to_latex()`", + # FutureWarning, + # stacklevel=2, + # ) + from pandas.io.formats.style import Styler + + styler = Styler( + self if self.ndim > 1 else self.to_frame(), + uuid="", + formatter=formatter, na_rep=na_rep, - header=header, - index=index, - formatters=formatters, - float_format=float_format, - bold_rows=bold_rows, - 
sparsify=sparsify, - index_names=index_names, - escape=escape, + precision=precision, decimal=decimal, + thousands=thousands, + escape="latex" if escape else None, ) - return DataFrameRenderer(formatter).to_latex( + + if not header: + styler.hide_columns() + if not index: + styler.hide_index() + if columns: + hidden = [col for col in styler.columns if col not in columns] + styler.hide_columns(hidden) + + return styler.to_latex( buf=buf, - column_format=column_format, - longtable=longtable, encoding=encoding, - multicolumn=multicolumn, - multicolumn_format=multicolumn_format, - multirow=multirow, - caption=caption, - label=label, - position=position, ) @final From c686adb0ed7ec1cd642ec5cca2be6e522616ba68 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 1 Sep 2021 14:34:26 +0200 Subject: [PATCH 20/80] add and document arg changes --- pandas/core/generic.py | 165 ++++++++++++++++++++++++++++------------- 1 file changed, 115 insertions(+), 50 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f5cd8f76d147..25c2317d3e588 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3117,29 +3117,38 @@ def to_latex( buf=None, *, encoding=None, + sparse_index=None, + sparse_columns=None, header=True, index=True, columns=None, + column_format=None, + position=None, + position_float=None, + hrules=False, + label=None, + caption=None, + multirow_align="c", + multicol_align="r", + siunitx=False, + environment=None, formatter=None, na_rep=None, precision=None, decimal=".", thousands=None, escape=False, - index_names=True, - bold_rows=False, - column_format=None, + bold_headers=False, longtable=None, multicolumn=None, multicolumn_format=None, multirow=None, - caption=None, - label=None, - position=None, + index_names=True, col_space=None, formatters=None, float_format=None, sparsify=None, + bold_rows=False, ): r""" Render object to a LaTeX tabular, longtable, or nested table/tabular. 
@@ -3183,7 +3192,71 @@ def to_latex( Whether to print index labels. columns : list of label, optional The subset of columns to write. Writes all columns by default. + column_format : str, optional + The LaTeX column specification placed in location: + + \\begin{{tabular}}{{}} + + Defaults to 'l' for index and + non-numeric data columns, and, for numeric data columns, + to 'r' by default, or 'S' if ``siunitx`` is ``True``. + + .. versionchanged:: 1.4.0 + position : str, optional + The LaTeX positional argument (e.g. 'h!') for tables, placed in location: + + \\begin{{table}}[] + + .. versionchanged:: 1.2.0 + position_float : {{"centering", "raggedleft", "raggedright"}}, optional + The LaTeX float command placed in location: + + \\begin{{table}}[] + + \\ + + Cannot be used if ``environment`` is "longtable". + + .. versionadded:: 1.4.0 + hrules : bool, default False + Set to `True` to add \\toprule, \\midrule and \\bottomrule from the + {{booktabs}} LaTeX package. + + .. versionadded:: 1.4.0 + label : str, optional + The LaTeX label included as: \\label{{