diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 034a56b2ac0cb..9891a39aa9713 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -66,6 +66,35 @@ Current Behavior: result +.. _whatsnew_0240.enhancements.output_formatting: + +Output Formatting Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`) + +.. ipython:: python + + df = pd.DataFrame({ + 'int_col': [1, 2, 3, 4, 5], + 'text_col': ['alpha', 'beta', 'gamma', 'delta', 'epsilon'], + 'float_col': [0.0, 0.25, 0.5, 0.75, 1.0]}) + df.info() + +Previous Behavior: + +.. code-block:: ipython + + In [1]: df.info() + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + int_col 5 non-null int64 + text_col 5 non-null object + float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 200.0+ bytes + .. _whatsnew_0240.enhancements.other: Other Enhancements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 66f51cd0dae45..4d1663a2d3df8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2121,9 +2121,11 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, RangeIndex: 5 entries, 0 to 4 Data columns (total 3 columns): - int_col 5 non-null int64 - text_col 5 non-null object - float_col 5 non-null float64 + #. Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 200.0+ bytes @@ -2161,9 +2163,11 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): - column_1 1000000 non-null object - column_2 1000000 non-null object - column_3 1000000 non-null object + #. 
Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 22.9+ MB @@ -2171,9 +2175,11 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): - column_1 1000000 non-null object - column_2 1000000 non-null object - column_3 1000000 non-null object + #. Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 188.8 MB """ @@ -2192,48 +2198,62 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, return cols = self.columns + cols_count = len(cols) # hack if max_cols is None: - max_cols = get_option('display.max_info_columns', - len(self.columns) + 1) + max_cols = get_option('display.max_info_columns', cols_count + 1) max_rows = get_option('display.max_info_rows', len(self) + 1) if null_counts is None: - show_counts = ((len(self.columns) <= max_cols) and + show_counts = ((cols_count <= max_cols) and (len(self) < max_rows)) else: show_counts = null_counts - exceeds_info_cols = len(self.columns) > max_cols + exceeds_info_cols = cols_count > max_cols def _verbose_repr(): - lines.append('Data columns (total %d columns):' % - len(self.columns)) - space = max(len(pprint_thing(k)) for k in self.columns) + 4 + lines.append('Data columns (total ' + '{count} columns):'.format(count=cols_count)) + space = max(len(pprint_thing(k)) for k in cols) + len_column = len(pprint_thing('Column')) + space = max(space, len_column) + 4 + space_num = len(pprint_thing(cols_count)) + len_id = len(pprint_thing(' #.')) + space_num = max(space_num, len_id) + 2 counts = None - tmpl = "{count}{dtype}" + header = _put_str(' #.', space_num) + _put_str('Column', space) if show_counts: counts 
= self.count() if len(cols) != len(counts): # pragma: no cover raise AssertionError( 'Columns must equal counts ' - '({cols:d} != {counts:d})'.format( - cols=len(cols), counts=len(counts))) - tmpl = "{count} non-null {dtype}" - + '({cols_count} != {count})'.format( + cols_count=cols_count, count=len(counts))) + col_header = 'Non-Null Count & Dtype' + tmpl = '{count} non-null {dtype}' + else: + col_header = 'Dtype' + tmpl = '{count}{dtype}' + header += col_header + + lines.append(header) + lines.append(_put_str('-' * len_id, space_num) + + _put_str('-' * len_column, space) + + '-' * len(pprint_thing(col_header))) dtypes = self.dtypes - for i, col in enumerate(self.columns): + for i, col in enumerate(cols): dtype = dtypes.iloc[i] col = pprint_thing(col) - - count = "" + line_no = _put_str(' {num}'.format(num=i), space_num) + count = '' if show_counts: count = counts.iloc[i] - lines.append(_put_str(col, space) + tmpl.format(count=count, - dtype=dtype)) + lines.append(line_no + _put_str(col, space) + + tmpl.format(count=count, dtype=dtype)) def _non_verbose_repr(): lines.append(self.columns._summary(name='Columns')) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 668613c494a47..ac6bb8d78c072 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -217,13 +217,33 @@ def test_info_memory(self): RangeIndex: 2 entries, 0 to 1 Data columns (total 1 columns): - a 2 non-null int64 + #. Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 a 2 non-null int64 dtypes: int64(1) memory usage: {} bytes """.format(bytes)) assert result == expected + def test_info_without_null_counts(self): + df = pd.DataFrame({'a': [1, 2]}) + buf = StringIO() + df.info(buf=buf, null_counts=False) + buf.seek(0) + lines = buf.readlines() + result = ''.join(lines[:-1]) + expected = textwrap.dedent('''\ + + RangeIndex: 2 entries, 0 to 1 + Data columns (total 1 columns): + #. 
Column Dtype + --- ------ ----- + 0 a int64 + dtypes: int64(1) + ''') + assert result == expected + def test_info_wide(self): from pandas import set_option, reset_option io = StringIO() @@ -259,8 +279,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self): frame.info(buf=io) io.seek(0) lines = io.readlines() - assert 'a 1 non-null int64\n' == lines[3] - assert 'a 1 non-null float64\n' == lines[4] + assert ' 0 a 1 non-null int64\n' == lines[5] + assert ' 1 a 1 non-null float64\n' == lines[6] def test_info_shows_column_dtypes(self): dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]', @@ -274,12 +294,13 @@ def test_info_shows_column_dtypes(self): df.info(buf=buf) res = buf.getvalue() for i, dtype in enumerate(dtypes): - name = '%d %d non-null %s' % (i, n, dtype) + name = '%s %d non-null %s' % (i, n, dtype) + assert name in res def test_info_max_cols(self): df = DataFrame(np.random.randn(10, 5)) - for len_, verbose in [(5, None), (5, False), (10, True)]: + for len_, verbose in [(5, None), (5, False), (12, True)]: # For verbose always ^ setting ^ summarize ^ full output with option_context('max_info_columns', 4): buf = StringIO() @@ -287,8 +308,7 @@ def test_info_max_cols(self): res = buf.getvalue() assert len(res.strip().split('\n')) == len_ - for len_, verbose in [(10, None), (5, False), (10, True)]: - + for len_, verbose in [(12, None), (5, False), (12, True)]: # max_cols no exceeded with option_context('max_info_columns', 5): buf = StringIO() @@ -296,7 +316,7 @@ def test_info_max_cols(self): res = buf.getvalue() assert len(res.strip().split('\n')) == len_ - for len_, max_cols in [(10, 5), (5, 4)]: + for len_, max_cols in [(12, 5), (5, 4)]: # setting truncates with option_context('max_info_columns', 4): buf = StringIO()