Skip to content

BUG: API Change with skiprows from v0.14.1 to v0.15.2 #9306

Closed
@tlmaloney

Description

@tlmaloney

The following worked in v0.14.1:

In [1]: import pandas as pd

In [2]: from StringIO import StringIO

In [3]: pd.__version__
Out[3]: '0.14.1'

In [4]: data = '#header\n\na,b,c\n1,2,3\n4,5,6'

In [5]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')

In [6]: df
Out[6]: 
   b  c
a      
1  2  3
4  5  6

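In v0.15.2 and the current development version, the same call no longer works when one of the skipped lines is blank: `skiprows=2` combined with `index_col='a'` raises `KeyError: 'a'`, although it still works when both skipped lines are non-blank. The workaround is to pass the `header` keyword explicitly (e.g. `header=1`).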

In [1]: import pandas as pd

In [2]: pd.__version__
Out[2]: '0.15.2-103-gfda5012'

In [3]: from StringIO import StringIO

In [4]: data = '#header\n#header\na,b,c\n1,2,3\n4,5,6'

In [5]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')

In [6]: df
Out[6]: 
   b  c
a      
1  2  3
4  5  6

In [7]: data = '#header\n\na,b,c\n1,2,3\n4,5,6'

In [8]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-8-26f3ae16e644> in <module>()
----> 1 df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    463                     skip_blank_lines=skip_blank_lines)
    464 
--> 465         return _read(filepath_or_buffer, kwds)
    466 
    467     parser_f.__name__ = name

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    249         return parser
    250 
--> 251     return parser.read()
    252 
    253 _parser_defaults = {

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
    708                 raise ValueError('skip_footer not supported for iteration')
    709 
--> 710         ret = self._engine.read(nrows)
    711 
    712         if self.options.get('as_recarray'):

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
   1177                     values = data.pop(i)
   1178                 else:
-> 1179                     values = data.pop(self.index_col[i])
   1180 
   1181                 values = self._maybe_parse_dates(values, i,

KeyError: 'a'

In [9]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a', header=1) # Need to specify header kwarg to make this work

In [10]: df
Out[10]: 
   b  c
a      
1  2  3
4  5  6

In [11]: data = '\n#header\na,b,c\n1,2,3\n4,5,6'

In [12]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-12-26f3ae16e644> in <module>()
----> 1 df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    463                     skip_blank_lines=skip_blank_lines)
    464 
--> 465         return _read(filepath_or_buffer, kwds)
    466 
    467     parser_f.__name__ = name

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    249         return parser
    250 
--> 251     return parser.read()
    252 
    253 _parser_defaults = {

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
    708                 raise ValueError('skip_footer not supported for iteration')
    709 
--> 710         ret = self._engine.read(nrows)
    711 
    712         if self.options.get('as_recarray'):

/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
   1177                     values = data.pop(i)
   1178                 else:
-> 1179                     values = data.pop(self.index_col[i])
   1180 
   1181                 values = self._maybe_parse_dates(values, i,

KeyError: 'a'

In [13]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a', header=1) # Need to specify header kwarg to make this work

In [14]: df
Out[14]: 
   b  c
a      
1  2  3
4  5  6

In [15]: df = pd.read_csv(StringIO(data), skiprows=2)

In [16]: df
Out[16]: 
Empty DataFrame
Columns: []
Index: [(a, b, c), (1, 2, 3), (4, 5, 6)]

In [17]: df = pd.read_csv(StringIO(data), skiprows=2, header=0)

In [18]: df
Out[18]: 
Empty DataFrame
Columns: []
Index: [(a, b, c), (1, 2, 3), (4, 5, 6)]

In [19]: df = pd.read_csv(StringIO(data), skiprows=2, header=1)

In [20]: df
Out[20]: 
   a  b  c
0  1  2  3
1  4  5  6

Metadata

Metadata

Assignees

No one assigned

    Labels

    IO CSV (read_csv, to_csv)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions