Closed
Description
This worked in v0.14.1:
In [1]: import pandas as pd
In [2]: from StringIO import StringIO
In [3]: pd.__version__
Out[3]: '0.14.1'
In [4]: data = '#header\n\na,b,c\n1,2,3\n4,5,6'
In [5]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
In [6]: df
Out[6]:
b c
a
1 2 3
4 5 6
In v0.15.2 and the current dev version it doesn't. There is a workaround, which requires you to explicitly pass the `header` keyword argument.
In [1]: import pandas as pd
In [2]: pd.__version__
Out[2]: '0.15.2-103-gfda5012'
In [3]: from StringIO import StringIO
In [4]: data = '#header\n#header\na,b,c\n1,2,3\n4,5,6'
In [5]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
In [6]: df
Out[6]:
b c
a
1 2 3
4 5 6
In [7]: data = '#header\n\na,b,c\n1,2,3\n4,5,6'
In [8]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-8-26f3ae16e644> in <module>()
----> 1 df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
463 skip_blank_lines=skip_blank_lines)
464
--> 465 return _read(filepath_or_buffer, kwds)
466
467 parser_f.__name__ = name
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
249 return parser
250
--> 251 return parser.read()
252
253 _parser_defaults = {
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
708 raise ValueError('skip_footer not supported for iteration')
709
--> 710 ret = self._engine.read(nrows)
711
712 if self.options.get('as_recarray'):
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
1177 values = data.pop(i)
1178 else:
-> 1179 values = data.pop(self.index_col[i])
1180
1181 values = self._maybe_parse_dates(values, i,
KeyError: 'a'
In [9]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a', header=1) # Need to specify header kwarg to make this work
In [10]: df
Out[10]:
b c
a
1 2 3
4 5 6
In [11]: data = '\n#header\na,b,c\n1,2,3\n4,5,6'
In [12]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-12-26f3ae16e644> in <module>()
----> 1 df = pd.read_csv(StringIO(data), skiprows=2, index_col='a')
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
463 skip_blank_lines=skip_blank_lines)
464
--> 465 return _read(filepath_or_buffer, kwds)
466
467 parser_f.__name__ = name
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
249 return parser
250
--> 251 return parser.read()
252
253 _parser_defaults = {
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
708 raise ValueError('skip_footer not supported for iteration')
709
--> 710 ret = self._engine.read(nrows)
711
712 if self.options.get('as_recarray'):
/home/tmaloney/vedev/pandas-test-03/lib/python2.7/site-packages/pandas-0.15.2_103_gfda5012-py2.7-linux-x86_64.egg/pandas/io/parsers.pyc in read(self, nrows)
1177 values = data.pop(i)
1178 else:
-> 1179 values = data.pop(self.index_col[i])
1180
1181 values = self._maybe_parse_dates(values, i,
KeyError: 'a'
In [13]: df = pd.read_csv(StringIO(data), skiprows=2, index_col='a', header=1) # Need to specify header kwarg to make this work
In [14]: df
Out[14]:
b c
a
1 2 3
4 5 6
In [15]: df = pd.read_csv(StringIO(data), skiprows=2)
In [16]: df
Out[16]:
Empty DataFrame
Columns: []
Index: [(a, b, c), (1, 2, 3), (4, 5, 6)]
In [17]: df = pd.read_csv(StringIO(data), skiprows=2, header=0)
In [18]: df
Out[18]:
Empty DataFrame
Columns: []
Index: [(a, b, c), (1, 2, 3), (4, 5, 6)]
In [19]: df = pd.read_csv(StringIO(data), skiprows=2, header=1)
In [20]: df
Out[20]:
a b c
0 1 2 3
1 4 5 6