Skip to content

Selecting columns from an HDFStore with a MultiIndex fails #3748

@hmgaudecker

Description

@hmgaudecker

Using the example from the docs here: http://pandas.pydata.org/pandas-docs/dev/io.html#storing-multi-index-dataframes but additionally specifying the 'columns' keyword when selecting in input [1219]:

store.select('df_mi', columns=['A', 'B'])

leads to the failure below. The original example works, as does selecting columns when the stored frame has a standard index. I'm on a fairly recent master (~1 week old) and Python 3.3. I hope this is not expected behavior and that I did not miss an existing issue; I did search for a bit.

KeyError                                  Traceback (most recent call last)
<ipython-input-67-678a9c77d998> in <module>()
----> 1 store.select('df_mi', columns=['A', 'B'])

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/io/pytables.py in select(self, key, where, start, stop, columns, iterator, chunksize, **kwargs)
    413             return TableIterator(func, nrows=s.nrows, start=start, stop=stop, chunksize=chunksize)
    414 
--> 415         return TableIterator(func, nrows=s.nrows, start=start, stop=stop).get_values()
    416 
    417     def select_as_coordinates(self, key, where=None, start=None, stop=None, **kwargs):

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/io/pytables.py in get_values(self)
    931 
    932     def get_values(self):
--> 933         return self.func(self.start, self.stop)
    934 
    935 

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/io/pytables.py in func(_start, _stop)
    408         # what we are actually going to do for a chunk
    409         def func(_start, _stop):
--> 410             return s.read(where=where, start=_start, stop=_stop, columns=columns, **kwargs)
    411 
    412         if iterator or chunksize is not None:

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/io/pytables.py in read(self, *args, **kwargs)
   3142     def read(self, *args, **kwargs):
   3143         df = super(AppendableMultiFrameTable, self).read(*args, **kwargs)
-> 3144         df.set_index(self.levels, inplace=True)
   3145         return df
   3146 

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
   2795                 names.append(None)
   2796             else:
-> 2797                 level = frame[col].values
   2798                 names.append(col)
   2799                 if drop:

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/core/frame.py in __getitem__(self, key)
   1992         else:
   1993             # get column
-> 1994             return self._get_item_cache(key)
   1995 
   1996     def _getitem_slice(self, key):

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/core/generic.py in _get_item_cache(self, item)
    572             return cache[item]
    573         except Exception:
--> 574             values = self._data.get(item)
    575             res = self._box_item_values(item, values)
    576             cache[item] = res

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/core/internals.py in get(self, item)
   1646 
   1647     def get(self, item):
-> 1648         _, block = self._find_block(item)
   1649         return block.get(item)
   1650 

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/core/internals.py in _find_block(self, item)
   1773 
   1774     def _find_block(self, item):
-> 1775         self._check_have(item)
   1776         for i, block in enumerate(self.blocks):
   1777             if item in block:

/home/xxx/anaconda/envs/py33/lib/python3.3/site-packages/pandas-0.12.0.dev_2bd1cf8-py3.3-linux-x86_64.egg/pandas/core/internals.py in _check_have(self, item)
   1780     def _check_have(self, item):
   1781         if item not in self.items:
-> 1782             raise KeyError('no item named %s' % com.pprint_thing(item))
   1783 
   1784     def reindex_axis(self, new_axis, method=None, axis=0, copy=True):

KeyError: 'no item named foo'

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; IO Data (IO issues that don't fit into a more specific label)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions