NOTE: the line breaks and indentation of this patch were re-created from a whitespace-collapsed
copy; whitespace on context lines is best effort and may need adjusting before it applies cleanly.
The blob ids on the pandas/io/pytables.py "index" line predate the AppendableSeriesTable.read fix.

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 159deaabb943f..64888e2c6a00c 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -83,6 +83,7 @@ pandas 0.13
 
   - ``HDFStore``
 
+    - handle a passed ``Series`` in table format (:issue:`4330`)
     - added an ``is_open`` property to indicate if the underlying file handle is_open;
       a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
       (:issue:`4409`)
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
index 7ccda591da733..5003aa654d9fb 100644
--- a/doc/source/v0.13.0.txt
+++ b/doc/source/v0.13.0.txt
@@ -39,6 +39,7 @@ API changes
   - ``HDFStore``
 
     - Significant table writing performance improvements
+    - handle a passed ``Series`` in table format (:issue:`4330`)
     - added an ``is_open`` property to indicate if the underlying file handle is_open;
       a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
       (:issue:`4409`)
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 3d5ba43943e98..608bbe4703272 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -126,6 +126,8 @@ class DuplicateWarning(Warning):
 # table class map
 _TABLE_MAP = {
     u('generic_table')    : 'GenericTable',
+    u('appendable_series')     : 'AppendableSeriesTable',
+    u('appendable_multiseries'): 'AppendableMultiSeriesTable',
     u('appendable_frame')      : 'AppendableFrameTable',
     u('appendable_multiframe') : 'AppendableMultiFrameTable',
     u('appendable_panel')      : 'AppendablePanelTable',
@@ -913,7 +915,14 @@ def error(t):
             # if we are a writer, determin the tt
             if value is not None:
 
-                if pt == u('frame_table'):
+                if pt == u('series_table'):
+                    index = getattr(value,'index',None)
+                    if index is not None:
+                        if index.nlevels == 1:
+                            tt = u('appendable_series')
+                        elif index.nlevels > 1:
+                            tt = u('appendable_multiseries')
+                elif pt == u('frame_table'):
                     index = getattr(value,'index',None)
                     if index is not None:
                         if index.nlevels == 1:
@@ -1692,6 +1701,10 @@ def copy(self):
         new_self = copy.copy(self)
         return new_self
 
+    @property
+    def storage_obj_type(self):
+        return self.obj_type
+
     @property
     def shape(self):
         return self.nrows
@@ -2369,6 +2382,11 @@ def validate(self, other):
                 # should never get here
                 raise Exception("invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c,sv,ov))
 
+    @property
+    def is_multi_index(self):
+        """ the levels attribute is 1 or a list in the case of a multi-index """
+        return isinstance(self.levels,list)
+
     @property
     def nrows_expected(self):
         """ based on our axes, compute the expected nrows """
@@ -2419,7 +2437,7 @@ def queryables(self):
 
         # compute the values_axes queryables
         return dict([(a.cname, a.kind) for a in self.index_axes] +
-                    [(self.obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
+                    [(self.storage_obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
                     [(v.cname, v.kind) for v in self.values_axes if v.name in set(self.data_columns)]
                     )
 
@@ -3277,6 +3295,65 @@ def read(self, where=None, columns=None, **kwargs):
 
         return df
 
+class AppendableSeriesTable(AppendableFrameTable):
+    """ store a Series as a single-column frame table, so that it can be appended to and queried """
+    pandas_kind = u('series_table')
+    table_type = u('appendable_series')
+    ndim = 2
+    obj_type = Series
+    storage_obj_type = DataFrame
+
+    @property
+    def is_transposed(self):
+        return False
+
+    def get_object(self, obj):
+        return obj
+
+    def write(self, obj, data_columns=None, **kwargs):
+        """ write the Series as a one-column frame table (the column is named 'values' if the Series is unnamed) """
+        if not isinstance(obj, DataFrame):
+            name = obj.name or 'values'
+            obj = DataFrame({ name : obj }, index=obj.index)
+            obj.columns = [name]
+        # the passed data_columns is ignored: every column of the frame is written as a data column
+        return super(AppendableSeriesTable, self).write(obj=obj, data_columns=obj.columns, **kwargs)
+
+    def read(self, columns=None, **kwargs):
+
+        is_multi_index = self.is_multi_index
+        if columns is not None and is_multi_index:
+            # work on a copy so that the list passed in by the caller is not modified
+            columns = list(columns)
+            for n in self.levels:
+                if n not in columns:
+                    columns.insert(0, n)
+        s = super(AppendableSeriesTable, self).read(columns=columns, **kwargs)
+        if is_multi_index:
+            s.set_index(self.levels, inplace=True)
+
+        s = s.iloc[:,0]
+
+        # remove the default name
+        if s.name == 'values':
+            s.name = None
+        return s
+
+class AppendableMultiSeriesTable(AppendableSeriesTable):
+    """ store a multi-index Series; the index levels are written as columns of the frame table """
+    pandas_kind = u('series_table')
+    table_type = u('appendable_multiseries')
+
+    def write(self, obj, **kwargs):
+        """ move the index levels into columns, record their names in self.levels, then write as a frame table """
+        name = obj.name or 'values'
+        cols = list(obj.index.names)
+        cols.append(name)
+        self.levels = list(obj.index.names)
+        obj = obj.reset_index()
+        obj.columns = cols
+        return super(AppendableMultiSeriesTable, self).write(obj=obj, **kwargs)
+
 class GenericTable(AppendableFrameTable):
     """ a table that read/writes the generic pytables table format """
     pandas_kind = u('frame_table')
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 376bb13b24f96..cfe162c887799 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -580,6 +580,52 @@ def test_append(self):
             store.append('uints', uint_data, data_columns=['u08','u16','u32']) # 64-bit indices not yet supported
             tm.assert_frame_equal(store['uints'], uint_data)
 
+    def test_append_series(self):
+
+        with ensure_clean(self.path) as store:
+
+            # basic
+            ss = tm.makeStringSeries()
+            ts = tm.makeTimeSeries()
+            ns = Series(np.arange(100))
+
+            store.append('ss', ss)
+            result = store['ss']
+            tm.assert_series_equal(result, ss)
+            self.assert_(result.name is None)
+
+            store.append('ts', ts)
+            result = store['ts']
+            tm.assert_series_equal(result, ts)
+            self.assert_(result.name is None)
+
+            ns.name = 'foo'
+            store.append('ns', ns)
+            result = store['ns']
+            tm.assert_series_equal(result, ns)
+            self.assert_(result.name == ns.name)
+
+            # select on the values
+            expected = ns[ns>60]
+            result = store.select('ns',Term('foo>60'))
+            tm.assert_series_equal(result,expected)
+
+            # select on the index and values
+            expected = ns[(ns>70) & (ns.index<90)]
+            result = store.select('ns',[Term('foo>70'), Term('index<90')])
+            tm.assert_series_equal(result,expected)
+
+            # multi-index
+            mi = DataFrame(np.random.randn(5,1),columns=['A'])
+            mi['B'] = np.arange(len(mi))
+            mi['C'] = 'foo'
+            mi.loc[3:5,'C'] = 'bar'
+            mi.set_index(['C','B'],inplace=True)
+            s = mi.stack()
+            s.index = s.index.droplevel(2)
+            store.append('mi', s)
+            tm.assert_series_equal(store['mi'], s)
+
     def test_encoding(self):
 
         if sys.byteorder != 'little':
@@ -1270,10 +1316,6 @@ def test_append_misc(self):
             p4d = tm.makePanel4D()
             self.assertRaises(TypeError, store.put,'p4d',p4d)
 
-            # unsupported data type for table
-            s = tm.makeStringSeries()
-            self.assertRaises(TypeError, store.append,'s',s)
-
             # unsuported data types
             self.assertRaises(TypeError, store.put,'abc',None)
             self.assertRaises(TypeError, store.put,'abc','123')