Skip to content

ENH: In HDFStore, handle a passed Series in table format (GH4330) #4664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 24, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pandas 0.13

- ``HDFStore``

- handle a passed ``Series`` in table format (:issue:`4330`)
- added an ``is_open`` property to indicate if the underlying file handle is open;
a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
(:issue:`4409`)
Expand Down
1 change: 1 addition & 0 deletions doc/source/v0.13.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ API changes
- ``HDFStore``

- Significant table writing performance improvements
- handle a passed ``Series`` in table format (:issue:`4330`)
- added an ``is_open`` property to indicate if the underlying file handle is open;
a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
(:issue:`4409`)
Expand Down
78 changes: 76 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class DuplicateWarning(Warning):
# table class map
_TABLE_MAP = {
u('generic_table') : 'GenericTable',
u('appendable_series') : 'AppendableSeriesTable',
u('appendable_multiseries'): 'AppendableMultiSeriesTable',
u('appendable_frame') : 'AppendableFrameTable',
u('appendable_multiframe') : 'AppendableMultiFrameTable',
u('appendable_panel') : 'AppendablePanelTable',
Expand Down Expand Up @@ -913,7 +915,14 @@ def error(t):
# if we are a writer, determine the table type (tt)
if value is not None:

if pt == u('frame_table'):
if pt == u('series_table'):
index = getattr(value,'index',None)
if index is not None:
if index.nlevels == 1:
tt = u('appendable_series')
elif index.nlevels > 1:
tt = u('appendable_multiseries')
elif pt == u('frame_table'):
index = getattr(value,'index',None)
if index is not None:
if index.nlevels == 1:
Expand Down Expand Up @@ -1692,6 +1701,10 @@ def copy(self):
new_self = copy.copy(self)
return new_self

@property
def storage_obj_type(self):
    """ the object type used for storage; here it is simply obj_type """
    stored_as = self.obj_type
    return stored_as

@property
def shape(self):
    """ the shape is reported as the value of the nrows attribute """
    n_rows = self.nrows
    return n_rows
Expand Down Expand Up @@ -2369,6 +2382,11 @@ def validate(self, other):
# should never get here
raise Exception("invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c,sv,ov))

@property
def is_multi_index(self):
    """ True when the levels attribute is a list (the multi-index case);
    otherwise levels is 1 and this is False """
    levels = self.levels
    return isinstance(levels, list)

@property
def nrows_expected(self):
""" based on our axes, compute the expected nrows """
Expand Down Expand Up @@ -2419,7 +2437,7 @@ def queryables(self):

# compute the values_axes queryables
return dict([(a.cname, a.kind) for a in self.index_axes] +
[(self.obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
[(self.storage_obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
[(v.cname, v.kind) for v in self.values_axes if v.name in set(self.data_columns)]
)

Expand Down Expand Up @@ -3277,6 +3295,62 @@ def read(self, where=None, columns=None, **kwargs):
return df


class AppendableSeriesTable(AppendableFrameTable):
    """ support the new appendable table formats

    A Series is written as a single-column frame table (see ``write``) and
    turned back into a Series when it is read (see ``read``).
    """
    pandas_kind = u('series_table')
    table_type = u('appendable_series')
    ndim = 2
    obj_type = Series
    storage_obj_type = DataFrame

    @property
    def is_transposed(self):
        """ always False for this table type """
        return False

    def get_object(self, obj):
        """ return the passed object unchanged """
        return obj

    def write(self, obj, data_columns=None, **kwargs):
        """ we are going to write this as a frame table

        obj : a Series (or an already converted DataFrame)
        data_columns : ignored; the columns of the frame being written are
            always passed to the parent ``write`` as the data columns
        """
        if not isinstance(obj, DataFrame):
            # an unnamed series is stored under the placeholder name 'values'
            name = obj.name or 'values'
            obj = DataFrame({ name : obj }, index=obj.index)
            obj.columns = [name]
        return super(AppendableSeriesTable, self).write(obj=obj, data_columns=obj.columns, **kwargs)

    def read(self, columns=None, **kwargs):
        """ read the stored frame table and return its first column as a Series

        columns : optional list of columns to read; for a multi-index table
            any missing index level names are added to a copy of this list
            (the list passed by the caller is left untouched)
        """

        is_multi_index = self.is_multi_index
        if columns is not None and is_multi_index:
            # work on a copy so that the caller's sequence is never mutated
            columns = list(columns)
            for n in self.levels:
                if n not in columns:
                    columns.insert(0, n)
        s = super(AppendableSeriesTable, self).read(columns=columns, **kwargs)
        if is_multi_index:
            # the index levels were stored as regular columns; restore them
            s.set_index(self.levels, inplace=True)

        # the values are held in the first column of the frame
        s = s.iloc[:,0]

        # remove the default name
        # NOTE: a series that was explicitly named 'values' also loses its name here
        if s.name == 'values':
            s.name = None
        return s

class AppendableMultiSeriesTable(AppendableSeriesTable):
    """ support the new appendable table formats

    a Series with a multi-level index; the index levels are stored as
    regular columns next to the values
    """
    pandas_kind = u('series_table')
    table_type = u('appendable_multiseries')

    def write(self, obj, **kwargs):
        """ we are going to write this as a frame table """
        # an unnamed series is stored under the placeholder name 'values'
        value_name = obj.name or 'values'

        # remember the index level names on the table
        level_names = list(obj.index.names)
        self.levels = level_names

        # move the index levels into columns: levels first, values last
        frame = obj.reset_index()
        frame.columns = level_names + [value_name]
        return super(AppendableMultiSeriesTable, self).write(obj=frame, **kwargs)

class GenericTable(AppendableFrameTable):
""" a table that read/writes the generic pytables table format """
pandas_kind = u('frame_table')
Expand Down
50 changes: 46 additions & 4 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,52 @@ def test_append(self):
store.append('uints', uint_data, data_columns=['u08','u16','u32']) # 64-bit indices not yet supported
tm.assert_frame_equal(store['uints'], uint_data)

def test_append_series(self):
    # a Series appended to the store should be read back equal to the
    # original, including its name

    with ensure_clean(self.path) as store:

        string_series = tm.makeStringSeries()
        time_series = tm.makeTimeSeries()
        named = Series(np.arange(100))

        # basic: both of these are expected to come back without a name
        for key, original in (('ss', string_series), ('ts', time_series)):
            store.append(key, original)
            roundtrip = store[key]
            tm.assert_series_equal(roundtrip, original)
            self.assert_(roundtrip.name is None)

        # a named series keeps its name
        named.name = 'foo'
        store.append('ns', named)
        roundtrip = store['ns']
        tm.assert_series_equal(roundtrip, named)
        self.assert_(roundtrip.name == named.name)

        # select on the values
        expected = named[named > 60]
        selected = store.select('ns', Term('foo>60'))
        tm.assert_series_equal(selected, expected)

        # select on the index and values
        expected = named[(named > 70) & (named.index < 90)]
        selected = store.select('ns', [Term('foo>70'), Term('index<90')])
        tm.assert_series_equal(selected, expected)

        # multi-index
        frame = DataFrame(np.random.randn(5, 1), columns=['A'])
        frame['B'] = np.arange(len(frame))
        frame['C'] = 'foo'
        frame.loc[3:5, 'C'] = 'bar'
        frame.set_index(['C', 'B'], inplace=True)
        stacked = frame.stack()
        stacked.index = stacked.index.droplevel(2)
        store.append('mi', stacked)
        tm.assert_series_equal(store['mi'], stacked)

def test_encoding(self):

if sys.byteorder != 'little':
Expand Down Expand Up @@ -1270,10 +1316,6 @@ def test_append_misc(self):
p4d = tm.makePanel4D()
self.assertRaises(TypeError, store.put,'p4d',p4d)

# unsupported data type for table
s = tm.makeStringSeries()
self.assertRaises(TypeError, store.append,'s',s)

# unsupported data types
self.assertRaises(TypeError, store.put,'abc',None)
self.assertRaises(TypeError, store.put,'abc','123')
Expand Down