From 4f33d77139c9c15fe38dd5974a8629d55582be6d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Dec 2019 17:38:57 -0800 Subject: [PATCH 1/2] REF: collect attribute-setting at the end of create_axes --- pandas/io/pytables.py | 46 ++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a48d9abc3c13b..1c2d38bb027eb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3681,7 +3681,7 @@ def read_axes( return True - def get_object(self, obj): + def get_object(self, obj, transposed: bool): """ return the data for this obj """ return obj @@ -3785,15 +3785,13 @@ def create_axes( ) # create according to the new data - self.non_index_axes = [] - self.data_columns = [] + nia: List = [] + dcs = [] # nan_representation if nan_rep is None: nan_rep = "nan" - self.nan_rep = nan_rep - # create axes to index and non_index index_axes_map = dict() for i, a in enumerate(obj.axes): @@ -3810,7 +3808,7 @@ def create_axes( # necessary append_axis = list(a) if existing_table is not None: - indexer = len(self.non_index_axes) + indexer = len(nia) exist_axis = existing_table.non_index_axes[indexer][1] if not array_equivalent( np.array(append_axis), np.array(exist_axis) @@ -3827,34 +3825,37 @@ def create_axes( info["names"] = list(a.names) info["type"] = type(a).__name__ - self.non_index_axes.append((i, append_axis)) + nia.append((i, append_axis)) + + self.non_index_axes = nia # set axis positions (based on the axes) new_index_axes = [index_axes_map[a] for a in axes] for j, iax in enumerate(new_index_axes): iax.set_pos(j) iax.update_info(self.info) - self.index_axes = new_index_axes - j = len(self.index_axes) + j = len(new_index_axes) # check for column conflicts - for a in self.axes: + for a in new_index_axes: a.maybe_set_size(min_itemsize=min_itemsize) # reindex by our non_index_axes & compute data_columns - for a in self.non_index_axes: + for a in nia: obj = _reindex_axis(obj, a[0], a[1]) def get_blk_items(mgr, blocks): return [mgr.items.take(blk.mgr_locs) for blk in blocks] + transposed = new_index_axes[0].axis == 1 + # figure out data_columns and get out blocks - block_obj = self.get_object(obj)._consolidate() + block_obj = self.get_object(obj, transposed)._consolidate() blocks = block_obj._data.blocks blk_items = get_blk_items(block_obj._data, blocks) - if len(self.non_index_axes): - axis, axis_labels = self.non_index_axes[0] + if len(nia): + axis, axis_labels = nia[0] data_columns = self.validate_data_columns(data_columns, min_itemsize) if len(data_columns): mgr = block_obj.reindex( @@ -3892,7 +3893,7 @@ def get_blk_items(mgr, blocks): blk_items = new_blk_items # add my values - self.values_axes = [] + vaxes = [] for i, (b, b_items) in enumerate(zip(blocks, blk_items)): # shape of the data column are the indexable axes @@ -3906,7 +3907,7 @@ def get_blk_items(mgr, blocks): if not (name is None or isinstance(name, str)): # TODO: should the message here be more specifically non-str? raise ValueError("cannot have non-object label DataIndexableCol") - self.data_columns.append(name) + dcs.append(name) # make sure that we match up the existing columns # if we have an existing table @@ -3934,10 +3935,15 @@ def get_blk_items(mgr, blocks): ) col.set_pos(j) - self.values_axes.append(col) + vaxes.append(col) j += 1 + self.nan_rep = nan_rep + self.data_columns = dcs + self.values_axes = vaxes + self.index_axes = new_index_axes + # validate our min_itemsize self.validate_min_itemsize(min_itemsize) @@ -4390,9 +4396,9 @@ class AppendableFrameTable(AppendableTable): def is_transposed(self) -> bool: return self.index_axes[0].axis == 1 - def get_object(self, obj): + def get_object(self, obj, transposed: bool): """ these are written transposed """ - if self.is_transposed: + if transposed: obj = obj.T return obj @@ -4474,7 +4480,7 @@ class AppendableSeriesTable(AppendableFrameTable): def is_transposed(self) -> bool: return False - def get_object(self, obj): + def get_object(self, obj, transposed: bool): return obj def write(self, obj, data_columns=None, **kwargs): From c023acfbfba8417058261bd644e9c42d866649fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Dec 2019 09:08:31 -0800 Subject: [PATCH 2/2] more descriptive names --- pandas/io/pytables.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 397e5a67a3983..e1b3e5c83f7db 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3838,8 +3838,8 @@ def create_axes( ) # create according to the new data - nia: List = [] - dcs = [] + new_non_index_axes: List = [] + new_data_columns: List[Optional[str]] = [] # nan_representation if nan_rep is None: @@ -3861,7 +3861,7 @@ def create_axes( # necessary append_axis = list(a) if existing_table is not None: - indexer = len(nia) + indexer = len(new_non_index_axes) exist_axis = existing_table.non_index_axes[indexer][1] if not array_equivalent( np.array(append_axis), np.array(exist_axis) @@ -3878,9 +3878,9 @@ def create_axes( info["names"] = list(a.names) info["type"] = type(a).__name__ - nia.append((i, append_axis)) + new_non_index_axes.append((i, append_axis)) - self.non_index_axes = nia + self.non_index_axes = new_non_index_axes # set axis positions (based on the axes) new_index_axes = [index_axes_map[a] for a in axes] @@ -3895,7 +3895,7 @@ def create_axes( a.maybe_set_size(min_itemsize=min_itemsize) # reindex by our non_index_axes & compute data_columns - for a in nia: + for a in new_non_index_axes: obj = _reindex_axis(obj, a[0], a[1]) def get_blk_items(mgr, blocks): @@ -3907,8 +3907,8 @@ def get_blk_items(mgr, blocks): block_obj = self.get_object(obj, transposed)._consolidate() blocks = block_obj._data.blocks blk_items = get_blk_items(block_obj._data, blocks) - if len(nia): - axis, axis_labels = nia[0] + if len(new_non_index_axes): + axis, axis_labels = new_non_index_axes[0] data_columns = self.validate_data_columns(data_columns, min_itemsize) if len(data_columns): mgr = block_obj.reindex( @@ -3960,7 +3960,7 @@ def get_blk_items(mgr, blocks): if not (name is None or isinstance(name, str)): # TODO: should the message here be more specifically non-str? raise ValueError("cannot have non-object label DataIndexableCol") - dcs.append(name) + new_data_columns.append(name) # make sure that we match up the existing columns # if we have an existing table @@ -3993,7 +3993,7 @@ def get_blk_items(mgr, blocks): j += 1 self.nan_rep = nan_rep - self.data_columns = dcs + self.data_columns = new_data_columns self.values_axes = vaxes self.index_axes = new_index_axes