|
21 | 21 | from pandas.tseries.api import PeriodIndex, DatetimeIndex
|
22 | 22 | from pandas.core.common import adjoin
|
23 | 23 | from pandas.core.algorithms import match, unique
|
24 |
| - |
| 24 | +from pandas.core.strings import str_len |
25 | 25 | from pandas.core.categorical import Factor
|
26 | 26 | from pandas.core.common import _asarray_tuplesafe, _try_sort
|
27 | 27 | from pandas.core.internals import BlockManager, make_block, form_blocks
|
@@ -507,7 +507,7 @@ def _write_to_group(self, key, value, table=False, append=False,
|
507 | 507 | wrapper(value)
|
508 | 508 | group._v_attrs.pandas_type = kind
|
509 | 509 | group._v_attrs.pandas_version = _version
|
510 |
| - group._v_attrs.meta = getattr(value,'meta',None) |
| 510 | + #group._v_attrs.meta = getattr(value,'meta',None) |
511 | 511 |
|
512 | 512 | def _write_series(self, group, series):
|
513 | 513 | self._write_index(group, 'index', series.index)
|
@@ -848,10 +848,10 @@ def _read_group(self, group, where=None, **kwargs):
|
848 | 848 | kind = _LEGACY_MAP.get(kind, kind)
|
849 | 849 | handler = self._get_handler(op='read', kind=kind)
|
850 | 850 | v = handler(group, where, **kwargs)
|
851 |
| - if v is not None: |
852 |
| - meta = getattr(group._v_attrs,'meta',None) |
853 |
| - if meta is not None: |
854 |
| - v.meta = meta |
| 851 | + #if v is not None: |
| 852 | + # meta = getattr(group._v_attrs,'meta',None) |
| 853 | + # if meta is not None: |
| 854 | + # v.meta = meta |
855 | 855 | return v
|
856 | 856 |
|
857 | 857 | def _read_series(self, group, where=None):
|
@@ -1001,16 +1001,22 @@ def validate_and_set(self, table, append, **kwargs):
|
1001 | 1001 | self.validate_attr(append)
|
1002 | 1002 | self.set_attr()
|
1003 | 1003 |
|
1004 |
| - def validate_col(self): |
1005 |
| - """ validate this column & set table data for it """ |
| 1004 | + def validate_col(self, itemsize = None): |
| 1005 | + """ validate this column: return the itemsize of the existing column it was compared against """ |
1006 | 1006 |
|
1007 | 1007 | # validate this column for string truncation (or reset to the max size)
|
1008 |
| - if self.kind == 'string': |
| 1008 | + dtype = getattr(self,'dtype',None) |
| 1009 | + if self.kind == 'string' or (dtype is not None and dtype.startswith('string')): |
1009 | 1010 |
|
1010 | 1011 | c = self.col
|
1011 | 1012 | if c is not None:
|
1012 |
| - if c.itemsize < self.itemsize: |
1013 |
| - raise Exception("[%s] column has a min_itemsize of [%s] but itemsize [%s] is required!" % (self.cname,self.itemsize,c.itemsize)) |
| 1013 | + if itemsize is None: |
| 1014 | + itemsize = self.itemsize |
| 1015 | + if c.itemsize < itemsize: |
| 1016 | + raise Exception("[%s] column has a min_itemsize of [%s] but itemsize [%s] is required!" % (self.cname,itemsize,c.itemsize)) |
| 1017 | + return c.itemsize |
| 1018 | + |
| 1019 | + return None |
1014 | 1020 |
|
1015 | 1021 |
|
1016 | 1022 | def validate_attr(self, append):
|
@@ -1404,26 +1410,35 @@ def create_axes(self, axes, obj, validate = True, min_itemsize = None):
|
1404 | 1410 | # a string column
|
1405 | 1411 | if b.dtype.name == 'object':
|
1406 | 1412 |
|
| 1413 | + # itemsize is the maximum length of a string (along any dimension) |
| 1414 | + itemsize = _itemsize_string_array(values) |
| 1415 | + |
1407 | 1416 | # specified min_itemsize?
|
1408 | 1417 | if isinstance(min_itemsize, dict):
|
1409 |
| - min_itemsize = int(min_itemsize.get('values')) |
| 1418 | + itemsize = max(int(min_itemsize.get('values')),itemsize) |
| 1419 | + |
| 1420 | + # check for conflicts with the existing column in the values axes |
| 1421 | + if existing_table is not None and validate: |
| 1422 | + eci = existing_table.values_axes[i].validate_col(itemsize) |
| 1423 | + if eci > itemsize: |
| 1424 | + itemsize = eci |
1410 | 1425 |
|
1411 |
| - if min_itemsize is None: |
1412 |
| - min_itemsize = values.dtype.itemsize |
| 1426 | + atom = _tables().StringCol(itemsize = itemsize, shape = shape) |
| 1427 | + utype = 'S%s' % itemsize |
| 1428 | + kind = 'string' |
1413 | 1429 |
|
1414 |
| - atom = _tables().StringCol(itemsize = min_itemsize, shape = shape) |
1415 |
| - utype = 'S%s' % min_itemsize |
1416 | 1430 | else:
|
1417 | 1431 | atom = getattr(_tables(),"%sCol" % b.dtype.name.capitalize())(shape = shape)
|
1418 | 1432 | utype = atom._deftype
|
| 1433 | + kind = b.dtype.name |
1419 | 1434 |
|
1420 | 1435 | # coerce data to this type
|
1421 | 1436 | try:
|
1422 | 1437 | values = values.astype(utype)
|
1423 | 1438 | except (Exception), detail:
|
1424 | 1439 | raise Exception("cannot coerce data type -> [dtype->%s]" % b.dtype.name)
|
1425 | 1440 |
|
1426 |
| - dc = DataCol.create_for_block(i = i, values = list(b.items), kind = b.dtype.name, typ = atom, data = values, pos = j) |
| 1441 | + dc = DataCol.create_for_block(i = i, values = list(b.items), kind = kind, typ = atom, data = values, pos = j) |
1427 | 1442 | j += 1
|
1428 | 1443 | self.values_axes.append(dc)
|
1429 | 1444 |
|
@@ -1663,7 +1678,6 @@ def write_data(self):
|
1663 | 1678 | """ fast writing of data: requires specific cython routines each axis shape """
|
1664 | 1679 |
|
1665 | 1680 | # create the masks & values
|
1666 |
| - #import pdb; pdb.set_trace() |
1667 | 1681 | masks = []
|
1668 | 1682 | for a in self.values_axes:
|
1669 | 1683 |
|
@@ -1694,7 +1708,6 @@ def write_data(self):
|
1694 | 1708 | if len(rows):
|
1695 | 1709 | self.table.append(rows)
|
1696 | 1710 | except (Exception), detail:
|
1697 |
| - #import pdb; pdb.set_trace() |
1698 | 1711 | raise Exception("tables cannot write this data -> %s" % str(detail))
|
1699 | 1712 |
|
1700 | 1713 | def delete(self, where = None):
|
@@ -1849,6 +1862,10 @@ def create_table(parent, group, typ = None, **kwargs):
|
1849 | 1862 | return _TABLE_MAP.get(tt)(parent, group, **kwargs)
|
1850 | 1863 |
|
1851 | 1864 |
|
| 1865 | +def _itemsize_string_array(arr): |
| 1866 | + """ return the maximum size of elements in a strnig array """ |
| 1867 | + return max([ str_len(arr[v]).max() for v in range(arr.shape[0]) ]) |
| 1868 | + |
1852 | 1869 | def _convert_index(index):
|
1853 | 1870 | if isinstance(index, DatetimeIndex):
|
1854 | 1871 | converted = index.asi8
|
@@ -2247,14 +2264,20 @@ def f(values, freq=None, tz=None):
|
2247 | 2264 |
|
2248 | 2265 | def create_debug_memory(parent):
|
2249 | 2266 | _debug_memory = getattr(parent,'_debug_memory',False)
|
| 2267 | + def get_memory(s): |
| 2268 | + pass |
| 2269 | + |
2250 | 2270 | if not _debug_memory:
|
2251 |
| - def get_memory(s): |
2252 |
| - pass |
| 2271 | + pass |
2253 | 2272 | else:
|
2254 |
| - import psutil, os |
2255 |
| - def get_memory(s): |
2256 |
| - p = psutil.Process(os.getpid()) |
2257 |
| - (rss,vms) = p.get_memory_info() |
2258 |
| - mp = p.get_memory_percent() |
2259 |
| - print "[%s] cur_mem->%.2f (MB),per_mem->%.2f" % (s,rss/1000000.0,mp) |
| 2273 | + try: |
| 2274 | + import psutil, os |
| 2275 | + def get_memory(s): |
| 2276 | + p = psutil.Process(os.getpid()) |
| 2277 | + (rss,vms) = p.get_memory_info() |
| 2278 | + mp = p.get_memory_percent() |
| 2279 | + print "[%s] cur_mem->%.2f (MB),per_mem->%.2f" % (s,rss/1000000.0,mp) |
| 2280 | + except: |
| 2281 | + pass |
| 2282 | + |
2260 | 2283 | return get_memory
|
0 commit comments