diff --git a/pandas/core/config.py b/pandas/core/config.py
index e974689470c46..b6f00034429b2 100644
--- a/pandas/core/config.py
+++ b/pandas/core/config.py
@@ -57,8 +57,8 @@ import pandas.compat as compat
 
 DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver')
 
-RegisteredOption = namedtuple(
-    'RegisteredOption', 'key defval doc validator cb')
+RegisteredOption = namedtuple('RegisteredOption',
+                              'key defval doc validator cb')
 
 _deprecated_options = {}  # holds deprecated option metdata
 _registered_options = {}  # holds registered option metdata
@@ -67,14 +67,14 @@ class OptionError(AttributeError, KeyError):
-
     """Exception for pandas.options, backwards compatible with KeyError
-    checks"""
-
+    checks
+    """
 
 #
 # User API
 
+
 def _get_single_key(pat, silent):
     keys = _select_options(pat)
     if len(keys) == 0:
@@ -106,14 +106,14 @@ def _set_option(*args, **kwargs):
     nargs = len(args)
     if not nargs or nargs % 2 != 0:
         raise ValueError("Must provide an even number of non-keyword "
-                         "arguments")
+                         "arguments")
 
     # default to false
     silent = kwargs.pop('silent', False)
 
     if kwargs:
         raise TypeError('_set_option() got an unexpected keyword '
-                        'argument "{0}"'.format(list(kwargs.keys())[0]))
+                        'argument "{0}"'.format(list(kwargs.keys())[0]))
 
     for k, v in zip(args[::2], args[1::2]):
         key = _get_single_key(k, silent)
@@ -129,6 +129,7 @@ def _set_option(*args, **kwargs):
         if o.cb:
             o.cb(key)
 
+
 def _describe_option(pat='', _print_desc=True):
 
     keys = _select_options(pat)
@@ -168,9 +169,7 @@ def get_default_val(pat):
 
 class DictWrapper(object):
-
-    """ provide attribute-style access to a nested dict
-    """
+    """ provide attribute-style access to a nested dict"""
 
     def __init__(self, d, prefix=""):
         object.__setattr__(self, "d", d)
@@ -202,7 +201,6 @@ def __getattr__(self, key):
 
     def __dir__(self):
         return list(self.d.keys())
 
-
 # For user convenience, we'd like to have the available options described
 # in the docstring. For dev convenience we'd like to generate the docstrings
 # dynamically instead of maintaining them by hand. To this, we use the
@@ -213,7 +211,6 @@ def __dir__(self):
 
 class CallableDynamicDoc(object):
-
     def __init__(self, func, doc_tmpl):
         self.__doc_tmpl__ = doc_tmpl
         self.__func__ = func
@@ -228,6 +225,7 @@ def __doc__(self):
         return self.__doc_tmpl__.format(opts_desc=opts_desc,
                                         opts_list=opts_list)
 
+
 _get_option_tmpl = """
get_option(pat)
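As a usage sketch (not part of the patch): this is how the user API defined above behaves through the public `pandas` namespace. The option names are real pandas display options; defaults vary across versions.

```python
import pandas as pd

# get_option/set_option resolve a dotted pattern to a single registered key.
print(pd.get_option('display.max_rows'))

pd.set_option('display.max_rows', 100)
assert pd.get_option('display.max_rows') == 100

# OptionError subclasses both AttributeError and KeyError (see the class
# definition above), so pre-existing KeyError-style handling keeps working.
try:
    pd.get_option('display.no_such_option')
except (AttributeError, KeyError):
    print('unknown option')

pd.reset_option('display.max_rows')  # back to the registered default
```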
@@ -384,10 +382,8 @@ class option_context(object):
 
     def __init__(self, *args):
         if not (len(args) % 2 == 0 and len(args) >= 2):
-            raise ValueError(
-                'Need to invoke as'
-                'option_context(pat, val, [(pat, val), ...)).'
-            )
+            raise ValueError('Need to invoke as '
+                             'option_context(pat, val, [(pat, val), ...]).')
 
         self.ops = list(zip(args[::2], args[1::2]))
@@ -462,8 +458,8 @@ def register_option(key, defval, doc='', validator=None, cb=None):
             cursor = cursor[p]
 
     if not isinstance(cursor, dict):
-        raise OptionError("Path prefix to option '%s' is already an option"
-                          % '.'.join(path[:-1]))
+        raise OptionError("Path prefix to option '%s' is already an option" %
+                          '.'.join(path[:-1]))
 
     cursor[path[-1]] = defval  # initialize
@@ -520,10 +516,10 @@ def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
 
     _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
 
-
 #
 # functions internal to the module
 
+
 def _select_options(pat):
     """returns a list of keys matching `pat`
@@ -681,7 +677,6 @@ def pp(name, ks):
     else:
         return s
 
-
 #
 # helpers
@@ -717,7 +712,6 @@ def config_prefix(prefix):
     global register_option, get_option, set_option, reset_option
 
     def wrap(func):
-
         def inner(key, *args, **kwds):
             pkey = '%s.%s' % (prefix, key)
             return func(pkey, *args, **kwds)
@@ -735,10 +729,10 @@ def inner(key, *args, **kwds):
     get_option = _get_option
     register_option = _register_option
 
-
 # These factories and methods are handy for use as the validator
 # arg in register_option
 
+
 def is_type_factory(_type):
     """
@@ -790,10 +784,10 @@ def inner(x):
 def is_one_of_factory(legal_values):
     def inner(x):
         from pandas.core.common import pprint_thing as pp
-        if not x in legal_values:
+        if x not in legal_values:
             pp_values = lmap(pp, legal_values)
-            raise ValueError("Value must be one of %s"
-                             % pp("|".join(pp_values)))
+            raise ValueError("Value must be one of %s" %
+                             pp("|".join(pp_values)))
 
     return inner
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index df24ef6f3743e..01a39583001c1 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -11,12 +11,10 @@
 """
 
 import pandas.core.config as cf
-from pandas.core.config import (is_int, is_bool, is_text, is_float,
-                                is_instance_factory, is_one_of_factory,
-                                get_default_val)
+from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory,
+                                is_one_of_factory, get_default_val)
 from pandas.core.format import detect_console_encoding
 
-
 #
 # options from the "display" namespace
@@ -61,8 +59,8 @@
 
 pc_max_categories_doc = """
 : int
-    This sets the maximum number of categories pandas should output when printing
-    out a `Categorical` or a Series of dtype "category".
+    This sets the maximum number of categories pandas should output when
+    printing out a `Categorical` or a Series of dtype "category".
 """
 
 pc_max_info_cols_doc = """
@@ -146,9 +144,11 @@
 
 pc_east_asian_width_doc = """
 : boolean
-    Whether to use the Unicode East Asian Width to calculate the display text width
+    Whether to use the Unicode East Asian Width to calculate the display text
+    width.
     Enabling this may affect to the performance (default: False)
 """
 
+
 pc_ambiguous_as_wide_doc = """
 : boolean
     Whether to handle Unicode characters belong to Ambiguous as Wide (width=2)
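A sketch of the registration/validation flow exercised above. Note that `pandas.core.config` is internal API (it later moved to `pandas._config.config`), so this mirrors what `config_init.py` does rather than a supported public interface; the `demo.level` key is hypothetical.

```python
import pandas.core.config as cf

# register_option wires a default, a doc string, and an optional validator.
cf.register_option('demo.level', 'info', 'a demo option (hypothetical key)',
                   validator=cf.is_one_of_factory(['debug', 'info', 'warn']))

print(cf.get_option('demo.level'))    # -> 'info'
cf.set_option('demo.level', 'debug')  # passes the validator

try:
    cf.set_option('demo.level', 'verbose')  # rejected by is_one_of_factory
except ValueError as err:
    print(err)  # "Value must be one of debug|info|warn"
```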
""" pc_large_repr_doc = """ @@ -222,15 +223,16 @@ pc_latex_escape = """ : bool - This specifies if the to_latex method of a Dataframe uses escapes special + This specifies if the to_latex method of a Dataframe uses escapes special characters. - method. Valid values: False,True + method. Valid values: False,True """ pc_latex_longtable = """ :bool - This specifies if the to_latex method of a Dataframe uses the longtable format. - method. Valid values: False,True + This specifies if the to_latex method of a Dataframe uses the longtable + format. + method. Valid values: False,True """ style_backup = dict() @@ -244,7 +246,7 @@ def mpl_style_cb(key): val = cf.get_option(key) if 'matplotlib' not in sys.modules.keys(): - if not(val): # starting up, we get reset to None + if not val: # starting up, we get reset to None return val raise Exception("matplotlib has not been imported. aborting") @@ -267,7 +269,8 @@ def mpl_style_cb(key): validator=is_instance_factory((int, type(None)))) cf.register_option('max_rows', 60, pc_max_rows_doc, validator=is_instance_factory([type(None), int])) - cf.register_option('max_categories', 8, pc_max_categories_doc, validator=is_int) + cf.register_option('max_categories', 8, pc_max_categories_doc, + validator=is_int) cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) cf.register_option('max_columns', 20, pc_max_cols_doc, validator=is_instance_factory([type(None), int])) @@ -305,28 +308,29 @@ def mpl_style_cb(key): cf.register_option('line_width', get_default_val('display.width'), pc_line_width_doc) cf.register_option('memory_usage', True, pc_memory_usage_doc, - validator=is_one_of_factory([None, True, False, 'deep'])) + validator=is_one_of_factory([None, True, + False, 'deep'])) cf.register_option('unicode.east_asian_width', False, pc_east_asian_width_doc, validator=is_bool) cf.register_option('unicode.ambiguous_as_wide', False, pc_east_asian_width_doc, validator=is_bool) - cf.register_option('latex.escape',True, pc_latex_escape, - validator=is_bool) - cf.register_option('latex.longtable',False,pc_latex_longtable, - validator=is_bool) + cf.register_option('latex.escape', True, pc_latex_escape, + validator=is_bool) + cf.register_option('latex.longtable', False, pc_latex_longtable, + validator=is_bool) cf.deprecate_option('display.line_width', msg=pc_line_width_deprecation_warning, rkey='display.width') -cf.deprecate_option('display.height', - msg=pc_height_deprecation_warning, +cf.deprecate_option('display.height', msg=pc_height_deprecation_warning, rkey='display.max_rows') tc_sim_interactive_doc = """ : boolean Whether to simulate interactive mode for purposes of testing """ + with cf.config_prefix('mode'): cf.register_option('sim_interactive', False, tc_sim_interactive_doc) @@ -349,7 +353,6 @@ def use_inf_as_null_cb(key): cf.register_option('use_inf_as_null', False, use_inf_as_null_doc, cb=use_inf_as_null_cb) - # user warnings chained_assignment = """ : string @@ -361,7 +364,6 @@ def use_inf_as_null_cb(key): cf.register_option('chained_assignment', 'warn', chained_assignment, validator=is_one_of_factory([None, 'warn', 'raise'])) - # Set up the io.excel specific configuration. 
writer_engine_doc = """ : string @@ -371,8 +373,7 @@ def use_inf_as_null_cb(key): with cf.config_prefix('io.excel'): # going forward, will be additional writers - for ext, options in [('xls', ['xlwt']), - ('xlsm', ['openpyxl'])]: + for ext, options in [('xls', ['xlwt']), ('xlsm', ['openpyxl'])]: default = options.pop(0) if options: options = " " + ", ".join(options) @@ -384,14 +385,13 @@ def use_inf_as_null_cb(key): def _register_xlsx(engine, other): cf.register_option('xlsx.writer', engine, - writer_engine_doc.format(ext='xlsx', - default=engine, + writer_engine_doc.format(ext='xlsx', default=engine, others=", '%s'" % other), validator=str) try: # better memory footprint - import xlsxwriter + import xlsxwriter # noqa _register_xlsx('xlsxwriter', 'openpyxl') except ImportError: # fallback diff --git a/pandas/core/convert.py b/pandas/core/convert.py index 3745d4f5f6914..7f4fe73c688f8 100644 --- a/pandas/core/convert.py +++ b/pandas/core/convert.py @@ -9,11 +9,10 @@ isnull) import pandas.lib as lib + # TODO: Remove in 0.18 or 2017, which ever is sooner -def _possibly_convert_objects(values, convert_dates=True, - convert_numeric=True, - convert_timedeltas=True, - copy=True): +def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, + convert_timedeltas=True, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ # if we have passed in a list or scalar @@ -27,16 +26,16 @@ def _possibly_convert_objects(values, convert_dates=True, # we take an aggressive stance and convert to datetime64[ns] if convert_dates == 'coerce': - new_values = _possibly_cast_to_datetime( - values, 'M8[ns]', errors='coerce') + new_values = _possibly_cast_to_datetime(values, 'M8[ns]', + errors='coerce') # if we are all nans then leave me alone if not isnull(new_values).all(): values = new_values else: - values = lib.maybe_convert_objects( - values, convert_datetime=convert_dates) + values = lib.maybe_convert_objects(values, + convert_datetime=convert_dates) # convert timedeltas if convert_timedeltas and values.dtype == np.object_: @@ -57,8 +56,8 @@ def _possibly_convert_objects(values, convert_dates=True, if values.dtype == np.object_: if convert_numeric: try: - new_values = lib.maybe_convert_numeric( - values, set(), coerce_numeric=True) + new_values = lib.maybe_convert_numeric(values, set(), + coerce_numeric=True) # if we are all nans then leave me alone if not isnull(new_values).all(): @@ -84,9 +83,8 @@ def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, raise ValueError('At least one of datetime, numeric or timedelta must ' 'be True.') elif conversion_count > 1 and coerce: - raise ValueError("Only one of 'datetime', 'numeric' or " - "'timedelta' can be True when when coerce=True.") - + raise ValueError("Only one of 'datetime', 'numeric' or " + "'timedelta' can be True when when coerce=True.") if isinstance(values, (list, tuple)): # List or scalar @@ -110,19 +108,16 @@ def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, # Soft conversions if datetime: - values = lib.maybe_convert_objects(values, - convert_datetime=datetime) + values = lib.maybe_convert_objects(values, convert_datetime=datetime) if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert - values = lib.maybe_convert_objects(values, - convert_timedelta=timedelta) + values = lib.maybe_convert_objects(values, convert_timedelta=timedelta) if numeric and is_object_dtype(values.dtype): try: - converted = 
@@ -110,19 +108,16 @@ def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
 
     # Soft conversions
     if datetime:
-        values = lib.maybe_convert_objects(values,
-                                           convert_datetime=datetime)
+        values = lib.maybe_convert_objects(values, convert_datetime=datetime)
 
     if timedelta and is_object_dtype(values.dtype):
         # Object check to ensure only run if previous did not convert
-        values = lib.maybe_convert_objects(values,
-                                           convert_timedelta=timedelta)
+        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)
 
     if numeric and is_object_dtype(values.dtype):
         try:
-            converted = lib.maybe_convert_numeric(values,
-                                                  set(),
-                                                  coerce_numeric=True)
+            converted = lib.maybe_convert_numeric(values, set(),
+                                                  coerce_numeric=True)
             # If all NaNs, then do not-alter
             values = converted if not isnull(converted).all() else values
             values = values.copy() if copy else values
diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py
index 28cd97f437f29..91b33d30004b6 100644
--- a/pandas/core/datetools.py
+++ b/pandas/core/datetools.py
@@ -1,8 +1,8 @@
 """A collection of random tools for dealing with dates in Python"""
 
-from pandas.tseries.tools import *
-from pandas.tseries.offsets import *
-from pandas.tseries.frequencies import *
+from pandas.tseries.tools import *  # noqa
+from pandas.tseries.offsets import *  # noqa
+from pandas.tseries.frequencies import *  # noqa
 
 day = DateOffset()
 bday = BDay()
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 37c8e8b1d8829..1ffa836a75a1b 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -1,8 +1,9 @@
 import numpy as np
 
 from pandas.compat import zip
-from pandas.core.common import (isnull, _values_from_object, is_bool_dtype, is_list_like,
-                                is_categorical_dtype, is_object_dtype, take_1d)
+from pandas.core.common import (isnull, _values_from_object, is_bool_dtype,
+                                is_list_like, is_categorical_dtype,
+                                is_object_dtype, take_1d)
 import pandas.compat as compat
 from pandas.core.base import AccessorProperty, NoNewAttributesMixin
 from pandas.util.decorators import Appender, deprecate_kwarg
@@ -11,7 +12,6 @@
 import warnings
 import textwrap
 
-
 _shared_docs = dict()
@@ -138,11 +138,13 @@ def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
         try:
             result = lib.map_infer_mask(arr, f, mask.view(np.uint8))
         except (TypeError, AttributeError):
+
             def g(x):
                 try:
                     return f(x)
                 except (TypeError, AttributeError):
                     return na_value
+
             return _map(g, arr, dtype=dtype)
         if na_value is not np.nan:
             np.putmask(result, mask, na_value)
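The `_map`/`_na_map` machinery above is what makes vectorized string operations NA-aware. For illustration (not part of the patch):

```python
import numpy as np
import pandas as pd

s = pd.Series(['dog', np.nan, 'cat'])

# Missing values pass through untouched instead of raising.
print(s.str.upper())
# 0    DOG
# 1    NaN
# 2    CAT

# The na argument fills the masked slots explicitly, here for contains():
print(s.str.contains('a', na=False))
# 0    False
# 1    False
# 2     True
```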
@@ -206,7 +208,8 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
 
         if regex.groups > 0:
             warnings.warn("This pattern has match groups. To actually get the"
-                          " groups, use str.extract.", UserWarning, stacklevel=3)
+                          " groups, use str.extract.", UserWarning,
+                          stacklevel=3)
 
         f = lambda x: bool(regex.search(x))
     else:
@@ -314,6 +317,7 @@ def str_repeat(arr, repeats):
     repeated : Series/Index of objects
     """
     if np.isscalar(repeats):
+
         def rep(x):
             try:
                 return compat.binary_type.__mul__(x, repeats)
@@ -322,6 +326,7 @@ def rep(x):
 
         return _na_map(rep, arr)
     else:
+
         def rep(x, r):
             try:
                 return compat.binary_type.__mul__(x, r)
@@ -360,7 +365,7 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False):
 
     See Also
     --------
-    contains : analagous, but less strict, relying on re.search instead of
+    contains : analogous, but less strict, relying on re.search instead of
         re.match
     extract : now preferred to the deprecated usage of match (as_indexer=False)
@@ -467,7 +472,6 @@ def str_extract(arr, pat, flags=0):
     2      NaN     NaN
 
     """
-
     from pandas.core.series import Series
     from pandas.core.frame import DataFrame
     from pandas.core.index import Index
@@ -475,7 +479,7 @@ def str_extract(arr, pat, flags=0):
     # just to be safe, check this
     if regex.groups == 0:
         raise ValueError("This pattern contains no groups to capture.")
-    empty_row = [np.nan]*regex.groups
+    empty_row = [np.nan] * regex.groups
 
     def f(x):
         if not isinstance(x, compat.string_types):
@@ -498,10 +502,8 @@ def f(x):
         if arr.empty:
             result = DataFrame(columns=columns, dtype=object)
         else:
-            result = DataFrame([f(val) for val in arr],
-                               columns=columns,
-                               index=arr.index,
-                               dtype=object)
+            result = DataFrame([f(val) for val in arr], columns=columns,
+                               index=arr.index, dtype=object)
     return result, name
@@ -542,7 +544,8 @@ def str_get_dummies(arr, sep='|'):
 
     # GH9980, Index.str does not support get_dummies() as it returns a frame
     if isinstance(arr, Index):
-        raise TypeError("get_dummies is not supported for string methods on Index")
+        raise TypeError("get_dummies is not supported for string methods on "
+                        "Index")
 
     # TODO remove this hack?
     arr = arr.fillna('')
@@ -818,6 +821,7 @@ def f(x):
             if stop is not None:
                 y += x[local_stop:]
             return y
+
         return _na_map(f, arr)
@@ -919,10 +923,10 @@ def str_translate(arr, table, deletechars=None):
     Parameters
     ----------
     table : dict (python 3), str or None (python 2)
-        In python 3, table is a mapping of Unicode ordinals to Unicode ordinals,
-        strings, or None. Unmapped characters are left untouched. Characters
-        mapped to None are deleted. :meth:`str.maketrans` is a helper function
-        for making translation tables.
+        In python 3, table is a mapping of Unicode ordinals to Unicode
+        ordinals, strings, or None. Unmapped characters are left untouched.
+        Characters mapped to None are deleted. :meth:`str.maketrans` is a
+        helper function for making translation tables.
         In python 2, table is either a string of length 256 or None. If the
         table argument is None, no translation is applied and the operation
        simply removes the characters in deletechars. :func:`string.maketrans`
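A quick sketch of the `contains`-versus-`extract` distinction that the reformatted warning above points users toward (not part of the patch; repr details vary by version):

```python
import pandas as pd

s = pd.Series(['a1', 'b2', 'c3'])

# contains() with a grouped pattern triggers the UserWarning above;
# extract() is the intended tool for actually pulling the groups out.
df = s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
print(df)
#   letter digit
# 0      a     1
# 1      b     2
# 2    NaN   NaN   <- non-matching rows get the empty_row of NaNs
```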
@@ -942,7 +946,8 @@ def str_translate(arr, table, deletechars=None):
         if compat.PY3:
             raise ValueError("deletechars is not a valid argument for "
                              "str.translate in python 3. You should simply "
-                             "specify character deletions in the table argument")
+                             "specify character deletions in the table "
+                             "argument")
     f = lambda x: x.translate(table, deletechars)
     return _na_map(f, arr)
@@ -1040,15 +1045,16 @@ def wrapper3(self, pat, na=np.nan):
 
 def copy(source):
     "Copy a docstring from another source function (if present)"
+
     def do_copy(target):
         if source.__doc__:
             target.__doc__ = source.__doc__
         return target
+
     return do_copy
 
 
 class StringMethods(NoNewAttributesMixin):
-
     """
     Vectorized string functions for Series and Index. NAs stay NA unless
     handled otherwise by a particular method. Patterned after Python's string
@@ -1069,8 +1075,7 @@ def __init__(self, data):
 
     def __getitem__(self, key):
         if isinstance(key, slice):
-            return self.slice(start=key.start, stop=key.stop,
-                              step=key.step)
+            return self.slice(start=key.start, stop=key.stop, step=key.step)
         else:
             return self.get(key)
@@ -1087,8 +1092,8 @@ def _wrap_result(self, result, use_codes=True, name=None):
         # for category, we do the stuff on the categories, so blow it up
         # to the full series again
         # But for some operations, we have to do the stuff on the full values,
-        # so make it possible to skip this step as the method already did this before
-        # the transformation...
+        # so make it possible to skip this step as the method already did this
+        # before the transformation...
         if use_codes and self._is_categorical:
             result = take_1d(result, self._orig.cat.codes)
@@ -1142,11 +1147,13 @@ def _wrap_result_expand(self, result, expand=False):
         else:
             index = self._orig.index
             if expand:
+
                 def cons_row(x):
                     if is_list_like(x):
                         return x
                     else:
-                        return [ x ]
+                        return [x]
+
                 cons = self._orig._constructor_expanddim
                 data = [cons_row(x) for x in result]
                 return cons(data, index=index)
@@ -1161,9 +1168,8 @@ def cat(self, others=None, sep=None, na_rep=None):
         result = str_cat(data, others=others, sep=sep, na_rep=na_rep)
         return self._wrap_result(result, use_codes=(not self._is_categorical))
 
-
-    @deprecate_kwarg('return_type', 'expand',
-                     mapping={'series': False, 'frame': True})
+    @deprecate_kwarg('return_type', 'expand', mapping={'series': False,
+                                                       'frame': True})
     @copy(str_split)
     def split(self, pat=None, n=-1, expand=False):
         result = str_split(self._data, pat, n=n)
@@ -1217,17 +1223,24 @@ def rsplit(self, pat=None, n=-1, expand=False):
     1  D_E  _      F
     2    X
     """)
-    @Appender(_shared_docs['str_partition'] % {'side': 'first',
-        'return': '3 elements containing the string itself, followed by two empty strings',
-        'also': 'rpartition : Split the string at the last occurrence of `sep`'})
+
+    @Appender(_shared_docs['str_partition'] % {
+        'side': 'first',
+        'return': '3 elements containing the string itself, followed by two '
+                  'empty strings',
+        'also': 'rpartition : Split the string at the last occurrence of `sep`'
+    })
     def partition(self, pat=' ', expand=True):
         f = lambda x: x.partition(pat)
         result = _na_map(f, self._data)
         return self._wrap_result_expand(result, expand=expand)
 
-    @Appender(_shared_docs['str_partition'] % {'side': 'last',
-        'return': '3 elements containing two empty strings, followed by the string itself',
-        'also': 'partition : Split the string at the first occurrence of `sep`'})
+    @Appender(_shared_docs['str_partition'] % {
+        'side': 'last',
+        'return': '3 elements containing two empty strings, followed by the '
+                  'string itself',
+        'also': 'partition : Split the string at the first occurrence of `sep`'
+    })
     def rpartition(self, pat=' ', expand=True):
         f = lambda x: x.rpartition(pat)
         result = _na_map(f, self._data)
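For orientation (not part of the patch), the split/partition methods touched above in use; `expand=True` is the replacement for the deprecated `return_type` kwarg handled by `@deprecate_kwarg`:

```python
import pandas as pd

s = pd.Series(['a_b_c', 'x_y'])

# expand=True spreads the pieces over DataFrame columns; rows with fewer
# pieces are padded with missing values (None/NaN depending on version).
print(s.str.split('_', expand=True))
#    0  1     2
# 0  a  b     c
# 1  x  y  None

# partition keeps the separator as the middle element of the 3-tuple.
print(s.str.partition('_'))
#    0  1    2
# 0  a  _  b_c
# 1  x  _    y
```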
@@ -1245,14 +1258,14 @@ def join(self, sep):
 
     @copy(str_contains)
     def contains(self, pat, case=True, flags=0, na=np.nan, regex=True):
-        result = str_contains(self._data, pat, case=case, flags=flags,
-                              na=na, regex=regex)
+        result = str_contains(self._data, pat, case=case, flags=flags, na=na,
+                              regex=regex)
         return self._wrap_result(result)
 
     @copy(str_match)
     def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False):
-        result = str_match(self._data, pat, case=case, flags=flags,
-                           na=na, as_indexer=as_indexer)
+        result = str_match(self._data, pat, case=case, flags=flags, na=na,
+                           as_indexer=as_indexer)
         return self._wrap_result(result)
 
     @copy(str_replace)
@@ -1289,7 +1302,7 @@ def pad(self, width, side='left', fillchar=' '):
     """)
 
     @Appender(_shared_docs['str_pad'] % dict(side='left and right',
-                                             method='center'))
+                                             method='center'))
     def center(self, width, fillchar=' '):
         return self.pad(width, side='both', fillchar=fillchar)
@@ -1349,19 +1362,19 @@ def encode(self, encoding, errors="strict"):
     """)
 
     @Appender(_shared_docs['str_strip'] % dict(side='left and right sides',
-                                               method='strip'))
+                                               method='strip'))
     def strip(self, to_strip=None):
         result = str_strip(self._data, to_strip, side='both')
         return self._wrap_result(result)
 
     @Appender(_shared_docs['str_strip'] % dict(side='left side',
-                                               method='lstrip'))
+                                               method='lstrip'))
     def lstrip(self, to_strip=None):
         result = str_strip(self._data, to_strip, side='left')
         return self._wrap_result(result)
 
     @Appender(_shared_docs['str_strip'] % dict(side='right side',
-                                               method='rstrip'))
+                                               method='rstrip'))
     def rstrip(self, to_strip=None):
         result = str_strip(self._data, to_strip, side='right')
         return self._wrap_result(result)
@@ -1417,14 +1430,16 @@ def extract(self, pat, flags=0):
     %(also)s
     """)
 
-    @Appender(_shared_docs['find'] % dict(side='lowest', method='find',
-              also='rfind : Return highest indexes in each strings'))
+    @Appender(_shared_docs['find'] %
+              dict(side='lowest', method='find',
+                   also='rfind : Return highest indexes in each strings'))
    def find(self, sub, start=0, end=None):
        result = str_find(self._data, sub, start=start, end=end, side='left')
        return self._wrap_result(result)

-    @Appender(_shared_docs['find'] % dict(side='highest', method='rfind',
-              also='find : Return lowest indexes in each strings'))
+    @Appender(_shared_docs['find'] %
+              dict(side='highest', method='rfind',
+                   also='find : Return lowest indexes in each strings'))
     def rfind(self, sub, start=0, end=None):
         result = str_find(self._data, sub, start=start, end=end, side='right')
         return self._wrap_result(result)
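Not part of the patch — a small sketch of the pad/strip/find family whose shared docstrings are rewrapped above:

```python
import pandas as pd

s = pd.Series(['  cat ', 'dog'])

# strip/lstrip/rstrip all funnel into str_strip with a side argument.
print(s.str.strip().tolist())  # ['cat', 'dog']

# center is just pad(side='both'); find returns -1 when the substring is
# absent, while index (below) raises ValueError instead.
print(s.str.strip().str.center(7, fillchar='-').tolist())
# ['--cat--', '--dog--']
print(s.str.find('o').tolist())  # [-1, 1]
```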
@@ -1450,9 +1465,9 @@ def normalize(self, form):
 
     _shared_docs['index'] = ("""
     Return %(side)s indexes in each strings where the substring is
-    fully contained between [start:end]. This is the same as ``str.%(similar)s``
-    except instead of returning -1, it raises a ValueError when the substring
-    is not found. Equivalent to standard ``str.%(method)s``.
+    fully contained between [start:end]. This is the same as
+    ``str.%(similar)s`` except instead of returning -1, it raises a ValueError
+    when the substring is not found. Equivalent to standard ``str.%(method)s``.
 
     Parameters
     ----------
@@ -1472,14 +1487,16 @@ def normalize(self, form):
     %(also)s
     """)
 
-    @Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index',
-              also='rindex : Return highest indexes in each strings'))
+    @Appender(_shared_docs['index'] %
+              dict(side='lowest', similar='find', method='index',
+                   also='rindex : Return highest indexes in each strings'))
     def index(self, sub, start=0, end=None):
         result = str_index(self._data, sub, start=start, end=end, side='left')
         return self._wrap_result(result)
 
-    @Appender(_shared_docs['index'] % dict(side='highest', similar='rfind', method='rindex',
-              also='index : Return lowest indexes in each strings'))
+    @Appender(_shared_docs['index'] %
+              dict(side='highest', similar='rfind', method='rindex',
+                   also='index : Return lowest indexes in each strings'))
     def rindex(self, sub, start=0, end=None):
         result = str_index(self._data, sub, start=start, end=end, side='right')
         return self._wrap_result(result)
@@ -1568,6 +1585,7 @@ def rindex(self, sub, start=0, end=None):
                              docstring=_shared_docs['ismethods'] %
                              _shared_docs['isdecimal'])
 
+
 class StringAccessorMixin(object):
     """ Mixin to add a `.str` acessor to the class."""
@@ -1575,15 +1593,15 @@ class StringAccessorMixin(object):
     def _make_str_accessor(self):
         from pandas.core.series import Series
         from pandas.core.index import Index
-        if isinstance(self, Series) and not(
-                (is_categorical_dtype(self.dtype) and
-                 is_object_dtype(self.values.categories)) or
-                (is_object_dtype(self.dtype))):
-            # it's neither a string series not a categorical series with strings
-            # inside the categories.
-            # this really should exclude all series with any non-string values (instead of test
-            # for object dtype), but that isn't practical for performance reasons until we have a
-            # str dtype (GH 9343)
+        if (isinstance(self, Series) and
+                not ((is_categorical_dtype(self.dtype) and
+                      is_object_dtype(self.values.categories)) or
+                     (is_object_dtype(self.dtype)))):
+            # it's neither a string series nor a categorical series with
+            # strings inside the categories.
+            # this really should exclude all series with any non-string values
+            # (instead of test for object dtype), but that isn't practical for
+            # performance reasons until we have a str dtype (GH 9343)
             raise AttributeError("Can only use .str accessor with string "
                                  "values, which use np.object_ dtype in "
                                  "pandas")
@@ -1592,10 +1610,12 @@ def _make_str_accessor(self):
             allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
             if self.inferred_type not in allowed_types:
-                message = ("Can only use .str accessor with string values "
-                           "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
+                message = ("Can only use .str accessor with string values "
+                           "(i.e. inferred_type is 'string', 'unicode' or "
+                           "'mixed')")
                 raise AttributeError(message)
             if self.nlevels > 1:
-                message = "Can only use .str accessor with Index, not MultiIndex"
+                message = ("Can only use .str accessor with Index, not "
+                           "MultiIndex")
                 raise AttributeError(message)
         return StringMethods(self)
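The dtype guard in `_make_str_accessor` above rejects non-string data up front. An illustration (not part of the patch):

```python
import pandas as pd

nums = pd.Series([1, 2, 3])
try:
    nums.str.upper()  # fails at accessor creation, not at method call
except AttributeError as err:
    print(err)  # "Can only use .str accessor with string values..."

# Object-dtype strings and string categoricals both pass the check;
# the categorical path goes through _wrap_result's take_1d re-expansion.
cat = pd.Series(['a', 'b'], dtype='category')
print(cat.str.upper().tolist())  # ['A', 'B']
```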
diff --git a/pandas/core/style.py b/pandas/core/style.py
index d8cb53e04ea03..b2302f311e01e 100644
--- a/pandas/core/style.py
+++ b/pandas/core/style.py
@@ -154,9 +154,7 @@ def __init__(self, data, precision=None, table_styles=None, uuid=None,
         self.table_attributes = table_attributes
 
     def _repr_html_(self):
-        '''
-        Hooks into Jupyter notebook rich display system.
-        '''
+        """Hooks into Jupyter notebook rich display system."""
         return self.render()
 
     def _translate(self):
@@ -196,31 +194,34 @@ def _translate(self):
 
         head = []
 
         for r in range(n_clvls):
-            row_es = [{"type": "th", "value": BLANK_VALUE,
+            row_es = [{"type": "th",
+                       "value": BLANK_VALUE,
                        "class": " ".join([BLANK_CLASS])}] * n_rlvls
             for c in range(len(clabels[0])):
                 cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c]
-                cs.extend(cell_context.get(
-                    "col_headings", {}).get(r, {}).get(c, []))
-                row_es.append({"type": "th", "value": clabels[r][c],
+                cs.extend(
+                    cell_context.get("col_headings", {}).get(r, {}).get(c, []))
+                row_es.append({"type": "th",
+                               "value": clabels[r][c],
                                "class": " ".join(cs)})
             head.append(row_es)
 
         body = []
         for r, idx in enumerate(self.data.index):
             cs = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r]
-            cs.extend(cell_context.get(
-                "row_headings", {}).get(r, {}).get(c, []))
+            cs.extend(
+                cell_context.get("row_headings", {}).get(r, {}).get(c, []))
             row_es = [{"type": "th", "value": rlabels[r][c],
-                       "class": " ".join(cs)}
-                      for c in range(len(rlabels[r]))]
+                       "class": " ".join(cs)} for c in range(len(rlabels[r]))]
 
             for c, col in enumerate(self.data.columns):
                 cs = [DATA_CLASS, "row%s" % r, "col%s" % c]
                 cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
-                row_es.append({"type": "td", "value": self.data.iloc[r][c],
-                               "class": " ".join(cs), "id": "_".join(cs[1:])})
+                row_es.append({"type": "td",
+                               "value": self.data.iloc[r][c],
+                               "class": " ".join(cs),
+                               "id": "_".join(cs[1:])})
                 props = []
                 for x in ctx[r, c]:
                     # have to handle empty styles like ['']
@@ -228,10 +229,8 @@ def _translate(self):
                         props.append(x.split(":"))
                     else:
                         props.append(['', ''])
-                cellstyle.append(
-                    {'props': props,
-                     'selector': "row%s_col%s" % (r, c)}
-                )
+                cellstyle.append({'props': props,
+                                  'selector': "row%s_col%s" % (r, c)})
             body.append(row_es)
 
         return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid,
@@ -262,8 +261,8 @@ def render(self):
         # filter out empty styles, every cell will have a class
         # but the list of props may just be [['', '']].
         # so we have the neested anys below
-        trimmed = [x for x in d['cellstyle'] if
-                   any(any(y) for y in x['props'])]
+        trimmed = [x for x in d['cellstyle']
                   if any(any(y) for y in x['props'])]
         d['cellstyle'] = trimmed
         return self.template.render(**d)
@@ -306,22 +305,21 @@ def __deepcopy__(self, memo):
         return self._copy(deepcopy=True)
 
     def clear(self):
-        '''
-        "Reset" the styler, removing any previously applied styles.
+        """"Reset" the styler, removing any previously applied styles.
 
         Returns None.
-        '''
+        """
         self.ctx.clear()
         self._todo = []
 
     def _compute(self):
-        '''
+        """
         Execute the style functions built up in `self._todo`.
 
         Relies on the conventions that all style functions go through
        .apply or .applymap. The append styles to apply as tuples of
 
        (application method, *args, **kwargs)
-        '''
+        """
         r = self
         for func, args, kwargs in self._todo:
             r = func(self)(*args, **kwargs)
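A sketch of the Styler pipeline being restyled above (not part of the patch): style functions are queued in `_todo` via `.apply`/`.applymap`, then `_compute`/`_translate`/`render` turn them into per-cell CSS. Requires Jinja2; note that newer pandas renames `applymap` to `Styler.map` and `render` to `to_html`.

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])

def color_negative(val):
    # applymap-style function: one scalar in, one CSS string out
    return 'color: red' if val < 0 else ''

styler = df.style.applymap(color_negative)
html = styler.render()  # _compute runs the queued functions here
print(html[:60])        # '<style ...' markup with per-cell selectors
```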
""" self._todo.append((lambda instance: getattr(instance, '_apply'), - (func, axis, subset), - kwargs)) + (func, axis, subset), kwargs)) return self def _applymap(self, func, subset=None, **kwargs): @@ -404,8 +401,7 @@ def applymap(self, func, subset=None, **kwargs): """ self._todo.append((lambda instance: getattr(instance, '_applymap'), - (func, subset), - kwargs)) + (func, subset), kwargs)) return self def set_precision(self, precision): @@ -574,8 +570,8 @@ def highlight_null(self, null_color='red'): self.applymap(self._highlight_null, null_color=null_color) return self - def background_gradient(self, cmap='PuBu', low=0, high=0, - axis=0, subset=None): + def background_gradient(self, cmap='PuBu', low=0, high=0, axis=0, + subset=None): """ Color the background in a gradient according to the data in each column (optionally row). @@ -648,8 +644,8 @@ def set_properties(self, subset=None, **kwargs): >>> df = pd.DataFrame(np.random.randn(10, 4)) >>> df.style.set_properties(color="white", align="right") """ - values = ';'.join('{p}: {v}'.format(p=p, v=v) for p, v in - kwargs.items()) + values = ';'.join('{p}: {v}'.format(p=p, v=v) + for p, v in kwargs.items()) f = lambda x: values return self.applymap(f, subset=subset) @@ -658,7 +654,8 @@ def _bar(s, color, width): normed = width * (s - s.min()) / (s.max() - s.min()) base = 'width: 10em; height: 80%;' - attrs = base + 'background: linear-gradient(90deg,{c} {w}%, transparent 0%)' + attrs = (base + 'background: linear-gradient(90deg,{c} {w}%, ' + 'transparent 0%)') return [attrs.format(c=color, w=x) if x != 0 else base for x in normed] def bar(self, subset=None, axis=0, color='#d65f5f', width=100): @@ -741,9 +738,7 @@ def _highlight_handler(self, subset=None, color='yellow', axis=None, @staticmethod def _highlight_extrema(data, color='yellow', max_=True): - ''' - highlight the min or max in a Series or DataFrame - ''' + """Highlight the min or max in a Series or DataFrame""" attr = 'background-color: {0}'.format(color) if data.ndim == 1: # Series from .apply if max_: