Skip to content

implement Array.query #1058

@gdementen

Description

@gdementen

Here is a bit of code I did for Geert:

def eval_expr_on_array(arr, expr):
    """Evaluate *expr* with every name it references looked up in *arr*.

    Each name used by the compiled expression is resolved as ``arr[name]``
    and the expression is then evaluated against those values.
    """
    code = compile(expr, '<expr>', 'eval')
    namespace = {}
    # co_names holds the names referenced by the compiled expression
    for label in code.co_names:
        namespace[label] = arr[label]
    return eval(code, namespace)

>>> arr = ndtest("axis=a,b,c,d")
>>> expr = "a + b - (c * d)"
>>> eval_expr_on_array(arr, expr)
-5

But it does not support ambiguous labels. Then, for the fun of it, I thought it wouldn't be too hard to implement that and I did the following code:

class ArrayLabel:
    """Deferred reference to ``array[name]``.

    The subset is only extracted when :meth:`eval` is called. The operator
    methods defined below call it on demand and delegate the actual
    computation to the method of the same name on ``Array``.
    """

    def __init__(self, array, name):
        self.array = array
        self.name = name

    def eval(self):
        """Return ``self.array[self.name]``."""
        return self.array[self.name]

    def _binop(opname):
        """Build the ``__<opname>__`` method forwarding to ``Array.__<opname>__``."""
        fullname = f'__{opname}__'
        super_method = getattr(Array, fullname)

        def opmethod(self, other) -> 'Array':
            # Only operands which are still deferred need to be evaluated.
            # Anything else (e.g. a scalar, or the result of an earlier
            # operation) is handed to the Array method unchanged instead of
            # being rejected.
            if isinstance(other, ArrayLabel):
                other = other.eval()
            return super_method(self.eval(), other)
        opmethod.__name__ = fullname
        return opmethod

    # NOTE(review): these methods only run when Python dispatches the
    # operation to ArrayLabel. When the left operand is some other type whose
    # own operator method returns a value rather than NotImplemented, the
    # reflected (__r*__) methods below are never tried -- confirm how Array
    # behaves for unknown operand types.
    __lt__ = _binop('lt')
    __le__ = _binop('le')
    __eq__ = _binop('eq')
    __ne__ = _binop('ne')
    __gt__ = _binop('gt')
    __ge__ = _binop('ge')
    __add__ = _binop('add')
    __radd__ = _binop('radd')
    __sub__ = _binop('sub')
    __rsub__ = _binop('rsub')
    __mul__ = _binop('mul')
    __rmul__ = _binop('rmul')
    # div and rdiv are no longer used on Python3+
    __truediv__ = _binop('truediv')
    __rtruediv__ = _binop('rtruediv')
    __floordiv__ = _binop('floordiv')
    __rfloordiv__ = _binop('rfloordiv')
    __mod__ = _binop('mod')
    __rmod__ = _binop('rmod')
    __divmod__ = _binop('divmod')
    __rdivmod__ = _binop('rdivmod')
    __pow__ = _binop('pow')
    __rpow__ = _binop('rpow')
    __lshift__ = _binop('lshift')
    __rlshift__ = _binop('rlshift')
    __rshift__ = _binop('rshift')
    __rrshift__ = _binop('rrshift')
    __and__ = _binop('and')
    __rand__ = _binop('rand')
    __xor__ = _binop('xor')
    __rxor__ = _binop('rxor')
    __or__ = _binop('or')
    __ror__ = _binop('ror')

class AxisModifier:
    """Pair an array with one of its axes so a label can be looked up on that axis.

    Indexing an instance with an ``ArrayLabel`` returns a new ``ArrayLabel``
    on the same array whose name is ``axis[<label name>]``.
    """

    def __init__(self, array, axis):
        self.array, self.axis = array, axis

    def __getitem__(self, subset):
        assert isinstance(subset, ArrayLabel)
        # re-express the bare label as a key of this specific axis
        label_on_axis = self.axis[subset.name]
        return ArrayLabel(self.array, label_on_axis)

def query_array(array, expr):
    """Evaluate *expr* with its names bound to axes or labels of *array*.

    A name found in ``array.axes`` is bound to an ``AxisModifier`` for that
    axis; any other name is bound to an ``ArrayLabel`` for that name on
    *array*. The expression is then evaluated with those bindings.
    """
    code = compile(expr, '<expr>', 'eval')
    names = code.co_names
    array_axes = array.axes
    namespace = {}
    for name in names:
        if name in array_axes:
            namespace[name] = AxisModifier(array, array_axes[name])
        else:
            namespace[name] = ArrayLabel(array, name)
    return eval(code, namespace)

>>> arr = ndtest("axis1=a,b,c,d;axis2=b,e,f")
>>> expr = "a + axis1[b] - (c * d)"
>>> query_array(arr, expr)
axis2    b    e    f
       -51  -65  -81

... but it's buggy. The result depends on the evaluation order of the operands: at some point we end up with binary operations between an Array and an ArrayLabel, which return False (courtesy of Array._binop, which does so for unknown types -- btw, this behavior was intended for == and is really surprising for other operators).

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions