Fix Categorical.logp with take_along_axis #3572

Status: Merged (5 commits, Aug 17, 2019)
2 changes: 2 additions & 0 deletions RELEASE-NOTES.md
@@ -13,6 +13,8 @@
 ### Maintenance
 - Moved math operations out of `Rice`, `TruncatedNormal`, `Triangular` and `ZeroInflatedNegativeBinomial` `random` methods. Math operations on values returned by `draw_values` might not broadcast well, and all the `size` aware broadcasting is left to `generate_samples`. Fixes [#3481](https://github.com/pymc-devs/pymc3/issues/3481) and [#3508](https://github.com/pymc-devs/pymc3/issues/3508)
 - Parallelization of population steppers (`DEMetropolis`) is now set via the `cores` argument. ([#3559](https://github.com/pymc-devs/pymc3/pull/3559))
+- Fixed a bug in `Categorical.logp`. With multidimensional `p`, the indexing was done incorrectly, producing wrongly shaped tensors that consumed `O(n**2)` memory instead of `O(n)`; a sketch of the bug follows this list. Fixes issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535)
+- Fixed a defect in `OrderedLogistic.__init__` that unnecessarily increased the dimensionality of the underlying `p`. Related to issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535) but not its root cause.
 - SMC: stabilize covariance matrix [#3573](https://github.com/pymc-devs/pymc3/pull/3573)
 - SMC is no longer a step method of `pm.sample`; it should now be called using `pm.sample_smc` [#3579](https://github.com/pymc-devs/pymc3/pull/3579)
 - Now uses `multiprocessing` rather than `psutil` to count CPUs, which results in reliable core counts on Chromebooks.
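
A minimal NumPy sketch of the indexing bug described above (NumPy stands in for Theano; the sizes and names are illustrative, not the library's own):

import numpy as np

n = 1000
p = np.random.dirichlet(np.ones(5), size=n)   # (n, 5): one probability row per observation
value = np.random.randint(0, 5, size=n)       # one observed category per row

# Schematically, the old code moved the category axis to the front and then
# fancy-indexed with `value`, which broadcasts to an (n, n) intermediate ...
bad = np.log(np.moveaxis(p, -1, 0)[value])    # shape (n, n): O(n**2) memory
# ... of which only the diagonal is the wanted logp.
good = np.log(np.take_along_axis(p, value[:, None], axis=-1))[:, 0]  # shape (n,)

assert np.allclose(np.diag(bad), good)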
25 changes: 19 additions & 6 deletions pymc3/distributions/discrete.py
@@ -8,7 +8,7 @@
 from .distribution import Discrete, draw_values, generate_samples
 from .shape_utils import broadcast_distribution_samples
 from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
-from ..theanof import floatX, intX
+from ..theanof import floatX, intX, take_along_axis


 __all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull',
@@ -997,8 +997,21 @@ def logp(self, value):
         p = p_ / tt.sum(p_, axis=-1, keepdims=True)

         if p.ndim > 1:
+            if p.ndim > value_clip.ndim:
+                value_clip = tt.shape_padleft(
+                    value_clip, p_.ndim - value_clip.ndim
+                )
+            elif p.ndim < value_clip.ndim:
+                p = tt.shape_padleft(
+                    p, value_clip.ndim - p_.ndim
+                )
             pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
-            a = tt.log(p.dimshuffle(pattern)[value_clip])
+            a = tt.log(
+                take_along_axis(
+                    p.dimshuffle(pattern),
+                    value_clip,
+                )
+            )
         else:
             a = tt.log(p[value_clip])

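For reference, a small NumPy sketch of the take_along_axis semantics the new code relies on. The Theano helper is called above without an explicit axis, so it selects along the leading axis that dimshuffle(pattern) moves the categories to; the values below are illustrative:

import numpy as np

p_moved = np.array([[0.1, 0.2],
                    [0.9, 0.8]])   # (categories, batch), as after dimshuffle
value_clip = np.array([[1, 0]])    # one category pick per batch column
out = np.take_along_axis(p_moved, value_clip, axis=0)
print(out)                         # [[0.9 0.2]], i.e. out[0, j] == p_moved[value_clip[0, j], j]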
@@ -1571,13 +1584,13 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
         self.eta = tt.as_tensor_variable(floatX(eta))
         self.cutpoints = tt.as_tensor_variable(cutpoints)

-        pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))
+        pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta))
         p_cum = tt.concatenate([
-            tt.zeros_like(tt.shape_padright(pa[:, 0])),
+            tt.zeros_like(tt.shape_padright(pa[..., 0])),
             pa,
-            tt.ones_like(tt.shape_padright(pa[:, 0]))
+            tt.ones_like(tt.shape_padright(pa[..., 0]))
         ], axis=-1)
-        p = p_cum[:, 1:] - p_cum[:, :-1]
+        p = p_cum[..., 1:] - p_cum[..., :-1]

         super().__init__(p=p, *args, **kwargs)

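A NumPy sketch of the dimensionality defect fixed above (shapes illustrative): shape_padleft always prepended an axis to the cutpoints, which inflates the result once eta and cutpoints already carry batch dimensions, whereas plain broadcasting does not:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

eta = np.zeros((3, 2))                                 # batched latent scores
cutpoints = np.tile(np.linspace(-2, 2, 9), (3, 2, 1))  # matching batched cutpoints

pa_old = sigmoid(cutpoints[None, ...] - eta[..., None])  # shape (1, 3, 2, 9): extra axis
pa_new = sigmoid(cutpoints - eta[..., None])             # shape (3, 2, 9)

assert pa_old.shape == (1, 3, 2, 9) and pa_new.shape == (3, 2, 9)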
32 changes: 32 additions & 0 deletions pymc3/tests/test_distributions.py
@@ -1335,3 +1335,35 @@ def test_discrete_trafo():
    with pytest.raises(ValueError) as err:
        Binomial('a', n=5, p=0.5, transform='log')
    err.match('Transformations for discrete distributions')


@pytest.mark.parametrize("shape", [tuple(), (1,), (3, 1), (3, 2)], ids=str)
def test_orderedlogistic_dimensions(shape):
    # Test for issue #3535
    loge = np.log10(np.exp(1))
    size = 7
    p = np.ones(shape + (10,)) / 10
    cutpoints = np.tile(logit(np.linspace(0, 1, 11)[1:-1]), shape + (1,))
    obs = np.random.randint(0, 1, size=(size,) + shape)
    with Model():
        ol = OrderedLogistic(
            "ol",
            eta=np.zeros(shape),
            cutpoints=cutpoints,
            shape=shape,
            observed=obs
        )
        c = Categorical(
            "c",
            p=p,
            shape=shape,
            observed=obs
        )
    ologp = ol.logp({"ol": 1}) * loge
    clogp = c.logp({"c": 1}) * loge
    expected = -np.prod((size,) + shape)

    assert c.distribution.p.ndim == (len(shape) + 1)
    assert np.allclose(clogp, expected)
    assert ol.distribution.p.ndim == (len(shape) + 1)
    assert np.allclose(ologp, expected)
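
A note on the loge factor in this test: with 10 equiprobable categories every observation contributes ln(1/10) to the logp, and multiplying by log10(e) = 1/ln(10) leaves exactly -1 per observation, which is why the expected value is the round number -np.prod((size,) + shape). A quick check:

import numpy as np

n = 7                            # "size" in the test, for shape == tuple()
total_logp = n * np.log(1 / 10)  # each of the 10 categories has probability 1/10
assert np.isclose(total_logp * np.log10(np.e), -n)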
220 changes: 217 additions & 3 deletions pymc3/tests/test_theanof.py
@@ -1,9 +1,61 @@
 import collections

 import pytest
-from theano import theano
+from itertools import product
+
+from theano import theano, tensor as tt
+import numpy as np

-from pymc3.theanof import set_theano_conf
+from pymc3.theanof import set_theano_conf, take_along_axis, _conversion_map
+from pymc3.vartypes import int_types
+
+
+FLOATX = str(theano.config.floatX)
+INTX = str(_conversion_map[FLOATX])
+
+
+def _make_along_axis_idx(arr_shape, indices, axis):
+    # compute dimensions to iterate over
+    if str(indices.dtype) not in int_types:
+        raise IndexError('`indices` must be an integer array')
+    shape_ones = (1,) * indices.ndim
+    dest_dims = list(range(axis)) + [None] + list(range(axis + 1, indices.ndim))
+
+    # build a fancy index, consisting of orthogonal aranges, with the
+    # requested index inserted at the right location
+    fancy_index = []
+    for dim, n in zip(dest_dims, arr_shape):
+        if dim is None:
+            fancy_index.append(indices)
+        else:
+            ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim + 1:]
+            fancy_index.append(np.arange(n).reshape(ind_shape))
+
+    return tuple(fancy_index)
+
+
+# np.take_along_axis was added in numpy 1.15, so this fallback can be
+# removed once that version of numpy is required
+if hasattr(np, "take_along_axis"):
+    np_take_along_axis = np.take_along_axis
+else:
+    def np_take_along_axis(arr, indices, axis):
+        if arr.shape[axis] <= 32:
+            # We can safely test with numpy's choose
+            arr = np.moveaxis(arr, axis, 0)
+            indices = np.moveaxis(indices, axis, 0)
+            out = np.choose(indices, arr)
+            return np.moveaxis(out, 0, axis)
+        else:
+            # numpy's choose cannot handle such a large axis so we
+            # just use the implementation of take_along_axis. This is kind of
+            # cheating because our implementation is the same as the one below
+            if axis < 0:
+                _axis = arr.ndim + axis
+            else:
+                _axis = axis
+            if _axis < 0 or _axis >= arr.ndim:
+                raise ValueError(
+                    "Supplied axis {} is out of bounds".format(axis)
+                )
+            return arr[_make_along_axis_idx(arr.shape, indices, _axis)]


 class TestSetTheanoConfig:
@@ -26,3 +78,165 @@ def test_restore(self):
        assert conf == {'compute_test_value': 'off'}
        conf = set_theano_conf(conf)
        assert conf == {'compute_test_value': 'raise'}


class TestTakeAlongAxis():
    def setup_class(self):
        self.inputs_buffer = dict()
        self.output_buffer = dict()
        self.func_buffer = dict()

    def _input_tensors(self, shape):
        ndim = len(shape)
        arr = tt.TensorType(FLOATX, [False] * ndim)("arr")
        indices = tt.TensorType(INTX, [False] * ndim)("indices")
        arr.tag.test_value = np.zeros(shape, dtype=FLOATX)
        indices.tag.test_value = np.zeros(shape, dtype=INTX)
        return arr, indices

    def get_input_tensors(self, shape):
        ndim = len(shape)
        try:
            return self.inputs_buffer[ndim]
        except KeyError:
            arr, indices = self._input_tensors(shape)
            self.inputs_buffer[ndim] = arr, indices
            return arr, indices

    def _output_tensor(self, arr, indices, axis):
        return take_along_axis(arr, indices, axis)

    def get_output_tensors(self, shape, axis):
        ndim = len(shape)
        try:
            return self.output_buffer[(ndim, axis)]
        except KeyError:
            arr, indices = self.get_input_tensors(shape)
            out = self._output_tensor(arr, indices, axis)
            self.output_buffer[(ndim, axis)] = out
            return out

    def _function(self, arr, indices, out):
        return theano.function([arr, indices], [out])

    def get_function(self, shape, axis):
        ndim = len(shape)
        try:
            return self.func_buffer[(ndim, axis)]
        except KeyError:
            arr, indices = self.get_input_tensors(shape)
            out = self.get_output_tensors(shape, axis)
            func = self._function(arr, indices, out)
            self.func_buffer[(ndim, axis)] = func
            return func

    @staticmethod
    def get_input_values(shape, axis, samples):
        arr = np.random.randn(*shape).astype(FLOATX)
        size = list(shape)
        size[axis] = samples
        size = tuple(size)
        indices = np.random.randint(
            low=0, high=shape[axis], size=size, dtype=INTX
        )
        return arr, indices

    @pytest.mark.parametrize(
        ["shape", "axis", "samples"],
        product(
            [
                (1,),
                (3,),
                (3, 1),
                (3, 2),
                (1, 1),
                (1, 2),
                (40, 40),  # choose fails here
                (5, 1, 1),
                (5, 1, 2),
                (5, 3, 1),
                (5, 3, 2),
            ],
            [0, -1],
            [1, 10],
        ),
        ids=str,
    )
    def test_take_along_axis(self, shape, axis, samples):
        arr, indices = self.get_input_values(shape, axis, samples)
        func = self.get_function(shape, axis)
        assert np.allclose(
            np_take_along_axis(arr, indices, axis=axis),
            func(arr, indices)[0]
        )

    @pytest.mark.parametrize(
        ["shape", "axis", "samples"],
        product(
            [
                (1,),
                (3,),
                (3, 1),
                (3, 2),
                (1, 1),
                (1, 2),
                (40, 40),  # choose fails here
                (5, 1, 1),
                (5, 1, 2),
                (5, 3, 1),
                (5, 3, 2),
            ],
            [0, -1],
            [1, 10],
        ),
        ids=str,
    )
    def test_take_along_axis_grad(self, shape, axis, samples):
        if axis < 0:
            _axis = len(shape) + axis
        else:
            _axis = axis
        # Setup the theano function
        t_arr, t_indices = self.get_input_tensors(shape)
        t_out2 = theano.grad(
            tt.sum(self._output_tensor(t_arr**2, t_indices, axis)),
            t_arr,
        )
        func = theano.function([t_arr, t_indices], [t_out2])

        # Test that the gradient gives the same output as what is expected
        arr, indices = self.get_input_values(shape, axis, samples)
        expected_grad = np.zeros_like(arr)
        slicer = [slice(None)] * len(shape)
        for i in range(indices.shape[axis]):
            slicer[axis] = i
            inds = indices[tuple(slicer)].reshape(
                shape[:_axis] + (1,) + shape[_axis + 1:]
            )
            inds = _make_along_axis_idx(shape, inds, _axis)
            expected_grad[inds] += 1
        expected_grad *= 2 * arr
        out = func(arr, indices)[0]
        assert np.allclose(out, expected_grad)

    @pytest.mark.parametrize("axis", [-4, 4], ids=str)
    def test_axis_failure(self, axis):
        arr, indices = self.get_input_tensors((3, 1))
        with pytest.raises(ValueError):
            take_along_axis(arr, indices, axis=axis)

    def test_ndim_failure(self):
        arr = tt.TensorType(FLOATX, [False] * 3)("arr")
        indices = tt.TensorType(INTX, [False] * 2)("indices")
        arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX)
        indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=INTX)
        with pytest.raises(ValueError):
            take_along_axis(arr, indices)

    def test_dtype_failure(self):
        arr = tt.TensorType(FLOATX, [False] * 3)("arr")
        indices = tt.TensorType(FLOATX, [False] * 3)("indices")
        arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX)
        indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=FLOATX)
        with pytest.raises(IndexError):
            take_along_axis(arr, indices)