diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 772e1a6ce6..f3c116a8ff 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -29,7 +29,7 @@ from aesara import tensor as at from aesara.compile.builders import OpFromGraph from aesara.graph import node_rewriter -from aesara.graph.basic import Node, Variable, clone_replace +from aesara.graph.basic import Node, clone_replace from aesara.graph.rewriting.basic import in2out from aesara.graph.utils import MetaType from aesara.tensor.basic import as_tensor_variable @@ -42,9 +42,6 @@ from pymc.distributions.shape_utils import ( Dims, Shape, - StrongDims, - StrongShape, - change_dist_size, convert_dims, convert_shape, convert_size, @@ -154,35 +151,6 @@ def fn(*args, **kwargs): return fn -def _make_rv_and_resize_shape_from_dims( - *, - cls, - dims: Optional[StrongDims], - model, - observed, - args, - **kwargs, -) -> Tuple[Variable, StrongShape]: - """Creates the RV, possibly using dims or observed to determine a resize shape (if needed).""" - resize_shape_from_dims = None - size_or_shape = kwargs.get("size") or kwargs.get("shape") - - # Preference is given to size or shape. If not specified, we rely on dims and - # finally, observed, to determine the shape of the variable. Because dims can be - # specified on the fly, we need a two-step process where we first create the RV - # without dims information and then resize it. - if not size_or_shape and observed is not None: - kwargs["shape"] = tuple(observed.shape) - - # Create the RV without dims information - rv_out = cls.dist(*args, **kwargs) - - if not size_or_shape and dims is not None: - resize_shape_from_dims = shape_from_dims(dims, tuple(rv_out.shape), model) - - return rv_out, resize_shape_from_dims - - class SymbolicRandomVariable(OpFromGraph): """Symbolic Random Variable @@ -311,17 +279,15 @@ def __new__( if observed is not None: observed = convert_observed_data(observed) - # Create the RV, without taking `dims` into consideration - rv_out, resize_shape_from_dims = _make_rv_and_resize_shape_from_dims( - cls=cls, dims=dims, model=model, observed=observed, args=args, **kwargs - ) + # Preference is given to size or shape. If not specified, we rely on dims and + # finally, observed, to determine the shape of the variable. + if not ("size" in kwargs or "shape" in kwargs): + if dims is not None: + kwargs["shape"] = shape_from_dims(dims, model) + elif observed is not None: + kwargs["shape"] = tuple(observed.shape) - # Resize variable based on `dims` information - if resize_shape_from_dims: - resize_size_from_dims = find_size( - shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.owner.op.ndim_supp - ) - rv_out = change_dist_size(dist=rv_out, new_size=resize_size_from_dims, expand=False) + rv_out = cls.dist(*args, **kwargs) rv_out = model.register_rv( rv_out, diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 74a96ba4b1..59c6666249 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -480,17 +480,13 @@ def convert_size(size: Size) -> Optional[StrongSize]: return size -def shape_from_dims( - dims: StrongDims, shape_implied: Sequence[TensorVariable], model -) -> StrongShape: +def shape_from_dims(dims: StrongDims, model) -> StrongShape: """Determines shape from a `dims` tuple. Parameters ---------- dims : array-like A vector of dimension names or None. - shape_implied : tensor_like of int - Shape of RV implied from its inputs alone. model : pm.Model The current model on stack. @@ -499,20 +495,15 @@ def shape_from_dims( dims : tuple of (str or None) Names or None for all RV dimensions. """ - ndim_resize = len(dims) - len(shape_implied) - # Dims must be known already or be inferrable from implied dimensions of the RV - unknowndim_resize_dims = set(dims[:ndim_resize]) - set(model.dim_lengths) - if unknowndim_resize_dims: + # Dims must be known already + unknowndim_dims = set(dims) - set(model.dim_lengths) + if unknowndim_dims: raise KeyError( - f"Dimensions {unknowndim_resize_dims} are unknown to the model and cannot be used to specify a `size`." + f"Dimensions {unknowndim_dims} are unknown to the model and cannot be used to specify a `shape`." ) - # The numeric/symbolic resize tuple can be created using model.RV_dim_lengths - return tuple( - model.dim_lengths[dname] if dname in model.dim_lengths else shape_implied[i] - for i, dname in enumerate(dims) - ) + return tuple(model.dim_lengths[dname] for dname in dims) def find_size( diff --git a/pymc/tests/distributions/test_shape_utils.py b/pymc/tests/distributions/test_shape_utils.py index 8dad510a85..623a3d306d 100644 --- a/pymc/tests/distributions/test_shape_utils.py +++ b/pymc/tests/distributions/test_shape_utils.py @@ -312,39 +312,16 @@ def test_simultaneous_dims_and_observed(self): assert pmodel.RV_dims["y"] == ("ddata",) assert y.eval().shape == (3,) - def test_define_dims_on_the_fly(self): + def test_define_dims_on_the_fly_raises(self): + # Check that trying to use dims that are not pre-specified fails, even if their + # length could be inferred from the shape of the variables + msg = "Dimensions {'patient'} are unknown to the model" with pm.Model() as pmodel: - agedata = aesara.shared(np.array([10, 20, 30])) + with pytest.raises(KeyError, match=msg): + pm.Normal("x", [0, 1, 2], dims=("patient",)) - # Associate the "patient" dim with an implied dimension - age = pm.Normal("age", agedata, dims=("patient",)) - assert "patient" in pmodel.dim_lengths - assert pmodel.dim_lengths["patient"].eval() == 3 - - # Use the dim to replicate a new RV - effect = pm.Normal("effect", 0, dims=("patient",)) - assert effect.ndim == 1 - assert effect.eval().shape == (3,) - - # Now change the length of the implied dimension - agedata.set_value([1, 2, 3, 4]) - # The change should propagate all the way through - assert effect.eval().shape == (4,) - - def test_define_dims_on_the_fly_from_observed(self): - with pm.Model() as pmodel: - data = aesara.shared(np.zeros((4, 5))) - x = pm.Normal("x", observed=data, dims=("patient", "trials")) - assert pmodel.dim_lengths["patient"].eval() == 4 - assert pmodel.dim_lengths["trials"].eval() == 5 - - # Use dim to create a new RV - x_noisy = pm.Normal("x_noisy", 0, dims=("patient", "trials")) - assert x_noisy.eval().shape == (4, 5) - - # Change data patient dims - data.set_value(np.zeros((10, 6))) - assert x_noisy.eval().shape == (10, 6) + with pytest.raises(KeyError, match=msg): + pm.Normal("x", observed=[0, 1, 2], dims=("patient",)) def test_can_resize_data_defined_size(self): with pm.Model() as pmodel: diff --git a/pymc/tests/test_data.py b/pymc/tests/test_data.py index a6f1dd36df..c294ef3c3c 100644 --- a/pymc/tests/test_data.py +++ b/pymc/tests/test_data.py @@ -28,7 +28,6 @@ import pymc as pm from pymc.aesaraf import GeneratorOp, floatX -from pymc.exceptions import ShapeError from pymc.tests.helpers import SeededTest, select_by_precision @@ -371,20 +370,6 @@ def test_symbolic_coords(self): assert pmodel.dim_lengths["row"].eval() == 4 assert pmodel.dim_lengths["column"].eval() == 5 - def test_no_resize_of_implied_dimensions(self): - with pm.Model() as pmodel: - # Imply a dimension through RV params - pm.Normal("n", mu=[1, 2, 3], dims="city") - # _Use_ the dimension for a data variable - inhabitants = pm.MutableData("inhabitants", [100, 200, 300], dims="city") - - # Attempting to re-size the dimension through the data variable would - # cause shape problems in InferenceData conversion, because the RV remains (3,). - with pytest.raises( - ShapeError, match="was initialized from 'n' which is not a shared variable" - ): - pmodel.set_data("inhabitants", [1, 2, 3, 4]) - def test_implicit_coords_series(self): pd = pytest.importorskip("pandas") ser_sales = pd.Series( diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index c7bea809fa..17929973ab 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -742,7 +742,7 @@ def test_shapeerror_from_resize_immutable_dim_from_RV(): Even if the variable being updated is a SharedVariable and has other dimensions that are mutable. """ - with pm.Model() as pmodel: + with pm.Model(coords={"fixed": range(3)}) as pmodel: pm.Normal("a", mu=[1, 2, 3], dims="fixed") assert isinstance(pmodel.dim_lengths["fixed"], TensorVariable) @@ -751,7 +751,8 @@ def test_shapeerror_from_resize_immutable_dim_from_RV(): # This is fine because the "fixed" dim is not resized pmodel.set_data("m", [[1, 2, 3], [3, 4, 5]]) - with pytest.raises(ShapeError, match="was initialized from 'a'"): + msg = "The 'm' variable already had 3 coord values defined for its fixed dimension" + with pytest.raises(ValueError, match=msg): # Can't work because the "fixed" dimension is linked to a # TensorVariable with constant shape. # Note that the new data tries to change both dimensions @@ -826,7 +827,7 @@ def test_set_dim(): def test_set_dim_with_coords(): - """Test the concious re-sizing of dims created through add_coord() with coord value.""" + """Test the conscious re-sizing of dims created through add_coord() with coord value.""" with pm.Model() as pmodel: pmodel.add_coord("mdim", mutable=True, length=2, values=["A", "B"]) a = pm.Normal("a", dims="mdim") @@ -904,6 +905,17 @@ def test_set_data_indirect_resize_with_coords(): pmodel.set_data("mdata", [1, 2], coords=dict(mdim=[1, 2, 3])) +def test_set_data_constant_shape_error(): + with pm.Model() as pmodel: + x = pm.Normal("x", size=7) + pmodel.add_coord("weekday", length=x.shape[0]) + pm.MutableData("y", np.arange(7), dims="weekday") + + msg = "because the dimension was initialized from 'x' which is not a shared variable" + with pytest.raises(ShapeError, match=msg): + pmodel.set_data("y", np.arange(10)) + + def test_model_logpt_deprecation_warning(): with pm.Model() as m: x = pm.Normal("x", 0, 1, size=2)