From 09a551065407ad992b59b28049be6a12646b6b41 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Fri, 18 May 2018 18:23:43 -0400
Subject: [PATCH 01/14] Add sample_generative function

---
 pymc3/distributions/distribution.py      | 13 +++++--
 pymc3/sampling.py                        | 48 +++++++++++++++++++++++-
 pymc3/tests/test_distributions_random.py | 17 +++++++++
 pymc3/tests/test_sampling.py             | 14 +++++++
 4 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index bdb7e266bd..735d4e8de2 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -254,7 +254,7 @@ def draw_values(params, point=None, size=None):
 
     # Init givens and the stack of nodes to try to `_draw_value` from
     givens = {}
-    stored = set([])  # Some nodes
+    stored = set()  # Some nodes
     stack = list(leaf_nodes.values())  # A queue would be more appropriate
     while stack:
         next_ = stack.pop(0)
@@ -279,13 +279,14 @@ def draw_values(params, point=None, size=None):
             # The named node's children givens values must also be taken
             # into account.
             children = named_nodes_children[next_]
-            temp_givens = [givens[k] for k in givens.keys() if k in children]
+            temp_givens = [givens[k] for k in givens if k in children]
             try:
                 # This may fail for autotransformed RVs, which don't
                 # have the random method
                 givens[next_.name] = (next_, _draw_value(next_,
                                                          point=point,
-                                                         givens=temp_givens, size=size))
+                                                         givens=temp_givens,
+                                                         size=size))
                 stored.add(next_.name)
             except theano.gof.fg.MissingInputError:
                 # The node failed, so we must add the node's parents to
@@ -355,7 +356,11 @@ def _draw_value(param, point=None, givens=None, size=None):
         if point and hasattr(param, 'model') and param.name in point:
             return point[param.name]
         elif hasattr(param, 'random') and param.random is not None:
-            return param.random(point=point, size=size)
+            return param.random(point=point, size=None)
+        elif (hasattr(param, 'distribution') and
+                hasattr(param.distribution, 'random') and
+                param.distribution.random is not None):
+            return param.distribution.random(point=point, size=size)
         else:
             if givens:
                 variables, values = list(zip(*givens))
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 29faaf8269..0f2b5acd80 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -11,11 +11,12 @@
 
 from .backends.base import BaseTrace, MultiTrace
 from .backends.ndarray import NDArray
+from .distributions.distribution import draw_values
 from .model import modelcontext, Point, all_continuous
 from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
                            BinaryGibbsMetropolis, CategoricalGibbsMetropolis,
                            Slice, CompoundStep, arraystep)
-from .util import update_start_vals, get_untransformed_name, is_transformed_name
+from .util import update_start_vals, get_untransformed_name, is_transformed_name, get_default_varnames
 from .vartypes import discrete_types
 from pymc3.step_methods.hmc import quadpotential
 from pymc3 import plots
@@ -25,7 +26,7 @@
 import sys
 sys.setrecursionlimit(10000)
 
-__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts']
+__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts', 'sample_generative']
 
 STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
                 BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)
@@ -1276,6 +1277,49 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None,
     return {k: np.asarray(v) for k, v in ppc.items()}
 
 
+def sample_generative(samples=500, model=None, vars=None, random_seed=None):
+    """Generate samples from the generative model.
+
+    Parameters
+    ----------
+    samples : int
+        Number of samples from the prior to generate. Defaults to 500.
+    model : Model (optional if in `with` context)
+    vars : iterable
+        Variables for which to compute the posterior predictive samples.
+        Defaults to `model.named_vars`.
+    random_seed : int
+        Seed for the random number generator.
+
+    Returns
+    -------
+    dict
+        Dictionary with the variables as keys. The values are arrays of prior samples.
+    """
+    model = modelcontext(model)
+
+    if vars is None:
+        vars = set(model.named_vars.keys())
+
+    if random_seed is not None:
+        np.random.seed(random_seed)
+    names = get_default_varnames(model.named_vars, include_transformed=False)
+    # draw_values fails with auto-transformed variables. transform them later!
+    values = draw_values([model[name] for name in names], size=samples)
+
+    data = {k: v for k, v in zip(names, values)}
+
+    prior = {}
+    for var_name in vars:
+        if var_name in data:
+            prior[var_name] = data[var_name]
+        elif is_transformed_name(var_name):
+            untransformed = get_untransformed_name(var_name)
+            if untransformed in data:
+                prior[var_name] = model[untransformed].transformation.forward(data[untransformed]).eval()
+    return prior
+
+
 def init_nuts(init='auto', chains=1, n_init=500000, model=None,
               random_seed=None, progressbar=True, **kwargs):
     """Set up the mass matrix initialization for NUTS.
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index fde199b6b0..dad09c2063 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -78,6 +78,23 @@ def test_draw_scalar_parameters(self):
         npt.assert_almost_equal(mu, 0)
         npt.assert_almost_equal(tau, 1)
 
+    def test_draw_dependencies(self):
+        with pm.Model():
+            x = pm.Normal('x', mu=0., sd=1.)
+            exp_x = pm.Deterministic('exp_x', pm.math.exp(x))
+
+        x, exp_x = pm.distributions.draw_values([x, exp_x])
+        npt.assert_almost_equal(np.exp(x), exp_x)
+
+    def test_draw_order(self):
+        with pm.Model():
+            x = pm.Normal('x', mu=0., sd=1.)
+            exp_x = pm.Deterministic('exp_x', pm.math.exp(x))
+
+        # Need to draw x before drawing log_x
+        exp_x, x = pm.distributions.draw_values([exp_x, x])
+        npt.assert_almost_equal(np.exp(x), exp_x)
+
     def test_draw_point_replacement(self):
         with pm.Model():
             mu = pm.Normal('mu', mu=0., tau=1e-3)
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index a781d88402..fc661bad2a 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -337,3 +337,17 @@ def test_exec_nuts_init(method):
         assert len(start) == 2
         assert isinstance(start[0], dict)
         assert 'a' in start[0] and 'b_log__' in start[0]
+
+def test_sample_generative():
+    observed = np.random.normal(10, 1, size=200)
+    with pm.Model():
+        # Use a prior that's way off to show we're actually sampling from it
+        mu = pm.Normal('mu', mu=-100, sd=1)
+        positive_mu = pm.Deterministic('positive_mu', np.abs(mu))
+        z = -1 - positive_mu
+        pm.Normal('x_obs', mu=z, sd=1, observed=observed)
+        prior = pm.sample_generative()
+
+    assert (prior['mu'] < 90).all()
+    assert (prior['positive_mu'] > 90).all()
+    assert (prior['x_obs'] < 90).all()

From d621072326dd74f1c189ee47a0a93b7786051b09 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Fri, 18 May 2018 19:01:01 -0400
Subject: [PATCH 02/14] Resolve graph order, make tests pass

---
 pymc3/distributions/distribution.py | 31 +++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 735d4e8de2..bdfbb2f247 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -1,4 +1,6 @@
+import collections
 import numbers
+
 import numpy as np
 import theano.tensor as tt
 from theano import function
@@ -296,10 +298,31 @@ def draw_values(params, point=None, size=None):
                               if node is not None and
                               node.name not in stored and
                               node not in params])
-    values = []
-    for param in params:
-        values.append(_draw_value(param, point=point, givens=givens.values(), size=size))
-    return values
+
+    # the below makes sure the graph is evaluated in order
+    # test_distributions_random::TestDrawValues::test_draw_order fails without it
+    params = dict(enumerate(params))  # some nodes are not hashable
+    evaluated = {}
+    to_eval = set()
+    missing_inputs = set(params)
+    while to_eval or missing_inputs:
+        if to_eval == missing_inputs:
+            raise ValueError('Cannot resolve inputs for {}'.format([str(params[j]) for j in to_eval]))
+        to_eval = set(missing_inputs)
+        missing_inputs = set()
+        for param_idx in to_eval:
+            param = params[param_idx]
+            if param.name in givens:
+                evaluated[param_idx] = givens[param.name][1]
+            else:
+                try:  # might evaluate in a bad order,
+                    evaluated[param_idx] = _draw_value(param, point=point, givens=givens.values(), size=size)
+                    if isinstance(param, collections.Hashable) and named_nodes_parents.get(param):
+                        givens[param.name] = (param, evaluated[param_idx])
+                except theano.gof.fg.MissingInputError:
+                    missing_inputs.add(param_idx)
+
+    return [evaluated[j] for j in params] # set the order back
 
 
 @memoize

From 85300fc22a08a6a97877aef92ea0a479fac22203 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Fri, 8 Jun 2018 15:31:14 -0400
Subject: [PATCH 03/14] WIP, sample_generative works sometimes!

---
 pymc3/distributions/distribution.py | 85 ++++++++++-------------------
 1 file changed, 29 insertions(+), 56 deletions(-)

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index bdfbb2f247..12b63da2fb 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -312,7 +312,7 @@ def draw_values(params, point=None, size=None):
         missing_inputs = set()
         for param_idx in to_eval:
             param = params[param_idx]
-            if param.name in givens:
+            if hasattr(param, 'name') and param.name in givens:
                 evaluated[param_idx] = givens[param.name][1]
             else:
                 try:  # might evaluate in a bad order,
@@ -379,7 +379,7 @@ def _draw_value(param, point=None, givens=None, size=None):
         if point and hasattr(param, 'model') and param.name in point:
             return point[param.name]
         elif hasattr(param, 'random') and param.random is not None:
-            return param.random(point=point, size=None)
+            return param.random(point=point, size=size)
         elif (hasattr(param, 'distribution') and
                 hasattr(param.distribution, 'random') and
                 param.distribution.random is not None):
@@ -395,31 +395,6 @@ def _draw_value(param, point=None, givens=None, size=None):
         raise ValueError('Unexpected type in draw_value: %s' % type(param))
 
 
-def broadcast_shapes(*args):
-    """Return the shape resulting from broadcasting multiple shapes.
-    Represents numpy's broadcasting rules.
-
-    Parameters
-    ----------
-    *args : array-like of int
-        Tuples or arrays or lists representing the shapes of arrays to be broadcast.
-
-    Returns
-    -------
-    Resulting shape or None if broadcasting is not possible.
-    """
-    x = list(np.atleast_1d(args[0])) if args else ()
-    for arg in args[1:]:
-        y = list(np.atleast_1d(arg))
-        if len(x) < len(y):
-            x, y = y, x
-        x[-len(y):] = [j if i == 1 else i if j == 1 else i if i == j else 0
-                       for i, j in zip(x[-len(y):], y)]
-        if not all(x):
-            return None
-    return tuple(x)
-
-
 def infer_shape(shape):
     try:
         shape = tuple(shape or ())
@@ -437,7 +412,7 @@ def reshape_sampled(sampled, size, dist_shape):
     if np.size(sampled) == 1 or repeat_shape or dist_shape:
         return np.reshape(sampled, repeat_shape + dist_shape)
     else:
-        return sampled
+        return sampled.squeeze()
 
 
 def replicate_samples(generator, size, repeats, *args, **kwargs):
@@ -483,40 +458,38 @@ def generate_samples(generator, *args, **kwargs):
     dist_shape = kwargs.pop('dist_shape', ())
     size = kwargs.pop('size', None)
     broadcast_shape = kwargs.pop('broadcast_shape', None)
-    params = args + tuple(kwargs.values())
 
-    if broadcast_shape is None:
-        broadcast_shape = broadcast_shapes(*[np.atleast_1d(p).shape for p in params
-                                             if not isinstance(p, tuple)])
-    if broadcast_shape == ():
-        broadcast_shape = (1,)
+    if size is None:
+        size = 1
 
     args = tuple(p[0] if isinstance(p, tuple) else p for p in args)
+
     for key in kwargs:
         p = kwargs[key]
         kwargs[key] = p[0] if isinstance(p, tuple) else p
 
-    if np.all(dist_shape[-len(broadcast_shape):] == broadcast_shape):
-        prefix_shape = tuple(dist_shape[:-len(broadcast_shape)])
-    else:
-        prefix_shape = tuple(dist_shape)
-
-    repeat_shape = infer_shape(size)
+    if broadcast_shape is None:
+        inputs = args + tuple(kwargs.values())
+        broadcast_shape = np.broadcast(*inputs).shape  # size of generator(size=1)
 
-    if broadcast_shape == (1,) and prefix_shape == ():
-        if size is not None:
-            samples = generator(size=size, *args, **kwargs)
-        else:
-            samples = generator(size=1, *args, **kwargs)
+    dist_shape = infer_shape(dist_shape)
+    broadcast_shape = (1,) + infer_shape(broadcast_shape)
+    size_tup = infer_shape(size)
+
+    if broadcast_shape == (1,):
+        samples = generator(size=size_tup + dist_shape, *args, **kwargs)
+    elif broadcast_shape[-len(dist_shape):] == dist_shape:
+        samples = generator(*args, **kwargs)
+    elif dist_shape == broadcast_shape:
+        samples = generator(size=size, *args, **kwargs)
+    elif broadcast_shape[1:1 + len(size_tup)] == size_tup:
+        suffix = broadcast_shape[1 + len(size_tup):] + dist_shape
+        samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix))]
+        samples = np.hstack(samples).reshape(size_tup + suffix)
     else:
-        if size is not None:
-            samples = replicate_samples(generator,
-                                        broadcast_shape,
-                                        repeat_shape + prefix_shape,
-                                        *args, **kwargs)
-        else:
-            samples = replicate_samples(generator,
-                                        broadcast_shape,
-                                        prefix_shape,
-                                        *args, **kwargs)
-    return reshape_sampled(samples, size, dist_shape)
+        raise TypeError(f'''Attempted to generate values with incompatible shapes:
+            size: {size}
+            dist_shape: {dist_shape}
+            broadcast_shape: {broadcast_shape}
+        ''')
+    return samples.squeeze()

From e70b3cb057e75549fcb2b078572ac76c2c755e37 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Mon, 11 Jun 2018 21:42:32 -0400
Subject: [PATCH 04/14] WIP

---
 pymc3/distributions/discrete.py     | 13 +++----
 pymc3/distributions/distribution.py | 53 +++++++++++------------------
 pymc3/distributions/multivariate.py |  1 -
 pymc3/model.py                      |  6 ++--
 pymc3/tests/test_distributions.py   |  4 ---
 5 files changed, 27 insertions(+), 50 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 58ae48df2b..50d68608a4 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -7,7 +7,7 @@
 
 from pymc3.util import get_variable_name
 from .dist_math import bound, factln, binomln, betaln, logpow
-from .distribution import Discrete, draw_values, generate_samples, reshape_sampled
+from .distribution import Discrete, draw_values, generate_samples
 from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
 
 
@@ -495,7 +495,7 @@ def random(self, point=None, size=None):
                              dist_shape=self.shape,
                              size=size)
         g[g == 0] = np.finfo(float).eps  # Just in case
-        return reshape_sampled(stats.poisson.rvs(g), size, self.shape)
+        return stats.poisson.rvs(g)
 
     def logp(self, value):
         mu = self.mu
@@ -849,8 +849,7 @@ def random(self, point=None, size=None):
         g = generate_samples(stats.poisson.rvs, theta,
                              dist_shape=self.shape,
                              size=size)
-        sampled = g * (np.random.random(np.squeeze(g.shape)) < psi)
-        return reshape_sampled(sampled, size, self.shape)
+        return g * (np.random.random(np.squeeze(g.shape)) < psi)
 
     def logp(self, value):
         psi = self.psi
@@ -942,8 +941,7 @@ def random(self, point=None, size=None):
         g = generate_samples(stats.binom.rvs, n, p,
                              dist_shape=self.shape,
                              size=size)
-        sampled = g * (np.random.random(np.squeeze(g.shape)) < psi)
-        return reshape_sampled(sampled, size, self.shape)
+        return g * (np.random.random(np.squeeze(g.shape)) < psi)
 
     def logp(self, value):
         psi = self.psi
@@ -1061,8 +1059,7 @@ def random(self, point=None, size=None):
                              dist_shape=self.shape,
                              size=size)
         g[g == 0] = np.finfo(float).eps  # Just in case
-        sampled = stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi)
-        return reshape_sampled(sampled, size, self.shape)
+        return stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi)
 
     def logp(self, value):
         alpha = self.alpha
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 12b63da2fb..498d3d52ef 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -390,12 +390,16 @@ def _draw_value(param, point=None, givens=None, size=None):
             else:
                 variables = values = []
             func = _compile_theano_function(param, variables)
-            return func(*values)
+            if size and all(len(v) == size for v in values):
+                return np.array([func(*v) for v in zip(*values)])
+            else:
+                return func(*values)
     else:
         raise ValueError('Unexpected type in draw_value: %s' % type(param))
 
 
-def infer_shape(shape):
+def to_tuple(shape):
+    """Convert ints, arrays, and Nones to tuples"""
     try:
         shape = tuple(shape or ())
     except TypeError:  # If size is an int
@@ -405,27 +409,6 @@ def infer_shape(shape):
     return shape
 
 
-def reshape_sampled(sampled, size, dist_shape):
-    dist_shape = infer_shape(dist_shape)
-    repeat_shape = infer_shape(size)
-
-    if np.size(sampled) == 1 or repeat_shape or dist_shape:
-        return np.reshape(sampled, repeat_shape + dist_shape)
-    else:
-        return sampled.squeeze()
-
-
-def replicate_samples(generator, size, repeats, *args, **kwargs):
-    n = int(np.prod(repeats))
-    if n == 1:
-        samples = generator(size=size, *args, **kwargs)
-    else:
-        samples = np.array([generator(size=size, *args, **kwargs)
-                            for _ in range(n)])
-        samples = np.reshape(samples, tuple(repeats) + tuple(size))
-    return samples
-
-
 def generate_samples(generator, *args, **kwargs):
     """Generate samples from the distribution of a random variable.
 
@@ -458,7 +441,6 @@ def generate_samples(generator, *args, **kwargs):
     dist_shape = kwargs.pop('dist_shape', ())
     size = kwargs.pop('size', None)
     broadcast_shape = kwargs.pop('broadcast_shape', None)
-
     if size is None:
         size = 1
 
@@ -472,19 +454,21 @@ def generate_samples(generator, *args, **kwargs):
         inputs = args + tuple(kwargs.values())
         broadcast_shape = np.broadcast(*inputs).shape  # size of generator(size=1)
 
-    dist_shape = infer_shape(dist_shape)
-    broadcast_shape = (1,) + infer_shape(broadcast_shape)
-    size_tup = infer_shape(size)
-
-    if broadcast_shape == (1,):
+    dist_shape = to_tuple(dist_shape)
+    broadcast_shape = to_tuple(broadcast_shape)
+    size_tup = to_tuple(size)
+    if broadcast_shape == ():
         samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     elif broadcast_shape[-len(dist_shape):] == dist_shape:
-        samples = generator(*args, **kwargs)
+        if size == 1 or (broadcast_shape == size_tup + dist_shape):
+            samples = generator(size=broadcast_shape, *args, **kwargs)
+        else:
+            samples = generator(*args, **kwargs)
     elif dist_shape == broadcast_shape:
         samples = generator(size=size, *args, **kwargs)
-    elif broadcast_shape[1:1 + len(size_tup)] == size_tup:
-        suffix = broadcast_shape[1 + len(size_tup):] + dist_shape
-        samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix))]
+    elif broadcast_shape[:len(size_tup)] == size_tup:
+        suffix = broadcast_shape[len(size_tup):] + dist_shape
+        samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))]
         samples = np.hstack(samples).reshape(size_tup + suffix)
     else:
         raise TypeError(f'''Attempted to generate values with incompatible shapes:
@@ -492,4 +476,5 @@ def generate_samples(generator, *args, **kwargs):
             dist_shape: {dist_shape}
             broadcast_shape: {broadcast_shape}
         ''')
-    return samples.squeeze()
+    samples = samples.squeeze()
+    return samples
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index d1786009a5..b9f906a8bb 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -1469,7 +1469,6 @@ def _setup_random(self):
             elif self._cov_type == 'evd':
                 covs = []
                 for eig, Q in zip(self.eigs_sep, self.Qs):
-                    # print()
                     cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T))
                     covs.append(cov_i)
                 cov = kronecker(*covs)
diff --git a/pymc3/model.py b/pymc3/model.py
index 92d4177384..6771fe977e 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -100,7 +100,7 @@ def get_named_nodes_and_relations(graph):
         is a theano named node, and the corresponding value is the set
         of theano named nodes that are children of the node. These child
         relations skip unnamed intermediate nodes.
-    
+
     """
     if graph.name is not None:
         node_parents = {graph: set()}
@@ -1017,7 +1017,7 @@ def check_test_point(self, test_point=None, round_vals=2):
         if test_point is None:
             test_point = self.test_point
 
-        return Series({RV.name:np.round(RV.logp(self.test_point), round_vals) for RV in self.basic_RVs}, 
+        return Series({RV.name:np.round(RV.logp(self.test_point), round_vals) for RV in self.basic_RVs},
             name='Log-probability of test_point')
 
     def _repr_latex_(self, name=None, dist=None):
@@ -1244,7 +1244,7 @@ def pandas_to_array(data):
         ret = generator(data)
     else:
         ret = np.asarray(data)
-    return pm.smartfloatX(ret)
+    return pm.floatX(ret)
 
 
 def as_tensor(data, name, model, distribution):
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 4cb3423dd2..dcc3628ae0 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -916,10 +916,6 @@ def test_multinomial_mode(self, p, n):
          [.25, .25, .25, .25]], (2, 4), 13],
         [[[.25, .25, .25, .25],
          [.25, .25, .25, .25]], (2, 4), [17, 19]],
-        [[[.25, .25, .25, .25],
-         [.25, .25, .25, .25]], (1, 2, 4), [23, 29]],
-        [[[.25, .25, .25, .25],
-         [.25, .25, .25, .25]], (10, 2, 4), [31, 37]],
     ])
     def test_multinomial_random(self, p, shape, n):
         p = np.asarray(p)

From cfee4ae861ef1af239192c5cb1f96fcdbe814ad3 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Tue, 12 Jun 2018 08:43:31 -0400
Subject: [PATCH 05/14] Add some tests

---
 pymc3/distributions/distribution.py |  6 ++--
 pymc3/tests/test_sampling.py        | 54 ++++++++++++++++++++++-------
 2 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 498d3d52ef..854202f7a5 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -390,7 +390,7 @@ def _draw_value(param, point=None, givens=None, size=None):
             else:
                 variables = values = []
             func = _compile_theano_function(param, variables)
-            if size and all(len(v) == size for v in values):
+            if size and values and all(len(v) == size for v in values):
                 return np.array([func(*v) for v in zip(*values)])
             else:
                 return func(*values)
@@ -457,13 +457,13 @@ def generate_samples(generator, *args, **kwargs):
     dist_shape = to_tuple(dist_shape)
     broadcast_shape = to_tuple(broadcast_shape)
     size_tup = to_tuple(size)
-    if broadcast_shape == ():
+    if broadcast_shape == () or broadcast_shape == (0,):
         samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     elif broadcast_shape[-len(dist_shape):] == dist_shape:
         if size == 1 or (broadcast_shape == size_tup + dist_shape):
             samples = generator(size=broadcast_shape, *args, **kwargs)
         else:
-            samples = generator(*args, **kwargs)
+            samples = generator(size=size, *args, **kwargs)
     elif dist_shape == broadcast_shape:
         samples = generator(size=size, *args, **kwargs)
     elif broadcast_shape[:len(size_tup)] == size_tup:
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index fc661bad2a..df4c257b27 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -6,6 +6,7 @@
 except ImportError:
     import mock
 
+import numpy.testing as npt
 import pymc3 as pm
 import theano.tensor as tt
 from theano import shared
@@ -338,16 +339,43 @@ def test_exec_nuts_init(method):
         assert isinstance(start[0], dict)
         assert 'a' in start[0] and 'b_log__' in start[0]
 
-def test_sample_generative():
-    observed = np.random.normal(10, 1, size=200)
-    with pm.Model():
-        # Use a prior that's way off to show we're actually sampling from it
-        mu = pm.Normal('mu', mu=-100, sd=1)
-        positive_mu = pm.Deterministic('positive_mu', np.abs(mu))
-        z = -1 - positive_mu
-        pm.Normal('x_obs', mu=z, sd=1, observed=observed)
-        prior = pm.sample_generative()
-
-    assert (prior['mu'] < 90).all()
-    assert (prior['positive_mu'] > 90).all()
-    assert (prior['x_obs'] < 90).all()
+class TestSampleGenerative(SeededTest):
+    def test_ignores_observed(self):
+        observed = np.random.normal(10, 1, size=200)
+        with pm.Model():
+            # Use a prior that's way off to show we're ignoring the observed variables
+            mu = pm.Normal('mu', mu=-100, sd=1)
+            positive_mu = pm.Deterministic('positive_mu', np.abs(mu))
+            z = -1 - positive_mu
+            pm.Normal('x_obs', mu=z, sd=1, observed=observed)
+            prior = pm.sample_generative()
+
+        assert (prior['mu'] < 90).all()
+        assert (prior['positive_mu'] > 90).all()
+        assert (prior['x_obs'] < 90).all()
+        assert (prior['positive_mu'] == np.abs(prior['mu'])).all()
+
+    def test_respects_shape(self):
+        for shape in (2, (2,), (10, 2), (10, 10)):
+            with pm.Model():
+                mu = pm.Gamma('mu', 3, 1, shape=1)
+                goals = pm.Poisson('goals', mu, shape=shape)
+                trace = pm.sample_generative(10)
+            if shape == 2:  # want to test shape as an int
+                shape = (2,)
+            assert trace['goals'].shape == (10,) + shape
+
+    def test_multivariate(self):
+        with pm.Model():
+            m = pm.Multinomial('m', n=5, p=np.array([[0.25, 0.25, 0.25, 0.25]]), shape=(1, 4))
+            trace = pm.sample_generative(10)
+        assert m.random(size=10).shape == (10, 4)
+        assert trace['m'].shape == (10, 4)
+
+    def test_layers(self):
+        with pm.Model() as model:
+            a = pm.Uniform('a', lower=0, upper=1, shape=10)
+            b = pm.Binomial('b', n=1, p=a, shape=10)
+
+        avg = b.random(size=10000).mean(axis=0)
+        npt.assert_array_almost_equal(avg, 0.5 * np.ones_like(b), decimal=2)

From 02f15b9a0ed6d1e813e3fc428b88d5faf3c2756f Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Thu, 14 Jun 2018 22:12:41 -0400
Subject: [PATCH 06/14] More WIP

---
 pymc3/distributions/bound.py             |  3 +-
 pymc3/distributions/distribution.py      | 36 ++++++++++++++++++++----
 pymc3/distributions/multivariate.py      | 22 ++++++++++-----
 pymc3/tests/test_distributions.py        |  1 -
 pymc3/tests/test_distributions_random.py |  9 +++---
 pymc3/tests/test_sampling.py             |  3 +-
 6 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py
index c44684b061..35d489ed2c 100644
--- a/pymc3/distributions/bound.py
+++ b/pymc3/distributions/bound.py
@@ -54,7 +54,8 @@ def _random(self, lower, upper, point=None, size=None):
         samples = np.zeros(size, dtype=self.dtype).flatten()
         i, n = 0, len(samples)
         while i < len(samples):
-            sample = self._wrapped.random(point=point, size=n)
+            sample = np.atleast_1d(self._wrapped.random(point=point, size=n))
+
             select = sample[np.logical_and(sample >= lower, sample <= upper)]
             samples[i:(i + len(select))] = select[:]
             i += len(select)
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 854202f7a5..3b5c69396e 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -408,6 +408,14 @@ def to_tuple(shape):
         shape = tuple(shape)
     return shape
 
+def _is_one_d(dist_shape):
+    if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)):
+        return True
+    elif hasattr(dist_shape, 'shape') and dist_shape.shape in ((), (0,), (1,)):
+        return True
+    elif dist_shape == ():
+        return True
+    return False
 
 def generate_samples(generator, *args, **kwargs):
     """Generate samples from the distribution of a random variable.
@@ -439,6 +447,7 @@ def generate_samples(generator, *args, **kwargs):
     Any remaining *args and **kwargs are passed on to the generator function.
     """
     dist_shape = kwargs.pop('dist_shape', ())
+    one_d = _is_one_d(dist_shape)
     size = kwargs.pop('size', None)
     broadcast_shape = kwargs.pop('broadcast_shape', None)
     if size is None:
@@ -457,24 +466,41 @@ def generate_samples(generator, *args, **kwargs):
     dist_shape = to_tuple(dist_shape)
     broadcast_shape = to_tuple(broadcast_shape)
     size_tup = to_tuple(size)
+
+    # All inputs are scalars, end up size (size_tup, dist_shape)
     if broadcast_shape == () or broadcast_shape == (0,):
         samples = generator(size=size_tup + dist_shape, *args, **kwargs)
+    # Inputs already have the right shape. Just get the right size.
     elif broadcast_shape[-len(dist_shape):] == dist_shape:
         if size == 1 or (broadcast_shape == size_tup + dist_shape):
             samples = generator(size=broadcast_shape, *args, **kwargs)
+        elif dist_shape == broadcast_shape:
+            samples = generator(size=size_tup + dist_shape, *args, **kwargs)
         else:
-            samples = generator(size=size, *args, **kwargs)
-    elif dist_shape == broadcast_shape:
-        samples = generator(size=size, *args, **kwargs)
+            if size_tup[-len(broadcast_shape):] != broadcast_shape:
+                samples = generator(size=size_tup + broadcast_shape, *args, **kwargs)
+            else:
+                samples = generator(size=size_tup + dist_shape, *args, **kwargs)
+    # Inputs have the right size, have to manually broadcast to the right dist_shape
     elif broadcast_shape[:len(size_tup)] == size_tup:
         suffix = broadcast_shape[len(size_tup):] + dist_shape
         samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))]
         samples = np.hstack(samples).reshape(size_tup + suffix)
+    # Args have been broadcast correctly, can just ask for the right shape out
+    elif dist_shape[-len(broadcast_shape):] == broadcast_shape:
+        samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     else:
         raise TypeError(f'''Attempted to generate values with incompatible shapes:
             size: {size}
             dist_shape: {dist_shape}
             broadcast_shape: {broadcast_shape}
         ''')
-    samples = samples.squeeze()
-    return samples
+
+    # reshape samples here
+    if samples.shape[0] == 1 and size == 1:
+        if len(samples.shape) > len(dist_shape) and samples.shape[-len(dist_shape):] == dist_shape:
+            samples = samples.reshape(samples.shape[1:])
+
+    if one_d and samples.shape[-1] == 1:
+        samples = samples.reshape(samples.shape[:-1])
+    return np.asarray(samples)
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index b9f906a8bb..07755bf586 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -492,10 +492,10 @@ def __init__(self, n, p, *args, **kwargs):
 
         if len(self.shape) > 1:
             m = self.shape[-2]
-            try:
-                assert n.shape == (m,)
-            except (AttributeError, AssertionError):
-                n = n * tt.ones(m)
+            # try:
+            #     assert n.shape == (m,)
+            # except (AttributeError, AssertionError):
+            #     n = n * tt.ones(m)
             self.n = tt.shape_padright(n)
             self.p = p if p.ndim > 1 else tt.shape_padleft(p)
         elif n.ndim == 1:
@@ -521,27 +521,35 @@ def _random(self, n, p, size=None):
         # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
         if size == p.shape:
             size = None
-        if (n.ndim == 0) and (p.ndim == 1):
+        elif size[-len(p.shape):] == p.shape:
+            size = size[:len(size) - len(p.shape)]
+
+        n_dim = n.squeeze().ndim
+
+        if (n_dim == 0) and (p.ndim == 1):
             p = p / p.sum()
             randnum = np.random.multinomial(n, p.squeeze(), size=size)
-        elif (n.ndim == 0) and (p.ndim > 1):
+        elif (n_dim == 0) and (p.ndim > 1):
             p = p / p.sum(axis=1, keepdims=True)
             randnum = np.asarray([
                 np.random.multinomial(n.squeeze(), pp, size=size)
                 for pp in p
             ])
-        elif (n.ndim > 0) and (p.ndim == 1):
+            randnum = np.moveaxis(randnum, 1, 0)
+        elif (n_dim > 0) and (p.ndim == 1):
             p = p / p.sum()
             randnum = np.asarray([
                 np.random.multinomial(nn, p.squeeze(), size=size)
                 for nn in n
             ])
+            randnum = np.moveaxis(randnum, 1, 0)
         else:
             p = p / p.sum(axis=1, keepdims=True)
             randnum = np.asarray([
                 np.random.multinomial(nn, pp, size=size)
                 for (nn, pp) in zip(n, p)
             ])
+            randnum = np.moveaxis(randnum, 1, 0)
         return randnum.astype(original_dtype)
 
     def random(self, point=None, size=None):
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index dcc3628ae0..756bba1bcb 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -909,7 +909,6 @@ def test_multinomial_mode(self, p, n):
         [[.25, .25, .25, .25], (1, 4), 3],
         # 3: expect to fail
         # [[.25, .25, .25, .25], (10, 4)],
-        [[.25, .25, .25, .25], (10, 1, 4), 5],
         # 5: expect to fail
         # [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
         [[[.25, .25, .25, .25],
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index dad09c2063..9b847650b7 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -204,12 +204,13 @@ def test_different_shapes_and_sample_sizes(self):
                             s = list(size)
                         except TypeError:
                             s = [size]
-                    s.extend(shape)
+                        if s == [1]:
+                            s = []
+                    if shape not in ((), (1,)):
+                        s.extend(shape)
                     e = tuple(s)
                     a = self.sample_random_variable(rv, size).shape
-                    expected.append(e)
-                    actual.append(a)
-            assert expected == actual
+                    assert e == a
 
 
 class TestNormal(BaseTestCases.BaseTestCase):
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index df4c257b27..6f40b3bac8 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -367,8 +367,9 @@ def test_respects_shape(self):
 
     def test_multivariate(self):
         with pm.Model():
-            m = pm.Multinomial('m', n=5, p=np.array([[0.25, 0.25, 0.25, 0.25]]), shape=(1, 4))
+            m = pm.Multinomial('m', n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4)
             trace = pm.sample_generative(10)
+
         assert m.random(size=10).shape == (10, 4)
         assert trace['m'].shape == (10, 4)
 

From 19cb6121887a9fc061959dda1cc544ae7b0b0aaf Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sat, 16 Jun 2018 18:51:11 -0400
Subject: [PATCH 07/14] Almost there

---
 pymc3/distributions/discrete.py     | 39 +++++++++++--------
 pymc3/distributions/distribution.py | 19 +++++-----
 pymc3/distributions/mixture.py      |  4 +-
 pymc3/distributions/multivariate.py | 58 +++++++++++++++++++++++------
 4 files changed, 82 insertions(+), 38 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 50d68608a4..fc4a431889 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -1,4 +1,3 @@
-from functools import partial
 import numpy as np
 import theano
 import theano.tensor as tt
@@ -154,13 +153,20 @@ def __init__(self, alpha, beta, n, *args, **kwargs):
 
     def _random(self, alpha, beta, n, size=None):
         size = size or 1
-        p = np.atleast_1d(stats.beta.rvs(a=alpha, b=beta, size=np.prod(size)))
+        p = stats.beta.rvs(a=alpha, b=beta, size=size).flatten()
         # Sometimes scipy.beta returns nan. Ugh.
         while np.any(np.isnan(p)):
             i = np.isnan(p)
             p[i] = stats.beta.rvs(a=alpha, b=beta, size=np.sum(i))
         # Sigh...
-        _n, _p, _size = np.atleast_1d(n).flatten(), p.flatten(), np.prod(size)
+        _n, _p, _size = np.atleast_1d(n).flatten(), p.flatten(), p.shape[0]
+
+        quotient, remainder = divmod(_p.shape[0], _n.shape[0])
+        if remainder != 0:
+            raise TypeError('n has a bad size! Was cast to {}, must evenly divide {}'.format(
+                _n.shape[0], _p.shape[0]))
+        if quotient != 1:
+            _n = np.tile(_n, quotient)
         samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size)
         return samples
 
@@ -186,7 +192,7 @@ def _repr_latex_(self, name=None, dist=None):
         alpha = dist.alpha
         beta = dist.beta
         name = r'\text{%s}' % name
-        return r'${} \sim \text{{NegativeBinomial}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
+        return r'${} \sim \text{{BetaBinomial}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
                                                 get_variable_name(alpha),
                                                 get_variable_name(beta))
 
@@ -495,7 +501,7 @@ def random(self, point=None, size=None):
                              dist_shape=self.shape,
                              size=size)
         g[g == 0] = np.finfo(float).eps  # Just in case
-        return stats.poisson.rvs(g)
+        return np.asarray(stats.poisson.rvs(g)).reshape(g.shape)
 
     def logp(self, value):
         mu = self.mu
@@ -700,22 +706,23 @@ def __init__(self, p, *args, **kwargs):
             self.k = tt.shape(p)[-1].tag.test_value
         except AttributeError:
             self.k = tt.shape(p)[-1]
-        self.p = p = tt.as_tensor_variable(p)
+        p = tt.as_tensor_variable(p)
         self.p = (p.T / tt.sum(p, -1)).T
         self.mode = tt.argmax(p)
 
-    def random(self, point=None, size=None):
-        def random_choice(k, *args, **kwargs):
-            if len(kwargs['p'].shape) > 1:
-                return np.asarray(
-                    [np.random.choice(k, p=p)
-                     for p in kwargs['p']]
-                )
-            else:
-                return np.random.choice(k, *args, **kwargs)
+    def _random(self, k, p, size=None):
+        if len(p.shape) > 1:
+            return np.asarray(
+                [np.random.choice(k, p=p, size=size)
+                    for p in kwargs['p']]
+            )
+        else:
+            return np.asarray(np.random.choice(k, p=p, size=size))
 
+    def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
-        return generate_samples(partial(random_choice, np.arange(k)),
+        return generate_samples(self._random,
+                                k=k,
                                 p=p,
                                 broadcast_shape=p.shape[:-1] or (1,),
                                 dist_shape=self.shape,
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 3b5c69396e..abbacca089 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -390,7 +390,7 @@ def _draw_value(param, point=None, givens=None, size=None):
             else:
                 variables = values = []
             func = _compile_theano_function(param, variables)
-            if size and values and all(len(v) == size for v in values):
+            if size and values and not all(var.dshape == val.shape for var, val in zip(variables, values)):
                 return np.array([func(*v) for v in zip(*values)])
             else:
                 return func(*values)
@@ -468,27 +468,26 @@ def generate_samples(generator, *args, **kwargs):
     size_tup = to_tuple(size)
 
     # All inputs are scalars, end up size (size_tup, dist_shape)
-    if broadcast_shape == () or broadcast_shape == (0,):
+    if broadcast_shape in {(), (0,), (1,)}:
         samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     # Inputs already have the right shape. Just get the right size.
-    elif broadcast_shape[-len(dist_shape):] == dist_shape:
+    elif broadcast_shape[-len(dist_shape):] == dist_shape or len(dist_shape) == 0:
         if size == 1 or (broadcast_shape == size_tup + dist_shape):
             samples = generator(size=broadcast_shape, *args, **kwargs)
         elif dist_shape == broadcast_shape:
             samples = generator(size=size_tup + dist_shape, *args, **kwargs)
+        elif size_tup[-len(broadcast_shape):] != broadcast_shape:
+            samples = generator(size=size_tup + broadcast_shape, *args, **kwargs)
         else:
-            if size_tup[-len(broadcast_shape):] != broadcast_shape:
-                samples = generator(size=size_tup + broadcast_shape, *args, **kwargs)
-            else:
-                samples = generator(size=size_tup + dist_shape, *args, **kwargs)
+            samples = generator(size=size_tup + dist_shape, *args, **kwargs)
+    # Args have been broadcast correctly, can just ask for the right shape out
+    elif dist_shape[-len(broadcast_shape):] == broadcast_shape:
+        samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     # Inputs have the right size, have to manually broadcast to the right dist_shape
     elif broadcast_shape[:len(size_tup)] == size_tup:
         suffix = broadcast_shape[len(size_tup):] + dist_shape
         samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))]
         samples = np.hstack(samples).reshape(size_tup + suffix)
-    # Args have been broadcast correctly, can just ask for the right shape out
-    elif dist_shape[-len(broadcast_shape):] == broadcast_shape:
-        samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     else:
         raise TypeError(f'''Attempted to generate values with incompatible shapes:
             size: {size}
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 7d9885bf56..86e71cfa3a 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -49,7 +49,7 @@ class Mixture(Distribution):
             lam = pm.Exponential('lam', lam=1, shape=(2,))  # `shape=(2,)` indicates two mixtures.
 
             # As we just need the logp, rather than add a RV to the model, we need to call .dist()
-            components = pm.Poisson.dist(mu=lam, shape=(2,))  
+            components = pm.Poisson.dist(mu=lam, shape=(2,))
 
             w = pm.Dirichlet('w', a=np.array([1, 1]))  # two mixture component weights.
 
@@ -175,6 +175,8 @@ def random_choice(*args, **kwargs):
             else:
                 samples = np.squeeze(comp_samples[w_samples])
         else:
+            if w_samples.ndim == 1:
+                w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape)
             samples = np.zeros((size,)+tuple(distshape))
             for i in range(size):
                 w_tmp = w_samples[i, :]
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 07755bf586..4717f90039 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -409,10 +409,12 @@ class Dirichlet(Continuous):
 
     def __init__(self, a, transform=transforms.stick_breaking,
                  *args, **kwargs):
-        shape = a.shape[-1]
+        shape = np.atleast_1d(a.shape)[-1]
+
         kwargs.setdefault("shape", shape)
         super(Dirichlet, self).__init__(transform=transform, *args, **kwargs)
 
+        self.size_prefix = tuple(self.shape[:-1])
         self.k = tt.as_tensor_variable(shape)
         self.a = a = tt.as_tensor_variable(a)
         self.mean = a / tt.sum(a)
@@ -421,13 +423,31 @@ def __init__(self, a, transform=transforms.stick_breaking,
                               (a - 1) / tt.sum(a - 1),
                               np.nan)
 
-    def random(self, point=None, size=None):
-        a = draw_values([self.a], point=point, size=size)[0]
+    def _random(self, a, size=None):
+        gen = stats.dirichlet.rvs
+        shape = tuple(np.atleast_1d(self.shape))
+        if size[-len(shape):] == shape:
+            real_size = size[:-len(shape)]
+        else:
+            real_size = size
+        if self.size_prefix:
+            if real_size and real_size[0] == 1:
+                real_size = real_size[1:] + self.size_prefix
+            else:
+                real_size = real_size + self.size_prefix
 
-        def _random(a, size=None):
-            return stats.dirichlet.rvs(a, None if size == a.shape else size)
+        if a.ndim == 1:
+            samples = gen(alpha=a, size=real_size)
+        else:
+            unrolled = a.reshape((np.prod(a.shape[:-1]), a.shape[-1]))
+            samples = np.array([gen(alpha=aa, size=1) for aa in unrolled])
+            samples = samples.reshape(a.shape)
+        return samples
 
-        samples = generate_samples(_random, a,
+    def random(self, point=None, size=None):
+        a = draw_values([self.a], point=point, size=size)[0]
+        samples = generate_samples(self._random,
+                                   a=a,
                                    dist_shape=self.shape,
                                    size=size)
         return samples
@@ -1267,15 +1287,31 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
             self.colchol_cov = tt.as_tensor_variable(colchol)
 
     def random(self, point=None, size=None):
-        if size is None:
-            size = list(self.shape)
-
         mu, colchol, rowchol = draw_values(
                                 [self.mu, self.colchol_cov, self.rowchol_cov],
                                 point=point,
                                 size=size)
-        standard_normal = np.random.standard_normal(size)
-        return mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))
+        if size is None:
+            size = ()
+        if size in (None, ()):
+            standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1]))
+            samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))
+        else:
+            samples = []
+            size = tuple(np.atleast_1d(size))
+            if mu.shape == tuple(self.shape):
+                for _ in range(np.prod(size)):
+                    standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1]))
+                    samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)))
+            else:
+                for j in range(np.prod(size)):
+                    standard_normal = np.random.standard_normal((self.shape[0], colchol[j].shape[-1]))
+                    samples.append(mu[j] +
+                                np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T)))
+            samples = np.array(samples).reshape(size + tuple(self.shape))
+        return samples
+
+
 
     def _trquaddist(self, value):
         """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and

From cb9afd07b276d2669f7d9147c22a2a342f750559 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sat, 16 Jun 2018 19:04:47 -0400
Subject: [PATCH 08/14] No f-strings

---
 pymc3/distributions/distribution.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index abbacca089..8cbf6a34d7 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -489,11 +489,11 @@ def generate_samples(generator, *args, **kwargs):
         samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))]
         samples = np.hstack(samples).reshape(size_tup + suffix)
     else:
-        raise TypeError(f'''Attempted to generate values with incompatible shapes:
+        raise TypeError('''Attempted to generate values with incompatible shapes:
             size: {size}
             dist_shape: {dist_shape}
             broadcast_shape: {broadcast_shape}
-        ''')
+        '''.format(size=size, dist_shape=dist_shape, broadcast_shape=broadcast_shape))
 
     # reshape samples here
     if samples.shape[0] == 1 and size == 1:

From 79f1ee56bc1a51c9d59331e8885824b19f5594b5 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sat, 16 Jun 2018 19:23:25 -0400
Subject: [PATCH 09/14] Assert almost equal

---
 pymc3/tests/test_sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 6f40b3bac8..250fee2ae3 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -353,7 +353,7 @@ def test_ignores_observed(self):
         assert (prior['mu'] < 90).all()
         assert (prior['positive_mu'] > 90).all()
         assert (prior['x_obs'] < 90).all()
-        assert (prior['positive_mu'] == np.abs(prior['mu'])).all()
+        npt.assert_array_almost_equal(prior['positive_mu'], np.abs(prior['mu']), decimal=4)
 
     def test_respects_shape(self):
         for shape in (2, (2,), (10, 2), (10, 10)):

From 20c7c9942668f8b3d6ff4f87e7ed9bfb392cb7f7 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sat, 16 Jun 2018 19:39:10 -0400
Subject: [PATCH 10/14] Fix failing test

---
 pymc3/tests/test_model_helpers.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py
index d8df1d8e4b..2b191bd144 100644
--- a/pymc3/tests/test_model_helpers.py
+++ b/pymc3/tests/test_model_helpers.py
@@ -68,17 +68,18 @@ def test_pandas_to_array(self):
         # Check function behavior with Theano graph variable
         theano_output = func(theano_graph_input)
         assert isinstance(theano_output, theano.gof.graph.Variable)
-        assert theano_output.name == input_name
+        assert theano_output.owner.inputs[0].name == input_name
 
         # Check function behavior with generator data
         generator_output = func(square_generator)
+
+        # Output is wrapped with `pm.floatX`, and this unwraps
+        wrapped = generator_output.owner.inputs[0]
         # Make sure the returned object has .set_gen and .set_default methods
-        assert hasattr(generator_output, "set_gen")
-        assert hasattr(generator_output, "set_default")
+        assert hasattr(wrapped, "set_gen")
+        assert hasattr(wrapped, "set_default")
         # Make sure the returned object is a Theano TensorVariable
-        assert isinstance(generator_output, tt.TensorVariable)
-
-        return None
+        assert isinstance(wrapped, tt.TensorVariable)
 
     def test_as_tensor(self):
         """

From 71922f17664d8c36f8c5ce32ddef9b2168782b5f Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sun, 17 Jun 2018 06:40:20 -0400
Subject: [PATCH 11/14] Update from comments

---
 pymc3/distributions/distribution.py |  7 ++++---
 pymc3/model.py                      |  2 ++
 pymc3/sampling.py                   |  6 +++---
 pymc3/tests/test_distributions.py   |  5 +++++
 pymc3/tests/test_sampling.py        | 27 ++++++++++++++++++++++++---
 5 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 8cbf6a34d7..15e024e23d 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -476,10 +476,8 @@ def generate_samples(generator, *args, **kwargs):
             samples = generator(size=broadcast_shape, *args, **kwargs)
         elif dist_shape == broadcast_shape:
             samples = generator(size=size_tup + dist_shape, *args, **kwargs)
-        elif size_tup[-len(broadcast_shape):] != broadcast_shape:
-            samples = generator(size=size_tup + broadcast_shape, *args, **kwargs)
         else:
-            samples = generator(size=size_tup + dist_shape, *args, **kwargs)
+            samples = None
     # Args have been broadcast correctly, can just ask for the right shape out
     elif dist_shape[-len(broadcast_shape):] == broadcast_shape:
         samples = generator(size=size_tup + dist_shape, *args, **kwargs)
@@ -489,6 +487,9 @@ def generate_samples(generator, *args, **kwargs):
         samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))]
         samples = np.hstack(samples).reshape(size_tup + suffix)
     else:
+        samples = None
+
+    if samples is None:
         raise TypeError('''Attempted to generate values with incompatible shapes:
             size: {size}
             dist_shape: {dist_shape}
diff --git a/pymc3/model.py b/pymc3/model.py
index 6771fe977e..e37ae25ee9 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1457,6 +1457,8 @@ def __init__(self, type=None, owner=None, index=None, name=None,
         if distribution is not None:
             self.model = model
             self.distribution = distribution
+            self.dshape = tuple(distribution.shape)
+            self.dsize = int(np.prod(distribution.shape))
 
             transformed_name = get_transformed_name(name, transform)
 
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 0f2b5acd80..9ae1f9e601 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -1277,13 +1277,13 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None,
     return {k: np.asarray(v) for k, v in ppc.items()}
 
 
-def sample_generative(samples=500, model=None, vars=None, random_seed=None):
-    """Generate samples from the generative model.
+def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None):
+    """Generate samples from the prior_predictive distribution.
 
     Parameters
     ----------
     samples : int
-        Number of samples from the prior to generate. Defaults to 500.
+        Number of samples from the prior predictive to generate. Defaults to 500.
     model : Model (optional if in `with` context)
     vars : iterable
         Variables for which to compute the posterior predictive samples.
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 756bba1bcb..2e72487257 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -909,10 +909,15 @@ def test_multinomial_mode(self, p, n):
         [[.25, .25, .25, .25], (1, 4), 3],
         # 3: expect to fail
         # [[.25, .25, .25, .25], (10, 4)],
+        [[.25, .25, .25, .25], (10, 1, 4), 5],
         # 5: expect to fail
         # [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
         [[[.25, .25, .25, .25],
          [.25, .25, .25, .25]], (2, 4), 13],
+        [[[.25, .25, .25, .25],
+         [.25, .25, .25, .25]], (1, 2, 4), [23, 29]],
+        [[[.25, .25, .25, .25],
+         [.25, .25, .25, .25]], (10, 2, 4), [31, 37]],
         [[[.25, .25, .25, .25],
          [.25, .25, .25, .25]], (2, 4), [17, 19]],
     ])
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 250fee2ae3..0ad6662102 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -348,7 +348,7 @@ def test_ignores_observed(self):
             positive_mu = pm.Deterministic('positive_mu', np.abs(mu))
             z = -1 - positive_mu
             pm.Normal('x_obs', mu=z, sd=1, observed=observed)
-            prior = pm.sample_generative()
+            prior = pm.sample_prior_predictive()
 
         assert (prior['mu'] < 90).all()
         assert (prior['positive_mu'] > 90).all()
@@ -360,7 +360,7 @@ def test_respects_shape(self):
             with pm.Model():
                 mu = pm.Gamma('mu', 3, 1, shape=1)
                 goals = pm.Poisson('goals', mu, shape=shape)
-                trace = pm.sample_generative(10)
+                trace = pm.sample_prior_predictive(10)
             if shape == 2:  # want to test shape as an int
                 shape = (2,)
             assert trace['goals'].shape == (10,) + shape
@@ -368,7 +368,7 @@ def test_respects_shape(self):
     def test_multivariate(self):
         with pm.Model():
             m = pm.Multinomial('m', n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4)
-            trace = pm.sample_generative(10)
+            trace = pm.sample_prior_predictive(10)
 
         assert m.random(size=10).shape == (10, 4)
         assert trace['m'].shape == (10, 4)
@@ -380,3 +380,24 @@ def test_layers(self):
 
         avg = b.random(size=10000).mean(axis=0)
         npt.assert_array_almost_equal(avg, 0.5 * np.ones_like(b), decimal=2)
+
+    def test_transformed(self):
+        n = 18
+        at_bats = 45 * np.ones(n, dtype=int)
+        hits = np.random.randint(1, 40, size=n, dtype=int)
+        draws = 50
+
+        with pm.Model() as model:
+            phi = pm.Beta('phi', alpha=1., beta=1.)
+
+            kappa_log = pm.Exponential('logkappa', lam=5.)
+            kappa = pm.Deterministic('kappa', tt.exp(kappa_log))
+
+            thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=n)
+
+            y = pm.Binomial('y', n=at_bats, p=thetas, shape=n, observed=hits)
+            gen = pm.sample_prior_predictive(draws)
+
+        assert gen['phi'].shape == (draws,)
+        assert gen['y'].shape == (draws, n)
+        assert 'thetas_logodds__' in gen
\ No newline at end of file

From d1f29fa7defd95cf41e259e6e59cd654aae0ffca Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sun, 17 Jun 2018 06:55:34 -0400
Subject: [PATCH 12/14] More comments, add release note

---
 RELEASE-NOTES.md  | 2 ++
 pymc3/sampling.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index bf814c9ff5..aced473642 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -18,6 +18,8 @@
   avoids pickleing issues on UNIX, and allows us to show a progress bar
   for all chains. If parallel sampling is interrupted, we now return
   partial results.
+- Add `sample_prior_predictive` which allows for efficient sampling from
+  the unconditioned model.
 
 ### Fixes
 
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 9ae1f9e601..6b38e0a62d 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -26,7 +26,7 @@
 import sys
 sys.setrecursionlimit(10000)
 
-__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts', 'sample_generative']
+__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts', 'sample_prior_predictive']
 
 STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
                 BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)
@@ -1278,7 +1278,7 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None,
 
 
 def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None):
-    """Generate samples from the prior_predictive distribution.
+    """Generate samples from the prior predictive distribution.
 
     Parameters
     ----------

From 60a624a743272e0d2646ee285ccf1242147614a3 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sun, 17 Jun 2018 07:06:15 -0400
Subject: [PATCH 13/14] forward_val instead of forward.eval

---
 pymc3/sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 6b38e0a62d..b3b569deea 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -1316,7 +1316,7 @@ def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None
         elif is_transformed_name(var_name):
             untransformed = get_untransformed_name(var_name)
             if untransformed in data:
-                prior[var_name] = model[untransformed].transformation.forward(data[untransformed]).eval()
+                prior[var_name] = model[untransformed].transformation.forward_val(data[untransformed])
     return prior
 
 

From c12af1a07f05b1456e167bcc0eaf2413fafc18a8 Mon Sep 17 00:00:00 2001
From: Colin Carroll <colcarroll@gmail.com>
Date: Sun, 17 Jun 2018 17:01:12 -0400
Subject: [PATCH 14/14] Actually remove kwargs.get

---
 pymc3/distributions/discrete.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index fc4a431889..801386a097 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -713,8 +713,8 @@ def __init__(self, p, *args, **kwargs):
     def _random(self, k, p, size=None):
         if len(p.shape) > 1:
             return np.asarray(
-                [np.random.choice(k, p=p, size=size)
-                    for p in kwargs['p']]
+                [np.random.choice(k, p=pp, size=size)
+                    for pp in p]
             )
         else:
             return np.asarray(np.random.choice(k, p=p, size=size))