Skip to content

Instantly share code, notes, and snippets.

@jakevdp
Last active September 23, 2021 15:24

Revisions

  1. jakevdp revised this gist Jun 29, 2013. 1 changed file with 4 additions and 2 deletions.
    6 changes: 4 additions & 2 deletions generative.py
    Original file line number Diff line number Diff line change
    @@ -101,10 +101,12 @@ def sample(self, n_samples=1, random_state=None):
    'kde':KernelDensity}


    # This should inherit from BaseNB
    class GenerativeBayes(BaseNB):
    """Generative Bayes Classifier"""
    # note: interface is essentially the same as that of GaussianNB,
    # and if density_estimator is `NormalApproximation`, it should
    # give the same results.
    def __init__(self, density_estimator, **kwargs):
    # TODO: add prior. Compute from data?
    if isinstance(density_estimator, str):
    dclass = DENSITY_ESTIMATORS.get(density_estimator)
    self.density_estimator = dclass(**kwargs)
  2. jakevdp created this gist Jun 29, 2013.
    135 changes: 135 additions & 0 deletions generative.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,135 @@
    """
    Bayesian Generative Classifier
    ------------------------------
    """
    # Author: Jake Vanderplas <[email protected]>

    import numpy as np
    from sklearn.neighbors.kde import KernelDensity
    from sklearn.mixture import GMM
    from sklearn.base import BaseEstimator, clone
    from sklearn.utils import array2d, check_random_state
    from sklearn.naive_bayes import BaseNB


    class NormalApproximation(BaseEstimator):
    """Normal Approximation Density Estimator"""
    def __init__(self):
    pass

    def fit(self, X):
    """Fit the Normal Approximation to data
    Parameters
    ----------
    X: array_like, shape (n_samples, n_features)
    List of n_features-dimensional data points. Each row
    corresponds to a single data point.
    """
    X = array2d(X)
    epsilon = 1e-9
    self.mean = X.mean(0)
    self.var = X.var(0) + epsilon
    return self

    def eval(self, X):
    """Evaluate the model on the data
    Parameters
    ----------
    X : array_like
    An array of points to query. Last dimension should match dimension
    of training data (n_features)
    Returns
    -------
    density : ndarray
    The array of density evaluations. This has shape X.shape[:-1]
    """
    X = array2d(X)
    if X.shape[-1] != self.mean.shape[0]:
    raise ValueError("dimension of X must match that of training data")
    norm = 1. / np.sqrt(2 ** X.shape[-1] * np.sum(self.var))
    res = np.log(norm * np.exp(-0.5 * ((X - self.mean) ** 2
    / self.var).sum(1)))
    return res

    def score(self, X):
    """Compute the log probability under the model.
    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
    List of n_features-dimensional data points. Each row
    corresponds to a single data point.
    Returns
    -------
    logprob : array_like, shape (n_samples,)
    Log probabilities of each data point in X
    """
    return np.sum(np.log(self.eval(X)))

    def sample(self, n_samples=1, random_state=None):
    """Generate random samples from the model.
    Parameters
    ----------
    n_samples : int, optional
    Number of samples to generate. Defaults to 1.
    random_state: RandomState or an int seed (0 by default)
    A random number generator instance
    Returns
    -------
    X : array_like, shape (n_samples, n_features)
    List of samples
    """
    rng = check_random_state(random_state)

    try:
    n_samples = n_samples + (1,)
    except TypeError:
    n_samples = (n_samples, 1)

    return rng.normal(self.mean, self.std, size=n_samples)


    DENSITY_ESTIMATORS = {'norm_approx':NormalApproximation,
    'gmm':GMM,
    'kde':KernelDensity}


    # This should inherit from BaseNB
    class GenerativeBayes(BaseNB):
    def __init__(self, density_estimator, **kwargs):
    # TODO: add prior. Compute from data?
    if isinstance(density_estimator, str):
    dclass = DENSITY_ESTIMATORS.get(density_estimator)
    self.density_estimator = dclass(**kwargs)
    elif isinstance(density_estimator, type):
    self.density_estimator = density_estimator(**kwargs)
    else:
    self.density_estimator = density_estimator

    def fit(self, X, y):
    X = array2d(X)
    y = np.asarray(y)
    self.classes_ = np.sort(np.unique(y))
    n_classes = len(self.classes_)
    n_samples, n_features = X.shape

    self.class_prior_ = np.array([np.float(np.sum(y == y_i)) / n_samples
    for y_i in self.classes_])
    self.estimators_ = [clone(self.density_estimator).fit(X[y == c])
    for c in self.classes_]
    return self

    def _joint_log_likelihood(self, X):
    X = array2d(X)
    jll = np.array([np.log(prior) + dens.eval(X)
    for (prior, dens)
    in zip(self.class_prior_,
    self.estimators_)]).T
    return jll