Source code for probflow.models.categorical_model

import matplotlib.pyplot as plt
import numpy as np

from probflow.utils.plotting import plot_categorical_dist

from .model import Model


class CategoricalModel(Model):
    """Abstract base class for probflow models where the dependent variable
    (the target) is categorical (e.g. drawn from a Bernoulli distribution).

    TODO : why use this over just Model

    This class inherits several methods from :class:`.Module`:

    * :attr:`~parameters`
    * :attr:`~modules`
    * :attr:`~trainable_variables`
    * :meth:`~kl_loss`
    * :meth:`~kl_loss_batch`
    * :meth:`~reset_kl_loss`
    * :meth:`~add_kl_loss`

    as well as several methods from :class:`.Model`:

    * :meth:`~log_likelihood`
    * :meth:`~train_step`
    * :meth:`~fit`
    * :meth:`~stop_training`
    * :meth:`~set_learning_rate`
    * :meth:`~predictive_sample`
    * :meth:`~aleatoric_sample`
    * :meth:`~epistemic_sample`
    * :meth:`~predict`
    * :meth:`~metric`
    * :meth:`~posterior_mean`
    * :meth:`~posterior_sample`
    * :meth:`~posterior_ci`
    * :meth:`~prior_sample`
    * :meth:`~posterior_plot`
    * :meth:`~prior_plot`
    * :meth:`~log_prob`
    * :meth:`~prob`
    * :meth:`~save`
    * :meth:`~summary`

    and adds the following categorical-model-specific methods:

    * :meth:`~pred_dist_plot`
    * :meth:`~calibration_curve`


    Example
    -------

    TODO

    """

    def pred_dist_plot(self, x, n=10000, cols=1, batch_size=None, **kwargs):
        """Plot posterior predictive distribution from the model given ``x``.

        Draws ``n`` samples from the predictive distribution for each
        datapoint in ``x`` and plots one categorical distribution per
        datapoint, each in its own subplot of the current figure.

        TODO: Docs...

        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").
        n : int
            Number of samples to draw from the model given ``x``.
            Default = 10000
        cols : int
            Divide the subplots into a grid with this many columns.
            Default = 1
        batch_size : None or int
            Compute using batches of this many datapoints.  Default is `None`
            (i.e., do not use batching).
        **kwargs
            Additional keyword arguments are passed to
            :func:`.plot_categorical_dist`

        Raises
        ------
        NotImplementedError
            If the predictive samples have any trailing dimension (beyond the
            sample and datapoint dimensions) of size greater than 1.
        """

        # Sample from the predictive distribution
        samples = self.predictive_sample(x, n=n, batch_size=batch_size)

        # Dependent variable must be scalar: any dimensions after
        # (sample, datapoint) are only allowed to be singleton
        n_samples = samples.shape[0]
        n_datapoints = samples.shape[1]
        if samples.ndim > 2 and any(e > 1 for e in samples.shape[2:]):
            raise NotImplementedError(
                "only categorical dependent variables are supported"
            )
        samples = samples.reshape([n_samples, n_datapoints])

        # np.ceil returns a float, but matplotlib requires an integer number
        # of rows for the subplot grid
        rows = int(np.ceil(n_datapoints / cols))

        # Plot the predictive distribution of each datapoint in its own panel
        for i in range(n_datapoints):
            plt.subplot(rows, cols, i + 1)
            plot_categorical_dist(samples[:, i], **kwargs)
            plt.xlabel("Datapoint " + str(i))
        plt.tight_layout()

    def calibration_curve(
        self, x, y=None, split_by=None, bins=10, plot=True, batch_size=None
    ):
        """Plot and return the categorical calibration curve.

        Plots and returns the calibration curve (estimated probability of
        outcome vs the true probability of that outcome).

        .. note::

            Not implemented yet: the body of this method is currently a
            placeholder.

        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").  Or a |DataGenerator| for both x and y.
        y : |ndarray| or |DataFrame| or |Series| or Tensor
            Dependent variable values of the dataset to evaluate (aka the
            "target").
        split_by : int
            Draw the calibration curve independently for datapoints
            with each unique value in `x[:,split_by]` (a categorical
            column).
        bins : int, list of float, or |ndarray|
            Bins used to compute the curve.  If an integer, will use
            `bins` evenly-spaced bins from 0 to 1.  If a vector,
            `bins` is the vector of bin edges.
        plot : bool
            Whether to plot the curve
        batch_size : None or int
            Compute using batches of this many datapoints.  Default is `None`
            (i.e., do not use batching).

        TODO: Docs...

        """

        # TODO: split by continuous cols as well?  Then will need to define
        # bins or edges too
        # TODO