import matplotlib.pyplot as plt
import numpy as np
from probflow.data import DataGenerator, make_generator
from probflow.utils.casting import to_numpy
from probflow.utils.plotting import plot_by, plot_dist
from .model import Model
class ContinuousModel(Model):
"""Abstract base class for probflow models where the dependent variable
(the target) is continuous and 1-dimensional.
The only advantage to using this over the more general :class:`.Model` is
that :class:`ContinuousModel` also includes several methods specific to
continuous models, for tasks such as getting the predictive intervals,
coverage, R-squared value, or calibration metrics (see below for the full
list of methods).
.. admonition:: Only supports scalar dependent variables
Note that the methods of :class:`.ContinuousModel` only support scalar,
continuous dependent variables (not *any* continuous model, as the name
might suggest). For models which have a multidimensional output, just
use the more general :class:`.Model`; for models with categorical
output (i.e., classifiers), use :class:`.CategoricalModel`; and for
models which have a discrete output (e.g. a Poisson regression), use
:class:`.DiscreteModel`.
This class inherits several methods from :class:`.Module`:
* :attr:`~parameters`
* :attr:`~modules`
* :attr:`~trainable_variables`
* :meth:`~kl_loss`
* :meth:`~kl_loss_batch`
* :meth:`~reset_kl_loss`
* :meth:`~add_kl_loss`
as well as several methods from :class:`.Model`:
* :meth:`~log_likelihood`
* :meth:`~train_step`
* :meth:`~fit`
* :meth:`~stop_training`
* :meth:`~set_learning_rate`
* :meth:`~predictive_sample`
* :meth:`~aleatoric_sample`
* :meth:`~epistemic_sample`
* :meth:`~predict`
* :meth:`~metric`
* :meth:`~posterior_mean`
* :meth:`~posterior_sample`
* :meth:`~posterior_ci`
* :meth:`~prior_sample`
* :meth:`~posterior_plot`
* :meth:`~prior_plot`
* :meth:`~log_prob`
* :meth:`~prob`
* :meth:`~save`
* :meth:`~summary`
and adds the following continuous-model-specific methods:
* :meth:`~predictive_interval`
* :meth:`~aleatoric_interval`
* :meth:`~epistemic_interval`
* :meth:`~pred_dist_plot`
* :meth:`~predictive_prc`
* :meth:`~pred_dist_covered`
* :meth:`~pred_dist_coverage`
* :meth:`~coverage_by`
* :meth:`~r_squared`
* :meth:`~r_squared_plot`
* :meth:`~residuals`
* :meth:`~residuals_plot`
* :meth:`~calibration_curve`
* :meth:`~calibration_curve_plot`
* :meth:`~calibration_metric`
* :meth:`~sharpness`
    * :meth:`~dispersion_metric`
Example
-------
    A minimal sketch of a simple linear regression (assuming the standard
    ProbFlow ``Parameter``, ``ScaleParameter``, and ``Normal`` classes, and
    hypothetical data arrays ``x_train``, ``y_train``, and ``x_valid``):
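
    .. code-block:: python3

        import probflow as pf

        class LinearRegression(pf.ContinuousModel):

            def __init__(self):
                self.w = pf.Parameter(name="weight")
                self.b = pf.Parameter(name="bias")
                self.s = pf.ScaleParameter(name="std")

            def __call__(self, x):
                # Observation distribution: Normal(w*x + b, s)
                return pf.Normal(x * self.w() + self.b(), self.s())

        model = LinearRegression()
        model.fit(x_train, y_train)

        # Continuous-model-specific methods are then available, e.g.
        lb, ub = model.predictive_interval(x_valid, ci=0.9)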
"""
def _intervals(self, fn, x, side, ci=0.95, n=1000, batch_size=None):
"""Compute intervals on some type of sample"""
# Compute in batches?
if batch_size is not None:
intervals = [
self._intervals(fn, x_data, side, ci=ci, n=n)
for x_data, y_data in make_generator(
x, test=True, batch_size=batch_size
)
]
            # Concatenate the per-batch results: a (lower, upper) pair of
            # arrays when side == "both", otherwise a single array
            if side == "both":
                return tuple(
                    np.concatenate(e, axis=0) for e in zip(*intervals)
                )
            return np.concatenate(intervals, axis=0)
# No batching (or this is a batch)
samples = fn(x, n=n)
if side == "lower":
return np.percentile(samples, 100 * (1.0 - ci), axis=0)
elif side == "upper":
return np.percentile(samples, 100 * ci, axis=0)
else:
lb = 100 * (1.0 - ci) / 2.0
prcs = np.percentile(samples, [lb, 100.0 - lb], axis=0)
return prcs[0, ...], prcs[1, ...]
    def predictive_interval(
self, x, ci=0.95, side="both", n=1000, batch_size=None
):
r"""Compute confidence intervals on the model's estimate of the target
given ``x``, including all sources of uncertainty.
        These intervals are computed from samples of the full posterior
        predictive distribution, so they reflect both aleatoric and epistemic
        uncertainty. Use ``side`` to request either a two-sided central
        interval or a one-sided lower or upper bound (see below).
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features").
ci : float between 0 and 1
            Inner proportion of the predictive distribution to use as the
confidence interval.
Default = 0.95
side : str {'lower', 'upper', 'both'}
Whether to get the one- or two-sided interval, and which side to
get. If ``'both'`` (default), gets the upper and lower bounds of
the central ``ci`` interval. If ``'lower'``, gets the lower bound
on the one-sided ``ci`` interval. If ``'upper'``, gets the upper
bound on the one-sided ``ci`` interval.
n : int
Number of samples from the posterior predictive distribution to
take to compute the confidence intervals.
Default = 1000
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
lb : |ndarray|
Lower bounds of the ``ci`` confidence intervals on the predictions
for samples in ``x``. Doesn't return this if ``side='upper'``.
ub : |ndarray|
Upper bounds of the ``ci`` confidence intervals on the predictions
for samples in ``x``. Doesn't return this if ``side='lower'``.
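
        Example
        -------

        As a minimal sketch, assuming a fitted model ``model`` and an array
        of validation features ``x_val`` (hypothetical names), a two-sided
        95% interval could be computed with:

        .. code-block:: python3

            lb, ub = model.predictive_interval(x_val, ci=0.95)

        and a one-sided upper bound with:

        .. code-block:: python3

            ub = model.predictive_interval(x_val, ci=0.95, side="upper")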
"""
return self._intervals(
self.predictive_sample, x, side, ci=ci, n=n, batch_size=batch_size
)
    def aleatoric_interval(
self, x, ci=0.95, side="both", n=1000, batch_size=None
):
r"""Compute confidence intervals on the model's estimate of the target
given ``x``, including only aleatoric uncertainty (uncertainty due to
noise).
        These intervals are computed from samples which include only the
        noise component of the uncertainty, so they do not reflect
        uncertainty about the model's parameter values.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features").
ci : float between 0 and 1
            Inner proportion of the predictive distribution to use as the
confidence interval.
Default = 0.95
side : str {'lower', 'upper', 'both'}
Whether to get the one- or two-sided interval, and which side to
get. If ``'both'`` (default), gets the upper and lower bounds of
the central ``ci`` interval. If ``'lower'``, gets the lower bound
on the one-sided ``ci`` interval. If ``'upper'``, gets the upper
bound on the one-sided ``ci`` interval.
n : int
Number of samples from the aleatoric predictive distribution to
take to compute the confidence intervals.
Default = 1000
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
lb : |ndarray|
Lower bounds of the ``ci`` confidence intervals on the predictions
for samples in ``x``. Doesn't return this if ``side='upper'``.
ub : |ndarray|
Upper bounds of the ``ci`` confidence intervals on the predictions
for samples in ``x``. Doesn't return this if ``side='lower'``.
"""
return self._intervals(
self.aleatoric_sample, x, side, ci=ci, n=n, batch_size=batch_size
)
    def epistemic_interval(
self, x, ci=0.95, side="both", n=1000, batch_size=None
):
r"""Compute confidence intervals on the model's estimate of the target
        given ``x``, including only epistemic uncertainty (uncertainty
        arising from uncertainty about the model's parameter values).
        These intervals are computed from predictions made with different
        draws of the parameters from their posteriors, while ignoring the
        noise (aleatoric) component of the predictive distribution.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features").
ci : float between 0 and 1
            Inner proportion of the predictive distribution to use as the
confidence interval.
Default = 0.95
side : str {'lower', 'upper', 'both'}
Whether to get the one- or two-sided interval, and which side to
get. If ``'both'`` (default), gets the upper and lower bounds of
the central ``ci`` interval. If ``'lower'``, gets the lower bound
on the one-sided ``ci`` interval. If ``'upper'``, gets the upper
bound on the one-sided ``ci`` interval.
n : int
Number of samples from the epistemic predictive distribution to
take to compute the confidence intervals.
Default = 1000
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
lb : |ndarray|
Lower bounds of the ``ci`` confidence intervals on the predictions
for samples in ``x``. Doesn't return this if ``side='upper'``.
ub : |ndarray|
Upper bounds of the ``ci`` confidence intervals on the predictions
for samples in ``x``. Doesn't return this if ``side='lower'``.
"""
return self._intervals(
self.epistemic_sample, x, side, ci=ci, n=n, batch_size=batch_size
)
    def pred_dist_plot(
self, x, n=10000, cols=1, individually=False, batch_size=None, **kwargs
):
r"""Plot posterior predictive distribution from the model given ``x``.
        Draws ``n`` samples from the posterior predictive distribution for
        each datapoint in ``x``, and plots the resulting distribution(s).
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features").
n : int
Number of samples to draw from the model given ``x``.
Default = 10000
        cols : int
            Divide the subplots into a grid with this many columns (only used
            if ``individually=True``).
individually : bool
If ``True``, plot one subplot per datapoint in ``x``, otherwise
plot all the predictive distributions on the same plot.
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
**kwargs
Additional keyword arguments are passed to :func:`.plot_dist`
Example
-------
        For instance, assuming a fitted model ``model`` and an array of
        validation features ``x_val`` (hypothetical names), the predictive
        distributions for the first few datapoints could be plotted with:
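
        .. code-block:: python3

            import matplotlib.pyplot as plt

            # One subplot per datapoint, in a 2-column grid
            model.pred_dist_plot(x_val[:4], individually=True, cols=2)
            plt.show()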
"""
# Sample from the predictive distribution
samples = self.predictive_sample(x, n=n, batch_size=batch_size)
# Independent variable must be scalar
Ns = samples.shape[0]
N = samples.shape[1]
if samples.ndim > 2 and any(e > 1 for e in samples.shape[2:]):
raise NotImplementedError(
"only scalar dependent variables are supported"
)
else:
samples = samples.reshape([Ns, N])
# Plot the predictive distributions
if individually:
            rows = int(np.ceil(N / cols))  # subplot indices must be ints
for i in range(N):
plt.subplot(rows, cols, i + 1)
plot_dist(samples[:, i], **kwargs)
plt.xlabel("Predicted dependent variable value for " + str(i))
plt.tight_layout()
else:
plot_dist(samples, **kwargs)
plt.xlabel("Predicted dependent variable value")
def _get_y(self, x, y):
"""Get y, even when x is a DataGenerator and y is None"""
if y is not None:
return y
else:
y_true = [d for _, d in make_generator(x, y, test=True)]
return np.concatenate(to_numpy(y_true), axis=0)
    def predictive_prc(self, x, y=None, n=1000, batch_size=None):
r"""Compute the percentile of each observation along the posterior
predictive distribution.
        That is, for each datapoint, compute the proportion (between 0 and 1)
        of posterior predictive samples which fall below the true value of
        the dependent variable.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series| or Tensor
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of samples to draw from the model given ``x``.
Default = 1000
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
prcs : |ndarray| of float between 0 and 1
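
        Example
        -------

        A minimal sketch, assuming a fitted model ``model`` and validation
        data ``x_val`` and ``y_val`` (hypothetical names):

        .. code-block:: python3

            # Percentile of each observation along its predictive distribution
            prcs = model.predictive_prc(x_val, y_val)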
"""
# Need both x and y data
if y is None and not isinstance(x, DataGenerator):
raise TypeError("need both x and y to compute predictive prc")
# Compute in batches?
if batch_size is not None:
return np.concatenate(
[
self.predictive_prc(x_data, y_data, n=n)
for x_data, y_data in make_generator(
x, y, batch_size=batch_size
)
],
axis=0,
)
# Sample from the predictive distribution
samples = self.predictive_sample(x, n=n, batch_size=batch_size)
# Independent variable must be scalar
if samples.ndim > 2 and any(e > 1 for e in samples.shape[2:]):
raise NotImplementedError(
"only scalar dependent variables are supported"
)
# Reshape
Ns = samples.shape[0]
N = samples.shape[1]
samples = samples.reshape([Ns, N])
y = self._get_y(x, y).reshape([1, N])
# Percentiles of true y data along predictive distribution
prcs = np.argmax(np.sort(samples, axis=0) > y, axis=0) / Ns
# Argmax returns 0 when all samples are less than true value!
prcs[np.reshape(np.max(samples, axis=0) < y, [N])] = 1.0
# Return percentiles
return prcs.reshape([N, 1])
    def pred_dist_covered(
self, x, y=None, n: int = 1000, ci: float = 0.95, batch_size=None
):
r"""Compute whether each observation was covered by a given confidence
interval.
        That is, compute whether the true value of the dependent variable for
        each datapoint falls within the central ``ci`` interval of the
        posterior predictive distribution for that datapoint.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series| or Tensor
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of samples to draw from the model given ``x``.
Default = 1000
ci : float between 0 and 1
Confidence interval to use.
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
        covered : |ndarray| of bool
            Whether the dependent variable value of each datapoint in ``x``
            was covered by the central ``ci`` interval of its posterior
            predictive distribution.
"""
# Check values
if n < 1:
raise ValueError("n must be greater than 0")
if ci < 0.0 or ci > 1.0:
raise ValueError("ci must be between 0 and 1")
# Compute the predictive percentile of each observation
pred_prcs = self.predictive_prc(x, y=y, n=n, batch_size=batch_size)
# Determine what samples fall in the inner ci proportion
lb = (1.0 - ci) / 2.0
ub = 1.0 - lb
return (pred_prcs >= lb) & (pred_prcs < ub)
    def pred_dist_coverage(self, x, y=None, n=1000, ci=0.95, batch_size=None):
r"""Compute what percent of samples are covered by a given confidence
interval.
        That is, compute the proportion of datapoints whose true dependent
        variable values fall within the central ``ci`` interval of the
        posterior predictive distribution.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series| or Tensor
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of samples to draw from the model given ``x``.
Default = 1000
ci : float between 0 and 1
Confidence interval to use.
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
prc_covered : float between 0 and 1
Proportion of the samples which were covered by the predictive
distribution's confidence interval.
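
        Example
        -------

        A rough sketch, assuming a fitted model ``model`` and validation data
        ``x_val`` and ``y_val`` (hypothetical names):

        .. code-block:: python3

            # Fraction of y_val covered by the central 95% predictive interval
            coverage = model.pred_dist_coverage(x_val, y_val, ci=0.95)

        For a well-calibrated model, ``coverage`` should be close to ``ci``.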
"""
return self.pred_dist_covered(
x, y=y, n=n, ci=ci, batch_size=batch_size
).mean()
    def coverage_by(
self,
x_by,
x,
y=None,
n: int = 1000,
ci: float = 0.95,
bins: int = 30,
plot: bool = True,
ideal_line_kwargs: dict = {},
batch_size=None,
**kwargs,
):
r"""Compute and plot the coverage of a given confidence interval
of the posterior predictive distribution as a function of specified
independent variables.
        This can be used to check whether the model's predictive intervals
        are similarly well calibrated across the range of one or more of the
        independent variables.
Parameters
----------
x_by : int or str or list of int or list of str
            Which independent variable(s) to plot the coverage as a function
            of. That is, which columns in ``x`` to plot by.
x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series| or Tensor
Dependent variable values of the dataset to evaluate (aka the
"target").
        n : int
            Number of samples to draw from the model given ``x``.
            Default = 1000
        ci : float between 0 and 1
Inner percentile to find the coverage of. For example, if
``ci=0.95``, will compute the coverage of the inner 95% of the
posterior predictive distribution.
bins : int
Number of bins to use for x_by
ideal_line_kwargs : dict
Dict of args to pass to matplotlib.pyplot.plot for ideal coverage
line.
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
**kwargs
Additional keyword arguments are passed to plot_by
Returns
-------
xo : |ndarray|
Values of x_by corresponding to bin centers.
co : |ndarray|
Coverage of the ``ci`` confidence interval of the predictive
distribution in each bin.
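
        Example
        -------

        A rough sketch, assuming a fitted model ``model`` and validation data
        ``x_val`` and ``y_val`` (hypothetical names), plotting the coverage
        of the 95% predictive interval as a function of the first column of
        ``x_val``:

        .. code-block:: python3

            import matplotlib.pyplot as plt

            xo, co = model.coverage_by(0, x_val, y_val, ci=0.95)
            plt.show()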
"""
# Compute whether each sample was covered by the predictive interval
covered = self.pred_dist_covered(
x, y=y, n=n, ci=ci, batch_size=batch_size
)
# Plot coverage proportion as a fn of x_by cols of x
xo, co = plot_by(x_by, 100 * covered, label="Actual", **kwargs)
        # Ideal-line kwargs (copy to avoid mutating the default/caller's dict)
        ideal_line_kwargs = dict(ideal_line_kwargs)
        if "linestyle" not in ideal_line_kwargs:
            ideal_line_kwargs["linestyle"] = "--"
        if "color" not in ideal_line_kwargs:
            ideal_line_kwargs["color"] = "k"
# Also plot ideal line
plt.axhline(100 * ci, label="Ideal", **ideal_line_kwargs)
plt.legend()
plt.ylabel(str(100 * ci) + "% predictive interval coverage")
plt.xlabel("Independent variable")
return xo, co
    def r_squared(self, x, y=None, n=1000, batch_size=None):
r"""Compute the Bayesian R-squared distribution (Gelman et al., 2018).
        The Bayesian R-squared is the distribution, over draws from the
        parameter posteriors, of the ratio of the variance of the model's
        predictions to the sum of that variance and the residual variance
        (Gelman et al., 2018).
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series|
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of posterior draws to use for computing the r-squared
distribution. Default = `1000`.
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
|ndarray|
Samples from the r-squared distribution. Size: ``(num_samples,)``.
Examples
--------
        A minimal sketch, assuming a fitted model ``model`` and validation
        data ``x_val`` and ``y_val`` (hypothetical names):
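
        .. code-block:: python3

            import numpy as np

            # Samples from the Bayesian R-squared distribution
            r2 = model.r_squared(x_val, y_val)
            print("posterior mean R^2:", np.mean(r2))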
References
----------
- Andrew Gelman, Ben Goodrich, Jonah Gabry, & Aki Vehtari.
`R-squared for Bayesian regression models. <https://doi.org/10.1080/00031305.2018.1549100>`_
*The American Statistician*, 2018.
"""
# Get true y values
y_true = self._get_y(x, y)
# Predict y with samples from the posterior distribution
y_pred = self.epistemic_sample(x, n=n, batch_size=batch_size)
# Compute Bayesian R^2
v_fit = np.var(y_pred, axis=1)
v_res = np.var(y_pred - np.expand_dims(y_true, 0), axis=1)
return v_fit / (v_fit + v_res)
    def r_squared_plot(
self, x, y=None, n=1000, style="hist", batch_size=None, **kwargs
):
r"""Plot the Bayesian R-squared distribution.
See :meth:`~r_squared` for more info on the Bayesian R-squared metric.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series|
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of posterior draws to use for computing the r-squared
distribution. Default = `1000`.
        style : str
            Style of plot to create (passed to :func:`.plot_dist`).
            Default = ``'hist'``
        batch_size : None or int
            Compute using batches of this many datapoints. Default is `None`
            (i.e., do not use batching).
**kwargs
Additional keyword arguments are passed to :func:`.plot_dist`
Example
-------
        For instance, assuming a fitted model ``model`` and validation data
        ``x_val`` and ``y_val`` (hypothetical names):
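
        .. code-block:: python3

            import matplotlib.pyplot as plt

            model.r_squared_plot(x_val, y_val)
            plt.show()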
"""
r2 = self.r_squared(x, y, n=n, batch_size=batch_size)
plot_dist(r2, style=style, **kwargs)
plt.xlabel("Bayesian R squared")
    def residuals(self, x, y=None, batch_size=None):
r"""Compute the residuals of the model's predictions.
        The residual for each datapoint is the true value of the dependent
        variable minus the model's predicted value (as given by
        :meth:`~predict`).
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series|
Dependent variable values of the dataset to evaluate (aka the
"target").
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
|ndarray|
The residuals.
Example
-------
        A minimal sketch, assuming a fitted model ``model`` and validation
        data ``x_val`` and ``y_val`` (hypothetical names):
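
        .. code-block:: python3

            # Residuals (true - predicted) for the validation data
            res = model.residuals(x_val, y_val)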
"""
y_true = self._get_y(x, y)
y_pred = self.predict(x, batch_size=batch_size)
return y_true - y_pred
    def residuals_plot(self, x, y=None, batch_size=None, **kwargs):
r"""Plot the distribution of residuals of the model's predictions.
        Computes the residuals (true minus predicted values, see
        :meth:`~residuals`) and plots their distribution.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series|
Dependent variable values of the dataset to evaluate (aka the
"target").
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
**kwargs
Additional keyword arguments are passed to :func:`.plot_dist`
Example
-------
        For instance, assuming a fitted model ``model`` and validation data
        ``x_val`` and ``y_val`` (hypothetical names):
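
        .. code-block:: python3

            import matplotlib.pyplot as plt

            model.residuals_plot(x_val, y_val)
            plt.show()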
"""
res = self.residuals(x, y, batch_size=batch_size)
plot_dist(res, **kwargs)
plt.xlabel("Residual (True - Predicted)")
    def calibration_curve(self, x, y, n=1000, resolution=100, batch_size=None):
r"""Compute the regression calibration curve (Kuleshov et al., 2018).
The regression calibration curve compares the empirical cumulative
probability to the cumulative probability predicted by a regression
model (Kuleshov et al., 2018). First, a vector :math:`p` of :math:`m`
        confidence levels is chosen, which correspond to the predicted
cumulative probabilities:
.. math::
0 \leq p_1 \leq p_2 \leq \ldots \leq p_m \leq 1
        Then, a vector of empirical frequencies :math:`\hat{p}` at each of
        those confidence levels is computed using validation data:
.. math::
\hat{p}_j = \frac{1}{N} \sum_{i=1}^N [ P_M(x_i \leq y_i) \leq p_j ]
where :math:`N` is the number of validation datapoints, :math:`P_M(x_i
\leq y_i)` is the model's predicted cumulative probability of datapoint
:math:`i` (i.e., the percentile along the model's predicted probability
distribution at which the true value of :math:`y_i` falls), and
        :math:`\sum_i [ a_i \leq b_i ]` is just the count of elements of
        :math:`a` which are less than or equal to the corresponding elements
        of :math:`b`.
The calibration curve then plots :math:`p` against :math:`\hat{p}`.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series|
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of samples to draw from the model for computing the
predictive percentile. Default = 1000
resolution : int
Number of confidence levels to evaluate at. This corresponds to
the :math:`m` parameter in section 3.5 of (Kuleshov et al., 2018).
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
p : |ndarray|
The predicted cumulative frequencies, :math:`p`.
p_hat : |ndarray|
The empirical cumulative frequencies, :math:`\hat{p}`.
Example
-------
Supposing we have some training data (``x_train`` and ``y_train``) and
validation data (``x_val`` and ``y_val``), and have already fit a model
to the training data,
.. code-block:: python3
model = # some ProbFlow model...
model.fit(x_train, y_train)
Then we can compute the calibration curve with
:meth:`~calibration_curve`:
.. code-block:: python3
p_pred, p_empirical = model.calibration_curve(x_val, y_val)
The returned values can be used directly or plotted against one another
to get the calibration curve (as in Figure 3 in Kuleshov et al., 2018)
.. code-block:: python3
import matplotlib.pyplot as plt
plt.plot(p_pred, p_empirical)
Or, even more simply, just use :meth:`~calibration_curve_plot`.
See also
--------
* :meth:`~calibration_curve_plot`
        * :meth:`~calibration_metric`
References
----------
- Volodymyr Kuleshov, Nathan Fenner, and Stefano Ermon.
`Accurate Uncertainties for Deep Learning Using Calibrated Regression
<https://arxiv.org/abs/1807.00263>`_, 2018.
"""
pred_prc = self.predictive_prc(x, y, n=n, batch_size=batch_size)
p = np.linspace(0, 1, resolution + 2)[1:-1]
p_hat = np.array([np.mean(pred_prc < tp) for tp in p])
return p, p_hat
    def calibration_curve_plot(
self, x, y, n=1000, resolution=100, batch_size=None, **kwargs
):
r"""Plot the regression calibration curve.
See :meth:`~calibration_curve` for more info about the regression
calibration curve.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series|
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of samples to draw from the model for computing the
predictive percentile. Default = 1000
resolution : int
Number of confidence levels to evaluate at. This corresponds to
the :math:`m` parameter in section 3.5 of (Kuleshov et al., 2018).
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
**kwargs
            Additional keyword arguments are passed to ``matplotlib.pyplot.plot``
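
        Example
        -------

        A minimal sketch, assuming a fitted model ``model`` and validation
        data ``x_val`` and ``y_val`` (hypothetical names):

        .. code-block:: python3

            import matplotlib.pyplot as plt

            model.calibration_curve_plot(x_val, y_val)
            plt.plot([0, 1], [0, 1], "k--")  # ideal calibration line
            plt.show()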
See also
--------
* :meth:`~calibration_curve`
        * :meth:`~calibration_metric`
"""
p, p_hat = self.calibration_curve(
x, y, n=n, resolution=resolution, batch_size=batch_size
)
plt.plot(p, p_hat, **kwargs)
plt.xlabel("Predicted cumulative probability")
plt.ylabel("Empirical cumulative probability")
def _calibration_metric(self, metric: str, p, p_hat):
if metric == "msce":
return np.mean(np.square(p - p_hat))
elif metric == "rmsce":
return np.sqrt(np.mean(np.square(p - p_hat)))
elif metric == "mace":
return np.mean(np.abs(p - p_hat))
elif metric == "ma":
p0 = np.concatenate([[0.0], p, [1.0]])
p0_hat = np.concatenate([[0.0], p_hat, [1.0]])
return np.trapz(np.abs(p0 - p0_hat), p0)
else:
raise ValueError(f"Unknown calibration metric {metric}")
    def calibration_metric(
self, metric, x, y=None, n=1000, resolution=100, batch_size=None
):
r"""Compute one or more of several calibration metrics
Regression calibration metrics measure the error between a model's
regression calibration curve and the ideal calibration curve - i.e.,
what the curve would be if the model were perfectly calibrated (see
`Kuleshov et al., 2018 <https://arxiv.org/abs/1807.00263>`_ and `Chung
et al., 2020 <https://arxiv.org/abs/2011.09588>`_). First, a vector
        :math:`p` of :math:`m` confidence levels is chosen, which correspond
to the predicted cumulative probabilities:
.. math::
0 \leq p_1 \leq p_2 \leq \ldots \leq p_m \leq 1
        Then, a vector of empirical frequencies :math:`\hat{p}` at each of
        those confidence levels is computed using validation data:
.. math::
\hat{p}_j =
\frac{1}{N} \sum_{i=1}^N [ P_M(x_i \leq y_i) \leq p_j ]
where :math:`N` is the number of validation datapoints, :math:`P_M(x_i
\leq y_i)` is the model's predicted cumulative probability of datapoint
:math:`i` (i.e., the percentile along the model's predicted probability
distribution at which the true value of :math:`y_i` falls), and
        :math:`\sum_i [ a_i \leq b_i ]` is just the count of elements of
        :math:`a` which are less than or equal to the corresponding elements
        of :math:`b`.
Various metrics can be computed from these curves to measure how
accurately the regression model captures uncertainty:
The **mean squared calibration error (MSCE)** is the mean squared error
between the empirical and predicted frequencies,
.. math::
MSCE = \frac{1}{m} \sum_{j=1}^m (p_j - \hat{p}_j)^2
The **root mean squared calibration error (RMSCE)** is just the square
root of the MSCE:
.. math::
RMSCE = \sqrt{\frac{1}{m} \sum_{j=1}^m (p_j - \hat{p}_j)^2}
The **mean absolute calibration error (MACE)** is the mean of the
absolute differences between the empirical and predicted frequencies:
.. math::
MACE = \frac{1}{m} \sum_{j=1}^m | p_j - \hat{p}_j |
        And the **miscalibration area (MA)** is the area between the
        calibration curve and the ideal calibration curve (the identity line
        from (0, 0) to (1, 1)):
        .. math::
            MA = \int_0^1 \left| p_x - \hat{p}_x \right| \, dx
        Note that MA equals MACE in the limit as the number of bins (set by
        the ``resolution`` keyword argument) goes to infinity.
To choose which metric to compute, pass the name of the metric
(``msce``, ``rmsce``, ``mace``, or ``ma``) as the first argument to
this function (or a list of them to compute multiple).
See `Kuleshov et al., 2018 <https://arxiv.org/abs/1807.00263>`_, `Chung
et al., 2020 <https://arxiv.org/abs/2011.09588>`_ and the user guide
page on :doc:`/user_guide/evaluating` for discussions of evaluating
uncertainty estimates using calibration metrics, among other metrics.
Note that calibration is generally less important than accuracy, but
        more important than other metrics like :meth:`~sharpness` and any
        :meth:`~dispersion_metric`.
Parameters
----------
metric : str {'msce', 'rmsce', 'mace', or 'ma'} or List[str]
Which metric(s) to compute (see above for the definition of each
metric). To compute multiple metrics, pass a list of the metric
names you'd like to compute. Available metrics are:
* ``msce``: mean squared calibration error
* ``rmsce``: root mean squared calibration error
* ``mace``: mean absolute calibration error
* ``ma``: miscalibration area
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
y : |ndarray| or |DataFrame| or |Series|
Dependent variable values of the dataset to evaluate (aka the
"target").
n : int
Number of samples to draw from the model for computing the
predictive percentile. Default = 1000
resolution : int
Number of confidence levels to evaluate at. This corresponds to
the :math:`m` parameter in section 3.5 of (Kuleshov et al., 2018).
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
float or Dict[str, float]
The requested calibration metric. If a list of metric names was
passed, will return a dict whose keys are the metrics, and whose
values are the corresponding metric values.
Example
-------
Supposing we have some training data (``x_train`` and ``y_train``) and
validation data (``x_val`` and ``y_val``), and have already fit a model
to the training data,
.. code-block:: python3
model = # some ProbFlow model...
model.fit(x_train, y_train)
Then we can compute different calibration metrics using
        :meth:`~calibration_metric`. For example, to compute the mean
squared calibration error (MSCE):
.. code-block:: pycon
>>> model.calibration_metric("msce", x_val, y_val)
0.123
Or, to compute the mean absolute calibration error (MACE):
.. code-block:: pycon
>>> model.calibration_metric("mace", x_val, y_val)
0.211
To compute multiple metrics at the same time, pass a list of metric
names:
.. code-block:: pycon
>>> model.calibration_metric(["msce", "mace"], x_val, y_val)
{"msce": 0.123, "mace": 0.211}
See also
--------
* :meth:`~calibration_curve`
* :meth:`~calibration_curve_plot`
* :meth:`~sharpness`
* :meth:`~dispersion_metric`
References
----------
- Volodymyr Kuleshov, Nathan Fenner, and Stefano Ermon.
`Accurate Uncertainties for Deep Learning Using Calibrated Regression
<https://arxiv.org/abs/1807.00263>`_, 2018.
- Youngseog Chung, Willie Neiswanger, Ian Char, Jeff Schneider.
`Beyond Pinball Loss: Quantile Methods for Calibrated Uncertainty
Quantification <https://arxiv.org/abs/2011.09588>`_, 2020.
"""
p, p_hat = self.calibration_curve(
x, y, n=n, resolution=resolution, batch_size=batch_size
)
if isinstance(metric, list):
return {m: self._calibration_metric(m, p, p_hat) for m in metric}
else:
return self._calibration_metric(metric, p, p_hat)
    def sharpness(self, x, n=1000, batch_size=None):
r"""Compute the sharpness of the model's uncertainty estimates
The "sharpness" of a model's uncertainty estimates is the root mean of
the estimated variances:
.. math::
SHA = \sqrt{\frac{1}{N} \sum_{i=1}^N \text{Var}(\hat{Y}_i)}
See `Tran et al., 2020 <https://arxiv.org/abs/1912.10066>`_ and the
user guide page on :doc:`/user_guide/evaluating` for discussions of
evaluating uncertainty estimates using sharpness, among other metrics.
Note that the sharpness should generally be one of the later things you
consider - accuracy and calibration usually being more important.
Parameters
----------
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
n : int
Number of samples to draw from the model. Default = 1000
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
float
The sharpness of the model's uncertainty estimates
Example
-------
Supposing we have some training data (``x_train`` and ``y_train``) and
validation data (``x_val`` and ``y_val``), and have already fit a model
to the training data,
.. code-block:: python3
model = # some ProbFlow model...
model.fit(x_train, y_train)
Then we can compute the sharpness of our model's predictions with:
.. code-block:: pycon
>>> model.sharpness(x_val)
0.173
See also
--------
* :meth:`~calibration_metric`
* :meth:`~dispersion_metric`
References
----------
- Kevin Tran, Willie Neiswanger, Junwoong Yoon, Qingyang Zhang, Eric
Xing, Zachary W. Ulissi. `Methods for comparing uncertainty
quantifications for material property predictions
<https://arxiv.org/abs/1912.10066>`_, 2020.
"""
samples = self.predictive_sample(x, n=n, batch_size=batch_size)
return np.sqrt(np.mean(np.var(samples, axis=0)))
def _dispersion_metric(self, metric, samples):
stds = np.std(samples, axis=0)
if metric in ["cv", "cov", "coefficient_of_variation"]:
return np.std(stds) / np.mean(stds)
elif metric in ["qcd", "qcod", "quartile_coefficient_of_dispersion"]:
q1 = np.percentile(stds, 25)
q3 = np.percentile(stds, 75)
return (q3 - q1) / (q3 + q1)
else:
raise ValueError(f"Unknown dispersion metric {metric}")
    def dispersion_metric(self, metric, x, n=1000, batch_size=None):
r"""Compute one or more of several calibration metrics
Dispersion metrics measure how much a model's uncertainty estimates
vary. There are several different dispersion metrics:
The **coefficient of variation** (:math:`C_v`) is the ratio of the
standard deviation to the mean (of the model's uncertainty standard
deviations):
        .. math::
            C_v = \frac{\text{Std}(\sigma_1, \ldots, \sigma_N)}{\text{Mean}(\sigma_1, \ldots, \sigma_N)}
        where :math:`\sigma_i` is the standard deviation of the posterior
        predictive distribution for datapoint :math:`i`.
        The **quartile coefficient of dispersion** (:math:`QCD`) is less
        sensitive to outliers: it is the ratio of the difference between the
        first and third quartiles (of the model's uncertainty standard
        deviations) to their sum:
.. math::
QCD = \frac{Q_3 - Q_1}{Q_3 + Q_1}
See `Tran et al., 2020 <https://arxiv.org/abs/1912.10066>`_ and the
user guide page on :doc:`/user_guide/evaluating` for discussions of
evaluating uncertainty estimates using dispersion metrics, among other
metrics. Note that dispersion metrics should generally be one of the
last things you consider - accuracy, calibration, and sharpness usually
being more important.
Parameters
----------
        metric : str {'cv' or 'qcd'} or List[str]
            Which dispersion metric to compute (see above): ``'cv'`` for the
            coefficient of variation, or ``'qcd'`` for the quartile
            coefficient of dispersion. To compute multiple metrics, pass a
            list of metric names.
x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
Independent variable values of the dataset to evaluate (aka the
"features"). Or a |DataGenerator| for both x and y.
n : int
Number of samples to draw from the model. Default = 1000
batch_size : None or int
Compute using batches of this many datapoints. Default is `None`
(i.e., do not use batching).
Returns
-------
float or Dict[str, float]
The requested dispersion metric. If a list of metric names was
passed, will return a dict whose keys are the metrics, and whose
values are the corresponding metric values.
Example
-------
Supposing we have some training data (``x_train`` and ``y_train``) and
validation data (``x_val`` and ``y_val``), and have already fit a model
to the training data,
.. code-block:: python3
model = # some ProbFlow model...
model.fit(x_train, y_train)
Then we can compute the coefficient of variation of our model's
predictions with:
.. code-block:: pycon
>>> model.dispersion_metric('cv', x_val)
0.732
Or the quartile coefficient of dispersion with:
.. code-block:: pycon
>>> model.dispersion_metric('qcd', x_val)
0.625
See also
--------
* :meth:`~calibration_metric`
* :meth:`~sharpness`
References
----------
- Kevin Tran, Willie Neiswanger, Junwoong Yoon, Qingyang Zhang, Eric
Xing, Zachary W. Ulissi. `Methods for comparing uncertainty
quantifications for material property predictions
<https://arxiv.org/abs/1912.10066>`_, 2020.
"""
samples = self.predictive_sample(x, n=n, batch_size=batch_size)
if isinstance(metric, list):
return {m: self._dispersion_metric(m, samples) for m in metric}
else:
return self._dispersion_metric(metric, samples)