Source code for dvhastats.ui

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# ui.py
"""DVHA-Stats classes for user interaction"""
#
# Copyright (c) 2020 Dan Cutright
# Copyright (c) 2020 Arka Roy
# This file is part of DVHA-Stats, released under a MIT license.
#    See the file LICENSE included with this distribution, also
#    available at https://github.com/cutright/DVHA-Stats


from os.path import dirname, join
import numpy as np
from dvhastats.utilities import import_data
from dvhastats import plot
from dvhastats import stats

# Directory that contains this module
SCRIPT_DIR = dirname(__file__)
# One level above the module's directory
PARENT_DIR = dirname(SCRIPT_DIR)
# CSV file that DVHAStats loads when it is constructed with data=None
TEST_DATA_PATH = join(
    PARENT_DIR,
    "tests",
    "testdata",
    "multivariate_data.csv",
)


class DVHAStatsBaseClass:
    """Common foundation for DVHAStats objects and child objects

    Keeps a list of the plot objects an instance has created so that they
    can later be closed by figure number.
    """

    def __init__(self):
        """Start with an empty list of tracked plots"""
        self.plots = []

    def close(self, figure_number):
        """Close and forget the first tracked plot with this figure number

        Parameters
        ----------
        figure_number : int
            The figure number of the plot to close

        Nothing happens if no tracked plot has the given figure number.
        """
        position = next(
            (
                idx
                for idx, tracked in enumerate(self.plots)
                if tracked.figure.number == figure_number
            ),
            None,
        )
        if position is None:
            return
        self.plots[position].close()
        del self.plots[position]
class DVHAStats(DVHAStatsBaseClass):
    """The main UI class object for DVHAStats

    Parameters
    ----------
    data : numpy.array, dict, str, None
        Input data (2-D) with N rows of observations and
        p columns of variables.  The CSV file must have a header row for
        column names. Test data is loaded if None
    var_names : list of str, optional
        If data is a numpy array, optionally provide the column names.
    x_axis : numpy.array, list, optional
        Specify x_axis for plotting purposes. Default is based on row
        number in data
    avg_len : int
        When plotting raw data, a trend line will be plotted using this
        value as an averaging length. If N < avg_len + 1 will not plot a
        trend line
    del_const_vars : bool
        Automatically delete any variables that have constant data. The
        names of these variables are stored in the deleted_vars attr.
        Default value is False.
    """

    def __init__(
        self,
        data=None,
        var_names=None,
        x_axis=None,
        avg_len=5,
        del_const_vars=False,
    ):
        """Import the data and optionally drop constant variables"""
        DVHAStatsBaseClass.__init__(self)
        data = TEST_DATA_PATH if data is None else data
        self.data, self.var_names = import_data(data, var_names)
        self.x_axis = x_axis

        self.deleted_vars = []

        # box_cox_data caches Box-Cox results column-by-column;
        # _box_cox_done flags which of its columns were actually computed
        # (the others still hold the zero placeholder).
        self.box_cox_data = None
        self._box_cox_done = None

        self.avg_len = avg_len

        if del_const_vars:
            self.del_const_vars()

    def get_data_by_var_name(self, var_name):
        """Get the single variable array based on var_name

        Parameters
        ----------
        var_name : int, str
            The name (str) or index (int) of the variable of interest

        Returns
        ----------
        np.ndarray
            The column of data for the given var_name
        """
        index = self.get_index_by_var_name(var_name)
        return self.data[:, index]

    def get_index_by_var_name(self, var_name):
        """Get the variable index by var_name

        Parameters
        ----------
        var_name : int, str
            The name (str) or index (int) of the variable of interest

        Returns
        ----------
        int
            The column index for the given var_name

        Raises
        ----------
        AttributeError
            If var_name is neither a known name nor a valid column index
        """
        # A name match takes priority over interpreting var_name as an index
        if var_name in self.var_names:
            return self.var_names.index(var_name)
        if isinstance(var_name, int) and var_name in range(
            self.variable_count
        ):
            return var_name
        # str() each name so building the message cannot itself fail when
        # the names are not strings
        msg = "%s is not a valid var_name\n%s" % (
            var_name,
            ",".join(str(v) for v in self.var_names),
        )
        raise AttributeError(msg)

    @property
    def observations(self):
        """Number of observations in data

        Returns
        ----------
        int
            Number of rows in data
        """
        return self.data.shape[0]

    @property
    def variable_count(self):
        """Number of variables in data

        Returns
        ----------
        int
            Number of columns in data
        """
        return self.data.shape[1]

    def __drop_columns(self, indices):
        """Remove columns from data, var_names and the Box-Cox cache"""
        self.data = np.delete(self.data, indices, axis=1)
        # Keep cached Box-Cox results aligned with data / var_names
        if self.box_cox_data is not None:
            self.box_cox_data = np.delete(self.box_cox_data, indices, axis=1)
        if self._box_cox_done is not None:
            self._box_cox_done = np.delete(self._box_cox_done, indices)
        # Pop from the end so earlier indices remain valid
        for i in sorted(indices, reverse=True):
            self.var_names.pop(i)

    def __box_cox_cached(self, index):
        """Return True if box_cox_data holds a computed column at index"""
        if self.box_cox_data is None:
            return False
        if self._box_cox_done is None:
            # box_cox_data was assigned from outside this class; trust it
            return True
        return bool(self._box_cox_done[index])

    def del_var(self, var_name):
        """Permanently remove a variable from data

        The name of the removed variable is appended to deleted_vars.

        Parameters
        ----------
        var_name : int, str
            The var_name to delete (or index of variable)
        """
        index = self.get_index_by_var_name(var_name)
        self.deleted_vars.append(self.var_names[index])
        self.__drop_columns([index])

    def del_const_vars(self):
        """Permanently remove variables with no variation

        The names of the removed variables are appended to deleted_vars.
        """
        # Scan the data once and reuse the result for names and columns
        const_indices = self.constant_var_indices
        self.deleted_vars.extend(self.var_names[i] for i in const_indices)
        self.__drop_columns(const_indices)

    def correlation_matrix(self, corr_type="Pearson"):
        """Get a Pearson-R or Spearman correlation matrices

        Parameters
        ----------
        corr_type : str
            Either "Pearson" or "Spearman"

        Returns
        ----------
        CorrelationMatrixUI
            A CorrelationMatrixUI class object
        """
        return CorrelationMatrixUI(
            self.data, self.var_names, corr_type=corr_type
        )

    def is_constant(self, var_name):
        """Determine if data by var_name is constant

        Parameters
        ----------
        var_name : int, str
            The var_name to check (or index of variable)

        Returns
        ----------
        bool
            True if all values of var_name are the same (i.e., no variation)
        """
        data = self.get_data_by_var_name(var_name)
        return stats.is_arr_constant(data)

    @property
    def constant_vars(self):
        """Get a list of all constant variables

        Returns
        ----------
        list
            Names of variables with no variation
        """
        return [self.var_names[i] for i in self.constant_var_indices]

    @property
    def constant_var_indices(self):
        """Get a list of all constant variable indices

        Returns
        ----------
        list
            Indices of variables with no variation
        """
        # Test each column by position so duplicate names cannot alias
        return [
            i
            for i in range(self.variable_count)
            if stats.is_arr_constant(self.data[:, i])
        ]

    @property
    def non_const_data(self):
        """Return self.data excluding any constant variables

        Returns
        ----------
        np.ndarray
            Data with constant variables removed. This does not alter the
            data property.
        """
        return np.delete(self.data, self.constant_var_indices, axis=1)

    def histogram(self, var_name, bins="auto", nan_policy="omit"):
        """Get a Histogram class object

        Parameters
        ----------
        var_name : str, int
            The name (str) or index (int) of the variable to plot
        bins : int, list, str, optional
            See https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
            for details
        nan_policy : str
            Value must be one of the following: 'propagate', 'raise', 'omit'
            Defines how to handle when input contains nan. The following
            options are available (default is 'omit'):
            'propagate': returns nan
            'raise': throws an error
            'omit': performs the calculations ignoring nan values

        Returns
        ----------
        stats.Histogram
            A stats.Histogram class object
        """
        data = self.get_data_by_var_name(var_name)
        return stats.Histogram(data, bins, nan_policy)

    def linear_reg(
        self,
        y,
        y_var_name=None,
        reg_vars=None,
        saved_reg=None,
        back_elim=False,
        back_elim_p=0.05,
    ):
        """Initialize a MultiVariableRegression class object

        Parameters
        ----------
        y : np.ndarray, list, str, int
            Dependent data based on DVHAStats.data. If y is str or int, then
            it is assumed to be the var_name or index of data to be set as
            the dependent variable
        y_var_name : int, str, optional
            Optionally provide name of the dependent variable. Automatically
            set if y is str or int
        reg_vars : list, optional
            Optionally specify variable names or indices of data to be used
            in the regression
        saved_reg : MultiVariableRegression, optional
            If supplied, predicted values (y-hat) will be calculated with
            DVHAStats.data and the regression from saved_reg. This is useful
            if testing a regression model on new data.
        back_elim : bool
            Automatically perform backward elimination if True
        back_elim_p : float
            p-value threshold for backward elimination

        Returns
        ----------
        LinearRegUI
            A LinearRegUI class object.
        """
        input_data = self.__process_reg_input(y, reg_vars, y_var_name)
        return LinearRegUI(
            input_data["data"],
            input_data["y"],
            saved_reg,
            var_names=input_data["var_names"],
            y_var_name=input_data["y_var_name"],
            back_elim=back_elim,
            back_elim_p=back_elim_p,
        )

    def __process_reg_input(self, y, reg_vars, y_var_name):
        """Split data into independent columns and the dependent array

        Returns a dict with keys "data", "y", "var_names" and "y_var_name".
        Columns not listed in reg_vars (when given) are excluded, as is the
        dependent column when y is given by name or index.
        """
        excl = []
        if reg_vars is not None:
            incl = {self.get_index_by_var_name(v) for v in reg_vars}
            excl = [i for i in range(self.variable_count) if i not in incl]

        if isinstance(y, (str, int)):
            y_index = self.get_index_by_var_name(y)
            y_var_name = self.var_names[y_index]
            # The dependent variable must never also be a regressor
            if y_index not in excl:
                excl.append(y_index)
                excl.sort()
            y = self.data[:, y_index]

        data = np.delete(self.data, excl, axis=1)
        excluded = set(excl)
        var_names = [
            v for i, v in enumerate(self.var_names) if i not in excluded
        ]
        return {
            "data": data,
            "y": y,
            "var_names": var_names,
            "y_var_name": y_var_name,
        }

    def univariate_control_chart(
        self,
        var_name,
        std=3,
        ucl_limit=None,
        lcl_limit=None,
        box_cox=False,
        box_cox_alpha=None,
        box_cox_lmbda=None,
        const_policy="propagate",
    ):
        """
        Calculate control limits for a standard univariate Control Chart

        Parameters
        ----------
        var_name : str, int
            The name (str) or index (int) of the variable to plot
        std : int, float, optional
            Number of standard deviations used to calculate if a y-value is
            out-of-control
        ucl_limit : float, optional
            Limit the upper control limit to this value
        lcl_limit : float, optional
            Limit the lower control limit to this value
        box_cox : bool, optional
            Set to true to perform a Box-Cox transformation on data prior to
            calculating the control chart statistics. A transformation
            already stored in box_cox_data for this variable is reused.
        box_cox_alpha : float, optional
            If alpha is not None, return the 100 * (1-alpha)% confidence
            interval for lmbda as the third output argument. Must be between
            0.0 and 1.0.
        box_cox_lmbda : float, optional
            If lmbda is not None, do the transformation for that value.
            If lmbda is None, find the lambda that maximizes the
            log-likelihood function and return it as the second output
            argument.
        const_policy : str {'propagate', 'raise', 'omit'}
            Defines how to handle when data is constant. The following
            options are available (default is 'propagate'):
            'propagate': returns nan
            'raise': throws an error
            'omit': remove NaN data

        Returns
        ----------
        ControlChartUI
            A ControlChartUI class object (a stats.ControlChart subclass)
        """
        kwargs = {"std": std, "ucl_limit": ucl_limit, "lcl_limit": lcl_limit}
        index = self.get_index_by_var_name(var_name)
        var_name = self.var_names[index]
        if box_cox:
            # Only reuse the cache if THIS column was computed; an existing
            # cache may hold nothing but the zero placeholder for it.
            if self.__box_cox_cached(index):
                cc_data = self.box_cox_data[:, index]
            else:
                cc_data = self.box_cox_by_index(
                    index,
                    alpha=box_cox_alpha,
                    lmbda=box_cox_lmbda,
                    const_policy=const_policy,
                )
            plot_title = "Univariate Control Chart with Box-Cox Transformation"
        else:
            cc_data = self.data[:, index]
            plot_title = None

        if const_policy == "propagate" and stats.is_nan_arr(cc_data):
            plot_title = "Cannot calculate control chart with const data!"

        # NOTE(review): self.x_axis is not forwarded here although
        # risk_adjusted_control_chart passes x=self.x_axis -- confirm intent
        return ControlChartUI(
            cc_data, var_name=var_name, plot_title=plot_title, **kwargs
        )

    def univariate_control_charts(self, **kwargs):
        """
        Calculate Control charts for all variables

        Parameters
        ----------
        kwargs : any
            See univariate_control_chart for keyword parameters

        Returns
        ----------
        dict
            ControlChart class objects stored in a dictionary with
            var_names and indices as keys (can use var_name or index)
        """
        data = {}
        for i, key in enumerate(self.var_names):
            data[key] = self.univariate_control_chart(key, **kwargs)
            data[i] = data[key]
        return data

    def hotelling_t2(
        self,
        alpha=0.05,
        box_cox=False,
        box_cox_alpha=None,
        box_cox_lmbda=None,
        const_policy="omit",
    ):
        """
        Calculate Hotelling's T^2 for a multivariate Control Chart

        Parameters
        ----------
        alpha : float
            Significance level used to determine the upper control limit (ucl)
        box_cox : bool, optional
            Set to true to perform a Box-Cox transformation on data prior to
            calculating the control chart statistics. Transformations
            already stored in box_cox_data are reused; any column not yet
            transformed is computed with the parameters given here.
        box_cox_alpha : float, optional
            If alpha is not None, return the 100 * (1-alpha)% confidence
            interval for lmbda as the third output argument. Must be between
            0.0 and 1.0.
        box_cox_lmbda : float, optional
            If lmbda is not None, do the transformation for that value.
            If lmbda is None, find the lambda that maximizes the
            log-likelihood function and return it as the second output
            argument.
        const_policy : str {'raise', 'omit'}
            Defines how to handle when data is constant. The following
            options are available (default is 'omit'):
            'raise': throws an error
            'omit': exclude constant variables from calculation

        Returns
        ----------
        HotellingT2UI
            HotellingT2UI class object
        """
        if box_cox:
            # Fill in every column that has not been transformed yet so the
            # statistic never runs on zero placeholder columns
            for i in range(self.variable_count):
                if not self.__box_cox_cached(i):
                    self.box_cox_by_index(
                        i,
                        alpha=box_cox_alpha,
                        lmbda=box_cox_lmbda,
                        const_policy=const_policy,
                    )
            data = self.box_cox_data
            if const_policy == "omit":
                data = stats.remove_const_column(data)
            plot_title = (
                "Multivariate Control Chart with Box-Cox Transformation"
            )
        else:
            data = self.non_const_data if const_policy == "omit" else self.data
            plot_title = None
        return HotellingT2UI(data, alpha, plot_title=plot_title)

    def box_cox_by_index(
        self, index, alpha=None, lmbda=None, const_policy="propagate"
    ):
        """Apply a Box-Cox transformation to one variable

        The result is stored in the matching column of box_cox_data.

        Parameters
        ----------
        index : int, str
            The index corresponding to the variable data to have a box-cox
            transformation applied. If index is a string, it will be assumed
            to be the var_name
        lmbda : None, scalar, optional
            If lmbda is not None, do the transformation for that value.
            If lmbda is None, find the lambda that maximizes the
            log-likelihood function and return it as the second output
            argument.
        alpha : None, float, optional
            If alpha is not None, return the 100 * (1-alpha)% confidence
            interval for lmbda as the third output argument. Must be between
            0.0 and 1.0.
        const_policy : str {'propagate', 'raise', 'omit'}
            Defines how to handle when data is constant. The following
            options are available (default is 'propagate'):
            'propagate': returns nan
            'raise': throws an error
            'omit': remove NaN data

        Returns
        ----------
        np.ndarray
            Results from stats.box_cox
        """
        # Resolve the name first so a bad name leaves no half-built cache
        if isinstance(index, str):
            index = self.get_index_by_var_name(index)

        if self.box_cox_data is None:
            # Always float: zeros_like would inherit an integer dtype from
            # data and truncate the transformed values on assignment
            self.box_cox_data = np.zeros(self.data.shape, dtype=float)
            self._box_cox_done = np.zeros(self.data.shape[1], dtype=bool)

        self.box_cox_data[:, index] = stats.box_cox(
            self.data[:, index],
            alpha=alpha,
            lmbda=lmbda,
            const_policy=const_policy,
        )
        if self._box_cox_done is not None:
            self._box_cox_done[index] = True
        return self.box_cox_data[:, index]

    def box_cox(self, alpha=None, lmbda=None, const_policy="propagate"):
        """Apply box_cox_by_index for all data"""
        for i in range(self.variable_count):
            self.box_cox_by_index(
                i, alpha=alpha, lmbda=lmbda, const_policy=const_policy
            )

    def pca(self, n_components=0.95, transform=True, **kwargs):
        """Return an sklearn PCA-like object, see PCA object for details

        Parameters
        ----------
        n_components : int, float, None or str
            Number of components to keep. if n_components is not set all
            components are kept:
            n_components == min(n_samples, n_features)

            If n_components == 'mle' and svd_solver == 'full', Minka's MLE
            is used to guess the dimension. Use of n_components == 'mle'
            will interpret svd_solver == 'auto' as svd_solver == 'full'.

            If 0 < n_components < 1 and svd_solver == 'full', select the
            number of components such that the amount of variance that
            needs to be explained is greater than the percentage specified
            by n_components.

            If svd_solver == 'arpack', the number of components must be
            strictly less than the minimum of n_features and n_samples.
        transform : bool
            Fit the model and apply the dimensionality reduction
        kwargs : any
            Provide any keyword arguments for sklearn.decomposition.PCA:
            https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

        Returns
        ----------
        PCAUI
            A principal component analysis object inherited from
            sklearn.decomposition.PCA
        """
        return PCAUI(
            self.data,
            var_names=self.var_names,
            n_components=n_components,
            transform=transform,
            **kwargs
        )

    def risk_adjusted_control_chart(
        self,
        y,
        std=3,
        ucl_limit=None,
        lcl_limit=None,
        saved_reg=None,
        y_name=None,
        reg_vars=None,
        back_elim=False,
        back_elim_p=0.05,
    ):
        """
        Calculate control limits for a Risk-Adjusted Control Chart

        Parameters
        ----------
        y : list, np.ndarray, str, int
            1-D Input data (dependent data). If y is str or int, it is
            taken as the var_name or index of the dependent variable in
            DVHAStats.data
        std : int, float, optional
            Number of standard deviations used to calculate if a y-value is
            out-of-control.
        ucl_limit : float, optional
            Limit the upper control limit to this value
        lcl_limit : float, optional
            Limit the lower control limit to this value
        saved_reg : MultiVariableRegression, optional
            If supplied, predicted values (y-hat) will be calculated with
            DVHAStats.data and the regression from saved_reg. This is useful
            if testing a regression model on new data.
        y_name : int, str, optional
            Optionally provide name of the dependent variable. Automatically
            set if y is str or int
        reg_vars : list, optional
            Optionally specify variable names or indices of data to be used
            in the regression
        back_elim : bool
            Automatically perform backward elimination if True
        back_elim_p : float
            p-value threshold for backward elimination

        Returns
        ----------
        RiskAdjustedControlChartUI
            A RiskAdjustedControlChartUI class object
        """
        input_data = self.__process_reg_input(y, reg_vars, y_name)
        return RiskAdjustedControlChartUI(
            input_data["data"],
            input_data["y"],
            std=std,
            ucl_limit=ucl_limit,
            lcl_limit=lcl_limit,
            saved_reg=saved_reg,
            x=self.x_axis,
            y_name=input_data["y_var_name"],
            var_names=input_data["var_names"],
            back_elim=back_elim,
            back_elim_p=back_elim_p,
        )

    def __add_tend_line(self, var_name, plot_index):
        """Add trend line based on moving average"""
        # NOTE(review): the trend x-values come from stats.moving_avg, not
        # from self.x_axis -- confirm they align when a custom x_axis is set
        trend_x, trend_y = stats.moving_avg(
            self.get_data_by_var_name(var_name), self.avg_len
        )
        self.plots[plot_index].add_line(
            trend_y, x=trend_x, line_color="black", line_width=0.75
        )

    def show(self, var_name=None, plot_type="trend", **kwargs):
        """Display a plot of var_name with matplotlib

        Parameters
        ----------
        var_name : str, int, None
            The name (str) or index (int) of the variable to plot.
            If None and plot_type="box", all variables will be plotted.
        plot_type : str
            Either "trend", "hist", "box" (case-insensitive)
        kwargs : any
            Passed to the plot object if plot_type is "hist" or "box"
            (e.g., any of the matplotlib hist key word arguments).
            Ignored for "trend".

        Returns
        ----------
        int
            The number of the newly created matplotlib figure

        Raises
        ----------
        NotImplementedError
            If plot_type is not supported, or var_name is None for a
            plot_type other than "box"
        """
        plot_type = plot_type.lower()
        if plot_type not in {"trend", "hist", "box"}:
            msg = "plot_type must be in 'trend', 'hist', or 'box'"
            raise NotImplementedError(msg)

        if var_name is None:
            if plot_type != "box":
                msg = "Must specify var_name if plot_type in ('trend', 'hist')"
                raise NotImplementedError(msg)
            index = None
        else:
            index = self.get_index_by_var_name(var_name)
            var_name = self.var_names[index]

        if plot_type == "trend":
            self.plots.append(
                plot.Plot(
                    self.data[:, index],
                    x=self.x_axis,
                    xlabel="Observation",
                    ylabel=var_name,
                    title="",
                )
            )
            self.__add_tend_line(var_name, -1)
        elif plot_type == "hist":
            self.plots.append(
                plot.Histogram(self.data[:, index], xlabel=var_name, **kwargs)
            )
        elif plot_type == "box":
            data = self.data if var_name is None else self.data[:, index]
            xlabels = self.var_names if var_name is None else [var_name]
            self.plots.append(plot.BoxPlot(data, xlabels=xlabels, **kwargs))
        return self.plots[-1].figure.number
class ControlChartUI(DVHAStatsBaseClass, stats.ControlChart):
    """Univariate Control Chart with plotting support

    Parameters
    ----------
    y : list, np.ndarray
        Input data (1-D)
    std : int, float, optional
        Number of standard deviations used to calculate if a y-value is
        out-of-control.
    ucl_limit : float, optional
        Limit the upper control limit to this value
    lcl_limit : float, optional
        Limit the lower control limit to this value
    var_name : str, optional
        Used as the y-axis label of the plot
    x : list, np.ndarray, optional
        Forwarded to stats.ControlChart as ``x``
    plot_title : str, optional
        Over-ride the plot title
    """

    def __init__(
        self,
        y,
        std=3,
        ucl_limit=None,
        lcl_limit=None,
        var_name=None,
        x=None,
        plot_title=None,
    ):
        """Set up plot tracking, then the underlying control chart"""
        DVHAStatsBaseClass.__init__(self)
        chart_options = {
            "std": std,
            "ucl_limit": ucl_limit,
            "lcl_limit": lcl_limit,
            "x": x,
        }
        stats.ControlChart.__init__(self, y, **chart_options)

        # Fall back to the generic title when none was supplied
        if plot_title is None:
            plot_title = "Univariate Control Chart"
        self.plot_title = plot_title
        self.var_name = var_name

    def show(self):
        """Display the univariate control chart with matplotlib

        Returns
        ----------
        int
            The number of the newly created matplotlib figure
        """
        chart = plot.ControlChart(
            title=self.plot_title, ylabel=self.var_name, **self.chart_data
        )
        self.plots.append(chart)
        return chart.figure.number
class RiskAdjustedControlChartUI(
    DVHAStatsBaseClass, stats.RiskAdjustedControlChart
):
    """Risk-Adjusted Control Chart (multi-variable regression) with plotting

    Parameters
    ----------
    X : array-like
        Input array (independent data)
    y : list, np.ndarray
        1-D Input data (dependent data)
    std : int, float, optional
        Number of standard deviations used to calculate if a y-value is
        out-of-control.
    ucl_limit : float, optional
        Limit the upper control limit to this value
    lcl_limit : float, optional
        Limit the lower control limit to this value
    x : list, np.ndarray, optional
        x-axis values
    y_name : int, str, optional
        Name of the dependent variable; used as the y-axis label
    var_names : list, optional
        Optionally provide names of the variables
    saved_reg : MultiVariableRegression, optional
        Optionally provide a previously calculated regression
    plot_title : str, optional
        Over-ride the plot title
    back_elim : bool
        Automatically perform backward elimination if True
    back_elim_p : float
        p-value threshold for backward elimination
    """

    def __init__(
        self,
        X,
        y,
        std=3,
        ucl_limit=None,
        lcl_limit=None,
        x=None,
        y_name=None,
        var_names=None,
        saved_reg=None,
        plot_title=None,
        back_elim=False,
        back_elim_p=0.05,
    ):
        """Set up plot tracking, then the underlying risk-adjusted chart"""
        DVHAStatsBaseClass.__init__(self)

        limit_options = {
            "std": std,
            "ucl_limit": ucl_limit,
            "lcl_limit": lcl_limit,
            "x": x,
        }
        regression_options = {
            "saved_reg": saved_reg,
            "var_names": var_names,
            "back_elim": back_elim,
            "back_elim_p": back_elim_p,
        }
        stats.RiskAdjustedControlChart.__init__(
            self, X, y, **limit_options, **regression_options
        )

        # Fall back to the generic title when none was supplied
        if plot_title is None:
            plot_title = "Risk-Adjusted Control Chart"
        self.plot_title = plot_title
        self.var_name = y_name

    def show(self):
        """Display the risk-adjusted control chart with matplotlib

        Returns
        ----------
        int
            The number of the newly created matplotlib figure
        """
        chart = plot.ControlChart(
            title=self.plot_title, ylabel=self.var_name, **self.chart_data
        )
        self.plots.append(chart)
        return chart.figure.number
class HotellingT2UI(DVHAStatsBaseClass, stats.HotellingT2):
    """Hotelling's t-squared statistic for multivariate hypothesis testing

    Parameters
    ----------
    data : np.ndarray
        A 2-D array of data to perform multivariate analysis.
        (e.g., DVHAStats.data)
    alpha : float
        The significance level used to calculate the
        upper control limit (UCL)
    plot_title : str, optional
        Over-ride the plot title
    """

    def __init__(self, data, alpha=0.05, plot_title=None):
        """Initialize the Hotelling T^2 class"""
        DVHAStatsBaseClass.__init__(self)
        stats.HotellingT2.__init__(self, data, alpha=alpha)
        self.plot_title = (
            "Multivariate Control Chart" if plot_title is None else plot_title
        )

    def show(self):
        """Display the multivariate control chart with matplotlib

        Returns
        ----------
        int
            The number of the newly created matplotlib figure
        """
        self.plots.append(
            plot.ControlChart(
                title=self.plot_title,
                # Axis label previously misspelled as "Hottelling"
                ylabel="Hotelling T^2",
                **self.chart_data
            )
        )
        return self.plots[-1].figure.number
class PCAUI(DVHAStatsBaseClass, stats.PCA):
    """Principal component analysis (PCA) UI object with plotting support

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples and
        n_features is the number of features.
    var_names : list, optional
        Names of the independent variables in X. Defaults to the column
        indices of X
    n_components : int, float, None or str
        Number of components to keep. if n_components is not set all
        components are kept:
        n_components == min(n_samples, n_features)

        If n_components == 'mle' and svd_solver == 'full', Minka's MLE is
        used to guess the dimension. Use of n_components == 'mle' will
        interpret svd_solver == 'auto' as svd_solver == 'full'.

        If 0 < n_components < 1 and svd_solver == 'full', select the
        number of components such that the amount of variance that needs
        to be explained is greater than the percentage specified by
        n_components.

        If svd_solver == 'arpack', the number of components must be
        strictly less than the minimum of n_features and n_samples.
    transform : bool
        Fit the model and apply the dimensionality reduction
    kwargs : any
        Provide any keyword arguments for sklearn.decomposition.PCA:
        https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
    """

    def __init__(
        self, X, var_names=None, n_components=0.95, transform=True, **kwargs
    ):
        """Initialize PCA and perform fit. Inherits sklearn.decomposition.PCA"""
        DVHAStatsBaseClass.__init__(self)
        stats.PCA.__init__(
            self, X, n_components=n_components, transform=transform, **kwargs
        )
        self.var_names = range(X.shape[1]) if var_names is None else var_names

    def show(self, plot_type="feature_map", absolute=True):
        """Create a heat map of PCA components

        Parameters
        ----------
        plot_type : str
            Select a plot type to display. Options include: feature_map.
        absolute : bool
            Heat map will display the absolute values in PCA components
            if True

        Returns
        ----------
        int
            The number of the newly created matplotlib figure

        Raises
        ----------
        NotImplementedError
            If plot_type is not a supported option
        """
        # Fail explicitly: without this, an unknown plot_type would create
        # nothing and then report the number of some earlier figure (or
        # raise IndexError when no figure exists yet).
        if plot_type != "feature_map":
            msg = "plot_type must be 'feature_map'"
            raise NotImplementedError(msg)

        data = self.feature_map_data
        if absolute:
            data = abs(data)
        self.plots.append(plot.PCAFeatureMap(data, self.var_names))
        return self.plots[-1].figure.number
class CorrelationMatrixUI(DVHAStatsBaseClass, stats.CorrelationMatrix):
    """Pearson-R or Spearman correlation matrix UI object

    Parameters
    ----------
    X : np.ndarray
        Input data (2-D) with N rows of observations and
        p columns of variables.
    var_names : list, optional
        Optionally set the variable names with a list of str. Defaults to
        the column indices of X
    corr_type : str
        Either "Pearson" or "Spearman"
    cmap : str
        matplotlib compatible color map
    """

    def __init__(
        self, X, var_names=None, corr_type="Pearson", cmap="coolwarm"
    ):
        """Initialization of CorrelationMatrix object"""
        DVHAStatsBaseClass.__init__(self)
        stats.CorrelationMatrix.__init__(self, X=X, corr_type=corr_type)
        self.var_names = range(X.shape[1]) if var_names is None else var_names
        self.cmap = cmap

    def show(self, absolute=False, corr=True):
        """Create a heat map of the correlation or p-value matrix

        Parameters
        ----------
        absolute : bool
            Heat map will display the absolute values of the matrix
            if True
        corr : bool
            Plot a p-value matrix if False, correlation matrix if True.

        Returns
        ----------
        int
            The number of the newly created matplotlib figure
        """
        data = self.corr if corr else self.p
        data = abs(data) if absolute else data

        # Compare case-insensitively: the documented value is "Pearson",
        # and it is not visible here whether stats.CorrelationMatrix
        # lower-cases corr_type before storing it.
        is_pearson = str(self.corr_type).lower() == "pearson"
        mat_type = "Pearson-R" if is_pearson else "Spearman"
        # Conditional rather than list indexing so any truthy value works
        value_type = "Correlation" if corr else "p-value"
        title = "%s %s Matrix" % (mat_type, value_type)

        self.plots.append(
            plot.HeatMap(
                data,
                xlabels=self.var_names,
                ylabels=self.var_names,
                cmap=self.cmap,
                title=title,
            )
        )
        return self.plots[-1].figure.number
class LinearRegUI(DVHAStatsBaseClass, stats.MultiVariableRegression):
    """A MultiVariableRegression class UI object with plotting support

    Parameters
    ----------
    X : np.ndarray
        Independent data (2-D); forwarded to MultiVariableRegression
    y : np.ndarray, list
        Dependent data based on DVHAStats.data
    saved_reg : MultiVariableRegression, optional
        If supplied, predicted values (y-hat) will be calculated with
        DVHAStats.data and the regression from saved_reg. This is useful
        if testing a regression model on new data.
    var_names : list, optional
        Optionally provide names of the independent variables
    y_var_name : int, str, optional
        Optionally provide name of the dependent variable
    back_elim : bool
        Automatically perform backward elimination if True
    back_elim_p : float
        p-value threshold for backward elimination
    """

    def __init__(
        self,
        X,
        y,
        saved_reg=None,
        var_names=None,
        y_var_name=None,
        back_elim=False,
        back_elim_p=0.05,
    ):
        """Set up plot tracking, then the underlying regression"""
        DVHAStatsBaseClass.__init__(self)
        regression_options = {
            "X": X,
            "y": y,
            "saved_reg": saved_reg,
            "var_names": var_names,
            "y_var_name": y_var_name,
            "back_elim": back_elim,
            "back_elim_p": back_elim_p,
        }
        stats.MultiVariableRegression.__init__(self, **regression_options)

    def show(self, plot_type="residual"):
        """Create a Residual or Probability Plot

        Parameters
        ----------
        plot_type : str
            Either "residual" or "prob"

        Returns
        ----------
        int
            The number of the newly created matplotlib figure, or None
            if plot_type is not one of the supported options
        """
        if plot_type not in {"residual", "prob"}:
            return None

        if self.X.shape[1] > 1:
            title = "Multi-Variable Linear Regression"
        else:
            title = "Linear Regression"

        if plot_type == "prob":
            prob = self.prob_plot
            figure = plot.Plot(
                prob["y"],
                x=prob["x"],
                title="Probability Plot",
                xlabel="Quantiles",
                ylabel="Ordered Values",
                line=False,
            )
            self.plots.append(figure)
            figure.add_line(
                prob["y_trend"],
                prob["x_trend"],
                line_color="black",
                line_style="--",
            )
        else:
            chart = self.chart_data
            figure = plot.Plot(
                chart["resid"],
                x=chart["y"],
                title=title,
                xlabel="Fitted Values",
                ylabel="Residual",
                line=False,
            )
            self.plots.append(figure)
            # Dashed zero-residual reference line across the plotted range
            span = [np.min(chart["y"]), np.max(chart["y"])]
            figure.add_line(
                [0, 0], span, line_color="black", line_style="--"
            )
        return figure.figure.number