.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "auto_examples/utils.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note :ref:`Go to the end <sphx_glr_download_auto_examples_utils.py>` to download the full example code. .. rst-class:: sphx-glr-example-title .. _sphx_glr_auto_examples_utils.py: ============== utils ============== This file contains utility functions which are used in other files. .. GENERATED FROM PYTHON SOURCE LINES 8-35 .. code-block:: Python import sys import warnings from typing import Union, Any, List import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt from matplotlib.lines import Line2D import shap import ngboost import tensorflow as tf from tensorflow import keras import tensorflow_probability as tfp from tensorflow.keras import layers from easy_mpl import scatter, regplot, plot, hist from easy_mpl.utils import AddMarginalPlots # to add marginal plots along an axes from SeqMetrics import RegressionMetrics from ai4water.utils.utils import get_version_info from ai4water.functional import Model as FModel .. GENERATED FROM PYTHON SOURCE LINES 36-39 .. code-block:: Python SAVE = False .. GENERATED FROM PYTHON SOURCE LINES 40-55 .. code-block:: Python COLUMN_MAPS = { 'Wastewater concentration (Ci)': "WW Conc.", 'Cyanobacterial cell count': 'Ini. CC', 'Sonicator power density':'Sonic. PD', 'Concentration of H2O2':'h20 Conc.', #'Solution pH': 'sol_ph' } COLUMN_MAPS_ = {v:k for k,v in COLUMN_MAPS.items()} COLUMN_MAPS_['ww_conc'] = "Wastewater Conc." COLUMN_MAPS_['sonic_pd'] = "Sonicator Power" COLUMN_MAPS_['h20_conc.'] = 'H2O2 Conc.' COLUMN_MAPS_['ini_cc'] = "Ini. Cell Count" .. GENERATED FROM PYTHON SOURCE LINES 56-91 .. 
def read_data(
        inputs: List[str] = None,
        target=None,
        filepath: str = "data_0315.csv",
) -> pd.DataFrame:
    """
    Load the experimental data and return the selected input/target columns.

    A column named ``Efficiency`` is computed from the initial and final cell
    counts as ``(initial - final) / initial * 100``, with negative values
    replaced by 0. It is added to the data before the columns are selected,
    so it can be requested as a target (it is the default target).

    :param inputs:
        names of the input columns as they appear in the csv file. If not
        given, a default set of six columns is used.
    :param target:
        name, or list of names, of the target column(s). Defaults to
        ``Efficiency``.
    :param filepath:
        path of the csv file to read. Defaults to ``data_0315.csv``, the
        path that was previously hard coded.
    :return:
        a DataFrame holding the input columns followed by the target
        column(s), with columns renamed according to ``COLUMN_MAPS``.
    """
    df = pd.read_csv(filepath)

    if inputs is None:
        inputs = ['Time (min)', 'Cyanobacterial cell count',
                  #'Wastewater concentration (Ci)',
                  'Sonicator power density',
                  'Concentration of H2O2', 'Volume (mL)', 'Solution pH']

    # calculate efficiency and put it in dataframe
    ini_cell_count = df['Cyanobacterial cell count'].values
    fin_cell_count = df['final count/mL'].values
    efficiency = ((ini_cell_count - fin_cell_count) / ini_cell_count) * 100
    # a final count above the initial count gives a negative efficiency;
    # such values are set to zero
    efficiency = np.where(efficiency < 0.0, 0.0, efficiency)
    df['Efficiency'] = efficiency

    if target is None:
        target = "Efficiency"

    # accept a single name as well as a list/tuple of names
    if isinstance(target, (list, tuple)):
        target = list(target)
    else:
        target = [target]

    # list() so that a tuple of input names can be concatenated as well
    columns = list(inputs) + target

    df = df[columns]
    df = df.rename(columns=COLUMN_MAPS)
    return df


def prior(kernel_size, bias_size, dtype=None):
    """
    Build the prior weight distribution for a variational layer.

    The prior is a multivariate Normal with mean 0 and standard deviation 1
    over ``kernel_size + bias_size`` weights. Its parameters are fixed, so
    the prior is not trainable. ``dtype`` is accepted but not used. The
    function is passed as ``make_prior_fn`` to
    ``tfp.layers.DenseVariational`` by ``BayesModel`` in this file.

    :param kernel_size: number of kernel weights
    :param bias_size: number of bias weights
    :param dtype: not used
    :return: a keras Sequential model wrapping the distribution
    """
    # Define the prior weight distribution as Normal of mean=0 and stddev=1.
    # Note that, in this example, the prior distribution is not trainable,
    # as we fix its parameters.
    n = kernel_size + bias_size
    prior_model = keras.Sequential(
        [
            tfp.layers.DistributionLambda(
                lambda t: tfp.distributions.MultivariateNormalDiag(
                    loc=tf.zeros(n), scale_diag=tf.ones(n)
                )
            )
        ]
    )
    return prior_model


def posterior(kernel_size, bias_size, dtype=None):
    """
    Build the variational posterior weight distribution for a variational layer.

    The posterior is a multivariate Gaussian over
    ``kernel_size + bias_size`` weights whose parameters are held in a
    ``tfp.layers.VariableLayer``. The function is passed as
    ``make_posterior_fn`` to ``tfp.layers.DenseVariational`` by
    ``BayesModel`` in this file.

    :param kernel_size: number of kernel weights
    :param bias_size: number of bias weights
    :param dtype: dtype given to the ``VariableLayer``
    :return: a keras Sequential model producing the distribution
    """
    # Define variational posterior weight distribution as multivariate Gaussian.
    # Note that the learnable parameters for this distribution are the means,
    # variances, and covariances.
    n = kernel_size + bias_size
    posterior_model = tf.keras.Sequential(
        [
            tfp.layers.VariableLayer(
                tfp.layers.MultivariateNormalTriL.params_size(n), dtype=dtype
            ),
            tfp.layers.MultivariateNormalTriL(n),
        ]
    )
    return posterior_model
class BayesModel(FModel):
    """
    A model which can be used to quantify aleatoric uncertainty,
    epistemic uncertainty, or both.

    Following parameters must be defined in a dictionary called ``layers``.

    >>> model = BayesModel(model={"layers": {'hidden_units': [1,], 'train_size': 100,
    ...         'activation': 'sigmoid'}})

    hidden_units : List[int]
        number of units of each hidden layer
    train_size : int
        used to weight the KL term of the variational layers
        (``kl_weight = 1 / train_size``)
    activation : str
        activation function of the hidden layers
    uncertainty_type : str
        either ``epistemic`` or ``aleoteric`` or ``both``. Optional,
        defaults to ``epistemic``. Note that ``aleoteric`` (with this
        spelling) is the value the code checks for.
    """

    def add_layers(self, *args, **kwargs) -> tuple:
        """
        Build the network and return its input and output tensors.

        With epistemic uncertainty the hidden layers are
        ``tfp.layers.DenseVariational`` layers, otherwise plain ``Dense``
        layers. With aleatoric uncertainty the output is an
        ``IndependentNormal`` distribution, otherwise a single ``Dense`` unit.

        :return: tuple of ``(inputs, outputs)``
        """
        layer_config = self.config['model']['layers']
        hidden_units = layer_config['hidden_units']
        train_size = layer_config['train_size']
        activation = layer_config['activation']
        uncertainty_type = layer_config.get('uncertainty_type', 'epistemic')

        assert uncertainty_type in ("epistemic", "aleoteric", "both"), (
            f"unknown uncertainty_type {uncertainty_type}")

        epistemic = uncertainty_type in ("epistemic", "both")
        aleoteric = uncertainty_type in ("aleoteric", "both")

        # ``shape`` is a shape tuple (excluding the batch dimension); the
        # original passed a bare integer because of a misplaced comma.
        inputs = layers.Input(shape=(len(self.input_features),),
                              dtype=tf.float32)
        features = layers.BatchNormalization()(inputs)

        if epistemic:
            # Create hidden layers with weight uncertainty using the
            # DenseVariational layer.
            for units in hidden_units:
                features = tfp.layers.DenseVariational(
                    units=units,
                    make_prior_fn=prior,
                    make_posterior_fn=posterior,
                    kl_weight=1 / train_size,
                    activation=activation,
                )(features)
        else:
            for units in hidden_units:
                features = layers.Dense(units, activation=activation)(features)

        if aleoteric:
            # Create a probabilistic output (Normal distribution), and use
            # the `Dense` layer to produce the parameters of the distribution.
            # We set units=2 to learn both the mean and the variance of the
            # Normal distribution.
            distribution_params = layers.Dense(units=2)(features)
            outputs = tfp.layers.IndependentNormal(1)(distribution_params)
        else:
            # The output is deterministic: a single point estimate.
            outputs = layers.Dense(units=1)(features)

        return inputs, outputs
def shap_scatter(
        feature_shap_values:np.ndarray,
        feature_data:Union[pd.DataFrame, np.ndarray, pd.Series],
        color_feature:pd.Series=None,
        color_feature_is_categorical:bool = False,
        feature_name:str = '',
        show_hist:bool = True,
        palette_name = "tab10",
        s:int = 70,
        ax:plt.Axes = None,
        edgecolors='black',
        linewidth=0.8,
        alpha=0.8,
        show:bool = True,
        **scatter_kws,
):
    """
    Scatter plot of the SHAP values of a feature against the feature values.

    :param feature_shap_values:
        SHAP values of the feature, drawn on the y-axis
    :param feature_data:
        values of the feature, drawn on the x-axis
    :param color_feature:
        optional pandas Series used to color the markers. Its ``name`` is
        used as the title of the legend/colorbar.
    :param color_feature_is_categorical:
        if True, every unique value of ``color_feature`` gets its own color
        and a legend is drawn; otherwise a colorbar is drawn.
    :param feature_name:
        name of the feature, used in the axis labels
    :param show_hist:
        whether to draw a faint histogram of ``feature_data`` on a twin axes
    :param palette_name:
        only relevant if ``color_feature_is_categorical`` is True. Either
        the name of a seaborn palette or a list/tuple with one color per
        unique value of ``color_feature``.
    :param s:
        marker size
    :param ax:
        axes to draw on. A new figure is created if not given.
    :param edgecolors:
        edge color of the markers
    :param linewidth:
        width of the marker edges
    :param alpha:
        transparency of the markers
    :param show:
        whether to call ``plt.show()``
    :param scatter_kws:
        any additional keyword arguments for ``easy_mpl.scatter``
    :return:
        the axes on which the scatter plot is drawn
    """
    if ax is None:
        _, ax = plt.subplots()

    if color_feature is None:
        c = None
    elif color_feature_is_categorical:
        categories = color_feature.unique()
        if isinstance(palette_name, (tuple, list)):
            assert len(palette_name) == len(categories)
            rgb_values = palette_name
        else:
            rgb_values = sns.color_palette(palette_name, len(categories))
        color_map = dict(zip(categories, rgb_values))
        c = color_feature.map(color_map)
    else:
        c = color_feature.values.reshape(-1,)

    _, pc = scatter(
        feature_data,
        feature_shap_values,
        c=c,
        s=s,
        marker="o",
        edgecolors=edgecolors,
        linewidth=linewidth,
        alpha=alpha,
        ax=ax,
        show=False,
        **scatter_kws
    )

    if color_feature is not None:
        feature_wrt_name = ' '.join(color_feature.name.split('_'))
        if color_feature_is_categorical:
            # add a legend
            handles = [Line2D([0], [0], marker='o', color='w',
                              markerfacecolor=v, label=k, markersize=8)
                       for k, v in color_map.items()]

            ax.legend(title=feature_wrt_name,
                      handles=handles, bbox_to_anchor=(1.05, 1),
                      loc='upper left',
                      title_fontsize=14
                      )
        else:
            cbar = plt.colorbar(pc, aspect=80)
            cbar.ax.set_ylabel(feature_wrt_name, rotation=90, labelpad=14,
                               fontsize=14, weight="bold")

            set_yticklabels(cbar.ax, max_ticks=None)

            cbar.set_alpha(1)
            cbar.outline.set_visible(False)

    ax.set_xlabel(feature_name)
    ax.set_ylabel(f"SHAP value for {feature_name}")
    ax.axhline(0, color='grey', linewidth=1.3, alpha=0.3, linestyle='--')

    set_xticklabels(ax, max_ticks=None)
    set_yticklabels(ax, max_ticks=None)

    if show_hist:
        if isinstance(feature_data, (pd.Series, pd.DataFrame)):
            feature_data = feature_data.values
        x = feature_data

        # number of bins grows with the number of samples
        if len(x) >= 500:
            bin_edges = 50
        elif len(x) >= 200:
            bin_edges = 20
        elif len(x) >= 100:
            bin_edges = 10
        else:
            bin_edges = 5

        ax2 = ax.twinx()

        xlim = ax.get_xlim()

        ax2.hist(x.reshape(-1,), bin_edges,
                 range=(xlim[0], xlim[1]),
                 density=False, facecolor='#000000', alpha=0.1, zorder=-1)
        # the y-range spans all samples, which keeps the bars low so that
        # they stay in the background of the scatter points
        ax2.set_ylim(0, len(x))
        ax2.set_yticks([])

    if show:
        plt.show()

    return ax


def set_xticklabels(
        ax:plt.Axes,
        max_ticks:Union[int, Any] = 5,
        dtype = int,
        weight = "bold",
        fontsize:Union[int, float]=12,
        max_xtick_val=None,
        min_xtick_val=None,
        **kwargs
):
    """
    Set the ticks and ticklabels of the x-axis. See ``set_ticklabels``.

    :param ax:
        axes whose x-ticks are modified
    :param max_ticks:
        maximum number of ticks, if not set, all the default ticks will be used
    :param dtype:
        type to which the tick values are converted
    :param weight:
        font weight of the ticklabels
    :param fontsize:
        font size of the ticklabels
    :param max_xtick_val:
        maximum value of tick
    :param min_xtick_val:
        minimum value of tick
    :param kwargs:
        any keyword arguments of axes.set_xticklabels()
    :return:
        whatever ``set_ticklabels`` returns
    """
    return set_ticklabels(ax, "x", max_ticks, dtype, weight, fontsize,
                          max_tick_val=max_xtick_val,
                          min_tick_val=min_xtick_val,
                          **kwargs)


def set_yticklabels(
        ax:plt.Axes,
        max_ticks:Union[int, Any] = 5,
        dtype=int,
        weight="bold",
        fontsize:int=12,
        max_ytick_val = None,
        min_ytick_val = None,
        **kwargs
):
    """
    Set the ticks and ticklabels of the y-axis. See ``set_ticklabels``.

    The parameters mirror those of ``set_xticklabels``, with
    ``max_ytick_val``/``min_ytick_val`` as the tick bounds.
    """
    return set_ticklabels(
        ax, "y", max_ticks, dtype, weight,
        fontsize=fontsize,
        max_tick_val=max_ytick_val,
        min_tick_val=min_ytick_val,
        **kwargs
    )


def set_ticklabels(
        ax:plt.Axes,
        which:str = "x",
        max_ticks:int = 5,
        dtype=int,
        weight="bold",
        fontsize:int=12,
        max_tick_val = None,
        min_tick_val = None,
        **kwargs
):
    """
    Set the ticks of an axis and label them with the converted tick values.

    :param ax:
        axes whose ticks are modified
    :param which:
        the axis to modify, ``x`` or ``y``
    :param max_ticks:
        if truthy, this many evenly spaced ticks are placed between the
        minimum and maximum tick value. Otherwise the existing ticks are kept.
    :param dtype:
        type to which the tick values are converted
    :param weight:
        font weight of the ticklabels
    :param fontsize:
        font size of the ticklabels
    :param max_tick_val:
        upper bound of the ticks, the largest existing tick if None
    :param min_tick_val:
        lower bound of the ticks, the smallest existing tick if None
    :param kwargs:
        any keyword arguments of axes.set_{x/y}ticklabels()
    :return:
        the axes, or None if the axis has no ticks
    """
    ticks_ = getattr(ax, f"get_{which}ticks")()
    ticks = np.array(ticks_)
    if len(ticks) < 1:
        warnings.warn(f"can not get {which}ticks {ticks_}")
        return

    if max_ticks:
        # compare against None explicitly so that a bound of 0 is honoured
        lower = min(ticks) if min_tick_val is None else min_tick_val
        upper = max(ticks) if max_tick_val is None else max_tick_val
        ticks = np.linspace(lower, upper, max_ticks)

    ticks = ticks.astype(dtype)

    # fix the tick positions before assigning the labels
    getattr(ax, f"set_{which}ticks")(ticks)

    getattr(ax, f"set_{which}ticklabels")(ticks,
                                          weight=weight,
                                          fontsize=fontsize,
                                          **kwargs
                                          )
    return ax


def set_rcParams(**kwargs):
    """
    Apply the default matplotlib rcParams used for the figures.

    Label sizes, bold weights and a Times New Roman font are set first;
    any rcParam given as keyword argument is applied afterwards. The
    ``font.family`` (and on linux ``font.serif``) entries are always set
    by this function, even if they are given in ``kwargs``.

    :param kwargs: rcParams to set, as ``{name: value}``
    """
    plt.rcParams.update({
        'axes.labelsize': '14',
        'axes.labelweight': 'bold',
        'xtick.labelsize': '12',
        'ytick.labelsize': '12',
        'font.weight': 'bold',
        'legend.title_fontsize': '12',
    })

    if sys.platform == "linux":
        kwargs['font.family'] = 'serif'
        # drop an existing entry first, otherwise every call would add
        # one more 'Times New Roman' to the list
        other_serif = [font for font in plt.rcParams['font.serif']
                       if font != 'Times New Roman']
        kwargs['font.serif'] = ['Times New Roman'] + other_serif
    else:
        kwargs['font.family'] = "Times New Roman"

    for k, v in kwargs.items():
        plt.rcParams[k] = v

    return
def residual_plot(
        train_true,
        train_prediction,
        test_true,
        test_prediction,
        label="Prediction",
        show:bool = False
):
    """
    Draw residuals (true - prediction) of training and test data.

    The left axes shows the residuals against the predictions, the
    narrower right axes shows horizontal histograms of the residuals.
    Both axes share the y-axis.

    :param train_true: true values of the training data
    :param train_prediction: predictions for the training data
    :param test_true: true values of the test data
    :param test_prediction: predictions for the test data
    :param label: label of the x-axis of the left axes
    :param show: whether to call ``plt.show()``
    """
    train_color = "#009E73"
    test_color = np.array([225, 121, 144]) / 256.0

    _, (scatter_ax, hist_ax) = plt.subplots(
        1, 2, sharey="all", gridspec_kw={'width_ratios': [2, 1]})

    train_residual = train_true.reshape(-1, ) - train_prediction.reshape(-1, )
    test_residual = test_true.reshape(-1, ) - test_prediction.reshape(-1, )

    # training data: histogram on the right, markers on the left
    hist(train_residual,
         show=False,
         ax=hist_ax,
         label="Training",
         bins=20,
         linewidth=0.5,
         edgecolor="k",
         grid=False,
         color=train_color,
         orientation='horizontal')

    plot(train_prediction, train_residual, 'o',
         show=False,
         ax=scatter_ax,
         color=train_color,
         markerfacecolor=train_color,
         markeredgecolor="black",
         markeredgewidth=0.5,
         alpha=0.7,
         label="Training")

    # test data: histogram on the right, markers on the left
    hist(test_residual,
         show=False,
         ax=hist_ax,
         bins=40,
         linewidth=0.5,
         edgecolor="k",
         grid=False,
         color=test_color,
         orientation='horizontal')

    set_xticklabels(hist_ax, 3)

    left_ax_kws = {
        'xlabel': label,
        'ylabel': "Residual",
        'legend_kws': {'loc': "upper left"},
    }
    plot(test_prediction, test_residual, 'o',
         show=False,
         ax=scatter_ax,
         color="darksalmon",
         markerfacecolor=test_color,
         markeredgecolor="black",
         markeredgewidth=0.5,
         ax_kws=left_ax_kws,
         alpha=0.7,
         label="Test")

    set_xticklabels(scatter_ax, 5)
    set_yticklabels(scatter_ax, 5)

    # reference line of zero residual
    scatter_ax.axhline(0.0, color="black")
    plt.subplots_adjust(wspace=0.15)

    if show:
        plt.show()
    return
def regression_plot(
        train_true,
        train_pred,
        test_true,
        test_pred,
        label,
        max_xtick_val = None,
        max_ytick_val = None,
        min_xtick_val=None,
        min_ytick_val=None,
        max_ticks = 5,
        show=False
):
    """
    Regression plot of training and test data with marginal histograms.

    Observed values are drawn against predicted values for both sets, the
    R2 of each set is annotated on the axes, and histograms of both sets
    are added along the margins.

    :param train_true: true values of the training data
    :param train_pred: predictions for the training data
    :param test_true: true values of the test data
    :param test_pred: predictions for the test data
    :param label: name of the target, used in the axis labels
    :param max_xtick_val: passed to ``set_xticklabels``
    :param max_ytick_val: passed to ``set_yticklabels``
    :param min_xtick_val: passed to ``set_xticklabels``
    :param min_ytick_val: passed to ``set_yticklabels``
    :param max_ticks: number of ticks on both axes
    :param show: whether to call ``plt.show()``
    :return: the main axes
    """
    train_color = '#009E73'
    test_color = np.array([225, 121, 144]) / 256.0

    TRAIN_RIDGE_LINE_KWS = [{'color': train_color, 'lw': 1.0},
                            {'color': train_color, 'lw': 1.0}]
    TRAIN_HIST_KWS = [{'color': train_color, 'bins': 50},
                      {'color': train_color, 'bins': 50}]

    ax = regplot(train_true, train_pred,
                 marker_size=35,
                 marker_color=train_color,
                 line_color='k',
                 fill_color='k',
                 scatter_kws={'edgecolors': 'black',
                              'linewidth': 0.5,
                              'alpha': 0.5,
                              },
                 label="Training",
                 show=False
                 )

    axHistx, axHisty = AddMarginalPlots(
        ax,
        ridge=False,
        pad=0.25,
        size=0.7,
        ridge_line_kws=TRAIN_RIDGE_LINE_KWS,
        hist_kws=TRAIN_HIST_KWS
    )(train_true, train_pred)

    train_r2 = RegressionMetrics(train_true, train_pred).r2()
    test_r2 = RegressionMetrics(test_true, test_pred).r2()

    ax.annotate(f'Training $R^2$= {round(train_r2, 2)}',
                xy=(0.95, 0.30),
                xycoords='axes fraction',
                horizontalalignment='right', verticalalignment='top',
                fontsize=12, weight="bold")
    ax.annotate(f'Test $R^2$= {round(test_r2, 2)}',
                xy=(0.95, 0.20),
                xycoords='axes fraction',
                horizontalalignment='right', verticalalignment='top',
                fontsize=12, weight="bold")

    ax_ = regplot(test_true, test_pred,
                  marker_size=35,
                  marker_color=test_color,
                  line_style=None,
                  scatter_kws={'edgecolors': 'black',
                               'linewidth': 0.5,
                               'alpha': 0.5,
                               },
                  show=False,
                  label="Test",
                  ax=ax
                  )

    # matplotlib ignores ``fontsize`` when ``prop`` is given, therefore
    # the size is put into ``prop`` together with the weight
    ax_.legend(prop=dict(weight="bold", size=12))

    TEST_RIDGE_LINE_KWS = [{'color': test_color, 'lw': 1.0},
                           {'color': test_color, 'lw': 1.0}]
    TEST_HIST_KWS = [{'color': test_color, 'bins': 50},
                     {'color': test_color, 'bins': 50}]

    # draw the test histograms on the marginal axes created above
    AddMarginalPlots(
        ax,
        ridge=False,
        pad=0.25,
        size=0.7,
        ridge_line_kws=TEST_RIDGE_LINE_KWS,
        hist_kws=TEST_HIST_KWS
    )(test_true, test_pred, axHistx, axHisty)

    set_xticklabels(
        ax_,
        max_xtick_val=max_xtick_val,
        min_xtick_val=min_xtick_val,
        max_ticks=max_ticks,
    )
    set_yticklabels(
        ax_,
        max_ytick_val=max_ytick_val,
        min_ytick_val=min_ytick_val,
        max_ticks=max_ticks
    )
    ax.set_xlabel(f"Observed {label}")
    ax.set_ylabel(f"Predicted {label}")

    if show:
        plt.show()
    return ax


def ci_from_dist(
        distribution,
        coverage:float,
        true_array:np.ndarray,
        label:str,
        fill_color,
        line_color,
):
    """
    plots confidence interval from distribution

    :param distribution:
        object with an ``interval`` method which returns the lower and
        upper bounds for the given coverage
    :param coverage:
        coverage of the interval as a fraction, e.g. 0.9 for a 90% interval
    :param true_array:
        true values, drawn as a line
    :param label:
        name of the target, used in the label of the y-axis
    :param fill_color:
        color of the interval
    :param line_color:
        color of the line of true values
    :return:
        None
    """
    lower, upper = distribution.interval(coverage)

    axes = plot(true_array, show=False, color=line_color, label='True')
    axes.fill_between(np.arange(len(lower)), lower, upper,
                      color=fill_color,
                      label=f"{int(coverage*100)}% CI",
                      alpha=0.6
                      )
    plt.legend()

    # NOTE(review): the labels are set without fixing the tick positions
    # first, so they can get out of sync if the ticks move afterwards.
    xticks = np.array(axes.get_xticks()).astype(int)
    axes.set_xticklabels(xticks, weight="bold", fontsize=12)

    yticks = np.array(axes.get_yticks()).astype(int)
    axes.set_yticklabels(yticks, weight="bold", fontsize=12)

    axes.set_xlabel('Samples', weight="bold", fontsize=12)
    axes.set_ylabel(f"Total {label}", weight="bold", fontsize=12)
    axes.grid(visible=True, ls='--', color='lightgrey')
    return


def plot_1d_pdp(pdp, train_data, feature, show=True):
    """
    1D pdp

    :param pdp:
        object which provides ``calc_pdp_1dim`` and ``plot_pdp_1dim``
    :param train_data:
        data for which the partial dependence is calculated
    :param feature:
        name of the feature. The x-axis label is looked up in
        ``COLUMN_MAPS_`` and falls back to the name itself.
    :param show:
        whether to call ``plt.tight_layout()`` and ``plt.show()``
    :return:
        None
    """
    pdp_vals, ice_vals = pdp.calc_pdp_1dim(train_data, feature)

    ax = pdp.plot_pdp_1dim(pdp_vals, ice_vals, train_data,
                           feature,
                           pdp_line_kws={'color': '#5f3946'},
                           ice_color="#c8c0aa"
                           )
    ax.set_xlabel(COLUMN_MAPS_.get(feature, feature))
    ax.set_ylabel(f"E[f(x) | {feature}]")

    if show:
        plt.tight_layout()
        plt.show()
    return
def plot_stds(
        mean:np.ndarray,
        std:np.ndarray,
        label:str,
        pediction:np.ndarray = None,
        num_stds:int = 3,
        show:bool = True
):
    """
    plots standard deviations around mean/prediction array

    A line is drawn for ``pediction`` if it is given, otherwise for
    ``mean``. The bands are always centered on ``mean``.

    :param mean:
        array of means
    :param std:
        array of standard deviations, same length as ``mean``
    :param label:
        label of the y-axis
    :param pediction:
        optional array which is drawn as the line instead of ``mean``
        (the parameter name is kept as is for backward compatibility)
    :param num_stds:
        the 3-sigma band is drawn if this is at least 3 and the 2-sigma
        band if it is at least 2. The 1-sigma band is always drawn.
    :param show:
        whether to call ``plt.tight_layout()`` and ``plt.show()``
    :return:
        None
    """
    if pediction is None:
        line, line_label = mean, "$\\mu$"
    else:
        line, line_label = pediction, "prediction"

    ax = plot(line, show=False, color="grey", label=line_label,
              ax_kws=dict(ylabel=label, xlabel="Samples"),
              )

    # widest band first so that the narrower bands are drawn on top of it
    bands = (
        (3, "lightsteelblue", "$\\mu$ $\u00B1$ 3 $\\sigma$"),
        (2, "cornflowerblue", "$\\mu$ $\u00B1$ 2 $\\sigma$"),
        (1, "royalblue", "$\\mu$ $\u00B1$ $\\sigma$"),
    )
    x = np.arange(len(std))
    for multiplier, color, band_label in bands:
        # the 1-sigma band does not depend on num_stds
        if multiplier > 1 and num_stds < multiplier:
            continue
        ax.fill_between(x,
                        mean - (multiplier * std),
                        mean + (multiplier * std),
                        color=color, label=band_label)

    plt.legend()
    if show:
        plt.tight_layout()
        plt.show()

    return


def version_info()->dict:
    """
    Return the versions reported by ``get_version_info`` together with
    the versions of ngboost and shap.

    :return: dictionary of version information
    """
    info = get_version_info()
    info['ngboost'] = ngboost.__version__
    info['shap'] = shap.__version__
    return info