.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/aleoteric_nn.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_aleoteric_nn.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_aleoteric_nn.py:


================================================
3.1 probabilistic NN (Disinfection Efficiency)
================================================
This file shows how to capture aleatoric uncertainty in a neural network
for modeling microbial disinfection efficiency (%) data.

.. GENERATED FROM PYTHON SOURCE LINES 8-29

.. code-block:: Python

    # First we import all the required libraries/functions

    import os

    import numpy as np  # for array processing

    import pandas as pd

    import matplotlib.pyplot as plt  # for plotting

    from easy_mpl import plot  # plotting functions

    from SeqMetrics import RegressionMetrics  # to calculate performance metrics

    from ai4water.utils import TrainTestSplit  # for splitting the data into training and test sets
    from ai4water.utils.utils import get_version_info

    # some helper functions
    from utils import read_data, BayesModel, SAVE
    from utils import set_rcParams, residual_plot, regression_plot


.. GENERATED FROM PYTHON SOURCE LINES 30-31

Print the versions of the libraries being used.

.. GENERATED FROM PYTHON SOURCE LINES 31-35

.. code-block:: Python


    # Print one "<library> <version>" line per entry reported by the helper.
    versions = get_version_info()
    for name in versions:
        print(name, versions[name])


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    python 3.9.20 (main, Nov  5 2024, 16:07:55) 
    [GCC 11.4.0]
    os posix
    ai4water 1.07
    easy_mpl 0.21.4
    SeqMetrics 2.0.0
    tensorflow 2.10.1
    keras.api._v2.keras 2.10.0
    numpy 1.21.6
    pandas 1.5.3
    matplotlib 3.7.1
    h5py 3.13.0
    sklearn 1.3.1
    seaborn 0.13.2


.. GENERATED FROM PYTHON SOURCE LINES 36-37

setting global values for plotting

.. GENERATED FROM PYTHON SOURCE LINES 37-39

.. code-block:: Python

    # Apply the global plotting settings via the helper from the local ``utils`` module.
    set_rcParams()


.. GENERATED FROM PYTHON SOURCE LINES 40-41

Define loss function

.. GENERATED FROM PYTHON SOURCE LINES 41-44

.. code-block:: Python

    def negative_loglikelihood(targets, estimated_distribution):
        """Return the negated log-probability of ``targets``.

        ``estimated_distribution`` must expose a ``log_prob`` method; whatever
        that method returns for ``targets`` is negated and handed back.
        """
        log_likelihood = estimated_distribution.log_prob(targets)
        return -log_likelihood


.. GENERATED FROM PYTHON SOURCE LINES 45-46

prepare data

.. GENERATED FROM PYTHON SOURCE LINES 46-54

.. code-block:: Python


    data = read_data()

    # Every column except the last is an input; the last column is the target.
    column_names = data.columns.tolist()
    input_features = column_names[:-1]
    output_features = column_names[-1:]

    print(input_features)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['Time (min)', 'Ini. CC', 'Sonic. PD', 'h20 Conc.', 'Volume (mL)', 'Solution pH']


.. GENERATED FROM PYTHON SOURCE LINES 55-57

.. code-block:: Python

    # Show the target column name (the last column of ``data``).
    print(output_features)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['Efficiency']


.. GENERATED FROM PYTHON SOURCE LINES 58-61

Split data into training and test sets.
We set the seed for reproducibility. This ensures that on every run,
the data is split in exactly the same way.

.. GENERATED FROM PYTHON SOURCE LINES 61-70

.. code-block:: Python


    # A fixed seed keeps the random split identical between runs.
    splitter = TrainTestSplit(seed=313)
    TrainX, TestX, TrainY, TestY = splitter.split_by_random(
        data[input_features], data[output_features]
    )

    # Report the dimensions of the four resulting pieces.
    print(TrainX.shape, TestX.shape, TrainY.shape, TestY.shape)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    (219, 6) (95, 6) (219, 1) (95, 1)


.. GENERATED FROM PYTHON SOURCE LINES 71-77

hyperparameters
================
Following hyperparameters have been optimized for the given dataset.

The hidden layers will consist of four fully connected layers and each
layer will consist of 32 neurons.

.. GENERATED FROM PYTHON SOURCE LINES 77-87

.. code-block:: Python


    hidden_units = [32, 32, 32, 32]  # four hidden layers with 32 units each
    learning_rate = 0.0043944
    activation = "elu"
    train_size = len(TrainX)  # number of rows in the training split

    num_epochs = 1000
    batch_size = 40
    # NOTE(review): "aleoteric" (sic) is the literal value handed to BayesModel;
    # presumably the helper matches on this exact spelling -- confirm before renaming.
    uncertainty_type = "aleoteric"


.. GENERATED FROM PYTHON SOURCE LINES 88-90

Model Building and training
============================

.. GENERATED FROM PYTHON SOURCE LINES 90-110

.. code-block:: Python

    # Layer settings that are forwarded to BayesModel under the "layers" key.
    layer_config = dict(
        hidden_units=hidden_units,
        train_size=train_size,
        activation=activation,
        uncertainty_type=uncertainty_type,
    )

    model = BayesModel(
        model={"layers": layer_config},
        batch_size=batch_size,
        epochs=num_epochs,
        lr=learning_rate,
        input_features=input_features,
        output_features=output_features,
        category="DL",
        optimizer="RMSprop",
        loss=negative_loglikelihood,
        # wandb_config=dict(project="flowcam", entity="atherabbas", monitor="val_loss")
    )

    # Reset the global seed (same value as the data split) for reproducibility.
    model.reset_global_seed(313)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none


                building DL model for  
                regression problem using layers
    Model: "model"
    _________________________________________________________________
     Layer (type)                Output Shape              Param #   
    =================================================================
     input_1 (InputLayer)        [(None, 6)]               0         
                                                                 
     batch_normalization (BatchN  (None, 6)                24        
     ormalization)                                                   
                                                                 
     dense (Dense)               (None, 32)                224       
                                                                 
     dense_1 (Dense)             (None, 32)                1056      
                                                                 
     dense_2 (Dense)             (None, 32)                1056      
                                                                 
     dense_3 (Dense)             (None, 32)                1056      
                                                                 
     dense_4 (Dense)             (None, 2)                 66        
                                                                 
     independent_normal (Indepen  ((None, 1),              0         
     dentNormal)                  (None, 1))                         
                                                                 
    =================================================================
    Total params: 3,482
    Trainable params: 3,470
    Non-trainable params: 12
    _________________________________________________________________
    dot plot of model could not be plotted due to You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


.. GENERATED FROM PYTHON SOURCE LINES 111-115

model training

We provide the test data (x,y pairs for test set) as ``validation_data``. This
data will be used for early stopping.

.. GENERATED FROM PYTHON SOURCE LINES 115-123

.. code-block:: Python


    # Cast every array to float32 up front, then pass them to ``fit``.
    train_x = TrainX.values.astype(np.float32)
    train_y = TrainY.values.astype(np.float32)
    val_x = TestX.values.astype(np.float32)
    val_y = TestY.values.astype(np.float32)

    # The test split is supplied as the validation data.
    h = model.fit(x=train_x, y=train_y, validation_data=(val_x, val_y), verbose=0)


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_001.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_001.png, /auto_examples/images/sphx_glr_aleoteric_nn_001_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ********** Successfully loaded weights from weights_396_3.04718.hdf5 file **********


.. GENERATED FROM PYTHON SOURCE LINES 124-126

Since our model is probabilistic, it gives a different prediction
on every call, even though the input data stays the same.

.. GENERATED FROM PYTHON SOURCE LINES 126-130

.. code-block:: Python


    # Predict five times on the same two test rows and print each flattened result.
    for _ in range(5):
        sampled = model.predict(TestX[0:2], verbose=False)
        print(sampled.reshape(-1,))


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    [43.098385 38.313217]
    [ 4.6727104 54.98606  ]
    [26.041084 65.09413 ]
    [38.01809  39.603497]
    [17.294708 45.028538]


.. GENERATED FROM PYTHON SOURCE LINES 131-134

Prediction on Training data
=============================
If we call the model, the output is the learned distribution.

.. GENERATED FROM PYTHON SOURCE LINES 134-139

.. code-block:: Python


    # Call the wrapped model directly on the training inputs; the result is a
    # distribution object (its concrete type is printed below), not an array.
    train_dist = model._model(TrainX)

    print(type(train_dist))


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    <class 'tensorflow_probability.python.layers.internal.distribution_tensor_coercible._TensorCoercible'>


.. GENERATED FROM PYTHON SOURCE LINES 140-154

.. code-block:: Python

    # Flattened mean and standard deviation of the distribution on the training inputs.
    train_mean = train_dist.mean().numpy().reshape(-1,)
    train_std = train_dist.stddev().numpy().reshape(-1, )

    # Save observed and predicted values side by side. The observed targets
    # come first so that the data lines up with the ('true', 'prediction')
    # headers; previously the two columns were written in swapped order.
    pd.DataFrame(
        np.column_stack([TrainY.values, train_mean]),
        columns=['true', 'prediction']
    ).to_csv(os.path.join(model.path, 'train.csv'), index=False)

    # Performance of the predicted mean against the observed training targets.
    metrics = RegressionMetrics(TrainY.values, train_mean)
    print(f"R2: {metrics.r2()}")
    print(f"R2 Score: {metrics.r2_score()}")
    print(f"RMSE Score: {metrics.rmse()}")
    print(f"MAE: {metrics.mae()}")


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    R2: 0.836968806030221
    R2 Score: 0.8152239921144592
    RMSE Score: 9.297222541448338
    MAE: 5.5407476271


.. GENERATED FROM PYTHON SOURCE LINES 155-183

.. code-block:: Python

    st, en = 0, 50  # draw CI for first 50 samples only

    _, ax = plt.subplots()
    ax.grid(visible=True, ls='--', color='lightgrey')
    # Backslashes in the mathtext labels are doubled: "\m" and "\s" are not
    # valid Python escape sequences and trigger a warning in non-raw strings.
    # The resulting label text is exactly the same as before.
    ax = plot(train_mean[st:en], show=False, color="grey", label="$\\mu$",
              ax_kws=dict(ylabel="Disinfection Efficiency (%)", xlabel="Samples",
                          ylabel_kws={"fontsize": 12, 'weight': 'bold'},
                          xlabel_kws={"fontsize": 12, 'weight': 'bold'}),
              ax=ax,
              )

    # Band of two standard deviations around the predicted mean.
    ax.fill_between(np.arange(len(train_std[st:en])),
                    train_mean[st:en] - (2 * train_std[st:en]),
                    train_mean[st:en] + (2 * train_std[st:en]),
                    color="cornflowerblue",
                    label="$\\mu$ $\u00B1$ 2 $\\sigma$"
                    )
    # Band of one standard deviation, drawn on top of the wider band.
    ax.fill_between(np.arange(len(train_std[st:en])),
                    train_mean[st:en] - train_std[st:en],
                    train_mean[st:en] + train_std[st:en],
                    color="royalblue",
                    label="$\\mu$ $\u00B1$  $\\sigma$"
                    )
    ax.grid(visible=True, ls='--', color='lightgrey')
    plt.legend()
    plt.tight_layout()
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_002.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_002.png, /auto_examples/images/sphx_glr_aleoteric_nn_002_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 184-217

.. code-block:: Python


    _, ax = plt.subplots()
    ax.grid(visible=True, ls='--', color='lightgrey')
    # Observed training targets, with the predicted uncertainty bands added below.
    ax = plot(TrainY.values, show=False, color="grey", label="True",
              ax_kws=dict(ylabel="Disinfection Efficiency (%)", xlabel="Samples"),
              ax=ax
              )

    # Backslashes in the mathtext labels are doubled: "\m" and "\s" are not
    # valid Python escape sequences and trigger a warning in non-raw strings.
    # The resulting label text is exactly the same as before.
    # Bands are drawn from widest (3 sigma) to narrowest (1 sigma).
    ax.fill_between(np.arange(len(train_mean)),
                    train_mean - (3 * train_std),
                    train_mean + (3 * train_std),
                    color="lightsteelblue",
                    label="$\\mu$ $\u00B1$ 3 $\\sigma$",
                    )

    ax.fill_between(np.arange(len(train_std)),
                    train_mean - (2 * train_std),
                    train_mean + (2 * train_std),
                    color="cornflowerblue",
                    label="$\\mu$ $\u00B1$ 2 $\\sigma$"
                    )
    ax.fill_between(np.arange(len(train_std)),
                    train_mean - train_std,
                    train_mean + train_std,
                    color="royalblue",
                    label="$\\mu$ $\u00B1$  $\\sigma$"
                    )

    plt.legend()
    plt.tight_layout()
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_003.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_003.png, /auto_examples/images/sphx_glr_aleoteric_nn_003_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 218-220

Prediction on Test data
==========================

.. GENERATED FROM PYTHON SOURCE LINES 220-236

.. code-block:: Python


    # Distribution returned by the wrapped model for the test inputs, plus its
    # flattened mean and standard deviation.
    test_dist = model._model(TestX)
    test_mean = test_dist.mean().numpy().reshape(-1,)
    test_std = test_dist.stddev().numpy().reshape(-1,)

    # Save observed and predicted values side by side. The observed targets
    # come first so that the data lines up with the ('true', 'prediction')
    # headers; previously the two columns were written in swapped order.
    pd.DataFrame(
        np.column_stack([TestY.values, test_mean]),
        columns=['true', 'prediction']
    ).to_csv(os.path.join(model.path, 'test.csv'), index=False)

    # Performance of the predicted mean against the observed test targets.
    metrics = RegressionMetrics(TestY.values, test_mean)
    print(f"R2: {metrics.r2()}")
    print(f"R2 Score: {metrics.r2_score()}")
    print(f"RMSE Score: {metrics.rmse()}")
    print(f"MAE: {metrics.mae()}")


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    R2: 0.6806308826527441
    R2 Score: 0.6775488562664058
    RMSE Score: 10.285320921172987
    MAE: 6.21472232619559


.. GENERATED FROM PYTHON SOURCE LINES 237-269

.. code-block:: Python

    _, ax = plt.subplots()
    ax.grid(visible=True, ls='--', color='lightgrey')
    # Backslashes in the mathtext labels are doubled: "\m" and "\s" are not
    # valid Python escape sequences and trigger a warning in non-raw strings.
    # The resulting label text is exactly the same as before.
    ax = plot(test_mean, show=False, color="grey", label="$\\mu$",
              ax_kws=dict(ylabel="Disinfection Efficiency (%)", xlabel="Samples"),
              ax=ax,
              )

    # Bands are drawn from widest (3 sigma) to narrowest (1 sigma).
    ax.fill_between(np.arange(len(test_mean)),
                    test_mean - (3 * test_std),
                    test_mean + (3 * test_std),
                    color="lightsteelblue",
                    label="$\\mu$ $\u00B1$ 3 $\\sigma$",
                    )

    ax.fill_between(np.arange(len(test_mean)),
                    test_mean - (2 * test_std),
                    test_mean + (2 * test_std),
                    color="cornflowerblue",
                    label="$\\mu$ $\u00B1$ 2 $\\sigma$"
                    )
    ax.fill_between(np.arange(len(test_mean)),
                    test_mean - test_std,
                    test_mean + test_std,
                    color="royalblue",
                    label="$\\mu$ $\u00B1$  $\\sigma$"
                    )
    ax.grid(visible=True, ls='--', color='lightgrey')
    plt.legend()
    plt.tight_layout()
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_004.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_004.png, /auto_examples/images/sphx_glr_aleoteric_nn_004_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 270-303

.. code-block:: Python

    _, ax = plt.subplots()
    ax.grid(visible=True, ls='--', color='lightgrey')
    # Observed test targets, with the predicted uncertainty bands added below.
    ax = plot(TestY.values, show=False, color="grey", label="True",
              ax_kws=dict(ylabel="Disinfection Efficiency (%)", xlabel="Samples",
                          ylabel_kws={"fontsize": 12, 'weight': 'bold'},
                          xlabel_kws={"fontsize": 12, 'weight': 'bold'}),
              ax=ax
              )

    # Backslashes in the mathtext labels are doubled: "\m" and "\s" are not
    # valid Python escape sequences and trigger a warning in non-raw strings.
    # The resulting label text is exactly the same as before.
    # Bands are drawn from widest (3 sigma) to narrowest (1 sigma).
    ax.fill_between(np.arange(len(test_mean)),
                    test_mean - (3 * test_std),
                    test_mean + (3 * test_std),
                    color="lightsteelblue",
                    label="$\\mu$ $\u00B1$ 3 $\\sigma$",
                    )

    ax.fill_between(np.arange(len(test_mean)),
                    test_mean - (2 * test_std),
                    test_mean + (2 * test_std),
                    color="cornflowerblue",
                    label="$\\mu$ $\u00B1$ 2 $\\sigma$"
                    )
    ax.fill_between(np.arange(len(test_mean)),
                    test_mean - test_std,
                    test_mean + test_std,
                    color="royalblue",
                    label="$\\mu$ $\u00B1$  $\\sigma$"
                    )
    ax.grid(visible=True, ls='--', color='lightgrey')
    plt.legend()
    plt.tight_layout()
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_005.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_005.png, /auto_examples/images/sphx_glr_aleoteric_nn_005_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 304-309

.. code-block:: Python


    # Evaluate the wrapped model on the input columns of the complete dataset
    # and keep the flattened mean and standard deviation of the result.
    total_dist = model._model(data[input_features])
    total_mean = total_dist.mean().numpy().reshape(-1,)
    total_std = total_dist.stddev().numpy().reshape(-1,)


.. GENERATED FROM PYTHON SOURCE LINES 310-341

.. code-block:: Python

    _, ax = plt.subplots()
    ax.grid(visible=True, ls='--', color='lightgrey')
    # Backslashes in the mathtext labels are doubled: "\m" and "\s" are not
    # valid Python escape sequences and trigger a warning in non-raw strings.
    # The resulting label text is exactly the same as before.
    ax = plot(total_mean, show=False, color="grey", label="$\\mu$",
              ax_kws=dict(ylabel="Disinfection Efficiency (%)", xlabel="Samples"),
              ax=ax,
              )

    # Bands are drawn from widest (3 sigma) to narrowest (1 sigma).
    ax.fill_between(np.arange(len(total_mean)),
                    total_mean - (3 * total_std),
                    total_mean + (3 * total_std),
                    color="lightsteelblue",
                    label="$\\mu$ $\u00B1$ 3 $\\sigma$",
                    )

    ax.fill_between(np.arange(len(total_mean)),
                    total_mean - (2 * total_std),
                    total_mean + (2 * total_std),
                    color="cornflowerblue",
                    label="$\\mu$ $\u00B1$ 2 $\\sigma$"
                    )
    ax.fill_between(np.arange(len(total_mean)),
                    total_mean - total_std,
                    total_mean + total_std,
                    color="royalblue",
                    label="$\\mu$ $\u00B1$  $\\sigma$"
                    )
    ax.grid(visible=True, ls='--', color='lightgrey')
    plt.legend()
    plt.tight_layout()
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_006.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_006.png, /auto_examples/images/sphx_glr_aleoteric_nn_006_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 342-354

.. code-block:: Python

    set_rcParams()

    # Pass observed and predicted values of both splits to the ``residual_plot`` helper.
    residual_plot(TrainY.values, train_mean, TestY.values, test_mean)

    # The figure is written to disk only when the SAVE flag from ``utils`` is truthy.
    if SAVE:
        plt.savefig("results/figures/residue_aleoteric_eff", dpi=600, bbox_inches="tight")
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_007.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_007.png, /auto_examples/images/sphx_glr_aleoteric_nn_007_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 355-367

.. code-block:: Python


    # Pass observed and predicted values of both splits to the ``regression_plot`` helper.
    ax = regression_plot(
        TrainY.values,
        train_mean,
        TestY.values,
        test_mean,
        label="Disinfection Efficiency (%)",
    )

    # Same limits on the x axis first, then on the y axis.
    for set_limits in (ax.set_xlim, ax.set_ylim):
        set_limits([-2, 100])

    # The figure is written to disk only when the SAVE flag from ``utils`` is truthy.
    if SAVE:
        plt.savefig("results/figures/reg_aleot_eff", dpi=600, bbox_inches="tight")
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_008.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_008.png, /auto_examples/images/sphx_glr_aleoteric_nn_008_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.


.. GENERATED FROM PYTHON SOURCE LINES 368-369

plot 95 % confidence interval

.. GENERATED FROM PYTHON SOURCE LINES 369-388

.. code-block:: Python


    # Bounds at 1.96 standard deviations around the mean (labelled "95% CI" below).
    total_upper = total_mean + (1.96 * total_std)
    total_lower = total_mean - (1.96 * total_std)

    _, ax = plt.subplots()
    ax.fill_between(np.arange(len(total_lower)),
                    total_upper, total_lower,
                    label="95% CI",
                    alpha=0.6, color='forestgreen')
    # NOTE(review): the series drawn here is the observed target column, yet
    # it is labelled "Prediction" -- confirm which of the two was intended.
    _ = plot(data[output_features].values,
             color="forestgreen", label="Prediction",
             ax=ax, show=False)
    ax.set_xlabel("Samples")
    ax.set_ylabel("Disinfection Efficiency (%)")
    # Apply the layout before saving so the saved file matches the figure
    # that is shown; previously savefig ran before tight_layout.
    plt.tight_layout()
    if SAVE:
        plt.savefig("results/figures/ci_95_aleot_eff", dpi=600, bbox_inches="tight")
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_009.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_009.png, /auto_examples/images/sphx_glr_aleoteric_nn_009_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 389-390

plot the 90% confidence interval

.. GENERATED FROM PYTHON SOURCE LINES 390-409

.. code-block:: Python


    # Bounds at 1.645 standard deviations around the mean (labelled "90% CI" below).
    total_upper = total_mean + (1.645 * total_std)
    total_lower = total_mean - (1.645 * total_std)

    _, ax = plt.subplots()
    ax.fill_between(np.arange(len(total_lower)),
                    total_upper, total_lower,
                    label="90% CI",
                    alpha=0.6,
                    color=np.array([217, 140, 122])/255)
    # NOTE(review): the series drawn here is the observed target column, yet
    # it is labelled "Prediction" -- confirm which of the two was intended.
    _ = plot(data[output_features].values, color=np.array([180, 27, 40])/255,
             label="Prediction",
             ax=ax, show=False)
    ax.set_xlabel("Samples")
    ax.set_ylabel("Disinfection Efficiency (%)")
    # Apply the layout before saving so the saved file matches the figure
    # that is shown; previously savefig ran before tight_layout.
    plt.tight_layout()
    if SAVE:
        plt.savefig("results/figures/ci_90_aleot_eff", dpi=600, bbox_inches="tight")
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_aleoteric_nn_010.png
   :alt: aleoteric nn
   :srcset: /auto_examples/images/sphx_glr_aleoteric_nn_010.png, /auto_examples/images/sphx_glr_aleoteric_nn_010_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 16.765 seconds)


.. _sphx_glr_download_auto_examples_aleoteric_nn.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: aleoteric_nn.ipynb <aleoteric_nn.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: aleoteric_nn.py <aleoteric_nn.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: aleoteric_nn.zip <aleoteric_nn.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_