import time
time_start_notebook = time.time()


%%capture
# capture will not print in notebook
import os
import sys
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    # install modules
    !pip install scikit-plot
    !pip install lrcurve
    !pip install watermark
    !pip install git+https://github.com/tensorflow/docs
    !pip install keras-tuner --upgrade
    !pip install tensorflow_addons

    ## print
    print('Environment: Google Colaboratory.')


# using GPU in colab


import sys
ENV_COLAB = 'google.colab' in sys.modules
if ENV_COLAB:
    !nvidia-smi


import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

import os
import sys

# random state
SEED = 0
RNG = np.random.RandomState(SEED)

# Jupyter notebook settings for pandas
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 100) # None for all the rows
pd.set_option('display.max_colwidth', 50)

import scipy
from scipy import stats

# scale and split
import sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics

# deep learning
import tensorflow as tf
from tensorflow import keras # Do not import keras, import from tensorflow

import tensorflow_addons as tfa # tfa.metrics.F1Score
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
from tensorboard.plugins.hparams import api as hp

# model evaluation
import scikitplot
from scikitplot import metrics as skmetrics
import lrcurve
from lrcurve import KerasLearningCurve

import keras_tuner as kt

# versions
%load_ext watermark
%watermark -a "Bhishan Poudel" -d -v -m
print()
%watermark -iv

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Bhishan Poudel 2021-08-11 

CPython 3.7.7
IPython 7.19.0

compiler   : Clang 4.0.1 (tags/RELEASE_401/final)
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit

tensorflow        2.5.0
scikitplot        0.3.7
tensorflow_addons 0.13.0
json              2.0.9
seaborn           0.10.1
sklearn           0.23.2
keras_tuner       1.0.3
autopep8          1.5.2
scipy             1.4.1
pandas            1.1.1
numpy             1.19.5
matplotlib        3.2.1
tensorflow.keras  2.5.0


def show_methods(method, ncols=3):
    x = [i for i in dir(method) if i[0]!='_' ]
    return pd.DataFrame(np.array_split(x,ncols)).T.fillna('')


def set_random_seed(seed):
    import os
    import random
    import numpy as np
    import tensorflow as tf

    os.environ['PYTHONHASHSEED']=str(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)


def model_evaluation(model_name, desc, ytest, yprobs1d,df_eval=None,
                     show=True,col_sort='Profit',threshold=0.5):
    if df_eval is None:
        df_eval = pd.DataFrame({'Model': [],
                        'Description':[],
                        'Accuracy':[],
                        'Precision':[],
                        'Recall':[],
                        'F1':[],
                        'AUC':[],
                        'AUCPR':[],
                        'Profit':[],
                    })
    
    # squeeze yprobs
    yprobs1d = np.array(yprobs1d).squeeze()

    # make sure yprobs is floating (8,32,64 or any)
    if not isinstance(yprobs1d[0],np.floating):
        print(f"Here yprobs1d[0] = {yprobs1d[0]}.\
        Are you sure this is y_prob and NOT the y_pred?")

    # assert length
    assert len(ytest) == len(yprobs1d), "ytest and yprobs1d must of same length."

    # prediction from probs
    ypreds = (yprobs.flatten()>threshold).astype(np.int8)

    # model evaluation
    average = 'binary'
    prec,rec,thr = sklearn.metrics.precision_recall_curve(ytest,yprobs1d)
    auc_pr = sklearn.metrics.auc(rec,prec)
    
    # profit
    cm = sklearn.metrics.confusion_matrix(ytest,ypreds)
    tn, fp, fn, tp = cm.ravel() # first element tn is ignored, last tp is imp.
    profit = 400*tp - 200*fn - 100*fp

    row_eval = [model_name,desc, 
                sklearn.metrics.accuracy_score(ytest, ypreds),
                sklearn.metrics.precision_score(ytest, ypreds, average=average),
                sklearn.metrics.recall_score(ytest, ypreds, average=average),
                sklearn.metrics.f1_score(ytest, ypreds, average=average),
                sklearn.metrics.roc_auc_score(ytest, yprobs1d),
                auc_pr,
                profit,
                ]

    df_eval.loc[len(df_eval)] = row_eval
    df_eval = df_eval.drop_duplicates()
    df_eval = df_eval.sort_values(col_sort)
    df_eval['Profit'] = df_eval['Profit'].astype(int)

    if show:
        print("Confusion Matrix:")
        print(cm)

        display(df_eval.style
                .background_gradient(subset=[col_sort])
                .format({'Profit': "{:,}"})
               )

    return df_eval


ifile = "https://github.com/bhishanpdl/Datasets/blob/master/Projects/Fraud_detection/raw/creditcard.csv.zip?raw=true"
# ifile = '../data/raw/creditcard.csv.zip'

df_raw = pd.read_csv(ifile,compression='zip')
print(df_raw.shape)
df_raw.head()

(284807, 31)


target = 'Class'
display(df_raw[target].value_counts())
sns.countplot(x=df_raw[target])

0    284315
1       492
Name: Class, dtype: int64

<matplotlib.axes._subplots.AxesSubplot at 0x7fcd44f23c50>


neg, pos = np.bincount(df_raw['Class'])
total = neg + pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))

Examples:
    Total: 284807
    Positive: 492 (0.17% of total)


cols_drop = ['Time']

df = df_raw.drop(cols_drop,axis=1)
df.shape

(284807, 30)


eps=0.001 # 0 => 0.1¢
df['Ammount'] = np.log(df.pop('Amount')+eps)


from sklearn.model_selection import train_test_split

target = 'Class'

df_Xtrain_orig,df_Xtest,ser_ytrain_orig,ser_ytest = train_test_split(df.drop([target],axis=1),
                                             df[target],
                                             test_size=0.2,
                                             stratify=df[target],
                                             random_state=SEED)

df_Xtrain,df_Xvalid,ser_ytrain,ser_yvalid = train_test_split(df_Xtrain_orig,
                                             ser_ytrain_orig,
                                             test_size=0.2,
                                             stratify=ser_ytrain_orig,
                                             random_state=SEED)

ytrain = np.array(ser_ytrain)
yvalid = np.array(ser_yvalid)
ytest = np.array(ser_ytest)

df.shape, df_Xtrain.shape, ser_ytrain.shape

((284807, 30), (182276, 29), (182276,))


from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(df_Xtrain)

Xtrain = scaler.transform(df_Xtrain)
Xvalid = scaler.transform(df_Xvalid)
Xtest  = scaler.transform(df_Xtest)

# clip the values
Xtrain = np.clip(Xtrain, -5, 5)
Xvalid = np.clip(Xvalid, -5, 5)
Xtest = np.clip(Xtest, -5, 5)


neg, pos = np.bincount(df_raw['Class'])
total = neg + pos

bias_initializer = np.log([pos/neg])
bias_initializer

array([-6.35935934])


weight_for_0 = (1 / neg)*(total)/2.0 
weight_for_1 = (1 / pos)*(total)/2.0

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

Weight for class 0: 0.50
Weight for class 1: 289.44


n_feats = Xtrain.shape[-1]
class_weight = {0: weight_for_0, 1: weight_for_1}

#============================================================
PARAMS_MODEL = {
    # layer 1
    'L1_units': 16,
    'L1_act': 'elu',
    'L1_dropout': 0.5,

    # optimizer
    'adam_lr': 1e-3,
}

#============================================================
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
      tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

#============================================================
PARAMS_FIT = {
    'epochs': 100,
    'batch_size': 2048,
    'class_weight0': class_weight[0],
    'class_weight1': class_weight[1],
    'patience': 10,
    'shuffle': True,
    }

#============================================================
# callbacks
cb_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_prc', 
    verbose=1,
    patience=PARAMS_FIT['patience'],
    mode='max',
    restore_best_weights=True)

#cb_checkpt = keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.h5")
cb_lr = lrcurve.KerasLearningCurve()
cb_dots = tfdocs.modeling.EpochDots()
callbacks = [cb_early, cb_lr,cb_dots]


def make_model(metrics=METRICS, bias_initializer=None,n_feats=n_feats):
    if bias_initializer is not None:
        bias_initializer = tf.keras.initializers.Constant(bias_initializer)

    model = keras.Sequential([
        # layer 1
        keras.layers.Dense(PARAMS_MODEL['L1_units'],
                           activation=PARAMS_MODEL['L1_act'],
                           kernel_regularizer=tf.keras.regularizers.l2(0.0001),
                           input_shape=(n_feats,)),

        keras.layers.Dropout(PARAMS_MODEL['L1_dropout']),

        # last layer is dense 1 with activation sigmoid
        keras.layers.Dense(1, activation='sigmoid',
                         bias_initializer=bias_initializer),
    ])

    model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=PARAMS_MODEL['adam_lr']),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=metrics)

    return model

model = make_model()
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_2 (Dense)              (None, 16)                480       
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17        
=================================================================
Total params: 497
Trainable params: 497
Non-trainable params: 0
_________________________________________________________________


type(model)

tensorflow.python.keras.engine.sequential.Sequential


from tensorflow.keras.utils import plot_model

# save keras sequential model
plot_model(model,'model_medium.png')

# For functional model
# tf.keras.utils.plot_model(model, 'model_medium.png')


%%time
set_random_seed(SEED)
model = make_model(bias_initializer=bias_initializer)

history = model.fit(
    Xtrain,
    ytrain,
    batch_size=PARAMS_FIT['batch_size'],
    epochs=PARAMS_FIT['epochs'],
    callbacks=callbacks,
    validation_data=(Xvalid, yvalid),
    verbose=0,
    class_weight=class_weight
    )

Epoch: 0, accuracy:0.9956,  auc:0.8024,  fn:191.0000,  fp:604.0000,  loss:1.5321,  prc:0.2119,  precision:0.1703,  recall:0.3937,  tn:181357.0000,  tp:124.0000,  val_accuracy:0.9989,  val_auc:0.9520,  val_fn:19.0000,  val_fp:29.0000,  val_loss:0.0130,  val_prc:0.6436,  val_precision:0.6742,  val_recall:0.7595,  val_tn:45461.0000,  val_tp:60.0000,  
.............Restoring model weights from the end of the best epoch.
.Epoch 00014: early stopping
CPU times: user 11.2 s, sys: 1.4 s, total: 12.6 s
Wall time: 8.69 s


matplotlib.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


import tensorflow_docs.plots


show_methods(tensorflow_docs.plots)


show_methods(history)


# tfdocs.plots.HistoryPlotter?


df_history = pd.DataFrame(history.history)
df_history['epoch'] = history.epoch # we need to add it separately

print(df_history.shape)
df_history.head(2).append(df_history.tail(2))

(14, 21)


history.history.keys()

dict_keys(['loss', 'tp', 'fp', 'tn', 'fn', 'accuracy', 'precision', 'recall', 'auc', 'prc', 'val_loss', 'val_tp', 'val_fp', 'val_tn', 'val_fn', 'val_accuracy', 'val_precision', 'val_recall', 'val_auc', 'val_prc'])


dict_history = {'medium': history}
metrics =  ['loss', 'auc', 'precision', 'recall']


# plotter can plot only one metric
plotter = tfdocs.plots.HistoryPlotter(metric = 'loss', smoothing_std=10)
plotter.plot(dict_history)


def plot_metrics(history):
    metrics =  ['loss', 'auc', 'precision', 'recall']
    for n, metric in enumerate(metrics):
        name = metric.replace("_"," ").capitalize()
        plt.subplot(2,2,n+1)
        plt.plot(history.epoch,  history.history[metric], color=colors[0], label='Train')
        plt.plot(history.epoch, history.history['val_'+metric],
                    color=colors[0], linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        if metric == 'loss':
            plt.ylim([0, plt.ylim()[1]])
        elif metric == 'auc':
            plt.ylim([0.8,1])
        else:
            plt.ylim([0,1])

        plt.legend()


plot_metrics(history)


from sklearn import metrics as skmetrics


yprobs = model.predict(Xtest) # in keras, predict_proba is deprecated.
yprobs[:5]

array([[0.00280005],
       [0.00117731],
       [0.01025069],
       [0.00233176],
       [0.00363791]], dtype=float32)


yprobs.shape, yprobs.squeeze().shape

((56962, 1), (56962,))


yprobs1d = yprobs.flatten()
ypreds = (yprobs1d>0.5).astype(np.int8)
ypreds[:5]

array([0, 0, 0, 0, 0], dtype=int8)


skmetrics.confusion_matrix(ytest, ypreds)

array([[56538,   326],
       [   18,    80]])


from scikitplot import metrics as skpmetrics


skpmetrics.plot_confusion_matrix(ytest,ypreds)

<matplotlib.axes._subplots.AxesSubplot at 0x7fcd60815350>


def plot_cm(labels, predictions, p=0.5):
    cm = sklearn.metrics.confusion_matrix(labels, predictions > p)
    plt.figure(figsize=(5,5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title('Confusion matrix @{:.2f}'.format(p))
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

    print('Legitimate Transactions Detected (True Negatives): ', cm[0][0])
    print('Legitimate Transactions Incorrectly Detected (False Positives): ', cm[0][1])
    print('Fraudulent Transactions Missed (False Negatives): ', cm[1][0])
    print('Fraudulent Transactions Detected (True Positives): ', cm[1][1])
    print('Total Fraudulent Transactions: ', np.sum(cm[1]))


plot_cm(ytest,ypreds)

Legitimate Transactions Detected (True Negatives):  56538
Legitimate Transactions Incorrectly Detected (False Positives):  326
Fraudulent Transactions Missed (False Negatives):  18
Fraudulent Transactions Detected (True Positives):  80
Total Fraudulent Transactions:  98


desc = "medium model"
yprobs = model.predict(Xtest) # in keras, predict_proba is deprecated.

yprobs1d = yprobs.flatten()
ypreds = (yprobs1d>0.5).astype(np.int8)

yprobs1d[:5]

array([0.00280005, 0.00117731, 0.01025069, 0.00233176, 0.00363791],
      dtype=float32)


type(yprobs1d[0])

numpy.float32


isinstance(yprobs1d[0],float)

False


isinstance(yprobs1d[0],np.floating)

True


df_eval = model_evaluation("keras", desc, ytest, yprobs1d,df_eval=None)

Confusion Matrix:
[[56538   326]
 [   18    80]]


from tensorboard.plugins.hparams import api as hp


%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


# Clear any logs from previous runs
!rm -rf ./logs/


HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))

# METRIC_ACCURACY = 'accuracy'
METRIC_NAME = 'auc' # upper case AUC gives error later.
METRIC = 'AUC'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
    hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
    metrics=[hp.Metric(METRIC, display_name=METRIC_NAME)],
  )


cb_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_'+METRIC_NAME, 
    verbose=1,
    patience=PARAMS_FIT['patience'],
    mode='max',
    restore_best_weights=True)

cb_dots = tfdocs.modeling.EpochDots()
callbacks = [cb_early, cb_dots]


def train_test_model(hparams):
    model = tf.keras.models.Sequential([

    # first layer
    tf.keras.layers.Dense(hparams[HP_NUM_UNITS],
                          activation=tf.nn.relu,
                          input_shape=(n_feats,)),
    tf.keras.layers.Dropout(hparams[HP_DROPOUT]),

    # last layer
    tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
      optimizer=hparams[HP_OPTIMIZER],
      loss='binary_crossentropy',
      metrics=[METRIC],
    )

    model.fit(Xtrain,
            ytrain,
            batch_size=PARAMS_FIT['batch_size'],
            epochs=PARAMS_FIT['epochs'],
            callbacks=callbacks,
            validation_data=(Xvalid, yvalid),
            class_weight=class_weight,
            verbose=0,
            )
    _, mymetric = model.evaluate(Xvalid, yvalid)
    return mymetric


show_methods(tf.summary)


hparams = {
    HP_NUM_UNITS: HP_NUM_UNITS.domain.values[0],
    HP_DROPOUT: HP_DROPOUT.domain.min_value,
    HP_OPTIMIZER: HP_OPTIMIZER.domain.values[0],
}

hp.hparams(hparams)  # record the values used in this trial
mymetric = train_test_model(hparams)
tf.summary.scalar(METRIC, mymetric, step=1)

Epoch: 0, auc:0.7376,  loss:0.6258,  val_auc:0.9782,  val_loss:0.5502,  
............Restoring model weights from the end of the best epoch.
.Epoch 00013: early stopping
1425/1425 [==============================] - 1s 720us/step - loss: 0.3081 - auc: 0.9893

<tf.Tensor: shape=(), dtype=bool, numpy=False>


def run(run_dir, hparams):
    mymetric = None
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        mymetric = train_test_model(hparams)
        tf.summary.scalar(METRIC, mymetric, step=1)
    return mymetric


session_num = 0
lst_series = []

for num_units in HP_NUM_UNITS.domain.values:
    for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
        for optimizer in HP_OPTIMIZER.domain.values:
            hparams = {
              HP_NUM_UNITS: num_units,
              HP_DROPOUT: dropout_rate,
              HP_OPTIMIZER: optimizer,
            }
            run_name = "run-%d" % session_num
            print('\n--- Starting trial: %s' % run_name)
            dict_params = {h.name: hparams[h] for h in hparams}
            ser = pd.Series(dict_params)

            mymetric = run('logs/hparam_tuning/' + run_name, hparams)
            ser[METRIC_NAME] = mymetric
            lst_series.append(ser)
            session_num += 1

# epoch 0 auc: are values before fitting
# loss: auc: are values after fitting.

--- Starting trial: run-0

Epoch: 0, auc:0.8588,  loss:0.4884,  val_auc:0.9912,  val_loss:0.5324,  
............Restoring model weights from the end of the best epoch.
.Epoch 00013: early stopping
1425/1425 [==============================] - 1s 754us/step - loss: 0.3068 - auc: 0.9972 0s - loss: 0.3064 -

--- Starting trial: run-1

Epoch: 0, auc:0.8652,  loss:0.5198,  val_auc:0.9674,  val_loss:0.6584,  
...................................................Restoring model weights from the end of the best epoch.
.Epoch 00052: early stopping
1425/1425 [==============================] - 1s 776us/step - loss: 0.1086 - auc: 0.9879

--- Starting trial: run-2

Epoch: 0, auc:0.6288,  loss:0.7768,  val_auc:0.9292,  val_loss:0.5340,  
....................Restoring model weights from the end of the best epoch.
.Epoch 00021: early stopping
1425/1425 [==============================] - 1s 785us/step - loss: 0.1365 - auc: 0.9959

--- Starting trial: run-3

Epoch: 0, auc:0.8563,  loss:0.5908,  val_auc:0.9561,  val_loss:0.7649,  
....................................................Restoring model weights from the end of the best epoch.
.Epoch 00053: early stopping
1425/1425 [==============================] - 1s 797us/step - loss: 0.1121 - auc: 0.9869

--- Starting trial: run-4

Epoch: 0, auc:0.7864,  loss:0.5691,  val_auc:0.9792,  val_loss:0.4454,  
.................Restoring model weights from the end of the best epoch.
.Epoch 00018: early stopping
1425/1425 [==============================] - 1s 845us/step - loss: 0.1165 - auc: 0.9938

--- Starting trial: run-5

Epoch: 0, auc:0.9033,  loss:0.4280,  val_auc:0.9810,  val_loss:0.5153,  
.................................Restoring model weights from the end of the best epoch.
.Epoch 00034: early stopping
1425/1425 [==============================] - 2s 1ms/step - loss: 0.1280 - auc: 0.9903

--- Starting trial: run-6

Epoch: 0, auc:0.8931,  loss:0.4900,  val_auc:0.9886,  val_loss:0.5400,  
...............Restoring model weights from the end of the best epoch.
.Epoch 00016: early stopping
1425/1425 [==============================] - 1s 825us/step - loss: 0.1465 - auc: 0.9951

--- Starting trial: run-7

Epoch: 0, auc:0.8325,  loss:0.5484,  val_auc:0.9778,  val_loss:0.6555,  
................Restoring model weights from the end of the best epoch.
.Epoch 00017: early stopping
1425/1425 [==============================] - 1s 933us/step - loss: 0.2175 - auc: 0.9923


df_hpo = pd.concat(lst_series,axis=1).T # hyperparameter optimization dataframe

df_hpo['num_units'] = df_hpo['num_units'].astype(int)
df_hpo[['dropout','auc']] = df_hpo[['dropout','auc']].astype(float)
df_hpo.sort_values('auc').style.background_gradient(subset=['auc'])


%tensorboard --logdir logs/hparam_tuning

ERROR: Could not find `tensorboard`. Please ensure that your PATH
contains an executable `tensorboard` program, or explicitly specify
the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY`
environment variable.


# predict using best model from hyperparameter tuning


params_best = df_hpo.nlargest(1,'auc').iloc[0].to_dict()
params_best = {i:v for i,v in params_best.items() if i !='auc'}

params_best

{'num_units': 16, 'dropout': 0.1, 'optimizer': 'adam'}


model = tf.keras.models.Sequential([
                                 
    # first layer
    tf.keras.layers.Dense(params_best['num_units'],
                          activation=tf.nn.relu,
                          input_shape=(n_feats,)),
    tf.keras.layers.Dropout(params_best['dropout']),

    # last layer
    tf.keras.layers.Dense(1, activation='sigmoid'),
  ])

# compile the model
model.compile(
      optimizer=params_best['optimizer'],
      loss='binary_crossentropy',
      metrics=[METRIC],
  )

# fit the model
history = model.fit(Xtrain,
            ytrain,
            batch_size=PARAMS_FIT['batch_size'],
            epochs=PARAMS_FIT['epochs'],
            callbacks=callbacks,
            validation_data=(Xvalid, yvalid),
            class_weight=class_weight,
            verbose=0,
            )

# prediction
yprobs = model.predict(Xtest)
ypreds = (yprobs.flatten()>0.5).astype(np.int8)

df_eval = model_evaluation("keras hparams", desc, ytest, ypreds,df_eval=df_eval)

Epoch: 0, auc:0.8512,  loss:0.4795,  val_auc:0.9589,  val_loss:0.4121,  
..................Restoring model weights from the end of the best epoch.
.Epoch 00019: early stopping
Here yprobs1d[0] = 0.        Are you sure this is y_prob and NOT the y_pred?
Confusion Matrix:
[[55650  1214]
 [   11    87]]


df_eval


import keras_tuner as kt

show_methods(kt)


from keras_tuner import HyperModel, RandomSearch, Hyperband, BayesianOptimization


METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]


class BinaryClassificationHyperModel(HyperModel):
    def __init__(self, input_shape,bias_initializer):
        self.input_shape = input_shape
        self.bias_initializer = tf.keras.initializers.Constant(bias_initializer)

    def build(self, hp):
        # parameters
        hp_units1 = hp.Int('units1',16,48,16,default=16)
        hp_act1 = hp.Choice(
            'dense_activation',
            values=['relu','elu'],
            default='relu',
            )
        hp_dropout1 = hp.Float(
            'dropout',
            min_value=0.2,
            max_value=0.7,
            default=0.5,
            step=0.1)

        hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
        hp_optimizers = hp.Choice('optimizer',values=['adam','sgd'])

        # define the model
        model = tf.keras.Sequential()

        # first layer
        model.add(
            tf.keras.layers.Dense(
                units=hp_units1,
                activation=hp_act1,
                kernel_initializer='zeros',
                bias_initializer='zeros',
                input_shape=input_shape
            )
        )
        model.add(tf.keras.layers.Dropout(hp_dropout1))


        # last layer
        model.add(tf.keras.layers.Dense(1, activation='sigmoid',
                               bias_initializer=self.bias_initializer)
        )

        model.compile(
            optimizer='adam',loss='binary_crossentropy',metrics=METRICS
        )

        return model

input_shape = (Xtrain.shape[1],)
bias_initializer = bias_initializer # np array of one element
hypermodel = BinaryClassificationHyperModel(input_shape,bias_initializer)


# kt.RandomSearch?


tuner_rs = kt.RandomSearch(
            hypermodel,
            objective=kt.Objective("val_prc", direction="max"),
            seed=SEED,
            max_trials=4,
            executions_per_trial=1,
            overwrite=True
        )


show_methods(tuner_rs)


# tuner_rs.search?
# tuner_rs.search(*fit_args, **fit_kwargs)


%%time
tuner_rs.search(Xtrain, ytrain,
                epochs=2,
                validation_split=0.2,
                verbose=1,
                )

Trial 4 Complete [00h 00m 22s]
val_prc: 0.7927567958831787

Best val_prc So Far: 0.8070851564407349
Total elapsed time: 00h 01m 20s
INFO:tensorflow:Oracle triggered exit
CPU times: user 1min 36s, sys: 9.23 s, total: 1min 45s
Wall time: 1min 20s


best_model = tuner_rs.get_best_models(num_models=1)[0]

# prediction
yprobs = best_model.predict(Xtest) # in keras predict gives probs not preds
yprobs1d = yprobs.flatten()
ypreds = (yprobs.flatten()>0.5).astype(np.int8)

df_eval = model_evaluation("keras-tuner randomsearch", desc, ytest, yprobs1d,df_eval=df_eval)

Confusion Matrix:
[[56852    12]
 [   28    70]]


tuner_hb = kt.Hyperband(
            hypermodel,
            max_epochs=2,
            objective=kt.Objective("val_auc", direction="max"),
            seed=SEED,
            executions_per_trial=1,
            hyperband_iterations=2,
            # factor=3,
            # directory='my_dir',
            # project_name='hyperband'
            overwrite=True,
        )

cb_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_prc', 
    verbose=1,
    patience=5,
    mode='max',
    restore_best_weights=True)

tuner_hb.search(Xtrain, ytrain,
                epochs=10,
                validation_split=0.2,
                verbose=1,
                callbacks = [cb_early] # patience must be smaller than max_epochs
                )

best_model = tuner_hb.get_best_models(num_models=1)[0]

Trial 4 Complete [00h 00m 16s]
val_auc: 0.9214053153991699

Best val_auc So Far: 0.9292096495628357
Total elapsed time: 00h 01m 23s
INFO:tensorflow:Oracle triggered exit


best_model = tuner_hb.get_best_models(num_models=1)[0]

# prediction
yprobs = best_model.predict(Xtest)
yprobs1d = yprobs.flatten()
ypreds = (yprobs.flatten()>0.5).astype(np.int8)

df_eval = model_evaluation("keras-tuner hyperband", desc, ytest, yprobs1d,df_eval=df_eval)

WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate
WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate
WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
Confusion Matrix:
[[56852    12]
 [   32    66]]


# Get the optimal hyperparameters
best_hps=tuner_hb.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters and train it on the data for N epochs
model = tuner_hb.hypermodel.build(best_hps)

history = model.fit(Xtrain, ytrain, epochs=10, validation_split=0.2)

val_auc_per_epoch = history.history['val_prc']

best_epoch = val_auc_per_epoch.index(max(val_auc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Epoch 1/10
4557/4557 [==============================] - 10s 2ms/step - loss: 0.0040 - tp: 212.0000 - fp: 37.0000 - tn: 181924.0000 - fn: 103.0000 - accuracy: 0.9992 - precision: 0.8514 - recall: 0.6730 - auc: 0.9165 - prc: 0.7342 - val_loss: 0.0033 - val_tp: 48.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 16.0000 - val_accuracy: 0.9993 - val_precision: 0.8571 - val_recall: 0.7500 - val_auc: 0.9214 - val_prc: 0.7891
Epoch 2/10
4557/4557 [==============================] - 6s 1ms/step - loss: 0.0035 - tp: 179.0000 - fp: 24.0000 - tn: 145545.0000 - fn: 72.0000 - accuracy: 0.9993 - precision: 0.8818 - recall: 0.7131 - auc: 0.9294 - prc: 0.7572 - val_loss: 0.0035 - val_tp: 52.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 12.0000 - val_accuracy: 0.9995 - val_precision: 0.8667 - val_recall: 0.8125 - val_auc: 0.9211 - val_prc: 0.7950
Epoch 3/10
4557/4557 [==============================] - 9s 2ms/step - loss: 0.0035 - tp: 180.0000 - fp: 25.0000 - tn: 145544.0000 - fn: 71.0000 - accuracy: 0.9993 - precision: 0.8780 - recall: 0.7171 - auc: 0.9276 - prc: 0.7463 - val_loss: 0.0033 - val_tp: 52.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 12.0000 - val_accuracy: 0.9995 - val_precision: 0.8667 - val_recall: 0.8125 - val_auc: 0.9209 - val_prc: 0.8127
Epoch 4/10
4557/4557 [==============================] - 8s 2ms/step - loss: 0.0033 - tp: 185.0000 - fp: 24.0000 - tn: 145545.0000 - fn: 66.0000 - accuracy: 0.9994 - precision: 0.8852 - recall: 0.7371 - auc: 0.9294 - prc: 0.7706 - val_loss: 0.0033 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9211 - val_prc: 0.7992
Epoch 5/10
4557/4557 [==============================] - 7s 2ms/step - loss: 0.0033 - tp: 181.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 70.0000 - accuracy: 0.9994 - precision: 0.8916 - recall: 0.7211 - auc: 0.9295 - prc: 0.7591 - val_loss: 0.0033 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9291 - val_prc: 0.8079
Epoch 6/10
4557/4557 [==============================] - 8s 2ms/step - loss: 0.0032 - tp: 190.0000 - fp: 24.0000 - tn: 145545.0000 - fn: 61.0000 - accuracy: 0.9994 - precision: 0.8879 - recall: 0.7570 - auc: 0.9275 - prc: 0.7732 - val_loss: 0.0032 - val_tp: 45.0000 - val_fp: 4.0000 - val_tn: 36388.0000 - val_fn: 19.0000 - val_accuracy: 0.9994 - val_precision: 0.9184 - val_recall: 0.7031 - val_auc: 0.9138 - val_prc: 0.8036
Epoch 7/10
4557/4557 [==============================] - 9s 2ms/step - loss: 0.0031 - tp: 185.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 66.0000 - accuracy: 0.9994 - precision: 0.8894 - recall: 0.7371 - auc: 0.9334 - prc: 0.7793 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9213 - val_prc: 0.8040
Epoch 8/10
4557/4557 [==============================] - 9s 2ms/step - loss: 0.0032 - tp: 186.0000 - fp: 20.0000 - tn: 145549.0000 - fn: 65.0000 - accuracy: 0.9994 - precision: 0.9029 - recall: 0.7410 - auc: 0.9294 - prc: 0.7715 - val_loss: 0.0031 - val_tp: 50.0000 - val_fp: 7.0000 - val_tn: 36385.0000 - val_fn: 14.0000 - val_accuracy: 0.9994 - val_precision: 0.8772 - val_recall: 0.7812 - val_auc: 0.9214 - val_prc: 0.8162
Epoch 9/10
4557/4557 [==============================] - 12s 3ms/step - loss: 0.0032 - tp: 182.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 69.0000 - accuracy: 0.9994 - precision: 0.8922 - recall: 0.7251 - auc: 0.9315 - prc: 0.7823 - val_loss: 0.0032 - val_tp: 52.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 12.0000 - val_accuracy: 0.9994 - val_precision: 0.8525 - val_recall: 0.8125 - val_auc: 0.9208 - val_prc: 0.8197
Epoch 10/10
4557/4557 [==============================] - 10s 2ms/step - loss: 0.0031 - tp: 181.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 70.0000 - accuracy: 0.9994 - precision: 0.8873 - recall: 0.7211 - auc: 0.9354 - prc: 0.7959 - val_loss: 0.0031 - val_tp: 46.0000 - val_fp: 4.0000 - val_tn: 36388.0000 - val_fn: 18.0000 - val_accuracy: 0.9994 - val_precision: 0.9200 - val_recall: 0.7188 - val_auc: 0.9214 - val_prc: 0.8183
Best epoch: 9


# Re-instantiate the hypermodel and train it with the optimal number of epochs from above.
hypermodel = tuner_hb.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(Xtrain, ytrain, epochs=best_epoch, validation_split=0.2)

Epoch 1/9
4557/4557 [==============================] - 10s 2ms/step - loss: 0.0040 - tp: 211.0000 - fp: 32.0000 - tn: 181929.0000 - fn: 104.0000 - accuracy: 0.9993 - precision: 0.8683 - recall: 0.6698 - auc: 0.9166 - prc: 0.7427 - val_loss: 0.0034 - val_tp: 50.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 14.0000 - val_accuracy: 0.9994 - val_precision: 0.8621 - val_recall: 0.7812 - val_auc: 0.9292 - val_prc: 0.7983
Epoch 2/9
4557/4557 [==============================] - 7s 2ms/step - loss: 0.0033 - tp: 183.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 68.0000 - accuracy: 0.9994 - precision: 0.8927 - recall: 0.7291 - auc: 0.9314 - prc: 0.7680 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9211 - val_prc: 0.7982
Epoch 3/9
4557/4557 [==============================] - 7s 1ms/step - loss: 0.0034 - tp: 182.0000 - fp: 30.0000 - tn: 145539.0000 - fn: 69.0000 - accuracy: 0.9993 - precision: 0.8585 - recall: 0.7251 - auc: 0.9275 - prc: 0.7501 - val_loss: 0.0033 - val_tp: 52.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 12.0000 - val_accuracy: 0.9995 - val_precision: 0.8667 - val_recall: 0.8125 - val_auc: 0.9208 - val_prc: 0.8121
Epoch 4/9
4557/4557 [==============================] - 7s 2ms/step - loss: 0.0033 - tp: 179.0000 - fp: 26.0000 - tn: 145543.0000 - fn: 72.0000 - accuracy: 0.9993 - precision: 0.8732 - recall: 0.7131 - auc: 0.9274 - prc: 0.7670 - val_loss: 0.0032 - val_tp: 51.0000 - val_fp: 7.0000 - val_tn: 36385.0000 - val_fn: 13.0000 - val_accuracy: 0.9995 - val_precision: 0.8793 - val_recall: 0.7969 - val_auc: 0.9210 - val_prc: 0.8033
Epoch 5/9
4557/4557 [==============================] - 7s 1ms/step - loss: 0.0033 - tp: 185.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 66.0000 - accuracy: 0.9994 - precision: 0.8937 - recall: 0.7371 - auc: 0.9314 - prc: 0.7612 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8500 - val_recall: 0.7969 - val_auc: 0.9212 - val_prc: 0.7987
Epoch 6/9
4557/4557 [==============================] - 9s 2ms/step - loss: 0.0032 - tp: 188.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 63.0000 - accuracy: 0.9994 - precision: 0.8952 - recall: 0.7490 - auc: 0.9294 - prc: 0.7787 - val_loss: 0.0032 - val_tp: 46.0000 - val_fp: 4.0000 - val_tn: 36388.0000 - val_fn: 18.0000 - val_accuracy: 0.9994 - val_precision: 0.9200 - val_recall: 0.7188 - val_auc: 0.9138 - val_prc: 0.801701.0000 - fn: 19.0000 - accuracy: 0.9994 - precision: 0.9167 - recall: 0.7765 - auc: 0.9402 - pr - ETA: 4s -
Epoch 7/9
4557/4557 [==============================] - 10s 2ms/step - loss: 0.0032 - tp: 179.0000 - fp: 25.0000 - tn: 145544.0000 - fn: 72.0000 - accuracy: 0.9993 - precision: 0.8775 - recall: 0.7131 - auc: 0.9414 - prc: 0.7736 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8500 - val_recall: 0.7969 - val_auc: 0.9212 - val_prc: 0.8005
Epoch 8/9
4557/4557 [==============================] - 8s 2ms/step - loss: 0.0031 - tp: 183.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 68.0000 - accuracy: 0.9994 - precision: 0.8883 - recall: 0.7291 - auc: 0.9394 - prc: 0.7852 - val_loss: 0.0031 - val_tp: 51.0000 - val_fp: 7.0000 - val_tn: 36385.0000 - val_fn: 13.0000 - val_accuracy: 0.9995 - val_precision: 0.8793 - val_recall: 0.7969 - val_auc: 0.9214 - val_prc: 0.8150
Epoch 9/9
4557/4557 [==============================] - 9s 2ms/step - loss: 0.0031 - tp: 182.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 69.0000 - accuracy: 0.9994 - precision: 0.8878 - recall: 0.7251 - auc: 0.9395 - prc: 0.7932 - val_loss: 0.0033 - val_tp: 51.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8500 - val_recall: 0.7969 - val_auc: 0.9209 - val_prc: 0.81372.0 - ETA: 0s - loss: 0.0029 - tp: 162.0000 - fp: 20.0000 - tn: 129105.0000 - fn: 57.0000 - accuracy: 0.9994 - precision: 0.8901 - recall: 0.7397 - a - ETA: 0s - loss: 0.0029 - tp: 165.0000 - fp: 20.0000 - tn: 131757.0000 - fn: 58.0000 - accuracy: 0.9994 - precision: 0.8919 - recall: 0.7399 - auc: 0.9410 - prc: 0.81 - ETA: 0s - loss: 0.0029 - tp: 165.0000 - fp: 20.0000 - tn: 131789.0000 - fn: 58.0000 - accuracy: 0.9994 - precision: 0.8919 - recall

<tensorflow.python.keras.callbacks.History at 0x7fcd64de65d0>


# Model evaluation
eval_result = hypermodel.evaluate(Xtest, ytest)
print(eval_result)

1781/1781 [==============================] - 3s 1ms/step - loss: 0.0034 - tp: 73.0000 - fp: 13.0000 - tn: 56851.0000 - fn: 25.0000 - accuracy: 0.9993 - precision: 0.8488 - recall: 0.7449 - auc: 0.9327 - prc: 0.7961
[0.0033857824746519327, 73.0, 13.0, 56851.0, 25.0, 0.9993329048156738, 0.8488371968269348, 0.7448979616165161, 0.9326589107513428, 0.7960590720176697]


time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
      '{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))

Time taken to run whole notebook: 0 hr 7 min 51 secs

	Time	V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13	V14	V15	V16	V17	V18	V19	V20	V21	V22	V23	V24	V25	V26	V27	V28	Amount
0	0.0	-1.359807	-0.072781	2.536347	1.378155	-0.338321	0.462388	0.239599	0.098698	0.363787	0.090794	-0.551600	-0.617801	-0.991390	-0.311169	1.468177	-0.470401	0.207971	0.025791	0.403993	0.251412	-0.018307	0.277838	-0.110474	0.066928	0.128539	-0.189115	0.133558	-0.021053	149.62
1	0.0	1.191857	0.266151	0.166480	0.448154	0.060018	-0.082361	-0.078803	0.085102	-0.255425	-0.166974	1.612727	1.065235	0.489095	-0.143772	0.635558	0.463917	-0.114805	-0.183361	-0.145783	-0.069083	-0.225775	-0.638672	0.101288	-0.339846	0.167170	0.125895	-0.008983	0.014724	2.69
2	1.0	-1.358354	-1.340163	1.773209	0.379780	-0.503198	1.800499	0.791461	0.247676	-1.514654	0.207643	0.624501	0.066084	0.717293	-0.165946	2.345865	-2.890083	1.109969	-0.121359	-2.261857	0.524980	0.247998	0.771679	0.909412	-0.689281	-0.327642	-0.139097	-0.055353	-0.059752	378.66
3	1.0	-0.966272	-0.185226	1.792993	-0.863291	-0.010309	1.247203	0.237609	0.377436	-1.387024	-0.054952	-0.226487	0.178228	0.507757	-0.287924	-0.631418	-1.059647	-0.684093	1.965775	-1.232622	-0.208038	-0.108300	0.005274	-0.190321	-1.175575	0.647376	-0.221929	0.062723	0.061458	123.50
4	2.0	-1.158233	0.877737	1.548718	0.403034	-0.407193	0.095921	0.592941	-0.270533	0.817739	0.753074	-0.822843	0.538196	1.345852	-1.119670	0.175121	-0.451449	-0.237033	-0.038195	0.803487	0.408542	-0.009431	0.798278	-0.137458	0.141267	-0.206010	0.502292	0.219422	0.215153	69.99

	0	1	2
0	epoch	on_predict_batch_end	on_train_batch_end
1	history	on_predict_begin	on_train_begin
2	model	on_predict_end	on_train_end
3	on_batch_begin	on_test_batch_begin	params
4	on_batch_end	on_test_batch_end	set_model
5	on_epoch_begin	on_test_begin	set_params
6	on_epoch_end	on_test_end	validation_data
7	on_predict_batch_begin	on_train_batch_begin

	loss	tp	fp	tn	fn	accuracy	precision	recall	auc	prc	val_loss	val_tp	val_fp	val_tn	val_fn	val_accuracy	val_precision	val_recall	val_auc	val_prc	epoch
0	1.532056	124.0	604.0	181357.0	191.0	0.995638	0.170330	0.393651	0.802429	0.211909	0.012996	60.0	29.0	45461.0	19.0	0.998947	0.674157	0.759494	0.951981	0.643560	0
1	0.720191	211.0	1149.0	180812.0	104.0	0.993126	0.155147	0.669841	0.907501	0.432612	0.016953	68.0	49.0	45441.0	11.0	0.998683	0.581197	0.860759	0.972293	0.708751	1
12	0.222140	286.0	7789.0	174172.0	29.0	0.957109	0.035418	0.907937	0.971043	0.197877	0.097169	77.0	1087.0	44403.0	2.0	0.976102	0.066151	0.974684	0.985996	0.600866	12
13	0.245960	274.0	7859.0	174102.0	41.0	0.956659	0.033690	0.869841	0.965964	0.202640	0.099953	77.0	1124.0	44366.0	2.0	0.975290	0.064113	0.974684	0.986617	0.600739	13

	0	1	2
0	SummaryWriter	graph	text
1	audio	histogram	trace_export
2	create_file_writer	image	trace_off
3	create_noop_writer	record_if	trace_on
4	experimental	scalar	write
5	flush	should_record_summaries

	Model	Description	Accuracy	Precision	Recall	F1	AUC	AUCPR	Profit
1	keras hparams	medium model	0.978494	0.066872	0.887755	0.124375	0.933203	0.477410	-88,800
0	keras	medium model	0.993961	0.197044	0.816327	0.317460	0.946087	0.680284	-4,200

Table of Contents

Description¶

Colab¶

Imports¶

Useful Scripts¶

Load the data¶

Data Processing¶

Class balance¶

Feature Selection¶

Log transform¶

Train-validation-test split with stratify¶

Normalize the data¶

Modelling: Keras Sequential¶

Bias Initializer¶

Class weights¶

Params and Metrics¶

Build the Model¶

Fit the model¶

Model Evaluation¶

Training History Plots¶

Confusion Matrix¶

Accuracy Recall Scores¶

Hyperparameter tuning using hparams¶

Hyperparameter tuning using keras-tuner¶

Build hypermodel¶

Keras tuner Random Search¶

keras tuner hyperband¶

Use the best model iteration¶

Time Taken¶

	num_units	dropout	optimizer	auc
3	16	0.200000	sgd	0.986942
1	16	0.100000	sgd	0.987914
5	32	0.100000	sgd	0.990269
7	32	0.200000	sgd	0.992330
4	32	0.100000	adam	0.993755
6	32	0.200000	adam	0.995096
2	16	0.200000	adam	0.995921
0	16	0.100000	adam	0.997168

	0	1	2
0	BayesianOptimization	Oracle	distribute
1	CloudLogger	RandomSearch	division
2	HyperModel	SklearnTuner	engine
3	HyperParameter	Tuner	oracles
4	HyperParameters	absolute_import	print_function
5	Hyperband	applications	protos
6	Logger	check_tf_version	tuners
7	Objective	config	utils

	0	1	2
0	directory	on_batch_end	remaining_trials
1	distribution_strategy	on_epoch_begin	results_summary
2	executions_per_trial	on_epoch_end	run_trial
3	get_best_hyperparameters	on_search_begin	save
4	get_best_models	on_search_end	save_model
5	get_state	on_trial_begin	search
6	get_trial_dir	on_trial_end	search_space_summary
7	hypermodel	oracle	seed
8	load_model	project_dir	set_state
9	logger	project_name	tuner_id
10	on_batch_begin	reload

	0	1	2
0	COLOR_CYCLE	np	prop_cycle
1	HistoryPlotter	plt