Fraud detection using Keras.
Resources
import time
time_start_notebook = time.time()
%%capture
# capture will not print in notebook
import os
import sys
ENV_COLAB = 'google.colab' in sys.modules
if ENV_COLAB:
    # install modules
    !pip install scikit-plot
    !pip install lrcurve
    !pip install watermark
    !pip install git+https://github.com/tensorflow/docs
    !pip install keras-tuner --upgrade
    !pip install tensorflow_addons

    print('Environment: Google Colaboratory.')
# check GPU availability in colab
import sys
ENV_COLAB = 'google.colab' in sys.modules
if ENV_COLAB:
    !nvidia-smi
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
import os
import sys
# random state
SEED = 0
RNG = np.random.RandomState(SEED)
# Jupyter notebook settings for pandas
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 100) # None for all the rows
pd.set_option('display.max_colwidth', 50)
import scipy
from scipy import stats
# scale and split
import sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
# deep learning
import tensorflow as tf
from tensorflow import keras # Do not import keras, import from tensorflow
import tensorflow_addons as tfa # tfa.metrics.F1Score
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
from tensorboard.plugins.hparams import api as hp
# model evaluation
import scikitplot
from scikitplot import metrics as skmetrics
import lrcurve
from lrcurve import KerasLearningCurve
import keras_tuner as kt
# versions
%load_ext watermark
%watermark -a "Bhishan Poudel" -d -v -m
print()
%watermark -iv
The watermark extension is already loaded. To reload it, use: %reload_ext watermark
Bhishan Poudel 2021-08-11
CPython 3.7.7
IPython 7.19.0
compiler   : Clang 4.0.1 (tags/RELEASE_401/final)
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit
tensorflow 2.5.0
scikitplot 0.3.7
tensorflow_addons 0.13.0
json 2.0.9
seaborn 0.10.1
sklearn 0.23.2
keras_tuner 1.0.3
autopep8 1.5.2
scipy 1.4.1
pandas 1.1.1
numpy 1.19.5
matplotlib 3.2.1
tensorflow.keras 2.5.0
def show_methods(obj, ncols=3):
    """List the public attributes/methods of an object in a dataframe."""
    x = [i for i in dir(obj) if not i.startswith('_')]
    return pd.DataFrame(np.array_split(x, ncols)).T.fillna('')
def set_random_seed(seed):
    import os
    import random
    import numpy as np
    import tensorflow as tf

    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
def model_evaluation(model_name, desc, ytest, yprobs1d, df_eval=None,
                     show=True, col_sort='Profit', threshold=0.5):
    if df_eval is None:
        df_eval = pd.DataFrame({'Model': [],
                                'Description': [],
                                'Accuracy': [],
                                'Precision': [],
                                'Recall': [],
                                'F1': [],
                                'AUC': [],
                                'AUCPR': [],
                                'Profit': [],
                                })

    # squeeze yprobs
    yprobs1d = np.array(yprobs1d).squeeze()

    # make sure yprobs is floating (float16, float32, float64, ...)
    if not isinstance(yprobs1d[0], np.floating):
        print(f"Here yprobs1d[0] = {yprobs1d[0]}. "
              "Are you sure this is y_prob and NOT the y_pred?")

    # assert length
    assert len(ytest) == len(yprobs1d), "ytest and yprobs1d must be of the same length."

    # predictions from probabilities
    ypreds = (yprobs1d > threshold).astype(np.int8)

    # model evaluation
    average = 'binary'
    prec, rec, thr = sklearn.metrics.precision_recall_curve(ytest, yprobs1d)
    auc_pr = sklearn.metrics.auc(rec, prec)

    # profit from the confusion matrix (tn, fp, fn, tp from the flattened 2x2 matrix)
    cm = sklearn.metrics.confusion_matrix(ytest, ypreds)
    tn, fp, fn, tp = cm.ravel()
    profit = 400*tp - 200*fn - 100*fp

    row_eval = [model_name, desc,
                sklearn.metrics.accuracy_score(ytest, ypreds),
                sklearn.metrics.precision_score(ytest, ypreds, average=average),
                sklearn.metrics.recall_score(ytest, ypreds, average=average),
                sklearn.metrics.f1_score(ytest, ypreds, average=average),
                sklearn.metrics.roc_auc_score(ytest, yprobs1d),
                auc_pr,
                profit,
                ]
    df_eval.loc[len(df_eval)] = row_eval
    df_eval = df_eval.drop_duplicates()
    df_eval = df_eval.sort_values(col_sort)
    df_eval['Profit'] = df_eval['Profit'].astype(int)

    if show:
        print("Confusion Matrix:")
        print(cm)
        display(df_eval.style
                .background_gradient(subset=[col_sort])
                .format({'Profit': "{:,}"})
                )
    return df_eval
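The Profit column encodes the simple cost matrix hard-coded above: each caught fraud (TP) is worth +400, each missed fraud (FN) costs 200, and each false alarm (FP) costs 100. As an illustrative check (the confusion-matrix numbers are taken from the first Keras model evaluated further below):
# Illustrative check of the Profit formula in model_evaluation, using the
# confusion matrix of the "medium" Keras model shown later: tn=56538, fp=326, fn=18, tp=80.
tn, fp, fn, tp = 56538, 326, 18, 80
print(400*tp - 200*fn - 100*fp)   # -4200, matching the Profit column of the evaluation table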
ifile = "https://github.com/bhishanpdl/Datasets/blob/master/Projects/Fraud_detection/raw/creditcard.csv.zip?raw=true"
# ifile = '../data/raw/creditcard.csv.zip'
df_raw = pd.read_csv(ifile,compression='zip')
print(df_raw.shape)
df_raw.head()
(284807, 31)
Time | V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 | V13 | V14 | V15 | V16 | V17 | V18 | V19 | V20 | V21 | V22 | V23 | V24 | V25 | V26 | V27 | V28 | Amount | Class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | -1.359807 | -0.072781 | 2.536347 | 1.378155 | -0.338321 | 0.462388 | 0.239599 | 0.098698 | 0.363787 | 0.090794 | -0.551600 | -0.617801 | -0.991390 | -0.311169 | 1.468177 | -0.470401 | 0.207971 | 0.025791 | 0.403993 | 0.251412 | -0.018307 | 0.277838 | -0.110474 | 0.066928 | 0.128539 | -0.189115 | 0.133558 | -0.021053 | 149.62 | 0 |
1 | 0.0 | 1.191857 | 0.266151 | 0.166480 | 0.448154 | 0.060018 | -0.082361 | -0.078803 | 0.085102 | -0.255425 | -0.166974 | 1.612727 | 1.065235 | 0.489095 | -0.143772 | 0.635558 | 0.463917 | -0.114805 | -0.183361 | -0.145783 | -0.069083 | -0.225775 | -0.638672 | 0.101288 | -0.339846 | 0.167170 | 0.125895 | -0.008983 | 0.014724 | 2.69 | 0 |
2 | 1.0 | -1.358354 | -1.340163 | 1.773209 | 0.379780 | -0.503198 | 1.800499 | 0.791461 | 0.247676 | -1.514654 | 0.207643 | 0.624501 | 0.066084 | 0.717293 | -0.165946 | 2.345865 | -2.890083 | 1.109969 | -0.121359 | -2.261857 | 0.524980 | 0.247998 | 0.771679 | 0.909412 | -0.689281 | -0.327642 | -0.139097 | -0.055353 | -0.059752 | 378.66 | 0 |
3 | 1.0 | -0.966272 | -0.185226 | 1.792993 | -0.863291 | -0.010309 | 1.247203 | 0.237609 | 0.377436 | -1.387024 | -0.054952 | -0.226487 | 0.178228 | 0.507757 | -0.287924 | -0.631418 | -1.059647 | -0.684093 | 1.965775 | -1.232622 | -0.208038 | -0.108300 | 0.005274 | -0.190321 | -1.175575 | 0.647376 | -0.221929 | 0.062723 | 0.061458 | 123.50 | 0 |
4 | 2.0 | -1.158233 | 0.877737 | 1.548718 | 0.403034 | -0.407193 | 0.095921 | 0.592941 | -0.270533 | 0.817739 | 0.753074 | -0.822843 | 0.538196 | 1.345852 | -1.119670 | 0.175121 | -0.451449 | -0.237033 | -0.038195 | 0.803487 | 0.408542 | -0.009431 | 0.798278 | -0.137458 | 0.141267 | -0.206010 | 0.502292 | 0.219422 | 0.215153 | 69.99 | 0 |
target = 'Class'
display(df_raw[target].value_counts())
sns.countplot(x=df_raw[target])
0    284315
1       492
Name: Class, dtype: int64
<matplotlib.axes._subplots.AxesSubplot at 0x7fcd44f23c50>
neg, pos = np.bincount(df_raw['Class'])
total = neg + pos
print('Examples:\n Total: {}\n Positive: {} ({:.2f}% of total)\n'.format(
total, pos, 100 * pos / total))
Examples:
    Total: 284807
    Positive: 492 (0.17% of total)
cols_drop = ['Time']
df = df_raw.drop(cols_drop,axis=1)
df.shape
(284807, 30)
eps = 0.001  # shift zero amounts to 0.1 cent so that log(0) is avoided
df['Log_Amount'] = np.log(df.pop('Amount') + eps)
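A minimal illustration (not part of the original run) of why the small eps is added before taking the log:
# Without eps, zero-amount transactions would map to -inf after the log transform.
print(np.log(0.0 + eps))   # ~ -6.91, finite
# np.log(0.0) would return -inf (with a RuntimeWarning)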
from sklearn.model_selection import train_test_split
target = 'Class'
df_Xtrain_orig,df_Xtest,ser_ytrain_orig,ser_ytest = train_test_split(df.drop([target],axis=1),
df[target],
test_size=0.2,
stratify=df[target],
random_state=SEED)
df_Xtrain,df_Xvalid,ser_ytrain,ser_yvalid = train_test_split(df_Xtrain_orig,
ser_ytrain_orig,
test_size=0.2,
stratify=ser_ytrain_orig,
random_state=SEED)
ytrain = np.array(ser_ytrain)
yvalid = np.array(ser_yvalid)
ytest = np.array(ser_ytest)
df.shape, df_Xtrain.shape, ser_ytrain.shape
((284807, 30), (182276, 29), (182276,))
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(df_Xtrain)
Xtrain = scaler.transform(df_Xtrain)
Xvalid = scaler.transform(df_Xvalid)
Xtest = scaler.transform(df_Xtest)
# clip the values
Xtrain = np.clip(Xtrain, -5, 5)
Xvalid = np.clip(Xvalid, -5, 5)
Xtest = np.clip(Xtest, -5, 5)
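A rough check, not from the original notebook, of how much the clip to [-5, 5] actually changes: the fraction of standardized training cells outside that range.
# Fraction of standardized training values that fall outside [-5, 5]
# and are therefore clipped (recomputed from the unclipped transform).
frac_clipped = (np.abs(scaler.transform(df_Xtrain)) > 5).mean()
print(f"fraction of clipped training cells: {frac_clipped:.4%}")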
neg, pos = np.bincount(df_raw['Class'])
total = neg + pos
bias_initializer = np.log([pos/neg])
bias_initializer
array([-6.35935934])
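Setting the output-layer bias to log(pos/neg) makes the untrained network predict roughly the base fraud rate instead of 0.5, which stabilizes the first epochs on such an imbalanced set. A quick sanity check (illustrative, not from the original run):
# sigmoid(log(pos/neg)) equals pos/(pos+neg), i.e. the base fraud rate (~0.17%).
b = np.log(pos / neg)
print(1.0 / (1.0 + np.exp(-b)), pos / total)   # both ~0.0017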
weight_for_0 = (1 / neg)*(total)/2.0
weight_for_1 = (1 / pos)*(total)/2.0
class_weight = {0: weight_for_0, 1: weight_for_1}
print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))
Weight for class 0: 0.50
Weight for class 1: 289.44
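The weighting makes each class contribute equally to the loss while keeping the total weight equal to the number of samples; a small illustrative check:
# Each class carries half of the total weight, and the grand total stays equal to `total`.
print(weight_for_0 * neg, weight_for_1 * pos)    # each == total / 2
print(weight_for_0 * neg + weight_for_1 * pos)   # == total == 284807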
n_feats = Xtrain.shape[-1]
class_weight = {0: weight_for_0, 1: weight_for_1}
#============================================================
PARAMS_MODEL = {
# layer 1
'L1_units': 16,
'L1_act': 'elu',
'L1_dropout': 0.5,
# optimizer
'adam_lr': 1e-3,
}
#============================================================
METRICS = [
tf.keras.metrics.TruePositives(name='tp'),
tf.keras.metrics.FalsePositives(name='fp'),
tf.keras.metrics.TrueNegatives(name='tn'),
tf.keras.metrics.FalseNegatives(name='fn'),
tf.keras.metrics.BinaryAccuracy(name='accuracy'),
tf.keras.metrics.Precision(name='precision'),
tf.keras.metrics.Recall(name='recall'),
tf.keras.metrics.AUC(name='auc'),
tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]
#============================================================
PARAMS_FIT = {
'epochs': 100,
'batch_size': 2048,
'class_weight0': class_weight[0],
'class_weight1': class_weight[1],
'patience': 10,
'shuffle': True,
}
#============================================================
# callbacks
cb_early = tf.keras.callbacks.EarlyStopping(
monitor='val_prc',
verbose=1,
patience=PARAMS_FIT['patience'],
mode='max',
restore_best_weights=True)
#cb_checkpt = keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.h5")
cb_lr = lrcurve.KerasLearningCurve()
cb_dots = tfdocs.modeling.EpochDots()
callbacks = [cb_early, cb_lr,cb_dots]
def make_model(metrics=METRICS, bias_initializer=None, n_feats=n_feats):
    if bias_initializer is not None:
        bias_initializer = tf.keras.initializers.Constant(bias_initializer)

    model = keras.Sequential([
        # layer 1
        keras.layers.Dense(PARAMS_MODEL['L1_units'],
                           activation=PARAMS_MODEL['L1_act'],
                           kernel_regularizer=tf.keras.regularizers.l2(0.0001),
                           input_shape=(n_feats,)),
        keras.layers.Dropout(PARAMS_MODEL['L1_dropout']),
        # last layer is dense 1 with sigmoid activation
        keras.layers.Dense(1, activation='sigmoid',
                           bias_initializer=bias_initializer),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=PARAMS_MODEL['adam_lr']),
        loss=keras.losses.BinaryCrossentropy(),
        metrics=metrics)
    return model
model = make_model()
model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_2 (Dense)              (None, 16)                480
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17
=================================================================
Total params: 497
Trainable params: 497
Non-trainable params: 0
_________________________________________________________________
type(model)
tensorflow.python.keras.engine.sequential.Sequential
from tensorflow.keras.utils import plot_model
# save keras sequential model
plot_model(model,'model_medium.png')
# For functional model
# tf.keras.utils.plot_model(model, 'model_medium.png')
%%time
set_random_seed(SEED)
model = make_model(bias_initializer=bias_initializer)
history = model.fit(
Xtrain,
ytrain,
batch_size=PARAMS_FIT['batch_size'],
epochs=PARAMS_FIT['epochs'],
callbacks=callbacks,
validation_data=(Xvalid, yvalid),
verbose=0,
class_weight=class_weight
)
Epoch: 0, accuracy:0.9956, auc:0.8024, fn:191.0000, fp:604.0000, loss:1.5321, prc:0.2119, precision:0.1703, recall:0.3937, tn:181357.0000, tp:124.0000, val_accuracy:0.9989, val_auc:0.9520, val_fn:19.0000, val_fp:29.0000, val_loss:0.0130, val_prc:0.6436, val_precision:0.6742, val_recall:0.7595, val_tn:45461.0000, val_tp:60.0000, .............Restoring model weights from the end of the best epoch. .Epoch 00014: early stopping CPU times: user 11.2 s, sys: 1.4 s, total: 12.6 s Wall time: 8.69 s
matplotlib.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
import tensorflow_docs.plots
show_methods(tensorflow_docs.plots)
0 | 1 | 2 | |
---|---|---|---|
0 | COLOR_CYCLE | np | prop_cycle |
1 | HistoryPlotter | plt |
show_methods(history)
0 | 1 | 2 | |
---|---|---|---|
0 | epoch | on_predict_batch_end | on_train_batch_end |
1 | history | on_predict_begin | on_train_begin |
2 | model | on_predict_end | on_train_end |
3 | on_batch_begin | on_test_batch_begin | params |
4 | on_batch_end | on_test_batch_end | set_model |
5 | on_epoch_begin | on_test_begin | set_params |
6 | on_epoch_end | on_test_end | validation_data |
7 | on_predict_batch_begin | on_train_batch_begin |
# tfdocs.plots.HistoryPlotter?
df_history = pd.DataFrame(history.history)
df_history['epoch'] = history.epoch # we need to add it separately
print(df_history.shape)
df_history.head(2).append(df_history.tail(2))
(14, 21)
loss | tp | fp | tn | fn | accuracy | precision | recall | auc | prc | val_loss | val_tp | val_fp | val_tn | val_fn | val_accuracy | val_precision | val_recall | val_auc | val_prc | epoch | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.532056 | 124.0 | 604.0 | 181357.0 | 191.0 | 0.995638 | 0.170330 | 0.393651 | 0.802429 | 0.211909 | 0.012996 | 60.0 | 29.0 | 45461.0 | 19.0 | 0.998947 | 0.674157 | 0.759494 | 0.951981 | 0.643560 | 0 |
1 | 0.720191 | 211.0 | 1149.0 | 180812.0 | 104.0 | 0.993126 | 0.155147 | 0.669841 | 0.907501 | 0.432612 | 0.016953 | 68.0 | 49.0 | 45441.0 | 11.0 | 0.998683 | 0.581197 | 0.860759 | 0.972293 | 0.708751 | 1 |
12 | 0.222140 | 286.0 | 7789.0 | 174172.0 | 29.0 | 0.957109 | 0.035418 | 0.907937 | 0.971043 | 0.197877 | 0.097169 | 77.0 | 1087.0 | 44403.0 | 2.0 | 0.976102 | 0.066151 | 0.974684 | 0.985996 | 0.600866 | 12 |
13 | 0.245960 | 274.0 | 7859.0 | 174102.0 | 41.0 | 0.956659 | 0.033690 | 0.869841 | 0.965964 | 0.202640 | 0.099953 | 77.0 | 1124.0 | 44366.0 | 2.0 | 0.975290 | 0.064113 | 0.974684 | 0.986617 | 0.600739 | 13 |
history.history.keys()
dict_keys(['loss', 'tp', 'fp', 'tn', 'fn', 'accuracy', 'precision', 'recall', 'auc', 'prc', 'val_loss', 'val_tp', 'val_fp', 'val_tn', 'val_fn', 'val_accuracy', 'val_precision', 'val_recall', 'val_auc', 'val_prc'])
dict_history = {'medium': history}
metrics = ['loss', 'auc', 'precision', 'recall']
# plotter can plot only one metric
plotter = tfdocs.plots.HistoryPlotter(metric = 'loss', smoothing_std=10)
plotter.plot(dict_history)
def plot_metrics(history):
    metrics = ['loss', 'auc', 'precision', 'recall']
    for n, metric in enumerate(metrics):
        name = metric.replace("_", " ").capitalize()
        plt.subplot(2, 2, n+1)
        plt.plot(history.epoch, history.history[metric],
                 color=colors[0], label='Train')
        plt.plot(history.epoch, history.history['val_'+metric],
                 color=colors[0], linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        if metric == 'loss':
            plt.ylim([0, plt.ylim()[1]])
        elif metric == 'auc':
            plt.ylim([0.8, 1])
        else:
            plt.ylim([0, 1])
        plt.legend()
plot_metrics(history)
from sklearn import metrics as skmetrics
yprobs = model.predict(Xtest) # in keras, predict_proba is deprecated.
yprobs[:5]
array([[0.00280005], [0.00117731], [0.01025069], [0.00233176], [0.00363791]], dtype=float32)
yprobs.shape, yprobs.squeeze().shape
((56962, 1), (56962,))
yprobs1d = yprobs.flatten()
ypreds = (yprobs1d>0.5).astype(np.int8)
ypreds[:5]
array([0, 0, 0, 0, 0], dtype=int8)
skmetrics.confusion_matrix(ytest, ypreds)
array([[56538, 326], [ 18, 80]])
from scikitplot import metrics as skpmetrics
skpmetrics.plot_confusion_matrix(ytest,ypreds)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcd60815350>
def plot_cm(labels, predictions, p=0.5):
    cm = sklearn.metrics.confusion_matrix(labels, predictions > p)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title('Confusion matrix @{:.2f}'.format(p))
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

    print('Legitimate Transactions Detected (True Negatives): ', cm[0][0])
    print('Legitimate Transactions Incorrectly Detected (False Positives): ', cm[0][1])
    print('Fraudulent Transactions Missed (False Negatives): ', cm[1][0])
    print('Fraudulent Transactions Detected (True Positives): ', cm[1][1])
    print('Total Fraudulent Transactions: ', np.sum(cm[1]))
plot_cm(ytest,ypreds)
Legitimate Transactions Detected (True Negatives):  56538
Legitimate Transactions Incorrectly Detected (False Positives):  326
Fraudulent Transactions Missed (False Negatives):  18
Fraudulent Transactions Detected (True Positives):  80
Total Fraudulent Transactions:  98
desc = "medium model"
yprobs = model.predict(Xtest) # in keras, predict_proba is deprecated.
yprobs1d = yprobs.flatten()
ypreds = (yprobs1d>0.5).astype(np.int8)
yprobs1d[:5]
array([0.00280005, 0.00117731, 0.01025069, 0.00233176, 0.00363791], dtype=float32)
type(yprobs1d[0])
numpy.float32
isinstance(yprobs1d[0],float)
False
isinstance(yprobs1d[0],np.floating)
True
df_eval = model_evaluation("keras", desc, ytest, yprobs1d,df_eval=None)
Confusion Matrix: [[56538 326] [ 18 80]]
Model | Description | Accuracy | Precision | Recall | F1 | AUC | AUCPR | Profit | |
---|---|---|---|---|---|---|---|---|---|
0 | keras | medium model | 0.993961 | 0.197044 | 0.816327 | 0.317460 | 0.946087 | 0.680284 | -4,200 |
from tensorboard.plugins.hparams import api as hp
%load_ext tensorboard
The tensorboard extension is already loaded. To reload it, use: %reload_ext tensorboard
# Clear any logs from previous runs
!rm -rf ./logs/
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))
# METRIC_ACCURACY = 'accuracy'
METRIC_NAME = 'auc'  # Keras logs the metric under the lower-case name 'auc'; using the upper-case name here gives an error later.
METRIC = 'AUC'       # metric string passed to model.compile(metrics=...)
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC, display_name=METRIC_NAME)],
    )
cb_early = tf.keras.callbacks.EarlyStopping(
monitor='val_'+METRIC_NAME,
verbose=1,
patience=PARAMS_FIT['patience'],
mode='max',
restore_best_weights=True)
cb_dots = tfdocs.modeling.EpochDots()
callbacks = [cb_early, cb_dots]
def train_test_model(hparams):
    model = tf.keras.models.Sequential([
        # first layer
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS],
                              activation=tf.nn.relu,
                              input_shape=(n_feats,)),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        # last layer
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss='binary_crossentropy',
        metrics=[METRIC],
    )
    model.fit(Xtrain,
              ytrain,
              batch_size=PARAMS_FIT['batch_size'],
              epochs=PARAMS_FIT['epochs'],
              callbacks=callbacks,
              validation_data=(Xvalid, yvalid),
              class_weight=class_weight,
              verbose=0,
              )
    _, mymetric = model.evaluate(Xvalid, yvalid)
    return mymetric
show_methods(tf.summary)
0 | 1 | 2 | |
---|---|---|---|
0 | SummaryWriter | graph | text |
1 | audio | histogram | trace_export |
2 | create_file_writer | image | trace_off |
3 | create_noop_writer | record_if | trace_on |
4 | experimental | scalar | write |
5 | flush | should_record_summaries |
hparams = {
HP_NUM_UNITS: HP_NUM_UNITS.domain.values[0],
HP_DROPOUT: HP_DROPOUT.domain.min_value,
HP_OPTIMIZER: HP_OPTIMIZER.domain.values[0],
}
hp.hparams(hparams)  # record the values used in this trial (no summary writer is active in this test cell)
mymetric = train_test_model(hparams)
tf.summary.scalar(METRIC, mymetric, step=1)  # returns False below because no default summary writer is set here
Epoch: 0, auc:0.7376, loss:0.6258, val_auc:0.9782, val_loss:0.5502, ............Restoring model weights from the end of the best epoch. .Epoch 00013: early stopping 1425/1425 [==============================] - 1s 720us/step - loss: 0.3081 - auc: 0.9893
<tf.Tensor: shape=(), dtype=bool, numpy=False>
def run(run_dir, hparams):
    mymetric = None
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        mymetric = train_test_model(hparams)
        tf.summary.scalar(METRIC, mymetric, step=1)
    return mymetric
session_num = 0
lst_series = []

for num_units in HP_NUM_UNITS.domain.values:
    for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
        for optimizer in HP_OPTIMIZER.domain.values:
            hparams = {
                HP_NUM_UNITS: num_units,
                HP_DROPOUT: dropout_rate,
                HP_OPTIMIZER: optimizer,
            }
            run_name = "run-%d" % session_num
            print('\n--- Starting trial: %s' % run_name)
            dict_params = {h.name: hparams[h] for h in hparams}
            ser = pd.Series(dict_params)
            mymetric = run('logs/hparam_tuning/' + run_name, hparams)
            ser[METRIC_NAME] = mymetric
            lst_series.append(ser)
            session_num += 1

# In the log below, the "Epoch: 0" values are from before fitting;
# the final "loss / auc" values are from evaluation after fitting.
--- Starting trial: run-0 Epoch: 0, auc:0.8588, loss:0.4884, val_auc:0.9912, val_loss:0.5324, ............Restoring model weights from the end of the best epoch. .Epoch 00013: early stopping 1425/1425 [==============================] - 1s 754us/step - loss: 0.3068 - auc: 0.9972 0s - loss: 0.3064 - --- Starting trial: run-1 Epoch: 0, auc:0.8652, loss:0.5198, val_auc:0.9674, val_loss:0.6584, ...................................................Restoring model weights from the end of the best epoch. .Epoch 00052: early stopping 1425/1425 [==============================] - 1s 776us/step - loss: 0.1086 - auc: 0.9879 --- Starting trial: run-2 Epoch: 0, auc:0.6288, loss:0.7768, val_auc:0.9292, val_loss:0.5340, ....................Restoring model weights from the end of the best epoch. .Epoch 00021: early stopping 1425/1425 [==============================] - 1s 785us/step - loss: 0.1365 - auc: 0.9959 --- Starting trial: run-3 Epoch: 0, auc:0.8563, loss:0.5908, val_auc:0.9561, val_loss:0.7649, ....................................................Restoring model weights from the end of the best epoch. .Epoch 00053: early stopping 1425/1425 [==============================] - 1s 797us/step - loss: 0.1121 - auc: 0.9869 --- Starting trial: run-4 Epoch: 0, auc:0.7864, loss:0.5691, val_auc:0.9792, val_loss:0.4454, .................Restoring model weights from the end of the best epoch. .Epoch 00018: early stopping 1425/1425 [==============================] - 1s 845us/step - loss: 0.1165 - auc: 0.9938 --- Starting trial: run-5 Epoch: 0, auc:0.9033, loss:0.4280, val_auc:0.9810, val_loss:0.5153, .................................Restoring model weights from the end of the best epoch. .Epoch 00034: early stopping 1425/1425 [==============================] - 2s 1ms/step - loss: 0.1280 - auc: 0.9903 --- Starting trial: run-6 Epoch: 0, auc:0.8931, loss:0.4900, val_auc:0.9886, val_loss:0.5400, ...............Restoring model weights from the end of the best epoch. .Epoch 00016: early stopping 1425/1425 [==============================] - 1s 825us/step - loss: 0.1465 - auc: 0.9951 --- Starting trial: run-7 Epoch: 0, auc:0.8325, loss:0.5484, val_auc:0.9778, val_loss:0.6555, ................Restoring model weights from the end of the best epoch. .Epoch 00017: early stopping 1425/1425 [==============================] - 1s 933us/step - loss: 0.2175 - auc: 0.9923
df_hpo = pd.concat(lst_series,axis=1).T # hyperparameter optimization dataframe
df_hpo['num_units'] = df_hpo['num_units'].astype(int)
df_hpo[['dropout','auc']] = df_hpo[['dropout','auc']].astype(float)
df_hpo.sort_values('auc').style.background_gradient(subset=['auc'])
num_units | dropout | optimizer | auc | |
---|---|---|---|---|
3 | 16 | 0.200000 | sgd | 0.986942 |
1 | 16 | 0.100000 | sgd | 0.987914 |
5 | 32 | 0.100000 | sgd | 0.990269 |
7 | 32 | 0.200000 | sgd | 0.992330 |
4 | 32 | 0.100000 | adam | 0.993755 |
6 | 32 | 0.200000 | adam | 0.995096 |
2 | 16 | 0.200000 | adam | 0.995921 |
0 | 16 | 0.100000 | adam | 0.997168 |
%tensorboard --logdir logs/hparam_tuning
ERROR: Could not find `tensorboard`. Please ensure that your PATH contains an executable `tensorboard` program, or explicitly specify the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY` environment variable.
# predict using best model from hyperparameter tuning
params_best = df_hpo.nlargest(1,'auc').iloc[0].to_dict()
params_best = {i:v for i,v in params_best.items() if i !='auc'}
params_best
{'num_units': 16, 'dropout': 0.1, 'optimizer': 'adam'}
model = tf.keras.models.Sequential([
    # first layer
    tf.keras.layers.Dense(params_best['num_units'],
                          activation=tf.nn.relu,
                          input_shape=(n_feats,)),
    tf.keras.layers.Dropout(params_best['dropout']),
    # last layer
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# compile the model
model.compile(
    optimizer=params_best['optimizer'],
    loss='binary_crossentropy',
    metrics=[METRIC],
)

# fit the model
history = model.fit(Xtrain,
                    ytrain,
                    batch_size=PARAMS_FIT['batch_size'],
                    epochs=PARAMS_FIT['epochs'],
                    callbacks=callbacks,
                    validation_data=(Xvalid, yvalid),
                    class_weight=class_weight,
                    verbose=0,
                    )

# prediction: pass the probabilities (not the 0/1 predictions) to model_evaluation
yprobs = model.predict(Xtest)
yprobs1d = yprobs.flatten()
ypreds = (yprobs1d > 0.5).astype(np.int8)
df_eval = model_evaluation("keras hparams", desc, ytest, yprobs1d, df_eval=df_eval)
Epoch: 0, auc:0.8512, loss:0.4795, val_auc:0.9589, val_loss:0.4121, ..................Restoring model weights from the end of the best epoch. .Epoch 00019: early stopping Here yprobs1d[0] = 0. Are you sure this is y_prob and NOT the y_pred? Confusion Matrix: [[55650 1214] [ 11 87]]
Model | Description | Accuracy | Precision | Recall | F1 | AUC | AUCPR | Profit | |
---|---|---|---|---|---|---|---|---|---|
1 | keras hparams | medium model | 0.978494 | 0.066872 | 0.887755 | 0.124375 | 0.933203 | 0.477410 | -88,800 |
0 | keras | medium model | 0.993961 | 0.197044 | 0.816327 | 0.317460 | 0.946087 | 0.680284 | -4,200 |
df_eval
Model | Description | Accuracy | Precision | Recall | F1 | AUC | AUCPR | Profit | |
---|---|---|---|---|---|---|---|---|---|
1 | keras hparams | medium model | 0.978494 | 0.066872 | 0.887755 | 0.124375 | 0.933203 | 0.477410 | -88800 |
0 | keras | medium model | 0.993961 | 0.197044 | 0.816327 | 0.317460 | 0.946087 | 0.680284 | -4200 |
import keras_tuner as kt
show_methods(kt)
0 | 1 | 2 | |
---|---|---|---|
0 | BayesianOptimization | Oracle | distribute |
1 | CloudLogger | RandomSearch | division |
2 | HyperModel | SklearnTuner | engine |
3 | HyperParameter | Tuner | oracles |
4 | HyperParameters | absolute_import | print_function |
5 | Hyperband | applications | protos |
6 | Logger | check_tf_version | tuners |
7 | Objective | config | utils |
from keras_tuner import HyperModel, RandomSearch, Hyperband, BayesianOptimization
METRICS = [
keras.metrics.TruePositives(name='tp'),
keras.metrics.FalsePositives(name='fp'),
keras.metrics.TrueNegatives(name='tn'),
keras.metrics.FalseNegatives(name='fn'),
keras.metrics.BinaryAccuracy(name='accuracy'),
keras.metrics.Precision(name='precision'),
keras.metrics.Recall(name='recall'),
keras.metrics.AUC(name='auc'),
keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]
class BinaryClassificationHyperModel(HyperModel):
    def __init__(self, input_shape, bias_initializer):
        self.input_shape = input_shape
        self.bias_initializer = tf.keras.initializers.Constant(bias_initializer)

    def build(self, hp):
        # hyperparameters
        hp_units1 = hp.Int('units1', 16, 48, 16, default=16)
        hp_act1 = hp.Choice(
            'dense_activation',
            values=['relu', 'elu'],
            default='relu',
        )
        hp_dropout1 = hp.Float(
            'dropout',
            min_value=0.2,
            max_value=0.7,
            default=0.5,
            step=0.1)
        hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
        hp_optimizer = hp.Choice('optimizer', values=['adam', 'sgd'])

        # define the model
        model = tf.keras.Sequential()

        # first layer
        model.add(
            tf.keras.layers.Dense(
                units=hp_units1,
                activation=hp_act1,
                kernel_initializer='zeros',
                bias_initializer='zeros',
                input_shape=self.input_shape
            )
        )
        model.add(tf.keras.layers.Dropout(hp_dropout1))

        # last layer
        model.add(tf.keras.layers.Dense(1, activation='sigmoid',
                                        bias_initializer=self.bias_initializer)
                  )

        # use the sampled optimizer and learning rate
        # (these hyperparameters were defined above but previously unused)
        if hp_optimizer == 'adam':
            optimizer = tf.keras.optimizers.Adam(learning_rate=hp_learning_rate)
        else:
            optimizer = tf.keras.optimizers.SGD(learning_rate=hp_learning_rate)
        model.compile(
            optimizer=optimizer, loss='binary_crossentropy', metrics=METRICS
        )
        return model

input_shape = (Xtrain.shape[1],)
# bias_initializer is the one-element numpy array computed above
hypermodel = BinaryClassificationHyperModel(input_shape, bias_initializer)
# kt.RandomSearch?
tuner_rs = kt.RandomSearch(
hypermodel,
objective=kt.Objective("val_prc", direction="max"),
seed=SEED,
max_trials=4,
executions_per_trial=1,
overwrite=True
)
show_methods(tuner_rs)
0 | 1 | 2 | |
---|---|---|---|
0 | directory | on_batch_end | remaining_trials |
1 | distribution_strategy | on_epoch_begin | results_summary |
2 | executions_per_trial | on_epoch_end | run_trial |
3 | get_best_hyperparameters | on_search_begin | save |
4 | get_best_models | on_search_end | save_model |
5 | get_state | on_trial_begin | search |
6 | get_trial_dir | on_trial_end | search_space_summary |
7 | hypermodel | oracle | seed |
8 | load_model | project_dir | set_state |
9 | logger | project_name | tuner_id |
10 | on_batch_begin | reload |
# tuner_rs.search?
# tuner_rs.search(*fit_args, **fit_kwargs)
%%time
tuner_rs.search(Xtrain, ytrain,
epochs=2,
validation_split=0.2,
verbose=1,
)
Trial 4 Complete [00h 00m 22s] val_prc: 0.7927567958831787 Best val_prc So Far: 0.8070851564407349 Total elapsed time: 00h 01m 20s INFO:tensorflow:Oracle triggered exit CPU times: user 1min 36s, sys: 9.23 s, total: 1min 45s Wall time: 1min 20s
best_model = tuner_rs.get_best_models(num_models=1)[0]
# prediction
yprobs = best_model.predict(Xtest) # in keras predict gives probs not preds
yprobs1d = yprobs.flatten()
ypreds = (yprobs.flatten()>0.5).astype(np.int8)
df_eval = model_evaluation("keras-tuner randomsearch", desc, ytest, yprobs1d,df_eval=df_eval)
Confusion Matrix: [[56852 12] [ 28 70]]
Model | Description | Accuracy | Precision | Recall | F1 | AUC | AUCPR | Profit | |
---|---|---|---|---|---|---|---|---|---|
1 | keras hparams | medium model | 0.978494 | 0.066872 | 0.887755 | 0.124375 | 0.933203 | 0.477410 | -88,800 |
0 | keras | medium model | 0.993961 | 0.197044 | 0.816327 | 0.317460 | 0.946087 | 0.680284 | -4,200 |
2 | keras-tuner randomsearch | medium model | 0.999298 | 0.853659 | 0.714286 | 0.777778 | 0.970462 | 0.797611 | 21,200 |
Hyperband determines the number of models to train in a bracket by computing 1 + log_factor(max_epochs) and rounding it up to the nearest integer.
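For concreteness, here is a small illustrative computation of that bracket size for the settings used below (keras-tuner's default factor is 3; max_epochs=2 as in the tuner definition):
# Illustrative computation of the Hyperband bracket size:
# models per bracket = ceil(1 + log_factor(max_epochs)).
import math
max_epochs = 2   # as passed to kt.Hyperband below
factor = 3       # keras-tuner default
print(math.ceil(1 + math.log(max_epochs, factor)))   # 2 models per bracket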
tuner_hb = kt.Hyperband(
hypermodel,
max_epochs=2,
objective=kt.Objective("val_auc", direction="max"),
seed=SEED,
executions_per_trial=1,
hyperband_iterations=2,
# factor=3,
# directory='my_dir',
# project_name='hyperband'
overwrite=True,
)
cb_early = tf.keras.callbacks.EarlyStopping(
monitor='val_prc',
verbose=1,
patience=5,
mode='max',
restore_best_weights=True)
tuner_hb.search(Xtrain, ytrain,
epochs=10,
validation_split=0.2,
verbose=1,
callbacks = [cb_early] # patience must be smaller than max_epochs
)
Trial 4 Complete [00h 00m 16s] val_auc: 0.9214053153991699 Best val_auc So Far: 0.9292096495628357 Total elapsed time: 00h 01m 23s INFO:tensorflow:Oracle triggered exit
best_model = tuner_hb.get_best_models(num_models=1)[0]
# prediction
yprobs = best_model.predict(Xtest)
yprobs1d = yprobs.flatten()
ypreds = (yprobs.flatten()>0.5).astype(np.int8)
df_eval = model_evaluation("keras-tuner hyperband", desc, ytest, yprobs1d,df_eval=df_eval)
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1 WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2 WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details. WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1 WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2 WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details. Confusion Matrix: [[56852 12] [ 32 66]]
Model | Description | Accuracy | Precision | Recall | F1 | AUC | AUCPR | Profit | |
---|---|---|---|---|---|---|---|---|---|
1 | keras hparams | medium model | 0.978494 | 0.066872 | 0.887755 | 0.124375 | 0.933203 | 0.477410 | -88,800 |
0 | keras | medium model | 0.993961 | 0.197044 | 0.816327 | 0.317460 | 0.946087 | 0.680284 | -4,200 |
3 | keras-tuner hyperband | medium model | 0.999228 | 0.846154 | 0.673469 | 0.750000 | 0.968274 | 0.782802 | 18,800 |
2 | keras-tuner randomsearch | medium model | 0.999298 | 0.853659 | 0.714286 | 0.777778 | 0.970462 | 0.797611 | 21,200 |
# Get the optimal hyperparameters
best_hps = tuner_hb.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters and train it on the data for N epochs
model = tuner_hb.hypermodel.build(best_hps)
history = model.fit(Xtrain, ytrain, epochs=10, validation_split=0.2)

val_prc_per_epoch = history.history['val_prc']
best_epoch = val_prc_per_epoch.index(max(val_prc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))
Epoch 1/10 4557/4557 [==============================] - 10s 2ms/step - loss: 0.0040 - tp: 212.0000 - fp: 37.0000 - tn: 181924.0000 - fn: 103.0000 - accuracy: 0.9992 - precision: 0.8514 - recall: 0.6730 - auc: 0.9165 - prc: 0.7342 - val_loss: 0.0033 - val_tp: 48.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 16.0000 - val_accuracy: 0.9993 - val_precision: 0.8571 - val_recall: 0.7500 - val_auc: 0.9214 - val_prc: 0.7891 Epoch 2/10 4557/4557 [==============================] - 6s 1ms/step - loss: 0.0035 - tp: 179.0000 - fp: 24.0000 - tn: 145545.0000 - fn: 72.0000 - accuracy: 0.9993 - precision: 0.8818 - recall: 0.7131 - auc: 0.9294 - prc: 0.7572 - val_loss: 0.0035 - val_tp: 52.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 12.0000 - val_accuracy: 0.9995 - val_precision: 0.8667 - val_recall: 0.8125 - val_auc: 0.9211 - val_prc: 0.7950 Epoch 3/10 4557/4557 [==============================] - 9s 2ms/step - loss: 0.0035 - tp: 180.0000 - fp: 25.0000 - tn: 145544.0000 - fn: 71.0000 - accuracy: 0.9993 - precision: 0.8780 - recall: 0.7171 - auc: 0.9276 - prc: 0.7463 - val_loss: 0.0033 - val_tp: 52.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 12.0000 - val_accuracy: 0.9995 - val_precision: 0.8667 - val_recall: 0.8125 - val_auc: 0.9209 - val_prc: 0.8127 Epoch 4/10 4557/4557 [==============================] - 8s 2ms/step - loss: 0.0033 - tp: 185.0000 - fp: 24.0000 - tn: 145545.0000 - fn: 66.0000 - accuracy: 0.9994 - precision: 0.8852 - recall: 0.7371 - auc: 0.9294 - prc: 0.7706 - val_loss: 0.0033 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9211 - val_prc: 0.7992 Epoch 5/10 4557/4557 [==============================] - 7s 2ms/step - loss: 0.0033 - tp: 181.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 70.0000 - accuracy: 0.9994 - precision: 0.8916 - recall: 0.7211 - auc: 0.9295 - prc: 0.7591 - val_loss: 0.0033 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9291 - val_prc: 0.8079 Epoch 6/10 4557/4557 [==============================] - 8s 2ms/step - loss: 0.0032 - tp: 190.0000 - fp: 24.0000 - tn: 145545.0000 - fn: 61.0000 - accuracy: 0.9994 - precision: 0.8879 - recall: 0.7570 - auc: 0.9275 - prc: 0.7732 - val_loss: 0.0032 - val_tp: 45.0000 - val_fp: 4.0000 - val_tn: 36388.0000 - val_fn: 19.0000 - val_accuracy: 0.9994 - val_precision: 0.9184 - val_recall: 0.7031 - val_auc: 0.9138 - val_prc: 0.8036 Epoch 7/10 4557/4557 [==============================] - 9s 2ms/step - loss: 0.0031 - tp: 185.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 66.0000 - accuracy: 0.9994 - precision: 0.8894 - recall: 0.7371 - auc: 0.9334 - prc: 0.7793 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9213 - val_prc: 0.8040 Epoch 8/10 4557/4557 [==============================] - 9s 2ms/step - loss: 0.0032 - tp: 186.0000 - fp: 20.0000 - tn: 145549.0000 - fn: 65.0000 - accuracy: 0.9994 - precision: 0.9029 - recall: 0.7410 - auc: 0.9294 - prc: 0.7715 - val_loss: 0.0031 - val_tp: 50.0000 - val_fp: 7.0000 - val_tn: 36385.0000 - val_fn: 14.0000 - val_accuracy: 0.9994 - val_precision: 0.8772 - val_recall: 0.7812 - val_auc: 0.9214 - val_prc: 0.8162 Epoch 9/10 4557/4557 [==============================] - 12s 3ms/step - loss: 0.0032 - tp: 182.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 69.0000 - 
accuracy: 0.9994 - precision: 0.8922 - recall: 0.7251 - auc: 0.9315 - prc: 0.7823 - val_loss: 0.0032 - val_tp: 52.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 12.0000 - val_accuracy: 0.9994 - val_precision: 0.8525 - val_recall: 0.8125 - val_auc: 0.9208 - val_prc: 0.8197 Epoch 10/10 4557/4557 [==============================] - 10s 2ms/step - loss: 0.0031 - tp: 181.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 70.0000 - accuracy: 0.9994 - precision: 0.8873 - recall: 0.7211 - auc: 0.9354 - prc: 0.7959 - val_loss: 0.0031 - val_tp: 46.0000 - val_fp: 4.0000 - val_tn: 36388.0000 - val_fn: 18.0000 - val_accuracy: 0.9994 - val_precision: 0.9200 - val_recall: 0.7188 - val_auc: 0.9214 - val_prc: 0.8183 Best epoch: 9
# Re-instantiate the hypermodel and train it with the optimal number of epochs from above.
hypermodel = tuner_hb.hypermodel.build(best_hps)
# Retrain the model
hypermodel.fit(Xtrain, ytrain, epochs=best_epoch, validation_split=0.2)
Epoch 1/9 4557/4557 [==============================] - 10s 2ms/step - loss: 0.0040 - tp: 211.0000 - fp: 32.0000 - tn: 181929.0000 - fn: 104.0000 - accuracy: 0.9993 - precision: 0.8683 - recall: 0.6698 - auc: 0.9166 - prc: 0.7427 - val_loss: 0.0034 - val_tp: 50.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 14.0000 - val_accuracy: 0.9994 - val_precision: 0.8621 - val_recall: 0.7812 - val_auc: 0.9292 - val_prc: 0.7983 Epoch 2/9 4557/4557 [==============================] - 7s 2ms/step - loss: 0.0033 - tp: 183.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 68.0000 - accuracy: 0.9994 - precision: 0.8927 - recall: 0.7291 - auc: 0.9314 - prc: 0.7680 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8644 - val_recall: 0.7969 - val_auc: 0.9211 - val_prc: 0.7982 Epoch 3/9 4557/4557 [==============================] - 7s 1ms/step - loss: 0.0034 - tp: 182.0000 - fp: 30.0000 - tn: 145539.0000 - fn: 69.0000 - accuracy: 0.9993 - precision: 0.8585 - recall: 0.7251 - auc: 0.9275 - prc: 0.7501 - val_loss: 0.0033 - val_tp: 52.0000 - val_fp: 8.0000 - val_tn: 36384.0000 - val_fn: 12.0000 - val_accuracy: 0.9995 - val_precision: 0.8667 - val_recall: 0.8125 - val_auc: 0.9208 - val_prc: 0.8121 Epoch 4/9 4557/4557 [==============================] - 7s 2ms/step - loss: 0.0033 - tp: 179.0000 - fp: 26.0000 - tn: 145543.0000 - fn: 72.0000 - accuracy: 0.9993 - precision: 0.8732 - recall: 0.7131 - auc: 0.9274 - prc: 0.7670 - val_loss: 0.0032 - val_tp: 51.0000 - val_fp: 7.0000 - val_tn: 36385.0000 - val_fn: 13.0000 - val_accuracy: 0.9995 - val_precision: 0.8793 - val_recall: 0.7969 - val_auc: 0.9210 - val_prc: 0.8033 Epoch 5/9 4557/4557 [==============================] - 7s 1ms/step - loss: 0.0033 - tp: 185.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 66.0000 - accuracy: 0.9994 - precision: 0.8937 - recall: 0.7371 - auc: 0.9314 - prc: 0.7612 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8500 - val_recall: 0.7969 - val_auc: 0.9212 - val_prc: 0.7987 Epoch 6/9 4557/4557 [==============================] - 9s 2ms/step - loss: 0.0032 - tp: 188.0000 - fp: 22.0000 - tn: 145547.0000 - fn: 63.0000 - accuracy: 0.9994 - precision: 0.8952 - recall: 0.7490 - auc: 0.9294 - prc: 0.7787 - val_loss: 0.0032 - val_tp: 46.0000 - val_fp: 4.0000 - val_tn: 36388.0000 - val_fn: 18.0000 - val_accuracy: 0.9994 - val_precision: 0.9200 - val_recall: 0.7188 - val_auc: 0.9138 - val_prc: 0.801701.0000 - fn: 19.0000 - accuracy: 0.9994 - precision: 0.9167 - recall: 0.7765 - auc: 0.9402 - pr - ETA: 4s - Epoch 7/9 4557/4557 [==============================] - 10s 2ms/step - loss: 0.0032 - tp: 179.0000 - fp: 25.0000 - tn: 145544.0000 - fn: 72.0000 - accuracy: 0.9993 - precision: 0.8775 - recall: 0.7131 - auc: 0.9414 - prc: 0.7736 - val_loss: 0.0034 - val_tp: 51.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8500 - val_recall: 0.7969 - val_auc: 0.9212 - val_prc: 0.8005 Epoch 8/9 4557/4557 [==============================] - 8s 2ms/step - loss: 0.0031 - tp: 183.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 68.0000 - accuracy: 0.9994 - precision: 0.8883 - recall: 0.7291 - auc: 0.9394 - prc: 0.7852 - val_loss: 0.0031 - val_tp: 51.0000 - val_fp: 7.0000 - val_tn: 36385.0000 - val_fn: 13.0000 - val_accuracy: 0.9995 - val_precision: 0.8793 - val_recall: 0.7969 - val_auc: 0.9214 - val_prc: 0.8150 Epoch 9/9 4557/4557 
[==============================] - 9s 2ms/step - loss: 0.0031 - tp: 182.0000 - fp: 23.0000 - tn: 145546.0000 - fn: 69.0000 - accuracy: 0.9994 - precision: 0.8878 - recall: 0.7251 - auc: 0.9395 - prc: 0.7932 - val_loss: 0.0033 - val_tp: 51.0000 - val_fp: 9.0000 - val_tn: 36383.0000 - val_fn: 13.0000 - val_accuracy: 0.9994 - val_precision: 0.8500 - val_recall: 0.7969 - val_auc: 0.9209 - val_prc: 0.8137
<tensorflow.python.keras.callbacks.History at 0x7fcd64de65d0>
# Model evaluation
eval_result = hypermodel.evaluate(Xtest, ytest)
print(eval_result)
1781/1781 [==============================] - 3s 1ms/step - loss: 0.0034 - tp: 73.0000 - fp: 13.0000 - tn: 56851.0000 - fn: 25.0000 - accuracy: 0.9993 - precision: 0.8488 - recall: 0.7449 - auc: 0.9327 - prc: 0.7961 [0.0033857824746519327, 73.0, 13.0, 56851.0, 25.0, 0.9993329048156738, 0.8488371968269348, 0.7448979616165161, 0.9326589107513428, 0.7960590720176697]
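evaluate() returns the loss followed by the compiled metrics in order; pairing the values with model.metrics_names makes the list above easier to read (a small sketch):
# Pair the evaluate() output with the metric names for readability.
print(dict(zip(hypermodel.metrics_names, eval_result)))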
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
'{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken to run whole notebook: 0 hr 7 min 51 secs