import time

# Wall-clock timestamp taken at the very start of the notebook; the final cell
# subtracts it from the current time to report the total run time.
time_start_notebook = time.time()


%%capture
# Colab bootstrap cell. NOTE(review): `%%capture` is an IPython cell magic that
# presumably hides this cell's output (pip/git logs and the print below).
import sys
# True only when the notebook is running inside Google Colab.
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    # packages used by later cells (version report and evaluation plots)
    !pip install watermark
    !pip install scikit-plot

    # HPO: scikit-hyperband is cloned and put on sys.path so that its
    # `search` module can be imported directly
    !git clone https://github.com/thuijskens/scikit-hyperband.git
    sys.path.append('scikit-hyperband/hyperband')
    from search import HyperbandSearchCV

    # deep learning: live learning-curve callback for Keras
    !pip install lrcurve


    print('Environment: Google Colab')


import numpy as np
import pandas as pd
import seaborn as sns
import os,sys,time
import matplotlib.pyplot as plt
# Apply seaborn's default plot theme.
sns.set()

from tqdm import tqdm_notebook as tqdm

# Seed reused for the data split and the random generators.
SEED = 100

# Use fully-qualified option names: the abbreviated 'max_columns' pattern
# matches more than one option on newer pandas releases and raises OptionError.
pd.set_option('display.max_columns',100)
pd.set_option('display.max_colwidth',200)
pd.set_option('plotting.backend','matplotlib') # matplotlib, bokeh, altair, plotly


import tensorflow as tf
import lrcurve
from lrcurve import KerasLearningCurve


# Load the watermark extension and print the versions of the imported modules
# (the version table is recorded right after this cell).
%load_ext watermark
%watermark -iv

tensorflow 2.3.1
numpy      1.18.5
pandas     1.1.1
json       2.0.9
autopep8   1.5.2
seaborn    0.10.1


def show_methods(obj, ncols=4,contains=None):
    """Tabulate the public attribute names of ``obj``.

    Parameters
    ----------
    obj : object
        Any object accepted by ``dir``.
    ncols : int
        Number of columns the names are spread over.
    contains : str, optional
        When given, only names containing this substring are kept.

    Returns
    -------
    pandas.DataFrame
        The names laid out column by column; unused cells hold ''.
    """
    names = []
    for name in dir(obj):
        # skip private / dunder attributes
        if name[0] == '_':
            continue
        if contains is not None and contains not in name:
            continue
        names.append(name)

    # one chunk per output column; chunks may differ in length by one
    chunks = np.array_split(names, ncols)
    table = pd.DataFrame(chunks).transpose()
    return table.fillna('')


def set_random_seed(seed):
    """Seed the Python, NumPy and TensorFlow random generators with ``seed``.

    Also writes ``seed`` to the ``PYTHONHASHSEED`` environment variable.
    NOTE(review): that variable is read at interpreter start-up, so setting it
    here presumably only affects processes started afterwards.

    Parameters
    ----------
    seed : int
        Value handed to every seeding function.
    """
    import os
    import random
    import numpy as np
    import tensorflow as tf

    os.environ['PYTHONHASHSEED'] = str(seed)

    # same order as before: TensorFlow, NumPy, then the stdlib generator
    for seeder in (tf.random.set_seed, np.random.seed, random.seed):
        seeder(seed)


# Local copies of the raw train / test CSV files.
path_data_train = '../data/raw/train.csv'
path_data_test = '../data/raw/test.csv'

# On Colab the local files are not available, so read the same CSVs from GitHub.
if ENV_COLAB:
    path_data_train = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/train.csv'
    path_data_test = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/test.csv'


df_train = pd.read_csv(path_data_train)
df_test = pd.read_csv(path_data_test)

print(df_train.shape)
print(df_test.shape)
# Preview the first and last two rows. pd.concat replaces DataFrame.append,
# which was removed in pandas 2.0; concat behaves the same on older versions.
pd.concat([df_train.head(2), df_train.tail(2)])

(5634, 21)
(1409, 21)


# Name of the label column; it is dropped from the features and mapped to 0/1.
target_name = 'Churn'


def clean_data(dfx):
    """Return a cleaned, one-hot encoded copy of ``dfx``.

    Steps:
      1. drop ``customerID`` and ``gender`` (EDA showed gender has no effect),
      2. convert ``TotalCharges`` to numbers, replacing unparsable values by 0,
      3. one-hot encode every remaining object-dtype column, keeping all levels.

    Parameters
    ----------
    dfx : pandas.DataFrame
        Raw feature frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        The cleaned feature frame.
    """
    # work on a copy so the caller's frame is left untouched
    trimmed = dfx.copy().drop(columns=['customerID','gender'])

    # impute: values that cannot be parsed become NaN and then 0
    charges = pd.to_numeric(trimmed['TotalCharges'],errors='coerce').fillna(0)
    trimmed = trimmed.assign(TotalCharges=charges)

    # one hot encoding of the remaining text columns
    categorical = trimmed.select_dtypes('object').columns
    return pd.get_dummies(trimmed,columns=categorical,drop_first=False)


# Feature frames: the raw data without the label, cleaned and one-hot encoded.
df_Xtrain_full = clean_data(df_train.drop(target_name,axis=1))
df_Xtest  = clean_data(df_test.drop(target_name,axis=1))

# Train and test are encoded independently, so a category level missing from
# one of them would give different columns. Align test to the training layout:
# levels absent from test become all-zero columns, unseen levels are dropped.
df_Xtest = df_Xtest.reindex(columns=df_Xtrain_full.columns, fill_value=0)

# Binary labels: 'No' -> 0, 'Yes' -> 1.
ser_ytrain_full = df_train[target_name].map({'No':0,'Yes':1})
ser_ytest = df_test[target_name].map({'No':0,'Yes':1})

ytrain_full = np.array(ser_ytrain_full).ravel()
ytest = np.array(ser_ytest).ravel()


# Sanity check: grand totals and the number of missing values in both frames.
print(df_Xtrain_full.sum().sum(), df_Xtest.sum().sum(),
      df_Xtrain_full.isna().sum().sum(), df_Xtest.isna().sum().sum())

df_Xtrain_full.head()

13426940.15 3413079.1500000004 0 0


from sklearn.model_selection import train_test_split

# Hold out 20% of the training rows for validation, stratified on the label
# and seeded with SEED.
(df_Xtrain, df_Xvalid,
 ser_ytrain, ser_yvalid) = train_test_split(
    df_Xtrain_full,
    ser_ytrain_full,
    stratify=ser_ytrain_full,
    test_size=0.2,
    random_state=SEED,
)

# Flat NumPy label vectors for the split parts.
ytrain = np.ravel(ser_ytrain.to_numpy())
yvalid = np.ravel(ser_yvalid.to_numpy())


from sklearn.preprocessing import StandardScaler

# Fit the scaler on the full training features only, then apply that same
# fitted transform to both the training and the test features.
scaler = StandardScaler().fit(df_Xtrain_full)

Xtrain_full, Xtest = (
    scaler.transform(frame) for frame in (df_Xtrain_full, df_Xtest)
)


# For imbalanced data, weight each class by the inverse of its row count so
# the rarer class counts more in the loss.
neg, pos = np.bincount(ytrain_full)
weight_for_0, weight_for_1 = 1.0 / neg, 1.0 / pos

class_weight = dict(zip((0, 1), (weight_for_0, weight_for_1)))
print(class_weight)

{0: 0.00024160425223483932, 1: 0.0006688963210702341}


# For imbalanced data, starting the output bias at log(pos/neg) helps the
# model converge faster.
# NOTE(review): no model in the visible code passes this value as a
# bias_initializer — confirm whether it should be wired in.
bias_init_last_layer = np.log(np.array([pos / neg]))
print(bias_init_last_layer)

[-1.01832801]


import keras
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

from keras.layers import Dense, Activation, Embedding, Flatten, LeakyReLU, BatchNormalization, Dropout
from keras.activations import relu, sigmoid
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.constraints import maxnorm
from sklearn.model_selection import StratifiedKFold

import sklearn
# Show the scorer names sklearn knows about; 'roc_auc' from this list is the
# value passed as `scoring` to the grid search further down.
sklearn.metrics.SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_weighted'])


# show_methods(keras.metrics)


def build_fn(units, activation,n_feats):
    """Build and compile a fully-connected binary classifier.

    Parameters
    ----------
    units : iterable of int
        Width of each hidden Dense layer, in order.
    activation : str
        Activation used by every hidden layer.
    n_feats : int
        Number of input features (input_dim of the first layer).

    Returns
    -------
    Sequential
        Compiled model: each hidden Dense layer is followed by Dropout(0.1),
        then one sigmoid output unit. Uses the Adamax optimizer, binary
        cross-entropy loss and an AUC metric named 'roc_auc'.
    """
    model = Sequential()

    for position, width in enumerate(units):
        # only the very first layer declares the input width
        first_layer_args = {'input_dim': n_feats} if position == 0 else {}
        model.add(Dense(width,
                        kernel_initializer='he_uniform',
                        activation=activation,
                        **first_layer_args))
        model.add(Dropout(0.1))

    # output layer
    model.add(Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'))

    # compile
    auc_metric = keras.metrics.AUC(name='roc_auc')
    model.compile(optimizer='Adamax',
                  loss='binary_crossentropy',
                  metrics=[auc_metric])
    return model


%%time
# Grid-search setup cell. The search itself is commented out below; only the
# search object is constructed and a previously found result is recorded.

import warnings
warnings.simplefilter('ignore')

# sklearn-compatible wrapper around build_fn so GridSearchCV can drive it
model = KerasClassifier(build_fn=build_fn, verbose=0)
n_feats     = df_Xtrain_full.shape[1]


# candidate values for each hyper-parameter
units       = [(6, 3, 3), (10, 10), (45, 30, 15)]
batch_size  = [128, 256]
activations = ['sigmoid', 'relu']
epochs      = [30]

# units / activation / n_feats are build_fn arguments; n_feats is a
# single-value list so it is passed through unchanged
params_grid = dict(
    units      = units,
    activation = activations,
    batch_size = batch_size,
    epochs     = epochs,
    n_feats    = [n_feats]
    )


grid = GridSearchCV(model,params_grid,cv=5,n_jobs=1,scoring='roc_auc') 
# use n_jobs=1 to prevent memory leak

# grid = grid.fit(df_Xtrain_full, ytrain_full)
# best_params = grid.best_params_

# NOTE: best_params is a plain string holding a recorded result, not a dict.
best_params = """

{'activation': 'sigmoid',
 'batch_size': 128,
 'epochs': 30,
 'n_feats': 43,
 'units': (45, 30, 15)}
"""

CPU times: user 419 µs, sys: 4 µs, total: 423 µs
Wall time: 502 µs


# Build the network with the units/activation recorded in best_params and
# print its layer summary.
model = build_fn(units=(45, 30, 15), activation='sigmoid',n_feats=n_feats)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 45)                1980      
_________________________________________________________________
dropout (Dropout)            (None, 45)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 30)                1380      
_________________________________________________________________
dropout_1 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 15)                465       
_________________________________________________________________
dropout_2 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 16        
=================================================================
Total params: 3,841
Trainable params: 3,841
Non-trainable params: 0
_________________________________________________________________


# Train on the standardized features. Xtrain_full / Xtest are the arrays
# produced by the StandardScaler in the scaling section; the raw frames were
# previously fed to the network, leaving the scaled arrays unused.
history = model.fit(
    Xtrain_full,ytrain_full,
    batch_size=128,
    epochs=30,
    verbose=0,
    callbacks=[],
    class_weight=class_weight
)

# Save only after training so the file holds the fitted weights rather than
# the freshly initialised ones; make sure the target folder exists first.
os.makedirs('../models', exist_ok=True)
model.save('../models/keras_model.h5')

# Positive-class probabilities, a two-column (neg, pos) probability matrix for
# the plotting helpers, and hard labels at the 0.5 threshold.
yprobs   = model.predict(Xtest).flatten()
yprobs2d = np.c_[1-yprobs,yprobs]
ypreds   = (yprobs>0.5).astype(np.int8)


def model_eval_bin(model_name,ytest,ypreds,yprobs2d,show_plots=True):
    """Evaluate a binary classifier: print, display, save and plot its scores.

    Parameters
    ----------
    model_name : str
        Row label of the result table; also used in the CSV file name.
    ytest : array-like
        True 0/1 labels.
    ypreds : array-like
        Predicted 0/1 labels.
    yprobs2d : array-like of shape (n_samples, 2)
        Predicted probabilities, column 0 = negative class, column 1 =
        positive class.
    show_plots : bool
        When True, draw precision-recall, ROC and confusion-matrix plots.

    Side effects
    ------------
    Prints the classification report and confusion matrix, displays the result
    table, and writes it to ``model_<model_name>.csv`` in the current folder
    on Colab or in ``../outputs`` otherwise.
    """
    import sklearn.metrics as skmetrics
    import scikitplot.metrics as skpmetrics
    import os

    acc       = skmetrics.accuracy_score(ytest,ypreds)
    precision = skmetrics.precision_score(ytest,ypreds)
    recall    = skmetrics.recall_score(ytest,ypreds)
    f1        = skmetrics.f1_score(ytest,ypreds)

    # ROC AUC is a ranking metric: score it with the positive-class
    # probabilities, not the thresholded labels, which collapse the curve to a
    # single operating point.
    pos_scores = np.asarray(yprobs2d)[:, 1]
    auc        = skmetrics.roc_auc_score(ytest,pos_scores)

    print(skmetrics.classification_report(ytest,ypreds))
    print(skmetrics.confusion_matrix(ytest,ypreds))

    df_res = pd.DataFrame({'Accuracy':[acc],
                          'Precision': [precision],
                          'Recall': [recall],
                          'F1-score': [f1],
                          'AUC': [auc]},index=[model_name])

    display(df_res.style.format("{:.4f}"))

    # Build the path with os.path.join: the previous '.' + 'model_...'
    # concatenation produced a hidden file named '.model_<name>.csv' on Colab.
    out_dir = '.' if ENV_COLAB else '../outputs'
    os.makedirs(out_dir, exist_ok=True)
    df_res.to_csv(os.path.join(out_dir, f'model_{model_name}.csv'),index=True)

    if show_plots:
        skpmetrics.plot_precision_recall(ytest,yprobs2d) # more focus on minority
        skpmetrics.plot_roc_curve(ytest,yprobs2d) # equal focus on both groups
        skpmetrics.plot_confusion_matrix(ytest,ypreds)

# Score the first network on the test set, with plots.
model_eval_bin('keras',ytest,ypreds,yprobs2d,show_plots=True)

              precision    recall  f1-score   support

           0       0.91      0.66      0.77      1035
           1       0.47      0.82      0.59       374

    accuracy                           0.70      1409
   macro avg       0.69      0.74      0.68      1409
weighted avg       0.79      0.70      0.72      1409

[[687 348]
 [ 69 305]]


def plot_keras_history(history,metric):
    """Plot the per-epoch curve of ``metric`` from a Keras training history.

    Parameters
    ----------
    history : object with a ``history`` dict
        Result of ``model.fit``; ``history.history[metric]`` holds the
        training values and ``history.history['val_' + metric]`` the
        validation values when validation data was used.
    metric : str
        Metric name as recorded in the history.
    """
    curves = history.history
    labels = ['train']
    plt.plot(curves[metric])

    # The validation curve exists only when fit() was given validation data.
    val_key = 'val_'+metric
    if val_key in curves:
        plt.plot(curves[val_key])
        # 'val_' values come from the validation split, not the test set
        labels.append('valid')

    plt.title(metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(labels, loc='upper left')
    plt.show()


%%time

import tensorflow as tf
import lrcurve
from lrcurve import KerasLearningCurve
from keras.constraints import maxnorm

# Name shared by the Keras AUC metric, the early-stopping monitor and the
# history plot.
METRIC = 'roc_auc' # use this name so that we can do sklearn gridsearch

# Size of the training split: rows feed the LR schedule, columns the input layer.
n_rows, n_feats = df_Xtrain.shape

set_random_seed(SEED)

def get_model(n_rows,n_feats,batch_size=32,decay_epochs=50):
    """Build and compile a small max-norm-constrained binary classifier.

    Architecture: Dense(16, relu) -> Dropout(0.2) -> Dense(8, relu) ->
    Dropout(0.2) -> Dense(1, sigmoid). Both hidden layers use a max-norm
    kernel constraint of 3.

    Parameters
    ----------
    n_rows : int
        Number of training rows; used to size the learning-rate schedule.
    n_feats : int
        Number of input features.
    batch_size : int, default 32
        Batch size assumed by the learning-rate schedule. The default keeps
        the previous hard-coded value; pass the batch size actually given to
        ``fit`` to make the schedule match training.
    decay_epochs : int, default 50
        Number of epochs' worth of steps used as ``decay_steps``.

    Returns
    -------
    Sequential
        Model compiled with Adam (inverse-time-decay learning rate starting at
        0.001), binary cross-entropy and an AUC metric named ``METRIC``.
    """
    model = Sequential()

    # first layer (input_dim)
    model.add(Dense(16, input_dim=n_feats,
                    activation='relu',
                    kernel_constraint=maxnorm(3)))
    model.add(Dropout(rate=0.2))

    # middle layers
    model.add(Dense(8, activation='relu',
                    kernel_constraint=maxnorm(3)))
    model.add(Dropout(rate=0.2))

    # last layer
    model.add(Dense(1, activation='sigmoid'))

    # compile the model
    # n_rows / batch_size is the number of optimizer steps per epoch
    steps_per_epoch = n_rows/batch_size
    lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
        0.001,
        decay_steps=steps_per_epoch*decay_epochs,
        decay_rate=1,
        staircase=False)

    optimizer = tf.keras.optimizers.Adam(lr_schedule)
    model.compile(
        loss = "binary_crossentropy", 
        optimizer = optimizer, 
        metrics=[tf.keras.metrics.AUC(name=METRIC)]
        )
    
    return model


# Early stopping on the validation AUC. The direction is stated explicitly
# with mode='max': AUC is better when higher, and the default 'auto' mode of
# some Keras releases may not infer that for a metric with this name.
cb_early  = tf.keras.callbacks.EarlyStopping(monitor='val_'+METRIC,
                                             mode='max',
                                             patience=50,
                                             restore_best_weights=True)
# Live learning-curve display while training.
cb_lr = lrcurve.KerasLearningCurve()
# Early stopping is currently switched off; only the curve callback is active.
callbacks = [
            #  cb_early,
             cb_lr]


model = get_model(n_rows=n_rows,n_feats=n_feats)

# Feed the network standardized features instead of the raw frames, using the
# scaler fitted in the scaling section.
# NOTE(review): that scaler was fitted on the full training frame, which
# includes the validation rows — refit on df_Xtrain if strict separation of
# the validation split is required.
Xtrain_scaled = scaler.transform(df_Xtrain)
Xvalid_scaled = scaler.transform(df_Xvalid)
Xtest_scaled  = scaler.transform(df_Xtest)

history = model.fit(Xtrain_scaled, ytrain,
                    validation_data=(Xvalid_scaled, yvalid),
                    epochs=100,
                    batch_size=64,
                    callbacks=callbacks,
                    class_weight=class_weight,
                    verbose=0
                    )

# Positive-class probabilities, the two-column (neg, pos) matrix for the
# evaluation helper, and hard labels at the 0.5 threshold.
yprobs   = model.predict(Xtest_scaled).flatten()
yprobs2d = np.c_[1-yprobs,yprobs]
ypreds   = (yprobs>0.5).astype(np.int8)

plot_keras_history(history,METRIC)
model_eval_bin('keras_2',ytest,ypreds,yprobs2d,show_plots=False)

              precision    recall  f1-score   support

           0       0.89      0.77      0.83      1035
           1       0.54      0.72      0.62       374

    accuracy                           0.76      1409
   macro avg       0.71      0.75      0.72      1409
weighted avg       0.79      0.76      0.77      1409

[[801 234]
 [103 271]]

CPU times: user 14.7 s, sys: 1.77 s, total: 16.4 s
Wall time: 11.3 s


# Total run time of the notebook, split into hours, minutes and seconds.
time_taken = time.time() - time_start_notebook

# h = whole hours; m = the seconds left over after removing those hours,
# split into (minutes, seconds) by the second divmod.
h, m = divmod(time_taken, 3600)
print(
    'Time taken to run whole notebook: {:.0f} hr {:.0f} min {:.0f} secs'.format(
        h, *divmod(m, 60)
    )
)

Time taken to run whole notebook: 0 hr 0 min 19 secs

	customerID	gender	Partner	Dependents	tenure	PhoneService	MultipleLines	InternetService	OnlineSecurity	OnlineBackup	DeviceProtection	TechSupport	StreamingTV	StreamingMovies	Contract	PaperlessBilling	PaymentMethod	MonthlyCharges	TotalCharges	Churn
0	1621-YNCJH	Female	Yes	No	36	Yes	Yes	Fiber optic	Yes	Yes	Yes	Yes	No	Yes	Two year	Yes	Credit card (automatic)	106.05	3834.4	No
1	7143-BQIBA	Male	No	No	10	Yes	No	DSL	Yes	No	No	Yes	Yes	No	Month-to-month	No	Bank transfer (automatic)	62.25	612.95	No
5632	0862-PRCBS	Female	Yes	Yes	68	Yes	Yes	Fiber optic	No	Yes	No	Yes	Yes	Yes	Two year	Yes	Credit card (automatic)	103.75	7039.45	No
5633	4656-CAURT	Male	No	No	69	Yes	Yes	No	No internet service	No internet service	No internet service	No internet service	No internet service	No internet service	Two year	No	Bank transfer (automatic)	23.95	1713.1	No

	SeniorCitizen	tenure	MonthlyCharges	TotalCharges	Partner_No	Partner_Yes	Dependents_No	Dependents_Yes	PhoneService_Yes	MultipleLines_No	MultipleLines_Yes	InternetService_DSL	InternetService_Fiber optic	InternetService_No	OnlineSecurity_No internet service	OnlineSecurity_Yes	OnlineBackup_No	OnlineBackup_No internet service	OnlineBackup_Yes	DeviceProtection_No	DeviceProtection_No internet service	DeviceProtection_Yes	TechSupport_No internet service	TechSupport_Yes	StreamingTV_No	StreamingTV_No internet service	StreamingTV_Yes	StreamingMovies_No	StreamingMovies_No internet service	StreamingMovies_Yes	Contract_Month-to-month	Contract_Two year	PaperlessBilling_No	PaperlessBilling_Yes	PaymentMethod_Bank transfer (automatic)	PaymentMethod_Credit card (automatic)	PaymentMethod_Mailed check
0	0	36	106.05	3834.40	0	1	1	0	1	0	1	0	1	0	0	1	0	0	1	0	0	1	0	1	1	0	0	0	0	1	0	1	0	1	0	1	0
1	0	10	62.25	612.95	1	0	1	0	1	1	0	1	0	0	0	1	1	0	0	1	0	0	0	1	0	0	1	1	0	0	1	0	1	0	1	0	0
2	0	25	19.15	477.60	0	1	0	1	1	1	0	0	0	1	1	0	0	1	0	0	1	0	1	0	0	1	0	0	1	0	1	0	0	1	0	0	1
3	0	7	20.00	137.60	1	0	1	0	1	1	0	0	0	1	1	0	0	1	0	0	1	0	1	0	0	1	0	0	1	0	1	0	1	0	1	0	0
4	1	24	20.30	459.95	1	0	1	0	1	1	0	0	0	1	1	0	0	1	0	0	1	0	1	0	0	1	0	0	1	0	1	0	0	1	0	0	1

Modelling Customer Churn using tensorflow and keras

Load the libraries

Colab

Useful Scripts

Load the Data

Data Processing

Train Validation Split

Scaling

Class Weights for Imbalanced Data

Modelling: Keras

Model Evaluation

Keras Sequential: Kernel Constraints

Time Taken