import time

time_start_notebook = time.time()


%%capture
import sys
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    # usual imports
    !pip install watermark
    !pip install scikit-plot

    # HPO
    !git clone https://github.com/thuijskens/scikit-hyperband.git
    sys.path.append('scikit-hyperband/hyperband')

    # update modules
    !pip uninstall xgboost
    !pip install -U xgboost

    print('Environment: Google Colab')


sys.path.append('../utils/')
from hyperband_search import HyperbandSearchCV


import numpy as np
import pandas as pd
import os,sys,time
import joblib
from tqdm import tqdm_notebook as tqdm

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly_express as px

# machine learning
from sklearn.preprocessing import OneHotEncoder
import imblearn
from imblearn.over_sampling import SMOTE
import sklearn.metrics as skmetrics

# warnings
import warnings
from sklearn.exceptions import ConvergenceWarning
from scipy.optimize.linesearch import LineSearchWarning
warnings.simplefilter('ignore', category=FutureWarning)
warnings.simplefilter("ignore", category=ConvergenceWarning)
warnings.simplefilter('ignore', category=LineSearchWarning)

# settings
sns.set()
SEED = 100
pd.set_option('max_columns',100)
pd.set_option('max_colwidth',200)
pd.set_option('plotting.backend','matplotlib') # matplotlib, bokeh, altair, plotly

%matplotlib inline
%load_ext watermark
%watermark -iv

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
seaborn        0.11.0
imblearn       0.7.0
autopep8       1.5.2
plotly_express 0.4.1
numpy          1.19.4
json           2.0.9
pandas         1.1.4
joblib         0.17.0
sklearn        0.23.1


def show_methods(obj, ncols=4,contains=None):
    lst = [i for i in dir(obj) if i[0]!='_' ]
    if contains is not None:
        lst = [i for i in lst if contains in i]
    df = pd.DataFrame(np.array_split(lst,ncols)).T.fillna('')
    return df


path_data_train = '../data/raw/train.csv'
path_data_test = '../data/raw/test.csv'

if ENV_COLAB:
    path_data_train = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/train.csv'
    path_data_test = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/test.csv'


df_train = pd.read_csv(path_data_train)
df_test = pd.read_csv(path_data_test)

print(df_train.shape)
print(df_test.shape)
df_train.head(2).append(df_train.tail(2))

(5634, 21)
(1409, 21)


ser_test_ids = df_test['customerID']


target_name = 'Churn'


import plotly_express as px


px.histogram(df_train, x=target_name,height=300,width=300)


px.histogram(df_train, x='gender', color=target_name,height=300,width=300)


def clean_data(dfx):
    dfx = dfx.copy()

    # from eda we see that gender has no effect
    cols_drop = ['customerID','gender']
    dfx = dfx.drop(cols_drop,axis=1)

    # replace values
    dic_replace = [
        {'SeniorCitizen': {0:'No', 1:'Yes'}},
        {'MultipleLines': {'No phone service':'N/A'}},
        {'SeniorCitizen': {'No':'Not_SenCit', 'Yes':'SeniorCitizen'}},
        {'Partner': {'No':'No_Partner', 'Yes':'Partner'}},
        {'Dependents': {'No':'No_Dependents', 'Yes':'Dependents'}},
        {'PaperlessBilling': {'No':'No_PaperlessBill', 'Yes':'PaperlessBill'}},
        {'PhoneService': {'No':'No_PhoneService', 'Yes':'PhoneService'}},
        {'MultipleLines': {'No':'No_MultiLines', 'Yes':'MultiLines', 'N/A': 'No_PhoneService'}},
        {'InternetService': {'No':'No_internet_service'}},
        {'OnlineSecurity': {'No':'No_OnlineSecurity', 'Yes':'OnlineSecurity'}},
        {'OnlineBackup': {'No':'No_OnlineBackup', 'Yes':'OnlineBackup'}},
        {'DeviceProtection': {'No':'No_DeviceProtection', 'Yes':'DeviceProtection'}},
        {'TechSupport': {'No':'No_TechSupport', 'Yes':'TechSupport'}},
        {'StreamingTV': {'No':'No_StreamingTV', 'Yes':'StreamingTV'}},
        {'StreamingMovies': {'No':'No_StreamingMov', 'Yes':'StreamingMov'}}
    ]
    for dic in dic_replace:
        dfx = dfx.replace(dic)

    # impute 
    dfx['TotalCharges'] = pd.to_numeric(dfx['TotalCharges'],errors='coerce').fillna(0)

    # sum of features
    dfx['SenCit_Dependents']  = dfx['SeniorCitizen'] + '_' + dfx['Dependents']
    dfx['SenCit_Partner']     = dfx['SeniorCitizen'] + '_' + dfx['Partner']
    dfx['SenCit_Contract']    = dfx['SeniorCitizen'] + '_' + dfx['Contract']
    dfx['SenCit_TechSupport'] = dfx['SeniorCitizen'] + '_' + dfx['TechSupport']
    dfx['SenCit_PayMeth']     = dfx['SeniorCitizen'] + '_' + dfx['PaymentMethod']
    dfx['Partner_Dependents'] = dfx['Partner']       + '_' + dfx['Dependents']

    # aggration features
    temp = (dfx.groupby('Contract')['TotalCharges'].agg(['mean'])
             .rename({'mean':'Contract_mean_totCharges'},axis=1))
    dfx = pd.merge(dfx, temp, on='Contract', how='left')

    dfx['Contract_totCharges_diff'] = (dfx['TotalCharges'] 
                                      - dfx['Contract_mean_totCharges'])

    temp = (dfx.groupby('PaymentMethod')['MonthlyCharges'].agg(['mean'])
             .rename({'mean':'PayMeth_mean_monthCharges'},axis=1))
    dfx = pd.merge(dfx, temp, on='PaymentMethod', how='left')

    dfx['PayMeth_monthCharges_diff'] = (dfx['MonthlyCharges'] 
                                       - dfx['PayMeth_mean_monthCharges'])

    multiLines_dict = {'No_PhoneService':0, 'No_MultiLines':1, 'MultiLines':2}
    dfx['MultipleLines_Ordinal'] = dfx['MultipleLines'].map(multiLines_dict)

    # Ordinal encoding of 'InternetService'
    intServ_dict = {'No_internet_service':0, 'DSL':1, 'Fiber_optic':2}
    dfx['InternetService_Ordinal'] = dfx['InternetService'].map(intServ_dict)

    # Ordinal encoding of 'Contract'
    contract_dict = {'Month-to-month':0, 'One_year':1, 'Two_year':2}
    dfx['Contract_Ordinal'] = dfx['Contract'].map(contract_dict)

    # Drop unnecessary columns that have been encoded
    ordinal_drop_cols = ['MultipleLines', 'InternetService', 'Contract']
    dfx.drop(ordinal_drop_cols, axis=1, inplace=True)

    # Apply one-hot encoder to the relevant columns
    cols_ohe = ['SeniorCitizen', 'Partner', 'Dependents', 
               'PaperlessBilling', 'PhoneService', 'OnlineSecurity', 
               'OnlineBackup', 'DeviceProtection', 'TechSupport',
               'StreamingTV', 'StreamingMovies', 'PaymentMethod',
               'SenCit_Dependents', 'Partner_Dependents', 'SenCit_Partner',
               'SenCit_Contract', 'SenCit_TechSupport', 'SenCit_PayMeth']

    enc_ohe = OneHotEncoder(handle_unknown='ignore', sparse=False)
    df_ohe = pd.DataFrame(enc_ohe.fit_transform(dfx[cols_ohe]))

    # Replace default column names with more descriptive ones
    df_ohe.columns = enc_ohe.get_feature_names(cols_ohe)

    # One-hot encoding removed index; put it back
    df_ohe.index = dfx.index

    # Remove categorical columns (will replace with one-hot encoding)
    dfx.drop(cols_ohe, axis=1, inplace=True)

    # Add one-hot encoded columns to numerical features
    dfx = pd.concat([dfx, df_ohe], axis=1)

    # remove columns
    cols_drop = ['InternetService_Ordinal','Contract_Ordinal']
    dfx = dfx.drop(cols_drop, axis=1)
    
    # remove white spaces from column names
    dfx = dfx.rename(columns=lambda x: x.strip())
    
    return dfx


df_train = clean_data(df_train)
df_test  = clean_data(df_test)


df_Xtrain  = df_train.drop(target_name,axis=1)
ser_ytrain = df_train[target_name].map({'No':0, 'Yes':1})

df_Xtest   = df_test.drop(target_name,axis=1)
ser_ytest  = df_test[target_name].map({'No':0, 'Yes':1})

ytrain = np.array(ser_ytrain).flatten()
ytest = np.array(ser_ytest).flatten()


df_Xtrain.sum().sum(), ser_ytrain.sum().sum()

(26621171.299999997, 1495)


df_Xtrain.head(2)


ser_ytrain.head(2)

0    0
1    0
Name: Churn, dtype: int64


from imblearn.over_sampling import SMOTE # smote needs sklearn 0.23.1
import sklearn
sklearn.__version__

'0.23.1'


smote = SMOTE(sampling_strategy=0.5, random_state=SEED)

df_Xtrain_smote, ser_ytrain_smote = smote.fit_resample(df_Xtrain,ser_ytrain)


# smote = SMOTE(ratio='minority', random_state=SEED)
# df_Xtrain_smote, ser_ytrain_smote = smote.fit_sample(df_Xtrain, ser_ytrain)


from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PowerTransformer


df_Xtrain.head()


df_Xtrain.columns[df_Xtrain.apply(pd.Series.nunique)>5]

Index(['tenure', 'MonthlyCharges', 'TotalCharges', 'Contract_totCharges_diff',
       'PayMeth_monthCharges_diff'],
      dtype='object')


# Define the columns we wish to transform
cols_scale = ['tenure', 'MonthlyCharges', 'TotalCharges',
              'Contract_totCharges_diff',
              'PayMeth_monthCharges_diff']

# Scale the relevant columns
transformer = ColumnTransformer([('yeo_johnson', PowerTransformer(), cols_scale)], 
                                remainder='passthrough')
transformer.fit(df_Xtrain)

df_Xtrain_scaled = pd.DataFrame(transformer.transform(df_Xtrain))
df_Xtest_scaled = pd.DataFrame(transformer.transform(df_Xtest))

#features
df_Xtrain_scaled.columns = df_Xtrain.columns
df_Xtest_scaled.columns = df_Xtest.columns


df_Xtrain_scaled.head()


df_Xtrain_scaled.sum().sum(), df_Xtest_scaled.sum().sum()

(13273998.150000809, 3374471.9234355474)


df_Xtrain_scaled.isna().sum().sum(), df_Xtest_scaled.isna().sum().sum()

(0, 0)


# Scale the relevant columns
transformer = ColumnTransformer([('yeo_johnson', PowerTransformer(), cols_scale)], 
                                remainder='passthrough')
transformer.fit(df_Xtrain_smote)

df_Xtrain_smote_scaled = pd.DataFrame(transformer.transform(df_Xtrain_smote))
df_Xtest_scaled = pd.DataFrame(transformer.transform(df_Xtest)) # Xtest is NEVER oversampled.

#features
df_Xtrain_smote_scaled.columns = df_Xtrain.columns
df_Xtest_scaled.columns = df_Xtest.columns


from sklearn.linear_model import LogisticRegression


# LogisticRegression?


model = LogisticRegression(random_state=SEED)
model.fit(df_Xtrain_smote_scaled, ser_ytrain_smote)
ypreds = model.predict(df_Xtest)
skmetrics.confusion_matrix(np.array(ser_ytest), ypreds)

array([[1035,    0],
       [ 374,    0]])


# help(HyperbandSearchCV)


X = df_Xtrain_smote_scaled
y = ser_ytrain_smote


X = df_Xtrain_scaled
y = ser_ytrain


show_methods(sklearn.exceptions)


%%time

import scipy.stats as stats
from sklearn import metrics
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

import warnings
from scipy.optimize.linesearch import LineSearchWarning
from sklearn.exceptions import ConvergenceWarning

warnings.simplefilter('ignore', category=FutureWarning)
warnings.simplefilter("ignore", category=UserWarning)

warnings.simplefilter("ignore", category=ConvergenceWarning)
warnings.simplefilter('ignore', category=LineSearchWarning)

# Define our model
params_fixed = {'dual': False, 
                'random_state': SEED,
                'n_jobs': 1
               }

params_hyp = {
    'max_iter': stats.randint(200,500),
    'solver'  : ['lbfgs', 'sag', 'newton-cg'],
    'penalty' : ['l2'],
    'C'       : stats.loguniform(0.01, 10),
    }

# This gives
# [[384 651]
#  [ 55 319]]

# params_hyp = {
#     'max_iter'     : stats.randint(200,500),
#     'solver'       : ['saga'],  # newton-cg only support l2
#     'penalty'      : ['l1','l2','elasticnet'],
#     'C'            : stats.loguniform(0.01, 10),
#     'l1_ratio'     : stats.loguniform(0.01, 1), # must be <= 1.0
#     'class_weight' : [ None, 'balanced'],
#     }
# This gives much worse result
# array([[901, 134],
#        [182, 192]])
# # Wall time: 6min 30s

model = LogisticRegression(**params_fixed)

# Perform Hyperparameter Tuning
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=SEED)
grid = HyperbandSearchCV(model,params_hyp, 
                              resource_param = 'max_iter',
                              min_iter       = 200,
                              max_iter       = 1000,
                              cv             = cv, 
                              scoring        = 'roc_auc',
                              refit          = True,
                              verbose        = 0,
                              random_state   = SEED
                          )

# comment these after run
# grid.fit(X, y)
# print('Best parameters:  ', grid.best_params_)
# print('AUC - ROC score:  ', grid.best_score_)

# Wall time: 6min 30s

CPU times: user 2.23 ms, sys: 899 µs, total: 3.13 ms
Wall time: 3.49 ms


params_best = {'C': 0.42679058013626753, 'max_iter': 1000,
               'penalty': 'l2', 'solver': 'lbfgs'}


# params_best = grid.best_params_

params = params_fixed
params.update(params_best)

model = LogisticRegression(**params)


# Xtrain ==> smote + scaled
# Xtest ==> Xtest
model.fit(df_Xtrain_smote_scaled, ser_ytrain_smote)

ypreds_smote_scaled   = model.predict(df_Xtest)
yprobs2d_smote_scaled = model.predict_proba(df_Xtest)

skmetrics.confusion_matrix(ytest, ypreds_smote_scaled)

array([[384, 651],
       [ 55, 319]])


# Xtrain ==> orig
# Xtest ==> orig
model.fit(df_Xtrain, ser_ytrain)

ypreds_no_smote = model.predict(df_Xtest)
yprobs_no_smote = model.predict_proba(df_Xtest)

skmetrics.confusion_matrix(ytest, ypreds_no_smote)

array([[905, 130],
       [166, 208]])


# choose better predictions
ypreds   = ypreds_smote_scaled
yprobs2d = yprobs2d_smote_scaled

yprobs2d[:5]

array([[0.00000000e+00, 1.00000000e+00],
       [4.33068154e-02, 9.56693185e-01],
       [1.17439347e-09, 9.99999999e-01],
       [0.00000000e+00, 1.00000000e+00],
       [0.00000000e+00, 1.00000000e+00]])


pred_name = 'lr'
path_pred = f'../predictions/{pred_name}.csv'

df_preds = pd.DataFrame({'customerID': ser_test_ids})
df_preds[f'ypreds_{pred_name}'] = ypreds
df_preds[f'yprobs_{pred_name}'] = yprobs2d[:,1]

df_preds.to_csv(path_pred,index=False)

df_preds.head()


def model_eval_bin(model_name,ytest,ypreds,yprobs2d,show_plots=True):
    import sklearn.metrics as skmetrics
    import scikitplot.metrics as skpmetrics
    import os

    acc       = skmetrics.accuracy_score(ytest,ypreds)
    precision = skmetrics.precision_score(ytest,ypreds)
    recall    = skmetrics.recall_score(ytest,ypreds)
    f1        = skmetrics.f1_score(ytest,ypreds)
    auc       = skmetrics.roc_auc_score(ytest,ypreds)

    print(skmetrics.classification_report(ytest,ypreds))
    print(skmetrics.confusion_matrix(ytest,ypreds))

    df_res = pd.DataFrame({'Accuracy':[acc],
                          'Precision': [precision],
                          'Recall': [recall],
                          'F1-score': [f1],
                          'AUC': [auc]},index=[model_name])

    display(df_res.style.format("{:.4f}"))
    if not os.path.isdir('../outputs'):
        os.makedirs('../outputs')
    o = '.' if ENV_COLAB else '../outputs/'
    df_res.to_csv(o+f'model_{model_name}.csv',index=True)

    skpmetrics.plot_precision_recall(ytest,yprobs2d) # more focus on minority
    skpmetrics.plot_roc_curve(ytest,yprobs2d) # equal focus on both groups
    skpmetrics.plot_confusion_matrix(ytest,ypreds)

model_eval_bin('LR',ytest,ypreds,yprobs2d,show_plots=True)

              precision    recall  f1-score   support

           0       0.87      0.37      0.52      1035
           1       0.33      0.85      0.47       374

    accuracy                           0.50      1409
   macro avg       0.60      0.61      0.50      1409
weighted avg       0.73      0.50      0.51      1409

[[384 651]
 [ 55 319]]


time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
      '{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))

Time taken to run whole notebook: 0 hr 0 min 9 secs

	0	1	2	3
0	ChangedBehaviorWarning	DataDimensionalityWarning	NonBLASDotWarning	SkipTestWarning
1	ConvergenceWarning	EfficiencyWarning	NotFittedError	UndefinedMetricWarning
2	DataConversionWarning	FitFailedWarning	PositiveSpectrumWarning

	customerID	ypreds_lr	yprobs_lr
0	1794-HBQTJ	1	1.000000
1	0356-OBMAC	1	0.956693
2	4077-CROMM	1	1.000000
3	5442-PPTJY	1	1.000000
4	2333-KWEWW	1	1.000000

Modelling Customer Churn using Logistic Regression and Hyperband

Load the libraries

Colab¶

Useful Scripts

Load the Data

Data Processing

Oversampling: SMOTE

Scaling Numerical Features (Yeo-Johnson)¶

Modelling: Logistic Regression

Hyperband SearchCV¶

Model Evaluation

Time Taken

	customerID	gender	Partner	Dependents	tenure	PhoneService	MultipleLines	InternetService	OnlineSecurity	OnlineBackup	DeviceProtection	TechSupport	StreamingTV	StreamingMovies	Contract	PaperlessBilling	PaymentMethod	MonthlyCharges	TotalCharges	Churn
0	1621-YNCJH	Female	Yes	No	36	Yes	Yes	Fiber optic	Yes	Yes	Yes	Yes	No	Yes	Two year	Yes	Credit card (automatic)	106.05	3834.4	No
1	7143-BQIBA	Male	No	No	10	Yes	No	DSL	Yes	No	No	Yes	Yes	No	Month-to-month	No	Bank transfer (automatic)	62.25	612.95	No
5632	0862-PRCBS	Female	Yes	Yes	68	Yes	Yes	Fiber optic	No	Yes	No	Yes	Yes	Yes	Two year	Yes	Credit card (automatic)	103.75	7039.45	No
5633	4656-CAURT	Male	No	No	69	Yes	Yes	No	No internet service	No internet service	No internet service	No internet service	No internet service	No internet service	Two year	No	Bank transfer (automatic)	23.95	1713.1	No

	tenure	MonthlyCharges	TotalCharges	Contract_mean_totCharges	Contract_totCharges_diff	PayMeth_mean_monthCharges	PayMeth_monthCharges_diff	MultipleLines_Ordinal	SeniorCitizen_Not_SenCit	SeniorCitizen_SeniorCitizen	Partner_No_Partner	Partner_Partner	Dependents_Dependents	Dependents_No_Dependents	PaperlessBilling_No_PaperlessBill	PaperlessBilling_PaperlessBill	PhoneService_No_PhoneService	PhoneService_PhoneService	OnlineSecurity_No internet service	OnlineSecurity_No_OnlineSecurity	OnlineSecurity_OnlineSecurity	OnlineBackup_No internet service	OnlineBackup_No_OnlineBackup	OnlineBackup_OnlineBackup	DeviceProtection_DeviceProtection	DeviceProtection_No internet service	DeviceProtection_No_DeviceProtection	TechSupport_No internet service	TechSupport_No_TechSupport	TechSupport_TechSupport	StreamingTV_No internet service	StreamingTV_No_StreamingTV	StreamingTV_StreamingTV	StreamingMovies_No internet service	StreamingMovies_No_StreamingMov	StreamingMovies_StreamingMov	PaymentMethod_Bank transfer (automatic)	PaymentMethod_Credit card (automatic)	PaymentMethod_Electronic check	PaymentMethod_Mailed check	SenCit_Dependents_Not_SenCit_Dependents	SenCit_Dependents_Not_SenCit_No_Dependents	SenCit_Dependents_SeniorCitizen_Dependents	SenCit_Dependents_SeniorCitizen_No_Dependents	Partner_Dependents_No_Partner_Dependents	Partner_Dependents_No_Partner_No_Dependents	Partner_Dependents_Partner_Dependents	Partner_Dependents_Partner_No_Dependents	SenCit_Partner_Not_SenCit_No_Partner	SenCit_Partner_Not_SenCit_Partner	SenCit_Partner_SeniorCitizen_No_Partner	SenCit_Partner_SeniorCitizen_Partner	SenCit_Contract_Not_SenCit_Month-to-month	SenCit_Contract_Not_SenCit_One year	SenCit_Contract_Not_SenCit_Two year	SenCit_Contract_SeniorCitizen_Month-to-month	SenCit_Contract_SeniorCitizen_One year	SenCit_Contract_SeniorCitizen_Two year	SenCit_TechSupport_Not_SenCit_No internet service	SenCit_TechSupport_Not_SenCit_No_TechSupport	SenCit_TechSupport_Not_SenCit_TechSupport	SenCit_TechSupport_SeniorCitizen_No internet service	SenCit_TechSupport_SeniorCitizen_No_TechSupport	SenCit_TechSupport_SeniorCitizen_TechSupport	SenCit_PayMeth_Not_SenCit_Bank transfer (automatic)	SenCit_PayMeth_Not_SenCit_Credit card (automatic)	SenCit_PayMeth_Not_SenCit_Electronic check	SenCit_PayMeth_Not_SenCit_Mailed check	SenCit_PayMeth_SeniorCitizen_Bank transfer (automatic)	SenCit_PayMeth_SeniorCitizen_Credit card (automatic)	SenCit_PayMeth_SeniorCitizen_Electronic check	SenCit_PayMeth_SeniorCitizen_Mailed check
0	36	106.05	3834.40	3683.643192	150.756808	65.801934	40.248066	2	1.0	0.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0
1	10	62.25	612.95	1370.923131	-757.973131	67.564819	-5.314819	1	1.0	0.0	1.0	0.0	0.0	1.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	tenure	MonthlyCharges	TotalCharges	Contract_mean_totCharges	Contract_totCharges_diff	PayMeth_mean_monthCharges	PayMeth_monthCharges_diff	MultipleLines_Ordinal	SeniorCitizen_Not_SenCit	SeniorCitizen_SeniorCitizen	Partner_No_Partner	Partner_Partner	Dependents_Dependents	Dependents_No_Dependents	PaperlessBilling_No_PaperlessBill	PaperlessBilling_PaperlessBill	PhoneService_PhoneService	OnlineSecurity_No internet service	OnlineSecurity_OnlineSecurity	OnlineBackup_No internet service	OnlineBackup_No_OnlineBackup	OnlineBackup_OnlineBackup	DeviceProtection_DeviceProtection	DeviceProtection_No internet service	DeviceProtection_No_DeviceProtection	TechSupport_No internet service	TechSupport_TechSupport	StreamingTV_No internet service	StreamingTV_No_StreamingTV	StreamingTV_StreamingTV	StreamingMovies_No internet service	StreamingMovies_No_StreamingMov	StreamingMovies_StreamingMov	PaymentMethod_Bank transfer (automatic)	PaymentMethod_Credit card (automatic)	PaymentMethod_Mailed check	SenCit_Dependents_Not_SenCit_Dependents	SenCit_Dependents_Not_SenCit_No_Dependents	SenCit_Dependents_SeniorCitizen_No_Dependents	Partner_Dependents_No_Partner_No_Dependents	Partner_Dependents_Partner_Dependents	Partner_Dependents_Partner_No_Dependents	SenCit_Partner_Not_SenCit_No_Partner	SenCit_Partner_Not_SenCit_Partner	SenCit_Partner_SeniorCitizen_No_Partner	SenCit_Contract_Not_SenCit_Month-to-month	SenCit_Contract_Not_SenCit_Two year	SenCit_Contract_SeniorCitizen_Month-to-month	SenCit_TechSupport_Not_SenCit_No internet service	SenCit_TechSupport_Not_SenCit_TechSupport	SenCit_TechSupport_SeniorCitizen_No internet service	SenCit_PayMeth_Not_SenCit_Bank transfer (automatic)	SenCit_PayMeth_Not_SenCit_Credit card (automatic)	SenCit_PayMeth_Not_SenCit_Mailed check	SenCit_PayMeth_SeniorCitizen_Mailed check
0	0.367334	1.355904	0.883378	0.329235	1.553040	3683.643192	65.801934	2.0	1.0	0.0	0.0	1.0	0.0	1.0	0.0	1.0	1.0	0.0	1.0	0.0	0.0	1.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0
1	-0.786426	-0.056378	-0.528932	-0.235047	-0.268218	1370.923131	67.564819	1.0	1.0	0.0	1.0	0.0	0.0	1.0	1.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0
2	-0.034567	-1.531684	-0.674473	-0.329237	-0.901558	1370.923131	43.792328	1.0	1.0	0.0	0.0	1.0	1.0	0.0	0.0	1.0	1.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0
3	-0.999430	-1.501041	-1.274872	-0.568686	-1.626825	1370.923131	67.564819	1.0	1.0	0.0	1.0	0.0	0.0	1.0	1.0	0.0	1.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0
4	-0.075469	-1.490250	-0.695633	-0.341573	-0.864635	1370.923131	43.792328	1.0	0.0	1.0	1.0	0.0	0.0	1.0	0.0	1.0	1.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0