import time

time_start_notebook = time.time()


import numpy as np
import pandas as pd
import seaborn as sns
import os,sys,time
import matplotlib.pyplot as plt
sns.set()
import joblib

from tqdm import tqdm_notebook as tqdm

SEED = 100
pd.set_option('max_columns',100)
pd.set_option('max_colwidth',200)
pd.set_option('plotting.backend','matplotlib') # matplotlib, bokeh, altair, plotly

# special
import pycaret

%load_ext watermark
%watermark -iv

pandas    : 1.1.5
autopep8  : 1.5.4
sys       : 3.8.5 (default, Sep  4 2020, 02:22:02) 
[Clang 10.0.0 ]
json      : 2.0.9
joblib    : 1.0.0
matplotlib: 3.2.2
pycaret   : 2.2.2
seaborn   : 0.11.0
numpy     : 1.19.4


%%capture
import sys
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    # usual imports
    !pip install watermark
    !pip install scikit-plot

    # HPO for pycaret
    !pip install tune-sklearn
    !pip install optuna # hyperopt is already in colab

    # gpu version of lightgbm for pycaret
    !pip uninstall lightgbm -y
    !pip install lightgbm --install-option=--gpu --install-option="--opencl-include-dir=/usr/local/cuda/include/" --install-option="--opencl-library=/usr/local/cuda/lib64/libOpenCL.so"

    # regular pycaret without gpu 
    !pip install pycaret-nightly[full]

    # ipywidget
    !pip install ipywidgets
    !jupyter nbextension enable --py widgetsnbextension

    from pycaret.utils import enable_colab
    enable_colab()

    print('Environment: Google Colab')


def show_methods(obj, ncols=4,contains=None):
    """Tabulate the public attribute names of an object.

    Parameters
    -----------
    obj: object
        Anything that can be passed to ``dir``.
    ncols: int
        Number of columns the names are spread over.
    contains: str
        When given, only names that include this substring are kept.

    Returns:
       Pandas Dataframe with one name per cell; unused cells hold ''.

    """
    names = []
    for attr in dir(obj):
        # names starting with an underscore are treated as private
        if attr[0] == '_':
            continue
        if contains is not None and contains not in attr:
            continue
        names.append(attr)

    # one chunk per output column; chunks may differ in length by one
    chunks = np.array_split(names, ncols)
    table = pd.DataFrame(chunks).T
    return table.fillna('')


def compare_new_models(name,desc,mean_row,ofile,
                       df_eval=None,sort='Recall',show=True):
    """Add one model's mean metrics to an evaluation table and save it.

    Parameters
    -----------
    name: str
        Name of the model. eg. xgboost
    desc: str
        Description of the model. e.g tuned,calibrated
    mean_row: sequence of 8 numbers
        The mean row, in the order
        Accuracy, AUC, Recall, Precision, F1, Kappa, MCC, LogLoss.
        e.g.
        df_res = pyc.pull()
        mean_row = df_res.loc['Mean']
    ofile: str
        Output file name. e.g. 'pycaret_df_eval_lr.csv'
    df_eval: Pandas Dataframe
        Table returned by a previous call. It is left untouched; the
        new row is added to a copy. Anything that is not a DataFrame
        (e.g. None) starts a new, empty table.
    sort: str
        Column used for sorting (always descending). One of:
        Accuracy, AUC, Recall, Precision, F1, Kappa, MCC, LogLoss
    show: bool
        If True, display the table with the sort column highlighted.
        This relies on ``display`` being available, as it is in a
        Jupyter/IPython session.

    Returns:
       Pandas Dataframe.

    Raises:
       ValueError if mean_row does not hold exactly 8 values.
       KeyError if sort is not a column of the table.

    """

    if not isinstance(df_eval, pd.DataFrame):
        df_eval = pd.DataFrame({'Model': [],
                                'Description':[],
                                'Accuracy':[],
                                'AUC':[],
                                'Recall':[],
                                'Precision':[],
                                'F1':[],
                                'Kappa':[],
                                'MCC': [],
                                'LogLoss': []
                               })

    # reset_index returns a new frame, so the caller's table is never
    # mutated, and the fresh 0..n-1 index guarantees that the label
    # len(df_eval) used below is unused (no accidental overwrite).
    df_eval = df_eval.reset_index(drop=True)

    # unpacking doubles as a length check on mean_row
    acc,auc,rec,pre,f1,kap,mcc,logloss = mean_row
    row = [name,desc,acc,auc,rec,pre,f1,kap,mcc,logloss]

    df_eval.loc[len(df_eval)] = row
    df_eval = (df_eval.drop_duplicates()
                      .sort_values(sort,ascending=False)
                      .reset_index(drop=True))

    if show:
        # build the Styler only when it is actually displayed
        df_style = (df_eval.style.apply(lambda ser:
                    ['background: tomato'
                     if ser.name == sort else ''
                     for _ in ser]))
        display(df_style)

    # save the data
    df_eval.to_csv(ofile,index=False)

    return df_eval


# Local runs read the cleaned splits from the repository checkout.
path_data_train = '../data/processed/train_cleaned.csv'
path_data_test = '../data/processed/test_cleaned.csv'

# On Colab there is no checkout, so read the same files from GitHub.
if ENV_COLAB:
    path_data_train = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/processed/train_cleaned.csv'
    path_data_test = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/processed/test_cleaned.csv'


df_train = pd.read_csv(path_data_train)
df_test = pd.read_csv(path_data_test)

print(df_train.shape)
print(df_test.shape)
# Preview the first and last two rows. pd.concat is used because
# DataFrame.append was removed in pandas 2.0; the result is the same.
pd.concat([df_train.head(2), df_train.tail(2)])

(5634, 39)
(1409, 39)


# The raw test file is read only for its customerID column; the ids are
# attached to the predictions when they are exported later.
# NOTE(review): assumes the raw test rows are in the same order as
# test_cleaned.csv -- confirm against the cleaning script.
path_data_test_raw = ('https://raw.githubusercontent.com/'
                      'bhishanpdl/Datasets/master/Projects/'
                      'Telco_Customer_Churn/raw/test.csv')

df_test_raw1 = pd.read_csv(path_data_test_raw,usecols=['customerID'])
df_test_raw1.head(2)


ser_test_ids = df_test_raw1['customerID']
# name of the label column in the cleaned train/test files
target_name = 'Churn'


# check for nans
df_train.isna().sum().sum()

0


import pycaret
import pycaret.classification as pyc


df_train.shape

(5634, 39)


show_methods(pyc)


# GPU training is requested only when running on Colab.
USE_GPU = False
if ENV_COLAB:
    USE_GPU = True


# Initialise the pycaret experiment on the training data:
# 80% of the rows are used for training (stratified on the target), the
# rest is pycaret's internal hold-out set; SEED fixes the session so the
# split is reproducible. Feature types are left for pycaret to infer.
exp = pyc.setup(df_train,target_name,
                train_size=0.8,
                session_id=SEED,
                use_gpu=USE_GPU,
                preprocess = True,
                categorical_features = None,
                ordinal_features = None,
                high_cardinality_features = None,
                numeric_features = None,
                date_features = None,
                ignore_features = None,
                normalize = False,
                data_split_stratify = True,
                silent=True,
                profile=False,
                log_experiment=False
                )

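# Tip: run setup once with silent=False to review the inferred datatypes,
# then pass numeric_features and categorical_features yourself if needed.
# (silent=True, as used above, accepts the inferred types without asking.)
#
# If sampling=False, 100% of the data is used, which can be too slow.
# If sampling=True, we need to enter a fraction (e.g. 0.3) ourselves.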

"""
Here, we have data < 25k rows, so I have chosen not to use sampling.

""";


pyc.models(internal=True)[['Name', 'GPU Enabled']]


# pyc.compare_models?


pyc.get_metrics().index

Index(['acc', 'auc', 'recall', 'precision', 'f1', 'kappa', 'mcc'], dtype='object', name='ID')


# add Log Loss metric in pycaret
# Log loss is only meaningful on predicted probabilities. With the
# default target='pred' pycaret feeds it the hard 0/1 labels, which
# reduces it to (error rate x ~34.5), so score on 'pred_proba' instead.
from sklearn.metrics import log_loss
pyc.add_metric('logloss', 'LogLoss', log_loss, greater_is_better=False,
               target='pred_proba')

Name                                                        LogLoss
Display Name                                                LogLoss
Score Function                <function log_loss at 0x7fd475a038b0>
Scorer               make_scorer(log_loss, greater_is_better=False)
Target                                                         pred
Args                                                             {}
Greater is Better                                             False
Multiclass                                                     True
Custom                                                         True
Name: logloss, dtype: object


# Cross-validate the available models with 5 folds and rank them by Recall.
best = pyc.compare_models(sort = 'Recall',fold=5)


# pyc.get_logs() # only works if we set log expt as true


# Train a few models with default settings and collect the 'Mean' row of
# each cross-validation score grid in df_eval.
# pyc.pull() is called right after each create_model so that the grid it
# returns belongs to the model that was just trained.
model_name = 'lr'
# despite the '_lr' suffix, this csv receives the table for all models below
path_df_eval = 'pycaret_df_eval_lr.csv'

lr = pyc.create_model(model_name,verbose=False)
mean_row = pyc.pull().loc['Mean']
# df_eval=None starts a fresh comparison table
df_eval = compare_new_models('lr','default',mean_row,path_df_eval,
                             sort='Recall',df_eval=None)


model_name = 'xgboost'
xgb = pyc.create_model(model_name,verbose=False)
mean_row = pyc.pull().loc['Mean']
df_eval = compare_new_models(model_name,'default',mean_row,
                path_df_eval,sort='Recall',df_eval=df_eval)


model_name = 'lightgbm'
lgb = pyc.create_model(model_name,verbose=False)

mean_row = pyc.pull().loc['Mean']
df_eval = compare_new_models(model_name,'default',
            mean_row,path_df_eval,sort='Recall',df_eval=df_eval)


nb = pyc.create_model('nb',verbose=False)

mean_row = pyc.pull().loc['Mean']
df_eval = compare_new_models('nb','default',
            mean_row,path_df_eval,sort='Recall',df_eval=df_eval)


# model_tuned1 = pyc.tune_model(lr,fold=5,optimize='Recall',
#                           search_library='scikit-optimize',verbose=False)

# mean_row = pyc.pull().loc['Mean']
# df_eval = compare_new_models('lr','tuned,scikit-optimize',
#              mean_row,path_df_eval,sort='Recall',df_eval=df_eval)


# model_tuned2 = pyc.tune_model(lr,fold=5,optimize='Recall',
#                           search_library='tune-sklearn',verbose=False)

# mean_row = pyc.pull().loc['Mean']
# df_eval = compare_new_models('lr','tuned,tune-sklearn',
#    mean_row,path_df_eval,sort='Recall',df_eval=df_eval)


# %%capture
# # use capture, there are too many lines of
# # The `start_trial` operation took 0.67942214012146 seconds to complete, which may be a performance bottleneck.
# model_tuned3 = pyc.tune_model(lr,fold=5,optimize='Recall',
#             search_library='tune-sklearn',n_iter=100,
#                            verbose=False,tuner_verbose=False)

# mean_row = pyc.pull().loc['Mean']
# desc = 'tuned,tune-sklearn,n_iter=100'
# df_eval = compare_new_models('lr',desc,mean_row,path_df_eval,sort='Recall',df_eval=df_eval)


# %%capture
# # optimize f1
# model_tuned4 = pyc.tune_model(lr,fold=5,optimize='F1',
#             search_library='tune-sklearn',n_iter=100, verbose=False)

# mean_row = pyc.pull().loc['Mean']
# desc = 'tuned,tune-sklearn,n_iter=100,optimize=F1'
# df_eval = compare_new_models('lr',desc,mean_row,path_df_eval,sort='Recall',df_eval=df_eval)


# %%time
# model_tuned4 = pyc.tune_model(lr,fold=5,optimize='Recall',
#             search_library='tune-sklearn',
#             n_iter=500,
#             early_stopping='asha',
#             early_stopping_max_iters=10,
#             verbose=True)

# this did not improve recall or f1


# from sklearn.linear_model import LogisticRegression
# model_tuned5 = LogisticRegression(C=6.669, class_weight={}, dual=False, fit_intercept=True,
#                    intercept_scaling=1, l1_ratio=None, max_iter=1000,
#                    multi_class='auto', n_jobs=None, penalty='l2',
#                    random_state=100, solver='lbfgs', tol=0.0001, verbose=0,
#                    warm_start=False)


# # tune hyperparameters with custom_grid
# params_lr = {"C": [2.039,0],
#           "penalty": ["l1", "l2"]
#           }

# tuned_lr_custom = pyc.tune_model(lr, custom_grid = params_lr,verbose=False)
# mean_row = pyc.pull().loc['Mean']
# desc = 'tuned,custom_grid'
# df_eval = compare_new_models('lr',desc,mean_row,path_df_eval,sort='Recall',df_eval=df_eval)


# look at df_eval and choose best model
# df_eval


# model_best = lr_tuned3
# print(model_best)

"""
# this is good
LogisticRegression(C=2.235, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=100, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
                   

LogisticRegression(C=9.072, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=100, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

""";


# WARNING: DONT DO THIS!!
# from sklearn.linear_model import LogisticRegression

# model_best = LogisticRegression(C=2.235, class_weight='balanced', dual=False,
#                    fit_intercept=True, intercept_scaling=1, l1_ratio=None,
#                    max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
#                    random_state=100, solver='lbfgs', tol=0.0001, verbose=0,
#                    warm_start=False)

# This will NOT work in pyc.finalize_model(model_best)


# Directory holding the saved model: the working directory on Colab,
# the repository's models folder otherwise.
odir = '.' if ENV_COLAB else '../models/'

# os.path.join inserts the separator when it is missing; plain string
# concatenation turned '.' + name into a hidden file '.pycaret_...'.
path_model_best = os.path.join(odir, 'pycaret_model_best_lr.joblib')

# joblib.dump(model_best, path_model_best)


# Load the previously tuned model instead of re-running the tuning cells.
model_best = joblib.load(path_model_best)
model_best

LogisticRegression(C=9.072, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=100, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)


# AUC-ROC plot
pyc.plot_model(model_best, plot = 'auc')


# evaluate model (click on buttons to see the plots)
pyc.evaluate_model(model_best)


# pyc.interpret_model?


# interpret_model: SHAP
# pyc.interpret_model(model_best)

# note: logistic regression is not supported
# supported: rf, xgboost, lightgbm, catboost, dt, et


# interpret model : Correlation
# pyc.interpret_model(model_best_lr,plot='correlation')


# interpret model : Reason
# pyc.interpret_model(model_best_lr,plot='reason',observation=12)


# help(pyc.get_config)


df_train.shape, pyc.get_config('X_test').shape # we have ohe columns.

((5634, 39), (1127, 60))


# pyc.finalize_model?


# Refit the chosen model on the whole training data given to setup
# (cross-validation part plus pycaret's hold-out part).
model_final = pyc.finalize_model(model_best)
print(model_final)

LogisticRegression(C=9.072, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=100, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)


# odir = '.' if ENV_COLAB else '../models/'
# path_model_final = odir+'pycaret_model_final_lr.joblib'

# joblib.dump(model_best, path_model_final)

# model_final = joblib.load(path_model_final)
# model_final


# Finalizing does not take much time, so we only need to dump model_best;
# we don't need to dump model_final.


# Predict on the separate test file; the returned frame is the test data
# with 'Label' and 'Score' columns appended.
df_preds = pyc.predict_model(model_final,df_test)
# peek at the last rows/columns to see the appended prediction columns
df_preds.iloc[-5:,-5:]


ytest = df_preds[target_name].to_numpy().ravel()
ypreds = df_preds['Label'].to_numpy().ravel()

# 'Score' is the probability of the *predicted* label, not of class 1:
# rows predicted as 0 carry scores well above 0.5. Convert it to
# P(class 1) so that yprobs means the same thing for every row.
yscore = df_preds['Score'].to_numpy().ravel()
yprobs = np.where(ypreds == 1, yscore, 1 - yscore)

# column 0 = P(class 0), column 1 = P(class 1)
yprobs2d = np.c_[1-yprobs,yprobs]


# Export the test-set predictions, keyed by customerID.
pred_name = 'pycaret_lr'
path_pred = f'../predictions/{pred_name}.csv'

df_preds_out = pd.DataFrame({'customerID': ser_test_ids})
df_preds_out[f'ypreds_{pred_name}'] = ypreds
df_preds_out[f'yprobs_{pred_name}'] = yprobs2d[:,1]

# to_csv does not create missing folders, so make sure the target exists
# (same precaution model_eval_bin takes for '../outputs').
os.makedirs(os.path.dirname(path_pred), exist_ok=True)
df_preds_out.to_csv(path_pred,index=False)

df_preds_out.head()


def model_eval_bin(model_name,ytest,ypreds,yprobs2d,show_plots=True):
    """Evaluate binary-classification predictions and save a summary.

    Prints the classification report and the confusion matrix, displays
    a one-row metrics table and writes it to ``model_<model_name>.csv``
    ('.' when ENV_COLAB is true, '../outputs/' otherwise).

    Parameters
    -----------
    model_name: str
        Used as the row label of the table and in the csv file name.
    ytest: array-like
        True binary labels.
    ypreds: array-like
        Predicted labels.
    yprobs2d: array-like with two columns
        Class probabilities. Column 1 is used as the positive-class
        score for AUC; the full array is passed to the plots.
    show_plots: bool
        If True, draw precision-recall, ROC and confusion-matrix plots
        with scikit-plot (imported only in that case).

    Returns:
       None.

    Notes:
       ``display`` and ``ENV_COLAB`` are taken from the notebook's
       global scope.

    """
    import sklearn.metrics as skmetrics
    import os

    acc       = skmetrics.accuracy_score(ytest,ypreds)
    precision = skmetrics.precision_score(ytest,ypreds)
    recall    = skmetrics.recall_score(ytest,ypreds)
    f1        = skmetrics.f1_score(ytest,ypreds)
    # AUC needs a continuous score: hard 0/1 labels collapse the ROC
    # curve to a single operating point and understate the ranking quality.
    auc       = skmetrics.roc_auc_score(ytest,np.asarray(yprobs2d)[:,1])

    print(skmetrics.classification_report(ytest,ypreds))
    print(skmetrics.confusion_matrix(ytest,ypreds))

    df_res = pd.DataFrame({'Accuracy':[acc],
                          'Precision': [precision],
                          'Recall': [recall],
                          'F1-score': [f1],
                          'AUC': [auc]},index=[model_name])

    display(df_res.style.format("{:.4f}"))

    # Create only the directory that is actually written to, and join the
    # path properly ('.' + name used to produce a hidden '.model_*.csv').
    out_dir = '.' if ENV_COLAB else '../outputs/'
    os.makedirs(out_dir, exist_ok=True)
    df_res.to_csv(os.path.join(out_dir, f'model_{model_name}.csv'),index=True)

    if show_plots:
        # imported lazily so the metrics work without scikit-plot
        import scikitplot.metrics as skpmetrics

        skpmetrics.plot_precision_recall(ytest,yprobs2d) # more focus on minority
        skpmetrics.plot_roc_curve(ytest,yprobs2d) # equal focus on both groups
        skpmetrics.plot_confusion_matrix(ytest,ypreds)

model_eval_bin('pycaret_lr',ytest,ypreds,yprobs2d,show_plots=True)

              precision    recall  f1-score   support

           0       0.91      0.72      0.80      1035
           1       0.51      0.81      0.62       374

    accuracy                           0.74      1409
   macro avg       0.71      0.76      0.71      1409
weighted avg       0.80      0.74      0.76      1409

[[743 292]
 [ 72 302]]


df_preds[target_name].value_counts()

0    1035
1     374
Name: Churn, dtype: int64


df_preds[target_name].value_counts(normalize=True)

0    0.734564
1    0.265436
Name: Churn, dtype: float64


# Report the wall-clock time since the first cell of the notebook ran.
time_taken = time.time() - time_start_notebook
# h = whole hours, m = the seconds left over after removing them
h, m = divmod(time_taken, 3600)
mins, secs = divmod(m, 60)
msg = 'Time taken to run whole notebook: {:.0f} hr {:.0f} min {:.0f} secs'
print(msg.format(h, mins, secs))

Time taken to run whole notebook: 0 hr 2 min 14 secs

	Gender	Partner	Dependents	Tenure	PhoneService	MultipleLines	InternetService	OnlineSecurity	OnlineBackup	DeviceProtection	TechSupport	StreamingTV	StreamingMovies	Contract	PaperlessBilling	PaymentMethod	MonthlyCharges	TotalCharges	Contract_Month-to-month	NoSeniorCitizen_Contract_Month-to-month	InternetService_Fiber optic	StreamingTV_NoInternetService	No_OB_DP_TS	TotalServices	SenCit_Dependents	Partner_Dependents	SenCit_Partner	SenCit_Contract	SenCit_TechSupport	SenCit_PayMeth	Contract_mean_totCharges	Contract_totCharges_diff	PayMeth_mean_monthCharges	PayMeth_monthCharges_diff	Tenure_cat
0	0	1	0	36	1	2	1	2	2	2	2	0	2	2	1	1	106.05	3834.40	1	0	0	1	1	2	0	1	1	2	2	1	3683.643192	150.756808	66.703657	39.346343	3
1	1	0	0	10	1	0	0	2	0	0	2	2	0	0	0	1	62.25	612.95	0	1	1	1	1	1	0	0	0	0	2	1	1370.923131	-757.973131	66.703657	-4.453657	0
5632	0	1	1	68	1	2	1	0	2	0	2	2	2	2	1	1	103.75	7039.45	1	0	0	1	1	2	1	2	1	2	2	1	3683.643192	3355.806808	66.703657	37.046343	5
5633	1	0	0	69	1	2	2	1	1	1	1	1	1	2	0	1	23.95	1713.10	1	0	1	0	1	7	0	0	0	2	1	1	3683.643192	-1970.543192	66.703657	-42.753657	5

	0	1	2	3
0	Any	calibrate_model	interpret_model	pull
1	Dict	compare_models	io	pycaret
2	Display	create_model	is_in_colab	remove_metric
3	List	deploy_model	load_config	save_config
4	MLUsecase	enable_colab	load_model	save_model
5	Optional	ensemble_model	models	set_config
6	Tuple	evaluate_model	np	setup
7	Union	finalize_model	optimize_threshold	stack_models
8	add_metric	get_config	pd	traceback
9	automl	get_logs	plot_model	tune_model
10	blend_models	get_metrics	predict_model	warnings

	Description	Value
0	session_id	100
1	Target	Churn
2	Target Type	Binary
3	Label Encoded	0: 0, 1: 1
4	Original Data	(5634, 39)
5	Missing Values	False
6	Numeric Features	6
7	Categorical Features	32
8	Ordinal Features	False
9	High Cardinality Features	False
10	High Cardinality Method	None
11	Transformed Train Set	(4507, 60)
12	Transformed Test Set	(1127, 60)
13	Shuffle Train-Test	True
14	Stratify Train-Test	True
15	Fold Generator	StratifiedKFold
16	Fold Number	10
17	CPU Jobs	-1
18	Use GPU	False
19	Log Experiment	False
20	Experiment Name	clf-default-name
21	USI	f6db
22	Imputation Type	simple
23	Iterative Imputation Iteration	None
24	Numeric Imputer	mean
25	Iterative Imputation Numeric Model	None
26	Categorical Imputer	constant
27	Iterative Imputation Categorical Model	None
28	Unknown Categoricals Handling	least_frequent
29	Normalize	False
30	Normalize Method	None
31	Transformation	False
32	Transformation Method	None
33	PCA	False
34	PCA Method	None
35	PCA Components	None
36	Ignore Low Variance	False
37	Combine Rare Levels	False
38	Rare Level Threshold	None
39	Numeric Binning	False
40	Remove Outliers	False
41	Outliers Threshold	None
42	Remove Multicollinearity	False
43	Multicollinearity Threshold	None
44	Clustering	False
45	Clustering Iteration	None
46	Polynomial Features	False
47	Polynomial Degree	None
48	Trignometry Features	False
49	Polynomial Threshold	None
50	Group Features	False
51	Feature Selection	False
52	Features Selection Threshold	None
53	Feature Interaction	False
54	Feature Ratio	False
55	Interaction Threshold	None
56	Fix Imbalance	False
57	Fix Imbalance Method	SMOTE

	Name	GPU Enabled
ID
lr	Logistic Regression	False
knn	K Neighbors Classifier	False
nb	Naive Bayes	False
dt	Decision Tree Classifier	False
svm	SVM - Linear Kernel	False
rbfsvm	SVM - Radial Kernel	False
gpc	Gaussian Process Classifier	False
mlp	MLP Classifier	False
ridge	Ridge Classifier	False
rf	Random Forest Classifier	False
qda	Quadratic Discriminant Analysis	False
ada	Ada Boost Classifier	False
gbc	Gradient Boosting Classifier	False
lda	Linear Discriminant Analysis	False
et	Extra Trees Classifier	False
xgboost	Extreme Gradient Boosting	False
lightgbm	Light Gradient Boosting Machine	False
catboost	CatBoost Classifier	False
Bagging	Bagging Classifier	False
Stacking	Stacking Classifier	False
Voting	Voting Classifier	False
CalibratedCV	Calibrated Classifier CV	False

	Model	Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC	LogLoss	TT (Sec)
nb	Naive Bayes	0.7069	0.8243	0.8345	0.4722	0.6027	0.3983	0.4389	10.1234	0.0160
qda	Quadratic Discriminant Analysis	0.6102	0.6244	0.6548	0.3697	0.4717	0.2001	0.2210	13.4642	0.0400
lda	Linear Discriminant Analysis	0.8107	0.8455	0.5493	0.6780	0.6067	0.4839	0.4887	6.5368	0.0380
gbc	Gradient Boosting Classifier	0.8123	0.8496	0.5418	0.6861	0.6053	0.4843	0.4903	6.4831	0.4300
ada	Ada Boost Classifier	0.8107	0.8498	0.5402	0.6840	0.6028	0.4808	0.4872	6.5368	0.1640
lr	Logistic Regression	0.8063	0.8507	0.5376	0.6684	0.5957	0.4703	0.4753	6.6900	0.6760
lightgbm	Light Gradient Boosting Machine	0.7959	0.8345	0.5326	0.6401	0.5808	0.4475	0.4512	7.0502	0.4180
catboost	CatBoost Classifier	0.8028	0.8437	0.5318	0.6612	0.5888	0.4611	0.4663	6.8127	2.6080
xgboost	Extreme Gradient Boosting	0.7854	0.8250	0.5301	0.6106	0.5672	0.4256	0.4276	7.4104	0.8620
rf	Random Forest Classifier	0.7972	0.8258	0.5109	0.6512	0.5723	0.4419	0.4477	7.0041	0.2960
et	Extra Trees Classifier	0.7779	0.8017	0.5076	0.5968	0.5483	0.4024	0.4048	7.6709	0.2800
dt	Decision Tree Classifier	0.7362	0.6644	0.5059	0.5054	0.5050	0.3254	0.3258	9.1116	0.0300
ridge	Ridge Classifier	0.8087	0.0000	0.5042	0.6940	0.5837	0.4635	0.4737	6.6057	0.0180
knn	K Neighbors Classifier	0.7752	0.7729	0.4624	0.5993	0.5216	0.3781	0.3837	7.7630	0.0440
svm	SVM - Linear Kernel	0.7307	0.0000	0.3277	0.5969	0.3813	0.2308	0.2614	9.3025	0.0640

Modelling Customer Churn using pycaret

Load the libraries

Useful Scripts

Load the Data

Data Processing

Modelling Pycaret

Pycaret Setup

Comparing Models

Create Models

Hyperparameter Tuning

Save Model After HPO

Model Evaluation (Validation) : plot_model and evaluate_model

Model Interpretation of Tree Methods (Validation)

Pycaret configs

Finalize model (Fit whole train data)

Model Evaluation on Test Data

Time Taken

	Model	Description	Accuracy	AUC	Recall	Precision	F1	Kappa	MCC	LogLoss
0	lr	default	0.805600	0.849100	0.531800	0.669900	0.592200	0.466900	0.472700	6.713100
1	xgboost	default	0.788600	0.826500	0.518500	0.621700	0.565100	0.427000	0.430300	7.303000

	Model	Description	Accuracy	AUC	Recall	Precision	F1	Kappa	MCC	LogLoss
0	nb	default	0.706000	0.824100	0.833600	0.471200	0.601600	0.396700	0.437300	10.154100
1	lr	default	0.805600	0.849100	0.531800	0.669900	0.592200	0.466900	0.472700	6.713100
2	lightgbm	default	0.792800	0.835600	0.527700	0.631300	0.574200	0.438900	0.442400	7.157400
3	xgboost	default	0.788600	0.826500	0.518500	0.621700	0.565100	0.427000	0.430300	7.303000

	PayMeth_monthCharges_diff	Tenure_cat	Label	Score
1404	16.585135	0	1	0.8140
1405	-19.764865	3	0	0.7794
1406	6.085135	1	1	0.7191
1407	30.735135	1	1	0.8455
1408	-2.787658	1	0	0.5197

	customerID	ypreds_pycaret_lr	yprobs_pycaret_lr
0	1794-HBQTJ	1	0.5445
1	0356-OBMAC	0	0.7947
2	4077-CROMM	1	0.7195
3	5442-PPTJY	0	0.9508
4	2333-KWEWW	0	0.9625