import time

time_start_notebook = time.time()


import numpy as np
import pandas as pd
import seaborn as sns
import os
from pathlib import Path

from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Use the ggplot look for every matplotlib figure in this notebook.
plt.style.use('ggplot') 

# random state
# SEED is reused below for the train/test split and the pycaret session.
SEED = 0
RNG = np.random.RandomState(SEED)

# Absolute path of the current user's home directory.
home = os.path.expanduser('~')

# Last expression of the cell: shows (name, version) for the core libraries.
[(x.__name__,x.__version__) for x in [np,pd,sns]]

/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm

[('numpy', '1.18.4'), ('pandas', '1.0.3'), ('seaborn', '0.10.1')]


import sys
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    !pip install ipywidgets
    !pip install pycaret
    !jupyter nbextension enable --py widgetsnbextension

    from pycaret.utils import enable_colab
    enable_colab()
    # set OMP_NUM_THREADS=1 for hpsklearn package
    #!export OMP_NUM_THREADS=1
    print('Environment: Google Colab')

Requirement already satisfied: ipywidgets in /usr/local/lib/python3.6/dist-packages (7.5.1)
Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets) (5.0.6)
Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.6/dist-packages (from ipywidgets) (4.3.3)
Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.6/dist-packages (from ipywidgets) (4.10.1)
Requirement already satisfied: ipython>=4.0.0; python_version >= "3.3" in /usr/local/lib/python3.6/dist-packages (from ipywidgets) (5.5.0)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets) (3.5.1)
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets) (4.6.3)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets) (2.6.0)
Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets) (0.2.0)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.3.1->ipywidgets) (1.12.0)
Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.3.1->ipywidgets) (4.4.2)
Requirement already satisfied: jupyter-client in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.4)
Requirement already satisfied: tornado>=4.0 in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets) (4.5.3)
Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (47.1.1)
Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.8.1)
Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (2.1.3)
Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (1.0.18)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.7.5)
Requirement already satisfied: pexpect; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (4.8.0)
Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.6/dist-packages (from widgetsnbextension~=3.5.0->ipywidgets) (5.2.2)
Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.6/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (19.0.1)
Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (2.8.1)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.1.9)
Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != "win32"->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.6.0)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.11.2)
Requirement already satisfied: nbconvert in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (5.6.1)
Requirement already satisfied: terminado>=0.3.3; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.8.3)
Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.1.1)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.6.0)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.3)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.4.2)
Requirement already satisfied: testpath in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.4.4)
Requirement already satisfied: bleach in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (3.1.5)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.8.4)
Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (20.4)
Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.1)
Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.4.7)
Collecting pycaret
  Downloading https://files.pythonhosted.org/packages/c7/41/f7fa05b6ce3cb3096a35fb5ac6dc0f2bb23e8304f068618fb2501be0a562/pycaret-1.0.0-py3-none-any.whl (188kB)
     |████████████████████████████████| 194kB 10.0MB/s 
Collecting DateTime==4.3
  Downloading https://files.pythonhosted.org/packages/73/22/a5297f3a1f92468cc737f8ce7ba6e5f245fcfafeae810ba37bd1039ea01c/DateTime-4.3-py2.py3-none-any.whl (60kB)
     |████████████████████████████████| 61kB 7.1MB/s 
Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from pycaret) (0.10.1)
Requirement already satisfied: umap-learn in /usr/local/lib/python3.6/dist-packages (from pycaret) (0.4.3)
Requirement already satisfied: textblob in /usr/local/lib/python3.6/dist-packages (from pycaret) (0.15.3)
Collecting lightgbm==2.3.1
  Downloading https://files.pythonhosted.org/packages/0b/9d/ddcb2f43aca194987f1a99e27edf41cf9bc39ea750c3371c2a62698c509a/lightgbm-2.3.1-py2.py3-none-manylinux1_x86_64.whl (1.2MB)
     |████████████████████████████████| 1.2MB 18.3MB/s 
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from pycaret) (1.18.4)
Collecting shap==0.32.1
  Downloading https://files.pythonhosted.org/packages/57/43/08f152a59a1d60f0328b476bdd58c791498989981ab9c6d595ec5448a86a/shap-0.32.1.tar.gz (259kB)
     |████████████████████████████████| 266kB 44.8MB/s 
Requirement already satisfied: xgboost==0.90 in /usr/local/lib/python3.6/dist-packages (from pycaret) (0.90)
Collecting kmodes==0.10.1
  Downloading https://files.pythonhosted.org/packages/79/c0/f7d8a0eb41ac6f302b4bc100f91b6e0f2558425ccfefaa0ec0430f77ee97/kmodes-0.10.1-py2.py3-none-any.whl
Collecting yellowbrick==1.0.1
  Downloading https://files.pythonhosted.org/packages/d1/cf/6d6ab47c0759d246262f9bdb53e89be3814bf1774bc51fffff995f5859f9/yellowbrick-1.0.1-py3-none-any.whl (378kB)
     |████████████████████████████████| 389kB 43.1MB/s 
Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from pycaret) (0.15.1)
Collecting pyod
  Downloading https://files.pythonhosted.org/packages/cf/68/99df05e5666248e9c10359457e2da1b89943f5ac96749ceb1c131001eb88/pyod-0.8.0.tar.gz (93kB)
     |████████████████████████████████| 102kB 11.0MB/s 
Requirement already satisfied: gensim in /usr/local/lib/python3.6/dist-packages (from pycaret) (3.6.0)
Collecting catboost==0.20.2
  Downloading https://files.pythonhosted.org/packages/97/c4/586923de4634f88a31fd1b4966e15707a912b98b6f4566651b5ef58f36b5/catboost-0.20.2-cp36-none-manylinux1_x86_64.whl (63.9MB)
     |████████████████████████████████| 63.9MB 71kB/s 
Collecting scikit-learn==0.22
  Downloading https://files.pythonhosted.org/packages/2e/d0/860c4f6a7027e00acff373d9f5327f4ae3ed5872234b3cbdd7bcb52e5eff/scikit_learn-0.22-cp36-cp36m-manylinux1_x86_64.whl (7.0MB)
     |████████████████████████████████| 7.0MB 15.5MB/s 
Requirement already satisfied: plotly==4.4.1 in /usr/local/lib/python3.6/dist-packages (from pycaret) (4.4.1)
Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from pycaret) (1.0.3)
Requirement already satisfied: mlxtend in /usr/local/lib/python3.6/dist-packages (from pycaret) (0.14.0)
Requirement already satisfied: wordcloud in /usr/local/lib/python3.6/dist-packages (from pycaret) (1.5.0)
Collecting datefinder==0.7.0
  Downloading https://files.pythonhosted.org/packages/16/2b/af8efaee30c0ba4238cb4d0645a07100d33d11d20a8783c443ed8b813eb9/datefinder-0.7.0-py2.py3-none-any.whl
Collecting cufflinks==0.17.0
  Downloading https://files.pythonhosted.org/packages/e3/79/1b8673b2723e02919307d558896dbcedcb46807c4e29acd25cfe43a36c8b/cufflinks-0.17.0.tar.gz (81kB)
     |████████████████████████████████| 81kB 10.7MB/s 
Requirement already satisfied: nltk in /usr/local/lib/python3.6/dist-packages (from pycaret) (3.2.5)
Collecting pandas-profiling==2.3.0
  Downloading https://files.pythonhosted.org/packages/2c/2f/aae19e2173c10a9bb7fee5f5cad35dbe53a393960fc91abc477dcc4661e8/pandas-profiling-2.3.0.tar.gz (127kB)
     |████████████████████████████████| 133kB 45.5MB/s 
Collecting awscli
  Downloading https://files.pythonhosted.org/packages/df/f6/f8b31c70ec7f9cce99a1ae361c4e3a159531291eb74475a6466f4d836294/awscli-1.18.70-py2.py3-none-any.whl (3.1MB)
     |████████████████████████████████| 3.1MB 46.0MB/s 
Requirement already satisfied: IPython in /usr/local/lib/python3.6/dist-packages (from pycaret) (5.5.0)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from pycaret) (3.2.1)
Requirement already satisfied: spacy in /usr/local/lib/python3.6/dist-packages (from pycaret) (2.2.4)
Requirement already satisfied: ipywidgets in /usr/local/lib/python3.6/dist-packages (from pycaret) (7.5.1)
Collecting pyLDAvis
  Downloading https://files.pythonhosted.org/packages/a5/3a/af82e070a8a96e13217c8f362f9a73e82d61ac8fff3a2561946a97f96266/pyLDAvis-2.1.2.tar.gz (1.6MB)
     |████████████████████████████████| 1.6MB 46.3MB/s 
Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from DateTime==4.3->pycaret) (2018.9)
Collecting zope.interface
  Downloading https://files.pythonhosted.org/packages/57/33/565274c28a11af60b7cfc0519d46bde4125fcd7d32ebc0a81b480d0e8da6/zope.interface-5.1.0-cp36-cp36m-manylinux2010_x86_64.whl (234kB)
     |████████████████████████████████| 235kB 52.0MB/s 
Requirement already satisfied: scipy>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from seaborn->pycaret) (1.4.1)
Requirement already satisfied: numba!=0.47,>=0.46 in /usr/local/lib/python3.6/dist-packages (from umap-learn->pycaret) (0.48.0)
Requirement already satisfied: tbb in /usr/local/lib/python3.6/dist-packages (from umap-learn->pycaret) (2020.0.133)
Requirement already satisfied: tqdm>4.25.0 in /usr/local/lib/python3.6/dist-packages (from shap==0.32.1->pycaret) (4.41.1)
Requirement already satisfied: cycler>=0.10.0 in /usr/local/lib/python3.6/dist-packages (from yellowbrick==1.0.1->pycaret) (0.10.0)
Collecting combo
  Downloading https://files.pythonhosted.org/packages/78/52/e880bd923eba122515307d29ab43c1c356bad60610c27bed2cdec25d0240/combo-0.1.0.tar.gz
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from pyod->pycaret) (1.12.0)
Collecting suod
  Downloading https://files.pythonhosted.org/packages/a1/87/9170cabe1b5e10a7d095c0e28f2e30e7c1886a13f063de85d3cfacc06f4b/suod-0.0.4.tar.gz (2.1MB)
     |████████████████████████████████| 2.1MB 35.3MB/s 
Requirement already satisfied: smart-open>=1.2.1 in /usr/local/lib/python3.6/dist-packages (from gensim->pycaret) (2.0.0)
Requirement already satisfied: graphviz in /usr/local/lib/python3.6/dist-packages (from catboost==0.20.2->pycaret) (0.10.1)
Requirement already satisfied: retrying>=1.3.3 in /usr/local/lib/python3.6/dist-packages (from plotly==4.4.1->pycaret) (1.3.3)
Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->pycaret) (2.8.1)
Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from mlxtend->pycaret) (47.1.1)
Requirement already satisfied: pillow in /usr/local/lib/python3.6/dist-packages (from wordcloud->pycaret) (7.0.0)
Requirement already satisfied: regex>=2017.02.08 in /usr/local/lib/python3.6/dist-packages (from datefinder==0.7.0->pycaret) (2019.12.20)
Collecting chart-studio>=1.0.0
  Downloading https://files.pythonhosted.org/packages/ca/ce/330794a6b6ca4b9182c38fc69dd2a9cbff60fd49421cb8648ee5fee352dc/chart_studio-1.1.0-py3-none-any.whl (64kB)
     |████████████████████████████████| 71kB 10.1MB/s 
Requirement already satisfied: colorlover>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from cufflinks==0.17.0->pycaret) (0.3.0)
Requirement already satisfied: jinja2>=2.8 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.3.0->pycaret) (2.11.2)
Requirement already satisfied: missingno>=0.4.2 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.3.0->pycaret) (0.4.2)
Collecting htmlmin>=0.1.12
  Downloading https://files.pythonhosted.org/packages/b3/e7/fcd59e12169de19f0131ff2812077f964c6b960e7c09804d30a7bf2ab461/htmlmin-0.1.12.tar.gz
Collecting phik>=0.9.8
  Downloading https://files.pythonhosted.org/packages/01/5a/7ef1c04ce62cd72f900c06298dc2385840550d5c653a0dbc19109a5477e6/phik-0.10.0-py3-none-any.whl (599kB)
     |████████████████████████████████| 604kB 43.5MB/s 
Collecting confuse>=1.0.0
  Downloading https://files.pythonhosted.org/packages/f7/64/85dbcea372efee5cba13eaa10a3bfa7019b8fe0c3c8314d8e189116e477a/confuse-1.1.0.tar.gz
Requirement already satisfied: astropy in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.3.0->pycaret) (4.0.1.post1)
Collecting botocore==1.16.20
  Downloading https://files.pythonhosted.org/packages/e3/70/f11780c15bbad1ad369fb352838dd59c954d3753976500cf7ce4a9550e5d/botocore-1.16.20-py2.py3-none-any.whl (6.2MB)
     |████████████████████████████████| 6.2MB 40.2MB/s 
Collecting colorama<0.4.4,>=0.2.5; python_version != "3.4"
  Downloading https://files.pythonhosted.org/packages/c9/dc/45cdef1b4d119eb96316b3117e6d5708a08029992b2fee2c143c7a0a5cc5/colorama-0.4.3-py2.py3-none-any.whl
Collecting rsa<=3.5.0,>=3.1.2
  Downloading https://files.pythonhosted.org/packages/e1/ae/baedc9cb175552e95f3395c43055a6a5e125ae4d48a1d7a924baca83e92e/rsa-3.4.2-py2.py3-none-any.whl (46kB)
     |████████████████████████████████| 51kB 7.8MB/s 
Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from awscli->pycaret) (0.3.3)
Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from awscli->pycaret) (0.15.2)
Requirement already satisfied: PyYAML<5.4,>=3.10; python_version != "3.4" in /usr/local/lib/python3.6/dist-packages (from awscli->pycaret) (3.13)
Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from IPython->pycaret) (2.1.3)
Requirement already satisfied: pexpect; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from IPython->pycaret) (4.8.0)
Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from IPython->pycaret) (1.0.18)
Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from IPython->pycaret) (0.8.1)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from IPython->pycaret) (0.7.5)
Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from IPython->pycaret) (4.4.2)
Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.6/dist-packages (from IPython->pycaret) (4.3.3)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->pycaret) (1.2.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->pycaret) (2.4.7)
Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (7.4.0)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (2.0.3)
Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (1.1.3)
Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (0.4.1)
Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (0.6.0)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (1.0.2)
Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (1.0.0)
Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (2.23.0)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (3.0.2)
Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy->pycaret) (1.0.2)
Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets->pycaret) (5.0.6)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets->pycaret) (3.5.1)
Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.6/dist-packages (from ipywidgets->pycaret) (4.10.1)
Requirement already satisfied: wheel>=0.23.0 in /usr/local/lib/python3.6/dist-packages (from pyLDAvis->pycaret) (0.34.2)
Requirement already satisfied: numexpr in /usr/local/lib/python3.6/dist-packages (from pyLDAvis->pycaret) (2.7.1)
Requirement already satisfied: pytest in /usr/local/lib/python3.6/dist-packages (from pyLDAvis->pycaret) (3.6.4)
Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from pyLDAvis->pycaret) (0.16.0)
Collecting funcy
  Downloading https://files.pythonhosted.org/packages/ce/4b/6ffa76544e46614123de31574ad95758c421aae391a1764921b8a81e1eae/funcy-1.14.tar.gz (548kB)
     |████████████████████████████████| 552kB 43.3MB/s 
Requirement already satisfied: llvmlite<0.32.0,>=0.31.0dev0 in /usr/local/lib/python3.6/dist-packages (from numba!=0.47,>=0.46->umap-learn->pycaret) (0.31.0)
Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from smart-open>=1.2.1->gensim->pycaret) (1.13.13)
Requirement already satisfied: boto in /usr/local/lib/python3.6/dist-packages (from smart-open>=1.2.1->gensim->pycaret) (2.49.0)
Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from jinja2>=2.8->pandas-profiling==2.3.0->pycaret) (1.1.1)
Requirement already satisfied: urllib3<1.26,>=1.20; python_version != "3.4" in /usr/local/lib/python3.6/dist-packages (from botocore==1.16.20->awscli->pycaret) (1.24.3)
Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from botocore==1.16.20->awscli->pycaret) (0.10.0)
Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<=3.5.0,>=3.1.2->awscli->pycaret) (0.4.8)
Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != "win32"->IPython->pycaret) (0.6.0)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->IPython->pycaret) (0.1.9)
Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.2->IPython->pycaret) (0.2.0)
Requirement already satisfied: importlib-metadata>=0.20; python_version < "3.8" in /usr/local/lib/python3.6/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy->pycaret) (1.6.0)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy->pycaret) (2.9)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy->pycaret) (2020.4.5.1)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy->pycaret) (3.0.4)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets->pycaret) (2.6.0)
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets->pycaret) (4.6.3)
Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.6/dist-packages (from widgetsnbextension~=3.5.0->ipywidgets->pycaret) (5.2.2)
Requirement already satisfied: tornado>=4.0 in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets->pycaret) (4.5.3)
Requirement already satisfied: jupyter-client in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets->pycaret) (5.3.4)
Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.6/dist-packages (from pytest->pyLDAvis->pycaret) (8.3.0)
Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from pytest->pyLDAvis->pycaret) (19.3.0)
Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.6/dist-packages (from pytest->pyLDAvis->pycaret) (1.8.1)
Requirement already satisfied: pluggy<0.8,>=0.5 in /usr/local/lib/python3.6/dist-packages (from pytest->pyLDAvis->pycaret) (0.7.1)
Requirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.6/dist-packages (from pytest->pyLDAvis->pycaret) (1.4.0)
Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata>=0.20; python_version < "3.8"->catalogue<1.1.0,>=0.0.7->spacy->pycaret) (3.1.0)
Requirement already satisfied: nbconvert in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (5.6.1)
Requirement already satisfied: terminado>=0.3.3; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.8.3)
Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.6/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets->pycaret) (19.0.1)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.8.4)
Requirement already satisfied: testpath in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.4.4)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (1.4.2)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.6.0)
Requirement already satisfied: bleach in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (3.1.5)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.3)
Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.5.1)
Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (20.4)
Building wheels for collected packages: shap, pyod, cufflinks, pandas-profiling, pyLDAvis, combo, suod, htmlmin, confuse, funcy
  Building wheel for shap (setup.py) ... done
  Created wheel for shap: filename=shap-0.32.1-cp36-cp36m-linux_x86_64.whl size=376796 sha256=5d458b21962351a18650d9b9f1533a2827717f93975a2fed68c5cf147304945e
  Stored in directory: /root/.cache/pip/wheels/8e/b2/50/8fadb5a59789cb5bdeb01b800223be540651ae92915172050b
  Building wheel for pyod (setup.py) ... done
  Created wheel for pyod: filename=pyod-0.8.0-cp36-none-any.whl size=105563 sha256=fcba28d84f859960440f218894358511920338a4a3e3a45c4b3be3be4e0f89b6
  Stored in directory: /root/.cache/pip/wheels/ba/a6/81/2dd042e240090f3603a686b897d03402219a86e3f61bc71184
  Building wheel for cufflinks (setup.py) ... done
  Created wheel for cufflinks: filename=cufflinks-0.17.0-cp36-none-any.whl size=67744 sha256=2f20a9db3fa4d46805039093494e2b98fca2a9a842fd85362cfcaac3582ade19
  Stored in directory: /root/.cache/pip/wheels/44/d7/dc/e830ab00bc2dd3b2731295103baa070f8cbdda8891f71a7a8d
  Building wheel for pandas-profiling (setup.py) ... done
  Created wheel for pandas-profiling: filename=pandas_profiling-2.3.0-py2.py3-none-any.whl size=145035 sha256=58fc38bdcd2bd3d0bc353ca51a7b6c738f946fa2a9c22b04ab82d700afb8b4e2
  Stored in directory: /root/.cache/pip/wheels/ce/c7/f1/dbfef4848ebb048cb1d4a22d1ed0c62d8ff2523747235e19fe
  Building wheel for pyLDAvis (setup.py) ... done
  Created wheel for pyLDAvis: filename=pyLDAvis-2.1.2-py2.py3-none-any.whl size=97711 sha256=b85060fb6ec4882082c17074a4258d055416ec3767796721ff1374d5576944ca
  Stored in directory: /root/.cache/pip/wheels/98/71/24/513a99e58bb6b8465bae4d2d5e9dba8f0bef8179e3051ac414
  Building wheel for combo (setup.py) ... done
  Created wheel for combo: filename=combo-0.1.0-cp36-none-any.whl size=42043 sha256=56f1d5638ad40d56fb97f9cf62e16f172e7589a9f8e597f8a1b375adf5dffe14
  Stored in directory: /root/.cache/pip/wheels/00/fd/6c/8da495ef08ce61844a646df2423c2b8ecda377a89c90ecd88e
  Building wheel for suod (setup.py) ... done
  Created wheel for suod: filename=suod-0.0.4-cp36-none-any.whl size=2167157 sha256=a2f2b1c7b6afe93d7248c3184acf14d7e824e600bca2dfce82cb9acc40887a1a
  Stored in directory: /root/.cache/pip/wheels/57/55/e5/a4fca65bba231f6d0115059b589148774b41faea25b3f2aa27
  Building wheel for htmlmin (setup.py) ... done
  Created wheel for htmlmin: filename=htmlmin-0.1.12-cp36-none-any.whl size=27084 sha256=781b80818c6fa66867288423e247abbd83d154d285eebd80a60da24304f9a939
  Stored in directory: /root/.cache/pip/wheels/43/07/ac/7c5a9d708d65247ac1f94066cf1db075540b85716c30255459
  Building wheel for confuse (setup.py) ... done
  Created wheel for confuse: filename=confuse-1.1.0-cp36-none-any.whl size=17574 sha256=a8ae1caaa3e713feb7f30189ccc616a4831e81827c461d1e65e0097a86fa4a7d
  Stored in directory: /root/.cache/pip/wheels/f6/8b/23/41a1b516f6d8d4cc81f5bdb55394a47cdbe9659c53668d3c9e
  Building wheel for funcy (setup.py) ... done
  Created wheel for funcy: filename=funcy-1.14-py2.py3-none-any.whl size=32042 sha256=d93665021da1467ebba560ae3960301d1eea6ccebfd6a173f599aa08d515169b
  Stored in directory: /root/.cache/pip/wheels/20/5a/d8/1d875df03deae6f178dfdf70238cca33f948ef8a6f5209f2eb
Successfully built shap pyod cufflinks pandas-profiling pyLDAvis combo suod htmlmin confuse funcy
Installing collected packages: zope.interface, DateTime, scikit-learn, lightgbm, shap, kmodes, yellowbrick, combo, suod, pyod, catboost, datefinder, chart-studio, cufflinks, htmlmin, phik, confuse, pandas-profiling, botocore, colorama, rsa, awscli, funcy, pyLDAvis, pycaret
  Found existing installation: scikit-learn 0.22.2.post1
    Uninstalling scikit-learn-0.22.2.post1:
      Successfully uninstalled scikit-learn-0.22.2.post1
  Found existing installation: lightgbm 2.2.3
    Uninstalling lightgbm-2.2.3:
      Successfully uninstalled lightgbm-2.2.3
  Found existing installation: yellowbrick 0.9.1
    Uninstalling yellowbrick-0.9.1:
      Successfully uninstalled yellowbrick-0.9.1
  Found existing installation: cufflinks 0.17.3
    Uninstalling cufflinks-0.17.3:
      Successfully uninstalled cufflinks-0.17.3
  Found existing installation: pandas-profiling 1.4.1
    Uninstalling pandas-profiling-1.4.1:
      Successfully uninstalled pandas-profiling-1.4.1
  Found existing installation: botocore 1.16.13
    Uninstalling botocore-1.16.13:
      Successfully uninstalled botocore-1.16.13
  Found existing installation: rsa 4.0
    Uninstalling rsa-4.0:
      Successfully uninstalled rsa-4.0
Successfully installed DateTime-4.3 awscli-1.18.70 botocore-1.16.20 catboost-0.20.2 chart-studio-1.1.0 colorama-0.4.3 combo-0.1.0 confuse-1.1.0 cufflinks-0.17.0 datefinder-0.7.0 funcy-1.14 htmlmin-0.1.12 kmodes-0.10.1 lightgbm-2.3.1 pandas-profiling-2.3.0 phik-0.10.0 pyLDAvis-2.1.2 pycaret-1.0.0 pyod-0.8.0 rsa-3.4.2 scikit-learn-0.22 shap-0.32.1 suod-0.0.4 yellowbrick-1.0.1 zope.interface-5.1.0

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: OK
Colab mode activated.
Environment: Google Colab


import pycaret
from pycaret.utils import version
import pycaret.regression as pyc

version()

1.0.0


def compare_new_models(name,desc,mean_row,df_eval=None,sort='R2',show=True):
    """Append one model's mean CV metrics to a comparison table.

    The ``Mean`` line of a pycaret regression score grid is parsed into
    numbers, added as a new row, and the table is returned with
    duplicate rows removed and sorted by ``sort`` in descending order.

    Parameters
    -----------
    name: str
        Name of the model. e.g. xgboost
    desc: str
        Description of the model. e.g. tuned, calibrated
    mean_row: str
        The ``Mean`` line copied from the notebook output of a pycaret
        model. It must hold exactly seven TAB-separated fields: a label
        (ignored) followed by the metrics in the order
        MAE, MSE, RMSE, R2, RMSLE, MAPE.
    df_eval: pandas DataFrame, optional
        Table returned by a previous call. It is not modified; a new
        DataFrame is returned. When it is not a DataFrame (e.g. None)
        or is empty, a fresh table is started.
    sort: str
        Column to sort by, one of: MAE, MSE, RMSE, R2, RMSLE, MAPE.
        Sorting is always descending, so for R2 the best model comes
        first while for the error metrics the worst model comes first.
    show: bool
        When True, display the table with the ``sort`` column
        highlighted. This uses the ``display`` function that
        IPython/Jupyter provides, so use show=False outside a notebook.

    Returns
    -------
    pandas DataFrame
        Columns Model, Description, MAE, MSE, RMSE, R2, RMSLE, MAPE
        with numeric metric columns and a 0..n-1 index.

    Raises
    ------
    ValueError
        If ``mean_row`` does not have seven tab-separated fields or a
        metric field cannot be parsed as a number.
    """
    metric_cols = ['MAE', 'MSE', 'RMSE', 'R2', 'RMSLE', 'MAPE']
    columns = ['Model', 'Description'] + metric_cols

    fields = mean_row.split('\t')
    if len(fields) != len(metric_cols) + 1:
        raise ValueError(
            'mean_row must have 7 tab-separated fields '
            '(label, MAE, MSE, RMSE, R2, RMSLE, MAPE); got {}'
            .format(len(fields)))

    # Store metrics as floats, not strings: string columns would be
    # sorted lexicographically (e.g. '9.5' above '64436.3106').
    # float() also accepts scientific notation such as 1.432340e+10.
    metrics = [float(field) for field in fields[1:]]
    new_row = pd.DataFrame([[name, desc] + metrics], columns=columns)

    if isinstance(df_eval, pd.DataFrame) and len(df_eval) > 0:
        # Work on a copy so the caller's table is left untouched, and
        # coerce metrics that an older table may still hold as strings.
        previous = df_eval.copy()
        previous[metric_cols] = previous[metric_cols].apply(pd.to_numeric)
        df_eval = pd.concat([previous, new_row], ignore_index=True)
    else:
        df_eval = new_row

    df_eval = (df_eval.drop_duplicates()
                      .sort_values(sort, ascending=False)
                      .reset_index(drop=True))

    if show:
        # Build the Styler only when it is actually displayed.
        df_style = (df_eval.style.apply(lambda ser:
                    ['background: lightblue'
                     if ser.name == sort else ''
                     for _ in ser]))
        display(df_style)

    return df_eval


# Read the King County (Seattle) house-price CSV straight from GitHub.
# NOTE(review): `?raw=true` presumably makes the blob URL serve the raw
# file rather than the HTML page - confirm if the download ever breaks.
ifile = 'https://github.com/bhishanpdl/Datasets/blob/master/King_County_Seattle_House_Price_Kaggle/kc_house_data.csv?raw=true'
df = pd.read_csv(ifile)
# The recorded run printed (21613, 21).
print(df.shape)
df.head()

(21613, 21)


#ifile = Path.home() / 'Datasets/king_county_house_price/kc_house_price.csv'
#df = pd.read_csv(ifile)
#print(df.shape)
#df.head()


# Column to predict; every other column of `df` is kept as a candidate feature.
target = 'price'
features = df.columns.drop(target)


from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as df_test; SEED makes the split reproducible.
# df_test is not passed to pyc.setup and is only used for the final
# predictions and test metrics at the end of the notebook.
df_train, df_test = train_test_split(
    df,test_size=0.2, random_state=SEED, 
    )

print(df_train.shape)
df_train.head()

(17290, 21)


# Initialise the pycaret regression experiment on the training split:
# 80% of df_train is used for training (train_size=0.8), features are
# normalised and transformed, and the target is transformed as well.
experiment_june01_2020 = pyc.setup(df_train,target,
                                  train_size=0.8,
                                  session_id=SEED,
                                  sampling= True, 
                                  silent=True,
                                   normalize = True,
                                   transformation = True,
                                   transform_target = True,
                                   remove_multicollinearity = False
                                 )

# NOTE(review): per the pycaret docs, silent=True skips the interactive
# confirmation of inferred datatypes, so it is silent=False that lets you
# check the inferred datatypes -- confirm. After checking them, assign the
# numeric and categorical features yourself.
#
# if sampling = False, 100% of the data is used and it is too slow
# if sampling = True, we need to enter the sample fraction (e.g. 0.3) ourselves.

"""
Here, we have data < 25k rows, so I dont have to sample data.

""";

 
Setup Succesfully Completed!


# pyc.compare_models?


# Compare pycaret's regressors with 5-fold cross-validation, ranked by R2.
pyc.compare_models(sort = 'R2',fold=5)


# pyc.create_model?


# Train a CatBoost regressor with pycaret's default settings.
cb = pyc.create_model('catboost')


# "Mean" row copied by hand (tab-separated) from the score grid displayed by
# create_model above; df_eval=None starts a fresh comparison table.
mean_row = 'Mean	64436.3106	1.432340e+10	119254.9145	0.8890	0.1661	0.1201'
df_eval = compare_new_models('cb','default',mean_row,df_eval=None,sort='R2')


# Train a LightGBM regressor with pycaret's default settings.
lgb = pyc.create_model('lightgbm')


# "Mean" row copied by hand (tab-separated) from the score grid displayed by
# create_model above; appended to the existing comparison table.
mean_row = 'Mean	66869.0314	1.562930e+10	124447.5552	0.8793	0.1703	0.1236'
df_eval = compare_new_models('lgb','default',mean_row,df_eval=df_eval,sort='R2')


# pyc.tune_model?


# Tune CatBoost hyperparameters: 10 search iterations, 5-fold CV, optimising R2.
cb_tuned = pyc.tune_model('catboost',fold=5,optimize='r2',n_iter=10)


# "Mean" row copied by hand (tab-separated) from the score grid displayed by
# tune_model above; appended to the existing comparison table.
mean_row = 'Mean	63561.3715	1.424778e+10	119320.2978	0.8895	0.1651	0.1186'
df_eval = compare_new_models('cb_tuned','tuned, n_iter=10',
                             mean_row,df_eval=df_eval,sort='R2')


# pyc.plot_model?


# Residuals plot for the LightGBM model
# (this is a regression task, so there is no AUC-ROC plot).
pyc.plot_model(lgb, plot = 'residuals')


# pyc.plot_model(lgb, plot = 'parameter')


# Feature plot for the LightGBM model (plot='feature').
pyc.plot_model(lgb, plot='feature')


# evaluate model
# catboost is not available here (original author's note), so the
# LightGBM model is evaluated instead.
pyc.evaluate_model(lgb)


# pyc.ensemble_model?


# Decision tree used as the base learner for the ensemble below.
dt = pyc.create_model('dt')


# Bagging ensemble built from the decision tree, with 50 estimators.
dt_bagged = pyc.ensemble_model(dt, n_estimators=50,method='Bagging')


# dt_bagged_tuned = pyc.tune_model('dt', ensemble=True,
#                       method='Bagging',fold=3, n_iter=10, optimize='R2')


# dt_boosted_tuned = pyc.tune_model('dt', ensemble=True, method='Boosting')


# pyc.blend_models?


# blend_soft = pyc.blend_models(estimator_list = [dt, lgb,cb], method = 'soft')


# blend_hard = pyc.blend_models(estimator_list = [dt, lgb,cb], method = 'hard')


# pyc.stack_models?


# stack_soft = pyc.stack_models(estimator_list = [dt, lgb,cb], method = 'soft')

# stack_soft2 = pyc.stack_models(estimator_list = [cb, lgb],
#                                           method = 'soft',
#                                          meta_model='dt')


# stack_hard = pyc.stack_models(estimator_list = [dt, lgb,cb], method = 'hard')


# stack_soft_plot = pyc.stack_models([dt,cb,lgb], plot=True)


# Show the comparison table accumulated by compare_new_models so far.
df_eval


# interpret_model: SHAP
pyc.interpret_model(cb)


# interpret model : Correlation
# The original call passed `xgb`, but no xgboost model is created anywhere
# in this notebook, so it raised NameError. Use the CatBoost model that is
# interpreted in the cell above.
pyc.interpret_model(cb,plot='correlation')


# Finalise the tuned CatBoost model for deployment.
# NOTE(review): per the pycaret docs, finalize_model refits the estimator on
# the complete setup dataset, including pycaret's internal hold-out -- confirm.
final_cb = pyc.finalize_model(cb_tuned)
print(final_cb)

<catboost.core.CatBoostRegressor object at 0x7f96f2651438>


# Path prefix for saved models/experiments: the current directory when
# running on Google Colab, otherwise the ../models/ folder.
h = '' if 'google.colab' in sys.modules else "../models/"


# pyc.save_model?


# save the model (transformation pipeline + estimator) under the prefix `h`
# NOTE(review): pycaret's save_model appears to append '.pkl' to the given
# name itself, so this likely writes 'final_cb.pkl.pkl' -- confirm. It still
# round-trips because the load cell passes the same name.
pyc.save_model(final_cb, h + 'final_cb.pkl')

Transformation Pipeline and Model Succesfully Saved


# load model
# The name must match the one passed to save_model, including the '.pkl' suffix.
final_cb = pyc.load_model(model_name= h+ 'final_cb.pkl')

Transformation Pipeline and Model Sucessfully Loaded


# save entire experiment under the prefix `h`
pyc.save_experiment( h + "experiment_june1_2020")

Experiment Succesfully Saved


# Reload the experiment saved above; the result is indexable, and the
# captured output shows that its first element is a pandas DataFrame.
saved_experiment = pyc.load_experiment( h+ 'experiment_june1_2020')


type(saved_experiment[0])

pandas.core.frame.DataFrame


# Peek at the last five columns of the held-out test split.
df_test.iloc[:5,-5:]


# Predict on the held-out test split with the finalised model. This must run
# before any cell that reads df_preds (the original previewed df_preds one
# cell before it was assigned). Predictions are in the 'Label' column.
df_preds = pyc.predict_model(final_cb,df_test)
df_preds.head(2)


# True vs predicted price for the first rows (.loc slicing is label-based
# and inclusive, so :5 returns the six rows labelled 0..5).
df_preds.loc[:5,['price','Label']]


# Flat numpy arrays of the true prices and the predictions ('Label' column),
# used by the sklearn metrics below.
ytest = df_preds['price'].to_numpy().ravel()
ypreds = df_preds['Label'].to_numpy().ravel()


# metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error


from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score, cross_val_predict


def adjustedR2(rsquared,nrows,kcols):
    """
    Return r-squared penalised for the size of the data it was scored on.

    The penalty ``(kcols-1)/(nrows-kcols) * (1-rsquared)`` is subtracted
    from ``rsquared``, so the result is lower than the original value
    whenever ``kcols > 1`` and ``rsquared < 1``.

    rsquared: plain r-squared score
    nrows: number of rows of the data
    kcols: number of columns of the data
    """
    shrink_factor = (kcols-1)/(nrows-kcols)
    unexplained = 1-rsquared
    return rsquared - shrink_factor * unexplained


# Last five columns of the first five test rows.
df_test.iloc[:5, -5:]


# Root-mean-squared error of the predictions on the test split.
rmse_test = np.sqrt(mean_squared_error(ytest, ypreds)).round(3)
print(f'RMSE for Test = {rmse_test:,.2f}')

RMSE for Test = 120,620.81


# R-squared on the test split, then adjusted by the shape (rows, columns)
# of df_test.
r2_test = metrics.r2_score(ytest, ypreds)
ar2_test = adjustedR2(r2_test, *df_test.shape)
print(f'Adjusted R-Squared Value for Test = {ar2_test:,.2f}')

Adjusted R-Squared Value for Test = 0.89


# Report the wall-clock time since time_start_notebook was set.
# Dedicated names are used for the hour/minute/second parts: the original
# unpacked into `h` and `m`, which overwrote the model path prefix `h` that
# the save/load cells rely on (re-running them afterwards failed on
# float + str).
time_taken = time.time() - time_start_notebook
hours_taken, secs_left = divmod(time_taken, 60*60)
mins_taken, secs_taken = divmod(secs_left, 60)
print('Time taken to run whole notebook: {:.0f} hr '
      '{:.0f} min {:.0f} secs'.format(hours_taken, mins_taken, secs_taken))

Time taken to run whole notebook: 0 hr 42 min 18 secs

255

	id	date	price	bedrooms	bathrooms	sqft_living	sqft_lot	floors	condition	grade	sqft_above	sqft_basement	yr_built	yr_renovated	zipcode	lat	long	sqft_living15	sqft_lot15
0	7129300520	20141013T000000	221900.0	3	1.00	1180	5650	1.0	3	7	1180	0	1955	0	98178	47.5112	-122.257	1340	5650
1	6414100192	20141209T000000	538000.0	3	2.25	2570	7242	2.0	3	7	2170	400	1951	1991	98125	47.7210	-122.319	1690	7639
2	5631500400	20150225T000000	180000.0	2	1.00	770	10000	1.0	3	6	770	0	1933	0	98028	47.7379	-122.233	2720	8062
3	2487200875	20141209T000000	604000.0	4	3.00	1960	5000	1.0	5	7	1050	910	1965	0	98136	47.5208	-122.393	1360	5000
4	1954400510	20150218T000000	510000.0	3	2.00	1680	8080	1.0	3	8	1680	0	1987	0	98074	47.6168	-122.045	1800	7503

	id	date	price	bedrooms	bathrooms	sqft_living	sqft_lot	floors	condition	grade	sqft_above	sqft_basement	yr_built	zipcode	lat	long	sqft_living15	sqft_lot15
16000	2561340020	20140804T000000	325000.0	3	1.75	1780	11096	1.0	3	7	1210	570	1979	98074	47.6170	-122.051	1780	10640
11286	8598200070	20141208T000000	278000.0	2	2.50	1420	2229	2.0	3	7	1420	0	2004	98059	47.4871	-122.165	1500	2230
3201	6788200931	20140520T000000	710000.0	2	1.00	1790	4000	1.0	4	7	1040	750	1923	98112	47.6405	-122.301	1310	4000
11049	3023059012	20140910T000000	389900.0	4	1.00	1710	117176	1.5	4	6	1710	0	1942	98055	47.4497	-122.212	1940	12223
9716	5683500030	20150320T000000	489000.0	4	1.00	1150	5217	1.5	3	7	1150	0	1951	98115	47.6806	-122.287	1220	5217

	Model	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	CatBoost Regressor	64592.148200	14215470107.269501	119176.346600	0.889800	0.166400	0.120400
1	Light Gradient Boosting Machine	66884.589400	15796571244.814899	125633.597200	0.877500	0.170400	0.123400
2	Extra Trees Regressor	72483.485800	19587832615.993801	139872.280700	0.848900	0.182900	0.130900
3	Random Forest	73713.402000	20225171628.308701	142069.516200	0.844300	0.186300	0.133300
4	Support Vector Machine	77456.152700	20711579102.639599	143710.930500	0.840700	0.193200	0.143300
5	Extreme Gradient Boosting	78505.996300	20809170639.483101	144170.988700	0.838000	0.191900	0.142200
6	Gradient Boosting Regressor	78742.160500	21233123195.326099	145638.976400	0.835000	0.192100	0.142400
7	K Neighbors Regressor	99869.552900	35761671648.342400	188436.312100	0.727300	0.241700	0.177200
8	Bayesian Ridge	113296.754900	36925917071.032501	191915.037900	0.714600	0.262100	0.206300
9	Ridge Regression	113335.019500	37157024553.624603	192475.653000	0.713300	0.262100	0.206300
10	Linear Regression	113609.077900	37932739024.708801	194351.657600	0.708200	0.335700	0.206700
11	Huber Regressor	113545.891500	38144819354.328102	195063.454100	0.705000	0.262700	0.206400
12	Decision Tree	107171.103400	38934640299.095001	197226.399400	0.699000	0.267900	0.196700
13	Random Sample Consensus	114239.182000	40868722582.135300	201263.265100	0.687600	0.384600	0.207900
14	TheilSen Regressor	117588.755400	46524608614.504700	215377.382100	0.642400	0.270800	0.209200
15	AdaBoost Regressor	118076.890300	50041183354.384804	222864.169400	0.618500	0.270300	0.198700
16	Orthogonal Matching Pursuit	134421.230500	57902684676.156700	240201.416700	0.556300	0.300300	0.234700
17	Passive Aggressive Regressor	174304.380400	92722944394.307495	303319.552900	0.288900	0.399500	0.321200
18	Elastic Net	207392.100500	126957643149.589096	355698.992600	0.026900	0.489100	0.393600
19	Lasso Regression	220087.659000	138423064599.675507	371489.873300	-0.061800	0.526800	0.425500
20	Lasso Least Angle Regression	220087.659000	138423064599.675507	371489.873300	-0.061800	0.526800	0.425500
21	Least Angle Regression	268380.925300	13714221770108.257812	2206049.167500	-92.002900	0.409600	0.397700

	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	64481.1513	1.401212e+10	118372.8134	0.9064	0.1713	0.1239
1	69322.8428	1.735346e+10	131732.5457	0.8811	0.1745	0.1232
2	61543.9247	1.268745e+10	112638.5776	0.9185	0.1524	0.1103
3	62584.7682	1.588238e+10	126025.3140	0.8790	0.1639	0.1189
4	68926.9055	1.948512e+10	139589.0940	0.8496	0.1672	0.1220
5	61506.0550	1.074439e+10	103655.1665	0.8972	0.1726	0.1238
6	63893.8176	1.194844e+10	109308.9012	0.8992	0.1678	0.1224
7	67176.8396	1.360371e+10	116634.9234	0.9049	0.1642	0.1192
8	63541.8463	1.384954e+10	117684.0585	0.8819	0.1644	0.1190
9	61384.9547	1.366742e+10	116907.7507	0.8725	0.1628	0.1183
Mean	64436.3106	1.432340e+10	119254.9145	0.8890	0.1661	0.1201
SD	2869.5259	2.462738e+09	10083.0496	0.0191	0.0059	0.0039

	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	67190.6714	1.531570e+10	123756.6146	0.8977	0.1729	0.1261
1	72278.7791	1.872396e+10	136835.5286	0.8717	0.1775	0.1263
2	63815.7104	1.475640e+10	121475.9200	0.9053	0.1542	0.1130
3	66777.0431	1.714628e+10	130943.8113	0.8694	0.1724	0.1240
4	70686.8855	2.193133e+10	148092.3184	0.8307	0.1722	0.1259
5	63213.5374	1.043737e+10	102163.4636	0.9001	0.1764	0.1268
6	66075.9202	1.263149e+10	112389.9094	0.8935	0.1730	0.1269
7	68818.4552	1.507098e+10	122763.9120	0.8946	0.1676	0.1221
8	65920.3346	1.563569e+10	125042.7588	0.8666	0.1685	0.1218
9	63912.9769	1.464374e+10	121011.3148	0.8634	0.1682	0.1227
Mean	66869.0314	1.562930e+10	124447.5552	0.8793	0.1703	0.1236
SD	2840.8848	2.994665e+09	11920.6137	0.0219	0.0062	0.0040

Table of Contents

Data Description¶

Business Problem¶

Introduction to Pycaret¶

Imports¶

Useful Functions¶

Load the data¶

Train test split¶

Pycaret Setup¶

Comparing All Models¶

Create Models¶

Hyperparameter Tuning¶

Model Evaluation for Train data¶

Ensemble Modelling¶

Bagging¶

Boosting¶

Blending¶

Stacking¶

Model Interpretation¶

Finalize Model for Deployment¶

Model Persistence¶

Model Predictions¶

Model Evaluation for Test Data¶

Time taken¶

	Description	Value
0	session_id	100
1	Transform Target	True
2	Transform Target Method	box-cox
3	Original Data	(17290, 21)
4	Missing Values	False
5	Numeric Features	14
6	Categorical Features	5
7	Ordinal Features	False
8	High Cardinality Features	False
9	High Cardinality Method	None
10	Sampled Data	(17290, 21)
11	Transformed Train Set	(13832, 71)
12	Transformed Test Set	(3458, 71)
13	Numeric Imputer	mean
14	Categorical Imputer	constant
15	Normalize	True
16	Normalize Method	zscore
17	Transformation	True
18	Transformation Method	yeo-johnson
19	PCA	False
20	PCA Method	None
21	PCA Components	None
22	Ignore Low Variance	False
23	Combine Rare Levels	False
24	Rare Level Threshold	None
25	Numeric Binning	False
26	Remove Outliers	False
27	Outliers Threshold	None
28	Remove Multicollinearity	False
29	Multicollinearity Threshold	None
30	Clustering	False
31	Clustering Iteration	None
32	Polynomial Features	False
33	Polynomial Degree	None
34	Trignometry Features	False
35	Polynomial Threshold	None
36	Group Features	False
37	Feature Selection	False
38	Features Selection Threshold	None
39	Feature Interaction	False
40	Feature Ratio	False
41	Interaction Threshold	None

	Model	Description	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	catboost	default	64436.3106	1.432340e+10	119254.9145	0.8890	0.1661	0.1201
1	lda	default	66869.0314	1.562930e+10	124447.5552	0.8793	0.1703	0.1236

	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	65042.6877	1.446424e+10	120267.3716	0.9022	0.1708	0.1213
1	61716.2211	1.458440e+10	120765.8767	0.8984	0.1581	0.1137
2	63967.1302	1.540910e+10	124133.3831	0.8683	0.1693	0.1212
3	65408.9010	1.345048e+10	115976.2171	0.8973	0.1649	0.1196
4	61671.9178	1.333070e+10	115458.6407	0.8813	0.1625	0.1173
Mean	63561.3715	1.424778e+10	119320.2978	0.8895	0.1651	0.1186
SD	1596.6789	7.727283e+08	3232.5751	0.0128	0.0046	0.0028

	Model	Description	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	cb	tuned, n_iter=10	63561.3715	1.424778e+10	119320.2978	0.8895	0.1651	0.1186
1	catboost	default	64436.3106	1.432340e+10	119254.9145	0.8890	0.1661	0.1201
2	lda	default	66869.0314	1.562930e+10	124447.5552	0.8793	0.1703	0.1236

	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	105087.1521	3.879945e+10	196975.7545	0.7409	0.2605	0.1949
1	106371.4429	3.908185e+10	197691.3069	0.7323	0.2594	0.1886
2	112297.3970	5.007604e+10	223776.7740	0.6785	0.2653	0.1964
3	100977.1602	3.842983e+10	196035.2812	0.7073	0.2499	0.1843
4	107969.2799	4.279011e+10	206857.7128	0.6696	0.2717	0.1963
5	103326.1514	3.280744e+10	181128.2536	0.6860	0.2658	0.1945
6	109090.0738	3.727536e+10	193068.2729	0.6856	0.2749	0.2056
7	113866.2724	4.254017e+10	206252.6807	0.7026	0.2668	0.1975
8	106924.0672	4.184682e+10	204564.9625	0.6431	0.2624	0.1927
9	98262.3481	3.310170e+10	181938.7149	0.6911	0.2611	0.1893
Mean	106417.1345	3.967488e+10	198828.9714	0.6937	0.2638	0.1940
SD	4552.9865	4.791769e+09	11912.9418	0.0274	0.0066	0.0055

	MAE	MSE	RMSE	R2	RMSLE	MAPE
0	73742.1944	2.075128e+10	144053.0465	0.8615	0.1873	0.1356
1	77846.5950	2.269144e+10	150636.7914	0.8446	0.1900	0.1340
2	76008.9750	2.663546e+10	163203.7268	0.8290	0.1752	0.1256
3	72161.3691	2.121861e+10	145666.1037	0.8384	0.1850	0.1310
4	75454.4990	2.141405e+10	146335.3883	0.8347	0.1859	0.1334
5	70841.1055	1.525008e+10	123491.2076	0.8541	0.1906	0.1371
6	74280.1768	1.808723e+10	134488.7570	0.8474	0.1937	0.1389
7	75619.3137	1.900825e+10	137870.4195	0.8671	0.1861	0.1308
8	71494.9830	1.912422e+10	138290.3423	0.8369	0.1820	0.1302
9	70246.7668	1.798092e+10	134092.9485	0.8322	0.1859	0.1326
Mean	73769.5978	2.021615e+10	141812.8732	0.8446	0.1862	0.1329
SD	2385.4806	2.947925e+09	10259.7445	0.0122	0.0048	0.0036

	Object
0	Regression Setup Config
1	X_training Set
2	y_training Set
3	X_test Set
4	y_test Set
5	Transformation Pipeline
6	Target Inverse Transformer
7	Compare Models Score Grid
8	Tuned <catboost.core.CatBoostRegressor object ...
9	Tuned <catboost.core.CatBoostRegressor object ...
10	Decision Tree Regressor
11	Decision Tree Regressor Score Grid
12	BaggingRegressor
13	BaggingRegressor Score Grid
14	Final <catboost.core.CatBoostRegressor object ...
15	Final <catboost.core.CatBoostRegressor object ...
16	Final <catboost.core.CatBoostRegressor object ...

	id	date	price	bedrooms	bathrooms	sqft_living	sqft_lot	floors	waterfront	view	condition	grade	sqft_above	sqft_basement	yr_built	yr_renovated	zipcode	lat	long	sqft_living15	sqft_lot15	Label
0	8669180390	20140604T000000	285000.0	3	2.5	2437	5136	2.0	0	0	3	7	2437	0	2011	0	98002	47.3517	-122.210	2437	4614	323995.4558
1	3750603471	20150327T000000	239950.0	3	2.5	1560	4800	2.0	0	0	4	7	1560	0	1974	0	98001	47.2653	-122.285	1510	12240	226057.2445

	zipcode	lat	long	sqft_living15	sqft_lot15
19836	98002	47.3517	-122.210	2437	4614
10442	98001	47.2653	-122.285	1510	12240
20548	98058	47.4517	-122.084	1720	44866
11014	98103	47.6915	-122.348	1300	5100
4138	98122	47.6053	-122.306	1740	1883

	price	Label
0	285000.0	323995.4558
1	239950.0	226057.2445
2	460000.0	499764.8922
3	397500.0	520474.9235
4	545000.0	567107.6546
5	369950.0	346977.2257