import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)

import os
import time

# matplotlib
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
sns.set(context='notebook', style='whitegrid', rc={'figure.figsize': (12,8)})
plt.style.use('ggplot') # better than sns styles.
matplotlib.rcParams['figure.figsize'] = 12,8

# random state
SEED=100
np.random.seed(SEED)

#============= pandas settings
# Jupyter notebook settings for pandas
#pd.set_option('display.float_format', '{:,.2g}'.format) # numbers sep by comma
np.set_printoptions(precision=3)
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100) # None for all the rows
pd.set_option('display.max_colwidth', 200)

#========== ipython
import IPython
from IPython.display import display, HTML, Image, Markdown

# sklearn
import sklearn
from sklearn.preprocessing import StandardScaler

# timeseries
from datetime import date
import holidays

# prophet
import fbprophet
from fbprophet import Prophet
pd.plotting.register_matplotlib_converters() # prophet needs this
from fbprophet.plot import add_changepoints_to_plot
from fbprophet.plot import plot_plotly
from fbprophet.diagnostics import cross_validation

# plotly
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.tools import make_subplots
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)

# versions
import watermark
%load_ext watermark
%watermark -a "Bhishan Poudel" -d -v -m
print()
%watermark -iv

Bhishan Poudel 2020-10-14 

CPython 3.7.9
IPython 7.18.1

compiler   : Clang 10.0.0 
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit

matplotlib 3.1.3
sklearn    0.22.1
pandas     1.1.1
plotly     4.11.0
holidays   0.10.3
json       2.0.9
watermark  2.0.2
seaborn    0.11.0
IPython    7.18.1
numpy      1.19.1
fbprophet  0.7.1


%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;


import sys
sys.path.append('../models')

import util_prophet
from util_prophet import get_mape, get_smape, get_ts_eval
from util_prophet import plot_actual_forecast_plotly, plot_deltas_plotly

df_eval = None


%%time

df = pd.read_csv('../data/train_1.csv.zip',compression='zip')
cond = df['Page'] == "Prince_(musician)_en.wikipedia.org_all-access_all-agents"
df = df.loc[cond]

df = df.filter(regex="Page|2016")
df = df.melt(id_vars=['Page'],var_name='ds',value_name='y').drop('Page',axis=1)
df['ds'] = pd.to_datetime(df['ds'])

print(df.shape)
df.head()

(366, 2)
CPU times: user 8.91 s, sys: 779 ms, total: 9.69 s
Wall time: 9.93 s


ts = df.set_index('ds')['y']
print(type(ts))
ts.head()

<class 'pandas.core.series.Series'>

ds
2016-01-01    20947.0
2016-01-02    19466.0
2016-01-03     8587.0
2016-01-04     7386.0
2016-01-05     7719.0
Name: y, dtype: float64


pd.plotting.register_matplotlib_converters() # prophet needs this
# ts.plot()


iplot([{'x': ts.index,'y': ts.to_numpy()} ])


df_eval = None


# prophet expects two columns: ds and y
df1 = df

print(df1.dtypes)
df1.head()

ds    datetime64[ns]
y            float64
dtype: object


m1 = Prophet()
m1.fit(df1);

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


# future
periods = 30

# after fitting model, we can create future dataframe
# which has no data but only next N periods index
future1 = m1.make_future_dataframe(periods=periods)

print('num periods  : {}'.format(periods))
print(f'df1 shape     : {df1.shape}')
print(f'future shape : {future1.shape}')

future1.head().append(future1.tail())

num periods  : 30
df1 shape     : (366, 2)
future shape : (396, 1)


# forecast
forecast1 = m1.predict(future1)

print(f'df       shape : {df.shape}')
print(f'periods        : {periods}')
print(f'future1  shape : {future1.shape}')
print(f'forecast shape : {forecast1.shape}')
forecast1.head(2).T

df       shape : (366, 2)
periods        : 30
future1  shape : (396, 1)
forecast shape : (439, 16)


forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(2)


fig1 = m1.plot(forecast1)
fig2 = m1.plot_components(forecast1)


plot_actual_forecast_plotly(df1, ['forecast1'],[forecast1])


# observation: this prediction only captures the trend
# problem    : the trend is going down and predicts negative values
#
# attempt:
# use cap and floor


num_preds = int(0.2*len(df))
N = len(df)
num_preds

73


ytest = df1[['y']].iloc[N-num_preds:N].to_numpy().ravel()
ypreds = forecast1[['yhat']].iloc[N-num_preds:N].to_numpy().ravel()

print(ytest.shape, ypreds.shape)
print(ytest[-5:],ypreds[-5:])

model_name = 'fbprophet'
desc = 'default'
df_eval = get_ts_eval(model_name, desc, ytest,ypreds,
                    df_eval=df_eval,show=True)

(73,) (73,)
[34560. 31090. 22827. 19956. 31446.] [-75230.577 -79920.429  29455.144  19393.979 -45574.696]


ypreds.min(), ypreds.max()

(-79920.42871391447, 82250.22810524894)


df1['y'].describe()

# median is about 10k

count    3.660000e+02
mean     6.261184e+04
std      4.388072e+05
min      6.634000e+03
25%      8.114500e+03
50%      9.642500e+03
75%      1.403350e+04
max      5.816910e+06
Name: y, dtype: float64


future1.head(2)


# set cap and floor 
df2 = df1.copy()
CAP = 15_000
df2['cap'] = CAP
df2['floor'] = CAP

# future
future2 = future1.copy()
future2['cap'] = CAP
future2['floor'] = CAP

# model
m2 = Prophet(growth='linear')
forecast2 = m2.fit(df2).predict(future2)

# plots
fig1 = m2.plot(forecast2)
fig2 = m2.plot_components(forecast2)

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


plot_actual_forecast_plotly(df2, ['forecast2'],[forecast2])


# still I can see values below 0, there can not be views less than 0.
forecast2.head(2)


forecast2['yhat'].min(), forecast2['yhat'].max()

(-101038.4623935408, 212563.7355229822)


ytest = df1[['y']].iloc[N-num_preds:N].to_numpy().ravel()
ypreds = forecast1[['yhat']].iloc[N-num_preds:N].to_numpy().ravel()

model_name = 'fbprophet'
desc = 'before_cap_floor'
df_eval = get_ts_eval(model_name, desc, ytest,ypreds,
                    df_eval=df_eval,show=True)


forecast2.head(2).append(forecast2.tail(2)).iloc[:, [0,1,2,3,4,-1]]


forecast2.yhat.describe()

count       396.000000
mean      53746.447795
std       72551.923444
min     -101038.462394
25%        3308.088230
50%       55510.114209
75%      103260.752980
max      212563.735523
Name: yhat, dtype: float64


forecast2['yhat'].clip(0,CAP).describe()

count      396.000000
mean     10987.670455
std       6455.905153
min          0.000000
25%       3308.088230
50%      15000.000000
75%      15000.000000
max      15000.000000
Name: yhat, dtype: float64


# forecast2_copy = forecast2.copy()


forecast2['yhat'] = forecast2['yhat'].clip(0,CAP).to_numpy().ravel()

forecast2.yhat.describe()

count      396.000000
mean     10987.670455
std       6455.905153
min          0.000000
25%       3308.088230
50%      15000.000000
75%      15000.000000
max      15000.000000
Name: yhat, dtype: float64


ytest = df1[['y']].iloc[N-num_preds:N].to_numpy().ravel()
ypreds = forecast2[['yhat']].iloc[N-num_preds:N].to_numpy().ravel()

model_name = 'fbprophet'
desc = 'after_cap_floor'
df_eval = get_ts_eval(model_name, desc, ytest,ypreds,
                    df_eval=df_eval,show=True)


# observation: we improve smape
# problem    : we need to capture the seasonality
#
# attempt:
# use cap and floor


# keep default growth='linear'
# future2 has cap and floor

m3 = Prophet(growth='linear',
            daily_seasonality=True,
            weekly_seasonality=True,
            yearly_seasonality=True)

forecast3 = m3.fit(df2).predict(future2)

fig1 = m3.plot(forecast3)
fig2 = m3.plot_components(forecast3)


plot_actual_forecast_plotly(df2, ['forecast3'],[forecast3])


ytest = df1[['y']].iloc[N-num_preds:N].to_numpy().ravel()
ypreds = forecast3[['yhat']].iloc[N-num_preds:N].to_numpy().ravel()

model_name = 'fbprophet'
desc = 'seasonality_before_cap_floor'
df_eval = get_ts_eval(model_name, desc, ytest,ypreds,
                    df_eval=df_eval,show=True)


forecast3['yhat'] = forecast3['yhat'].clip(0,CAP).to_numpy().ravel()

ytest = df1[['y']].iloc[N-num_preds:N].to_numpy().ravel()
ypreds = forecast3[['yhat']].iloc[N-num_preds:N].to_numpy().ravel()

model_name = 'fbprophet'
desc = 'seasonality_after_cap_floor'
df_eval = get_ts_eval(model_name, desc, ytest,ypreds,
                    df_eval=df_eval,show=True)


from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(m3, forecast3)
py.iplot(fig)

Parameter	Description
growth	linear’ or ‘logistic’ to specify a linear or logistic trend
changepoints	List of dates at which to include potential changepoints (automatic if not specified)
n_changepoints	If changepoints in not supplied, you may provide the number of changepoints to be automatically included
changepoint_prior_scale	Parameter for changing flexibility of automatic changepoint selection

Parameter	Description
yearly_seasonality	Fit yearly seasonality
weekly_seasonality	Fit weekly seasonality
daily_seasonality	Fit daily seasonality
holidays	Feed dataframe containing holiday name and date
seasonality_prior_scale	Parameter for changing strength of seasonality model
holiday_prior_scale	Parameter for changing strength of holiday model

	0	1
ds	2016-01-01 00:00:00	2016-01-02 00:00:00
trend	18809.1	19943.7
yhat_lower	-493065	-510895
yhat_upper	637866	577393
trend_lower	18809.1	19943.7
trend_upper	18809.1	19943.7
additive_terms	60221.7	-3992.76
additive_terms_lower	60221.7	-3992.76
additive_terms_upper	60221.7	-3992.76
weekly	60221.7	-3992.76
weekly_lower	60221.7	-3992.76
weekly_upper	60221.7	-3992.76
multiplicative_terms	0	0
multiplicative_terms_lower	0	0
multiplicative_terms_upper	0	0
yhat	79030.8	15950.9

	ds	yhat	yhat_lower	yhat_upper
437	2017-03-13	-125558.009363	-658134.726322	447424.699972
438	2017-03-14	-133305.169471	-709927.768389	394742.582324

	ds	trend	cap	yhat_lower	yhat_upper	trend_lower	trend_upper	additive_terms	additive_terms_lower	additive_terms_upper	weekly	weekly_lower	weekly_upper	multiplicative_terms	multiplicative_terms_lower	multiplicative_terms_upper	yhat
0	2016-01-01	18809.132427	15000	-473906.873249	662825.203872	18809.132427	18809.132427	60221.698194	60221.698194	60221.698194	60221.698194	60221.698194	60221.698194	0.0	0.0	0.0	79030.830621
1	2016-01-02	19943.708331	15000	-548929.320413	546276.650624	19943.708331	19943.708331	-3992.761624	-3992.761624	-3992.761624	-3992.761624	-3992.761624	-3992.761624	0.0	0.0	0.0	15950.946707

Table of Contents

Data Description¶

Prophet Description¶

Evaluation Metric¶

Imports¶

Useful Scripts¶

Load the data¶

Data Preprocessing¶

Modelling: prophet¶

Create dataframe with two columns: ds and y¶

model1: default parameters¶

Model2: saturation cap and floor¶

Model3: Seasonality¶

Plotly Visualizations for prophet¶

	ds	y
0	2016-01-01	20947.0
1	2016-01-02	19466.0
2	2016-01-03	8587.0
3	2016-01-04	7386.0
4	2016-01-05	7719.0

	Model	Description	MAPE	SMAPE	RMSE	ME	MAE	MPE	CORR	MINMAX	ACF1
0	fbprophet	default	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491
1	fbprophet	before_cap_floor	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491

	Model	Description	MAPE	SMAPE	RMSE	ME	MAE	MPE	CORR	MINMAX	ACF1
0	fbprophet	after_cap_floor	82	147.0780	12,655	7,658	10,089	1	-0.0811	0.7741	0.4794
1	fbprophet	default	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491
2	fbprophet	before_cap_floor	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491

	Model	Description	MAPE	SMAPE	RMSE	ME	MAE	MPE	CORR	MINMAX	ACF1
0	fbprophet	seasonality_before_cap_floor	423	139.6339	54,487	-3,904	44,547	-0	0.1662	2.3876	0.5637
1	fbprophet	after_cap_floor	82	147.0780	12,655	7,658	10,089	1	-0.0811	0.7741	0.4794
2	fbprophet	default	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491
3	fbprophet	before_cap_floor	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491

	Model	Description	MAPE	SMAPE	RMSE	ME	MAE	MPE	CORR	MINMAX	ACF1
0	fbprophet	seasonality_after_cap_floor	65	100.4473	9,009	3,603	7,426	0	0.2990	0.5764	0.4837
1	fbprophet	seasonality_before_cap_floor	423	139.6339	54,487	-3,904	44,547	-0	0.1662	2.3876	0.5637
2	fbprophet	after_cap_floor	82	147.0780	12,655	7,658	10,089	1	-0.0811	0.7741	0.4794
3	fbprophet	default	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491
4	fbprophet	before_cap_floor	437	171.8289	54,429	25,011	48,699	2	-0.2529	3.3718	0.4491

	ds
0	2016-01-01
1	2016-01-02
2	2016-01-03
3	2016-01-04
4	2016-01-05
391	2017-01-26
392	2017-01-27
393	2017-01-28
394	2017-01-29
395	2017-01-30