Reference: https://www.kaggle.com/c/web-traffic-time-series-forecasting/data
Original data: train_1.csv
-----------------------------
rows = 145,063
columns = 551
first column = Page
date columns = 2015-07-01, 2015-07-02, ..., 2016-12-31 (550 columns)
file size: 284.6 MB
Data for modelling:
--------------------------------------------------------------------
Timeseries : Now You See Me es (Spain, random_state=42)
For ARIMA : we model a single time series (one column)
For sklearn : for linear regressors and ensemble learners we can use many columns
For fbprophet: we need a dataframe with only the columns ds and y (plus optional cap and floor)
References:
We use a decomposable time series model with three main model components: trend, seasonality, and holidays. They are combined in the following equation:
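$$ y(t)=g(t)+s(t)+h(t)+\epsilon_{t} $$
Here $g(t)$ is the trend, $s(t)$ the seasonality, $h(t)$ the holiday effects and $\epsilon_{t}$ the error term. Using time as a regressor, Prophet tries to fit several linear and non-linear functions of time as components.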
Modeling seasonality as an additive component is the same approach taken by exponential smoothing in the Holt-Winters technique.
We are, in effect, framing the forecasting problem as a curve-fitting exercise rather than looking explicitly at the time based dependence of each observation within a time series.
Trend parameters:
Parameter | Description |
---|---|
growth | ‘linear’ or ‘logistic’ to specify a linear or logistic trend |
changepoints | List of dates at which to include potential changepoints (automatic if not specified) |
n_changepoints | If changepoints is not supplied, you may provide the number of changepoints to be automatically included |
changepoint_prior_scale | Parameter for changing flexibility of automatic changepoint selection |
Seasonality & Holiday Parameters:
Parameter | Description |
---|---|
yearly_seasonality | Fit yearly seasonality |
weekly_seasonality | Fit weekly seasonality |
daily_seasonality | Fit daily seasonality |
holidays | Feed dataframe containing holiday name and date |
seasonality_prior_scale | Parameter for changing strength of seasonality model |
holiday_prior_scale | Parameter for changing strength of holiday model |
# from fbprophet import Prophet
# help(Prophet)
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
import os
import time
# matplotlib
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
sns.set(context='notebook', style='whitegrid', rc={'figure.figsize': (12,8)})
plt.style.use('ggplot') # better than sns styles.
matplotlib.rcParams['figure.figsize'] = 12,8
# random state
SEED=100
np.random.seed(SEED)
#============= pandas settings
# Jupyter notebook settings for pandas
#pd.set_option('display.float_format', '{:,.2g}'.format) # numbers sep by comma
np.set_printoptions(precision=3)
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100) # None for all the rows
pd.set_option('display.max_colwidth', 200)
#========== ipython
import IPython
from IPython.display import display, HTML, Image, Markdown
# sklearn
import sklearn
from sklearn.preprocessing import StandardScaler
# timeseries
from datetime import date
import holidays
# prophet
import fbprophet
from fbprophet import Prophet
pd.plotting.register_matplotlib_converters() # prophet needs this
from fbprophet.plot import add_changepoints_to_plot
from fbprophet.plot import plot_plotly
from fbprophet.diagnostics import cross_validation
# plotly
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.tools import make_subplots
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
# versions
import watermark
%load_ext watermark
%watermark -a "Bhishan Poudel" -d -v -m
print()
%watermark -iv
The watermark extension is already loaded. To reload it, use: %reload_ext watermark Bhishan Poudel 2020-10-14 CPython 3.7.9 IPython 7.18.1 compiler : Clang 10.0.0 system : Darwin release : 19.6.0 machine : x86_64 processor : i386 CPU cores : 4 interpreter: 64bit numpy 1.19.1 seaborn 0.11.0 plotly 4.11.0 matplotlib 3.1.3 IPython 7.18.1 json 2.0.9 sklearn 0.22.1 holidays 0.10.3 fbprophet 0.7.1 pandas 1.1.1 watermark 2.0.2
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
%%writefile ../models/util_prophet.py
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
import matplotlib
import matplotlib.pyplot as plt
plt.style.use('ggplot') # better than sns styles.
matplotlib.rcParams['figure.figsize'] = 12,8
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.tools import make_subplots
from plotly.offline import plot, iplot, init_notebook_mode
from numba import jit
import math
# https://www.kaggle.com/cpmpml/smape-weirdness
def get_smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error (SMAPE) in percent.

    Parameters
    -----------
    y_true -- actual values (pandas Series/DataFrame, numpy array or list)
    y_pred -- forecast values; only the first len(y_true) flattened entries
              are used, so a forecast longer than the actuals can be passed

    Returns
    -------
    out : float
        200/n * sum(|F - A| / (|A| + |F| + eps)), n being the number of actuals.

    Notes
    ------
    Machine epsilon is added to the denominator so that a pair with
    A == F == 0 contributes 0 instead of evaluating 0/0.
    """
    # np.asarray accepts pandas objects as well as plain arrays and lists,
    # whereas .to_numpy() only exists on pandas objects.
    A = np.asarray(y_true).ravel()
    F = np.asarray(y_pred).ravel()[:len(A)]
    denominator = np.abs(A) + np.abs(F) + np.finfo(float).eps
    return 200.0 / len(A) * np.sum(np.abs(F - A) / denominator)
def get_smape_fast(y_true, y_pred):
    """Loop-based implementation of SMAPE.

    Parameters
    -------------
    y_true: numpy array of non-negative actual values
    y_pred: numpy array with no NaNs and non-negative, at least as long
            as y_true

    Returns
    -------
    out : float

    Notes
    ------
    - Entries where y_true is None or NaN are skipped, and so are pairs whose
      sum is zero, but the final average still divides by the full length of
      y_true.
    - The denominator is a + b (no absolute values), which is why both inputs
      must be non-negative.
    """
    n_obs = y_true.shape[0]
    out = 0
    for i, a in enumerate(y_true):
        # Identity test for None; a plain truth test for NaN.
        if a is None or np.isnan(a):
            continue
        b = y_pred[i]
        c = a + b
        if c == 0:
            # Both values are zero: the term is defined as zero error.
            continue
        out += math.fabs(a - b) / c
    out *= (200.0 / n_obs)
    return out
def safe_median(s):
    """Return the median of the values in s after dropping NaN entries."""
    kept = []
    for value in s:
        if np.isnan(value):
            continue
        kept.append(value)
    return np.median(kept)
def get_mape(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) in percent.

    The error of each point is taken relative to y_true, so y_true is the
    divisor of every term.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_abs_error = np.mean(np.abs(relative_error))
    return mean_abs_error * 100
def plot_actual_forecast_mpl(df, forecast_str_lst, forecast_lst):
    """Plot the observed series and prophet forecasts with matplotlib.

    Parameters
    -----------
    df -- dataframe with columns ds,y (cap and floor are optional)
    forecast_str_lst -- list of strings, one legend label per forecast
    forecast_lst -- list of forecasts, in the same order as the labels

    Notes
    ------
    The observed values are drawn in blue; each forecast's yhat gets the next
    colour of a 10-colour palette, which repeats after ten forecasts.

    Example
    --------
    forecast_str_lst = ['forecast1', 'forecast2']
    forecast_lst = [forecast1, forecast2]
    plot_actual_forecast_mpl(df, forecast_str_lst, forecast_lst)
    """
    import matplotlib.pyplot as plt
    plt.style.use('ggplot')

    plt.figure(figsize=(12,8))
    plt.plot(df.y,'b',label='original')

    colors10_hex = ['#b03060','#ff0000', '#ff00ff',
                    '#67ceab', '#63c56c', '#225e31',
                    '#29b6f6', '#6495ed','#00008b',
                    '#ffa500']

    for i, f in enumerate(forecast_str_lst):
        forecast = forecast_lst[i]
        # Wrap around the palette so an 11th forecast does not raise IndexError.
        color = colors10_hex[i % len(colors10_hex)]
        plt.plot(forecast.yhat, c=color, label=f)
    plt.legend()
def plot_actual_forecast_sns(df, forecast_str_lst, forecast_lst):
    """Plot the observed series and prophet forecasts with seaborn.

    Parameters
    -----------
    df -- dataframe with columns ds,y (cap and floor are optional)
    forecast_str_lst -- list of strings, one column/legend name per forecast
    forecast_lst -- list of forecasts (dataframes with columns ds and yhat),
                    in the same order as the names

    Example
    --------
    forecast_str_lst = ['forecast1', 'forecast2']
    forecast_lst = [forecast1, forecast2]
    plot_actual_forecast_sns(df2, forecast_str_lst, forecast_lst)
    """
    import seaborn as sns
    import matplotlib.pyplot as plt
    plt.style.use('ggplot')

    plt.figure(figsize=(12,8))

    # Use the df argument (not a notebook global) and index it by date so
    # that all series line up on a common time axis.
    df_plot = df[['y']].copy()
    df_plot.index = pd.to_datetime(df['ds'])

    for i, f in enumerate(forecast_str_lst):
        forecast = forecast_lst[i]
        # Build a fresh Series rather than re-indexing forecast.yhat in place,
        # which would change the index of the caller's column object.
        ts = pd.Series(forecast['yhat'].to_numpy(),
                       index=pd.DatetimeIndex(pd.to_datetime(forecast['ds'])))
        df_tmp = pd.DataFrame({f: ts})
        df_plot = pd.concat([df_plot, df_tmp], axis=1)

    sns.lineplot(data=df_plot)
def plot_actual_forecast_plotly(df, forecast_str_lst, forecast_lst):
    """Plot the observed series and prophet forecasts with plotly.

    Parameters
    -----------
    df -- dataframe with columns ds,y (cap and floor are optional)
    forecast_str_lst -- list of strings, one trace name per forecast
    forecast_lst -- list of forecasts (dataframes with columns ds and yhat),
                    in the same order as the names

    Example
    --------
    forecast_str_lst = ['forecast1', 'forecast2']
    forecast_lst = [forecast1, forecast2]
    plot_actual_forecast_plotly(df2, forecast_str_lst, forecast_lst)
    """
    from plotly.offline import iplot

    # Observed values indexed by date; each forecast is added as one column.
    df_plot = df[['y']].copy()
    df_plot.index = pd.to_datetime(df['ds'])

    for i, f in enumerate(forecast_str_lst):
        forecast = forecast_lst[i]
        # Build a fresh Series rather than re-indexing forecast.yhat in place,
        # which would change the index of the caller's column object.
        ts = pd.Series(forecast['yhat'].to_numpy(),
                       index=pd.DatetimeIndex(pd.to_datetime(forecast['ds'])))
        df_tmp = pd.DataFrame({f: ts})
        df_plot = pd.concat([df_plot, df_tmp], axis=1)

    # One trace per column: the observed 'y' first, then every forecast.
    iplot([{'x': df_plot.index, 'y': df_plot[col], 'name': col}
           for col in df_plot.columns])
def plot_deltas(m):
    """Draw the model's 'delta' parameters as a matplotlib bar chart.

    The values plotted are m.params['delta'] averaged over axis 0, one bar
    per potential changepoint.

    Notes:
    -------
    1. If the bars are all increasing downward,
       we may need to change these quantities:
       - changepoint_range=0.8 (default is 0.8)
       - changepoint_prior_scale=0.7 (default is 0.05)
    """
    import matplotlib.pyplot as plt
    plt.style.use('ggplot')

    mean_deltas = m.params['delta'].mean(axis=0)
    positions = range(len(mean_deltas))

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.bar(positions, mean_deltas)
    ax.set_ylabel('Rate change (delta)')
    ax.set_xlabel('Potential changepoint')
    fig.tight_layout()
def plot_deltas_plotly(m):
    """Plot the model's 'delta' parameters as a plotly bar chart.

    Parameters
    -----------
    m -- fitted model whose params['delta'] is a 2-d array with one row per
         parameter sample and one column per potential changepoint

    Notes
    ------
    The rows are averaged (axis 0), as in plot_deltas, so that x and y always
    have one entry per changepoint. With a single row this is the row itself.
    """
    import plotly.graph_objs as go
    from plotly.offline import iplot

    # data to plot
    deltas = m.params['delta'].mean(axis=0)
    x = list(range(len(deltas)))
    y = deltas.tolist()

    # trace
    trace = go.Bar(x=x, y=y, name='Change Points')
    fig = go.Figure(data=[trace])
    iplot(fig)
def outliers_to_na(ts, devs):
    """Replace the outliers in column y by NaN.

    A value is an outlier when it lies more than devs standard deviations
    away from the median of y. The NaNs can afterwards be filled again,
    e.g. by 0 (in this wikipedia data NaNs are given 0).

    Parameters
    -----------
    ts -- dataframe with a column y; it is modified in place
    devs -- number of standard deviations around the median to keep

    Returns
    -------
    ts : the same dataframe object, with outliers in y set to NaN
    """
    y = ts['y']
    median = y.median()
    std = np.std(y)

    # The thresholds are fixed before anything is replaced, so one vectorised
    # mask is equivalent to checking the rows one at a time. It also avoids
    # chained assignment and integer-as-label lookups, which are unreliable
    # when the frame has a non-integer index.
    is_outlier = (y < median - devs * std) | (y > median + devs * std)
    ts['y'] = y.mask(is_outlier)
    return ts
def convert_ts_to_prophet_df(ts):
    """Build the two-column dataframe (ds, y) that prophet expects.

    Parameters
    -----------
    ts: timeseries whose index holds the dates and whose values are the
        observations

    Returns
    -------
    Dataframe with columns ds (the parsed dates) and y (the values of ts);
    the ds column is also used as the index.
    """
    stamps = pd.to_datetime(ts.index)
    frame = pd.DataFrame({'ds': stamps, 'y': ts.to_numpy()})
    frame.index = frame['ds']
    return frame
def remove_negs_from_forecast(forecast):
    """Replace negative forecasts by 0.

    The columns yhat, yhat_lower and yhat_upper are clipped at 0 in a copy;
    the dataframe passed in is left untouched.

    Parameters
    ----------
    forecast -- dataframe returned by prophet

    Returns
    -------
    Copy of forecast with the three yhat columns clipped at 0.

    Example
    --------
    m1 = Prophet()
    m1.fit(df1);
    future1 = m1.make_future_dataframe(periods=60)
    forecast1 = m1.predict(future1)
    forecast1 = remove_negs_from_forecast(forecast1)
    """
    forecast = forecast.copy()
    # Series.clip_lower no longer exists in pandas >= 1.0; clip(lower=...)
    # is the supported spelling.
    for col in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[col] = forecast[col].clip(lower=0)
    return forecast
Overwriting ../models/util_prophet.py
import sys
sys.path.append('../models')
from util_prophet import get_smape
from util_prophet import plot_actual_forecast_plotly, plot_deltas_plotly
%%time
# Decode the page titles as UTF-8: reading them as latin-1 turns non-ASCII
# titles into mojibake (e.g. "Ãrythrée" instead of "Érythrée").
df_raw = pd.read_csv('../data/train_1.csv.zip',compression='zip',encoding='utf-8')
print(df_raw.shape) # (145063, 551) we have 145k data and 551 columns
df_raw.head()
(145063, 551) CPU times: user 7.59 s, sys: 685 ms, total: 8.28 s Wall time: 8.76 s
Page | 2015-07-01 | 2015-07-02 | 2015-07-03 | 2015-07-04 | 2015-07-05 | 2015-07-06 | 2015-07-07 | 2015-07-08 | 2015-07-09 | 2015-07-10 | 2015-07-11 | 2015-07-12 | 2015-07-13 | 2015-07-14 | 2015-07-15 | 2015-07-16 | 2015-07-17 | 2015-07-18 | 2015-07-19 | 2015-07-20 | 2015-07-21 | 2015-07-22 | 2015-07-23 | 2015-07-24 | 2015-07-25 | 2015-07-26 | 2015-07-27 | 2015-07-28 | 2015-07-29 | 2015-07-30 | 2015-07-31 | 2015-08-01 | 2015-08-02 | 2015-08-03 | 2015-08-04 | 2015-08-05 | 2015-08-06 | 2015-08-07 | 2015-08-08 | 2015-08-09 | 2015-08-10 | 2015-08-11 | 2015-08-12 | 2015-08-13 | 2015-08-14 | 2015-08-15 | 2015-08-16 | 2015-08-17 | 2015-08-18 | ... | 2016-11-12 | 2016-11-13 | 2016-11-14 | 2016-11-15 | 2016-11-16 | 2016-11-17 | 2016-11-18 | 2016-11-19 | 2016-11-20 | 2016-11-21 | 2016-11-22 | 2016-11-23 | 2016-11-24 | 2016-11-25 | 2016-11-26 | 2016-11-27 | 2016-11-28 | 2016-11-29 | 2016-11-30 | 2016-12-01 | 2016-12-02 | 2016-12-03 | 2016-12-04 | 2016-12-05 | 2016-12-06 | 2016-12-07 | 2016-12-08 | 2016-12-09 | 2016-12-10 | 2016-12-11 | 2016-12-12 | 2016-12-13 | 2016-12-14 | 2016-12-15 | 2016-12-16 | 2016-12-17 | 2016-12-18 | 2016-12-19 | 2016-12-20 | 2016-12-21 | 2016-12-22 | 2016-12-23 | 2016-12-24 | 2016-12-25 | 2016-12-26 | 2016-12-27 | 2016-12-28 | 2016-12-29 | 2016-12-30 | 2016-12-31 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2NE1_zh.wikipedia.org_all-access_spider | 18.0 | 11.0 | 5.0 | 13.0 | 14.0 | 9.0 | 9.0 | 22.0 | 26.0 | 24.0 | 19.0 | 10.0 | 14.0 | 15.0 | 8.0 | 16.0 | 8.0 | 8.0 | 16.0 | 7.0 | 11.0 | 10.0 | 20.0 | 18.0 | 15.0 | 14.0 | 49.0 | 10.0 | 16.0 | 18.0 | 8.0 | 5.0 | 9.0 | 7.0 | 13.0 | 9.0 | 7.0 | 4.0 | 11.0 | 10.0 | 5.0 | 9.0 | 9.0 | 9.0 | 9.0 | 13.0 | 4.0 | 15.0 | 25.0 | ... | 13.0 | 8.0 | 15.0 | 14.0 | 12.0 | 6.0 | 11.0 | 10.0 | 42.0 | 21.0 | 24.0 | 14.0 | 11.0 | 204.0 | 14.0 | 45.0 | 33.0 | 28.0 | 18.0 | 14.0 | 47.0 | 15.0 | 14.0 | 18.0 | 20.0 | 14.0 | 16.0 | 14.0 | 20.0 | 60.0 | 22.0 | 15.0 | 17.0 | 19.0 | 18.0 | 21.0 | 21.0 | 47.0 | 65.0 | 17.0 | 32.0 | 63.0 | 15.0 | 26.0 | 14.0 | 20.0 | 22.0 | 19.0 | 18.0 | 20.0 |
1 | 2PM_zh.wikipedia.org_all-access_spider | 11.0 | 14.0 | 15.0 | 18.0 | 11.0 | 13.0 | 22.0 | 11.0 | 10.0 | 4.0 | 41.0 | 65.0 | 57.0 | 38.0 | 20.0 | 62.0 | 44.0 | 15.0 | 10.0 | 47.0 | 24.0 | 17.0 | 22.0 | 9.0 | 39.0 | 13.0 | 11.0 | 12.0 | 21.0 | 19.0 | 9.0 | 15.0 | 33.0 | 8.0 | 8.0 | 7.0 | 13.0 | 2.0 | 23.0 | 12.0 | 27.0 | 27.0 | 36.0 | 23.0 | 58.0 | 80.0 | 60.0 | 69.0 | 42.0 | ... | 12.0 | 11.0 | 14.0 | 28.0 | 23.0 | 20.0 | 9.0 | 12.0 | 11.0 | 14.0 | 14.0 | 15.0 | 15.0 | 11.0 | 20.0 | 13.0 | 19.0 | 621.0 | 57.0 | 17.0 | 23.0 | 19.0 | 21.0 | 47.0 | 28.0 | 22.0 | 22.0 | 65.0 | 27.0 | 17.0 | 17.0 | 13.0 | 9.0 | 18.0 | 22.0 | 17.0 | 15.0 | 22.0 | 23.0 | 19.0 | 17.0 | 42.0 | 28.0 | 15.0 | 9.0 | 30.0 | 52.0 | 45.0 | 26.0 | 20.0 |
2 | 3C_zh.wikipedia.org_all-access_spider | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 4.0 | 0.0 | 3.0 | 4.0 | 4.0 | 1.0 | 1.0 | 1.0 | 6.0 | 8.0 | 6.0 | 4.0 | 5.0 | 1.0 | 2.0 | 3.0 | 8.0 | 8.0 | 6.0 | 6.0 | 2.0 | 2.0 | 3.0 | 2.0 | 4.0 | 3.0 | 3.0 | 5.0 | 3.0 | 5.0 | 4.0 | 2.0 | 5.0 | 1.0 | 4.0 | 5.0 | 0.0 | 0.0 | 7.0 | 3.0 | 5.0 | 1.0 | 6.0 | 2.0 | ... | 6.0 | 4.0 | 2.0 | 4.0 | 6.0 | 5.0 | 4.0 | 4.0 | 3.0 | 3.0 | 9.0 | 3.0 | 5.0 | 4.0 | 0.0 | 1.0 | 4.0 | 5.0 | 8.0 | 8.0 | 1.0 | 1.0 | 2.0 | 5.0 | 3.0 | 3.0 | 3.0 | 7.0 | 3.0 | 9.0 | 8.0 | 3.0 | 210.0 | 5.0 | 4.0 | 6.0 | 2.0 | 2.0 | 4.0 | 3.0 | 3.0 | 1.0 | 1.0 | 7.0 | 4.0 | 4.0 | 6.0 | 3.0 | 4.0 | 17.0 |
3 | 4minute_zh.wikipedia.org_all-access_spider | 35.0 | 13.0 | 10.0 | 94.0 | 4.0 | 26.0 | 14.0 | 9.0 | 11.0 | 16.0 | 16.0 | 11.0 | 23.0 | 145.0 | 14.0 | 17.0 | 85.0 | 4.0 | 30.0 | 22.0 | 9.0 | 10.0 | 11.0 | 7.0 | 7.0 | 11.0 | 9.0 | 11.0 | 44.0 | 8.0 | 14.0 | 19.0 | 10.0 | 17.0 | 17.0 | 10.0 | 7.0 | 10.0 | 1.0 | 8.0 | 27.0 | 19.0 | 16.0 | 2.0 | 84.0 | 22.0 | 14.0 | 47.0 | 25.0 | ... | 38.0 | 13.0 | 14.0 | 17.0 | 26.0 | 14.0 | 10.0 | 9.0 | 23.0 | 15.0 | 7.0 | 10.0 | 7.0 | 10.0 | 14.0 | 17.0 | 11.0 | 9.0 | 11.0 | 5.0 | 10.0 | 8.0 | 17.0 | 13.0 | 23.0 | 40.0 | 16.0 | 17.0 | 41.0 | 17.0 | 8.0 | 9.0 | 18.0 | 12.0 | 12.0 | 18.0 | 13.0 | 18.0 | 23.0 | 10.0 | 32.0 | 10.0 | 26.0 | 27.0 | 16.0 | 11.0 | 17.0 | 19.0 | 10.0 | 11.0 |
4 | 52_Hz_I_Love_You_zh.wikipedia.org_all-access_spider | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 13.0 | 11.0 | 8.0 | 6.0 | 10.0 | 14.0 | 6.0 | 9.0 | 6.0 | 16.0 | 14.0 | 13.0 | 15.0 | 14.0 | 16.0 | 9.0 | 178.0 | 64.0 | 12.0 | 10.0 | 11.0 | 6.0 | 8.0 | 7.0 | 9.0 | 8.0 | 5.0 | 11.0 | 8.0 | 4.0 | 15.0 | 5.0 | 8.0 | 8.0 | 6.0 | 7.0 | 15.0 | 4.0 | 11.0 | 7.0 | 48.0 | 9.0 | 25.0 | 13.0 | 3.0 | 11.0 | 27.0 | 13.0 | 36.0 | 10.0 |
5 rows × 551 columns
df_sample = df_raw.sample(5, random_state=42) # I like 100 but use 42 here.
df_sample
Page | 2015-07-01 | 2015-07-02 | 2015-07-03 | 2015-07-04 | 2015-07-05 | 2015-07-06 | 2015-07-07 | 2015-07-08 | 2015-07-09 | 2015-07-10 | 2015-07-11 | 2015-07-12 | 2015-07-13 | 2015-07-14 | 2015-07-15 | 2015-07-16 | 2015-07-17 | 2015-07-18 | 2015-07-19 | 2015-07-20 | 2015-07-21 | 2015-07-22 | 2015-07-23 | 2015-07-24 | 2015-07-25 | 2015-07-26 | 2015-07-27 | 2015-07-28 | 2015-07-29 | 2015-07-30 | 2015-07-31 | 2015-08-01 | 2015-08-02 | 2015-08-03 | 2015-08-04 | 2015-08-05 | 2015-08-06 | 2015-08-07 | 2015-08-08 | 2015-08-09 | 2015-08-10 | 2015-08-11 | 2015-08-12 | 2015-08-13 | 2015-08-14 | 2015-08-15 | 2015-08-16 | 2015-08-17 | 2015-08-18 | ... | 2016-11-12 | 2016-11-13 | 2016-11-14 | 2016-11-15 | 2016-11-16 | 2016-11-17 | 2016-11-18 | 2016-11-19 | 2016-11-20 | 2016-11-21 | 2016-11-22 | 2016-11-23 | 2016-11-24 | 2016-11-25 | 2016-11-26 | 2016-11-27 | 2016-11-28 | 2016-11-29 | 2016-11-30 | 2016-12-01 | 2016-12-02 | 2016-12-03 | 2016-12-04 | 2016-12-05 | 2016-12-06 | 2016-12-07 | 2016-12-08 | 2016-12-09 | 2016-12-10 | 2016-12-11 | 2016-12-12 | 2016-12-13 | 2016-12-14 | 2016-12-15 | 2016-12-16 | 2016-12-17 | 2016-12-18 | 2016-12-19 | 2016-12-20 | 2016-12-21 | 2016-12-22 | 2016-12-23 | 2016-12-24 | 2016-12-25 | 2016-12-26 | 2016-12-27 | 2016-12-28 | 2016-12-29 | 2016-12-30 | 2016-12-31 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
83529 | Phabricator/Project_management_www.mediawiki.org_all-access_spider | 6.0 | 6.0 | 4.0 | 6.0 | 8.0 | 6.0 | 4.0 | 0.0 | 2.0 | 3.0 | 2.0 | 3.0 | 3.0 | 7.0 | 1.0 | 4.0 | 1.0 | 6.0 | 2.0 | 3.0 | 8.0 | 4.0 | 5.0 | 6.0 | 3.0 | 4.0 | 3.0 | 5.0 | 7.0 | 6.0 | 5.0 | 3.0 | 5.0 | 2.0 | 6.0 | 3.0 | 8.0 | 5.0 | 4.0 | 2.0 | 1.0 | 3.0 | 4.0 | 1.0 | 9.0 | 2.0 | 2.0 | 8.0 | 5.0 | ... | 4.0 | 5.0 | 5.0 | 10.0 | 6.0 | 5.0 | 5.0 | 7.0 | 4.0 | 6.0 | 7.0 | 19.0 | 10.0 | 3.0 | 7.0 | 13.0 | 11.0 | 10.0 | 7.0 | 10.0 | 18.0 | 6.0 | 6.0 | 5.0 | 8.0 | 7.0 | 4.0 | 10.0 | 14.0 | 6.0 | 4.0 | 6.0 | 3.0 | 7.0 | 12.0 | 5.0 | 9.0 | 7.0 | 9.0 | 23.0 | 6.0 | 6.0 | 11.0 | 4.0 | 6.0 | 5.0 | 7.0 | 6.0 | 6.0 | 9.0 |
70433 | Now_You_See_Me_es.wikipedia.org_desktop_all-agents | 242.0 | 271.0 | 309.0 | 227.0 | 321.0 | 311.0 | 242.0 | 236.0 | 243.0 | 266.0 | 381.0 | 347.0 | 336.0 | 344.0 | 267.0 | 286.0 | 277.0 | 325.0 | 238.0 | 303.0 | 268.0 | 371.0 | 285.0 | 266.0 | 298.0 | 312.0 | 303.0 | 246.0 | 355.0 | 272.0 | 289.0 | 314.0 | 400.0 | 355.0 | 312.0 | 277.0 | 375.0 | 287.0 | 349.0 | 469.0 | 306.0 | 309.0 | 306.0 | 265.0 | 263.0 | 223.0 | 253.0 | 338.0 | 310.0 | ... | 364.0 | 533.0 | 444.0 | 390.0 | 306.0 | 343.0 | 302.0 | 381.0 | 439.0 | 434.0 | 428.0 | 714.0 | 393.0 | 336.0 | 386.0 | 474.0 | 421.0 | 316.0 | 341.0 | 278.0 | 346.0 | 321.0 | 323.0 | 305.0 | 296.0 | 250.0 | 244.0 | 324.0 | 286.0 | 357.0 | 387.0 | 337.0 | 268.0 | 293.0 | 253.0 | 263.0 | 334.0 | 305.0 | 305.0 | 257.0 | 231.0 | 222.0 | 193.0 | 229.0 | 334.0 | 316.0 | 324.0 | 268.0 | 201.0 | 190.0 |
84729 | Zürich_Hackathon_2014_www.mediawiki.org_all-access_spider | 3.0 | 19.0 | 19.0 | 30.0 | 21.0 | 24.0 | 17.0 | 178.0 | 40.0 | 1.0 | 3.0 | 2.0 | 4.0 | 11.0 | 7.0 | 13.0 | 14.0 | 24.0 | 7.0 | 11.0 | 8.0 | 3.0 | 8.0 | 9.0 | 4.0 | 1.0 | 5.0 | 4.0 | 2.0 | 7.0 | 3.0 | 2.0 | 2.0 | 9.0 | 2.0 | 2.0 | 5.0 | 3.0 | 6.0 | 3.0 | 7.0 | 6.0 | 11.0 | 20.0 | 18.0 | 7.0 | 1.0 | 2.0 | 4.0 | ... | 5.0 | 2.0 | 5.0 | 3.0 | 2.0 | 4.0 | 4.0 | 3.0 | 5.0 | 5.0 | 4.0 | 6.0 | 8.0 | 7.0 | 11.0 | 8.0 | 5.0 | 5.0 | 5.0 | 5.0 | 9.0 | 7.0 | 5.0 | 3.0 | 8.0 | 10.0 | 3.0 | 7.0 | 5.0 | 1.0 | 7.0 | 8.0 | 6.0 | 7.0 | 5.0 | 2.0 | 10.0 | 6.0 | 5.0 | 10.0 | 6.0 | 7.0 | 4.0 | 8.0 | 2.0 | 4.0 | 9.0 | 4.0 | 11.0 | 12.0 |
7969 | Ãrythrée_fr.wikipedia.org_desktop_all-agents | 672.0 | 513.0 | 774.0 | 1164.0 | 546.0 | 755.0 | 555.0 | 494.0 | 4801.0 | 4514.0 | 1101.0 | 1113.0 | 747.0 | 923.0 | 601.0 | 554.0 | 381.0 | 322.0 | 323.0 | 657.0 | 417.0 | 612.0 | 991.0 | 728.0 | 556.0 | 777.0 | 663.0 | 602.0 | 676.0 | 855.0 | 462.0 | 315.0 | 1440.0 | 812.0 | 1178.0 | 1307.0 | 904.0 | 740.0 | 472.0 | 367.0 | 431.0 | 1053.0 | 790.0 | 1122.0 | 1312.0 | 598.0 | 599.0 | 756.0 | 890.0 | ... | 415.0 | 348.0 | 460.0 | 542.0 | 435.0 | 439.0 | 360.0 | 602.0 | 367.0 | 488.0 | 466.0 | 463.0 | 500.0 | 391.0 | 347.0 | 347.0 | 460.0 | 433.0 | 396.0 | 383.0 | 376.0 | 306.0 | 327.0 | 386.0 | 356.0 | 335.0 | 301.0 | 366.0 | 256.0 | 361.0 | 388.0 | 14152.0 | 418.0 | 390.0 | 353.0 | 313.0 | 367.0 | 310.0 | 374.0 | 282.0 | 308.0 | 294.0 | 358.0 | 204.0 | 323.0 | 438.0 | 345.0 | 299.0 | 306.0 | 211.0 |
92077 | Metallica_es.wikipedia.org_all-access_all-agents | 1534.0 | 1644.0 | 1704.0 | 1569.0 | 1534.0 | 1577.0 | 1608.0 | 1731.0 | 1919.0 | 1628.0 | 1640.0 | 1646.0 | 1635.0 | 1690.0 | 2132.0 | 1635.0 | 1680.0 | 1638.0 | 1658.0 | 1651.0 | 1687.0 | 1607.0 | 1758.0 | 1778.0 | 1625.0 | 1725.0 | 1951.0 | 1802.0 | 1746.0 | 1771.0 | 1813.0 | 2014.0 | 2161.0 | 2075.0 | 2146.0 | 1808.0 | 1774.0 | 1875.0 | 1931.0 | 1867.0 | 1672.0 | 1836.0 | 2004.0 | 1907.0 | 1930.0 | 1659.0 | 1843.0 | 1741.0 | 1837.0 | ... | 3394.0 | 3452.0 | 3406.0 | 3479.0 | 3711.0 | 8146.0 | 10561.0 | 8201.0 | 6535.0 | 5544.0 | 4764.0 | 5125.0 | 4927.0 | 4128.0 | 3830.0 | 3884.0 | 3467.0 | 3466.0 | 3165.0 | 3047.0 | 2929.0 | 2946.0 | 3123.0 | 3011.0 | 2856.0 | 2905.0 | 2792.0 | 2908.0 | 2596.0 | 2653.0 | 2399.0 | 2605.0 | 2731.0 | 2553.0 | 2759.0 | 2599.0 | 2395.0 | 2352.0 | 2333.0 | 2307.0 | 2367.0 | 2259.0 | 2229.0 | 2070.0 | 2774.0 | 2552.0 | 2524.0 | 2358.0 | 2291.0 | 2153.0 |
5 rows × 551 columns
# Positional row 1 of the 5-row sample is the page
# 'Now_You_See_Me_es.wikipedia.org_desktop_all-agents'. Column 0 holds the page
# name, the remaining columns hold the daily values.
ts = df_sample.iloc[1,1:].astype(np.float64)
ts.name = df_sample.iloc[1,0]
# The column labels are date strings; parse them into datetimes for the index.
ts.index = pd.to_datetime(ts.index,format='%Y-%m-%d')
ts.head()
2015-07-01 242.0 2015-07-02 271.0 2015-07-03 309.0 2015-07-04 227.0 2015-07-05 321.0 Name: Now_You_See_Me_es.wikipedia.org_desktop_all-agents, dtype: float64
pd.plotting.register_matplotlib_converters() # prophet needs this
ts.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd50cb8b590>
iplot([{'x': ts.index,'y': ts.to_numpy()} ])
# prophet expects two columns: ds and y
df1 = pd.DataFrame({'ds': ts.index, 'y': ts.to_numpy()})
print(df1.dtypes)
df1.head()
ds datetime64[ns] y float64 dtype: object
ds | y | |
---|---|---|
0 | 2015-07-01 | 242.0 |
1 | 2015-07-02 | 271.0 |
2 | 2015-07-03 | 309.0 |
3 | 2015-07-04 | 227.0 |
4 | 2015-07-05 | 321.0 |
# df1.plot(kind='line',x='ds',y='y')
iplot([{'x': df1['ds'], 'y': df1['y']}])
m1 = Prophet()
m1.fit(df1);
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this. INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
# future
# Dates to predict on: the 550 training dates plus 60 further periods.
future1 = m1.make_future_dataframe(periods=60)
print(df1.shape)
print(future1.shape)
# Show the first and last rows together. pd.concat replaces DataFrame.append,
# which was deprecated in pandas 1.4 and removed in pandas 2.0.
pd.concat([future1.head(), future1.tail()])
(550, 2) (610, 1)
ds | |
---|---|
0 | 2015-07-01 |
1 | 2015-07-02 |
2 | 2015-07-03 |
3 | 2015-07-04 |
4 | 2015-07-05 |
605 | 2017-02-25 |
606 | 2017-02-26 |
607 | 2017-02-27 |
608 | 2017-02-28 |
609 | 2017-03-01 |
# forecast
forecast1 = m1.predict(future1)
print(forecast1.shape)
forecast1.head(2)
(610, 16)
ds | trend | yhat_lower | yhat_upper | trend_lower | trend_upper | additive_terms | additive_terms_lower | additive_terms_upper | weekly | weekly_lower | weekly_upper | multiplicative_terms | multiplicative_terms_lower | multiplicative_terms_upper | yhat | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2015-07-01 | 307.751440 | -63.775098 | 607.354088 | 307.751440 | 307.751440 | -21.422462 | -21.422462 | -21.422462 | -21.422462 | -21.422462 | -21.422462 | 0.0 | 0.0 | 0.0 | 286.328978 |
1 | 2015-07-02 | 308.018106 | -78.864756 | 625.959924 | 308.018106 | 308.018106 | -40.765105 | -40.765105 | -40.765105 | -40.765105 | -40.765105 | -40.765105 | 0.0 | 0.0 | 0.0 | 267.253001 |
forecast1.columns
Index(['ds', 'trend', 'yhat_lower', 'yhat_upper', 'trend_lower', 'trend_upper', 'additive_terms', 'additive_terms_lower', 'additive_terms_upper', 'weekly', 'weekly_lower', 'weekly_upper', 'multiplicative_terms', 'multiplicative_terms_lower', 'multiplicative_terms_upper', 'yhat'], dtype='object')
forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
ds | yhat | yhat_lower | yhat_upper | |
---|---|---|---|---|
605 | 2017-02-25 | -274.213944 | -623.349663 | 87.750713 |
606 | 2017-02-26 | -179.663153 | -519.817398 | 170.186405 |
607 | 2017-02-27 | -185.673077 | -535.048488 | 155.627229 |
608 | 2017-02-28 | -296.683580 | -635.954126 | 60.707484 |
609 | 2017-03-01 | -303.608354 | -637.701588 | 48.712486 |
fig1 = m1.plot(forecast1)
fig2 = m1.plot_components(forecast1)
plot_actual_forecast_plotly(df1, ['forecast1'],[forecast1])
forecast1.tail()['yhat']
605 -274.213944 606 -179.663153 607 -185.673077 608 -296.683580 609 -303.608354 Name: yhat, dtype: float64
# observation: this prediction only captures the trend
# problem : the trend is going down and predicts negative values
#
# todo:
# use logistic growth and give cap and floor for dataframes df and forecast.
# set the cap to values of trend
# Logistic growth needs a 'cap' column (and optionally 'floor') on both the
# training frame and the future frame, so add the same constants to each.
# NOTE(review): the series contains values above 500 (e.g. 533 and 714 in
# November 2016), so this cap is lower than some observations -- confirm that
# this is intended.
df2 = df1.copy()
df2['cap'] = 500
df2['floor'] = 0.0
future2 = future1.copy()
future2['cap'] = 500
future2['floor'] = 0.0
# take growth = 'logistic'
m2 = Prophet(growth='logistic')
# fit() is chained straight into predict() on the capped future frame.
forecast2 = m2.fit(df2).predict(future2)
# Plotting both the forecast predictions and components
fig1 = m2.plot(forecast2)
fig2 = m2.plot_components(forecast2)
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this. INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
# Conclusion: in this case the prediction trend has max 500 and min 0.0
# as we have set the values.
# keep default growth='linear' instead of logistic
# Linear trend with daily, weekly and yearly seasonality all switched on
# explicitly (the default fits above disabled yearly and daily seasonality).
m3 = Prophet(growth='linear',
daily_seasonality=True,
weekly_seasonality=True,
yearly_seasonality=True)
# NOTE(review): df2/future2 still carry the cap and floor columns added for the
# logistic model; presumably they are ignored for linear growth -- confirm.
forecast3 = m3.fit(df2).predict(future2)
fig1 = m3.plot(forecast3)
fig2 = m3.plot_components(forecast3)
# Interactive overlay of the observed values and this forecast.
plot_actual_forecast_plotly(df2, ['forecast3'],[forecast3])
# Last five predictions, to check whether the forecast is still negative.
forecast3.tail()['yhat']
605 -163.299063 606 -70.728889 607 -78.644218 608 -190.036328 609 -194.024313 Name: yhat, dtype: float64
# observations:
# 1. trend goes downwards in the end and gives negative values
# 2. gives better fit than last time
# 3. peaks are captured better.
from fbprophet.plot import add_changepoints_to_plot
fig = m3.plot(forecast3)
a = add_changepoints_to_plot(fig.gca(), m3, forecast3)
# observation:
# 1. the default changepoint_range = 0.8 uses only the first 80% of the data to find changepoints
# 2. we see some peaks after the first 80% of the data, so increase changepoint_range
# Same seasonalities as m3, with changepoint_range raised from its default
# of 0.8 to 0.9.
m4 = Prophet(daily_seasonality=True,
weekly_seasonality=True,
yearly_seasonality=True,
changepoint_range=0.9)
forecast4 = m4.fit(df2).predict(future2)
fig = m4.plot(forecast4)
# Overlay the changepoints on the forecast figure.
a = add_changepoints_to_plot(fig.gca(), m4, forecast4)
# Bar chart of the fitted delta value at each potential changepoint.
plot_deltas_plotly(m4)
# observation:
# 1. when the changepoint range increases, predictions become more negative
# up to about 17, then become more positive but are still negative.
# 2. I can fiddle with the default changepoint_range of 0.8.
#
# todo:
# the default changepoint_prior_scale is 0.05; increase it to make the trend more flexible.
# Back to changepoint_range=0.8, but with changepoint_prior_scale raised from
# its default of 0.05 to 0.7.
m5 = Prophet(daily_seasonality=True,
weekly_seasonality=True,
yearly_seasonality=True,
changepoint_range=0.8,
changepoint_prior_scale=0.7)
forecast5 = m5.fit(df2).predict(future2)
fig = m5.plot(forecast5)
# Overlay the changepoints on the forecast figure.
a = add_changepoints_to_plot(fig.gca(), m5, forecast5)
# Bar chart of the fitted delta value at each potential changepoint.
plot_deltas_plotly(m5)
# Compare all five forecasts against the observed series in one figure.
forecast_str_lst = ['forecast1', 'forecast2', 'forecast3','forecast4','forecast5']
# List the frames explicitly (same order as the labels) instead of resolving
# the variable names through eval().
forecast_lst = [forecast1, forecast2, forecast3, forecast4, forecast5]
plot_actual_forecast_plotly(df2, forecast_str_lst,forecast_lst)
# observations:
# 1. forecast5 is almost similar to 4 and 3
# 2. forecast5 gives better peaks than 3
df_sample
Page | 2015-07-01 | 2015-07-02 | 2015-07-03 | 2015-07-04 | 2015-07-05 | 2015-07-06 | 2015-07-07 | 2015-07-08 | 2015-07-09 | 2015-07-10 | 2015-07-11 | 2015-07-12 | 2015-07-13 | 2015-07-14 | 2015-07-15 | 2015-07-16 | 2015-07-17 | 2015-07-18 | 2015-07-19 | 2015-07-20 | 2015-07-21 | 2015-07-22 | 2015-07-23 | 2015-07-24 | 2015-07-25 | 2015-07-26 | 2015-07-27 | 2015-07-28 | 2015-07-29 | 2015-07-30 | 2015-07-31 | 2015-08-01 | 2015-08-02 | 2015-08-03 | 2015-08-04 | 2015-08-05 | 2015-08-06 | 2015-08-07 | 2015-08-08 | 2015-08-09 | 2015-08-10 | 2015-08-11 | 2015-08-12 | 2015-08-13 | 2015-08-14 | 2015-08-15 | 2015-08-16 | 2015-08-17 | 2015-08-18 | ... | 2016-11-12 | 2016-11-13 | 2016-11-14 | 2016-11-15 | 2016-11-16 | 2016-11-17 | 2016-11-18 | 2016-11-19 | 2016-11-20 | 2016-11-21 | 2016-11-22 | 2016-11-23 | 2016-11-24 | 2016-11-25 | 2016-11-26 | 2016-11-27 | 2016-11-28 | 2016-11-29 | 2016-11-30 | 2016-12-01 | 2016-12-02 | 2016-12-03 | 2016-12-04 | 2016-12-05 | 2016-12-06 | 2016-12-07 | 2016-12-08 | 2016-12-09 | 2016-12-10 | 2016-12-11 | 2016-12-12 | 2016-12-13 | 2016-12-14 | 2016-12-15 | 2016-12-16 | 2016-12-17 | 2016-12-18 | 2016-12-19 | 2016-12-20 | 2016-12-21 | 2016-12-22 | 2016-12-23 | 2016-12-24 | 2016-12-25 | 2016-12-26 | 2016-12-27 | 2016-12-28 | 2016-12-29 | 2016-12-30 | 2016-12-31 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
83529 | Phabricator/Project_management_www.mediawiki.org_all-access_spider | 6.0 | 6.0 | 4.0 | 6.0 | 8.0 | 6.0 | 4.0 | 0.0 | 2.0 | 3.0 | 2.0 | 3.0 | 3.0 | 7.0 | 1.0 | 4.0 | 1.0 | 6.0 | 2.0 | 3.0 | 8.0 | 4.0 | 5.0 | 6.0 | 3.0 | 4.0 | 3.0 | 5.0 | 7.0 | 6.0 | 5.0 | 3.0 | 5.0 | 2.0 | 6.0 | 3.0 | 8.0 | 5.0 | 4.0 | 2.0 | 1.0 | 3.0 | 4.0 | 1.0 | 9.0 | 2.0 | 2.0 | 8.0 | 5.0 | ... | 4.0 | 5.0 | 5.0 | 10.0 | 6.0 | 5.0 | 5.0 | 7.0 | 4.0 | 6.0 | 7.0 | 19.0 | 10.0 | 3.0 | 7.0 | 13.0 | 11.0 | 10.0 | 7.0 | 10.0 | 18.0 | 6.0 | 6.0 | 5.0 | 8.0 | 7.0 | 4.0 | 10.0 | 14.0 | 6.0 | 4.0 | 6.0 | 3.0 | 7.0 | 12.0 | 5.0 | 9.0 | 7.0 | 9.0 | 23.0 | 6.0 | 6.0 | 11.0 | 4.0 | 6.0 | 5.0 | 7.0 | 6.0 | 6.0 | 9.0 |
70433 | Now_You_See_Me_es.wikipedia.org_desktop_all-agents | 242.0 | 271.0 | 309.0 | 227.0 | 321.0 | 311.0 | 242.0 | 236.0 | 243.0 | 266.0 | 381.0 | 347.0 | 336.0 | 344.0 | 267.0 | 286.0 | 277.0 | 325.0 | 238.0 | 303.0 | 268.0 | 371.0 | 285.0 | 266.0 | 298.0 | 312.0 | 303.0 | 246.0 | 355.0 | 272.0 | 289.0 | 314.0 | 400.0 | 355.0 | 312.0 | 277.0 | 375.0 | 287.0 | 349.0 | 469.0 | 306.0 | 309.0 | 306.0 | 265.0 | 263.0 | 223.0 | 253.0 | 338.0 | 310.0 | ... | 364.0 | 533.0 | 444.0 | 390.0 | 306.0 | 343.0 | 302.0 | 381.0 | 439.0 | 434.0 | 428.0 | 714.0 | 393.0 | 336.0 | 386.0 | 474.0 | 421.0 | 316.0 | 341.0 | 278.0 | 346.0 | 321.0 | 323.0 | 305.0 | 296.0 | 250.0 | 244.0 | 324.0 | 286.0 | 357.0 | 387.0 | 337.0 | 268.0 | 293.0 | 253.0 | 263.0 | 334.0 | 305.0 | 305.0 | 257.0 | 231.0 | 222.0 | 193.0 | 229.0 | 334.0 | 316.0 | 324.0 | 268.0 | 201.0 | 190.0 |
84729 | Zürich_Hackathon_2014_www.mediawiki.org_all-access_spider | 3.0 | 19.0 | 19.0 | 30.0 | 21.0 | 24.0 | 17.0 | 178.0 | 40.0 | 1.0 | 3.0 | 2.0 | 4.0 | 11.0 | 7.0 | 13.0 | 14.0 | 24.0 | 7.0 | 11.0 | 8.0 | 3.0 | 8.0 | 9.0 | 4.0 | 1.0 | 5.0 | 4.0 | 2.0 | 7.0 | 3.0 | 2.0 | 2.0 | 9.0 | 2.0 | 2.0 | 5.0 | 3.0 | 6.0 | 3.0 | 7.0 | 6.0 | 11.0 | 20.0 | 18.0 | 7.0 | 1.0 | 2.0 | 4.0 | ... | 5.0 | 2.0 | 5.0 | 3.0 | 2.0 | 4.0 | 4.0 | 3.0 | 5.0 | 5.0 | 4.0 | 6.0 | 8.0 | 7.0 | 11.0 | 8.0 | 5.0 | 5.0 | 5.0 | 5.0 | 9.0 | 7.0 | 5.0 | 3.0 | 8.0 | 10.0 | 3.0 | 7.0 | 5.0 | 1.0 | 7.0 | 8.0 | 6.0 | 7.0 | 5.0 | 2.0 | 10.0 | 6.0 | 5.0 | 10.0 | 6.0 | 7.0 | 4.0 | 8.0 | 2.0 | 4.0 | 9.0 | 4.0 | 11.0 | 12.0 |
7969 | Ãrythrée_fr.wikipedia.org_desktop_all-agents | 672.0 | 513.0 | 774.0 | 1164.0 | 546.0 | 755.0 | 555.0 | 494.0 | 4801.0 | 4514.0 | 1101.0 | 1113.0 | 747.0 | 923.0 | 601.0 | 554.0 | 381.0 | 322.0 | 323.0 | 657.0 | 417.0 | 612.0 | 991.0 | 728.0 | 556.0 | 777.0 | 663.0 | 602.0 | 676.0 | 855.0 | 462.0 | 315.0 | 1440.0 | 812.0 | 1178.0 | 1307.0 | 904.0 | 740.0 | 472.0 | 367.0 | 431.0 | 1053.0 | 790.0 | 1122.0 | 1312.0 | 598.0 | 599.0 | 756.0 | 890.0 | ... | 415.0 | 348.0 | 460.0 | 542.0 | 435.0 | 439.0 | 360.0 | 602.0 | 367.0 | 488.0 | 466.0 | 463.0 | 500.0 | 391.0 | 347.0 | 347.0 | 460.0 | 433.0 | 396.0 | 383.0 | 376.0 | 306.0 | 327.0 | 386.0 | 356.0 | 335.0 | 301.0 | 366.0 | 256.0 | 361.0 | 388.0 | 14152.0 | 418.0 | 390.0 | 353.0 | 313.0 | 367.0 | 310.0 | 374.0 | 282.0 | 308.0 | 294.0 | 358.0 | 204.0 | 323.0 | 438.0 | 345.0 | 299.0 | 306.0 | 211.0 |
92077 | Metallica_es.wikipedia.org_all-access_all-agents | 1534.0 | 1644.0 | 1704.0 | 1569.0 | 1534.0 | 1577.0 | 1608.0 | 1731.0 | 1919.0 | 1628.0 | 1640.0 | 1646.0 | 1635.0 | 1690.0 | 2132.0 | 1635.0 | 1680.0 | 1638.0 | 1658.0 | 1651.0 | 1687.0 | 1607.0 | 1758.0 | 1778.0 | 1625.0 | 1725.0 | 1951.0 | 1802.0 | 1746.0 | 1771.0 | 1813.0 | 2014.0 | 2161.0 | 2075.0 | 2146.0 | 1808.0 | 1774.0 | 1875.0 | 1931.0 | 1867.0 | 1672.0 | 1836.0 | 2004.0 | 1907.0 | 1930.0 | 1659.0 | 1843.0 | 1741.0 | 1837.0 | ... | 3394.0 | 3452.0 | 3406.0 | 3479.0 | 3711.0 | 8146.0 | 10561.0 | 8201.0 | 6535.0 | 5544.0 | 4764.0 | 5125.0 | 4927.0 | 4128.0 | 3830.0 | 3884.0 | 3467.0 | 3466.0 | 3165.0 | 3047.0 | 2929.0 | 2946.0 | 3123.0 | 3011.0 | 2856.0 | 2905.0 | 2792.0 | 2908.0 | 2596.0 | 2653.0 | 2399.0 | 2605.0 | 2731.0 | 2553.0 | 2759.0 | 2599.0 | 2395.0 | 2352.0 | 2333.0 | 2307.0 | 2367.0 | 2259.0 | 2229.0 | 2070.0 | 2774.0 | 2552.0 | 2524.0 | 2358.0 | 2291.0 | 2153.0 |
5 rows × 551 columns
# observation:
# 1. I have chosen the Spanish-language Wikipedia page of "Now You See Me"
# (es.wikipedia.org) as our time series. I will look at holidays in Spain,
# which is an approximation because that site is also read outside Spain.
from datetime import date
import holidays
# Build the holiday table Prophet expects: one row per Spanish public
# holiday in 2015-2017, with a 'holiday' label column and a 'ds' date column.
spain_calendar = holidays.Spain(years=[2015, 2016, 2017])
# Going through from_dict(orient='index') puts the holiday dates in the index.
holiday_names = pd.DataFrame.from_dict(spain_calendar, orient='index')
# Every date gets the same label, 'Spain'.
es_holidays = pd.DataFrame({'holiday': 'Spain', 'ds': holiday_names.index})
print(es_holidays.shape)
es_holidays.head()
(27, 2)
holiday | ds | |
---|---|---|
0 | Spain | 2016-01-01 |
1 | Spain | 2016-01-06 |
2 | Spain | 2016-05-01 |
3 | Spain | 2016-08-15 |
4 | Spain | 2016-10-12 |
# Model 6: linear trend, daily/weekly/yearly seasonality and Spanish holidays.
m6_params = dict(
    growth='linear',
    daily_seasonality=True,
    weekly_seasonality=True,
    yearly_seasonality=True,
    changepoint_range=0.8,
    changepoint_prior_scale=0.7,
    holidays=es_holidays,
)
m6 = Prophet(**m6_params)

# NOTE(review): es_holidays is already passed through `holidays=` above, so
# the Spanish holidays appear to be supplied twice -- confirm both are wanted.
m6.add_country_holidays(country_name='ES')

# Fit on the history, then predict over the prepared future frame.
m6.fit(df2)
forecast6 = m6.predict(future2)

# Forecast plot and per-component plot.
fig1 = m6.plot(forecast6)
fig2 = m6.plot_components(forecast6)
from fbprophet.plot import plot_plotly
import plotly.offline as py
# Interactive Plotly version of the m6 forecast, shown inline in the notebook.
py.init_notebook_mode()
fig = plot_plotly(m6, forecast6)
py.iplot(fig)
The formula for SMAPE (Symmetric Mean Absolute Percentage Error) is given below:
$$ \mathrm{SMAPE}=\frac{100 \%}{n} \sum_{t=1}^{n} \frac{\left|F_{t}-A_{t}\right|}{\left(\left|A_{t}\right|+\left|F_{t}\right|\right) / 2} $$
where $F_t$ is the forecast and $A_t$ is the actual value of the time series at time $t$.
Python implementation:
def smape(A, F):
    """Return the Symmetric Mean Absolute Percentage Error, in percent.

    Parameters
    ----------
    A : array-like
        Actual values.
    F : array-like
        Forecast values. Only the first ``len(A)`` entries are used, so a
        forecast that extends past the observed period can be passed as is.

    Returns
    -------
    float
        ``200 / n * sum(|F - A| / (|A| + |F|))`` with ``n = len(A)``.

    Raises
    ------
    ZeroDivisionError
        If ``A`` is empty.
    """
    # Coerce to float arrays so lists and pandas Series are compared by
    # position rather than by index label.
    A = np.asarray(A, dtype=float)
    # Bug fix: this line used to read ``F = A[:len(A)]``, which replaced the
    # forecast with the actuals and made the function always return 0.
    F = np.asarray(F, dtype=float)[:len(A)]
    # Machine epsilon keeps the ratio finite when actual and forecast are both 0.
    denominator = np.abs(A) + np.abs(F) + np.finfo(float).eps
    return 200.0 / len(A) * np.sum(np.abs(F - A) / denominator)
Despite the name "symmetric", SMAPE is not actually symmetric. Consider this example from Wikipedia:
The SMAPE is not symmetric since over- and under-forecasts are not treated equally. This is illustrated by the following example by applying the SMAPE formula:
Over-forecasting: At = 100 and Ft = 110 give SMAPE = 4.76%
Under-forecasting: At = 100 and Ft = 90 give SMAPE = 5.26%.
(These figures use the variant of the formula without the division by 2 in the denominator, i.e. |Ft − At| / (|At| + |Ft|); with the formula given above the same inputs give 9.52% and 10.53% respectively.)
def smape_np(A, F):
    """Return the Symmetric Mean Absolute Percentage Error, in percent.

    Parameters
    ----------
    A : array-like
        Actual values.
    F : array-like
        Forecast values. Only the first ``len(A)`` entries are used, so a
        forecast that extends past the observed period can be passed as is.

    Returns
    -------
    float
        ``200 / n * sum(|F - A| / (|A| + |F|))`` with ``n = len(A)``.

    Raises
    ------
    ZeroDivisionError
        If ``A`` is empty.
    """
    # Coerce to float arrays first: pandas Series would otherwise be
    # subtracted by index label (yielding NaNs when the indexes differ), and
    # plain lists do not support element-wise arithmetic at all.
    actual = np.asarray(A, dtype=float)
    forecast = np.asarray(F, dtype=float)[:len(actual)]
    abs_error = np.abs(forecast - actual)
    # Machine epsilon keeps the ratio finite when actual and forecast are both 0.
    scale = np.abs(actual) + np.abs(forecast) + np.finfo(float).eps
    return 200.0 / len(actual) * np.sum(abs_error / scale)
# Put the observed values and the first model's predictions side by side.
y_true = df2[['y']]
# Trim the forecast to the number of observed rows.
y_pred = forecast1[['yhat']].iloc[:len(y_true)]
print(y_true.shape, y_pred.shape)
df_ypreds = pd.concat([y_true, y_pred], axis=1, sort=False).dropna()
# Show the first and last five rows. DataFrame.append was removed in
# pandas 2.0; pd.concat produces the same frame on every pandas version.
pd.concat([df_ypreds.head(), df_ypreds.tail()])
(550, 1) (550, 1)
y | yhat | |
---|---|---|
0 | 242.0 | 286.328978 |
1 | 271.0 | 267.253001 |
2 | 309.0 | 246.293649 |
3 | 227.0 | 289.190937 |
4 | 321.0 | 390.841506 |
545 | 316.0 | 133.802482 |
546 | 324.0 | 126.877708 |
547 | 268.0 | 100.701953 |
548 | 201.0 | 72.642823 |
549 | 190.0 | 108.440333 |
# SMAPE (in percent) of the first model's predictions against the observed series.
smape_np(df2['y'].to_numpy(), forecast1['yhat'].to_numpy())
28.868059741019362
# Score each of the six fitted models against the observed series.
# The frames are listed explicitly instead of being looked up with eval().
forecast_frames = [forecast1, forecast2, forecast3,
                   forecast4, forecast5, forecast6]
forecasts = ['forecast{}'.format(i)
             for i in range(1, len(forecast_frames) + 1)]
# get_smape is numba-compiled and raises a TypingError when handed pandas
# Series, so score with the notebook's NumPy-based smape_np on plain arrays.
y_observed = df2['y'].to_numpy()
smapes = [smape_np(y_observed, frame['yhat'].to_numpy())
          for frame in forecast_frames]
df_eval = pd.DataFrame({'forecast': forecasts,
                        'smape': smapes,
                        })
df_eval
--------------------------------------------------------------------------- TypingError Traceback (most recent call last) <ipython-input-94-0d20d01dcb16> in <module> 1 forecasts = [ 'forecast{}'.format(i) for i in range(1,7)] 2 ----> 3 smapes = [ get_smape(df2.y,eval('forecast{}.yhat'.format(i))) for i in range(1,7)] 4 5 <ipython-input-94-0d20d01dcb16> in <listcomp>(.0) 1 forecasts = [ 'forecast{}'.format(i) for i in range(1,7)] 2 ----> 3 smapes = [ get_smape(df2.y,eval('forecast{}.yhat'.format(i))) for i in range(1,7)] 4 5 ~/opt/miniconda3/envs/gam/lib/python3.7/site-packages/numba/core/dispatcher.py in _compile_for_args(self, *args, **kws) 413 e.patch_message(msg) 414 --> 415 error_rewrite(e, 'typing') 416 except errors.UnsupportedError as e: 417 # Something unsupported is present in the user code, add help info ~/opt/miniconda3/envs/gam/lib/python3.7/site-packages/numba/core/dispatcher.py in error_rewrite(e, issue_type) 356 raise e 357 else: --> 358 reraise(type(e), e, None) 359 360 argtypes = [] ~/opt/miniconda3/envs/gam/lib/python3.7/site-packages/numba/core/utils.py in reraise(tp, value, tb) 78 value = tp() 79 if value.__traceback__ is not tb: ---> 80 raise value.with_traceback(tb) 81 raise value 82 TypingError: Failed in nopython mode pipeline (step: nopython frontend) non-precise type pyobject During: typing of argument at ../models/util_prophet.py (26) File "../models/util_prophet.py", line 26: def get_smape(y_true, y_pred): A = y_true.to_numpy().ravel() ^ This error may have been caused by the following argument(s): - argument 0: cannot determine Numba type of <class 'pandas.core.series.Series'> - argument 1: cannot determine Numba type of <class 'pandas.core.series.Series'> This error may have been caused by the following argument(s): - argument 0: cannot determine Numba type of <class 'pandas.core.series.Series'> - argument 1: cannot determine Numba type of <class 'pandas.core.series.Series'>
from fbprophet.diagnostics import cross_validation
# Cross-validate model m6 with Prophet's diagnostics helper, using the
# window settings below.
cv_results = cross_validation(m6,
                              initial='366 days',
                              period='30 days',
                              horizon='60 days')
# get_smape is numba-compiled and fails with a TypingError on pandas Series
# (as seen when scoring the forecasts earlier), so use the notebook's
# NumPy-based smape_np on plain arrays instead.
smape_cv = smape_np(cv_results['y'].to_numpy(), cv_results['yhat'].to_numpy())
smape_cv