# Default values
# Seasonal period handed to auto_arima as `m`.
seasonal = 12
# Number of periods held out as test data and forecast ahead.
forecast = 12
# Location of the CSV loaded with pandas (read with a 'Month' index column).
filename = 'https://www.salesanalytics.co.jp/591h'
# import packages
import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima import utils
from pmdarima import arima
from pmdarima import model_selection
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')
# get data
# Read the CSV with its 'Month' column as the index, asking pandas to parse
# that index as dates.
df = pd.read_csv(filename, index_col='Month', parse_dates=True)
# view records
df
# graph display
# One line trace: first data column plotted against the index.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df.index,
            y=df.iloc[:, 0],
            mode="lines",
            name='observed data',
        ),
    ],
)
fig.show()
# Train Test Split
# Reserve `forecast` observations as test data; the remainder is train data.
df_train, df_test = model_selection.train_test_split(
    df,
    test_size=forecast,
)
# graph display
# Two line traces on one figure: the train part and the test part.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_train.index,
            y=df_train.iloc[:, 0],
            mode="lines",
            name='observed data (train data)',
        ),
        go.Scatter(
            x=df_test.index,
            y=df_test.iloc[:, 0],
            mode="lines",
            name='observed data (test data)',
        ),
    ],
)
fig.show()
# Train the Model
# Fit on the training split only, so the test split can be used for scoring.
arima_model = pm.auto_arima(
    df_train,
    m=seasonal,
    seasonal=True,
    maxiter=10,
    n_jobs=-1,
)
arima_model
# Evaluation
train_pred = arima_model.predict_in_sample()
test_pred = arima_model.predict(n_periods=forecast)
# Each metric is printed as a label line followed by its value line.
print('RMSE:', np.sqrt(mean_squared_error(df_test, test_pred)), sep='\n')
print('MAE:', mean_absolute_error(df_test, test_pred), sep='\n')
print('MAPE:', mean_absolute_percentage_error(df_test, test_pred), sep='\n')
# Train the Model
# Same search settings as before, but fitted on the complete series `df`.
arima_model = pm.auto_arima(
    df,
    m=seasonal,
    seasonal=True,
    maxiter=10,
    n_jobs=-1,
)
arima_model
# Forecasting
train_pred = arima_model.predict_in_sample()
# Point forecasts for the next `forecast` periods plus their interval bounds.
test_pred, conf_int = arima_model.predict(n_periods=forecast,
                                          return_conf_int=True)
# create datatable
# First forecast timestamp: the first day of the month following the last
# observation. The offset handles the December -> January rollover, so no
# manual year/month arithmetic is needed.
ts_yyyymm = df.index[-1] + pd.offsets.MonthBegin(1)
# Build the table straight on the forecast dates from plain arrays.
# np.asarray guards against test_pred arriving as an indexed pandas object:
# an index-aligned assignment into a differently indexed frame would fill
# the column with NaN instead of the forecasts.
df_pred = pd.DataFrame(
    {'forecast value': np.asarray(test_pred)},
    index=pd.date_range(ts_yyyymm, periods=forecast, freq='MS'),
)
# conf_int has one row per forecast period: lower bound, upper bound.
df_pred[['conf_inf', 'conf_sup']] = np.asarray(conf_int)
# view records
df_pred
# graph display
# Observed series and the forecast column of df_pred as two line traces.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df.index,
            y=df.iloc[:, 0],
            mode="lines",
            name='observed',
        ),
        go.Scatter(
            x=df_pred.index,
            y=df_pred.iloc[:, 0],
            mode="lines",
            name='forecast',
        ),
    ],
)
fig.show()