Run this notebook: Open in Colab Open in Kaggle

ARIMA & SARIMA: Statistical Forecasting for Practitioners¶

ARIMA is the workhorse of statistical time series forecasting. This notebook covers model identification, fitting, diagnostics, and seasonal extensions — with a realistic walkthrough from raw data to deployment-ready forecast.

# !pip install statsmodels pmdarima
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

np.random.seed(42)

# Monthly airline passenger data (classic time series benchmark)
try:
    import statsmodels.api as sm
    data = sm.datasets.get_rdataset('AirPassengers')
    passengers = data.data.set_index(pd.period_range('1949-01', periods=144, freq='M'))['value']
except:
    # Synthetic fallback
    dates = pd.period_range('1949-01', periods=144, freq='M')
    trend = np.linspace(100, 500, 144)
    seasonal = 50 * np.sin(2 * np.pi * np.arange(144) / 12) * (1 + np.arange(144) / 200)
    noise = np.random.normal(0, 15, 144)
    passengers = pd.Series((trend + seasonal + noise).clip(50), index=dates, name='value')

print(f'Dataset: {len(passengers)} monthly observations ({passengers.index[0]} to {passengers.index[-1]})')
print(f'Range: {passengers.min():.0f} to {passengers.max():.0f}')

# Train/test split: hold out last 12 months
train = passengers[:-12]
test  = passengers[-12:]
print(f'Train: {len(train)} months | Test: {len(test)} months (forecast horizon)')

1. Model Identification — The Box-Jenkins Approach¶

# AirPassengers has trend + seasonal + heteroscedasticity
# Standard approach: log transform to stabilize variance, then difference

log_passengers = np.log(train)
log_diff1      = log_passengers.diff().dropna()
log_diff_both  = log_diff1.diff(12).dropna()

fig, axes = plt.subplots(3, 2, figsize=(14, 10))

for row, (series, title) in enumerate([
    (log_passengers, 'Log(Passengers) — trend present'),
    (log_diff1,      'Log 1st Difference — trend removed'),
    (log_diff_both,  'Log + Seasonal Diff — stationary'),
]):
    series.plot(ax=axes[row, 0], color='steelblue')
    axes[row, 0].set_title(title)
    
    if row == 2:  # Only show ACF/PACF for stationary version
        plot_acf(series, lags=24, ax=axes[row, 1], alpha=0.05)
        axes[row, 1].set_title('ACF — identify MA order and seasonal pattern')
    else:
        plot_acf(series, lags=24, ax=axes[row, 1], alpha=0.05)
        axes[row, 1].set_title(f'ACF ({title[:30]})')

plt.tight_layout()
plt.show()

print('SARIMA order identification:')
print('  d=1 (one regular difference)')
print('  D=1 (one seasonal difference, S=12)')
print('  Seasonal spikes in ACF at lags 12, 24 → SMA order')
print('  → Try SARIMA(1,1,1)(0,1,1)[12] as starting point')

2. Fitting SARIMA¶

# SARIMA(p,d,q)(P,D,Q)[S]
# Non-seasonal: (p=1, d=1, q=1)
# Seasonal:     (P=0, D=1, Q=1)[12]

model = SARIMAX(
    np.log(train),
    order=(1, 1, 1),
    seasonal_order=(0, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
result = model.fit(disp=False)
print(result.summary())
print()
print(f'AIC: {result.aic:.2f}')

3. Residual Diagnostics¶

# Good residuals: white noise (zero mean, constant variance, no autocorrelation)
residuals = result.resid

fig, axes = plt.subplots(2, 2, figsize=(13, 8))

# Residual plot
residuals.plot(ax=axes[0, 0], color='steelblue')
axes[0, 0].axhline(0, color='red', linestyle='--')
axes[0, 0].set_title('Residuals — should look like white noise')

# Histogram
axes[0, 1].hist(residuals, bins=20, edgecolor='black', alpha=0.7, color='steelblue')
axes[0, 1].set_title('Residual Distribution — should be normal')

# ACF of residuals
plot_acf(residuals.dropna(), lags=24, ax=axes[1, 0], alpha=0.05)
axes[1, 0].set_title('ACF of Residuals — should be all within bounds')

# Q-Q plot
from scipy import stats
stats.probplot(residuals.dropna(), dist='norm', plot=axes[1, 1])
axes[1, 1].set_title('Q-Q Plot — points on line = normally distributed')

plt.tight_layout()
plt.show()

# Ljung-Box test for residual autocorrelation
lb_result = acorr_ljungbox(residuals.dropna(), lags=[10, 20], return_df=True)
print('Ljung-Box test (H0: no autocorrelation in residuals):')
print(lb_result)
print('All p-values > 0.05 → residuals are white noise ✅' if (lb_result['lb_pvalue'] > 0.05).all() else 'Some autocorrelation remains ⚠️')

4. Forecasting & Confidence Intervals¶

# Forecast 12 months ahead
n_forecast = 12
forecast = result.get_forecast(steps=n_forecast)
forecast_mean = np.exp(forecast.predicted_mean)
forecast_ci   = np.exp(forecast.conf_int())

# Evaluation metrics
mae  = mean_absolute_error(test.values, forecast_mean.values)
rmse = np.sqrt(mean_squared_error(test.values, forecast_mean.values))
mape = np.mean(np.abs((test.values - forecast_mean.values) / test.values)) * 100

print(f'Forecast Accuracy (12-month holdout):')
print(f'  MAE:  {mae:.1f} passengers')
print(f'  RMSE: {rmse:.1f} passengers')
print(f'  MAPE: {mape:.1f}%')

# Plot
fig, ax = plt.subplots(figsize=(13, 6))

train.plot(ax=ax, label='Training data', color='steelblue', linewidth=1.5)
test.plot(ax=ax, label='Actual (test)', color='black', linewidth=2, linestyle='--')
forecast_mean.plot(ax=ax, label='SARIMA forecast', color='red', linewidth=2)

ax.fill_between(forecast_ci.index,
                forecast_ci.iloc[:, 0], forecast_ci.iloc[:, 1],
                alpha=0.3, color='red', label='95% CI')

ax.set_title(f'SARIMA(1,1,1)(0,1,1)[12] Forecast — MAPE: {mape:.1f}%')
ax.set_xlabel('Date')
ax.set_ylabel('Passengers')
ax.legend()
plt.tight_layout()
plt.show()

# Auto-ARIMA: automatic order selection
try:
    import pmdarima as pm
    auto_model = pm.auto_arima(
        np.log(train),
        start_p=0, start_q=0,
        max_p=3, max_q=3,
        m=12,              # Seasonal period
        seasonal=True,
        d=None,            # Let it determine d
        D=None,            # Let it determine D
        stepwise=True,     # Faster than exhaustive search
        information_criterion='aic',
        trace=False,
    )
    print(f'\nauto_arima best order: SARIMA{auto_model.order}{auto_model.seasonal_order}')
    print(f'AIC: {auto_model.aic():.2f}')
except ImportError:
    print('\npm.auto_arima: install pmdarima for automatic order selection')
    print('Usage: pm.auto_arima(series, seasonal=True, m=12, stepwise=True)')

ARIMA/SARIMA Cheat Sheet¶

Model           Parameters     When to Use
──────────────────────────────────────────────────────────
AR(p)           p              Pure autoregression
MA(q)           q              Moving average only
ARMA(p,q)       p, q           Stationary series
ARIMA(p,d,q)    p, d, q        Non-stationary, no seasonality
SARIMA(p,d,q)   + (P,D,Q)[S]  Seasonal series
(P,D,Q,S)

Order Selection:
  d: number of regular differences for stationarity (ADF test)
  D: number of seasonal differences (usually 0 or 1)
  p: PACF significant lags (after differencing)
  q: ACF significant lags (after differencing)
  P, Q: Same at seasonal lags (multiples of S)

Goodness of Fit:
  AIC/BIC: Lower is better (compare competing orders)
  Ljung-Box p > 0.05: Residuals are white noise (good)
  MAPE < 5%: Excellent | 5-10%: Good | 10-20%: Fair

Exercises¶

Use pm.auto_arima to find the best SARIMA order for monthly co2 data from statsmodels.
Implement walk-forward validation: retrain SARIMA monthly on growing history, forecast 1 step ahead.
Compare ARIMA(1,1,1) vs ARIMA(2,1,2) on the AirPassengers log-transformed data using AIC.
Add an external regressor (exogenous variable) to SARIMAX for a holiday indicator.
Plot forecast fan charts showing 50%, 80%, and 95% prediction intervals simultaneously.