Run this notebook: Open in Colab Open in Kaggle

Prophet: Scalable Forecasting for Business Time Series

Prophet handles trends, multiple seasonalities, and holidays automatically — without requiring stationarity or ARIMA order tuning. This notebook covers the full Prophet workflow: fitting, diagnostics, holidays, regressors, and cross-validation.

# !pip install prophet
# Prophet is an optional dependency: the import is attempted once and the
# outcome is recorded in HAS_PROPHET, which the cells below branch on.
try:
    from prophet import Prophet
    from prophet.diagnostics import cross_validation, performance_metrics
    from prophet.plot import plot_cross_validation_metric
except ImportError:
    # Without Prophet the notebook only prints illustrative patterns.
    HAS_PROPHET = False
    print('Prophet not installed — showing patterns with simulated output')
    print('Install: pip install prophet')
else:
    HAS_PROPHET = True
    print('Prophet available')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error
import warnings
warnings.filterwarnings('ignore')

np.random.seed(42)

# Synthetic e-commerce daily orders (2 years)
N_DAYS = 730
dates = pd.date_range('2022-01-01', periods=N_DAYS, freq='D')
day = np.arange(N_DAYS)

trend = np.linspace(200, 350, N_DAYS)
# Weekly cycle. Index 0 (2022-01-01) is a Saturday, so with the +pi phase the
# trough falls on Sun-Mon and the peak on Thu-Fri.
weekly = 30 * np.sin(2 * np.pi * day / 7 + np.pi)
yearly = 50 * np.sin(2 * np.pi * day / 365 - np.pi/2)  # Summer peak

# Noise is drawn before the spikes are added so the base series is the same
# regardless of where the holidays are placed.
orders = trend + weekly + yearly + np.random.normal(0, 15, N_DAYS)

# Holiday spikes. They are located by calendar date (not hand-counted offsets)
# so they coincide with the Black Friday / Cyber Monday dates that the custom
# holidays table in the "Adding Holidays" section declares.
black_friday_dates = pd.to_datetime(['2022-11-25', '2023-11-24'])
cyber_monday_dates = pd.to_datetime(['2022-11-28', '2023-11-27'])
black_friday_idx = np.flatnonzero(dates.isin(black_friday_dates)).tolist()
cyber_monday_idx = np.flatnonzero(dates.isin(cyber_monday_dates)).tolist()
orders[black_friday_idx] += 200
orders[cyber_monday_idx] += 150  # Cyber Monday

# Prophet expects columns: ds (datetime), y (target)
# clip(50) puts a floor of 50 orders/day under the noisy series.
df = pd.DataFrame({'ds': dates, 'y': orders.clip(50)})

print(f'Dataset: {len(df)} daily observations')
print(f'Date range: {df.ds.min().date()} to {df.ds.max().date()}')
print(f'Orders range: {df.y.min():.0f} to {df.y.max():.0f}')

1. Basic Prophet Fit & Forecast

# The final 60 rows are held out for evaluation; everything before trains the model.
train_df = df.iloc[:-60]
test_df = df.iloc[-60:]

if not HAS_PROPHET:
    # No Prophet available: print an illustrative summary instead of fitting.
    print('Prophet forecast output (simulated):')
    print('Trend: upward linear trend with 2 changepoints detected')
    print('Weekly: Saturday -15%, Sunday -25% vs weekday baseline')
    print('Yearly: December +40%, January -10% vs annual baseline')
    print('MAPE: ~8.3% on 60-day holdout')
else:
    # Baseline model: yearly + weekly seasonality, no intraday component.
    m = Prophet(
        changepoint_prior_scale=0.05,  # Trend flexibility (0.001=rigid, 0.5=flexible)
        seasonality_prior_scale=10,    # Seasonality strength
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
    )
    m.fit(train_df)

    # Extend the training dates by 60 days and predict over the whole range.
    future = m.make_future_dataframe(periods=60)
    forecast = m.predict(future)

    # Forecast plot
    fig = m.plot(forecast)
    plt.title('Prophet Forecast')
    plt.show()

    # Trend / seasonality component plots
    fig2 = m.plot_components(forecast)
    plt.tight_layout()
    plt.show()

    # Holdout score: the last 60 forecast rows are compared with the held-out actuals.
    actual = test_df.set_index('ds')['y']
    forecast_test = forecast.tail(60).set_index('ds')
    mape = mean_absolute_percentage_error(actual, forecast_test['yhat'])
    print(f'MAPE on 60-day holdout: {mape:.1%}')

2. Adding Holidays

if not HAS_PROPHET:
    # No Prophet available: show the code pattern and a simulated result.
    print('Custom holidays pattern:')
    print("""
    custom_holidays = pd.DataFrame({
        'holiday': ['black_friday', 'cyber_monday'],
        'ds': pd.to_datetime(['2023-11-24', '2023-11-27']),
        'lower_window': [-1, 0],
        'upper_window': [1, 1],
    })
    m = Prophet(holidays=custom_holidays, holidays_prior_scale=20)
    """)
    print('Black Friday holiday effect: +185 orders (simulated)')
else:
    # Method 1: Built-in country holidays
    m_holidays = Prophet(yearly_seasonality=True, weekly_seasonality=True)
    m_holidays.add_country_holidays(country_name='US')
    m_holidays.fit(train_df)

    # Method 2: Custom holidays with window.
    # One row per occurrence: (name, date, days before, days after).
    # Black Friday's window opens one day early; both events extend one day after.
    holiday_rows = [
        ('black_friday', '2022-11-25', -1, 1),
        ('black_friday', '2023-11-24', -1, 1),
        ('cyber_monday', '2022-11-28', 0, 1),
        ('cyber_monday', '2023-11-27', 0, 1),
    ]
    custom_holidays = pd.DataFrame(
        holiday_rows,
        columns=['holiday', 'ds', 'lower_window', 'upper_window'],
    )
    custom_holidays['ds'] = pd.to_datetime(custom_holidays['ds'])

    m_custom = Prophet(
        holidays=custom_holidays,
        holidays_prior_scale=20,  # Let holiday effects be large
        yearly_seasonality=True,
        weekly_seasonality=True,
    )
    m_custom.fit(train_df)
    future_custom = m_custom.make_future_dataframe(periods=60)
    forecast_custom = m_custom.predict(future_custom)

    # Show holiday effects: keep only forecast rows that fall on a listed holiday date.
    on_holiday = forecast_custom['ds'].isin(custom_holidays['ds'])
    holiday_effects = forecast_custom.loc[on_holiday]
    print('Holiday effects (additive):')
    print(holiday_effects[['ds', 'holidays', 'yhat']].to_string(index=False))

3. Adding Regressors (External Features)

# Add external regressors: marketing spend, promotions
# Add external regressors: marketing spend, promotions.
# Promotions are flagged for November and December only.
in_promo_months = df['ds'].dt.month.isin([11, 12])
df['promo_active'] = 0
df.loc[in_promo_months, 'promo_active'] = 1  # Nov-Dec promotions
df['marketing_spend'] = np.random.uniform(1000, 5000, len(df))
df.loc[in_promo_months, 'marketing_spend'] *= 2  # Spend doubles while promos run

# Same 60-day holdout split as before, now including the regressor columns.
train_reg = df[:-60]
test_reg   = df[-60:]

if HAS_PROPHET:
    # The whole fit/predict sequence is guarded so a Prophet failure is
    # reported as a message rather than aborting the notebook run.
    try:
        # Imported here so this cell still parses and runs without Prophet.
        from prophet.utilities import regressor_coefficients

        m_reg = Prophet(yearly_seasonality=True, weekly_seasonality=True)
        m_reg.add_regressor('promo_active', standardize=False)
        m_reg.add_regressor('marketing_spend', standardize=True)  # Standardize continuous vars
        m_reg.fit(train_reg[['ds', 'y', 'promo_active', 'marketing_spend']])

        # Regressor values must be supplied for every date being predicted,
        # so they are joined onto the future frame from df.
        future_reg = m_reg.make_future_dataframe(periods=60)
        future_reg = future_reg.merge(df[['ds', 'promo_active', 'marketing_spend']], on='ds', how='left')
        # df covers all 730 dates, so this fill is only a safety net for
        # dates that have no regressor row.
        future_reg[['promo_active', 'marketing_spend']] = future_reg[['promo_active', 'marketing_spend']].fillna(0)

        forecast_reg = m_reg.predict(future_reg)

        # Show regressor coefficients (one row per added regressor) instead
        # of dumping every fitted model parameter.
        print('Regressor coefficients:')
        print(regressor_coefficients(m_reg))
    except Exception as e:
        print(f'Error: {e}')
else:
    print('Regressor pattern:')
    print('m.add_regressor("promo_active") → treats as additive effect')
    print('Effect of promo_active: +35 orders/day (estimated)')

4. Cross-Validation & Hyperparameter Tuning

if HAS_PROPHET:
    from itertools import product

    # Prophet's built-in cross-validation
    # initial: training window size
    # period:  gap between cutoffs
    # horizon: forecast horizon to evaluate
    df_cv = cross_validation(
        m,
        initial='365 days',
        period='30 days',
        horizon='60 days',
        parallel='processes',
    )

    df_perf = performance_metrics(df_cv)
    print('Cross-validation performance metrics:')
    print(df_perf[['horizon', 'mae', 'mape', 'coverage']].head(10))

    # Plot MAPE by horizon
    fig = plot_cross_validation_metric(df_cv, metric='mape')
    plt.title('MAPE by Forecast Horizon')
    plt.show()

    # Hyperparameter tuning: exhaustive grid over trend and seasonality priors.
    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }

    best_mape = float('inf')
    best_params = {}

    for cps, sps in product(param_grid['changepoint_prior_scale'],
                             param_grid['seasonality_prior_scale']):
        m_cv = Prophet(changepoint_prior_scale=cps, seasonality_prior_scale=sps,
                       yearly_seasonality=True, weekly_seasonality=True)
        m_cv.fit(train_df)
        # The initial window spans a full year: with yearly seasonality
        # enabled, a shorter window cannot identify the yearly component in
        # the early folds and skews the comparison between parameter sets.
        cv = cross_validation(
            m_cv,
            initial='365 days',
            period='30 days',
            horizon='30 days',
            parallel='processes',
        )
        # Score a combination by its MAPE averaged across the horizon.
        perf = performance_metrics(cv)['mape'].mean()
        if perf < best_mape:
            best_mape = perf
            best_params = {'changepoint_prior_scale': cps, 'seasonality_prior_scale': sps}

    print(f'\nBest params: {best_params}')
    print(f'Best CV MAPE: {best_mape:.1%}')
else:
    # No Prophet available: show the code pattern and a typical result range.
    print('Prophet cross-validation pattern:')
    print("""
    df_cv = cross_validation(m, initial='365 days', period='30 days', horizon='60 days')
    df_perf = performance_metrics(df_cv)
    # Outputs: horizon, mae, mape, mdape, rmse, coverage
    """)
    print('Typical CV MAPE for daily orders: 6-12% at 30-day horizon')

Prophet Cheat Sheet

Parameter                 Effect                 Default
────────────────────────────────────────────────────────────
changepoint_prior_scale   Trend flexibility       0.05
  → Higher: more breakpoints, risk of overfit
  → Lower:  smoother trend, risk of underfit
seasonality_prior_scale   Seasonality strength    10.0
holidays_prior_scale      Holiday spike size      10.0
seasonality_mode          'additive' or           'additive'
                          'multiplicative'

When to use Prophet vs ARIMA:
  Prophet pros: handles multiple seasonalities, holidays,
                missing data, non-uniform intervals
  ARIMA pros:   better for short series, pure statistical
                interpretability, standard diagnostics
  Both:         suitable for production forecasting at horizons under 1 year

Prophet gotchas:
  - Regressors must be available in the future (can't use same-day sales)
  - Saturation: use logistic growth if series has a cap
  - Very short series (< 2 full cycles): Prophet struggles

Exercises

Fit Prophet on the classic airline passengers dataset with seasonality_mode='multiplicative' — why is this more appropriate than the default additive mode?
Add a promotions regressor and plot its isolated effect using plot_components.
Use Prophet’s logistic growth mode for a series with a known capacity (e.g., app users with 100K cap).
Compare Prophet vs SARIMA on the same 60-day holdout — which wins on MAPE?
Build a multi-item forecasting loop: fit one Prophet model per product SKU, collect all forecasts.