import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.special import gamma  # NOTE(review): not used in this chunk — confirm it is needed elsewhere

# Global figure style and a fixed RNG seed so every sampled figure below is reproducible.
sns.set_style('whitegrid')
np.random.seed(42)

1. Motivation

Problem: Modeling Joint Distributions

Given marginals \(F_X(x)\) and \(F_Y(y)\), how do we model the joint CDF \(F_{X,Y}(x,y)\)?

Naive approach: Assume independence → \(F_{X,Y}(x,y) = F_X(x)F_Y(y)\).

Problem: This ignores dependence!

Copula Solution

Key idea: Separate the marginal behavior from the dependence structure:

\[F_{X,Y}(x,y) = C(F_X(x), F_Y(y))\]

where \(C: [0,1]^2 \to [0,1]\) is the copula.

2. Sklar's Theorem

Statement

For any joint CDF \(F_{X,Y}\) with marginals \(F_X\), \(F_Y\), there exists a copula \(C\) such that

\[F_{X,Y}(x,y) = C(F_X(x), F_Y(y)).\]

If \(F_X\) and \(F_Y\) are continuous, \(C\) is unique.

Converse

Given any copula \(C\) and marginals \(F_X\), \(F_Y\),

\[F_{X,Y}(x,y) = C(F_X(x), F_Y(y))\]

defines a valid joint distribution.

Interpretation

The copula \(C\) captures the pure dependence structure, independently of the marginals.

# Gaussian copula example
def gaussian_copula(u, v, rho):
    """Evaluate the Gaussian copula CDF C(u, v; rho).

    Computes Phi_rho(Phi^{-1}(u), Phi^{-1}(v)): the uniform coordinates
    are mapped to the normal scale with the probit (inverse normal CDF)
    transform, and the bivariate standard-normal CDF with correlation
    ``rho`` is evaluated at that point.
    """
    from scipy.stats import norm, multivariate_normal

    # Probit transform back to the normal scale.
    z_u = norm.ppf(u)
    z_v = norm.ppf(v)

    # Bivariate standard normal with the requested correlation.
    corr = np.array([[1.0, rho], [rho, 1.0]])
    return multivariate_normal.cdf([z_u, z_v], mean=[0, 0], cov=corr)

# Contour plots of the Gaussian copula CDF for negative, zero and
# positive correlation on a uniform grid over (0, 1)^2.
u_grid = np.linspace(0.01, 0.99, 50)
v_grid = np.linspace(0.01, 0.99, 50)
U, V = np.meshgrid(u_grid, v_grid)

fig, axes = plt.subplots(1, 3, figsize=(16, 4))
rhos = [-0.7, 0.0, 0.7]

for ax, rho in zip(axes, rhos):
    # Evaluate the (scalar) copula CDF at every grid point; row j
    # indexes v, column i indexes u, matching meshgrid's layout.
    C = np.array([[gaussian_copula(U[j, i], V[j, i], rho)
                   for i in range(len(u_grid))]
                  for j in range(len(v_grid))])

    cs = ax.contourf(U, V, C, levels=20, cmap='viridis')
    ax.set_xlabel('u', fontsize=11)
    ax.set_ylabel('v', fontsize=11)
    ax.set_title(f'Gaussian Copula (ρ={rho})', fontsize=12)
    plt.colorbar(cs, ax=ax)

plt.tight_layout()
plt.show()

3. Common Copula Families

Independence Copula

\[C^\perp(u, v) = uv\]

Gaussian Copula

\[C^{Ga}(u, v; \rho) = \Phi_\rho(\Phi^{-1}(u), \Phi^{-1}(v))\]

where \(\Phi_\rho\) is the bivariate standard normal CDF with correlation \(\rho\).

Student-t Copula

Like the Gaussian copula but with heavier tails (it exhibits tail dependence).

Archimedean Copulas

Defined through a generator function \(\phi\):

\[C(u, v) = \phi^{-1}(\phi(u) + \phi(v))\]

Clayton: \[C^{Cl}(u, v; \theta) = (u^{-\theta} + v^{-\theta} - 1)^{-1/\theta}\]

Gumbel: \[C^{Gu}(u, v; \theta) = \exp\!\left(-\left((-\ln u)^\theta + (-\ln v)^\theta\right)^{1/\theta}\right)\]

Frank: \[C^{Fr}(u, v; \theta) = -\frac{1}{\theta}\ln\left(1 + \frac{(e^{-\theta u}-1)(e^{-\theta v}-1)}{e^{-\theta}-1}\right)\]

def clayton_copula_sample(n, theta):
    """Draw ``n`` pairs from the Clayton copula by conditional inversion.

    Draws u ~ U(0,1), then inverts the conditional CDF C(v | u) at an
    independent uniform draw t, which yields a pair (u, v) with the
    Clayton(theta) dependence structure.
    """
    u = np.random.uniform(0, 1, n)
    t = np.random.uniform(0, 1, n)
    # Closed-form inverse of the Clayton conditional distribution.
    inner = t ** (-theta / (1.0 + theta)) - 1.0
    v = (1.0 + u ** (-theta) * inner) ** (-1.0 / theta)
    return u, v

def gumbel_copula_sample(n, theta):
    """Sample ``n`` pairs from the Gumbel copula (Marshall-Olkin algorithm).

    The original implementation drew a stable variate ``s`` but never used
    it, and returned ``(u, u_new)`` where ``u`` remained plain uniform —
    the pair did not have Gumbel dependence.  This version uses the
    Marshall-Olkin frailty construction: the Gumbel generator
    phi(t) = (-ln t)^theta has a positive-stable frailty
    S ~ St(alpha = 1/theta), and given independent Exp(1) draws E1, E2,
    the pair (exp(-(E1/S)^{1/theta}), exp(-(E2/S)^{1/theta})) follows the
    Gumbel(theta) copula.

    Parameters
    ----------
    n : int
        Number of pairs to draw.
    theta : float
        Dependence parameter, theta >= 1 (theta = 1 gives independence).

    Returns
    -------
    (u, v) : pair of ndarrays with values in (0, 1].
    """
    from scipy.stats import levy_stable

    if theta == 1:
        # Generator reduces to -ln t: the copula is the independence copula.
        return np.random.uniform(0, 1, n), np.random.uniform(0, 1, n)

    alpha = 1.0 / theta
    # Positive stable S with Laplace transform E[exp(-tS)] = exp(-t^alpha):
    # levy_stable(alpha, beta=1, loc=0) with scale cos(pi*alpha/2)^(1/alpha).
    scale = np.cos(np.pi * alpha / 2.0) ** theta
    s = levy_stable.rvs(alpha, 1, loc=0, scale=scale, size=n)

    e1 = np.random.exponential(size=n)
    e2 = np.random.exponential(size=n)
    u = np.exp(-(e1 / s) ** alpha)
    v = np.exp(-(e2 / s) ** alpha)
    return u, v

# Draw n pairs from each copula family and compare their scatter plots.
n = 1000
fig, axes = plt.subplots(1, 3, figsize=(16, 4))

# (sample, title) per panel; sampling order matches the RNG sequence
# used elsewhere in this notebook.
panels = [
    ((np.random.uniform(0, 1, n), np.random.uniform(0, 1, n)), 'Independence'),
    (clayton_copula_sample(n, theta=2), 'Clayton (ΞΈ=2)'),
    (gumbel_copula_sample(n, theta=2), 'Gumbel (ΞΈ=2)'),
]

for ax, ((us, vs), title) in zip(axes, panels):
    ax.scatter(us, vs, alpha=0.3, s=5)
    ax.set_title(title, fontsize=12)
    ax.set_xlabel('u', fontsize=11)
    ax.set_ylabel('v', fontsize=11)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

4. Application: Constructing Joint Distributions

# Build a joint distribution by combining a Clayton dependence structure
# with arbitrary marginals via the probability-integral transform.
n = 2000

# Dependence: sample (u, v) from the Clayton copula.
u, v = clayton_copula_sample(n, theta=3)

# Marginals: push the uniforms through the target inverse CDFs.
X = stats.norm.ppf(u, loc=0, scale=1)  # X ~ Normal(0, 1)
Y = stats.expon.ppf(v, scale=1)        # Y ~ Exponential(1)

fig, axes = plt.subplots(1, 3, figsize=(16, 4))
ax_copula, ax_joint, ax_marginals = axes

# Left: the pair in copula (uniform) space.
ax_copula.scatter(u, v, alpha=0.3, s=5)
ax_copula.set_xlabel('u', fontsize=11)
ax_copula.set_ylabel('v', fontsize=11)
ax_copula.set_title('Copula Space', fontsize=12)
ax_copula.grid(True, alpha=0.3)

# Middle: the same pair after the marginal transforms.
ax_joint.scatter(X, Y, alpha=0.3, s=5)
ax_joint.set_xlabel('X ~ N(0,1)', fontsize=11)
ax_joint.set_ylabel('Y ~ Exp(1)', fontsize=11)
ax_joint.set_title('Original Space', fontsize=12)
ax_joint.grid(True, alpha=0.3)

# Right: the two marginal histograms.
ax_marginals.hist(X, bins=30, alpha=0.5, density=True, label='X')
ax_marginals.hist(Y, bins=30, alpha=0.5, density=True, label='Y')
ax_marginals.set_xlabel('Value', fontsize=11)
ax_marginals.set_ylabel('Density', fontsize=11)
ax_marginals.set_title('Marginal Distributions', fontsize=12)
ax_marginals.legend()
ax_marginals.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"Correlation: {np.corrcoef(X, Y)[0, 1]:.3f}")

Summary

Sklar's Theorem:

\[F_{X,Y}(x,y) = C(F_X(x), F_Y(y))\]

It separates the marginals from the dependence structure.

Common Copulas:

  • Independence: \(C(u,v) = uv\)

  • Gaussian: Symmetric, moderate tails

  • Clayton: Lower tail dependence

  • Gumbel: Upper tail dependence

Applications:

  • Finance: Portfolio risk

  • Insurance: Joint claims

  • Hydrology: Multivariate extremes

  • Machine learning: Dependency modeling

Next Steps:

  • 08_bayesian_nonparametrics.ipynb - Advanced distributions

  • 06_variational_inference.ipynb - Inference methods

  • Apply to real financial data