import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.special import gamma
# Apply seaborn's whitegrid theme to all figures below
sns.set_style('whitegrid')
# Seed the global NumPy RNG so the sampling examples are reproducible
np.random.seed(42)
1. Motivation
Problem: Modeling Joint Distributions
Given marginals \(F_X(x)\) and \(F_Y(y)\), how do we model \(F_{X,Y}(x,y)\)?
Naive approach: assume independence → \(F_{X,Y}(x,y) = F_X(x)F_Y(y)\)
Problem: this ignores the dependence between \(X\) and \(Y\)!
Copula Solution
Key idea: Separate marginal behavior from dependence structure:
\(F_{X,Y}(x,y) = C(F_X(x), F_Y(y))\), where \(C: [0,1]^2 \to [0,1]\) is the copula.
2. Sklar's Theorem
Statement
For any joint CDF \(F_{X,Y}\) with marginals \(F_X\), \(F_Y\):
\(F_{X,Y}(x,y) = C(F_X(x), F_Y(y))\) for some copula \(C\).
If \(F_X\) and \(F_Y\) are continuous, \(C\) is unique.
Converse
Given a copula \(C\) and marginals \(F_X\), \(F_Y\):
\(F(x,y) = C(F_X(x), F_Y(y))\) defines a valid joint distribution.
Interpretation
The copula \(C\) captures the pure dependence structure, independent of the marginals!
# Gaussian copula example
def gaussian_copula(u, v, rho):
    """Evaluate the Gaussian copula CDF C(u, v; rho).

    C(u, v) = Phi_rho(Phi^{-1}(u), Phi^{-1}(v)), where Phi_rho is the
    bivariate standard normal CDF with correlation rho.

    Parameters
    ----------
    u, v : float or array_like
        Evaluation points in (0, 1); arrays are handled elementwise
        (shapes must broadcast against each other).
    rho : float
        Correlation parameter in (-1, 1).

    Returns
    -------
    float or np.ndarray
        Copula value(s) in [0, 1]; a scalar is returned for scalar input.
    """
    from scipy.stats import norm, multivariate_normal
    # Map uniform coordinates to standard-normal quantiles
    x = norm.ppf(u)
    y = norm.ppf(v)
    cov = np.array([[1.0, rho], [rho, 1.0]])
    # Broadcast, then evaluate the bivariate normal CDF at all points at once
    # (generalizes the original scalar-only version; scalar calls behave the same)
    x, y = np.broadcast_arrays(x, y)
    pts = np.column_stack([np.ravel(x), np.ravel(y)])
    vals = multivariate_normal.cdf(pts, mean=[0.0, 0.0], cov=cov)
    if np.ndim(x) == 0:
        return float(vals)
    return np.reshape(vals, np.shape(x))
# Visualize the Gaussian copula CDF for several correlation values
u_grid = np.linspace(0.01, 0.99, 50)
v_grid = np.linspace(0.01, 0.99, 50)
U, V = np.meshgrid(u_grid, v_grid)
fig, axes = plt.subplots(1, 3, figsize=(16, 4))
rhos = [-0.7, 0.0, 0.7]
for idx, rho in enumerate(rhos):
    C = np.zeros_like(U)
    # Evaluate the copula point-by-point over the unit-square grid
    for i in range(len(u_grid)):
        for j in range(len(v_grid)):
            C[j, i] = gaussian_copula(U[j, i], V[j, i], rho)
    cs = axes[idx].contourf(U, V, C, levels=20, cmap='viridis')
    axes[idx].set_xlabel('u', fontsize=11)
    axes[idx].set_ylabel('v', fontsize=11)
    # Fixed mojibake in the title: 'Ο' was a garbled Greek rho
    axes[idx].set_title(f'Gaussian Copula (ρ={rho})', fontsize=12)
    plt.colorbar(cs, ax=axes[idx])
plt.tight_layout()
plt.show()
3. Common Copula Families
Independence Copula
\(C^{\perp}(u, v) = uv\)
Gaussian Copula
\(C^{Ga}(u, v; \rho) = \Phi_\rho(\Phi^{-1}(u), \Phi^{-1}(v))\), where \(\Phi_\rho\) is the bivariate normal CDF with correlation \(\rho\).
Student-t Copula
Heavier tail dependence than the Gaussian copula.
Archimedean Copulas
Clayton: \(C^{Cl}(u, v; \theta) = (u^{-\theta} + v^{-\theta} - 1)^{-1/\theta}\)
Gumbel: \(C^{Gu}(u, v; \theta) = \exp(-((-\ln u)^\theta + (-\ln v)^\theta)^{1/\theta})\)
Frank: \(C^{Fr}(u, v; \theta) = -\frac{1}{\theta}\ln\left(1 + \frac{(e^{-\theta u}-1)(e^{-\theta v}-1)}{e^{-\theta}-1}\right)\)
def clayton_copula_sample(n, theta):
    """Draw n pairs from the Clayton copula with parameter theta.

    Uses the conditional-inverse method: draw the first coordinate
    uniformly, then invert the conditional CDF C(v | u) at an
    independent uniform quantile to obtain the second coordinate.

    Parameters
    ----------
    n : int
        Number of sample pairs.
    theta : float
        Clayton dependence parameter (theta > 0).

    Returns
    -------
    (np.ndarray, np.ndarray)
        Two length-n arrays with Uniform(0, 1) marginals.
    """
    first = np.random.uniform(0, 1, n)
    quantile = np.random.uniform(0, 1, n)
    # Invert C(v | u) = [u^{-theta} (t^{-theta/(1+theta)} - 1) + 1]^{-1/theta}
    second = (first ** (-theta) * (quantile ** (-theta / (1 + theta)) - 1) + 1) ** (-1 / theta)
    return first, second
def gumbel_copula_sample(n, theta):
    """Sample n pairs from the Gumbel copula via the Marshall-Olkin algorithm.

    The Gumbel family is Archimedean with generator psi(t) = exp(-t^(1/theta)).
    Marshall-Olkin sampling draws a positive stable mixing variable S with
    Laplace transform E[exp(-tS)] = exp(-t^(1/theta)) and independent
    Exp(1) variables E1, E2, then sets U_i = psi(E_i / S).

    (The previous version drew the stable variate but never used it, and its
    second output did not have a uniform marginal.)

    Parameters
    ----------
    n : int
        Number of sample pairs.
    theta : float
        Dependence parameter, theta >= 1; theta = 1 gives independence.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Two length-n arrays with Uniform(0, 1) marginals and Gumbel dependence
        (Kendall's tau = 1 - 1/theta).
    """
    from scipy.stats import levy_stable
    if theta == 1:
        # The stable variable degenerates; the copula reduces to independence
        return np.random.uniform(0, 1, n), np.random.uniform(0, 1, n)
    alpha = 1.0 / theta
    # Positive alpha-stable variable with Laplace transform exp(-t**alpha):
    # in scipy's default S1 parameterization this is beta=1, loc=0,
    # scale = cos(pi*alpha/2)**(1/alpha)
    s = levy_stable.rvs(alpha, 1, loc=0,
                        scale=np.cos(np.pi * alpha / 2) ** (1 / alpha), size=n)
    e1 = np.random.standard_exponential(n)
    e2 = np.random.standard_exponential(n)
    u = np.exp(-(e1 / s) ** alpha)
    v = np.exp(-(e2 / s) ** alpha)
    return u, v
# Sample from each copula family and compare the dependence patterns
n = 1000
fig, axes = plt.subplots(1, 3, figsize=(16, 4))
# Independence copula: u and v are independent uniforms
u_indep = np.random.uniform(0, 1, n)
v_indep = np.random.uniform(0, 1, n)
axes[0].scatter(u_indep, v_indep, alpha=0.3, s=5)
axes[0].set_title('Independence', fontsize=12)
axes[0].set_xlabel('u', fontsize=11)
axes[0].set_ylabel('v', fontsize=11)
# Clayton: points cluster in the lower-left corner (lower tail dependence)
u_clay, v_clay = clayton_copula_sample(n, theta=2)
axes[1].scatter(u_clay, v_clay, alpha=0.3, s=5)
# Fixed mojibake in the titles: 'ΞΈ' was a garbled Greek theta
axes[1].set_title('Clayton (θ=2)', fontsize=12)
axes[1].set_xlabel('u', fontsize=11)
axes[1].set_ylabel('v', fontsize=11)
# Gumbel: points cluster in the upper-right corner (upper tail dependence)
u_gum, v_gum = gumbel_copula_sample(n, theta=2)
axes[2].scatter(u_gum, v_gum, alpha=0.3, s=5)
axes[2].set_title('Gumbel (θ=2)', fontsize=12)
axes[2].set_xlabel('u', fontsize=11)
axes[2].set_ylabel('v', fontsize=11)
for ax in axes:
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
4. Application: Constructing Joint Distributions
# Build a joint distribution: Clayton dependence + chosen marginals.
# Sklar's theorem lets us pick the copula and the marginals independently.
n = 2000
u, v = clayton_copula_sample(n, theta=3)
# Push the uniform coordinates through inverse CDFs to set the marginals
x_samples = stats.norm.ppf(u, loc=0, scale=1)   # X ~ Normal(0, 1)
y_samples = stats.expon.ppf(v, scale=1)         # Y ~ Exponential(1)

fig, axes = plt.subplots(1, 3, figsize=(16, 4))
ax_copula, ax_joint, ax_marginals = axes

# Left: the raw copula sample on the unit square
ax_copula.scatter(u, v, alpha=0.3, s=5)
ax_copula.set_xlabel('u', fontsize=11)
ax_copula.set_ylabel('v', fontsize=11)
ax_copula.set_title('Copula Space', fontsize=12)
ax_copula.grid(True, alpha=0.3)

# Middle: the same dependence after transforming to the target marginals
ax_joint.scatter(x_samples, y_samples, alpha=0.3, s=5)
ax_joint.set_xlabel('X ~ N(0,1)', fontsize=11)
ax_joint.set_ylabel('Y ~ Exp(1)', fontsize=11)
ax_joint.set_title('Original Space', fontsize=12)
ax_joint.grid(True, alpha=0.3)

# Right: the two marginal histograms
ax_marginals.hist(x_samples, bins=30, alpha=0.5, density=True, label='X')
ax_marginals.hist(y_samples, bins=30, alpha=0.5, density=True, label='Y')
ax_marginals.set_xlabel('Value', fontsize=11)
ax_marginals.set_ylabel('Density', fontsize=11)
ax_marginals.set_title('Marginal Distributions', fontsize=12)
ax_marginals.legend()
ax_marginals.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
print(f"Correlation: {np.corrcoef(x_samples, y_samples)[0, 1]:.3f}")
Summary
Sklar's Theorem: \(F_{X,Y}(x,y) = C(F_X(x), F_Y(y))\)
Separates: marginals from the dependence structure.
Common Copulas:
Independence: \(C(u,v) = uv\)
Gaussian: symmetric, moderate tails
Clayton: lower tail dependence
Gumbel: upper tail dependence
Applications:
Finance: portfolio risk
Insurance: joint claims
Hydrology: multivariate extremes
Machine learning: dependency modeling
Next Steps:
08_bayesian_nonparametrics.ipynb - Advanced distributions
06_variational_inference.ipynb - Inference methods
Apply to real financial data