# Standard scientific stack for the DPP (determinantal point process) demo notebook.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.linalg import eigh, det  # NOTE(review): `det` appears unused in this file — confirm before removing
sns.set_style('whitegrid')  # global plot styling for all figures below
np.random.seed(42)  # reproducible random draws throughout the notebook
1. Motivation: Diversity
Problem
Sample a diverse subset from \(\{1,\ldots,N\}\).
Examples:
Document summarization
Minibatch selection
Recommendation diversity
DPP Solution
A point process with repulsion between items:
\[P(A \subseteq \mathbf{Y}) = \det(K_A) \quad \text{for all } A \subseteq \{1,\ldots,N\},\]
where \(K\) is a positive semidefinite kernel matrix.
2. DPP Definition
Kernel Matrix
Let \(K \in \mathbb{R}^{N \times N}\) with \(0 \preceq K \preceq I\).
Probability
For a subset \(Y \subseteq \{1,\ldots,N\}\):
\[P(Y \subseteq \mathbf{Y}) = \det(K_Y),\]
where \(K_Y\) is the submatrix of \(K\) with rows/cols in \(Y\).
Marginal Probability
\[P(i \in \mathbf{Y}) = K_{ii}, \qquad P(i, j \in \mathbf{Y}) = K_{ii}K_{jj} - K_{ij}^2.\]
Repulsion
\[P(i, j \in \mathbf{Y}) = P(i \in \mathbf{Y})\,P(j \in \mathbf{Y}) - K_{ij}^2 \le P(i \in \mathbf{Y})\,P(j \in \mathbf{Y}).\]
Negative correlation!
# Visualize repulsion
def compute_kernel_rbf(X, sigma=1.0):
    """RBF (Gaussian) kernel matrix for a set of feature vectors.

    Vectorized via ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 x_i.x_j,
    replacing the original O(n^2) Python double loop with broadcasting.

    Parameters
    ----------
    X : (n, d) array-like of feature vectors (one row per item).
    sigma : float, kernel bandwidth.

    Returns
    -------
    (n, n) ndarray with K[i, j] = exp(-||X[i] - X[j]||^2 / (2 sigma^2)).
    """
    X = np.asarray(X, dtype=float)
    sq_norms = np.sum(X**2, axis=1)
    sq_dists = sq_norms[:, None] + sq_norms[None, :] - 2.0 * (X @ X.T)
    # Clip tiny negative values from floating-point cancellation (diagonal).
    np.maximum(sq_dists, 0.0, out=sq_dists)
    return np.exp(-sq_dists / (2.0 * sigma**2))
# 2D point cloud and its RBF kernel, to illustrate the repulsion structure.
np.random.seed(42)
X = np.random.randn(20, 2)
K = compute_kernel_rbf(X, sigma=1.5)
# Rescale so trace(K) = 5, keeping entries in a reasonable range.
K = K / (np.trace(K) + 1e-8) * 5

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: kernel matrix as a heatmap.
heat = axes[0].imshow(K, cmap='viridis')
axes[0].set_title('Kernel Matrix K', fontsize=12)
axes[0].set_xlabel('Item j', fontsize=11)
axes[0].set_ylabel('Item i', fontsize=11)
plt.colorbar(heat, ax=axes[0])

# Right panel: the raw points in feature space, labelled by index.
axes[1].scatter(X[:, 0], X[:, 1], s=100, alpha=0.6)
for idx, (px, py) in enumerate(X):
    axes[1].text(px + 0.1, py + 0.1, str(idx), fontsize=8)
axes[1].set_title('Feature Space', fontsize=12)
axes[1].set_xlabel('Feature 1', fontsize=11)
axes[1].set_ylabel('Feature 2', fontsize=11)
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
3. L-ensemble DPP
Quality-Diversity Tradeoff
Define \(L = B^T B\) where \(B \in \mathbb{R}^{d \times N}\) has columns \(b_i\).
Interpretation:
\(\|b_i\|\) = quality of item \(i\)
\(\langle b_i, b_j \rangle\) = similarity between items \(i\) and \(j\)
Probability
\[P(\mathbf{Y} = Y) = \frac{\det(L_Y)}{\det(L + I)}.\]
Volume interpretation: \(\det(L_Y) = \text{volume}^2\) of the parallelepiped spanned by \(\{b_i : i \in Y\}\).
# Volume interpretation: det(L_Y) is the squared area spanned by the
# feature vectors, so near-parallel (similar) items get tiny probability.
np.random.seed(10)
b1 = np.array([1.5, 0.2])
b2_similar = np.array([1.4, 0.3])   # nearly parallel to b1
b2_diverse = np.array([0.3, 1.5])   # nearly orthogonal to b1

def volume_squared(v1, v2):
    """Squared area of the parallelogram spanned by v1 and v2."""
    mat = np.vstack((v1, v2))
    return np.linalg.det(mat) ** 2

vol_similar = volume_squared(b1, b2_similar)
vol_diverse = volume_squared(b1, b2_diverse)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Draw both cases with the same routine: arrows for the two feature
# vectors plus the shaded parallelogram they span.
cases = [
    (axes[0], b2_similar, f'Similar: det²={vol_similar:.3f}'),
    (axes[1], b2_diverse, f'Diverse: det²={vol_diverse:.3f}'),
]
for ax, b2, title in cases:
    ax.arrow(0, 0, b1[0], b1[1], head_width=0.1, color='blue', label='b1')
    ax.arrow(0, 0, b2[0], b2[1], head_width=0.1, color='red', label='b2')
    ax.fill([0, b1[0], b1[0] + b2[0], b2[0]],
            [0, b1[1], b1[1] + b2[1], b2[1]], alpha=0.3)
    ax.set_title(title, fontsize=12)
    ax.set_xlim(-0.5, 3)
    ax.set_ylim(-0.5, 2)
    ax.legend()
    ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print(f"Volume ratio: {vol_diverse/vol_similar:.2f}x larger for diverse!")
4. Sampling Algorithm
Eigendecomposition Method
Compute the eigendecomposition \(L = V\Lambda V^T\).
For each eigenvalue \(\lambda_i\), include its eigenvector independently with probability \(\frac{\lambda_i}{1+\lambda_i}\).
Sample items sequentially from the span of the selected eigenvectors.
Complexity: \(O(N^3)\) for the eigendecomposition, \(O(Nk^3)\) for sampling \(k\) items.
def sample_dpp(L, max_iter=100):
    """Draw one sample from the DPP with L-ensemble kernel ``L``.

    Implements the spectral sampler (Hough et al.; Kulesza & Taskar,
    Algorithm 1): Phase 1 selects an elementary DPP by keeping each
    eigenvector with probability lambda/(1+lambda); Phase 2 samples one
    item per kept eigenvector, shrinking the spanned subspace each step.

    Bug fix vs. the original: the Phase 2 update built a k-by-k matrix
    ``np.outer(V[i, :], v_i)`` that cannot broadcast against the N-by-k
    ``V`` (crashes whenever 1 < k < N) and was not the correct projection.
    The standard update eliminates the i-th coordinate from the remaining
    columns and drops one column, then re-orthonormalizes.

    Parameters
    ----------
    L : (N, N) ndarray, symmetric positive semidefinite L-ensemble kernel.
    max_iter : unused; retained for backward compatibility with callers.

    Returns
    -------
    1-D ndarray of selected item indices (possibly empty).
    """
    N = L.shape[0]
    # Eigendecomposition (symmetric, so eigh).
    eigvals, eigvecs = eigh(L)
    eigvals = np.maximum(eigvals, 0)  # clip tiny negatives for stability

    # Phase 1: keep eigenvector i independently with prob lambda_i/(1+lambda_i).
    keep = np.random.rand(N) < eigvals / (1 + eigvals)
    V = eigvecs[:, keep]
    if V.shape[1] == 0:
        return np.array([])

    # Phase 2: sample exactly V.shape[1] distinct items.
    Y = []
    while V.shape[1] > 0:
        # P(item i) proportional to squared norm of row i of V.
        probs = np.sum(V**2, axis=1)
        probs /= probs.sum()
        i = np.random.choice(N, p=probs)
        Y.append(i)
        if V.shape[1] == 1:
            break
        # Pick the column with the largest i-th coordinate (nonzero since
        # probs[i] > 0), eliminate coordinate i from the other columns
        # (Gaussian-elimination style), drop that column, re-orthonormalize.
        j = np.argmax(np.abs(V[i, :]))
        Vj = V[:, j]
        V = np.delete(V, j, axis=1)
        V = V - np.outer(Vj, V[i, :] / Vj[i])
        V, _ = np.linalg.qr(V)
    return np.array(Y)
# Smoke-test the sampler: draw repeatedly and inspect the size distribution.
N = 30
d = 5
B = np.random.randn(d, N)
L = B.T @ B  # rank-d L-ensemble, so a sample holds at most d items

draw_count = 100
samples = [sample_dpp(L) for _ in range(draw_count)]
sizes = [s.size for s in samples]

plt.figure(figsize=(8, 5))
plt.hist(sizes, bins=range(max(sizes) + 2), alpha=0.7, edgecolor='black')
plt.xlabel('Subset Size', fontsize=11)
plt.ylabel('Frequency', fontsize=11)
plt.title('DPP Sample Size Distribution', fontsize=12)
plt.grid(True, alpha=0.3, axis='y')
plt.show()

# Empirical mean size should match tr(L(L+I)^{-1}).
print(f"Mean size: {np.mean(sizes):.2f}")
print(f"Expected size (trace formula): {np.trace(L @ np.linalg.inv(L + np.eye(N))):.2f}")
5. Application: Diverse Subset Selection
# Head-to-head: a DPP sample vs a uniform random subset of the same size.
np.random.seed(42)
N = 50
X = np.random.randn(N, 2)

# Build an L-ensemble from the RBF similarity, rescaled to trace 10.
K = compute_kernel_rbf(X, sigma=1.0)
L = K / (np.trace(K) + 1e-8) * 10

dpp_sample = sample_dpp(L)
k = len(dpp_sample)  # match the random baseline's size to the DPP draw
random_sample = np.random.choice(N, size=k, replace=False)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Same rendering for both panels: all points in gray, selection highlighted.
panels = [
    (axes[0], dpp_sample, 'red', 'DPP'),
    (axes[1], random_sample, 'blue', 'Random'),
]
for ax, chosen, color, label in panels:
    ax.scatter(X[:, 0], X[:, 1], s=30, alpha=0.3, color='gray', label='All')
    ax.scatter(X[chosen, 0], X[chosen, 1], s=100, color=color, label=label)
    ax.set_title(f'{label} Sample (k={k})', fontsize=12)
    ax.set_xlabel('Feature 1', fontsize=11)
    ax.set_ylabel('Feature 2', fontsize=11)
    ax.legend()
    ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Diversity metrics
def min_distance(X, indices):
    """Smallest pairwise Euclidean distance among the selected points.

    Vectorized with broadcasting instead of the original O(k^2) Python
    double loop over index pairs.

    Parameters
    ----------
    X : (N, d) array-like of points.
    indices : sequence of integer indices into the rows of X.

    Returns
    -------
    Minimum pairwise distance, or 0 when fewer than two points selected.
    """
    idx = np.asarray(indices, dtype=int)
    if idx.size < 2:
        return 0
    pts = np.asarray(X)[idx]
    # Pairwise difference tensor: (k, k, d) -> (k, k) distance matrix.
    diffs = pts[:, None, :] - pts[None, :, :]
    dists = np.sqrt(np.sum(diffs**2, axis=-1))
    # Only the strict upper triangle holds distinct pairs.
    iu = np.triu_indices(idx.size, k=1)
    return dists[iu].min()
# Report the diversity metric for both selections.
for label, chosen in (('DPP', dpp_sample), ('Random', random_sample)):
    print(f"{label} min distance: {min_distance(X, chosen):.3f}")
Summary
DPP Definition:
\[P(Y \subseteq \mathbf{Y}) = \det(K_Y)\]
Repulsion: negative correlation between items.
Properties:
\(P(i \in Y) = K_{ii}\)
Expected size: \(\mathbb{E}[|Y|] = \text{tr}(L(I+L)^{-1})\) for the L-ensemble kernel \(L\)
Volume interpretation: maximize diversity
Sampling:
Eigendecompose \(L = V\Lambda V^T\)
Select eigenvectors stochastically
Sequential sampling from the span
Applications:
Document summarization
Minibatch selection for SGD
Recommendation systems
Neural architecture search
Next Steps:
07_concentration_inequalities.ipynb - Theoretical bounds
Explore conditional DPPs
Fast sampling approximations