import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize, linprog
import seaborn as sns
sns.set_style('whitegrid')  # seaborn whitegrid theme for all figures below
np.random.seed(42)  # fix the RNG seed so the notebook is reproducible
1. Primal Problem
Standard Form
Lagrangian
where \(\lambda \geq 0\) (dual variables), \(\nu\) unconstrained.
2. Dual Problem
Dual Function
Always concave in \((\lambda, \nu)\)!
Dual Problem
Weak Duality
for any feasible \((\lambda, \nu)\), where \(p^*\) is primal optimal.
Strong Duality
Holds for convex problems under Slater's condition!
# Example quadratic program:
#   minimize   0.5 * x^T Q x + c^T x
#   subject to A x <= b   (here: sum(x) <= 1 and x >= 0)
n = 3
Q = np.array([[2, 0.5, 0], [0.5, 1, 0.3], [0, 0.3, 1.5]])
c = np.array([1, 2, 1])
A = np.array([[1, 1, 1], [-1, 0, 0], [0, -1, 0], [0, 0, -1]])
b = np.array([1, 0, 0, 0])

def primal_obj(x):
    """Quadratic objective 0.5 * x^T Q x + c^T x."""
    quadratic = 0.5 * x @ Q @ x
    linear = c @ x
    return quadratic + linear

# Each row a_i of A contributes one inequality b_i - a_i^T x >= 0,
# which is the form scipy's 'ineq' constraints expect (i=i binds the
# loop variable so every lambda keeps its own row index).
constraints = [
    {'type': 'ineq', 'fun': lambda x, i=i: b[i] - A[i] @ x}
    for i in range(len(b))
]
result_primal = minimize(primal_obj, np.zeros(n), constraints=constraints)
x_opt = result_primal.x
p_star = result_primal.fun
print(f"Primal optimal: {p_star:.4f}")
print(f"Optimal x: {x_opt}")
# Dual function, evaluated in closed form.
def dual_function(lam):
    """Lagrange dual g(lam) = inf_x L(x, lam) for the QP above.

    L(x, lam) = 0.5 x^T Q x + c^T x + lam^T (A x - b). Stationarity in x
    gives Q x + c + A^T lam = 0, so the inner minimizer is
    x* = -Q^{-1} (c + A^T lam), and g(lam) = L(x*, lam).
    """
    shifted = c + A.T @ lam
    x_min = -np.linalg.solve(Q, shifted)
    value = 0.5 * x_min @ Q @ x_min + c @ x_min
    return value + lam @ (A @ x_min - b)
# Solve the dual: maximize g(lam) over lam >= 0 by minimizing -g(lam).
def neg_dual(lam):
    """Negated dual objective, since `minimize` only minimizes."""
    return -dual_function(lam)

# One non-negativity bound per inequality constraint.
bounds = [(0, None)] * len(b)
result_dual = minimize(neg_dual, np.ones(len(b)), bounds=bounds)
d_star = -result_dual.fun
print(f"\nDual optimal: {d_star:.4f}")
print(f"Duality gap: {p_star - d_star:.6f}")
3. KKT Conditions
For convex problem, \((x^*, \lambda^*, \nu^*)\) optimal iff:
Stationarity: \(\nabla f_0(x^*) + \sum_i \lambda_i^* \nabla f_i(x^*) + \sum_j \nu_j^* \nabla h_j(x^*) = 0\)
Primal feasibility: \(f_i(x^*) \leq 0\), \(h_j(x^*) = 0\)
Dual feasibility: \(\lambda^* \geq 0\)
Complementary slackness: \(\lambda_i^* f_i(x^*) = 0\)
# Verify the four KKT conditions at the computed primal/dual pair.
lam_opt = result_dual.x

# 1) Stationarity: grad_x L(x*, lam*) = Q x* + c + A^T lam* should vanish.
grad = Q @ x_opt + c + A.T @ lam_opt
print(f"Stationarity (||∇L||): {np.linalg.norm(grad):.6f}")

# 2) Primal feasibility: A x* <= b (only positive residuals are violations).
violations = np.maximum(A @ x_opt - b, 0)
print(f"Primal feasibility: {np.linalg.norm(violations):.6f}")

# 3) Dual feasibility: lam* >= 0 (small tolerance for solver noise).
print(f"Dual feasibility: {np.all(lam_opt >= -1e-6)}")

# 4) Complementary slackness: lam*_i * (A x* - b)_i = 0 for every i.
slack = lam_opt * (A @ x_opt - b)
print(f"Complementary slackness: {np.linalg.norm(slack):.6f}")
4. Application: SVM Dual
# SVM primal:  min 0.5*||w||^2 + C * sum_i xi_i
# Dual:        max sum_i alpha_i - 0.5 * sum_ij alpha_i alpha_j y_i y_j K(x_i, x_j)
#              s.t. 0 <= alpha_i <= C,  sum_i alpha_i y_i = 0
from sklearn.datasets import make_blobs

# Two well-separated 2-D clusters; relabel classes from {0, 1} to {-1, +1}.
X, y = make_blobs(n_samples=50, centers=2, random_state=42, cluster_std=1.5)
y = 2 * y - 1
# Kernel matrix
def rbf_kernel(X, gamma=1.0):
    """Gaussian (RBF) kernel matrix for the rows of X.

    K[i, j] = exp(-gamma * ||x_i - x_j||^2), computed via the expansion
    ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b so the whole matrix is one
    BLAS-backed product instead of an O(n^2) Python loop.

    Parameters
    ----------
    X : (n, d) array of sample points (one row per sample).
    gamma : positive kernel width parameter.

    Returns
    -------
    (n, n) symmetric kernel matrix with unit diagonal.
    """
    sq_norms = np.sum(X**2, axis=1)
    sq_dists = sq_norms.reshape(-1, 1) + sq_norms - 2 * X @ X.T
    # Floating-point cancellation can make near-zero distances slightly
    # negative, which would give kernel values > 1; clamp at zero.
    np.maximum(sq_dists, 0, out=sq_dists)
    return np.exp(-gamma * sq_dists)
K = rbf_kernel(X, gamma=0.5)

# Dual objective: maximize 1^T alpha - 0.5 * alpha^T Q alpha,
# where Q_ij = y_i y_j K_ij.
Q_dual = np.outer(y, y) * K

def svm_dual_obj(alpha):
    """Negated SVM dual objective (scipy minimizes, so flip the sign)."""
    return 0.5 * alpha @ Q_dual @ alpha - np.sum(alpha)

# Box constraint 0 <= alpha_i <= C and the equality sum_i alpha_i y_i = 0.
C = 1.0
constraints = [{'type': 'eq', 'fun': lambda a: np.dot(a, y)}]
bounds = [(0, C)] * len(y)
result = minimize(svm_dual_obj, np.zeros(len(y)), bounds=bounds, constraints=constraints)
alpha = result.x

# Samples with non-negligible alpha are the support vectors.
sv_idx = alpha > 1e-4
print(f"Support vectors: {np.sum(sv_idx)}/{len(y)}")
# Visualize the data, circling the support vectors found by the dual solve.
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], s=50, c=y, alpha=0.7, cmap='bwr')
plt.scatter(X[sv_idx, 0], X[sv_idx, 1],
            facecolors='none', edgecolors='black', s=200, linewidths=2)
plt.title('SVM via Dual (Support Vectors Circled)', fontsize=12)
plt.xlabel('Feature 1', fontsize=11)
plt.ylabel('Feature 2', fontsize=11)
plt.grid(True, alpha=0.3)
plt.show()
Summary
Duality:
Primal: \(\min f_0(x)\) s.t. \(f_i(x) \leq 0\)
Dual: \(\max g(\lambda)\) s.t. \(\lambda \geq 0\)
Key Results:
Weak duality always holds
Strong duality for convex + Slater
KKT conditions are necessary & sufficient
Applications:
SVM dual formulation
Network flow problems
Resource allocation
Game theory
Next Steps:
09_gradient_descent_convergence.ipynb - Optimization
Study interior-point methods
Explore semidefinite programming