import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's whitegrid theme for every figure in this notebook.
sns.set_style('whitegrid')
# Fix the global RNG seed so the random test problems are reproducible.
np.random.seed(42)

1. Problem: Solve \(Ax = b\)

Where \(A \in \mathbb{R}^{n \times n}\) is symmetric positive definite.

Equivalent: Minimize

\[f(x) = \frac{1}{2}x^TAx - b^Tx\]

Gradient Descent Issues

  • Zigzagging for ill-conditioned \(A\)

  • Slow convergence: \(O(\kappa \log(1/\epsilon))\) iterations

CG Solution

Converges in \(\leq n\) iterations (in exact arithmetic)!

2. Conjugate Directions

A-Orthogonality

Directions \(\{p_0, \ldots, p_{n-1}\}\) are A-conjugate if:

\[p_i^T A p_j = 0, \quad \forall i \neq j\]

3. Convergence Analysis

# Empirical convergence study: run CG on a random 100x100 SPD system and
# compare the measured residual history against the kappa-based rate.
n = 100
np.random.seed(42)
Q = np.random.randn(n, n)
A = Q.T @ Q + np.eye(n)  # Gram matrix + I is symmetric positive definite
x_true = np.random.randn(n)
b = A @ x_true  # right-hand side with known solution x_true

# Run CG from the zero vector; res_cg holds one residual norm per iteration.
x_cg, _, res_cg = conjugate_gradient(A, b, x0=np.zeros(n), max_iter=n)

# Condition number kappa = lambda_max / lambda_min
# (eigvalsh returns eigenvalues sorted ascending for symmetric matrices).
eigvals = np.linalg.eigvalsh(A)
kappa = eigvals[-1] / eigvals[0]

# Linear-rate reference curve ((sqrt(kappa)-1)/(sqrt(kappa)+1))^k.
# NOTE(review): the textbook bound has a factor 2 and is stated in the
# A-norm of the error; this curve is a qualitative reference for the residual.
iters = np.arange(len(res_cg))
bound = res_cg[0] * ((np.sqrt(kappa) - 1) / (np.sqrt(kappa) + 1))**iters

plt.figure(figsize=(10, 6))
plt.semilogy(res_cg, 'b-', linewidth=2, label='CG Residual')
plt.semilogy(bound, 'r--', label=f'Bound (κ={kappa:.1f})')  # was mojibake 'ΞΊ'
plt.xlabel('Iteration', fontsize=11)
plt.ylabel('Residual Norm', fontsize=11)
plt.title('CG Convergence', fontsize=12)
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

print(f"κ(A) = {kappa:.2f}")  # was mojibake 'ΞΊ(A)'
print(f"Iterations to convergence: {len(res_cg)-1}")

4. Preconditioned CG

def preconditioned_cg(A, b, M_inv=None, x0=None, tol=1e-6, max_iter=None):
    """Solve A x = b with the preconditioned conjugate gradient method.

    Parameters
    ----------
    A : (n, n) array_like
        Symmetric positive definite system matrix.
    b : (n,) array_like
        Right-hand side vector.
    M_inv : (n, n) array_like, optional
        Inverse of the preconditioner M, applied as z = M_inv @ r.
        Defaults to the identity (plain CG).
    x0 : (n,) array_like, optional
        Initial guess; defaults to the zero vector.
    tol : float, optional
        Stop once the residual norm ||b - A x|| drops below this value.
    max_iter : int, optional
        Iteration cap; defaults to n (exact-arithmetic CG finishes in n).

    Returns
    -------
    x : (n,) ndarray
        Approximate solution.
    residuals : list[float]
        Residual norms per iteration, including the initial residual.
    """
    n = len(b)
    x = np.zeros(n) if x0 is None else x0.copy()

    if max_iter is None:
        max_iter = n

    # Preconditioner application z = M^{-1} r. With no preconditioner this
    # is the identity; avoid building and multiplying an explicit n x n
    # identity matrix (the original did np.eye(n) @ r every iteration).
    if M_inv is None:
        def apply_M(v):
            return v
    else:
        def apply_M(v):
            return M_inv @ v

    r = b - A @ x
    z = apply_M(r)
    p = z.copy()
    rz_old = r @ z  # r^T M^{-1} r, reused in the alpha/beta updates

    residuals = [np.linalg.norm(r)]

    for _ in range(max_iter):
        Ap = A @ p
        alpha = rz_old / (p @ Ap)  # exact line search along p in the A-norm
        x = x + alpha * p
        r = r - alpha * Ap

        # Compute the norm once; reuse it for the history and the stop test.
        r_norm = np.linalg.norm(r)
        residuals.append(r_norm)
        if r_norm < tol:
            break

        z = apply_M(r)
        rz_new = r @ z
        beta = rz_new / rz_old  # makes the new direction A-conjugate to p
        p = z + beta * p
        rz_old = rz_new

    return x, residuals

# Jacobi (diagonal) preconditioner: M = diag(A), so M^{-1} is a diagonal
# matrix of reciprocals — cheap to build and to apply.
M_inv = np.diag(1.0 / np.diag(A))

x_pcg, res_pcg = preconditioned_cg(A, b, M_inv=M_inv, max_iter=n)

# Overlay plain-CG and preconditioned-CG residual histories on a log scale.
fig, ax = plt.subplots(figsize=(10, 6))
ax.semilogy(res_cg, 'b-', label='CG')
ax.semilogy(res_pcg, 'r-', label='Preconditioned CG')
ax.set_xlabel('Iteration', fontsize=11)
ax.set_ylabel('Residual', fontsize=11)
ax.set_title('CG vs Preconditioned CG', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

print(f"CG: {len(res_cg) - 1} iterations")
print(f"PCG: {len(res_pcg) - 1} iterations")

Summary

CG Algorithm:

Solves \(Ax = b\) for SPD \(A\) in \(\leq n\) iterations (in exact arithmetic).

Convergence:

\[\|x_k - x^*\|_A \leq 2\left(\frac{\sqrt{\kappa}-1}{\sqrt{\kappa}+1}\right)^k \|x_0 - x^*\|_A\]

Key Properties:

  • A-conjugate search directions

  • A Krylov subspace method

  • Optimal (error-minimizing in the A-norm) within the current Krylov subspace

Preconditioning:

Solve the equivalent system \(M^{-1}Ax = M^{-1}b\) to reduce the effective condition number \(\kappa\).

Applications:

  • Large sparse linear systems

  • PDE discretizations (finite elements)

  • Deep learning (second-order methods)

  • Computer graphics

Next Steps:

  • 09_gradient_descent_convergence.ipynb - first-order methods

  • Study GMRES for non-symmetric systems

  • Explore incomplete Cholesky preconditioning