# Setup
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): imported for its side effect of registering the '3d' projection;
# no 3D axes appear in this chunk — presumably used later. Confirm before removing.
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
sns.set_style('whitegrid')  # light grid background for every figure below
np.set_printoptions(precision=3, suppress=True)  # 3 decimals, no scientific notation
## 2.1 Systems of Linear Equations

### Example 2.1: Production Planning (MML Book)

A company produces products $N_1, \dots, N_n$ using resources $R_1, \dots, R_m$.

**Problem:** Find how many units of each product to produce given the resource constraints.

General form:

$$
\begin{aligned}
a_{11}x_1 + a_{12}x_2 + \dots + a_{1n}x_n &= b_1 \\
a_{21}x_1 + a_{22}x_2 + \dots + a_{2n}x_n &= b_2 \\
&\;\vdots \\
a_{m1}x_1 + a_{m2}x_2 + \dots + a_{mn}x_n &= b_m
\end{aligned}
$$
# Example 2.2 from MML Book
# System with a unique solution:
#   x₁ + x₂ +  x₃ = 3
#   x₁ - x₂ + 2x₃ = 2
#        x₂ +  x₃ = 2

# Coefficient matrix A
A = np.array([
    [1, 1, 1],
    [1, -1, 2],
    [0, 1, 1]
])

# Right-hand side
b = np.array([3, 2, 2])

# Solve using NumPy (LU factorization under the hood)
x = np.linalg.solve(A, b)

print("Solution:")
print(f"x₁ = {x[0]:.3f}")
print(f"x₂ = {x[1]:.3f}")
print(f"x₃ = {x[2]:.3f}")

# Verify the solution: Ax should reproduce b (allclose handles float round-off)
print(f"\nVerification: Ax = {A @ x}")
print(f"Expected: b = {b}")
print(f"\nSolution is correct: {np.allclose(A @ x, b)}")
# Visualize a 2-D system of equations:
#   2x + y = 5  and  x - y = 1
# Each equation is rewritten as y = f(x) and drawn as a line; the
# intersection of the two lines is the solution of the system.
xs = np.linspace(-1, 4, 100)
line1 = 5 - 2 * xs   # y from 2x + y = 5
line2 = xs - 1       # y from x - y = 1

plt.figure(figsize=(10, 6))
plt.plot(xs, line1, label='2x + y = 5', linewidth=2)
plt.plot(xs, line2, label='x - y = 1', linewidth=2)

# Mark the intersection point (solution of the 2x2 system)
A_2d = np.array([[2, 1], [1, -1]])
b_2d = np.array([5, 1])
solution = np.linalg.solve(A_2d, b_2d)
plt.plot(solution[0], solution[1], 'ro', markersize=12,
         label=f'Solution ({solution[0]:.1f}, {solution[1]:.1f})')

plt.xlabel('x', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.title('System of Linear Equations in 2D', fontsize=14)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.axhline(0, color='black', linewidth=0.5)
plt.axvline(0, color='black', linewidth=0.5)
plt.show()

print(f"Solution: x = {solution[0]:.2f}, y = {solution[1]:.2f}")
## 2.2 Matrices

### Definition 2.1 (Matrix)

An (m, n)-matrix is an m×n rectangular arrangement of elements.

Special cases:

- Row vector: a (1, n)-matrix
- Column vector: an (m, 1)-matrix
# Matrix creation and basic operations

# Create two matrices of the same shape
A = np.array([[1, 2, 3],
              [4, 5, 6]])
B = np.array([[7, 8, 9],
              [10, 11, 12]])

print("Matrix A (2×3):")
print(A)
print(f"Shape: {A.shape}")

print("\nMatrix B (2×3):")
print(B)

# Matrix addition is element-wise (shapes must agree)
C = A + B
print("\nA + B:")
print(C)

# Scalar multiplication scales every entry
D = 3 * A
print("\n3 * A:")
print(D)
# Matrix multiplication (Eq. 2.13 in MML book)
# C = AB where c_ij = Σ_l (a_il * b_lj); inner dimensions must agree
A = np.array([[1, 2],
              [3, 4],
              [5, 6]])
B = np.array([[7, 8, 9],
              [10, 11, 12]])

print("A (3×2):")
print(A)
print("\nB (2×3):")
print(B)

# (3×2) @ (2×3) -> (3×3)
C = A @ B  # or np.dot(A, B)
print("\nC = A @ B (3×3):")
print(C)

# Spot-check one entry against the definition
print(f"\nVerify C[0,0]: {A[0,0]*B[0,0] + A[0,1]*B[1,0]} = {C[0,0]}")

# Matrix multiplication is NOT commutative:
# B @ A has a different shape (and values) than A @ B
D = B @ A
print("\nD = B @ A (2×2):")
print(D)
# Special matrices

# Identity matrix: I @ A = A @ I = A
I3 = np.eye(3)
print("Identity matrix I₃:")
print(I3)

# Zero matrix: the additive identity
Z = np.zeros((2, 3))
print("\nZero matrix (2×3):")
print(Z)

# Diagonal matrix built from its diagonal entries
D = np.diag([1, 2, 3])
print("\nDiagonal matrix:")
print(D)

# Transpose swaps rows and columns: (A^T)_ij = A_ji
A = np.array([[1, 2, 3],
              [4, 5, 6]])
print("\nA:")
print(A)
print("\nA^T (transpose):")
print(A.T)
## 2.3 Solving Systems of Linear Equations

### Gaussian Elimination

Transform the augmented matrix [A|b] into row-echelon form (REF).

Elementary row operations:

1. Swap two rows
2. Multiply a row by a nonzero scalar
3. Add a multiple of one row to another
# Gaussian elimination step-by-step
def gaussian_elimination(A, b, verbose=True):
    """
    Solve the square linear system Ax = b by Gaussian elimination
    with partial pivoting.

    Parameters
    ----------
    A : array_like, shape (n, n)
        Coefficient matrix.
    b : array_like, shape (n,)
        Right-hand side vector.
    verbose : bool, optional
        If True (default), print every elimination step.

    Returns
    -------
    numpy.ndarray of shape (n,) with the solution, or None when a pivot
    is numerically zero (no unique solution exists).
    """
    # Build the augmented matrix [A|b] in float arithmetic.
    # np.asarray(...) also accepts plain Python lists, not just ndarrays.
    n = len(b)
    Ab = np.column_stack([np.asarray(A, dtype=float).copy(),
                          np.asarray(b, dtype=float).copy()])
    if verbose:
        print("Augmented matrix [A|b]:")
        print(Ab)
        print("\n" + "="*50)
    # Forward elimination with partial pivoting
    for i in range(n):
        # Pick the row with the largest |entry| in column i as the pivot
        max_row = int(np.argmax(np.abs(Ab[i:, i]))) + i
        # Tolerance-based test: exact float `== 0` misses near-singular
        # pivots produced by round-off during elimination.
        if np.isclose(Ab[max_row, i], 0.0):
            print(f"No unique solution exists (zero pivot at column {i})")
            return None
        # Swap rows if needed
        if max_row != i:
            Ab[[i, max_row]] = Ab[[max_row, i]]
            if verbose:
                print(f"\nSwap row {i} with row {max_row}:")
                print(Ab)
        # Zero out every entry below the pivot
        for j in range(i+1, n):
            factor = Ab[j, i] / Ab[i, i]
            Ab[j] = Ab[j] - factor * Ab[i]
            if verbose and factor != 0:
                print(f"\nRow {j} = Row {j} - {factor:.3f} * Row {i}:")
                print(Ab)
    if verbose:
        print("\n" + "="*50)
        print("Row echelon form:")
        print(Ab)
    # Back substitution (Ab is now upper triangular)
    x = np.zeros(n)
    for i in range(n-1, -1, -1):
        x[i] = (Ab[i, -1] - np.dot(Ab[i, i+1:n], x[i+1:])) / Ab[i, i]
    return x
# Example from MML book: drive gaussian_elimination on the 3×3 system above
A = np.array([[1, 1, 1],
              [1, -1, 2],
              [0, 1, 1]])
b = np.array([3, 2, 2])

print("Solving system:")
print("x₁ + x₂ + x₃ = 3")
print("x₁ - x₂ + 2x₃ = 2")
print("     x₂ + x₃ = 2")
print("\n" + "="*50 + "\n")

x = gaussian_elimination(A, b)

print("\n" + "="*50)
print("\nSolution:")
print(f"x = {x}")
print(f"\nVerification: Ax = {A @ x}")
print(f"Expected: b = {b}")
# Matrix inverse: A^(-1) is the matrix satisfying A @ A^(-1) = I
A = np.array([[4, 7],
              [2, 6]], dtype=float)
print("Matrix A:")
print(A)

# Invert A (valid here because det(A) = 10 is nonzero)
A_inv = np.linalg.inv(A)
print("\nA^(-1):")
print(A_inv)

# Sanity check: the product should be the 2x2 identity
product = A.dot(A_inv)
print("\nA @ A^(-1):")
print(product)

# x = A^(-1) b solves Ax = b. (np.linalg.solve is preferred numerically;
# the explicit inverse is used here to make the algebra visible.)
b = np.array([1, 2])
x = A_inv.dot(b)
print(f"\nSolve Ax = b:")
print(f"x = A^(-1) @ b = {x}")
print(f"Verification: Ax = {A @ x}")
## 2.4 Vector Spaces

### Definition 2.5 (Vector Space)

A vector space V over ℝ is a set with two operations:

- Addition: $+ : V \times V \to V$
- Scalar multiplication: $\cdot : \mathbb{R} \times V \to V$

that satisfy:

- Associativity of addition
- Commutativity of addition
- Identity element (zero vector)
- Inverse elements
- Distributivity
- and more…

Examples: ℝⁿ, polynomials, matrices, functions
# Vector space examples

# Example 1: ℝ³ — the standard vector space
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])
print("Vectors in ℝ³:")
print(f"v1 = {v1}")
print(f"v2 = {v2}")

# Closure under addition: the sum stays inside ℝ³
v3 = v1 + v2
print(f"\nv1 + v2 = {v3} (still in ℝ³)")

# Closure under scalar multiplication
v4 = 2.5 * v1
print(f"2.5 * v1 = {v4} (still in ℝ³)")

# The zero vector is the additive identity
zero = np.zeros(3)
print(f"\nZero vector: {zero}")
print(f"v1 + 0 = {v1 + zero} (identity)")

# Every vector has an additive inverse
neg_v1 = -v1
print(f"\n-v1 = {neg_v1}")
print(f"v1 + (-v1) = {v1 + neg_v1} (inverse)")
# Visualize vector addition and scaling in 2D
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# --- Left panel: vector addition (parallelogram rule) ---
v1 = np.array([2, 1])
v2 = np.array([1, 3])
v3 = v1 + v2
ax1.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1, color='r', width=0.008, label='v₁')
ax1.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1, color='b', width=0.008, label='v₂')
ax1.quiver(0, 0, v3[0], v3[1], angles='xy', scale_units='xy', scale=1, color='g', width=0.01, label='v₁+v₂')
# Faint copies of each vector translated to the other's tip complete the parallelogram
ax1.quiver(v1[0], v1[1], v2[0], v2[1], angles='xy', scale_units='xy', scale=1,
           color='b', width=0.005, alpha=0.3, linestyle='--')
ax1.quiver(v2[0], v2[1], v1[0], v1[1], angles='xy', scale_units='xy', scale=1,
           color='r', width=0.005, alpha=0.3, linestyle='--')
ax1.set_xlim(-1, 5)
ax1.set_ylim(-1, 5)
ax1.set_aspect('equal')
ax1.grid(True, alpha=0.3)
ax1.legend(fontsize=11)
ax1.set_title('Vector Addition', fontsize=13)
ax1.set_xlabel('x')
ax1.set_ylabel('y')

# --- Right panel: the same vector scaled by several scalars ---
v = np.array([2, 1])
scalars = [-1, 0.5, 1, 1.5, 2]
colors = ['purple', 'orange', 'blue', 'green', 'red']
for scalar, color in zip(scalars, colors):
    scaled_v = scalar * v
    ax2.quiver(0, 0, scaled_v[0], scaled_v[1], angles='xy', scale_units='xy', scale=1,
               color=color, width=0.008, label=f'{scalar}v')
ax2.set_xlim(-3, 5)
ax2.set_ylim(-3, 3)
ax2.set_aspect('equal')
ax2.grid(True, alpha=0.3)
ax2.legend(fontsize=10)
ax2.set_title('Scalar Multiplication', fontsize=13)
ax2.set_xlabel('x')
ax2.set_ylabel('y')
plt.tight_layout()
plt.show()
## 2.5 Linear Independence

### Definition 2.9 (Linear Combination)

A vector v is a linear combination of vectors $\{v_1, \dots, v_k\}$ if

$$v = \lambda_1 v_1 + \lambda_2 v_2 + \dots + \lambda_k v_k$$

for some scalars $\lambda_1, \dots, \lambda_k \in \mathbb{R}$.

### Definition 2.10 (Linear Independence)

Vectors $\{v_1, \dots, v_k\}$ are linearly independent if

$$\lambda_1 v_1 + \lambda_2 v_2 + \dots + \lambda_k v_k = 0 \implies \lambda_1 = \lambda_2 = \dots = \lambda_k = 0.$$

Otherwise, they are linearly dependent.
# Check linear independence
def check_linear_independence(vectors):
    """
    Test whether a list of 1-D vectors is linearly independent.

    The vectors are stacked as columns of a matrix; they are independent
    exactly when the matrix rank equals the number of vectors.

    Prints a short report and returns True if independent, else False.
    """
    stacked = np.column_stack(vectors)
    r = np.linalg.matrix_rank(stacked)
    count = len(vectors)
    print(f"Matrix A (vectors as columns):")
    print(stacked)
    print(f"\nRank: {r}")
    print(f"Number of vectors: {count}")
    verdict = 'independent' if r == count else 'dependent'
    print(f"\nLinear {verdict}")
    return r == count
# Example 1: Linearly independent vectors
print("Example 1: Standard basis vectors in ℝ³")
v1 = np.array([1, 0, 0])
v2 = np.array([0, 1, 0])
v3 = np.array([0, 0, 1])
check_linear_independence([v1, v2, v3])

print("\n" + "="*50 + "\n")

# Example 2: Linearly dependent vectors (v3 is a combination of v1, v2)
print("Example 2: Dependent vectors (v3 = v1 + v2)")
v1 = np.array([1, 2, 3])
v2 = np.array([2, 3, 4])
v3 = v1 + v2  # v3 lies in span{v1, v2}, so the set is dependent
check_linear_independence([v1, v2, v3])
# Visualize linear dependence/independence in 2D
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# --- Left panel: independent vectors span the whole plane ---
v1 = np.array([1, 0])
v2 = np.array([0, 1])
ax1.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1,
           color='r', width=0.015, label='v₁')
ax1.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1,
           color='b', width=0.015, label='v₂')
# Dots sample the span (all linear combinations a*v1 + b*v2)
for a in np.linspace(-2, 2, 9):
    for b in np.linspace(-2, 2, 9):
        point = a*v1 + b*v2
        ax1.plot(point[0], point[1], 'k.', alpha=0.2, markersize=3)
ax1.set_xlim(-2.5, 2.5)
ax1.set_ylim(-2.5, 2.5)
ax1.set_aspect('equal')
ax1.grid(True, alpha=0.3)
ax1.legend(fontsize=11)
ax1.set_title('Linearly Independent\n(span entire plane)', fontsize=13)
ax1.set_xlabel('x')
ax1.set_ylabel('y')

# --- Right panel: dependent (collinear) vectors span only a line ---
v1 = np.array([1, 1])
v2 = 2 * v1  # v2 = 2*v1, so {v1, v2} is dependent
ax2.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1,
           color='r', width=0.015, label='v₁')
ax2.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1,
           color='b', width=0.015, label='v₂ = 2v₁')
# The span is just the line {t*v1 : t in R}
t_vals = np.linspace(-2, 2, 100)
line = np.array([t*v1 for t in t_vals])
ax2.plot(line[:, 0], line[:, 1], 'k--', alpha=0.3, linewidth=2, label='span')
ax2.set_xlim(-2.5, 2.5)
ax2.set_ylim(-2.5, 2.5)
ax2.set_aspect('equal')
ax2.grid(True, alpha=0.3)
ax2.legend(fontsize=11)
ax2.set_title('Linearly Dependent\n(span only a line)', fontsize=13)
ax2.set_xlabel('x')
ax2.set_ylabel('y')
plt.tight_layout()
plt.show()
## 2.6 Basis and Rank

### Definition 2.12 (Basis)

A basis of a vector space V is a set of linearly independent vectors that span V.

Standard basis of ℝⁿ: $\{e_1, e_2, \dots, e_n\}$ where

$$e_1 = [1, 0, 0, \dots, 0]^\top, \quad e_2 = [0, 1, 0, \dots, 0]^\top, \quad \dots$$

### Definition 2.14 (Rank)

The rank of a matrix A is the dimension of the subspace spanned by its columns (or rows).
# Standard basis of ℝ³
e1 = np.array([1, 0, 0])
e2 = np.array([0, 1, 0])
e3 = np.array([0, 0, 1])
print("Standard basis of ℝ³:")
print(f"e₁ = {e1}")
print(f"e₂ = {e2}")
print(f"e₃ = {e3}")

# Any vector in ℝ³ is a linear combination of the basis vectors,
# with its own components as the coefficients
v = np.array([3, -2, 5])
print(f"\nVector v = {v}")
print(f"v = {v[0]}e₁ + {v[1]}e₂ + {v[2]}e₃")
print(f"Verification: {v[0]*e1 + v[1]*e2 + v[2]*e3}")

# A different basis (still spans ℝ³)
b1 = np.array([1, 1, 0])
b2 = np.array([0, 1, 1])
b3 = np.array([1, 0, 1])
print("\n" + "="*50)
print("\nAlternative basis:")
print(f"b₁ = {b1}")
print(f"b₂ = {b2}")
print(f"b₃ = {b3}")

# They form a basis iff they are independent, i.e. rank = 3
B = np.column_stack([b1, b2, b3])
print(f"\nLinear independent: {np.linalg.matrix_rank(B) == 3}")

# Coordinates of v in the new basis: solve B @ coords = v
coords = np.linalg.solve(B, v)
print(f"\nv in new basis: {coords}")
print(f"Verification: {coords[0]*b1 + coords[1]*b2 + coords[2]*b3}")
# Matrix rank examples

# Full-rank matrix (upper triangular with nonzero diagonal)
A1 = np.array([[1, 0, 2],
               [0, 1, 3],
               [0, 0, 1]])
print("Matrix A₁:")
print(A1)
print(f"Rank: {np.linalg.matrix_rank(A1)}")
print(f"Full rank: {np.linalg.matrix_rank(A1) == min(A1.shape)}")

# Rank-deficient matrix: every row is a multiple of the first
A2 = np.array([[1, 2, 3],
               [2, 4, 6],
               [3, 6, 9]])
print("\nMatrix A₂ (rows are multiples):")
print(A2)
print(f"Rank: {np.linalg.matrix_rank(A2)}")
print(f"Rank-deficient: {np.linalg.matrix_rank(A2) < min(A2.shape)}")

# Rectangular matrix: rank is at most min(m, n)
A3 = np.array([[1, 2, 3, 4],
               [5, 6, 7, 8],
               [9, 10, 11, 12]])
print("\nMatrix A₃ (3×4):")
print(A3)
print(f"Rank: {np.linalg.matrix_rank(A3)}")
print(f"Max possible rank: {min(A3.shape)}")
## 2.7 Linear Mappings

### Definition 2.15 (Linear Mapping)

A mapping $\Phi : V \to W$ is linear if

- $\Phi(v + w) = \Phi(v) + \Phi(w)$ for all $v, w \in V$
- $\Phi(\lambda v) = \lambda \Phi(v)$ for all $v \in V$ and $\lambda \in \mathbb{R}$

Key: matrices represent linear mappings!
# Linear transformations as matrices

# Rotation matrix (counter-clockwise by 45 degrees)
theta = np.pi / 4  # 45 degrees in radians
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])
print("Rotation matrix (45°):")
print(R)

# Apply the rotation to the standard basis vectors
v1 = np.array([1, 0])
v2 = np.array([0, 1])
v1_rot = R @ v1
v2_rot = R @ v2
print(f"\nOriginal: v₁ = {v1}, v₂ = {v2}")
print(f"Rotated: v₁' = {v1_rot}, v₂' = {v2_rot}")

# Diagonal matrix = axis-aligned scaling
S = np.array([[2, 0],
              [0, 3]])
print("\nScaling matrix (2x in x, 3x in y):")
print(S)
v_scaled = S @ v1
print(f"\nOriginal: v₁ = {v1}")
print(f"Scaled: v₁' = {v_scaled}")
# Visualize linear transformations
def plot_transformation(A, title="Linear Transformation"):
    """
    Show how the 2x2 matrix `A` transforms the unit square.

    Draws two panels: the unit square with basis vectors e₁, e₂ (before),
    and their images A @ square, A @ e₁, A @ e₂ (after).

    Parameters
    ----------
    A : array_like, shape (2, 2)
        Matrix of the linear transformation.
    title : str, optional
        Label used in the right panel's title.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    # Corners of the unit square as columns (closed polygon: last = first)
    square = np.array([[0, 1, 1, 0, 0],
                       [0, 0, 1, 1, 0]])
    # Standard basis vectors
    e1 = np.array([1, 0])
    e2 = np.array([0, 1])
    # --- Left panel: original square and basis ---
    ax1.plot(square[0], square[1], 'b-', linewidth=2, label='Unit square')
    ax1.quiver(0, 0, e1[0], e1[1], angles='xy', scale_units='xy', scale=1,
               color='r', width=0.01, label='e₁')
    ax1.quiver(0, 0, e2[0], e2[1], angles='xy', scale_units='xy', scale=1,
               color='g', width=0.01, label='e₂')
    # Faint background grid lines
    for i in range(-2, 3):
        ax1.plot([-2, 2], [i, i], 'k-', alpha=0.1, linewidth=0.5)
        ax1.plot([i, i], [-2, 2], 'k-', alpha=0.1, linewidth=0.5)
    ax1.set_xlim(-2, 2)
    ax1.set_ylim(-2, 2)
    ax1.set_aspect('equal')
    ax1.grid(True, alpha=0.3)
    ax1.legend()
    ax1.set_title('Before Transformation')
    ax1.set_xlabel('x')
    ax1.set_ylabel('y')
    # Apply A to the square's corner columns and to each basis vector
    square_transformed = A @ square
    e1_transformed = A @ e1
    e2_transformed = A @ e2
    # --- Right panel: transformed square and basis images ---
    ax2.plot(square_transformed[0], square_transformed[1], 'b-', linewidth=2, label='Transformed')
    ax2.quiver(0, 0, e1_transformed[0], e1_transformed[1], angles='xy', scale_units='xy', scale=1,
               color='r', width=0.01, label='Ae₁')
    ax2.quiver(0, 0, e2_transformed[0], e2_transformed[1], angles='xy', scale_units='xy', scale=1,
               color='g', width=0.01, label='Ae₂')
    # Same faint background grid
    for i in range(-2, 3):
        ax2.plot([-2, 2], [i, i], 'k-', alpha=0.1, linewidth=0.5)
        ax2.plot([i, i], [-2, 2], 'k-', alpha=0.1, linewidth=0.5)
    ax2.set_xlim(-2, 2)
    ax2.set_ylim(-2, 2)
    ax2.set_aspect('equal')
    ax2.grid(True, alpha=0.3)
    ax2.legend()
    ax2.set_title(f'After: {title}')
    ax2.set_xlabel('x')
    ax2.set_ylabel('y')
    plt.tight_layout()
    plt.show()
# Rotation by 30 degrees
theta = np.pi / 6  # 30 degrees in radians
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])
plot_transformation(R, "Rotation (30°)")

# Axis-aligned scaling: stretch x, shrink y
S = np.array([[1.5, 0],
              [0, 0.5]])
plot_transformation(S, "Scaling")

# Shear along x (off-diagonal term tilts vertical lines)
H = np.array([[1, 0.5],
              [0, 1]])
plot_transformation(H, "Shear")
## 2.8 Affine Spaces

### Definition 2.20 (Affine Space)

An affine space is a vector space without a fixed origin.

Affine mapping: $\Phi(x) = Ax + b$

- Linear part: $Ax$
- Translation: $b$

Example: lines and planes that don't pass through the origin
# Affine transformations: rotation + translation

# Rotation matrix (linear part)
theta = np.pi / 4
A = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])
# Translation vector (affine part)
b = np.array([1, 2])

print("Affine transformation: y = Ax + b")
print(f"\nRotation matrix A:")
print(A)
print(f"\nTranslation vector b = {b}")

# Corners of the unit square as columns
points = np.array([[0, 1, 1, 0],
                   [0, 0, 1, 1]])
# Apply y = Ax + b to every column; b is broadcast across columns
transformed = A @ points + b[:, np.newaxis]

plt.figure(figsize=(10, 10))
# Original square (append the first corner to close the polygon)
plt.plot(np.append(points[0], points[0, 0]),
         np.append(points[1], points[1, 0]),
         'b-o', linewidth=2, markersize=8, label='Original')
# Transformed square
plt.plot(np.append(transformed[0], transformed[0, 0]),
         np.append(transformed[1], transformed[1, 0]),
         'r-o', linewidth=2, markersize=8, label='Rotated + Translated')
# Arrow showing the translation component b
plt.quiver(0, 0, b[0], b[1], angles='xy', scale_units='xy', scale=1,
           color='green', width=0.01, label='Translation')
plt.xlim(-1, 4)
plt.ylim(-1, 4)
# BUG FIX: pyplot has no `plt.aspect`; set the aspect on the current Axes
plt.gca().set_aspect('equal')
plt.grid(True, alpha=0.3)
plt.legend(fontsize=12)
plt.title('Affine Transformation', fontsize=14)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
## Summary

### Key Concepts from Chapter 2:

1. Linear Equations: Fundamental to solving optimization problems in ML
2. Matrices: Represent data, transformations, and models
3. Gaussian Elimination: Systematic way to solve linear systems
4. Vector Spaces: Mathematical framework for ML algorithms
5. Linear Independence: Ensures unique representations
6. Basis and Rank: Dimensionality and structure of data
7. Linear Mappings: How matrices transform data
8. Affine Spaces: Generalization including translations

### ML Applications:

- Linear Regression (Chapter 9): Solve Ax = b to find parameters
- PCA (Chapter 10): Find a basis that captures variance
- Neural Networks (Chapter 5): Linear transformations + nonlinearities
- SVM (Chapter 12): Finding separating hyperplanes

### Next Steps:

- Chapter 3: Analytic Geometry (norms, inner products, distances)
- Chapter 4: Matrix Decompositions (eigenvalues, SVD)
- Chapter 5: Vector Calculus (gradients, optimization)

Practice: work through the exercises at the end of Chapter 2 in the MML book!