# Setup
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): imported for its side effect of registering the '3d' projection;
# no 3D axes appear in this chunk — presumably used later. Confirm before removing.
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
sns.set_style('whitegrid')  # light grid background for every figure below
np.set_printoptions(precision=3, suppress=True)  # 3 decimals, no scientific notation
## 2.1 Systems of Linear Equations

### Example 2.1: Production Planning (MML Book)

A company produces products $N_1, \dots, N_n$ using resources $R_1, \dots, R_m$.

**Problem:** Find how many units of each product to produce given the resource constraints.

General form:

$$
\begin{aligned}
a_{11}x_1 + a_{12}x_2 + \dots + a_{1n}x_n &= b_1 \\
a_{21}x_1 + a_{22}x_2 + \dots + a_{2n}x_n &= b_2 \\
&\;\vdots \\
a_{m1}x_1 + a_{m2}x_2 + \dots + a_{mn}x_n &= b_m
\end{aligned}
$$
# Example 2.2 from MML Book
# System with a unique solution:
#   x₁ + x₂ +  x₃ = 3
#   x₁ - x₂ + 2x₃ = 2
#        x₂ +  x₃ = 2

# Coefficient matrix A
A = np.array([
    [1, 1, 1],
    [1, -1, 2],
    [0, 1, 1]
])

# Right-hand side
b = np.array([3, 2, 2])

# Solve using NumPy (LU factorization under the hood)
x = np.linalg.solve(A, b)

print("Solution:")
print(f"x₁ = {x[0]:.3f}")
print(f"x₂ = {x[1]:.3f}")
print(f"x₃ = {x[2]:.3f}")

# Verify the solution: Ax should reproduce b (allclose handles float round-off)
print(f"\nVerification: Ax = {A @ x}")
print(f"Expected: b = {b}")
print(f"\nSolution is correct: {np.allclose(A @ x, b)}")
# Visualize a 2-D system of equations:
#   2x + y = 5  and  x - y = 1
# Each equation is rewritten as y = f(x) and drawn as a line; the
# intersection of the two lines is the solution of the system.
xs = np.linspace(-1, 4, 100)
line1 = 5 - 2 * xs   # y from 2x + y = 5
line2 = xs - 1       # y from x - y = 1

plt.figure(figsize=(10, 6))
plt.plot(xs, line1, label='2x + y = 5', linewidth=2)
plt.plot(xs, line2, label='x - y = 1', linewidth=2)

# Mark the intersection point (solution of the 2x2 system)
A_2d = np.array([[2, 1], [1, -1]])
b_2d = np.array([5, 1])
solution = np.linalg.solve(A_2d, b_2d)
plt.plot(solution[0], solution[1], 'ro', markersize=12,
         label=f'Solution ({solution[0]:.1f}, {solution[1]:.1f})')

plt.xlabel('x', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.title('System of Linear Equations in 2D', fontsize=14)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.axhline(0, color='black', linewidth=0.5)
plt.axvline(0, color='black', linewidth=0.5)
plt.show()

print(f"Solution: x = {solution[0]:.2f}, y = {solution[1]:.2f}")
## 2.2 Matrices

### Definition 2.1 (Matrix)

An (m, n)-matrix is an m×n rectangular arrangement of elements.

Special cases:

- Row vector: a (1, n)-matrix
- Column vector: an (m, 1)-matrix
# Matrix creation and basic operations

# Create two matrices of the same shape
A = np.array([[1, 2, 3],
              [4, 5, 6]])
B = np.array([[7, 8, 9],
              [10, 11, 12]])

print("Matrix A (2×3):")
print(A)
print(f"Shape: {A.shape}")

print("\nMatrix B (2×3):")
print(B)

# Matrix addition is element-wise (shapes must agree)
C = A + B
print("\nA + B:")
print(C)

# Scalar multiplication scales every entry
D = 3 * A
print("\n3 * A:")
print(D)
# Matrix multiplication (Eq. 2.13 in MML book)
# C = AB where c_ij = Σ_l (a_il * b_lj); inner dimensions must agree
A = np.array([[1, 2],
              [3, 4],
              [5, 6]])
B = np.array([[7, 8, 9],
              [10, 11, 12]])

print("A (3×2):")
print(A)
print("\nB (2×3):")
print(B)

# (3×2) @ (2×3) -> (3×3)
C = A @ B  # or np.dot(A, B)
print("\nC = A @ B (3×3):")
print(C)

# Spot-check one entry against the definition
print(f"\nVerify C[0,0]: {A[0,0]*B[0,0] + A[0,1]*B[1,0]} = {C[0,0]}")

# Matrix multiplication is NOT commutative:
# B @ A has a different shape (and values) than A @ B
D = B @ A
print("\nD = B @ A (2×2):")
print(D)
# Special matrices

# Identity matrix: I @ A = A @ I = A
I3 = np.eye(3)
print("Identity matrix I₃:")
print(I3)

# Zero matrix: the additive identity
Z = np.zeros((2, 3))
print("\nZero matrix (2×3):")
print(Z)

# Diagonal matrix built from its diagonal entries
D = np.diag([1, 2, 3])
print("\nDiagonal matrix:")
print(D)

# Transpose swaps rows and columns: (A^T)_ij = A_ji
A = np.array([[1, 2, 3],
              [4, 5, 6]])
print("\nA:")
print(A)
print("\nA^T (transpose):")
print(A.T)
## 2.3 Solving Systems of Linear Equations

### Gaussian Elimination

Transform the augmented matrix [A|b] into row-echelon form (REF).

Elementary row operations:

1. Swap two rows
2. Multiply a row by a nonzero scalar
3. Add a multiple of one row to another
# Gaussian elimination step-by-step
def gaussian_elimination(A, b, verbose=True):
    """
    Solve the square linear system Ax = b by Gaussian elimination
    with partial pivoting.

    Parameters
    ----------
    A : array_like, shape (n, n)
        Coefficient matrix.
    b : array_like, shape (n,)
        Right-hand side vector.
    verbose : bool, optional
        If True (default), print every elimination step.

    Returns
    -------
    numpy.ndarray of shape (n,) with the solution, or None when a pivot
    is numerically zero (no unique solution exists).
    """
    # Build the augmented matrix [A|b] in float arithmetic.
    # np.asarray(...) also accepts plain Python lists, not just ndarrays.
    n = len(b)
    Ab = np.column_stack([np.asarray(A, dtype=float).copy(),
                          np.asarray(b, dtype=float).copy()])
    if verbose:
        print("Augmented matrix [A|b]:")
        print(Ab)
        print("\n" + "="*50)
    # Forward elimination with partial pivoting
    for i in range(n):
        # Pick the row with the largest |entry| in column i as the pivot
        max_row = int(np.argmax(np.abs(Ab[i:, i]))) + i
        # Tolerance-based test: exact float `== 0` misses near-singular
        # pivots produced by round-off during elimination.
        if np.isclose(Ab[max_row, i], 0.0):
            print(f"No unique solution exists (zero pivot at column {i})")
            return None
        # Swap rows if needed
        if max_row != i:
            Ab[[i, max_row]] = Ab[[max_row, i]]
            if verbose:
                print(f"\nSwap row {i} with row {max_row}:")
                print(Ab)
        # Zero out every entry below the pivot
        for j in range(i+1, n):
            factor = Ab[j, i] / Ab[i, i]
            Ab[j] = Ab[j] - factor * Ab[i]
            if verbose and factor != 0:
                print(f"\nRow {j} = Row {j} - {factor:.3f} * Row {i}:")
                print(Ab)
    if verbose:
        print("\n" + "="*50)
        print("Row echelon form:")
        print(Ab)
    # Back substitution (Ab is now upper triangular)
    x = np.zeros(n)
    for i in range(n-1, -1, -1):
        x[i] = (Ab[i, -1] - np.dot(Ab[i, i+1:n], x[i+1:])) / Ab[i, i]
    return x
# Example from MML book: drive gaussian_elimination on the 3×3 system above
A = np.array([[1, 1, 1],
              [1, -1, 2],
              [0, 1, 1]])
b = np.array([3, 2, 2])

print("Solving system:")
print("x₁ + x₂ + x₃ = 3")
print("x₁ - x₂ + 2x₃ = 2")
print("     x₂ + x₃ = 2")
print("\n" + "="*50 + "\n")

x = gaussian_elimination(A, b)

print("\n" + "="*50)
print("\nSolution:")
print(f"x = {x}")
print(f"\nVerification: Ax = {A @ x}")
print(f"Expected: b = {b}")
# Matrix inverse: A^(-1) is the matrix satisfying A @ A^(-1) = I
A = np.array([[4, 7],
              [2, 6]], dtype=float)
print("Matrix A:")
print(A)

# Invert A (valid here because det(A) = 10 is nonzero)
A_inv = np.linalg.inv(A)
print("\nA^(-1):")
print(A_inv)

# Sanity check: the product should be the 2x2 identity
product = A.dot(A_inv)
print("\nA @ A^(-1):")
print(product)

# x = A^(-1) b solves Ax = b. (np.linalg.solve is preferred numerically;
# the explicit inverse is used here to make the algebra visible.)
b = np.array([1, 2])
x = A_inv.dot(b)
print(f"\nSolve Ax = b:")
print(f"x = A^(-1) @ b = {x}")
print(f"Verification: Ax = {A @ x}")
## 2.4 Vector Spaces

### Definition 2.5 (Vector Space)

A vector space V over ℝ is a set with two operations:

- Addition: $+ : V \times V \to V$
- Scalar multiplication: $\cdot : \mathbb{R} \times V \to V$

that satisfy:

- Associativity of addition
- Commutativity of addition
- Identity element (zero vector)
- Inverse elements
- Distributivity
- and more…

Examples: ℝⁿ, polynomials, matrices, functions
# Vector space examples

# Example 1: ℝ³ — the standard vector space
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])
print("Vectors in ℝ³:")
print(f"v1 = {v1}")
print(f"v2 = {v2}")

# Closure under addition: the sum stays inside ℝ³
v3 = v1 + v2
print(f"\nv1 + v2 = {v3} (still in ℝ³)")

# Closure under scalar multiplication
v4 = 2.5 * v1
print(f"2.5 * v1 = {v4} (still in ℝ³)")

# The zero vector is the additive identity
zero = np.zeros(3)
print(f"\nZero vector: {zero}")
print(f"v1 + 0 = {v1 + zero} (identity)")

# Every vector has an additive inverse
neg_v1 = -v1
print(f"\n-v1 = {neg_v1}")
print(f"v1 + (-v1) = {v1 + neg_v1} (inverse)")
# Visualize vector addition and scaling in 2D
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# --- Left panel: vector addition (parallelogram rule) ---
v1 = np.array([2, 1])
v2 = np.array([1, 3])
v3 = v1 + v2
ax1.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1, color='r', width=0.008, label='v₁')
ax1.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1, color='b', width=0.008, label='v₂')
ax1.quiver(0, 0, v3[0], v3[1], angles='xy', scale_units='xy', scale=1, color='g', width=0.01, label='v₁+v₂')
# Faint copies of each vector translated to the other's tip complete the parallelogram
ax1.quiver(v1[0], v1[1], v2[0], v2[1], angles='xy', scale_units='xy', scale=1,
           color='b', width=0.005, alpha=0.3, linestyle='--')
ax1.quiver(v2[0], v2[1], v1[0], v1[1], angles='xy', scale_units='xy', scale=1,
           color='r', width=0.005, alpha=0.3, linestyle='--')
ax1.set_xlim(-1, 5)
ax1.set_ylim(-1, 5)
ax1.set_aspect('equal')
ax1.grid(True, alpha=0.3)
ax1.legend(fontsize=11)
ax1.set_title('Vector Addition', fontsize=13)
ax1.set_xlabel('x')
ax1.set_ylabel('y')

# --- Right panel: the same vector scaled by several scalars ---
v = np.array([2, 1])
scalars = [-1, 0.5, 1, 1.5, 2]
colors = ['purple', 'orange', 'blue', 'green', 'red']
for scalar, color in zip(scalars, colors):
    scaled_v = scalar * v
    ax2.quiver(0, 0, scaled_v[0], scaled_v[1], angles='xy', scale_units='xy', scale=1,
               color=color, width=0.008, label=f'{scalar}v')
ax2.set_xlim(-3, 5)
ax2.set_ylim(-3, 3)
ax2.set_aspect('equal')
ax2.grid(True, alpha=0.3)
ax2.legend(fontsize=10)
ax2.set_title('Scalar Multiplication', fontsize=13)
ax2.set_xlabel('x')
ax2.set_ylabel('y')
plt.tight_layout()
plt.show()
## 2.5 Linear Independence

### Definition 2.9 (Linear Combination)

A vector v is a linear combination of vectors $\{v_1, \dots, v_k\}$ if

$$v = \lambda_1 v_1 + \lambda_2 v_2 + \dots + \lambda_k v_k$$

for some scalars $\lambda_1, \dots, \lambda_k \in \mathbb{R}$.

### Definition 2.10 (Linear Independence)

Vectors $\{v_1, \dots, v_k\}$ are linearly independent if

$$\lambda_1 v_1 + \lambda_2 v_2 + \dots + \lambda_k v_k = 0 \implies \lambda_1 = \lambda_2 = \dots = \lambda_k = 0.$$

Otherwise, they are linearly dependent.
# Check linear independence
def check_linear_independence(vectors):
    """
    Test whether a list of 1-D vectors is linearly independent.

    The vectors are stacked as columns of a matrix; they are independent
    exactly when the matrix rank equals the number of vectors.

    Prints a short report and returns True if independent, else False.
    """
    stacked = np.column_stack(vectors)
    r = np.linalg.matrix_rank(stacked)
    count = len(vectors)
    print(f"Matrix A (vectors as columns):")
    print(stacked)
    print(f"\nRank: {r}")
    print(f"Number of vectors: {count}")
    verdict = 'independent' if r == count else 'dependent'
    print(f"\nLinear {verdict}")
    return r == count
# Example 1: Linearly independent vectors
print("Example 1: Standard basis vectors in ℝ³")
v1 = np.array([1, 0, 0])
v2 = np.array([0, 1, 0])
v3 = np.array([0, 0, 1])
check_linear_independence([v1, v2, v3])

print("\n" + "="*50 + "\n")

# Example 2: Linearly dependent vectors (v3 is a combination of v1, v2)
print("Example 2: Dependent vectors (v3 = v1 + v2)")
v1 = np.array([1, 2, 3])
v2 = np.array([2, 3, 4])
v3 = v1 + v2  # v3 lies in span{v1, v2}, so the set is dependent
check_linear_independence([v1, v2, v3])
# Visualize linear dependence/independence in 2D
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# --- Left panel: independent vectors span the whole plane ---
v1 = np.array([1, 0])
v2 = np.array([0, 1])
ax1.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1,
           color='r', width=0.015, label='v₁')
ax1.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1,
           color='b', width=0.015, label='v₂')
# Dots sample the span (all linear combinations a*v1 + b*v2)
for a in np.linspace(-2, 2, 9):
    for b in np.linspace(-2, 2, 9):
        point = a*v1 + b*v2
        ax1.plot(point[0], point[1], 'k.', alpha=0.2, markersize=3)
ax1.set_xlim(-2.5, 2.5)
ax1.set_ylim(-2.5, 2.5)
ax1.set_aspect('equal')
ax1.grid(True, alpha=0.3)
ax1.legend(fontsize=11)
ax1.set_title('Linearly Independent\n(span entire plane)', fontsize=13)
ax1.set_xlabel('x')
ax1.set_ylabel('y')

# --- Right panel: dependent (collinear) vectors span only a line ---
v1 = np.array([1, 1])
v2 = 2 * v1  # v2 = 2*v1, so {v1, v2} is dependent
ax2.quiver(0, 0, v1[0], v1[1], angles='xy', scale_units='xy', scale=1,
           color='r', width=0.015, label='v₁')
ax2.quiver(0, 0, v2[0], v2[1], angles='xy', scale_units='xy', scale=1,
           color='b', width=0.015, label='v₂ = 2v₁')
# The span is just the line {t*v1 : t in R}
t_vals = np.linspace(-2, 2, 100)
line = np.array([t*v1 for t in t_vals])
ax2.plot(line[:, 0], line[:, 1], 'k--', alpha=0.3, linewidth=2, label='span')
ax2.set_xlim(-2.5, 2.5)
ax2.set_ylim(-2.5, 2.5)
ax2.set_aspect('equal')
ax2.grid(True, alpha=0.3)
ax2.legend(fontsize=11)
ax2.set_title('Linearly Dependent\n(span only a line)', fontsize=13)
ax2.set_xlabel('x')
ax2.set_ylabel('y')
plt.tight_layout()
plt.show()
## 2.6 Basis and Rank

### Definition 2.12 (Basis)

A basis of a vector space V is a set of linearly independent vectors that span V.

Standard basis of ℝⁿ: $\{e_1, e_2, \dots, e_n\}$ where

$$e_1 = [1, 0, 0, \dots, 0]^\top, \quad e_2 = [0, 1, 0, \dots, 0]^\top, \quad \dots$$

### Definition 2.14 (Rank)

The rank of a matrix A is the dimension of the subspace spanned by its columns (or rows).
# Standard basis of ℝ³
e1 = np.array([1, 0, 0])
e2 = np.array([0, 1, 0])
e3 = np.array([0, 0, 1])
print("Standard basis of ℝ³:")
print(f"e₁ = {e1}")
print(f"e₂ = {e2}")
print(f"e₃ = {e3}")

# Any vector in ℝ³ is a linear combination of the basis vectors,
# with its own components as the coefficients
v = np.array([3, -2, 5])
print(f"\nVector v = {v}")
print(f"v = {v[0]}e₁ + {v[1]}e₂ + {v[2]}e₃")
print(f"Verification: {v[0]*e1 + v[1]*e2 + v[2]*e3}")

# A different basis (still spans ℝ³)
b1 = np.array([1, 1, 0])
b2 = np.array([0, 1, 1])
b3 = np.array([1, 0, 1])
print("\n" + "="*50)
print("\nAlternative basis:")
print(f"b₁ = {b1}")
print(f"b₂ = {b2}")
print(f"b₃ = {b3}")

# They form a basis iff they are independent, i.e. rank = 3
B = np.column_stack([b1, b2, b3])
print(f"\nLinear independent: {np.linalg.matrix_rank(B) == 3}")

# Coordinates of v in the new basis: solve B @ coords = v
coords = np.linalg.solve(B, v)
print(f"\nv in new basis: {coords}")
print(f"Verification: {coords[0]*b1 + coords[1]*b2 + coords[2]*b3}")
# Matrix rank examples

# Full-rank matrix (upper triangular with nonzero diagonal)
A1 = np.array([[1, 0, 2],
               [0, 1, 3],
               [0, 0, 1]])
print("Matrix A₁:")
print(A1)
print(f"Rank: {np.linalg.matrix_rank(A1)}")
print(f"Full rank: {np.linalg.matrix_rank(A1) == min(A1.shape)}")

# Rank-deficient matrix: every row is a multiple of the first
A2 = np.array([[1, 2, 3],
               [2, 4, 6],
               [3, 6, 9]])
print("\nMatrix A₂ (rows are multiples):")
print(A2)
print(f"Rank: {np.linalg.matrix_rank(A2)}")
print(f"Rank-deficient: {np.linalg.matrix_rank(A2) < min(A2.shape)}")

# Rectangular matrix: rank is at most min(m, n)
A3 = np.array([[1, 2, 3, 4],
               [5, 6, 7, 8],
               [9, 10, 11, 12]])
print("\nMatrix A₃ (3×4):")
print(A3)
print(f"Rank: {np.linalg.matrix_rank(A3)}")
print(f"Max possible rank: {min(A3.shape)}")
## 2.7 Linear Mappings

### Definition 2.15 (Linear Mapping)

A mapping $\Phi : V \to W$ is linear if

- $\Phi(v + w) = \Phi(v) + \Phi(w)$ for all $v, w \in V$
- $\Phi(\lambda v) = \lambda \Phi(v)$ for all $v \in V$ and $\lambda \in \mathbb{R}$

Key: matrices represent linear mappings!
# Linear transformations as matrices

# Rotation matrix (counter-clockwise by 45 degrees)
theta = np.pi / 4  # 45 degrees in radians
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])
print("Rotation matrix (45°):")
print(R)

# Apply the rotation to the standard basis vectors
v1 = np.array([1, 0])
v2 = np.array([0, 1])
v1_rot = R @ v1
v2_rot = R @ v2
print(f"\nOriginal: v₁ = {v1}, v₂ = {v2}")
print(f"Rotated: v₁' = {v1_rot}, v₂' = {v2_rot}")

# Diagonal matrix = axis-aligned scaling
S = np.array([[2, 0],
              [0, 3]])
print("\nScaling matrix (2x in x, 3x in y):")
print(S)
v_scaled = S @ v1
print(f"\nOriginal: v₁ = {v1}")
print(f"Scaled: v₁' = {v_scaled}")
# Visualize linear transformations
def plot_transformation(A, title="Linear Transformation"):
    """
    Show how the 2x2 matrix `A` transforms the unit square.

    Draws two panels: the unit square with basis vectors e₁, e₂ (before),
    and their images A @ square, A @ e₁, A @ e₂ (after).

    Parameters
    ----------
    A : array_like, shape (2, 2)
        Matrix of the linear transformation.
    title : str, optional
        Label used in the right panel's title.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    # Corners of the unit square as columns (closed polygon: last = first)
    square = np.array([[0, 1, 1, 0, 0],
                       [0, 0, 1, 1, 0]])
    # Standard basis vectors
    e1 = np.array([1, 0])
    e2 = np.array([0, 1])
    # --- Left panel: original square and basis ---
    ax1.plot(square[0], square[1], 'b-', linewidth=2, label='Unit square')
    ax1.quiver(0, 0, e1[0], e1[1], angles='xy', scale_units='xy', scale=1,
               color='r', width=0.01, label='e₁')
    ax1.quiver(0, 0, e2[0], e2[1], angles='xy', scale_units='xy', scale=1,
               color='g', width=0.01, label='e₂')
    # Faint background grid lines
    for i in range(-2, 3):
        ax1.plot([-2, 2], [i, i], 'k-', alpha=0.1, linewidth=0.5)
        ax1.plot([i, i], [-2, 2], 'k-', alpha=0.1, linewidth=0.5)
    ax1.set_xlim(-2, 2)
    ax1.set_ylim(-2, 2)
    ax1.set_aspect('equal')
    ax1.grid(True, alpha=0.3)
    ax1.legend()
    ax1.set_title('Before Transformation')
    ax1.set_xlabel('x')
    ax1.set_ylabel('y')
    # Apply A to the square's corner columns and to each basis vector
    square_transformed = A @ square
    e1_transformed = A @ e1
    e2_transformed = A @ e2
    # --- Right panel: transformed square and basis images ---
    ax2.plot(square_transformed[0], square_transformed[1], 'b-', linewidth=2, label='Transformed')
    ax2.quiver(0, 0, e1_transformed[0], e1_transformed[1], angles='xy', scale_units='xy', scale=1,
               color='r', width=0.01, label='Ae₁')
    ax2.quiver(0, 0, e2_transformed[0], e2_transformed[1], angles='xy', scale_units='xy', scale=1,
               color='g', width=0.01, label='Ae₂')
    # Same faint background grid
    for i in range(-2, 3):
        ax2.plot([-2, 2], [i, i], 'k-', alpha=0.1, linewidth=0.5)
        ax2.plot([i, i], [-2, 2], 'k-', alpha=0.1, linewidth=0.5)
    ax2.set_xlim(-2, 2)
    ax2.set_ylim(-2, 2)
    ax2.set_aspect('equal')
    ax2.grid(True, alpha=0.3)
    ax2.legend()
    ax2.set_title(f'After: {title}')
    ax2.set_xlabel('x')
    ax2.set_ylabel('y')
    plt.tight_layout()
    plt.show()
# Rotation by 30 degrees
theta = np.pi / 6  # 30 degrees in radians
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])
plot_transformation(R, "Rotation (30°)")

# Axis-aligned scaling: stretch x, shrink y
S = np.array([[1.5, 0],
              [0, 0.5]])
plot_transformation(S, "Scaling")

# Shear along x (off-diagonal term tilts vertical lines)
H = np.array([[1, 0.5],
              [0, 1]])
plot_transformation(H, "Shear")
## 2.8 Affine Spaces

### Definition 2.20 (Affine Space)

An affine space is a vector space without a fixed origin.

Affine mapping: $\Phi(x) = Ax + b$

- Linear part: $Ax$
- Translation: $b$

Example: lines and planes that don't pass through the origin
# Affine transformations: rotation + translation

# Rotation matrix (linear part)
theta = np.pi / 4
A = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])
# Translation vector (affine part)
b = np.array([1, 2])

print("Affine transformation: y = Ax + b")
print(f"\nRotation matrix A:")
print(A)
print(f"\nTranslation vector b = {b}")

# Corners of the unit square as columns
points = np.array([[0, 1, 1, 0],
                   [0, 0, 1, 1]])
# Apply y = Ax + b to every column; b is broadcast across columns
transformed = A @ points + b[:, np.newaxis]

plt.figure(figsize=(10, 10))
# Original square (append the first corner to close the polygon)
plt.plot(np.append(points[0], points[0, 0]),
         np.append(points[1], points[1, 0]),
         'b-o', linewidth=2, markersize=8, label='Original')
# Transformed square
plt.plot(np.append(transformed[0], transformed[0, 0]),
         np.append(transformed[1], transformed[1, 0]),
         'r-o', linewidth=2, markersize=8, label='Rotated + Translated')
# Arrow showing the translation component b
plt.quiver(0, 0, b[0], b[1], angles='xy', scale_units='xy', scale=1,
           color='green', width=0.01, label='Translation')
plt.xlim(-1, 4)
plt.ylim(-1, 4)
# BUG FIX: pyplot has no `plt.aspect`; set the aspect on the current Axes
plt.gca().set_aspect('equal')
plt.grid(True, alpha=0.3)
plt.legend(fontsize=12)
plt.title('Affine Transformation', fontsize=14)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
## Summary

### Key Concepts from Chapter 2:

1. Linear Equations: Fundamental to solving optimization problems in ML
2. Matrices: Represent data, transformations, and models
3. Gaussian Elimination: Systematic way to solve linear systems
4. Vector Spaces: Mathematical framework for ML algorithms
5. Linear Independence: Ensures unique representations
6. Basis and Rank: Dimensionality and structure of data
7. Linear Mappings: How matrices transform data
8. Affine Spaces: Generalization including translations

### ML Applications:

- Linear Regression (Chapter 9): Solve Ax = b to find parameters
- PCA (Chapter 10): Find a basis that captures variance
- Neural Networks (Chapter 5): Linear transformations + nonlinearities
- SVM (Chapter 12): Finding separating hyperplanes

### Next Steps:

- Chapter 3: Analytic Geometry (norms, inner products, distances)
- Chapter 4: Matrix Decompositions (eigenvalues, SVD)
- Chapter 5: Vector Calculus (gradients, optimization)

Practice: work through the exercises at the end of Chapter 2 in the MML book!