Run this notebook: Open in Colab Open in Kaggle

Chapter 3: Linear transformations and matrices ¶

1. Composition: One Transformation After Another¶

The Setup¶

What happens when you:

First apply transformation M₁
Then apply transformation M₂

Key Question¶

Is there a single matrix that represents doing both?

Answer: YES! That matrix is $M_2 \cdot M_1$ (read right-to-left)

Example¶

Rotate 90° (M₁)
Then shear (M₂)
Result = Shear · Rotation

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
import seaborn as sns

# Notebook-wide display defaults: seaborn grid theme, default figure size,
# and compact NumPy printing (3 decimals, no scientific notation).
sns.set_style("whitegrid")
plt.rcParams.update({"figure.figsize": (10, 8)})
np.set_printoptions(suppress=True, precision=3)

def draw_basis_vectors(ax, i_hat, j_hat, color_i='green', color_j='red', labels=True):
    """Draw the two basis vectors as arrows starting at the origin.

    Args:
        ax: Axes-like object; ``add_patch`` and ``text`` are called on it.
        i_hat: Tip of the first basis vector (two indexable components).
        j_hat: Tip of the second basis vector (two indexable components).
        color_i: Colour used for the î arrow and its label.
        color_j: Colour used for the ĵ arrow and its label.
        labels: When truthy, write 'î' / 'ĵ' next to the arrow tips.
    """
    basis = (
        (i_hat, color_i, 'î'),
        (j_hat, color_j, 'ĵ'),
    )

    # Build both arrows first, then attach them (î before ĵ).
    arrows = [
        FancyArrowPatch((0, 0), tuple(tip), color=colour,
                        arrowstyle='->', mutation_scale=20, linewidth=3)
        for tip, colour, _ in basis
    ]
    for arrow in arrows:
        ax.add_patch(arrow)

    if not labels:
        return

    # Each label sits 15% beyond its arrow tip so it does not cover the head.
    for tip, colour, glyph in basis:
        ax.text(tip[0]*1.15, tip[1]*1.15, glyph, fontsize=16,
                color=colour, fontweight='bold')

def setup_ax(ax, xlim=(-3, 3), ylim=(-3, 3), title=''):
    """Configure ``ax`` as an equal-aspect coordinate plane.

    Sets the view limits, draws thin black lines through y=0 and x=0,
    enables a faint grid and, when ``title`` is non-empty, sets a bold title.

    Args:
        ax: Axes-like object to configure.
        xlim: Limits passed to ``ax.set_xlim``.
        ylim: Limits passed to ``ax.set_ylim``.
        title: Optional panel title; skipped when empty.
    """
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_aspect('equal')

    # The coordinate axes share one line style.
    axis_style = {'color': 'k', 'linewidth': 0.8}
    ax.axhline(y=0, **axis_style)
    ax.axvline(x=0, **axis_style)
    ax.grid(True, alpha=0.3)

    if not title:
        return
    ax.set_title(title, fontsize=13, fontweight='bold')

# Example: Rotation then Shear
# Four panels: the starting vectors, the result after each individual step,
# and the result of applying the single composed matrix M₂M₁ directly.
fig, axes = plt.subplots(1, 4, figsize=(20, 5))

# Step 0: Original
setup_ax(axes[0], title='Step 0: Original')
draw_basis_vectors(axes[0], np.array([1, 0]), np.array([0, 1]))
v = np.array([1, 1])
arrow_v = FancyArrowPatch((0, 0), tuple(v), color='blue',
                         arrowstyle='->', mutation_scale=15, linewidth=2)
axes[0].add_patch(arrow_v)
axes[0].text(v[0]+0.2, v[1]+0.2, 'v', fontsize=14, color='blue', fontweight='bold')

# Step 1: After rotation (90° CCW)
M1 = np.array([[0, -1], [1, 0]])  # 90° rotation
setup_ax(axes[1], title='Step 1: Rotate 90° (M₁)')
i1 = M1 @ np.array([1, 0])
j1 = M1 @ np.array([0, 1])
draw_basis_vectors(axes[1], i1, j1)
v1 = M1 @ v
arrow_v1 = FancyArrowPatch((0, 0), tuple(v1), color='blue',
                          arrowstyle='->', mutation_scale=15, linewidth=2)
axes[1].add_patch(arrow_v1)
axes[1].text(0.02, 0.98, f'M₁ =\n{M1}', transform=axes[1].transAxes,
            fontsize=10, verticalalignment='top', family='monospace',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

# Step 2: After shear (applied to the already-rotated vectors)
M2 = np.array([[1, 1], [0, 1]])  # horizontal shear
setup_ax(axes[2], title='Step 2: Then Shear (M₂)')
i2 = M2 @ i1
j2 = M2 @ j1
draw_basis_vectors(axes[2], i2, j2)
v2 = M2 @ v1
arrow_v2 = FancyArrowPatch((0, 0), tuple(v2), color='blue',
                          arrowstyle='->', mutation_scale=15, linewidth=2)
axes[2].add_patch(arrow_v2)
axes[2].text(0.02, 0.98, f'M₂ =\n{M2}', transform=axes[2].transAxes,
            fontsize=10, verticalalignment='top', family='monospace',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

# Step 3: Composition (single transformation)
M_composed = M2 @ M1
setup_ax(axes[3], title='Composition: M₂M₁ (One Step)')
# Derive the basis vectors from the composed matrix itself, so this panel
# really shows the one-step result instead of replaying the two-step values.
i_direct = M_composed @ np.array([1, 0])
j_direct = M_composed @ np.array([0, 1])
draw_basis_vectors(axes[3], i_direct, j_direct)
v_direct = M_composed @ v
arrow_vd = FancyArrowPatch((0, 0), tuple(v_direct), color='purple',
                          arrowstyle='->', mutation_scale=15, linewidth=3)
axes[3].add_patch(arrow_vd)
axes[3].text(0.02, 0.98, f'M₂M₁ =\n{M_composed}', transform=axes[3].transAxes,
            fontsize=10, verticalalignment='top', family='monospace',
            bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.8))

plt.tight_layout()
plt.show()

print("Verification:")
print(f"Two-step: M₂(M₁v) = {v2}")
print(f"One-step: (M₂M₁)v = {v_direct}")
# Compare the two results instead of asserting success unconditionally.
if np.array_equal(v2, v_direct):
    print("\nThey're the same! ✓")
else:
    print("\nMismatch: the two results differ ✗")

2. Deriving Matrix Multiplication¶

The Key Insight¶

To find $M_2 \cdot M_1$, track where $\hat{i}$ and $\hat{j}$ end up after BOTH transformations!

Step-by-Step¶

Where does $\hat{i}$ land after $M_1$? → First column of $M_1$
Where does that land after $M_2$? → $M_2$ times (first column of $M_1$)
That’s the first column of $M_2 M_1$!
Repeat for $\hat{j}$ to get the second column

Formula¶

\[\begin{split}\begin{bmatrix} a & b \\ c & d \end{bmatrix} \begin{bmatrix} e & f \\ g & h \end{bmatrix} = \begin{bmatrix} ae+bg & af+bh \\ ce+dg & cf+dh \end{bmatrix}\end{split}\]

But don’t memorize - understand!

# Visualize the derivation
def _plot_basis_journey(row_axes, M1, M2, basis_vec, symbol, ordinal, column_no,
                        color, final_color, box_color, show_matrices):
    """Plot one row of three panels following a basis vector through M1 then M2.

    Args:
        row_axes: Sequence of three axes (under M₁, then M₂, resulting column).
        M1: Matrix applied first.
        M2: Matrix applied second.
        basis_vec: The basis vector being tracked.
        symbol: Label used for the vector in titles and annotations.
        ordinal: Word used in the third panel's title ('First' / 'Second').
        column_no: Column number shown in the third panel's text box.
        color: Colour of the vector after M1.
        final_color: Colour of the vector after M2.
        box_color: Face colour of the third panel's text box.
        show_matrices: When truthy, caption the first two panels with M1 / M2.

    Returns:
        The vector ``M2 @ (M1 @ basis_vec)``.
    """
    ax_m1, ax_m2, ax_col = row_axes

    # Panel 1: where does the basis vector go under M1?
    setup_ax(ax_m1, title=f'{symbol} under M₁')
    draw_basis_vectors(ax_m1, np.array([1, 0]), np.array([0, 1]),
                       color_i='lightgreen', color_j='lightcoral')
    after_M1 = M1 @ basis_vec
    ax_m1.add_patch(FancyArrowPatch((0, 0), tuple(after_M1), color=color,
                                    arrowstyle='->', mutation_scale=20, linewidth=3))
    ax_m1.text(after_M1[0]+0.2, after_M1[1]+0.2,
               f'{symbol} → {after_M1}', fontsize=12, color=color, fontweight='bold')
    if show_matrices:
        ax_m1.text(0.5, 0.98, f'M₁ = {M1.tolist()}',
                   transform=ax_m1.transAxes, fontsize=10,
                   verticalalignment='top', ha='center')

    # Panel 2: then apply M2 (the intermediate vector is kept as a faded dashed arrow).
    setup_ax(ax_m2, title='Then apply M₂')
    final = M2 @ after_M1
    faded = FancyArrowPatch((0, 0), tuple(after_M1), color=color,
                            arrowstyle='->', mutation_scale=15, linewidth=2,
                            linestyle='--', alpha=0.5)
    landed = FancyArrowPatch((0, 0), tuple(final), color=final_color,
                             arrowstyle='->', mutation_scale=20, linewidth=3)
    ax_m2.add_patch(faded)
    ax_m2.add_patch(landed)
    ax_m2.text(final[0]+0.2, final[1]+0.2,
               f'{symbol} → {final}', fontsize=12, color=final_color, fontweight='bold')
    if show_matrices:
        ax_m2.text(0.5, 0.98, f'M₂ = {M2.tolist()}',
                   transform=ax_m2.transAxes, fontsize=10,
                   verticalalignment='top', ha='center')

    # Panel 3: the landing spot is a column of the product.
    setup_ax(ax_col, title=f'{ordinal} Column of M₂M₁')
    ax_col.add_patch(FancyArrowPatch((0, 0), tuple(final), color=final_color,
                                     arrowstyle='->', mutation_scale=20, linewidth=3))
    ax_col.text(0.5, 0.5, f'Column {column_no} =\n{final}',
                transform=ax_col.transAxes, fontsize=14,
                ha='center', va='center',
                bbox=dict(boxstyle='round', facecolor=box_color, alpha=0.8))
    return final


def visualize_multiplication_derivation(M1, M2):
    """Show how the columns of ``M2 @ M1`` arise from tracking î and ĵ.

    Draws a 2x3 figure (top row follows î, bottom row follows ĵ), shows it,
    then prints the product and the two tracked columns.

    Args:
        M1: 2x2 NumPy array applied first (``.tolist()`` is called on it).
        M2: 2x2 NumPy array applied second (``.tolist()`` is called on it).
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))

    # TOP ROW: track î (this row also carries the M₁ / M₂ captions)
    i_final = _plot_basis_journey(
        axes[0], M1, M2, np.array([1, 0]),
        symbol='î', ordinal='First', column_no=1,
        color='green', final_color='darkgreen', box_color='lightgreen',
        show_matrices=True)

    # BOTTOM ROW: track ĵ
    j_final = _plot_basis_journey(
        axes[1], M1, M2, np.array([0, 1]),
        symbol='ĵ', ordinal='Second', column_no=2,
        color='red', final_color='darkred', box_color='lightcoral',
        show_matrices=False)

    plt.tight_layout()
    plt.show()

    # Show result
    result = M2 @ M1
    print("\nMatrix Multiplication Result:")
    print(f"M₂ @ M₁ = {M2} @ {M1}")
    print(f"        = {result}")
    print(f"\nColumn 1 = M₂ @ (first column of M₁) = {i_final}")
    print(f"Column 2 = M₂ @ (second column of M₁) = {j_final}")

# Example: a 90° rotation (applied first) followed by a horizontal shear
M1 = np.array([[0, -1],
               [1, 0]])
M2 = np.array([[1, 1],
               [0, 1]])
visualize_multiplication_derivation(M1=M1, M2=M2)

3. Order Matters! (Non-Commutativity)¶

Critical Fact¶

\[AB \neq BA \quad \text{(in general)} \tag{1}\]

Why?¶

“Rotate then shear” ≠ “Shear then rotate”

Reading Order¶

$M_2 M_1 \vec{v}$ means:

First apply $M_1$ to $\vec{v}$
Then apply $M_2$ to the result

Read from right to left!

# Demonstrate non-commutativity
# Top row: rotate, then shear.  Bottom row: shear, then rotate.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Define transformations
rotation = np.array([[0, -1], [1, 0]])  # 90° CCW
shear = np.array([[1, 1], [0, 1]])      # horizontal shear

# Test vector
v = np.array([2, 1])

# Shear-then-rotate sends v to (3, 1) and then (-1, 3). With setup_ax's
# default (-3, 3) limits those tips sit exactly on the frame and the result
# label falls outside the axes, so every panel uses a wider shared window.
plot_lim = (-4, 4)

# TOP ROW: Rotation then Shear
setup_ax(axes[0, 0], xlim=plot_lim, ylim=plot_lim, title='Order 1: Start')
draw_basis_vectors(axes[0, 0], np.array([1, 0]), np.array([0, 1]))
arrow_v = FancyArrowPatch((0, 0), tuple(v), color='blue',
                         arrowstyle='->', mutation_scale=15, linewidth=2)
axes[0, 0].add_patch(arrow_v)

setup_ax(axes[0, 1], xlim=plot_lim, ylim=plot_lim, title='Step 1: Rotate')
v1 = rotation @ v
i1 = rotation @ np.array([1, 0])
j1 = rotation @ np.array([0, 1])
draw_basis_vectors(axes[0, 1], i1, j1)
arrow_v1 = FancyArrowPatch((0, 0), tuple(v1), color='blue',
                          arrowstyle='->', mutation_scale=15, linewidth=2)
axes[0, 1].add_patch(arrow_v1)

setup_ax(axes[0, 2], xlim=plot_lim, ylim=plot_lim, title='Step 2: Then Shear')
v2 = shear @ v1
i2 = shear @ i1
j2 = shear @ j1
draw_basis_vectors(axes[0, 2], i2, j2)
arrow_v2 = FancyArrowPatch((0, 0), tuple(v2), color='blue',
                          arrowstyle='->', mutation_scale=15, linewidth=2)
axes[0, 2].add_patch(arrow_v2)
axes[0, 2].text(v2[0]+0.3, v2[1]+0.3, f'Result: {v2}',
               fontsize=12, color='blue', fontweight='bold')

# BOTTOM ROW: Shear then Rotation
setup_ax(axes[1, 0], xlim=plot_lim, ylim=plot_lim, title='Order 2: Start')
draw_basis_vectors(axes[1, 0], np.array([1, 0]), np.array([0, 1]))
arrow_v = FancyArrowPatch((0, 0), tuple(v), color='red',
                         arrowstyle='->', mutation_scale=15, linewidth=2)
axes[1, 0].add_patch(arrow_v)

setup_ax(axes[1, 1], xlim=plot_lim, ylim=plot_lim, title='Step 1: Shear')
v1_alt = shear @ v
i1_alt = shear @ np.array([1, 0])
j1_alt = shear @ np.array([0, 1])
draw_basis_vectors(axes[1, 1], i1_alt, j1_alt)
arrow_v1_alt = FancyArrowPatch((0, 0), tuple(v1_alt), color='red',
                              arrowstyle='->', mutation_scale=15, linewidth=2)
axes[1, 1].add_patch(arrow_v1_alt)

setup_ax(axes[1, 2], xlim=plot_lim, ylim=plot_lim, title='Step 2: Then Rotate')
v2_alt = rotation @ v1_alt
i2_alt = rotation @ i1_alt
j2_alt = rotation @ j1_alt
draw_basis_vectors(axes[1, 2], i2_alt, j2_alt)
arrow_v2_alt = FancyArrowPatch((0, 0), tuple(v2_alt), color='red',
                              arrowstyle='->', mutation_scale=15, linewidth=2)
axes[1, 2].add_patch(arrow_v2_alt)
axes[1, 2].text(v2_alt[0]+0.3, v2_alt[1]+0.3, f'Result: {v2_alt}',
               fontsize=12, color='red', fontweight='bold')

plt.tight_layout()
plt.show()

print("Order Matters!")
print(f"\nShear @ Rotation = {shear @ rotation}")
print(f"\nRotation @ Shear = {rotation @ shear}")
# Compare the two products rather than declaring them different unconditionally.
if np.array_equal(shear @ rotation, rotation @ shear):
    print("\nThese two matrices happen to commute.")
else:
    print("\nThey're DIFFERENT! ✗")
print(f"\nFor vector {v}:")
print(f"  (Shear @ Rotation) @ v = {v2}")
print(f"  (Rotation @ Shear) @ v = {v2_alt}")

4. Associativity¶

Good News¶

While order matters, grouping doesn’t: $$(AB)C = A(BC)$$

Practical Importance¶

When computing $M_3 M_2 M_1 \vec{v}$, you can:

Compute $(M_3 M_2) M_1$ first, then multiply by $\vec{v}$
Or compute $M_3 (M_2 M_1)$ first, then multiply by $\vec{v}$
Or just apply one at a time: $M_3(M_2(M_1\vec{v}))$

Efficiency Tip¶

If applying the same transformations to many vectors:

Pre-compute the combined matrix once
Apply it to all vectors (faster!)

# Demonstrate associativity
A = np.array([[1, 2], [0, 1]])
B = np.array([[0, -1], [1, 0]])
C = np.array([[2, 0], [0, 0.5]])
v = np.array([1, 1])

# Three ways to compute the same thing
result1 = C @ (B @ (A @ v))  # Right-associative
result2 = (C @ B) @ (A @ v)  # Pre-compute C @ B
result3 = ((C @ B) @ A) @ v  # Pre-compute everything

print("Associativity Check:")
print(f"C @ (B @ (A @ v)) = {result1}")
print(f"(C @ B) @ (A @ v) = {result2}")
print(f"((C @ B) @ A) @ v = {result3}")
# C contains a float entry, so compare with a tolerance instead of assuming
# the three groupings agree.
if np.allclose(result1, result2) and np.allclose(result1, result3):
    print("\nAll equal! ✓")
else:
    print("\nResults differ ✗")

# Efficiency comparison
import time

# Many vectors, stored as the columns of a 2 x n_vectors array
n_vectors = 10000
vectors = np.random.randn(2, n_vectors)

# perf_counter is used for interval timing: time.time() can be too coarse to
# register the very short vectorized step, which would make its measured
# duration 0 and break the speedup division below.

# Method 1: Apply one at a time
start = time.perf_counter()
for v in vectors.T:  # iterating the transpose yields one column per step
    result = C @ (B @ (A @ v))
time1 = time.perf_counter() - start

# Method 2: Pre-compute combined matrix
start = time.perf_counter()
combined = C @ B @ A
for v in vectors.T:
    result = combined @ v
time2 = time.perf_counter() - start

# Method 3: Vectorized (one matrix product over all columns at once)
start = time.perf_counter()
combined = C @ B @ A
results = combined @ vectors
time3 = time.perf_counter() - start

# Guard the ratios so a zero measurement can never raise ZeroDivisionError.
speedup2 = time1 / time2 if time2 > 0 else float('inf')
speedup3 = time1 / time3 if time3 > 0 else float('inf')

print(f"\nEfficiency for {n_vectors} vectors:")
print(f"One-at-a-time:     {time1:.4f}s")
print(f"Pre-computed:      {time2:.4f}s (speedup: {speedup2:.1f}x)")
print(f"Vectorized:        {time3:.6f}s (speedup: {speedup3:.0f}x)")
print("\n💡 Lesson: Pre-compute transformations when applying to many vectors!")

Summary¶

Key Insights¶

Matrix multiplication = Composition of transformations
Columns of $AB$ = where basis vectors land after both transformations
Order matters: in general $AB \neq BA$ (matrix multiplication is non-commutative, though some pairs do commute)
Grouping doesn’t: $(AB)C = A(BC)$ (associative)
Read right-to-left: $M_2 M_1 \vec{v}$ means “first $M_1$, then $M_2$”

The Algorithm (Understood)¶

To compute $AB$:

Column 1 of $AB$ = $A$ × (column 1 of $B$)
Column 2 of $AB$ = $A$ × (column 2 of $B$)

Each column tells you where a basis vector lands!

Exercises¶

Compute (by hand) the composition of a 90° rotation followed by a horizontal shear
Find two matrices where $AB = BA$ (they commute)
Verify associativity: $(AB)C = A(BC)$ for random 2×2 matrices
What is $A^2$ (i.e., $A \times A$) for a 45° rotation matrix?
Explain why the identity matrix commutes with everything

Chapter 3: Linear transformations and matrices¶