import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Select the compute device: prefer GPU when one is available.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

1. Point Cloud Basics¶

RepresentationΒΆ

Point cloud: \(\{p_i\}_{i=1}^N\) where \(p_i \in \mathbb{R}^3\)

Key Properties:ΒΆ

  1. Permutation invariance: \(f(\{p_1, p_2, \ldots\}) = f(\{p_{\pi(1)}, p_{\pi(2)}, \ldots\})\)

  2. Transformation invariance: Robust to rotations/translations

  3. Unordered: No canonical ordering

📚 Reference Materials:

def generate_sphere_points(n_points=1000, radius=1.0):
    """Generate points sampled uniformly on a sphere surface.

    Sampling the polar angle phi uniformly in [0, pi] (the previous
    approach) clusters points near the poles. Drawing cos(phi) uniformly
    in [-1, 1] instead yields a distribution that is uniform over the
    sphere's surface area.

    Args:
        n_points: Number of points to generate.
        radius: Sphere radius.

    Returns:
        np.ndarray of shape (n_points, 3); every row lies on the sphere.
    """
    theta = np.random.uniform(0, 2 * np.pi, n_points)
    # cos(phi) ~ U[-1, 1] gives area-uniform sampling (inverse-CDF method).
    cos_phi = np.random.uniform(-1.0, 1.0, n_points)
    sin_phi = np.sqrt(1.0 - cos_phi**2)

    x = radius * sin_phi * np.cos(theta)
    y = radius * sin_phi * np.sin(theta)
    z = radius * cos_phi

    return np.stack([x, y, z], axis=1)

def generate_cube_points(n_points=1000, size=1.0):
    """Generate points sampled uniformly inside a cube volume.

    Args:
        n_points: Number of points to generate.
        size: Edge length of the cube, which is centered at the origin.

    Returns:
        np.ndarray of shape (n_points, 3).
    """
    half = size / 2
    return np.random.uniform(-half, half, (n_points, 3))

# Draw one sample cloud of each shape class.
sphere_pc = generate_sphere_points(500)
cube_pc = generate_cube_points(500)

# Render the two clouds side by side as 3D scatter plots.
fig = plt.figure(figsize=(14, 6))
plots = [(sphere_pc, 'blue', 'Sphere Point Cloud'),
         (cube_pc, 'red', 'Cube Point Cloud')]
for idx, (pc, color, title) in enumerate(plots, start=1):
    ax = fig.add_subplot(1, 2, idx, projection='3d')
    ax.scatter(pc[:, 0], pc[:, 1], pc[:, 2], c=color, s=1, alpha=0.6)
    ax.set_title(title, fontsize=11)

plt.tight_layout()
plt.show()

2. T-Net (Transformation Network)ΒΆ

Learns spatial transformer for input alignment:

\[T(x) = \text{MLP}(x) \rightarrow \mathbb{R}^{3 \times 3}\]
class TNet(nn.Module):
    """Spatial transformer network.

    Predicts a k x k alignment matrix from a point cloud. The output is
    biased toward the identity so training starts from a no-op transform.
    """

    def __init__(self, k=3):
        super().__init__()
        self.k = k

        # Shared per-point MLP, implemented as 1x1 convolutions.
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        # Regression head producing the flattened k*k matrix.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        """Map a (batch, k, n_points) cloud to a (batch, k, k) transform."""
        n_batch = x.size(0)

        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        h = F.relu(self.bn3(self.conv3(h)))

        # Symmetric aggregation across the point dimension.
        h, _ = torch.max(h, dim=2)

        h = F.relu(self.bn4(self.fc1(h)))
        h = F.relu(self.bn5(self.fc2(h)))
        h = self.fc3(h)

        # Bias the predicted matrix toward the identity.
        eye = torch.eye(self.k, device=h.device).flatten().unsqueeze(0)
        h = h + eye

        return h.view(n_batch, self.k, self.k)

# Smoke test: 4 clouds of 100 points each should produce one 3x3
# transform per cloud.
tnet = TNet(k=3).to(device)
test_input = torch.randn(4, 3, 100).to(device)
transform = tnet(test_input)
print(f"Transform shape: {transform.shape}")

3. PointNet ArchitectureΒΆ

Symmetric Function:ΒΆ

\[f(x_1, \ldots, x_n) = g\left(\max_{i=1}^n h(x_i)\right)\]

Max pooling ensures permutation invariance.

class PointNet(nn.Module):
    """PointNet for classification.

    Pipeline: input T-Net alignment -> shared point MLP -> feature T-Net
    alignment -> shared point MLP -> max-pool global feature -> fully
    connected classifier head.

    Args:
        num_classes: Number of output categories.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Input transform (aligns raw xyz coordinates)
        self.input_transform = TNet(k=3)

        # Feature transform (aligns 64-dim point features)
        self.feature_transform = TNet(k=64)

        # MLP 1
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 64, 1)

        # MLP 2
        self.conv3 = nn.Conv1d(64, 64, 1)
        self.conv4 = nn.Conv1d(64, 128, 1)
        self.conv5 = nn.Conv1d(128, 1024, 1)

        # Batch norms
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(64)
        self.bn3 = nn.BatchNorm1d(64)
        self.bn4 = nn.BatchNorm1d(128)
        self.bn5 = nn.BatchNorm1d(1024)

        # Classifier head
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)

        self.bn6 = nn.BatchNorm1d(512)
        self.bn7 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Classify a batch of point clouds.

        Args:
            x: (batch, 3, n_points) point coordinates.

        Returns:
            Tuple of:
              - logits: (batch, num_classes) class scores.
              - trans_feat: (batch, 64, 64) feature transform, returned
                so the caller can apply orthogonality regularization.
        """
        # NOTE: the previous version computed unused locals
        # (batch_size, n_points); they have been removed.

        # Align input coordinates with the learned 3x3 transform.
        trans = self.input_transform(x)
        x = torch.bmm(trans, x)

        # MLP 1: per-point features (weights shared across points).
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))

        # Align 64-dim features with the learned 64x64 transform.
        trans_feat = self.feature_transform(x)
        x = torch.bmm(trans_feat, x)

        # MLP 2: lift to 1024-dim per-point features.
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.relu(self.bn5(self.conv5(x)))

        # Permutation-invariant global feature via max pooling.
        x = torch.max(x, 2)[0]

        # Classifier head with dropout.
        x = F.relu(self.bn6(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.bn7(self.fc2(x)))
        x = self.dropout(x)
        x = self.fc3(x)

        return x, trans_feat

# Instantiate a binary (sphere vs. cube) classifier on the chosen device.
model = PointNet(num_classes=2).to(device)
print("PointNet created")

Create Synthetic DatasetΒΆ

To demonstrate PointNet without requiring large 3D datasets, we create synthetic point clouds by sampling points from simple geometric shapes (spheres, cubes, cylinders, cones). Each shape class is represented by a set of \(N\) 3D coordinates, with some noise added for realism. This controlled setup lets us verify that the network can classify shapes based purely on their geometric structure, without relying on texture or color. The principles learned here transfer directly to real point cloud data from LiDAR sensors, depth cameras, and 3D scanners.

class PointCloudDataset(torch.utils.data.Dataset):
    """Synthetic point cloud dataset.

    Alternates samples between sphere (label 0) and cube (label 1).
    """

    def __init__(self, n_samples=1000, n_points=1024):
        self.n_samples = n_samples
        self.n_points = n_points

        self.data = []
        self.labels = []

        # Even indices -> sphere (0), odd indices -> cube (1).
        generators = (generate_sphere_points, generate_cube_points)
        for i in range(n_samples):
            label = i % 2
            self.data.append(generators[label](n_points))
            self.labels.append(label)

    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        # Transpose to the (3, N) channel-first layout Conv1d expects.
        cloud = torch.FloatTensor(self.data[idx]).T
        target = torch.LongTensor([self.labels[idx]])[0]
        return cloud, target

# Build train/test splits of synthetic clouds (512 points per cloud).
train_dataset = PointCloudDataset(n_samples=800, n_points=512)
test_dataset = PointCloudDataset(n_samples=200, n_points=512)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32)

print(f"Train: {len(train_dataset)}, Test: {len(test_dataset)}")

Train PointNetΒΆ

Training PointNet follows the standard supervised learning loop: forward pass through the shared MLPs, symmetric max-pooling to obtain a global feature vector, and classification via fully connected layers with cross-entropy loss. The max-pooling aggregation is what makes PointNet permutation invariant – shuffling the order of input points produces the same global feature. An optional spatial transformer network (T-Net) learns to canonicalize the input point cloud’s orientation, making the model robust to rigid transformations. Monitoring both training loss and per-class accuracy helps detect class imbalance issues common in 3D shape datasets.

def regularization_loss(trans_feat):
    """Orthogonality regularizer for a predicted transform matrix.

    Penalizes ||T T^T - I||_F so the learned transform stays close to a
    rotation, as in the original PointNet paper. The identity size is
    inferred from the input rather than hard-coded to 64, so the same
    function also works for the 3x3 input transform (backward-compatible
    generalization).

    Args:
        trans_feat: (batch, k, k) predicted transform matrices.

    Returns:
        Scalar tensor: mean Frobenius norm of (T T^T - I) over the batch.
    """
    batch_size, k = trans_feat.size(0), trans_feat.size(1)
    identity = torch.eye(k, device=trans_feat.device).unsqueeze(0).repeat(batch_size, 1, 1)
    diff = trans_feat @ trans_feat.transpose(1, 2) - identity
    return torch.norm(diff, dim=(1, 2)).mean()

def train_pointnet(model, train_loader, n_epochs=20):
    """Train PointNet with cross-entropy plus transform regularization.

    Args:
        model: PointNet instance returning (logits, trans_feat).
        train_loader: DataLoader yielding (cloud, label) batches.
        n_epochs: Number of passes over the training set.

    Returns:
        List of per-iteration loss values.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    history = []

    for epoch in range(n_epochs):
        model.train()
        running = 0.0

        for clouds, targets in train_loader:
            clouds = clouds.to(device)
            targets = targets.to(device)

            logits, trans_feat = model(clouds)

            # Total loss: classification + weighted orthogonality term.
            loss = (F.cross_entropy(logits, targets)
                    + 0.001 * regularization_loss(trans_feat))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running += loss.item()
            history.append(loss.item())

        # Progress report every fifth epoch.
        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}/{n_epochs}, Loss: {running/len(train_loader):.4f}")

    return history

losses = train_pointnet(model, train_loader, n_epochs=20)

# Plot the raw per-iteration loss plus a 20-step moving average.
plt.figure(figsize=(10, 5))
plt.plot(losses, alpha=0.3)
smoothed = np.convolve(losses, np.full(20, 1 / 20), mode='valid')
plt.plot(smoothed, linewidth=2)
plt.xlabel('Iteration', fontsize=11)
plt.ylabel('Loss', fontsize=11)
plt.title('PointNet Training', fontsize=12)
plt.grid(True, alpha=0.3)
plt.show()

EvaluateΒΆ

Evaluation on held-out test shapes measures overall classification accuracy and per-class performance. A confusion matrix reveals which shape pairs are most commonly confused – for example, cubes and rectangular prisms might be hard to distinguish from sparse point samples. Comparing PointNet’s accuracy to simpler baselines (e.g., hand-crafted 3D features plus SVM) demonstrates the power of learning point-level features directly from raw coordinates. In real applications, this evaluation extends to larger benchmarks like ModelNet40 (40 object categories) and ShapeNet (part segmentation).

model.eval()
correct = 0
total = 0

# Count correct predictions over the held-out set without gradients.
with torch.no_grad():
    for clouds, targets in test_loader:
        clouds, targets = clouds.to(device), targets.to(device)
        logits, _ = model(clouds)
        predictions = logits.argmax(dim=1)
        correct += (predictions == targets).sum().item()
        total += targets.size(0)

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

Test Permutation InvarianceΒΆ

A critical property of PointNet is permutation invariance: the output should be identical regardless of the order in which points are presented. We verify this empirically by passing the same point cloud through the network multiple times with different random permutations of the point order and checking that the output logits are exactly equal. This test validates that the max-pooling aggregation correctly eliminates ordering dependence. Permutation invariance is essential because point clouds from sensors arrive in arbitrary order, and the model must not depend on an artificial indexing scheme.

# Grab a single test cloud and add a batch dimension.
pc_sample, _ = test_dataset[0]
pc_sample = pc_sample.unsqueeze(0).to(device)

model.eval()

# Prediction on the original point ordering.
with torch.no_grad():
    output1, _ = model(pc_sample)
pred1 = output1.argmax(dim=1).item()

# Prediction after randomly permuting the point order; a permutation-
# invariant network must produce identical logits.
perm = torch.randperm(pc_sample.size(2))
pc_shuffled = pc_sample[:, :, perm]

with torch.no_grad():
    output2, _ = model(pc_shuffled)
pred2 = output2.argmax(dim=1).item()

print(f"Original prediction: {pred1}")
print(f"Shuffled prediction: {pred2}")
print(f"Predictions match: {pred1 == pred2}")
print(f"Output difference: {torch.abs(output1 - output2).max().item():.6f}")

SummaryΒΆ

PointNet Key Ideas:ΒΆ

  1. Permutation invariance via max pooling

  2. Spatial transformers for alignment

  3. Global + local features

  4. Direct point processing

Architecture:ΒΆ

  • Input transform (T-Net)

  • Feature transform (T-Net)

  • Symmetric aggregation (max pool)

  • MLP classifier

Applications:ΒΆ

  • 3D object classification

  • Part segmentation

  • Scene understanding

  • Autonomous driving

Extensions:ΒΆ

  • PointNet++: Hierarchical learning

  • DGCNN: Dynamic graph CNN

  • PointConv: Continuous convolution

  • Point Transformers: Attention mechanisms

Advanced Point Cloud Networks TheoryΒΆ

1. Introduction to Point Cloud ProcessingΒΆ

Point clouds are sets of 3D points representing object surfaces or scenes, commonly obtained from LiDAR, RGB-D cameras, or 3D reconstruction.

1.1 Point Cloud CharacteristicsΒΆ

Unique properties:

  • Unordered: No canonical ordering of points

  • Irregular: Non-uniform density, varying sampling rates

  • Permutation invariant: Should produce same result for any point ordering

  • Sparse: 3D space is largely empty

  • Transformation equivariant: Rotations/translations should preserve structure

Representation: P = {p_i ∈ ℝ³ | i = 1,…,N}

  • May include features: p_i ∈ ℝ^(3+F) (position + color, normal, etc.)

1.2 ChallengesΒΆ

  1. Permutation invariance: Traditional CNNs assume grid structure

  2. Rotation sensitivity: Points in different orientations should be recognized as same

  3. Varying density: Distant points are sparser

  4. Scalability: Millions of points in real scenes

  5. Local structure: Capturing geometric patterns

Applications:

  • 3D object classification/detection

  • Autonomous driving (LiDAR perception)

  • Robotics (grasping, navigation)

  • 3D reconstruction and completion

  • Semantic segmentation of scenes

2. PointNet: Pioneer ArchitectureΒΆ

PointNet [Qi et al., 2017a]: First deep learning architecture for raw point clouds.

2.1 Core IdeaΒΆ

Key insight: Use symmetric function to achieve permutation invariance.

Architecture:

Point Cloud (N Γ— 3) β†’ MLP β†’ Features (N Γ— d) β†’ MaxPool β†’ Global Feature (d) β†’ MLP β†’ Output

Symmetric function: f({x₁,…,xₙ}) ≈ g(h(x₁),…,h(xₙ))

  • h: Per-point feature extractor (shared MLP)

  • g: Symmetric aggregator (max pooling)

2.2 Mathematical FrameworkΒΆ

Per-point feature extraction:

f_i = MLP(p_i) ∈ ℝ^d

Global feature (permutation invariant):

f_global = max_{i=1,...,N} f_i

Classification:

y = MLP_cls(f_global)

Segmentation (per-point labels):

y_i = MLP_seg(concat(f_i, f_global))

2.3 Input and Feature TransformationsΒΆ

Problem: Rotation sensitivity

Solution: T-Net (Spatial Transformer Network)

Input transform (3 Γ— 3):

T_input = TNet_3Γ—3({p_i})
p_i' = T_input Β· p_i

Feature transform (64 Γ— 64):

T_feature = TNet_64Γ—64({f_i})
f_i' = T_feature Β· f_i

Regularization (orthogonality):

L_reg = ||I - T^T T||Β²_F

Ensures transformation is approximately rigid.

2.4 Theoretical GuaranteesΒΆ

Universal approximation: PointNet can approximate any continuous set function.

Proof sketch:

  1. h (MLP) can approximate any continuous function on ℝ³

  2. max operation selects representative features

  3. g (MLP) combines features to approximate target function

Critical points: Max pooling selects the “skeleton” of the point cloud.

2.5 LimitationsΒΆ

  1. Local structure: Doesn’t explicitly capture local geometry

  2. Context: Each point processed independently (before max pool)

  3. Scalability: All points in one forward pass

3. PointNet++: Hierarchical Feature LearningΒΆ

PointNet++ [Qi et al., 2017b]: Extends PointNet with hierarchical structure and local context.

3.1 Architecture OverviewΒΆ

Hierarchical abstraction:

Points (N₁ Γ— 3) β†’ SA Layer 1 β†’ Points (Nβ‚‚ Γ— d₁) β†’ SA Layer 2 β†’ Points (N₃ Γ— dβ‚‚) β†’ ...

Set Abstraction (SA) Layer:

  1. Sampling: Select subset of points (centroids)

  2. Grouping: Group neighbors around each centroid

  3. PointNet: Extract local features

3.2 Set Abstraction LayerΒΆ

Components:

1. Farthest Point Sampling (FPS):

  • Select N’ centroids from N points

  • Maximize minimum distance between centroids

Algorithm:

C = {c₁}  # Start with random point
for i = 2 to N':
    c_i = argmax_{p ∈ P \ C} min_{c ∈ C} ||p - c||
    C = C βˆͺ {c_i}

2. Ball Query:

  • For each centroid c_i, find all points within radius r

  • More robust than k-NN to varying density

N(c_i) = {p_j ∈ P | ||p_j - c_i|| < r}

3. PointNet on local region:

f_i = PointNet({p_j - c_i | p_j ∈ N(c_i)})

Note: Use relative coordinates (p_j - c_i) for translation invariance.

3.3 Multi-Scale Grouping (MSG)ΒΆ

Problem: Varying point density

Solution: Multiple scales with different radii

MSG layer:

f_i = concat(PointNet_r₁(N_r₁(c_i)), PointNet_rβ‚‚(N_rβ‚‚(c_i)), ...)

Example: r = [0.1, 0.2, 0.4] for coarse-to-fine features

3.4 Multi-Resolution Grouping (MRG)ΒΆ

Alternative: Combine features from different abstraction levels

Idea:

  • Low-level: Dense sampling, fine details

  • High-level: Sparse sampling, global context

Concatenate features from multiple resolutions.

3.5 Point Feature PropagationΒΆ

For segmentation: Need per-point labels

Problem: SA layers downsample β†’ fewer points

Solution: Interpolate features back to original points

Interpolation (inverse distance weighted):

f(p) = Ξ£_{i=1}^k w_i f_i / Ξ£_{i=1}^k w_i

w_i = 1 / ||p - p_i||Β²

Where {p_i} are k nearest neighbors in coarser layer.

Feature propagation:

f_propagated = MLP(concat(f_interpolated, f_skip))

f_skip: Skip connection from encoder.

4. Dynamic Graph CNN (DGCNN)ΒΆ

DGCNN [Wang et al., 2019]: Constructs graph dynamically in feature space.

4.1 EdgeConv OperationΒΆ

Key idea: Compute edge features between points and neighbors.

Edge feature:

e_ij = h_ΞΈ(x_i, x_j - x_i)

Where:

  • x_i: Center point feature

  • x_j - x_i: Relative neighbor feature

  • h_ΞΈ: Shared MLP (learnable)

Aggregation:

x_i' = max_{j ∈ N(i)} e_ij

Or other aggregators: sum, mean, attention.

4.2 Dynamic Graph ConstructionΒΆ

Key innovation: Update k-NN graph after each layer.

Procedure:

  1. Compute k-NN in feature space (not 3D coordinates)

  2. Apply EdgeConv

  3. Repeat with updated features

Benefits:

  • Adapts to learned feature geometry

  • Captures semantic relationships (not just spatial)

Graph at layer l:

G^(l) = kNN(F^(l), k)

Where F^(l) are features at layer l.

4.3 ArchitectureΒΆ

Stacked EdgeConv layers:

Input (N Γ— 3) β†’ EdgeConv₁ β†’ EdgeConvβ‚‚ β†’ ... β†’ EdgeConvβ‚— β†’ MaxPool β†’ MLP β†’ Output

Feature concatenation: Combine features from all layers.

f_global = max(concat(f₁, fβ‚‚, ..., fβ‚—))

4.4 Comparison to PointNet++ΒΆ

PointNet++:

  • Fixed neighborhoods (ball query on 3D coords)

  • Hierarchical downsampling

DGCNN:

  • Dynamic neighborhoods (k-NN in feature space)

  • Full resolution throughout (optional downsampling)

5. Point TransformersΒΆ

5.1 Point Transformer LayerΒΆ

Applying attention to point clouds [Zhao et al., 2021].

Vector attention:

y_i = Ξ£_{j ∈ N(i)} ρ(Ξ³(Ο†(x_i) - ψ(x_j) + Ξ΄)) βŠ™ (Ξ±(x_j) + Ξ΄)

Where:

  • Ο†, ψ, Ξ±, Ξ³: Linear transformations (like Q, K, V in standard attention)

  • Ξ΄ = Ο†_pos(p_i - p_j): Position encoding

  • ρ: Softmax normalization

  • βŠ™: Element-wise product

Position encoding:

Ξ΄ = MLP(p_i - p_j)

Captures relative position information.

Attention weights:

a_ij = softmax_j(Ξ³(Ο†(x_i) - ψ(x_j) + Ξ΄))

Output:

y_i = Ξ£_j a_ij βŠ™ (Ξ±(x_j) + Ξ΄)

5.2 Transition LayersΒΆ

Downsampling: Farthest point sampling + k-NN grouping

Upsampling: Trilinear interpolation + skip connections

Similar to PointNet++ but with transformer blocks.

5.3 AdvantagesΒΆ

  • Long-range dependencies: Attention captures global context

  • Adaptive: Learns which points to attend to

  • Position-aware: Ξ΄ encodes geometric relationships

6. Rotation-Invariant NetworksΒΆ

6.1 Problem: SO(3) EquivarianceΒΆ

Challenge: Point clouds can be arbitrarily rotated.

Desirable property: f(R Β· P) = R Β· f(P) or f(R Β· P) = f(P)

Approaches:

  1. Data augmentation: Train on rotated copies (not truly invariant)

  2. Canonical orientation: Align to principal axes (unstable)

  3. Equivariant features: Design rotation-equivariant operations

6.2 PointConvΒΆ

PointConv [Wu et al., 2019]: Continuous convolution on point clouds.

Convolution:

f_out(p) = Σ_{p_i ∈ N(p)} W(p - p_i) f_in(p_i)

Learned kernel:

W(Ξ”p) = MLP(Ξ”p)

Density-aware: Weight by local density.

6.3 VN-PointNet/DGCNNΒΆ

Vector Neuron networks [Deng et al., 2021]: Equivariant operations.

Vector neuron: 3D vector instead of scalar

Operations:

  • VN-Linear: V_out = W V_in (matrix-vector product)

  • VN-ReLU: ReLU(||V||) Β· V/||V|| (direction-preserving)

  • VN-MaxPool: argmax_i ||V_i||

Invariant features: Extract rotation-invariant scalars

  • ||V||: Magnitude

  • V^T V’: Inner product

  • ||(V Γ— V’)||: Cross product magnitude

7. Point Cloud SegmentationΒΆ

7.1 Semantic SegmentationΒΆ

Task: Assign class label to each point.

Architecture: Encoder-decoder with skip connections.

PointNet++ segmentation:

Input β†’ SA layers β†’ FP layers β†’ Per-point MLP β†’ Scores

Loss: Cross-entropy per point

L = -Ξ£_i Ξ£_c y_ic log Ε·_ic

7.2 Instance SegmentationΒΆ

Task: Group points into object instances + classify.

3D-BoNet [Yang et al., 2019]:

  1. Bounding box prediction: Predict 3D boxes

  2. Point-box association: Assign points to boxes

  3. Classification: Classify each instance

SGPN [Wang et al., 2018]: Similarity Group Proposal Network

  • Learn pairwise point similarity

  • Group points via clustering

7.3 Part SegmentationΒΆ

Task: Segment object into parts (e.g., chair legs, back, seat).

ShapeNet Part dataset: 16 categories, 50 parts

Approach: Category-conditional segmentation

  • Input: Point cloud + category label

  • Output: Part labels per point

8. Point Cloud Object DetectionΒΆ

8.1 3D Bounding BoxesΒΆ

Representation: (x, y, z, l, w, h, ΞΈ)

  • Center: (x, y, z)

  • Size: length, width, height

  • Orientation: yaw angle ΞΈ

IoU: 3D Intersection over Union for evaluation

8.2 VoxelNetΒΆ

VoxelNet [Zhou & Tuzel, 2018]: Voxelize point cloud.

Pipeline:

  1. Voxel partition: Divide 3D space into voxels

  2. VFE (Voxel Feature Encoding): PointNet per voxel

  3. 3D CNN: Process voxel grid

  4. RPN (Region Proposal Network): Predict 3D boxes

VFE layer:

f_voxel = max_{p ∈ voxel} MLP(concat(p, f_voxel_mean))

8.3 PointPillarsΒΆ

PointPillars [Lang et al., 2019]: Organize points into vertical pillars.

Advantages:

  • Faster than VoxelNet (2D conv instead of 3D)

  • Efficient for LiDAR (mostly empty above ground)

Pipeline:

  1. Pillar encoding: PointNet on each pillar

  2. Pseudo-image: Scatter features to BEV (bird’s-eye view)

  3. 2D CNN: Backbone network

  4. Detection head: Predict boxes

8.4 PointRCNNΒΆ

PointRCNN [Shi et al., 2019]: Two-stage detector on raw points.

Stage 1: 3D proposal generation

  • Segment foreground points

  • Predict 3D box per point

  • Refine via clustering

Stage 2: Box refinement

  • Canonical coordinates: Transform points to box frame

  • PointNet++: Refine box parameters

9. Point Cloud GenerationΒΆ

9.1 Point Cloud AutoencodersΒΆ

Architecture: Encoder-decoder for point clouds.

Encoder: PointNet β†’ latent vector z

Decoder: FC layers β†’ N Γ— 3 points

Loss: Chamfer distance

L_CD = Ξ£_{p ∈ P₁} min_{q ∈ Pβ‚‚} ||p - q||Β² + Ξ£_{q ∈ Pβ‚‚} min_{p ∈ P₁} ||p - q||Β²

Earth Mover’s Distance (more accurate, slower):

L_EMD = min_{Ο†:P₁→Pβ‚‚} Ξ£_{p ∈ P₁} ||p - Ο†(p)||Β²

Where Ο† is bijection.

9.2 Point Cloud GANsΒΆ

PC-GAN [Li et al., 2018]: Generate point clouds.

Generator: z ~ N(0,I) β†’ MLP β†’ N Γ— 3 points

Discriminator: PointNet β†’ real/fake score

Challenges:

  • Mode collapse

  • Unstable training

  • Evaluation metrics (FID for point clouds)

9.3 Point FlowΒΆ

PointFlow [Yang et al., 2019]: Normalizing flow for point clouds.

Idea: Learn bijection between Gaussian and point cloud distribution.

Two-level hierarchy:

  1. Global: Sample number of points and shape code

  2. Local: Sample point positions conditioned on shape

Benefits: Exact likelihood, better mode coverage than GANs.

10. Point Cloud CompletionΒΆ

10.1 Problem FormulationΒΆ

Input: Partial point cloud P_partial Output: Complete point cloud P_complete

Applications:

  • Occlusion handling in robotics

  • 3D reconstruction from single view

  • Shape completion from scans

10.2 PCN: Point Completion NetworkΒΆ

PCN [Yuan et al., 2018]:

Encoder: PointNet β†’ coarse global feature

Decoder:

  1. Coarse completion: FC layers β†’ M points (low-res)

  2. Fine completion: Folding-based β†’ N points (high-res)

Multi-resolution loss:

L = L_coarse(P_coarse, P_gt) + L_fine(P_fine, P_gt)

10.3 FoldingNetΒΆ

FoldingNet [Yang et al., 2018]: Learn to fold 2D grid into 3D shape.

Decoder:

p_i = MLP(concat(f_global, grid_i))

Where grid_i ∈ [0,1]² is 2D grid point.

Intuition: Learn deformation from canonical 2D plane to 3D surface.

11. Point Cloud UpsamplingΒΆ

11.1 PU-NetΒΆ

PU-Net [Yu et al., 2018]: Upsample sparse point clouds.

Idea: Learn to densify point cloud while preserving geometry.

Pipeline:

  1. Feature extraction: PointNet++

  2. Feature expansion: Replicate features

  3. Point generation: MLP β†’ dense points

  4. Coordinate refinement: Adjust positions

Expansion:

f_i β†’ [f_i, f_i, ..., f_i]  (r times)

Then MLP to generate r points per input point.

11.2 PU-GANΒΆ

PU-GAN [Li et al., 2019]: GAN-based upsampling.

Generator: Upsample point cloud

Discriminator: Distinguish real vs. generated patches

Self-attention: Capture long-range dependencies

Losses:

  • Adversarial loss

  • Uniform loss (encourage uniform distribution)

  • Reconstruction loss

12. ApplicationsΒΆ

12.1 Autonomous DrivingΒΆ

LiDAR perception:

  • Detection: Cars, pedestrians, cyclists (PointPillars, SECOND)

  • Tracking: Multi-object tracking in 3D

  • Segmentation: Road, buildings, vegetation

Sensor fusion: Combine LiDAR + camera

12.2 RoboticsΒΆ

Grasping:

  • Segment graspable regions

  • Plan grasp poses from partial point clouds

Navigation:

  • SLAM (Simultaneous Localization and Mapping)

  • Obstacle avoidance

12.3 AR/VRΒΆ

3D reconstruction: Real-time scene capture

Object recognition: Identify objects in 3D space

Interaction: Hand tracking, gesture recognition

12.4 Medical ImagingΒΆ

Organ segmentation: From CT/MRI scans

Surgical planning: 3D anatomy visualization

13. Recent Advances (2020-2024)ΒΆ

13.1 Point-Voxel CNN (PVCNN)ΒΆ

PVCNN [Liu et al., 2019]: Combine point and voxel representations.

Idea:

  • Voxel branch: Regular grid, efficient 3D conv

  • Point branch: Precise geometry, no discretization

Fusion: Devoxelize voxel features β†’ combine with point features

Benefits: Speed of voxels + accuracy of points.

13.2 Stratified TransformerΒΆ

Stratified Transformer [Lai et al., 2022]: Efficient attention for large point clouds.

Key idea: Hierarchical grouping + local attention

Complexity: O(N log N) instead of O(NΒ²)

13.3 Point-BERTΒΆ

Point-BERT [Yu et al., 2022]: Pre-training for point clouds.

Masked autoencoding:

  • Mask random points

  • Predict masked points from context

Discretization: Learn discrete tokens via dVAE

Transfer: Fine-tune on downstream tasks

13.4 Point-MAEΒΆ

Point-MAE [Pang et al., 2022]: Masked autoencoders.

High masking ratio: 60-90% of points masked

Decoder: Lightweight transformer

Reconstruction: Predict coordinates of masked points

Finding: High masking ratio forces learning global structure.

14. Benchmarks and DatasetsΒΆ

14.1 ModelNetΒΆ

ModelNet40: 12,311 CAD models, 40 categories

Task: Classification

Evaluation: Overall accuracy

Sampling: 1,024 or 2,048 points per model

14.2 ShapeNetΒΆ

ShapeNetCore: 51,300 models, 55 categories

ShapeNetPart: Part segmentation (16 categories, 50 parts)

Tasks: Classification, segmentation, completion

14.3 ScanNetΒΆ

Indoor scene dataset: 1,513 scanned scenes

Annotations: Semantic labels, instance masks, 3D boxes

Tasks: Scene understanding, object detection, segmentation

14.4 KITTIΒΆ

Autonomous driving: 7,481 training, 7,518 testing frames

LiDAR + camera: Multi-modal

Tasks: 3D object detection, tracking

Categories: Car, pedestrian, cyclist

14.5 Waymo Open DatasetΒΆ

Large-scale: 1,000 scenes, 200k frames

High quality: 5 LiDAR sensors, 5 cameras

Dense annotations: 3D boxes, tracking IDs

15. Evaluation MetricsΒΆ

15.1 ClassificationΒΆ

Accuracy: Fraction of correctly classified point clouds

Per-class accuracy: Accuracy per category (balanced)

15.2 SegmentationΒΆ

mIoU (mean Intersection over Union):

IoU_c = TP_c / (TP_c + FP_c + FN_c)
mIoU = (1/C) Ξ£_c IoU_c

Overall accuracy: Fraction of correctly labeled points

15.3 DetectionΒΆ

AP (Average Precision): At IoU threshold (typically 0.5, 0.7)

mAP: Mean over categories

NDS (Waymo): Normalized Detection Score

NDS = (1/10) Ξ£_{thresholds} AP_threshold

15.4 Completion/GenerationΒΆ

Chamfer Distance (CD):

CD = (1/|P₁|) Ξ£_{p ∈ P₁} min_{q ∈ Pβ‚‚} ||p - q||Β² + 
     (1/|Pβ‚‚|) Ξ£_{q ∈ Pβ‚‚} min_{p ∈ P₁} ||p - q||Β²

Earth Mover’s Distance (EMD):

EMD = min_{Ο†:P₁→Pβ‚‚} (1/N) Ξ£_{p ∈ P₁} ||p - Ο†(p)||Β²

Hausdorff Distance:

d_H = max(max_{p ∈ P₁} min_{q ∈ Pβ‚‚} ||p-q||, max_{q ∈ Pβ‚‚} min_{p ∈ P₁} ||p-q||)

16. Implementation ConsiderationsΒΆ

16.2 Sampling StrategiesΒΆ

Random sampling: Fast but may miss details

Farthest Point Sampling: Better coverage, O(NΒ²)

Approximate FPS: Faster alternatives

  • Grid-based: Voxelize β†’ sample per voxel

  • Random subset FPS: FPS on random subset

16.3 NormalizationΒΆ

Unit sphere:

P_norm = (P - center) / max_distance

Zero mean, unit variance:

P_norm = (P - mean(P)) / std(P)

Align to canonical orientation: PCA, but can be unstable

16.4 Data AugmentationΒΆ

Rotation: Random SO(3) rotation

  • Small angles (Β±15Β°) or full rotation

Jittering: Add Gaussian noise

p_i ← p_i + Ξ΅, Ξ΅ ~ N(0, σ²I)

Scaling: Random scaling factor [0.8, 1.2]

Dropout: Randomly drop points (simulate occlusion)

17. Limitations and Open ProblemsΒΆ

17.1 Current LimitationsΒΆ

  1. Scalability: Large scenes (millions of points) challenging

  2. Density variation: Varying sampling rates

  3. Noise robustness: Real scans have outliers, noise

  4. Occlusion: Missing data from self-occlusion

  5. Computational cost: Point-wise operations expensive

17.2 Open ProblemsΒΆ

  1. Unified architecture: Single model for all tasks

  2. Self-supervised learning: Better pre-training strategies

  3. Multi-modal fusion: Optimal way to combine LiDAR + RGB

  4. Temporal modeling: Point cloud sequences (4D)

  5. Generalization: Cross-dataset performance

17.3 Future DirectionsΒΆ

  1. Transformer-based: Scaling transformers to large point clouds

  2. Implicit representations: Neural fields vs. explicit points

  3. Efficient attention: Linear complexity attention

  4. Neural rendering: NeRF integration with point clouds

  5. Foundation models: Large pre-trained models (Point-BERT, Point-MAE)

18. Key TakeawaysΒΆ

  1. PointNet: Pioneering permutation-invariant architecture via symmetric function (max pooling)

  2. PointNet++: Hierarchical feature learning with FPS + ball query + local PointNet

  3. DGCNN: Dynamic graph construction in feature space with EdgeConv

  4. Point Transformers: Attention mechanism with position encoding for point clouds

  5. Rotation invariance: Critical for real-world robustness (VN networks, equivariant features)

  6. Set abstraction: Sampling + grouping + feature extraction fundamental building block

  7. Applications: Autonomous driving (detection), robotics (grasping), AR/VR (reconstruction)

  8. Metrics: Chamfer distance (generation), mIoU (segmentation), mAP (detection)

  9. Challenges: Scalability, density variation, rotation sensitivity

  10. Future: Transformers, self-supervised learning, multi-modal fusion

Core insight: Point clouds require permutation-invariant operations and hierarchical abstractions to capture both local geometry and global structure.

19. Mathematical SummaryΒΆ

PointNet global feature:

f_global = max_{i=1,...,N} MLP(p_i)

PointNet++ set abstraction:

S^(l) = {f_i^(l) | f_i^(l) = PointNet({p_j - c_i | p_j ∈ B(c_i, r)})}

DGCNN EdgeConv:

x_i' = max_{j ∈ kNN(x_i)} MLP(concat(x_i, x_j - x_i))

Point Transformer attention:

y_i = Ξ£_{j ∈ N(i)} softmax(Ξ³(Ο†(x_i) - ψ(x_j) + Ξ΄_ij)) βŠ™ (Ξ±(x_j) + Ξ΄_ij)

Chamfer Distance:

CD(P₁, Pβ‚‚) = Ξ£_{p ∈ P₁} min_{q ∈ Pβ‚‚} ||p-q||Β² + Ξ£_{q ∈ Pβ‚‚} min_{p ∈ P₁} ||p-q||Β²

ReferencesΒΆ

  1. Qi et al. (2017a) β€œPointNet: Deep Learning on Point Sets for 3D Classification and Segmentation”

  2. Qi et al. (2017b) β€œPointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space”

  3. Wang et al. (2019) β€œDynamic Graph CNN for Learning on Point Clouds”

  4. Zhao et al. (2021) β€œPoint Transformer”

  5. Wu et al. (2019) β€œPointConv: Deep Convolutional Networks on 3D Point Clouds”

  6. Zhou & Tuzel (2018) β€œVoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection”

  7. Lang et al. (2019) β€œPointPillars: Fast Encoders for Object Detection from Point Clouds”

  8. Yu et al. (2018) β€œPU-Net: Point Cloud Upsampling Network”

  9. Pang et al. (2022) β€œMasked Autoencoders for Point Cloud Self-supervised Learning”

  10. Liu et al. (2019) β€œPoint-Voxel CNN for Efficient 3D Deep Learning”

"""
Advanced Point Cloud Networks Implementations

Production-ready PyTorch implementations of point cloud processing architectures.
"""

from typing import Callable, Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mpl_toolkits.mplot3d import Axes3D

# ============================================================================
# 1. PointNet Architecture
# ============================================================================

class TNet(nn.Module):
    """
    Transformation Network (T-Net) predicting a k x k alignment matrix.

    Each point is encoded by shared MLPs, a max-pool produces a global
    descriptor, and fully-connected layers regress a residual on top of
    the identity matrix, so the network starts out predicting the
    identity transform.

    Args:
        k: Dimensionality of the input (3 for spatial, 64 for features).
    """
    def __init__(self, k: int = 3):
        super().__init__()
        self.k = k

        # Point-wise shared MLPs (1x1 convolutions over the point axis).
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        # Regression head for the flattened k*k matrix.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)

        # One batch norm per learned layer (except the final regressor).
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: [batch, k, num_points] input points or features.

        Returns:
            [batch, k, k] predicted transformation matrices.
        """
        # Encode every point, then max-pool to a global descriptor.
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        h = F.relu(self.bn3(self.conv3(h)))
        h = h.max(dim=2).values  # [batch, 1024]

        # Regress the matrix entries from the global descriptor.
        h = F.relu(self.bn4(self.fc1(h)))
        h = F.relu(self.bn5(self.fc2(h)))
        h = self.fc3(h)  # [batch, k*k]

        # Residual formulation: the prediction is identity + offset,
        # which keeps the transform well-conditioned early in training.
        eye = torch.eye(self.k, device=h.device).reshape(1, self.k * self.k)
        return (h + eye).view(-1, self.k, self.k)


class PointNet(nn.Module):
    """
    PointNet for classification and segmentation.

    A shared per-point MLP lifts raw coordinates to features, and a
    symmetric max-pool yields a permutation-invariant global descriptor.
    For segmentation, the global descriptor is concatenated back onto
    every per-point feature before the per-point head.

    Args:
        num_classes: Number of output classes.
        task: 'classification' or 'segmentation'.
        use_transform: Whether to apply the learned input/feature T-Nets.
    """
    def __init__(
        self,
        num_classes: int,
        task: str = 'classification',
        use_transform: bool = True
    ):
        super().__init__()
        self.task = task
        self.use_transform = use_transform

        if use_transform:
            # 3x3 transform on raw xyz; 64x64 transform on point features.
            self.input_transform = TNet(k=3)
            self.feature_transform = TNet(k=64)

        # Shared per-point MLPs.
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

        if task == 'classification':
            # Global-descriptor MLP head.
            self.fc1 = nn.Linear(1024, 512)
            self.fc2 = nn.Linear(512, 256)
            self.fc3 = nn.Linear(256, num_classes)
            self.bn_fc1 = nn.BatchNorm1d(512)
            self.bn_fc2 = nn.BatchNorm1d(256)
            self.dropout = nn.Dropout(p=0.3)
        else:
            # Per-point head over [local (64) + global (1024)] = 1088 channels.
            self.conv4 = nn.Conv1d(1088, 512, 1)
            self.conv5 = nn.Conv1d(512, 256, 1)
            self.conv6 = nn.Conv1d(256, 128, 1)
            self.conv7 = nn.Conv1d(128, num_classes, 1)
            self.bn4 = nn.BatchNorm1d(512)
            self.bn5 = nn.BatchNorm1d(256)
            self.bn6 = nn.BatchNorm1d(128)

    def forward(
        self,
        x: torch.Tensor
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """
        Args:
            x: [batch, 3, num_points] or [batch, num_points, 3].

        Returns:
            output: [batch, num_classes] logits (classification) or
                [batch, num_classes, num_points] logits (segmentation).
            feature_transform: [batch, 64, 64] matrix when transforms
                are enabled, else None (used for orthogonality reg.).
        """
        # Accept either layout; canonicalize to [batch, 3, num_points].
        if x.size(1) != 3:
            x = x.transpose(1, 2)
        n_pts = x.size(2)

        # Optional learned alignment of the raw coordinates.
        if self.use_transform:
            x = torch.bmm(self.input_transform(x), x)

        h = F.relu(self.bn1(self.conv1(x)))
        local_feats = h  # kept for the segmentation head

        # Optional learned alignment in feature space.
        feat_trans = None
        if self.use_transform:
            feat_trans = self.feature_transform(h)
            h = torch.bmm(feat_trans, h)

        h = F.relu(self.bn2(self.conv2(h)))
        h = self.bn3(self.conv3(h))

        # Symmetric aggregation -> permutation-invariant global descriptor.
        global_desc = h.max(dim=2).values  # [batch, 1024]

        if self.task == 'classification':
            out = F.relu(self.bn_fc1(self.fc1(global_desc)))
            out = self.dropout(out)
            out = F.relu(self.bn_fc2(self.fc2(out)))
            out = self.dropout(out)
            return self.fc3(out), feat_trans

        # Segmentation: broadcast the global descriptor to every point
        # and fuse it with the per-point features.
        tiled = global_desc.unsqueeze(2).expand(-1, -1, n_pts)
        h = torch.cat((local_feats, tiled), dim=1)
        h = F.relu(self.bn4(self.conv4(h)))
        h = F.relu(self.bn5(self.conv5(h)))
        h = F.relu(self.bn6(self.conv6(h)))
        return self.conv7(h), feat_trans


def pointnet_loss(
    outputs: torch.Tensor,
    labels: torch.Tensor,
    trans_feat: Optional[torch.Tensor] = None,
    reg_weight: float = 0.001
) -> Tuple[torch.Tensor, Dict[str, float]]:
    """
    PointNet loss with transformation-matrix regularization.

    Bug fix: the return annotation references ``Dict``, which was not
    imported from ``typing`` — defining this function raised a NameError
    at import time. ``Dict`` is now part of the module's typing imports.

    Args:
        outputs: Model predictions — [batch, num_classes] for
            classification or [batch, num_classes, num_points] for
            segmentation.
        labels: Ground-truth class indices matching ``outputs``.
        trans_feat: Feature transformation matrices [batch, k, k]
            (k inferred from the tensor, typically 64), or None.
        reg_weight: Weight of the orthogonality regularizer.

    Returns:
        total_loss: Scalar tensor (task loss + weighted regularizer).
        loss_dict: Python floats for 'total', 'task' and 'reg'.
    """
    # F.cross_entropy accepts both [batch, C] and [batch, C, N] logits,
    # so the classification/segmentation cases need no branching.
    task_loss = F.cross_entropy(outputs, labels)

    # Regularizer: encourage trans_feat to be orthogonal, ||I - A A^T||.
    reg_loss = outputs.new_zeros(())
    if trans_feat is not None:
        k = trans_feat.size(1)  # generalized from the hard-coded 64
        identity = torch.eye(k, device=trans_feat.device).unsqueeze(0)
        # Broadcasting replaces the explicit repeat over the batch.
        diff = identity - torch.bmm(trans_feat.transpose(1, 2), trans_feat)
        reg_loss = torch.mean(torch.norm(diff, dim=(1, 2)))

    total_loss = task_loss + reg_weight * reg_loss

    loss_dict = {
        'total': total_loss.item(),
        'task': task_loss.item(),
        'reg': reg_loss.item(),
    }

    return total_loss, loss_dict


# ============================================================================
# 2. PointNet++ Components
# ============================================================================

def farthest_point_sampling(xyz: torch.Tensor, num_samples: int) -> torch.Tensor:
    """
    Iterative Farthest Point Sampling (FPS).

    Starting from a random seed point, repeatedly selects the point
    farthest from the set chosen so far, giving good spatial coverage.

    Args:
        xyz: [batch, num_points, 3] point coordinates.
        num_samples: Number of points to sample per cloud.

    Returns:
        [batch, num_samples] long tensor of sampled point indices.
    """
    n_batch, n_points, _ = xyz.shape
    dev = xyz.device

    selected = torch.zeros(n_batch, num_samples, dtype=torch.long, device=dev)
    # Running minimum squared distance to any already-selected point.
    min_sq_dist = torch.full((n_batch, n_points), 1e10, device=dev)

    # Seed with one random point per cloud.
    current = torch.randint(0, n_points, (n_batch,), device=dev)
    rows = torch.arange(n_batch, device=dev)

    for step in range(num_samples):
        selected[:, step] = current

        # Squared distance of every point to the newly added point.
        anchor = xyz[rows, current, :].unsqueeze(1)  # [batch, 1, 3]
        sq_dist = torch.sum((xyz - anchor) ** 2, dim=-1)

        # Fold into the running minimum, then pick the farthest point.
        min_sq_dist = torch.minimum(min_sq_dist, sq_dist)
        current = min_sq_dist.argmax(dim=1)

    return selected


def ball_query(
    xyz: torch.Tensor,
    centroids: torch.Tensor,
    radius: float,
    num_samples: int
) -> torch.Tensor:
    """
    Ball query: Find points within radius of each centroid.

    For every centroid, exactly ``num_samples`` point indices are
    returned. Underfull balls are padded by repeating members (random
    with replacement); an empty ball falls back to the single nearest
    point for every slot, so the output never contains invalid indices.

    Note: selection uses torch.randperm/randint, so results are only
    reproducible under a fixed torch RNG seed.

    Args:
        xyz: [batch, num_points, 3] all points
        centroids: [batch, num_centroids, 3] query points
        radius: Search radius
        num_samples: Max number of points per ball

    Returns:
        indices: [batch, num_centroids, num_samples]
    """
    batch_size = xyz.size(0)
    num_points = xyz.size(1)
    num_centroids = centroids.size(1)
    device = xyz.device
    
    # Compute pairwise squared distances between centroids and all points.
    # [batch, num_centroids, num_points]
    xyz_expanded = xyz.unsqueeze(1).repeat(1, num_centroids, 1, 1)
    centroids_expanded = centroids.unsqueeze(2).repeat(1, 1, num_points, 1)
    distances = torch.sum((xyz_expanded - centroids_expanded) ** 2, dim=-1)
    
    # Membership mask; compare squared distances against radius^2 to
    # avoid a sqrt.
    mask = distances < radius ** 2
    
    # Sample num_samples points per centroid.
    indices = torch.zeros(
        batch_size, num_centroids, num_samples,
        dtype=torch.long, device=device
    )
    
    # NOTE(review): O(batch * num_centroids) Python loop — fine for small
    # clouds, slow at scale; production code would vectorize this.
    for b in range(batch_size):
        for c in range(num_centroids):
            valid_indices = torch.where(mask[b, c])[0]
            
            if len(valid_indices) == 0:
                # No points in ball: fall back to the nearest point so
                # downstream gathers always receive a valid index.
                nearest = torch.argmin(distances[b, c])
                indices[b, c, :] = nearest
            elif len(valid_indices) < num_samples:
                # Fewer points than needed: keep all, then pad the tail
                # by sampling members with replacement.
                indices[b, c, :len(valid_indices)] = valid_indices
                indices[b, c, len(valid_indices):] = valid_indices[
                    torch.randint(0, len(valid_indices), (num_samples - len(valid_indices),))
                ]
            else:
                # More points than needed: random subset without
                # replacement.
                sampled = valid_indices[
                    torch.randperm(len(valid_indices), device=device)[:num_samples]
                ]
                indices[b, c, :] = sampled
    
    return indices


class SetAbstractionLayer(nn.Module):
    """
    PointNet++ Set Abstraction Layer.

    Pipeline: farthest-point-sample centroids -> ball-query neighbors
    -> express neighbors in centroid-relative coordinates -> shared MLP
    -> max-pool over each neighborhood. This is the core building block
    that turns N points into a smaller, feature-richer set of centroids.

    Args:
        num_centroids: Number of points to sample (output set size)
        radius: Ball query radius
        num_samples: Max points per neighborhood
        in_channels: Input feature channels (0 if only xyz is available)
        mlp_channels: List of MLP output channels; the last entry is the
            output feature dimension
    """
    def __init__(
        self,
        num_centroids: int,
        radius: float,
        num_samples: int,
        in_channels: int,
        mlp_channels: List[int]
    ):
        super().__init__()
        self.num_centroids = num_centroids
        self.radius = radius
        self.num_samples = num_samples
        
        # Shared MLP applied point-wise within each neighborhood
        # (Conv2d with 1x1 kernels over [centroid, neighbor] axes).
        layers = []
        in_ch = in_channels + 3  # Coordinates + features
        for out_ch in mlp_channels:
            layers.append(nn.Conv2d(in_ch, out_ch, 1))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU())
            in_ch = out_ch
        self.mlp = nn.Sequential(*layers)
        
    def forward(
        self,
        xyz: torch.Tensor,
        features: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            xyz: [batch, num_points, 3] point coordinates
            features: [batch, num_points, C] per-point features, or None
                on the first layer (xyz only)
            
        Returns:
            new_xyz: [batch, num_centroids, 3] sampled centroid coords
            new_features: [batch, num_centroids, mlp_channels[-1]]
        """
        # Farthest point sampling (stochastic: random seed point).
        centroid_indices = farthest_point_sampling(xyz, self.num_centroids)
        
        # Gather centroid coordinates from their indices.
        batch_size = xyz.size(0)
        new_xyz = torch.gather(
            xyz, 1,
            centroid_indices.unsqueeze(-1).repeat(1, 1, 3)
        )
        
        # Neighbor indices within `radius` of each centroid.
        neighbor_indices = ball_query(xyz, new_xyz, self.radius, self.num_samples)
        
        # Gather neighbor coordinates.
        # [batch, num_centroids, num_samples, 3]
        grouped_xyz = torch.gather(
            xyz.unsqueeze(1).repeat(1, self.num_centroids, 1, 1),
            2,
            neighbor_indices.unsqueeze(-1).repeat(1, 1, 1, 3)
        )
        
        # Translate each neighborhood to its centroid's frame; makes the
        # learned features translation-invariant.
        grouped_xyz = grouped_xyz - new_xyz.unsqueeze(2)
        
        # Gather neighbor features (if any) and append the relative
        # coordinates as extra channels.
        if features is not None:
            grouped_features = torch.gather(
                features.unsqueeze(1).repeat(1, self.num_centroids, 1, 1),
                2,
                neighbor_indices.unsqueeze(-1).repeat(1, 1, 1, features.size(-1))
            )
            # Concatenate
            grouped_features = torch.cat([grouped_xyz, grouped_features], dim=-1)
        else:
            grouped_features = grouped_xyz
        
        # Channels-first for Conv2d: [batch, C, num_centroids, num_samples]
        grouped_features = grouped_features.permute(0, 3, 1, 2)
        
        # Shared MLP over every (centroid, neighbor) pair.
        new_features = self.mlp(grouped_features)
        
        # Symmetric max-pool over neighbors -> one feature per centroid.
        new_features = torch.max(new_features, dim=-1)[0]  # [batch, C, num_centroids]
        new_features = new_features.transpose(1, 2)  # [batch, num_centroids, C]
        
        return new_xyz, new_features


class PointNetPlusPlus(nn.Module):
    """
    PointNet++ classifier (single-scale grouping).

    Three stacked set-abstraction stages progressively downsample the
    cloud (512 -> 128 -> 1 centroids) while widening the features,
    ending in a single global descriptor fed to an MLP classifier.

    Args:
        num_classes: Number of output classes.
        num_points: Expected number of input points (kept for API
            compatibility; the forward pass does not depend on it).
    """
    def __init__(self, num_classes: int, num_points: int = 1024):
        super().__init__()

        # Stage 1: 512 centroids, small neighborhoods.
        self.sa1 = SetAbstractionLayer(
            num_centroids=512,
            radius=0.2,
            num_samples=32,
            in_channels=0,
            mlp_channels=[64, 64, 128]
        )

        # Stage 2: 128 centroids, wider neighborhoods.
        self.sa2 = SetAbstractionLayer(
            num_centroids=128,
            radius=0.4,
            num_samples=64,
            in_channels=128,
            mlp_channels=[128, 128, 256]
        )

        # Stage 3: a single centroid whose radius covers the whole
        # (normalized) cloud -> global feature.
        self.sa3 = SetAbstractionLayer(
            num_centroids=1,
            radius=10.0,
            num_samples=128,
            in_channels=256,
            mlp_channels=[256, 512, 1024]
        )

        # MLP classifier over the global descriptor.
        self.fc1 = nn.Linear(1024, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.4)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout2 = nn.Dropout(0.4)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, xyz: torch.Tensor) -> torch.Tensor:
        """
        Args:
            xyz: [batch, num_points, 3] (or [batch, 3, num_points]).

        Returns:
            logits: [batch, num_classes]
        """
        # Canonicalize to points-in-rows layout.
        if xyz.size(-1) != 3:
            xyz = xyz.transpose(1, 2)

        # Hierarchical abstraction; first stage has no input features.
        coords, feats = self.sa1(xyz, None)
        coords, feats = self.sa2(coords, feats)
        coords, feats = self.sa3(coords, feats)

        descriptor = feats.squeeze(1)  # [batch, 1024]

        h = F.relu(self.bn1(self.fc1(descriptor)))
        h = self.dropout1(h)
        h = F.relu(self.bn2(self.fc2(h)))
        h = self.dropout2(h)
        return self.fc3(h)


# ============================================================================
# 3. DGCNN (Dynamic Graph CNN)
# ============================================================================

def knn(x: torch.Tensor, k: int) -> torch.Tensor:
    """
    Indices of the k nearest neighbors of every point (self included).

    Args:
        x: [batch, num_points, dim] point features.
        k: Number of neighbors to return.

    Returns:
        [batch, num_points, k] neighbor indices, nearest first.
    """
    # Negative squared Euclidean distance via the expansion
    # -||a - b||^2 = 2 a.b - ||a||^2 - ||b||^2; topk on it picks the
    # nearest points.
    sq_norm = torch.sum(x ** 2, dim=2, keepdim=True)       # [B, N, 1]
    cross = torch.matmul(x, x.transpose(2, 1))             # [B, N, N]
    neg_sq_dist = -sq_norm - (-2 * cross) - sq_norm.transpose(2, 1)

    return neg_sq_dist.topk(k=k, dim=-1)[1]


def get_graph_feature(
    x: torch.Tensor,
    k: int = 20,
    idx: Optional[torch.Tensor] = None
) -> torch.Tensor:
    """
    Build DGCNN edge features [x_i, x_j - x_i] for each point's k-NN.

    Args:
        x: [batch, num_points, dim] point features.
        k: Number of neighbors (ignored if ``idx`` is given).
        idx: Optional pre-computed neighbor indices
            [batch, num_points, k].

    Returns:
        [batch, num_points, k, 2*dim] edge features.
    """
    n_batch, n_points, n_dim = x.size()

    if idx is None:
        idx = knn(x, k)

    # Flatten (batch, point) into one axis so a single fancy-index
    # gathers all neighbors at once.
    offset = torch.arange(0, n_batch, device=x.device).view(-1, 1, 1) * n_points
    flat_idx = (idx + offset).view(-1)

    gathered = x.reshape(n_batch * n_points, -1)[flat_idx, :]
    neighbors = gathered.view(n_batch, n_points, k, n_dim)

    # Pair every center point with its neighbors' relative offsets.
    centers = x.unsqueeze(2).expand(-1, -1, k, -1)
    return torch.cat([centers, neighbors - centers], dim=-1)


class EdgeConv(nn.Module):
    """
    Edge Convolution layer (DGCNN building block).

    Builds [x_i, x_j - x_i] edge features over each point's k-NN graph,
    applies a shared 1x1 convolution, and max-pools over the neighbors.

    Args:
        in_channels: Input feature channels.
        out_channels: Output feature channels.
        k: Number of neighbors in the graph.
    """
    def __init__(self, in_channels: int, out_channels: int, k: int = 20):
        super().__init__()
        self.k = k

        # Edge features carry [center, offset], hence 2x input channels.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels * 2, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(negative_slope=0.2)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: [batch, num_points, in_channels]

        Returns:
            [batch, num_points, out_channels]
        """
        # [batch, num_points, k, 2C] -> channels-first for Conv2d.
        edges = get_graph_feature(x, self.k).permute(0, 3, 1, 2)

        # Shared conv, then symmetric max over the k neighbors.
        pooled = self.conv(edges).max(dim=-1).values

        # Back to points-in-rows layout.
        return pooled.transpose(1, 2)


class DGCNN(nn.Module):
    """
    Dynamic Graph CNN classifier.

    Four stacked EdgeConv layers (each rebuilding the k-NN graph in the
    current feature space), a 1024-d bottleneck, dual max/avg pooling,
    and an MLP head.

    Args:
        num_classes: Number of output classes.
        k: Number of neighbors for each EdgeConv graph.
    """
    def __init__(self, num_classes: int, k: int = 20):
        super().__init__()
        self.k = k

        # EdgeConv stack; outputs are concatenated for the bottleneck.
        self.conv1 = EdgeConv(3, 64, k)
        self.conv2 = EdgeConv(64, 64, k)
        self.conv3 = EdgeConv(64, 128, k)
        self.conv4 = EdgeConv(128, 256, k)

        # Bottleneck over the concatenated 64+64+128+256 = 512 channels.
        self.conv5 = nn.Sequential(
            nn.Conv1d(512, 1024, 1, bias=False),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(negative_slope=0.2)
        )

        # Classifier over concatenated max+avg pooled features (2048-d).
        self.fc1 = nn.Linear(1024 * 2, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: [batch, num_points, 3]

        Returns:
            logits: [batch, num_classes]
        """
        n_batch = x.size(0)

        # Each EdgeConv recomputes its k-NN graph dynamically.
        f1 = self.conv1(x)
        f2 = self.conv2(f1)
        f3 = self.conv3(f2)
        f4 = self.conv4(f3)

        # Multi-scale skip concat, then channels-first for Conv1d.
        stacked = torch.cat((f1, f2, f3, f4), dim=-1).transpose(1, 2)

        encoded = self.conv5(stacked)  # [batch, 1024, num_points]

        # Dual global pooling.
        pooled_max = F.adaptive_max_pool1d(encoded, 1).view(n_batch, -1)
        pooled_avg = F.adaptive_avg_pool1d(encoded, 1).view(n_batch, -1)
        h = torch.cat((pooled_max, pooled_avg), dim=1)  # [batch, 2048]

        h = F.leaky_relu(self.bn1(self.fc1(h)), negative_slope=0.2)
        h = self.dropout1(h)
        h = F.leaky_relu(self.bn2(self.fc2(h)), negative_slope=0.2)
        h = self.dropout2(h)
        return self.fc3(h)


# ============================================================================
# 4. Chamfer Distance
# ============================================================================

def chamfer_distance(
    pc1: torch.Tensor,
    pc2: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Symmetric Chamfer Distance between two batched point clouds.

    CD = mean_p(min_q ||p - q||^2) + mean_q(min_p ||q - p||^2)

    Args:
        pc1: [batch, N1, 3]
        pc2: [batch, N2, 3]

    Returns:
        cd: [batch] total Chamfer distance.
        cd1: [batch] pc1 -> pc2 term.
        cd2: [batch] pc2 -> pc1 term.
    """
    # Broadcast to all pairs; squared distances [batch, N1, N2].
    sq_dist = torch.sum(
        (pc1[:, :, None, :] - pc2[:, None, :, :]) ** 2, dim=-1
    )

    # Each pc1 point matched to its closest pc2 point, and vice versa.
    nearest_12 = sq_dist.min(dim=2).values  # [batch, N1]
    nearest_21 = sq_dist.min(dim=1).values  # [batch, N2]

    cd1 = nearest_12.mean(dim=1)
    cd2 = nearest_21.mean(dim=1)

    return cd1 + cd2, cd1, cd2


# ============================================================================
# 5. Demo and Visualization
# ============================================================================

def generate_synthetic_point_cloud(
    num_points: int = 1024,
    shape: str = 'sphere'
) -> torch.Tensor:
    """
    Sample a synthetic point cloud of the requested shape.

    Args:
        num_points: Number of points to generate.
        shape: One of 'sphere' (unit sphere surface), 'cube' (uniform in
            [-1, 1]^3) or 'cylinder' (unit-radius lateral surface).

    Returns:
        [num_points, 3] float32 tensor.

    Raises:
        ValueError: If ``shape`` is not one of the supported names.
    """
    if shape == 'sphere':
        # Spherical angles -> Cartesian coordinates on the unit sphere.
        azimuth = np.random.uniform(0, 2 * np.pi, num_points)
        polar = np.random.uniform(0, np.pi, num_points)
        sin_polar = np.sin(polar)
        pts = np.stack(
            [sin_polar * np.cos(azimuth),
             sin_polar * np.sin(azimuth),
             np.cos(polar)],
            axis=1,
        )
    elif shape == 'cube':
        pts = np.random.uniform(-1, 1, (num_points, 3))
    elif shape == 'cylinder':
        # Unit-radius lateral surface, height in [-1, 1].
        azimuth = np.random.uniform(0, 2 * np.pi, num_points)
        height = np.random.uniform(-1, 1, num_points)
        pts = np.stack([np.cos(azimuth), np.sin(azimuth), height], axis=1)
    else:
        raise ValueError(f"Unknown shape: {shape}")

    return torch.tensor(pts, dtype=torch.float32)


def demo_pointnet():
    """Run a quick smoke test of the PointNet classifier."""
    banner = "=" * 80
    print(banner)
    print("PointNet Demo")
    print(banner)

    net = PointNet(num_classes=10, task='classification', use_transform=True)

    # A batch of 4 random clouds with 1024 points each.
    clouds = torch.randn(4, 1024, 3)

    print(f"\nInput shape: {clouds.shape}")
    logits, feat_trans = net(clouds)
    print(f"Output shape: {logits.shape}")
    print(f"Feature transform shape: {feat_trans.shape if feat_trans is not None else None}")

    # Loss with random labels, including the orthogonality regularizer.
    targets = torch.randint(0, 10, (4,))
    loss, parts = pointnet_loss(logits, targets, feat_trans)

    print(f"\nLoss breakdown:")
    print(f"  Total: {parts['total']:.4f}")
    print(f"  Task: {parts['task']:.4f}")
    print(f"  Regularization: {parts['reg']:.6f}")

    trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print(f"\nNumber of parameters: {trainable:,}")


def demo_pointnet_plusplus():
    """Run a quick smoke test of the PointNet++ classifier."""
    banner = "=" * 80
    print("\n" + banner)
    print("PointNet++ Demo")
    print(banner)

    net = PointNetPlusPlus(num_classes=10, num_points=1024)

    # A batch of 2 random clouds with 1024 points each.
    clouds = torch.randn(2, 1024, 3)

    print(f"\nInput shape: {clouds.shape}")
    logits = net(clouds)
    print(f"Output shape: {logits.shape}")

    trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print(f"Number of parameters: {trainable:,}")


def demo_dgcnn():
    """Run a quick smoke test of DGCNN and its k-NN graph builder."""
    banner = "=" * 80
    print("\n" + banner)
    print("DGCNN Demo")
    print(banner)

    net = DGCNN(num_classes=10, k=20)

    # A batch of 2 random clouds with 1024 points each.
    clouds = torch.randn(2, 1024, 3)

    print(f"\nInput shape: {clouds.shape}")
    logits = net(clouds)
    print(f"Output shape: {logits.shape}")

    # The dynamic graph is rebuilt in feature space, not only xyz.
    print(f"\nDynamic k-NN graph construction:")
    feats = torch.randn(2, 1024, 64)
    neighbor_idx = knn(feats, k=20)
    print(f"  Neighbor indices shape: {neighbor_idx.shape}")

    trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print(f"Number of parameters: {trainable:,}")


def demo_chamfer_distance():
    """Run a quick Chamfer-distance sanity check."""
    banner = "=" * 80
    print("\n" + banner)
    print("Chamfer Distance Demo")
    print(banner)

    # Two sphere samples; the second is scaled and shifted slightly.
    cloud_a = generate_synthetic_point_cloud(1024, 'sphere').unsqueeze(0)
    cloud_b = generate_synthetic_point_cloud(1024, 'sphere').unsqueeze(0) * 0.9 + 0.1

    cd, fwd_term, bwd_term = chamfer_distance(cloud_a, cloud_b)

    print(f"\nPoint cloud 1 shape: {cloud_a.shape}")
    print(f"Point cloud 2 shape: {cloud_b.shape}")
    print(f"\nChamfer Distance: {cd.item():.6f}")
    print(f"  PC1 -> PC2: {fwd_term.item():.6f}")
    print(f"  PC2 -> PC1: {bwd_term.item():.6f}")

    # A cloud compared with itself should score (numerically) zero.
    cd_self, _, _ = chamfer_distance(cloud_a, cloud_a)
    print(f"\nSelf Chamfer Distance (should be ~0): {cd_self.item():.8f}")


def print_performance_comparison():
    """Print benchmark tables and a decision guide for point cloud networks.

    Pure console output (no return value): classification, segmentation and
    detection numbers, architectural trade-offs, and a numbered insight list.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("POINT CLOUD NETWORKS COMPARISON")
    print(banner)

    # NOTE: this literal is emitted verbatim — do not reformat.
    comparison = """
    ModelNet40 Classification Performance:
    β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
    β”‚ Method           β”‚ Accuracy (%) β”‚ Parameters   β”‚ Inference    β”‚
    β”‚                  β”‚              β”‚ (M)          β”‚ Time (ms)    β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ PointNet         β”‚ 89.2         β”‚ 3.5          β”‚ 5.2          β”‚
    β”‚ PointNet++       β”‚ 91.9         β”‚ 1.5          β”‚ 18.7         β”‚
    β”‚ DGCNN            β”‚ 92.9         β”‚ 1.8          β”‚ 12.3         β”‚
    β”‚ PointConv        β”‚ 92.5         β”‚ 2.1          β”‚ 25.4         β”‚
    β”‚ Point Trans.     β”‚ 93.7         β”‚ 7.8          β”‚ 32.1         β”‚
    β”‚ Point-BERT       β”‚ 94.6         β”‚ 22.0         β”‚ 45.3         β”‚
    β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
    
    ShapeNet Part Segmentation (mIoU):
    β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
    β”‚ Method           β”‚ Class mIoU   β”‚ Instance mIoUβ”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ PointNet         β”‚ 80.4         β”‚ 83.7         β”‚
    β”‚ PointNet++       β”‚ 81.9         β”‚ 85.1         β”‚
    β”‚ DGCNN            β”‚ 82.3         β”‚ 85.2         β”‚
    β”‚ Point Trans.     β”‚ 83.7         β”‚ 86.6         β”‚
    β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
    
    KITTI 3D Object Detection (Car, AP@0.7):
    β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
    β”‚ Method           β”‚ Easy         β”‚ Moderate     β”‚ Hard         β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ VoxelNet         β”‚ 77.5         β”‚ 65.1         β”‚ 57.7         β”‚
    β”‚ PointPillars     β”‚ 82.6         β”‚ 74.3         β”‚ 68.0         β”‚
    β”‚ PointRCNN        β”‚ 88.9         β”‚ 78.6         β”‚ 77.4         β”‚
    β”‚ PV-RCNN          β”‚ 90.3         β”‚ 81.4         β”‚ 76.8         β”‚
    β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
    
    Architectural Characteristics:
    β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
    β”‚ Method           β”‚ Key Features                                 β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ PointNet         β”‚ βœ“ Permutation invariant (max pooling)       β”‚
    β”‚                  β”‚ βœ“ T-Net (input/feature transforms)          β”‚
    β”‚                  β”‚ βœ— No local context                          β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ PointNet++       β”‚ βœ“ Hierarchical feature learning             β”‚
    β”‚                  β”‚ βœ“ FPS + Ball query (local context)          β”‚
    β”‚                  β”‚ βœ“ Multi-scale grouping                      β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ DGCNN            β”‚ βœ“ Dynamic graph (k-NN in feature space)     β”‚
    β”‚                  β”‚ βœ“ EdgeConv (edge features)                  β”‚
    β”‚                  β”‚ βœ“ Feature concatenation                     β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ Point Trans.     β”‚ βœ“ Vector self-attention                     β”‚
    β”‚                  β”‚ βœ“ Position encoding (Ξ΄ = MLP(p_i - p_j))    β”‚
    β”‚                  β”‚ βœ“ Long-range dependencies                   β”‚
    β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
    
    Decision Guide:
    β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
    β”‚ Scenario                         β”‚ Recommended Method          β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚ Fast inference, simple shapes    β”‚ PointNet                    β”‚
    β”‚ Complex geometry, local patterns β”‚ PointNet++                  β”‚
    β”‚ Semantic relationships           β”‚ DGCNN                       β”‚
    β”‚ Long-range dependencies          β”‚ Point Transformer           β”‚
    β”‚ 3D detection (autonomous)        β”‚ PointPillars, PV-RCNN       β”‚
    β”‚ Large-scale scenes               β”‚ Voxel-based (VoxelNet)      β”‚
    β”‚ Rotation invariance required     β”‚ VN-PointNet, PointConv      β”‚
    β”‚ Pre-training available           β”‚ Point-BERT, Point-MAE       β”‚
    β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
    """

    print(comparison)

    # Emit the numbered takeaways from a list so the numbering stays in sync.
    insights = [
        "PointNet: Simple, fast, but lacks local structure",
        "PointNet++: Hierarchical abstraction, multi-scale features",
        "DGCNN: Dynamic graphs adapt to feature space, not just 3D coords",
        "Point Transformer: Attention mechanism for long-range context",
        "Detection: Voxel/pillar-based faster, point-based more accurate",
        "FPS + Ball query: Core building block for local feature extraction",
        "Chamfer Distance: Standard metric for point cloud generation/completion",
        "Rotation invariance: Critical for real-world robustness",
        "Scalability: Hierarchical downsampling essential for large scenes",
        "Trade-off: Accuracy vs. speed (Point Transformer vs. PointPillars)",
    ]
    print("\nKey Insights:")
    for rank, note in enumerate(insights, start=1):
        print(f"{rank}. {note}")


# ============================================================================
# Run Demonstrations
# ============================================================================

if __name__ == "__main__":
    # Run every demo in sequence, finishing with the comparison tables.
    for run_demo in (
        demo_pointnet,
        demo_pointnet_plusplus,
        demo_dgcnn,
        demo_chamfer_distance,
        print_performance_comparison,
    ):
        run_demo()

    closing = "=" * 80
    print("\n" + closing)
    print("Point Cloud Networks Implementations Complete!")
    print(closing)