# Install fairness libraries (run once)
# !pip install fairlearn aif360
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from fairlearn.metrics import (
    demographic_parity_difference,
    demographic_parity_ratio,
    equalized_odds_difference,
    MetricFrame
)

# Reproducibility and plotting defaults for the whole notebook
np.random.seed(42)
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)
print("✅ Setup complete")
Part 1: Understanding Bias in AIยถ
Types of Biasยถ
1. Historical Biasยถ
What: Bias exists in the world and is reflected in data
Example: Historical hiring data shows men hired more often for tech roles
Result: Model learns to prefer male candidates
2. Representation Biasยถ
What: Training data doesn't represent reality
Example: Face recognition trained mostly on light-skinned faces
Result: Poor performance on dark-skinned faces
3. Measurement Biasยถ
What: Features measured differently for different groups
Example: Credit scores calculated differently by region
Result: Unfair predictions across groups
4. Aggregation Biasยถ
What: One model for all groups
Example: Diabetes risk model using same threshold for all ages
Result: Misses disease in elderly, over-diagnoses in young
5. Evaluation Biasยถ
What: Testing on non-representative data
Example: Speech recognition tested only on native speakers
Result: Poor performance on accents
6. Deployment Biasยถ
What: Model used in inappropriate context
Example: Recidivism model trained on one state, used in another
Result: Unfair predictions in new context
Protected Attributesยถ
Protected attributes are characteristics that anti-discrimination law shields from being used as a basis for adverse decisions. In machine learning, even when these features are explicitly excluded from the model's input, correlated proxy variables (such as zip code correlating with race, or name correlating with gender) can reintroduce the same biases. This is why simply removing the sensitive column is insufficient — you must audit the model's outputs across protected groups regardless.
Common protected attributes:
Race/Ethnicity
Gender/Sex
Age
Religion
Disability
Sexual Orientation
National Origin
Legal context: Regulations like the U.S. Equal Credit Opportunity Act, Title VII of the Civil Rights Act, and the EU AI Act impose specific obligations on automated decision systems. Violations can result in lawsuits, fines, and reputational damage — making fairness auditing a business necessity, not just an ethical ideal.
# Create synthetic loan approval dataset with bias
np.random.seed(42)
n_samples = 1000

# Generate base features; gender is assigned independently of the
# legitimate factors, so any approval-rate gap is pure injected bias.
data = pd.DataFrame({
    'income': np.random.normal(50000, 20000, n_samples),
    'credit_score': np.random.normal(650, 100, n_samples),
    'age': np.random.randint(18, 70, n_samples),
    'gender': np.random.choice(['Male', 'Female'], n_samples)
})

# Clip to realistic ranges
data['income'] = data['income'].clip(20000, 150000)
data['credit_score'] = data['credit_score'].clip(300, 850)

# Legitimate factors: income and credit score each contribute up to 0.4
approval_score = (
    (data['income'] - 20000) / 130000 * 0.4 +
    (data['credit_score'] - 300) / 550 * 0.4
)
# Inject gender bias (males get a 0.2 boost)
approval_score += np.where(data['gender'] == 'Male', 0.2, 0)
# Random noise
approval_score += np.random.normal(0, 0.1, n_samples)
# Convert to binary approval label
data['approved'] = (approval_score > 0.5).astype(int)

print("Loan Approval Dataset (with bias):")
print("=" * 60)
print(data.head(10))

# Check approval rates by gender
print("\nApproval Rates by Gender:")
approval_by_gender = data.groupby('gender')['approved'].agg(['sum', 'count', 'mean'])
approval_by_gender.columns = ['Approved', 'Total', 'Approval Rate']
print(approval_by_gender)
print("\n⚠️ Notice: Males have higher approval rate!")
Part 2: Fairness Metricsยถ
1. Demographic Parity (Statistical Parity)ยถ
Definition: Positive outcome rate should be equal across groups
Formula: P(Ŷ=1 | Gender=Male) = P(Ŷ=1 | Gender=Female)
When satisfied: Both groups have same approval/acceptance rate
Criticism: Ignores differences in qualification
2. Equalized Oddsยถ
Definition: True positive and false positive rates equal across groups
Formula:
TPR: P(Ŷ=1 | Y=1, Gender=Male) = P(Ŷ=1 | Y=1, Gender=Female)
FPR: P(Ŷ=1 | Y=0, Gender=Male) = P(Ŷ=1 | Y=0, Gender=Female)
When satisfied: Model performs equally well across groups
3. Equal Opportunityยถ
Definition: True positive rate equal across groups (relaxed equalized odds)
Formula: P(Ŷ=1 | Y=1, Gender=Male) = P(Ŷ=1 | Y=1, Gender=Female)
When satisfied: Qualified people have equal chance regardless of group
# Train a biased model. Gender is deliberately excluded from the
# features, yet the bias baked into the labels still surfaces in the
# model's predictions (learned via correlated features / label bias).
X = data[['income', 'credit_score', 'age']]
y = data['approved']
gender = data['gender']

X_train, X_test, y_train, y_test, gender_train, gender_test = train_test_split(
    X, y, gender, test_size=0.3, random_state=42
)

# Train model (without using gender as feature)
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Overall accuracy on the held-out set
accuracy = accuracy_score(y_test, y_pred)
print(f"Overall Accuracy: {accuracy:.3f}")

# Group-fairness metrics on the held-out set
dp_diff = demographic_parity_difference(
    y_test, y_pred, sensitive_features=gender_test
)
dp_ratio = demographic_parity_ratio(
    y_test, y_pred, sensitive_features=gender_test
)
eo_diff = equalized_odds_difference(
    y_test, y_pred, sensitive_features=gender_test
)

print("\nFairness Metrics:")
print("=" * 60)
print(f"Demographic Parity Difference: {dp_diff:.3f}")
print(f"  Interpretation: {abs(dp_diff)*100:.1f}% difference in approval rates")
print("  ✅ Fair if close to 0.0")
print(f"\nDemographic Parity Ratio: {dp_ratio:.3f}")
print("  Interpretation: Ratio of approval rates")
print("  ✅ Fair if close to 1.0")
print(f"\nEqualized Odds Difference: {eo_diff:.3f}")
print("  Interpretation: Max difference in TPR/FPR")
print("  ✅ Fair if close to 0.0")

# Four-fifths rule: a DP ratio between 0.8 and 1.25 is commonly accepted
if 0.8 <= dp_ratio <= 1.25:
    print("\n✅ Model passes demographic parity test (80% rule)")
else:
    print("\n❌ Model FAILS demographic parity test (80% rule)")
Performance by Groupยถ
Aggregate fairness metrics like Demographic Parity Difference give you a single number, but they can mask important details. Breaking down standard classification metrics — accuracy, precision, recall, and F1 — by each protected group reveals where the model is failing. A model might have equal selection rates (passing demographic parity) yet still have vastly different false positive rates across groups, meaning different groups experience different types of errors. The MetricFrame class from Fairlearn makes this disaggregated analysis straightforward, and the confusion matrices per group show exactly how prediction errors are distributed.
# Detailed breakdown by group
from sklearn.metrics import precision_score, recall_score, f1_score

metrics = {
    'accuracy': accuracy_score,
    'precision': precision_score,
    'recall': recall_score,
    'f1': f1_score
}

metric_frame = MetricFrame(
    metrics=metrics,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=gender_test
)

print("Performance by Gender:")
print("=" * 60)
print(metric_frame.by_group)

# Visualize: metrics bar chart on top, per-group confusion matrices
# below. Building all axes on one 2x2 grid avoids the overlapping-axes
# bug of mixing plt.subplots(1, 2) with plt.subplot(2, 2, ...).
fig = plt.figure(figsize=(14, 8))

# Plot 1: metrics by group (spans the whole top row)
ax_metrics = fig.add_subplot(2, 1, 1)
metric_frame.by_group.plot(kind='bar', ax=ax_metrics)
ax_metrics.set_title('Model Performance by Gender', fontsize=14, fontweight='bold')
ax_metrics.set_ylabel('Score', fontsize=12)
ax_metrics.set_xlabel('Gender', fontsize=12)
ax_metrics.legend(title='Metric', bbox_to_anchor=(1.05, 1), loc='upper left')
ax_metrics.grid(alpha=0.3, axis='y')
ax_metrics.set_xticklabels(ax_metrics.get_xticklabels(), rotation=0)

# Plot 2: one confusion matrix per gender (bottom row)
for i, gender_val in enumerate(['Female', 'Male']):
    mask = gender_test == gender_val
    cm = confusion_matrix(y_test[mask], y_pred[mask])
    ax_cm = fig.add_subplot(2, 2, i + 3)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax_cm)
    ax_cm.set_title(f'{gender_val} - Confusion Matrix')
    ax_cm.set_ylabel('Actual')
    ax_cm.set_xlabel('Predicted')

plt.tight_layout()
plt.show()
Part 3: Detecting Biasยถ
Data Bias Detectionยถ
Before touching the model, the first step is auditing the training data itself. Statistical tests like Chi-squared independence tests reveal whether the sensitive attribute and the target label are associated in the dataset — which would mean any model trained on this data will likely inherit that association. Beyond statistical tests, examining feature distributions by group, checking for representation imbalance, and understanding the data collection process are all essential steps. Bias introduced at the data stage propagates through every downstream decision.
def analyze_dataset_bias(df, target_col, sensitive_col):
    """Print a dataset-level bias audit of `target_col` vs `sensitive_col`.

    Reports (1) the class distribution within each group, (2) group
    representation, (3) the mean of every numeric feature per group, and
    (4) a chi-squared test of independence between the sensitive
    attribute and the target. Prints its findings; returns None.
    """
    print(f"Dataset Bias Analysis: {target_col} by {sensitive_col}")
    print("=" * 70)

    # 1. Class distribution by group (row-normalized percentages)
    print("\n1. Class Distribution:")
    dist = pd.crosstab(
        df[sensitive_col],
        df[target_col],
        normalize='index'
    ) * 100
    print(dist.round(2))

    # 2. Group representation (counts and share of the dataset)
    print("\n2. Group Representation:")
    group_counts = df[sensitive_col].value_counts()
    group_pct = (group_counts / len(df) * 100).round(2)
    representation = pd.DataFrame({
        'Count': group_counts,
        'Percentage': group_pct
    })
    print(representation)

    # 3. Mean of each numeric feature by group
    print("\n3. Feature Statistics by Group:")
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    stats = df.groupby(sensitive_col)[numeric_cols].mean()
    print(stats.round(2))

    # 4. Chi-squared test: is the target independent of the group?
    from scipy.stats import chi2_contingency
    contingency = pd.crosstab(df[sensitive_col], df[target_col])
    chi2, p_value, dof, expected = chi2_contingency(contingency)
    print(f"\n4. Statistical Independence Test:")
    print(f"   Chi-square statistic: {chi2:.3f}")
    print(f"   P-value: {p_value:.4f}")
    if p_value < 0.05:
        print(f"   ⚠️ SIGNIFICANT relationship between {sensitive_col} and {target_col}!")
        print(f"   Data shows potential bias.")
    else:
        print("   ✅ No significant relationship detected")
analyze_dataset_bias(data, 'approved', 'gender')
Model Bias Detectionยถ
Once the model is trained, we need to measure bias in its predictions independently of data bias. Even a model trained on perfectly balanced data can develop biased decision boundaries. Model bias detection involves computing group-specific metrics — selection rate, true positive rate (TPR), false positive rate (FPR), and precision — and checking whether the ratios between groups fall within acceptable bounds. The commonly used "80% rule" (or four-fifths rule from U.S. employment law) states that the selection rate for any protected group should be at least 80% of the rate for the highest-performing group. The function below provides a comprehensive bias detection report with both ratio and difference tests.
def detect_model_bias(y_true, y_pred, sensitive_features, feature_name='Group'):
    """Per-group bias report for a binary (0/1) classifier.

    For each group in `sensitive_features`, computes selection rate,
    TPR (recall), FPR, precision, and accuracy, prints the table plus a
    disparity analysis (min/max ratio vs. the 80% rule, and absolute
    difference vs. a 0.1 threshold), and returns the per-group DataFrame.
    """
    results = []
    for group in np.unique(sensitive_features):
        mask = np.asarray(sensitive_features) == group
        yt = np.asarray(y_true)[mask]
        yp = np.asarray(y_pred)[mask]
        # Confusion counts computed directly for the fixed {0, 1} labels.
        # This stays correct even when a group contains only one class,
        # where sklearn's confusion_matrix would not be 2x2 and
        # cm.ravel() would fail to unpack into four values.
        tp = int(np.sum((yt == 1) & (yp == 1)))
        tn = int(np.sum((yt == 0) & (yp == 0)))
        fp = int(np.sum((yt == 0) & (yp == 1)))
        fn = int(np.sum((yt == 1) & (yp == 0)))
        # Rates (guard every denominator against empty groups)
        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0  # Recall
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        selection_rate = (tp + fp) / len(yt) if len(yt) > 0 else 0
        results.append({
            feature_name: group,
            'Selection Rate': selection_rate,
            'TPR (Recall)': tpr,
            'FPR': fpr,
            'Precision': precision,
            'Accuracy': float(np.mean(yt == yp)) if len(yt) > 0 else 0
        })
    df_results = pd.DataFrame(results)
    print("Model Bias Detection Report:")
    print("=" * 80)
    print(df_results.round(3).to_string(index=False))
    # Disparity analysis: compare the extremes across groups per metric
    print("\nDisparity Analysis:")
    print("-" * 80)
    for col in ['Selection Rate', 'TPR (Recall)', 'FPR', 'Precision', 'Accuracy']:
        max_val = df_results[col].max()
        min_val = df_results[col].min()
        ratio = min_val / max_val if max_val > 0 else 0
        diff = max_val - min_val
        print(f"{col}:")
        print(f"  Range: {min_val:.3f} - {max_val:.3f}")
        print(f"  Ratio: {ratio:.3f} {'✅' if ratio >= 0.8 else '❌'}")
        print(f"  Difference: {diff:.3f} {'✅' if diff <= 0.1 else '❌'}")
    return df_results
bias_report = detect_model_bias(y_test, y_pred, gender_test, 'Gender')
Part 4: Mitigation Strategiesยถ
1. Pre-Processing: Fix the Dataยถ
Techniques:
Reweighting: Assign different weights to samples
Resampling: Oversample underrepresented groups
Data Augmentation: Generate synthetic fair data
# Reweighting example
from sklearn.utils.class_weight import compute_sample_weight
# Compute sample weights to balance the class (approved/denied) distribution.
# NOTE(review): despite the surrounding fairness framing, this balances only
# the label, not gender — a fairness-aware reweighting would weight by the
# joint (gender, class) distribution.
sample_weights = compute_sample_weight(
    class_weight='balanced',
    y=y_train
)
# Train model with sample weights
model_weighted = LogisticRegression(random_state=42)
model_weighted.fit(X_train, y_train, sample_weight=sample_weights)
y_pred_weighted = model_weighted.predict(X_test)
print("Comparison: Original vs Weighted Model")
print("=" * 70)
print("\nOriginal Model:")
detect_model_bias(y_test, y_pred, gender_test, 'Gender')
print("\n" + "=" * 70)
print("\nWeighted Model:")
detect_model_bias(y_test, y_pred_weighted, gender_test, 'Gender')
2. In-Processing: Constrain the Modelยถ
In-processing methods modify the training algorithm itself to enforce fairness constraints while optimizing for accuracy. Fairlearn's ExponentiatedGradient reformulates fair classification as a sequence of cost-sensitive classification problems, iteratively reweighting samples to find a model that satisfies the specified fairness constraint (e.g., Demographic Parity) while minimizing the accuracy loss.
Techniques:
Exponentiated Gradient — Reduces constrained optimization to a sequence of weighted classification problems
Adversarial debiasing — Trains a classifier alongside an adversary that tries to predict the sensitive attribute from the classifier's output; the main model learns to make predictions that the adversary cannot distinguish by group
Fairness-aware algorithms — Custom loss functions that include fairness penalty terms, such as \(\mathcal{L} = \mathcal{L}_{\text{task}} + \lambda \cdot \mathcal{L}_{\text{fairness}}\)
# Fairlearn's ExponentiatedGradient for demographic parity:
# in-processing mitigation that wraps the base estimator and enforces
# the fairness constraint during training.
from fairlearn.reductions import ExponentiatedGradient, DemographicParity

# Create fairness-constrained model
mitigator = ExponentiatedGradient(
    LogisticRegression(random_state=42),
    constraints=DemographicParity()
)
mitigator.fit(X_train, y_train, sensitive_features=gender_train)
y_pred_fair = mitigator.predict(X_test)

print("Comparison: Original vs Fair Model")
print("=" * 70)

# Compare accuracy and demographic-parity metrics side by side
models = [
    ('Original', y_pred),
    ('Fair (Demographic Parity)', y_pred_fair)
]
comparison = []
for name, preds in models:
    acc = accuracy_score(y_test, preds)
    dp_diff = demographic_parity_difference(y_test, preds, sensitive_features=gender_test)
    dp_ratio = demographic_parity_ratio(y_test, preds, sensitive_features=gender_test)
    comparison.append({
        'Model': name,
        'Accuracy': acc,
        'DP Difference': dp_diff,
        'DP Ratio': dp_ratio
    })
df_comparison = pd.DataFrame(comparison)
print(df_comparison.round(3).to_string(index=False))
print("\n💡 Notice: Fair model may have lower accuracy but better fairness!")
3. Post-Processing: Adjust Predictionsยถ
Post-processing methods leave the trained model untouched and instead adjust its predictions to satisfy fairness criteria. This is especially useful when you cannot retrain the model (e.g., a vendor-provided black-box model) or when you want to separate the accuracy optimization from fairness enforcement. Fairlearn's ThresholdOptimizer finds group-specific classification thresholds that achieve the best accuracy subject to equalized odds or demographic parity constraints.
Techniques:
Threshold optimization — Different classification thresholds per group, calibrated to equalize TPR/FPR
Calibration — Ensure predicted probabilities are equally well-calibrated across groups
Reject option classification — Defer uncertain predictions near the decision boundary to human reviewers
# Threshold Optimizer for equalized odds: post-processing mitigation
# that leaves the trained model untouched and learns group-specific
# decision thresholds over its predicted probabilities.
from fairlearn.postprocessing import ThresholdOptimizer

threshold_optimizer = ThresholdOptimizer(
    estimator=model,
    constraints='equalized_odds',
    objective='accuracy_score',
    predict_method='predict_proba'  # thresholds applied to probabilities
)
threshold_optimizer.fit(X_train, y_train, sensitive_features=gender_train)
y_pred_thresh = threshold_optimizer.predict(X_test, sensitive_features=gender_test)

print("Post-Processing Results:")
print("=" * 70)
models = [
    ('Original', y_pred),
    ('Threshold Optimized', y_pred_thresh)
]
for name, preds in models:
    print(f"\n{name}:")
    acc = accuracy_score(y_test, preds)
    eo_diff = equalized_odds_difference(y_test, preds, sensitive_features=gender_test)
    print(f"  Accuracy: {acc:.3f}")
    print(f"  Equalized Odds Difference: {eo_diff:.3f}")
Part 5: Real-World Case Studiesยถ
Case Study 1: COMPAS Recidivismยถ
Background: Algorithm used to predict criminal reoffending
Findings (ProPublica 2016):
Black defendants: 45% false positive rate
White defendants: 23% false positive rate
Nearly 2x more likely to wrongly label Black defendants as high risk
Impact: Influenced bail and sentencing decisions
Lesson: Even without race as a feature, proxies (zip code, etc.) can cause bias
Case Study 2: Amazon Hiring Toolยถ
Background: ML tool to screen resumes
Problem: Trained on 10 years of historical hiring data (mostly male candidates)
Result:
Penalized resumes with "women's" in them (e.g., "women's chess club")
Downranked graduates from all-womenโs colleges
Action: Amazon scrapped the tool
Lesson: Historical bias gets learned by models
Case Study 3: Face Recognitionยถ
Study (MIT, 2018):
Error rates by demographic:
Light-skinned males: 0.8%
Dark-skinned females: 34.7%
Cause: Training data heavily skewed toward light-skinned faces
Impact: Misidentification in security, law enforcement
Lesson: Representation matters!
# Simulate COMPAS-like scenario
def simulate_compas():
    """Simulate a biased recidivism predictor and print per-race FPRs.

    Ground-truth recidivism is generated at the same base rate for both
    groups; bias is injected only into the predictions, by flipping
    ~30% of Black defendants' true negatives to positive. Prints the
    resulting false-positive-rate disparity; returns None.
    """
    np.random.seed(42)
    n = 1000
    # Synthetic population
    race = np.random.choice(['Black', 'White'], n, p=[0.4, 0.6])
    # True recidivism (same rate regardless of race)
    true_recidivism = np.random.choice([0, 1], n, p=[0.6, 0.4])
    # Start from perfect predictions, then inject bias
    predictions = true_recidivism.copy()
    # Flip 30% of Black non-recidivists to "high risk" (false positives)
    black_mask = (race == 'Black') & (true_recidivism == 0)
    flip_indices = np.where(black_mask)[0]
    flip_count = int(len(flip_indices) * 0.3)
    predictions[np.random.choice(flip_indices, flip_count, replace=False)] = 1
    # Report false positive rates per group
    print("COMPAS Simulation - False Positive Rates:")
    print("=" * 60)
    for race_val in ['Black', 'White']:
        mask = race == race_val
        actual_neg = (true_recidivism[mask] == 0)
        pred_pos = (predictions[mask] == 1)
        fp = np.sum(actual_neg & pred_pos)
        tn = np.sum(actual_neg & ~pred_pos)
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
        print(f"{race_val}:")
        print(f"  False Positives: {fp}")
        print(f"  True Negatives: {tn}")
        print(f"  False Positive Rate: {fpr:.3f} ({fpr*100:.1f}%)")
    print("\n⚠️ This demonstrates how bias manifests in predictions!")

simulate_compas()
Part 6: Fairness Toolkitsยถ
Popular Toolsยถ
Fairlearn (Microsoft)
Mitigation algorithms
Dashboard for visualization
Good for tabular data
AIF360 (IBM)
70+ fairness metrics
10+ mitigation algorithms
Research-oriented
What-If Tool (Google)
Visual interface
Interactive exploration
TensorFlow integration
Fairness Indicators (TensorFlow)
Model evaluation
TensorBoard integration
Production monitoring
๐ฏ Knowledge Checkยถ
Q1: Whatโs the difference between demographic parity and equalized odds?
Q2: Can removing sensitive features (like race) eliminate bias?
Q3: What are the three stages for bias mitigation?
Q4: Why might improving fairness reduce accuracy?
Click for answers
A1: Demographic parity: equal outcome rates. Equalized odds: equal TPR and FPR across groups
A2: No! Proxy features (zip code, name, etc.) can still encode sensitive info
A3: Pre-processing (data), in-processing (model), post-processing (predictions)
A4: Trade-off: model optimized for overall accuracy may perform unequally across groups
๐ Summaryยถ
Fairness Checklistยถ
Before Training:
Analyze dataset for bias
Check representation of protected groups
Identify potential proxy features
Define fairness criteria for your use case
During Training:
Track metrics by group
Consider fairness constraints
Use appropriate mitigation if needed
After Training:
Calculate fairness metrics
Test on diverse data
Document limitations
Monitor in production
Remember: Fairness is context-dependent. Always consider:
Legal requirements
Stakeholder needs
Potential harms
Trade-offs
๐ Next Stepsยถ
Complete Bias & Fairness Challenge
Read Notebook 5: Model Comparison & Selection
Audit your own models for bias
Explore Fairlearn and AIF360 documentation
Important work! Fairness is crucial for responsible AI! โ๏ธ