Data Visualization: From Exploratory Plots to Publication-Quality Figures

Matplotlib fundamentals, seaborn statistical plots, and plotly interactive charts — with a focus on making visualizations that actually communicate insights.

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import pandas as pd
import numpy as np

# Global plot styling: whitegrid style sheet first, then the husl colour palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Example datasets used throughout, fetched by name through seaborn's loader.
tips, flights, iris = (
    sns.load_dataset(name) for name in ('tips', 'flights', 'iris')
)

print('Datasets loaded: tips, flights, iris')

1. Matplotlib Subplots — Full Control

# Four views of the tips data on a single 2x2 grid of axes.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Tips Dataset: Four Perspectives', fontsize=16, fontweight='bold')

# 1. Total bill distribution, with mean and median marked as dashed lines
ax_hist = axes[0, 0]
bill_mean = tips['total_bill'].mean()
bill_median = tips['total_bill'].median()
ax_hist.hist(tips['total_bill'], bins=25, color='steelblue', edgecolor='white', alpha=0.8)
ax_hist.axvline(bill_mean, color='red', linestyle='--', label=f'Mean: ${bill_mean:.2f}')
ax_hist.axvline(bill_median, color='orange', linestyle='--', label=f'Median: ${bill_median:.2f}')
ax_hist.set_title('Total Bill Distribution')
ax_hist.set_xlabel('Total Bill ($)')
ax_hist.legend()

# 2. Tip % by day
# The derived column is stored on `tips` itself because later cells plot it too.
ax_box = axes[0, 1]
tips['tip_pct'] = tips['tip'] / tips['total_bill'] * 100
day_order = ['Thur', 'Fri', 'Sat', 'Sun']
bp = ax_box.boxplot(
    [tips.loc[tips['day'] == d, 'tip_pct'].values for d in day_order],
    patch_artist=True,  # boxes become patches so they can be filled below
    medianprops={'color': 'red', 'linewidth': 2},
)
# Label the boxes on the axes rather than through boxplot(labels=...): that
# keyword is deprecated in newer Matplotlib in favour of tick_labels, while
# setting the ticks directly works on both sides of the rename. Boxes are
# drawn at positions 1..N by default.
ax_box.set_xticks(list(range(1, len(day_order) + 1)), labels=day_order)
colors = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)
ax_box.set_title('Tip % Distribution by Day')
ax_box.set_ylabel('Tip %')

# 3. Scatter: bill vs tip, colored by sex, plus a least-squares trend line
ax_scatter = axes[1, 0]
for sex, color, marker in [('Male', 'steelblue', 'o'), ('Female', 'salmon', 's')]:
    subset = tips[tips['sex'] == sex]
    ax_scatter.scatter(subset['total_bill'], subset['tip'],
                       color=color, marker=marker, alpha=0.6, s=50, label=sex)
m, b = np.polyfit(tips['total_bill'], tips['tip'], 1)  # slope, intercept
x_line = np.linspace(tips['total_bill'].min(), tips['total_bill'].max(), 100)
# For a straight-line fit, r squared is the squared Pearson correlation.
r_squared = np.corrcoef(tips['total_bill'], tips['tip'])[0, 1] ** 2
ax_scatter.plot(x_line, m * x_line + b, 'k--', alpha=0.5,
                label=f'Trend (r²={r_squared:.2f})')
ax_scatter.set_title('Total Bill vs Tip')
ax_scatter.set_xlabel('Total Bill ($)')
ax_scatter.set_ylabel('Tip ($)')
ax_scatter.legend()

# 4. Stacked bar: smoker × time
# observed=False is passed explicitly so the result does not depend on the
# pandas default for categorical groupers (and no FutureWarning is raised).
ax_bar = axes[1, 1]
counts = (
    tips.groupby(['time', 'smoker'], observed=False)
    .size()
    .unstack(fill_value=0)
)
counts.plot(kind='bar', stacked=True, ax=ax_bar,
            color=['#e74c3c', '#3498db'], alpha=0.8, edgecolor='white')
ax_bar.set_title('Diners by Meal Time & Smoking')
ax_bar.set_xlabel('Time')
ax_bar.tick_params(axis='x', rotation=0)

plt.tight_layout()
plt.show()

2. Seaborn — Statistical Visualizations

# Three seaborn statistical plots side by side on one row of axes.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
violin_ax, heatmap_ax, scatter_ax = axes

# Violin plot — richer than a boxplot. The two sexes share each violin
# (split=True) and quartile lines are drawn inside. Reads the 'tip_pct'
# column that the matplotlib cell added to `tips`.
weekday_order = ['Thur', 'Fri', 'Sat', 'Sun']
sns.violinplot(
    data=tips, x='day', y='tip_pct', hue='sex',
    order=weekday_order, split=True, inner='quart',
    ax=violin_ax,
)
violin_ax.set_title('Tip % by Day & Sex (Violin)')
violin_ax.set_ylabel('Tip %')

# Heatmap — flights reshaped to months (rows) × years (columns).
# `flights_pivot` is reused by the plotly cell further down.
flights_pivot = flights.pivot(index='month', columns='year', values='passengers')
sns.heatmap(
    flights_pivot,
    annot=True, fmt='d',
    cmap='YlOrRd', linewidths=0.5,
    cbar_kws={'label': 'Passengers'},
    ax=heatmap_ax,
)
heatmap_ax.set_title('Monthly Airline Passengers (1949-1960)')

# A single pair of iris measurements; species drives both colour and marker.
sns.scatterplot(
    data=iris, x='sepal_length', y='petal_length',
    hue='species', style='species', s=80,
    ax=scatter_ax,
)
scatter_ax.set_title('Iris: Sepal vs Petal Length')

plt.tight_layout()
plt.show()
# FacetGrid — small multiples: one total-bill histogram (with KDE) per
# combination of sex (rows) and meal time (columns).
g = sns.FacetGrid(tips, col='time', row='sex', height=4, aspect=1.2)
g.map_dataframe(sns.histplot, x='total_bill', bins=20, kde=True)
g.set_axis_labels('Total Bill ($)', 'Count')
g.set_titles(col_template='{col_name}', row_template='{row_name}')
# No g.add_legend() here: nothing is mapped to hue, so there are no legend
# entries to display.
# y=1.02 places the title just above the top row of panels.
plt.suptitle('Bill Distribution by Meal × Sex (Small Multiples)', y=1.02, fontsize=13)
plt.tight_layout()
plt.show()

3. Publication-Quality Figures

# Techniques for making figures presentable: a restrained palette, one
# highlighted data point with an annotation, readable tick labels, and
# reduced chart borders.

COLORS = {'primary': '#2c3e50', 'accent': '#e74c3c', 'highlight': '#f39c12', 'muted': '#95a5a6'}

fig, ax = plt.subplots(figsize=(10, 6))

# Annual trend: the per-month counts are summed within each year
annual = flights.groupby('year')['passengers'].sum().reset_index()
monthly = annual  # previous (misleading) name, kept in case a later cell still uses it
ax.fill_between(annual['year'], annual['passengers'],
                alpha=0.15, color=COLORS['primary'])
ax.plot(annual['year'], annual['passengers'],
        color=COLORS['primary'], linewidth=2.5, marker='o', markersize=6)

# Highlight peak year
peak = annual.loc[annual['passengers'].idxmax()]
peak_year = peak['year']
peak_total = int(peak['passengers'])  # plain int so the ',' format spec is unambiguous
ax.scatter(peak_year, peak_total,
           color=COLORS['accent'], s=150, zorder=5)
# Put the label three years to the left at the same height as the peak. A
# fixed vertical offset in data units (previously -5000) only suits one value
# scale; this placement does not depend on the magnitude of the totals.
ax.annotate(f'Peak: {peak_total:,}',
            xy=(peak_year, peak_total),
            xytext=(peak_year - 3, peak_total),
            va='center',
            fontsize=11, color=COLORS['accent'],
            arrowprops={'arrowstyle': '->', 'color': COLORS['accent']})

# Polish
ax.set_title('Annual Airline Passengers: 1949–1960', fontsize=15, fontweight='bold', pad=15)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Total Passengers', fontsize=12)
# Tick labels show the value divided by 1000 with a 'K' suffix.
# NOTE(review): if this dataset's counts are already recorded in thousands,
# the 'K' labels understate the totals — confirm the units and adjust the
# axis label/formatter if so.
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x/1000:.0f}K'))
ax.spines[['top', 'right']].set_visible(False)  # Remove top/right borders
ax.grid(axis='y', alpha=0.4)

plt.tight_layout()
plt.show()

4. Plotly — Interactive Charts

# !pip install plotly
# Plotly is optional. Only the imports sit inside the try, so an ImportError
# can only mean "plotly is not installed"; errors raised while building or
# showing the charts surface normally instead of being reported as a missing
# package.
try:
    import plotly.express as px
    # Not used in this cell; imported as before so they remain available.
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
except ImportError:
    print('Install plotly: pip install plotly')
    print('Example code shown above — run after installing.')
else:
    # Interactive scatter. Reads the 'tip_pct' column that the matplotlib
    # cell added to `tips`.
    fig = px.scatter(
        tips, x='total_bill', y='tip',
        color='sex', size='tip_pct', symbol='time',
        hover_data=['day', 'size'],
        title='Tips: Interactive Explorer (hover for details)',
        labels={'total_bill': 'Total Bill ($)', 'tip': 'Tip ($)'}
    )
    fig.show()

    # Interactive heatmap of the month × year table built in the seaborn cell.
    fig2 = px.imshow(
        flights_pivot,
        title='Airline Passengers Heatmap (interactive)',
        color_continuous_scale='YlOrRd',
        aspect='auto'
    )
    fig2.show()

Visualization Cheat Sheet

Data type / Goal                  → Best chart
──────────────────────────────────────────────
Single continuous variable        → histogram + KDE
Distribution comparison           → violin plot or overlaid KDE
Category comparison               → bar chart with error bars
Trend over time                   → line chart with shaded area
Two continuous variables          → scatter with regression line
Many variables pairwise           → pairplot (seaborn)
Matrix of values                  → heatmap
Part of whole                     → stacked bar (not pie!)
Multiple group distributions      → FacetGrid small multiples
Interactive exploration           → plotly express

Exercises

  1. Recreate the matplotlib multi-panel figure using seaborn API only.

  2. Load sns.load_dataset('penguins') and create a pairplot colored by species.

  3. Build a Plotly animated scatter plot showing flight passengers growing over time using animation_frame='year'.

  4. Create a dashboard-style 3×2 figure for the tips dataset with a consistent color scheme.

  5. Use matplotlib.gridspec.GridSpec to create an asymmetric layout (large scatter + two small histograms as marginals).