import pandas as pd
import datetime as dt
Apple Health Data — Real-World Data Analysis Project
This notebook demonstrates a real-world data analysis workflow using Apple Health export data (walking distance, step counts, and flights climbed). It covers loading multiple CSV files, selecting relevant columns, parsing dates with pd.to_datetime(), applying unit conversion functions (kilometers to meters) with .apply(), computing summary statistics like .mean(), and creating interactive bar charts with Plotly.
Why this matters: Health and fitness data is a perfect example of messy, real-world time-series data that requires cleaning, transformation, and visualization before useful insights emerge. This project illustrates the complete data science pipeline from raw export to actionable visualization.
# Load the three Apple Health CSV exports.
# NOTE: parse_dates=True only asks pandas to try parsing the *index* as
# dates; the date columns are still converted explicitly further down.
DistanceWalkingRunning = pd.read_csv("DistanceWalkingRunning.csv", parse_dates=True)
StepCount = pd.read_csv("StepCount.csv", parse_dates=True)
FlightsClimbed = pd.read_csv("FlightsClimbed.csv")

# Bare expressions are notebook-style previews (no effect when run as a script).
DistanceWalkingRunning
FlightsClimbed
DistanceWalkingRunning.head()

# Keep only the columns used by the analysis. The explicit .copy() makes
# `walking` an independent frame, so adding columns to it later does not
# raise SettingWithCopyWarning or depend on the source frame.
walking = DistanceWalkingRunning[
    ["creationDate", "startDate", "endDate", "value"]
].copy()
walking.head()
walking["creationDate"]

# Store the parsed timestamps in a "date" column. Previously the result of
# pd.to_datetime() was discarded, so the walking["date"] lookups that follow
# (and the later chart's x-axis) failed with KeyError.
walking["date"] = pd.to_datetime(walking["creationDate"])
walking["date"]
walking.head()
import plotly
from plotly.graph_objs import Scatter, Layout
import plotly.graph_objs as go
def km_to_m(km):
    """Convert a distance in kilometres to a whole number of metres.

    Args:
        km: Distance in kilometres.

    Returns:
        int: The distance in metres, rounded to the nearest metre.
    """
    # Round instead of truncating: a floating-point product can land a hair
    # below the intended whole number, and int() alone would then silently
    # drop a full metre.
    return int(round(km * 1000))


# Build the "metres" column before plotting -- the chart below reads it, so
# this has to run first when the file is executed top to bottom.
walking["metres"] = walking["value"].apply(km_to_m)
walking["metres"].mean()

# One bar per record: calendar date of the parsed timestamp on the x-axis,
# converted distance in metres on the y-axis.
stepdata = [go.Bar(x=walking["date"].dt.date, y=walking["metres"])]
plotly.offline.plot({
    'data': stepdata,
    'layout': {
        'title': 'Walking (metres)',
        'xaxis': {'title': 'Creation Date'},
        'yaxis': {'title': 'Value '},
    },
})