import pandas as pd
MultiIndex β Hierarchical Indexing, Pivoting, and ReshapingΒΆ
A MultiIndex (hierarchical index) allows you to represent higher-dimensional data in a two-dimensional DataFrame. This notebook covers creating multi-level indices from columns, accessing data with .loc[] using tuples, level manipulation with .swaplevel() and .get_level_values(), reshaping between wide and long formats with .stack(), .unstack(), .pivot(), .pivot_table(), and .melt(). These reshaping operations are essential for transforming data between the format it is stored in and the format required for analysis or visualization β a task that arises constantly in real-world data science projects.
bigmac = pd.read_csv("bigmac.csv", parse_dates=["Date"])
bigmac.head()
bigmac.dtypes
bigmac.info()
bigmac["Country"].nunique()
bigmac["Country"] = bigmac["Country"].astype("category")
bigmac.set_index(keys = ["Date","Country"], inplace=True)
bigmac.sort_index(inplace=True)
bigmac.index.names
type(bigmac.index)
bigmac = pd.read_csv("bigmac.csv", parse_dates=["Date"], index_col=["Date","Country"])
bigmac.sort_index(inplace=True)
bigmac.head()
bigmac.index.get_level_values(0)
bigmac.index.set_names(["Date","Country"], inplace=True)
bigmac.head()
bigmac.sort_index(ascending=[True,False])
bigmac.loc[("2010-01-01","Norway"),"Price in US Dollars"]
bigmac.transpose()
bigmac.head()
bigmac.ix["Price in US Dollars"]
bigmac = pd.read_csv("bigmac.csv", parse_dates=["Date"], index_col=["Date","Country"])
bigmac.sort_index(inplace=True)
bigmac.head()
bigmac.swaplevel()
world = pd.read_csv("worldstats.csv", index_col=["country","year"])
world.head()
world.stack()
world.info()
world.head()
type(world.stack().to_frame())
s = world.stack()
s.unstack()
s.unstack().unstack().unstack()
s.unstack(2)
s.unstack("year")
s.unstack(level = ["country","year"])
t = s.unstack(level = ["year"], fill_value = 0)
t.head()
sales = pd.read_csv("salesmen.csv", parse_dates = ["Date"])
sales["Salesman"] = sales["Salesman"].astype("category")
sales.head()
sales.pivot(index="Date", columns="Salesman", values="Revenue")
foods = pd.read_csv("foods.csv")
foods.head()
foods.pivot_table(values="Spend", index=["Gender","Item"], columns="City", aggfunc="sum")
sales = pd.read_csv("quarters.csv")
sales.head()
sales.melt(id_vars="Salesman",var_name="Quater", value_name="Revenue")