import pandas as pd
Merge, Join, and Concatenate β Combining DataFramesΒΆ
Real-world datasets are rarely stored in a single table. This notebook demonstrates the three primary ways to combine DataFrames in Pandas using restaurant sales data: vertical stacking with pd.concat() (with ignore_index and keys parameters), appending rows with .append(), SQL-style merging with .merge() (inner, outer, left joins, suffixes, indicator, and multi-key joins), cross-DataFrame lookups with different column names using left_on/right_on, and index-based joining with .join(). Mastering these operations is critical for data engineering workflows that assemble analytical datasets from multiple source tables.
week1 = pd.read_csv("Restaurant - Week 1 Sales.csv")
week2 = pd.read_csv("Restaurant - Week 2 Sales.csv")
customers = pd.read_csv("Restaurant - Customers.csv")
foods = pd.read_csv("Restaurant - Foods.csv")
len(week1)
len(week2)
len(pd.concat([week1,week2]))
pd.concat([week1,week2],ignore_index=True)
pd.concat([week1,week2], keys=["Week1","Week2"])
week1 = pd.read_csv("Restaurant - Week 1 Sales.csv")
week2 = pd.read_csv("Restaurant - Week 2 Sales.csv")
customers = pd.read_csv("Restaurant - Customers.csv")
foods = pd.read_csv("Restaurant - Foods.csv")
week2.append(week1,ignore_index=True).tail()
week1.merge(week2)
week1.merge(week2, how='inner', on='Customer ID',suffixes=[" - week1"," - week2"])
week1.merge(week2, how='inner', on=['Customer ID','Food ID'],suffixes=[" - week1"," - week2"])
week1.merge(week2, how='inner', suffixes=[" - week1"," - week2"])
week1[week1['Customer ID']==21]
week2[week2['Customer ID']==21]
outer_merge = week1.merge(week2, how='outer',on="Customer ID", suffixes=[" - week1"," - week2"], indicator=True)
outer_merge.nunique()
outer_merge["_merge"].value_counts()
foods.head()
week1.merge(foods, how='left', on='Food ID' )
week1.merge(foods, how='left', on='Food ID' , sort=True)
week2.head()
customers.head()
week2.merge(customers, left_on='Customer ID', right_on='ID', how = 'left').drop('ID',axis='columns')
satifaction = pd.read_csv("Restaurant - Week 1 Satisfaction.csv")
satifaction.head()
week1.join(satifaction)