import pandas as pd

# DataFrame Fundamentals – Inspection, Selection, and Cleaning

# This notebook covers the essential operations you need to work with pandas
# DataFrames:
#   - inspecting structure with .index, .values, .shape, .dtypes, .columns,
#     .axes, and .info();
#   - selecting single and multiple columns;
#   - adding new columns with direct assignment and .insert();
#   - broadcasting arithmetic across columns;
#   - frequency analysis with .value_counts();
#   - handling missing data with .dropna() and .fillna();
#   - type conversion with .astype();
#   - sorting with .sort_values() and .sort_index();
#   - ranking with .rank().
# These operations form the core toolkit for data exploration and preparation
# before any modeling work.

# Load the NBA roster. The bare expressions in this section are notebook-style
# displays: they render a value in a notebook cell and are no-ops in a script.
nba = pd.read_csv("nba.csv")
nba

# Structural attributes of the DataFrame.
nba.index    # row labels
nba.values   # the data as a NumPy array
nba.shape    # (number of rows, number of columns)
nba.dtypes   # dtype of every column, as a Series
nba.columns  # column labels
nba.axes     # [row index, column index]
nba.info()   # prints a summary (dtypes, non-null counts, memory usage)

# How many columns share each dtype. DataFrame.get_dtype_counts() was
# deprecated in pandas 0.25 and removed in 1.0 (AttributeError on current
# releases); counting the values of .dtypes is the documented replacement.
nba.dtypes.value_counts()
# Revenue table, using its "Date" column as the row index.
rev = pd.read_csv("revenue.csv", index_col="Date")

# Sum down the rows: one total per column (this is the default axis).
rev.sum(axis="index")
# Sum across the columns: one total per row label.
rev.sum(axis="columns")
# Start again from a fresh copy of the data.
nba = pd.read_csv("nba.csv")

# A single column can be read by attribute access...
nba.Name

output = None  # placeholder; never read in the visible code
# ...or by bracket lookup, which also works for labels that are not valid
# Python identifiers.
nba["Name"]
type(nba["Name"])            # one label -> pandas Series
type(nba[["Name", "Team"]])  # list of labels -> pandas DataFrame

# Assigning a scalar to a new label appends a column (at the far right) with
# that value repeated on every row.
nba["Sport"] = "BasketBall"
nba.head()

nba["League"] = "National Basketball Association"
nba.head()

# NOTE: the original script called nba.insert() here with no arguments.
# DataFrame.insert requires loc, column and value, so that call raised
# TypeError and aborted the script; it has been removed. (Inserting "Sport"
# at this point would also fail because the column already exists.)
nba
# Reload the roster from disk.
nba = pd.read_csv("nba.csv")

nba
# Put a constant "Sport" column at position 3 (0-based); insert() modifies the
# DataFrame in place and returns None.
nba.insert(loc=3, column="Sport", value="Basketball")
nba.head()

# Broadcasting: the scalar is applied to every value, producing a new Series
# (the "Age" column itself is not changed).
nba["Age"] + 5

# Frequency of each distinct value in "Team".
nba["Team"].value_counts()

# Compare the last rows before and after dropping incomplete rows.
nba.tail()
nba.dropna(how="any").tail()  # drop a row if ANY value is missing (default)
nba.dropna(how="all")         # drop a row only if ALL values are missing
import pandas as pd
# Reload and discard only the rows in which every value is missing.
nba = pd.read_csv("nba.csv").dropna(how="all")

# Fill the remaining gaps by assigning the filled column back. The earlier
# form, nba["Salary"].fillna(0, inplace=True), is a chained in-place operation
# on an intermediate object: it emits a FutureWarning on pandas 2.x and leaves
# `nba` unchanged under Copy-on-Write (the default from pandas 3.0), after
# which the integer casts below would fail on the leftover NaN.
nba["Salary"] = nba["Salary"].fillna(0)
nba["College"] = nba["College"].fillna("None")
nba.tail()
nba.dtypes

nba.info()

# astype("int") raises if a column still contains NaN. "Salary" was filled
# above; "Number" and "Age" are assumed complete after the all-NaN row drop
# -- TODO confirm against the data.
nba["Salary"] = nba["Salary"].astype("int")
nba["Number"] = nba["Number"].astype("int")
nba["Age"] = nba["Age"].astype("int")

# Check the number of distinct values, then store the column as a categorical
# (suited to columns with few distinct values relative to the row count).
nba["Position"].nunique()
nba["Position"] = nba["Position"].astype("category")
nba["Team"].nunique()
nba["Team"] = nba["Team"].astype("category")

# sort_values / sort_index return new, sorted DataFrames; `nba` keeps its
# original order.
nba.sort_values("Name", ascending=False)
nba.sort_values("Salary", ascending=False)

nba.sort_index(ascending=False)

# Rank 1 = highest salary. With the default method="average", tied salaries
# receive a fractional rank, which the integer cast truncates.
nba["Salary Rank"] = nba["Salary"].rank(ascending=False).astype("int")
nba.sort_values(by="Salary", ascending=False)