Run this notebook: Open in Colab Open in Kaggle

import pandas as pd

DataFrame Fundamentals – Inspection, Selection, and Cleaning

This notebook covers the essential operations you need to work with Pandas DataFrames: inspecting structure with .index, .values, .shape, .dtypes, .columns, .axes, and .info(); selecting single and multiple columns; adding new columns with direct assignment and .insert(); broadcasting arithmetic across columns; frequency analysis with .value_counts(); handling missing data with .dropna() and .fillna(); type conversion with .astype(); sorting with .sort_values() and .sort_index(); and ranking with .rank(). These operations form the core toolkit for data exploration and preparation before any modeling work.

# Load the NBA data set from a CSV file in the working directory.
nba = pd.read_csv("nba.csv")
nba

# Row labels of the DataFrame.
nba.index

# Underlying data as a NumPy array.
nba.values

# Tuple of (number of rows, number of columns).
nba.shape

# Data type of each column.
nba.dtypes

# Column labels.
nba.columns

# List holding the row-axis labels followed by the column-axis labels.
nba.axes

# Printed summary: index, per-column non-null counts and dtypes, memory usage.
nba.info()

# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in
# pandas 1.0; counting the entries of .dtypes yields the same per-dtype tally
# of columns and works on every pandas version.
nba.dtypes.value_counts()

# Load the revenue table, using its "Date" column as the row index.
rev = pd.read_csv("revenue.csv", index_col="Date")

# Column totals: add the values down the rows (the default axis).
rev.sum(axis="index")

# Row totals: add the values across the columns.
rev.sum(axis="columns")

# Reload the data so this section starts from the unmodified file contents.
nba = pd.read_csv("nba.csv")
# Attribute-style access to a single column.
nba.Name

output = None

# Bracket-style access to the same column.
nba["Name"]

# Selecting one column with a string key.
type(nba["Name"])

# Selecting with a list of column names.
type(nba[["Name", "Team"]])

# Assigning a scalar to a new column name appends the column and broadcasts
# the value to every row.
nba["Sport"] = "BasketBall"

nba.head()

nba["League"] = "National Basketball Association"

nba.head()

# DataFrame.insert() requires loc, column and value; calling it with no
# arguments raises TypeError and halts the script. Show its documentation
# instead, which is what a bare signature lookup is after.
help(nba.insert)

nba

# Reload the file so the columns added earlier are gone.
nba = pd.read_csv("nba.csv")

nba

# Place a constant "Sport" column at position 3 (zero-based) instead of
# appending it at the end; insert() modifies the DataFrame in place.
nba.insert(loc=3, column="Sport", value="Basketball")

nba.head(5)

# Broadcasting: add 5 to every value of the Age column (returns a new Series).
nba["Age"] + 5

# Frequency of each distinct Team value.
nba["Team"].value_counts()

nba.tail(5)

# Drop every row that has at least one missing value, then look at the end.
nba.dropna(how="any").tail(5)

# Drop only the rows in which every value is missing.
nba.dropna(how="all")

import pandas as pd
# Reload the data and discard rows in which every value is missing.
nba = pd.read_csv("nba.csv").dropna(how="all")

# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form emits a FutureWarning on
# pandas 2.x and does not update the DataFrame under Copy-on-Write, which
# would leave NaN in place and break the integer casts below.
nba["Salary"] = nba["Salary"].fillna(0)
nba["College"] = nba["College"].fillna("None")

nba.tail()

nba.dtypes

nba.info()

# Casting to int raises if the column still contains NaN, hence the fill above.
nba["Salary"] = nba["Salary"].astype("int")

# NOTE(review): assumes Number and Age have no missing values once the
# all-NaN rows are dropped -- confirm against the data file.
nba["Number"] = nba["Number"].astype("int")
nba["Age"] = nba["Age"].astype("int")

# Number of distinct, non-missing values in the Position column.
nba["Position"].nunique(dropna=True)

# Store Position with the categorical dtype.
nba["Position"] = nba["Position"].astype(dtype="category")

# Same check and conversion for Team.
nba["Team"].nunique(dropna=True)

nba["Team"] = nba["Team"].astype(dtype="category")

# Sort by Name, then by Salary, both descending; each call returns a new
# DataFrame and leaves nba itself unchanged.
nba.sort_values(by="Name", ascending=False)
nba.sort_values(by="Salary", ascending=False)

# Sort by the row labels, descending.
nba.sort_index(ascending=False)

# Rank salaries so that the largest salary gets rank 1, stored as integers.
# NOTE(review): rank() averages the ranks of tied values by default, so ties
# can produce fractional ranks that the int cast truncates.
nba["Salary Rank"] = (
    nba["Salary"]
    .rank(ascending=False)
    .astype("int")
)

# View the rows ordered by salary, highest first, alongside the new rank.
nba.sort_values("Salary", ascending=False)