import pandas as pd
Tamil Nadu Population Literacy Analysis
This notebook analyzes population and literacy rate data from Tamil Nadu, India across census years (1951-2011). It demonstrates type conversion with .astype(float), computing derived columns using NumPy's np.multiply(), defining custom functions for percentage calculations, and creating interactive visualizations with Plotly including bar charts and combined scatter-bar plots with custom styling.
Why this matters: Government census data is a classic real-world dataset that requires cleaning (type conversions), feature engineering (computing absolute literacy numbers from percentages), and multi-trace visualization to compare trends across demographics (male vs. female literacy). This project combines Pandas data manipulation with Plotly's interactive charting capabilities.
# Load the census table with "Census Year" as the row index. Date parsing is
# explicitly disabled, so the year values are not interpreted as dates.
literacy = pd.read_csv(
    "population_literacyrate.csv",
    index_col="Census Year",
    parse_dates=False,
)
literacy  # bare expression: displays the frame when run as a notebook cell
literacy["Population (in Lakhs)"]

# Cast the two columns used in later arithmetic to float, one after the other.
for column_name in ("Percentage of Literacy Total", "Population (in Lakhs)"):
    literacy[column_name] = literacy[column_name].astype(float)
literacy
import numpy as np
# Turn each literacy percentage into an absolute count (in lakhs):
#     population / 100 * percentage
# NOTE(review): the male and female counts are scaled by the same
# 'Population (in Lakhs)' column as the total -- confirm that a sex-specific
# population base is not required here.
population_per_percent = literacy['Population (in Lakhs)'] / 100
derived_columns = (
    ("Literacy Total", "Percentage of Literacy Total"),
    ("Literacy Male", "Percentage of Literacy Male"),
    ("Literacy Female", "Percentage of Literacy Female"),
)
for count_column, percent_column in derived_columns:
    literacy[count_column] = np.multiply(
        population_per_percent, literacy[percent_column]
    )
literacy
def compute_percentage(x: float, y: float) -> float:
    """Return ``x`` as a percentage of ``y``, rounded to two decimal places.

    Args:
        x: Numerator (the part).
        y: Denominator (the whole).

    Returns:
        ``x / y * 100`` as a float rounded to 2 decimals.

    Raises:
        ZeroDivisionError: If ``y`` is zero and both arguments are plain
            Python numbers.
    """
    # The quotient is coerced to a builtin float before scaling, so the
    # rounding below always operates on a plain float.
    pct = float(x / y) * 100
    return round(pct, 2)
# Row-wise percentage: DataFrame.apply with axis=1 passes each census-year row
# to the lambda, which forwards two scalar cells to compute_percentage.
# The result is not stored; as a notebook cell it is only displayed.
# NOTE(review): the argument order is kept as written (population as x,
# literacy percentage as y) -- confirm this is the ratio that was intended.
literacy.apply(
    lambda row: compute_percentage(
        row['Population (in Lakhs)'],
        row["Percentage of Literacy Total"],
    ),
    axis=1,
)
literacy
# NOTE(review): compute_percentage is declared with two required parameters
# (x, y), but Series.apply calls the function with each element only, so this
# line raises TypeError as written. The intended denominator is not visible
# here -- supply it (e.g. through apply's `args=`) once it is confirmed.
literacy["Percentage of Literacy Total"].apply(compute_percentage)
import plotly
from plotly.graph_objs import Scatter, Layout
import plotly.graph_objs as go
# Single bar trace: the 'Population (in Lakhs)' column plotted against the
# frame's index (the census years).
population_bars = go.Bar(
    x=literacy.index,
    y=literacy["Population (in Lakhs)"],
)
literate = [population_bars]

# Figure description handed to Plotly as a plain dict (traces + layout).
bar_figure = {
    'data': literate,
    'layout': {
        'title': 'POPULATION LITERACY RATE IN TN',
        'xaxis': {'title': 'YEAR'},
        'yaxis': {'title': 'POPULATION (in lakhs) '},
    },
}
plotly.offline.plot(bar_figure)
import plotly.plotly as py
from plotly.graph_objs import *
# Scatter trace (lines + markers) named "Literacy Total".
# The y/text values are hard-coded numeric strings, one per census year.
# NOTE(review): the *src / uid entries look like references exported from a
# hosted Plotly grid -- confirm they are still needed for offline plotting.
trace1 = dict(
    x=[1951, 1961, 1971, 1981, 1991, 2001, 2011],
    y=["62.64", "122.59", "187.05", "263.3", "350.01", "458.52", "577.77"],
    marker=dict(
        line=dict(width=1),
        symbol="hash-open-dot",
    ),
    mode="lines+markers",
    name="Literacy Total",
    text=["62.64", "122.59", "187.05", "263.3", "350.01", "458.52", "577.77"],
    textsrc="sivabalan:0:a12243",
    type="scatter",
    uid="ab6d87",
    xsrc="sivabalan:0:64269c",
    ysrc="sivabalan:0:a12243",
)
# Bar trace named "Literacy Male"; hover shows only the x and y values.
# The y/text values are hard-coded numeric strings, one per census year.
trace2 = dict(
    x=[1951, 1961, 1971, 1981, 1991, 2001, 2011],
    y=["95.48", "173.8", "245.3", "329.63", "411.96", "513.82", "625.96"],
    hoverinfo="x+y",
    marker=dict(
        color="rgb(57, 172, 115)",
        line=dict(width=0),
    ),
    name="Literacy Male",
    opacity=1,
    text=["95.48", "173.8", "245.3", "329.63", "411.96", "513.82", "625.96"],
    textsrc="sivabalan:0:d51cc3",
    type="bar",
    uid="01452a",
    xsrc="sivabalan:0:64269c",
    ysrc="sivabalan:0:d51cc3",
)
# Bar trace named "Literacy Female"; hover shows only the x and y values.
# The y/text values are hard-coded numeric strings, one per census year.
trace3 = dict(
    x=[1951, 1961, 1971, 1981, 1991, 2001, 2011],
    y=["30.42", "70.95", "127.39", "195.72", "286.72", "402.86", "529.8"],
    hoverinfo="x+y",
    marker=dict(
        color="rgb(32, 96, 64)",
        line=dict(width=0),
    ),
    name="Literacy Female",
    opacity=1,
    text=["30.42", "70.95", "127.39", "195.72", "286.72", "402.86", "529.8"],
    textsrc="sivabalan:0:ea8d61",
    type="bar",
    uid="447297",
    xsrc="sivabalan:0:64269c",
    ysrc="sivabalan:0:ea8d61",
)
# Traces for the combined chart, in drawing order: the scatter line first,
# then the two bar series. A plain list is used instead of the deprecated
# plotly.graph_objs.Data wrapper; Figure accepts a list of trace dicts.
data = [trace1, trace2, trace3]
# Layout for the combined chart: grouped bars, a shared background colour for
# paper and plot area, and linear x/y axes with titles.
# Both axes set autorange=True while also listing an explicit range.
layout = dict(
    autosize=True,
    barmode="group",
    paper_bgcolor="rgb(255, 250, 230)",
    plot_bgcolor="rgb(255, 250, 230)",
    showlegend=True,
    title="POPULATION LITERACY RATE IN TAMILNADU",
    titlefont=dict(family="Roboto"),
    xaxis=dict(
        autorange=True,
        range=[1946, 2016],
        title="YEAR",
        type="linear",
    ),
    yaxis=dict(
        autorange=True,
        range=[0, 658.905263158],
        title="POPULATION (in lakhs) ",
        type="linear",
    ),
)
# Combine the traces and the layout into one figure object.
fig = Figure(
    layout=layout,
    data=data,
)
# Render the figure offline; whatever plotly.offline.plot returns is kept
# in plot_url.
plot_url = plotly.offline.plot(figure_or_data=fig)
import colorlover as cl
from IPython.display import HTML
# Preview of the 3-colour diverging 'RdYlBu' scale as an HTML swatch.
three_color_rdylbu = cl.scales['3']['div']['RdYlBu']
HTML(cl.to_html(three_color_rdylbu))

# Preview of all scales with 11 colors.
eleven_color_scales = cl.scales['11']
HTML(cl.to_html(eleven_color_scales))