Run this notebook: Open in Colab Open in Kaggle

import pandas as pd

Tamil Nadu Population Literacy Analysis

This notebook analyzes population and literacy rate data from Tamil Nadu, India across census years (1951-2011). It demonstrates type conversion with .astype(float), computing derived columns using NumPy’s np.multiply(), defining custom functions for percentage calculations, and creating interactive visualizations with Plotly including bar charts and combined scatter-bar plots with custom styling.

Why this matters: Government census data is a classic real-world dataset that requires cleaning (type conversions), feature engineering (computing absolute literacy numbers from percentages), and multi-trace visualization to compare trends across demographics (male vs. female literacy). This project combines Pandas data manipulation with Plotly’s interactive charting capabilities.

# Load the census table, keyed by census year (date parsing stays disabled).
literacy = pd.read_csv(
    "population_literacyrate.csv",
    parse_dates=False,
    index_col="Census Year",
)
literacy

literacy["Population (in Lakhs)"]

# Cast the columns used in the calculations below to float.
for column in ("Percentage of Literacy Total", "Population (in Lakhs)"):
    literacy[column] = literacy[column].astype(float)

literacy

import numpy as np
# One percent of the population per census year; multiplying it by a literacy
# percentage yields an absolute count in the same unit as the population.
# NOTE(review): the male/female counts are also scaled against the *total*
# population column — confirm that this is the intended base.
one_percent_of_population = literacy['Population (in Lakhs)'] / 100

literacy["Literacy Total"] = np.multiply(
    one_percent_of_population, literacy["Percentage of Literacy Total"]
)
literacy["Literacy Male"] = np.multiply(
    one_percent_of_population, literacy["Percentage of Literacy Male"]
)
literacy["Literacy Female"] = np.multiply(
    one_percent_of_population, literacy["Percentage of Literacy Female"]
)

literacy

def compute_percentage(x, y):
    """Return ``x`` expressed as a percentage of ``y``.

    The quotient ``x / y`` is converted to ``float``, scaled by 100 and
    rounded to two decimal places.
    """
    ratio = float(x / y)
    return round(ratio * 100, 2)

# Recompute the overall literacy percentage for each census year from the
# derived "Literacy Total" count and the population. compute_percentage()
# works on scalars, so it is applied one row at a time (axis=1).
# NOTE(review): the numerator/denominator pair is the most plausible reading
# of this cell — confirm it matches the intended calculation.
literacy.apply(
    lambda row: compute_percentage(
        row["Literacy Total"], row["Population (in Lakhs)"]
    ),
    axis=1,
)

literacy

# Series.apply passes only the cell value, so the second argument of
# compute_percentage() must be supplied explicitly via ``args``. With a
# denominator of 100 the helper returns the stored percentage rounded to two
# decimals.
# NOTE(review): confirm that 100 is the intended denominator.
literacy["Percentage of Literacy Total"].apply(compute_percentage, args=(100,))

import plotly
from plotly.graph_objs import Scatter, Layout
import plotly.graph_objs as go


# Single bar trace: population for every census year in the index.
population_bars = go.Bar(
    x=literacy.index,
    y=literacy["Population (in Lakhs)"],
)
literate = [population_bars]

# Chart title and axis captions.
bar_chart_layout = {
    'title': 'POPULATION LITERACY RATE IN TN',
    'xaxis': {'title': 'YEAR'},
    'yaxis': {'title': 'POPULATION (in lakhs) '},
}

# Render the figure with plotly's offline plotting entry point.
plotly.offline.plot({'data': literate, 'layout': bar_chart_layout})

import plotly.plotly as py
from plotly.graph_objs import *

# Census years shared by all three traces on the x-axis.
census_years = [1951, 1961, 1971, 1981, 1991, 2001, 2011]

# Literate counts (in lakhs) per census year, kept as numbers so the linear
# y-axis receives numeric data instead of numeric-looking strings.
literacy_total = [62.64, 122.59, 187.05, 263.3, 350.01, 458.52, 577.77]
literacy_male = [95.48, 173.8, 245.3, 329.63, 411.96, 513.82, 625.96]
literacy_female = [30.42, 70.95, 127.39, 195.72, 286.72, 402.86, 529.8]

# All values are supplied inline, so the traces carry no Chart Studio grid
# (`*src`) references.

# Line-and-marker trace for the total literate count.
trace1 = {
  "x": census_years,
  "y": literacy_total,
  "marker": {
    "line": {"width": 1},
    "symbol": "hash-open-dot"
  },
  "mode": "lines+markers",
  "name": "Literacy Total",
  # Text labels mirror the y values, derived once instead of duplicated.
  "text": [str(value) for value in literacy_total],
  "type": "scatter",
  "uid": "ab6d87"
}

# Bar trace for the male literate count.
trace2 = {
  "x": census_years,
  "y": literacy_male,
  "hoverinfo": "x+y",
  "marker": {
    "color": "rgb(57, 172, 115)",
    "line": {"width": 0}
  },
  "name": "Literacy Male",
  "opacity": 1,
  "text": [str(value) for value in literacy_male],
  "type": "bar",
  "uid": "01452a"
}

# Bar trace for the female literate count.
trace3 = {
  "x": census_years,
  "y": literacy_female,
  "hoverinfo": "x+y",
  "marker": {
    "color": "rgb(32, 96, 64)",
    "line": {"width": 0}
  },
  "name": "Literacy Female",
  "opacity": 1,
  "text": [str(value) for value in literacy_female],
  "type": "bar",
  "uid": "447297"
}

# A plain list of traces replaces the deprecated Data(...) wrapper.
data = [trace1, trace2, trace3]

# Grouped bars on a cream background, with titled linear axes.
layout = {
  "autosize": True,
  "barmode": "group",
  "paper_bgcolor": "rgb(255, 250, 230)",
  "plot_bgcolor": "rgb(255, 250, 230)",
  "showlegend": True,
  "title": "POPULATION LITERACY RATE IN TAMILNADU",
  "titlefont": {"family": "Roboto"},
  "xaxis": {
    "autorange": True,
    "range": [1946, 2016],
    "title": "YEAR",
    "type": "linear"
  },
  "yaxis": {
    "autorange": True,
    "range": [0, 658.905263158],
    "title": "POPULATION (in lakhs) ",
    "type": "linear"
  }
}

fig = go.Figure(data=data, layout=layout)
plot_url = plotly.offline.plot(fig)

import colorlover as cl
from IPython.display import HTML

# Preview the 3-colour diverging RdYlBu scale as HTML.
rdylbu_three = cl.scales['3']['div']['RdYlBu']
HTML(cl.to_html(rdylbu_three))

# Preview all scales with 11 colors.
scales_with_eleven = cl.scales['11']
HTML(cl.to_html(scales_with_eleven))