import pandas as pd

Tamil Nadu Population Literacy Analysis

This notebook analyzes population and literacy rate data from Tamil Nadu, India across census years (1951-2011). It demonstrates type conversion with .astype(float), computing derived columns using NumPy's np.multiply(), defining custom functions for percentage calculations, and creating interactive visualizations with Plotly including bar charts and combined scatter-bar plots with custom styling.

Why this matters: Government census data is a classic real-world dataset that requires cleaning (type conversions), feature engineering (computing absolute literacy numbers from percentages), and multi-trace visualization to compare trends across demographics (male vs. female literacy). This project combines Pandas data manipulation with Plotly's interactive charting capabilities.

# Load the census table, using the "Census Year" column as the row index and
# without attempting to parse any column as dates.
literacy = pd.read_csv(
    "population_literacyrate.csv",
    index_col="Census Year",
    parse_dates=False,
)
literacy
literacy["Population (in Lakhs)"]
# Cast the total-literacy percentage and the population column to float in a
# single astype call.
literacy = literacy.astype(
    {
        "Percentage of Literacy Total": float,
        "Population (in Lakhs)": float,
    }
)
literacy
import numpy as np
# Turn each literacy percentage into an absolute figure in the same unit as
# the population column: (population / 100) * percentage.
population_per_hundred = literacy['Population (in Lakhs)'] / 100
literacy["Literacy Total"] = np.multiply(
    population_per_hundred, literacy["Percentage of Literacy Total"]
)
literacy["Literacy Male"] = np.multiply(
    population_per_hundred, literacy["Percentage of Literacy Male"]
)
literacy["Literacy Female"] = np.multiply(
    population_per_hundred, literacy["Percentage of Literacy Female"]
)
literacy
def compute_percentage(x, y):
    """Return ``x`` expressed as a percentage of ``y``, rounded to 2 decimals.

    The quotient ``x / y`` is converted with ``float()`` before scaling, so
    the arguments are expected to be scalars whose quotient ``float()``
    accepts.  With built-in numbers, ``y == 0`` raises ``ZeroDivisionError``.
    """
    ratio = float(x / y)
    return round(ratio * 100, 2)
# The two cells that used to be here could not run: the first was missing a
# comma between the arguments of compute_percentage and ended in a stray
# ``=1`` (SyntaxError); the second handed the two-argument compute_percentage
# to Series.apply, which supplies only one argument (TypeError).
# They are replaced by a single row-wise apply (axis=1) so that both arguments
# come from the same census-year row.  The result is only displayed, not
# stored, so no column of ``literacy`` is modified.
# NOTE(review): the column choice is a best guess at the intent -- it
# recomputes the literate share of the population from the derived
# "Literacy Total" column; confirm this is the percentage that was wanted.
literacy.apply(
    lambda row: compute_percentage(
        row["Literacy Total"], row["Population (in Lakhs)"]
    ),
    axis=1,
)
literacy
import plotly
from plotly.graph_objs import Scatter, Layout
import plotly.graph_objs as go


# Single bar trace: population (in lakhs) per census year.
literate = [
    go.Bar(
        x=literacy.index,
        y=literacy["Population (in Lakhs)"],
    )
]

# Titles for the chart and its axes, kept as plain dicts and passed to
# plotly.offline.plot together with the trace list.
population_layout = dict(
    title='POPULATION LITERACY RATE IN TN',
    xaxis=dict(title='YEAR'),
    yaxis=dict(title='POPULATION (in lakhs) '),
)
plotly.offline.plot(dict(data=literate, layout=population_layout))
import plotly.plotly as py
from plotly.graph_objs import *
# Scatter trace (lines + markers) named "Literacy Total", one point per
# census year.  The y/text values are stored as numeric strings.
# NOTE(review): the uid and *src entries look like references into a hosted
# plotly grid -- confirm whether they are still needed for offline plotting.
trace1 = dict(
    x=[1951, 1961, 1971, 1981, 1991, 2001, 2011],
    y=["62.64", "122.59", "187.05", "263.3", "350.01", "458.52", "577.77"],
    marker=dict(
        line=dict(width=1),
        symbol="hash-open-dot",
    ),
    mode="lines+markers",
    name="Literacy Total",
    text=["62.64", "122.59", "187.05", "263.3", "350.01", "458.52", "577.77"],
    textsrc="sivabalan:0:a12243",
    type="scatter",
    uid="ab6d87",
    xsrc="sivabalan:0:64269c",
    ysrc="sivabalan:0:a12243",
)
# Bar trace named "Literacy Male", one bar per census year, drawn in a
# mid green with no bar outline.  The y/text values are numeric strings.
trace2 = dict(
    x=[1951, 1961, 1971, 1981, 1991, 2001, 2011],
    y=["95.48", "173.8", "245.3", "329.63", "411.96", "513.82", "625.96"],
    hoverinfo="x+y",
    marker=dict(
        color="rgb(57, 172, 115)",
        line=dict(width=0),
    ),
    name="Literacy Male",
    opacity=1,
    text=["95.48", "173.8", "245.3", "329.63", "411.96", "513.82", "625.96"],
    textsrc="sivabalan:0:d51cc3",
    type="bar",
    uid="01452a",
    xsrc="sivabalan:0:64269c",
    ysrc="sivabalan:0:d51cc3",
)
# Bar trace named "Literacy Female", one bar per census year, drawn in a
# dark green with no bar outline.  The y/text values are numeric strings.
trace3 = dict(
    x=[1951, 1961, 1971, 1981, 1991, 2001, 2011],
    y=["30.42", "70.95", "127.39", "195.72", "286.72", "402.86", "529.8"],
    hoverinfo="x+y",
    marker=dict(
        color="rgb(32, 96, 64)",
        line=dict(width=0),
    ),
    name="Literacy Female",
    opacity=1,
    text=["30.42", "70.95", "127.39", "195.72", "286.72", "402.86", "529.8"],
    textsrc="sivabalan:0:ea8d61",
    type="bar",
    uid="447297",
    xsrc="sivabalan:0:64269c",
    ysrc="sivabalan:0:ea8d61",
)
# Collect the three traces in a plain list.  plotly's ``Data`` wrapper is
# deprecated in favour of passing a list (or tuple) of traces, which Figure
# accepts directly.
data = [trace1, trace2, trace3]
# Layout for the combined chart: grouped bars, a cream background for both
# the paper and the plotting area, a visible legend, and titled linear axes.
# Both axes set autorange=True alongside an explicit range.
layout = dict(
    autosize=True,
    barmode="group",
    paper_bgcolor="rgb(255, 250, 230)",
    plot_bgcolor="rgb(255, 250, 230)",
    showlegend=True,
    title="POPULATION LITERACY RATE IN TAMILNADU",
    titlefont=dict(family="Roboto"),
    xaxis=dict(
        autorange=True,
        range=[1946, 2016],
        title="YEAR",
        type="linear",
    ),
    yaxis=dict(
        autorange=True,
        range=[0, 658.905263158],
        title="POPULATION (in lakhs) ",
        type="linear",
    ),
)
# Assemble the combined scatter + grouped-bar figure and write it out as a
# standalone HTML page.
# plotly.offline.plot writes to "temp-plot.html" unless told otherwise, and
# the population bar chart plotted earlier in this file already uses that
# default; an explicit filename keeps this chart from overwriting it.
fig = Figure(data=data, layout=layout)
plot_url = plotly.offline.plot(fig, filename="literacy-rate-tamilnadu.html")
import colorlover as cl
from IPython.display import HTML
# Build HTML previews of colorlover palettes.  These are bare expressions, so
# they are only shown when an interactive front end displays the value.
three_class_rdylbu = cl.scales['3']['div']['RdYlBu']
HTML(cl.to_html(three_class_rdylbu))
HTML(cl.to_html(cl.scales['11']))  # All scales with 11 colors