Kaggle link: https://www.kaggle.com/code/venkatasubramani/data-visualization-in-python
my portfolio: https://venkatasubramani.github.io/
Author: Venkatasubramani Karthikeyan
Why Plotly?
The Plotly framework lets us plot interactive graphs, and it has some extra features that matplotlib and seaborn don't have.
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objects as go
import plotly.express as px
from plotly import tools
import plotly.offline as py
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# List every file found under the Kaggle input directory, one full path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # A bare os.path.join(...) expression has no visible effect when run
        # as a script, so print the joined path to actually list the file.
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
# reading the data
# Load the weekly time-use table; the Sankey section below reads its
# 'Source', 'Target' and 'Value' columns.
my_weekly_data = pd.read_csv("../input/datasciencelub/myweeklydata.csv")
# Notebook-style preview of the first rows; the result is not assigned or printed.
my_weekly_data.head()
Source | Target | Value | |
---|---|---|---|
0 | Total hours in week | University | 14 |
1 | Total hours in week | Home | 109 |
2 | Total hours in week | Commute/Transport/Walk | 10 |
3 | Total hours in week | Part time | 20 |
4 | Total hours in week | Fun activities/playing/ movies | 10 |
# manipulating the data for plotting a Sankey chart
# Collect every distinct node label in first-seen order, visiting each row's
# source before its target.  dict.fromkeys de-duplicates while preserving
# insertion order, which avoids a linear membership scan per label.
src_lst = my_weekly_data['Source'].to_list()
tgt_lst = my_weekly_data['Target'].to_list()
task_lst = list(dict.fromkeys(
    label for pair in zip(src_lst, tgt_lst) for label in pair
))

# The Sankey trace below refers to nodes by their position in task_lst, so
# translate each label to that position.  A lookup table built once replaces
# a task_lst.index() search for every row.
task_index = {label: position for position, label in enumerate(task_lst)}
my_weekly_data['Source_index'] = my_weekly_data['Source'].map(task_index)
my_weekly_data['Target_index'] = my_weekly_data['Target'].map(task_index)
# plotting the Sankey Chart
# Node appearance; labels come from task_lst, so node k is task_lst[k].
sankey_nodes = dict(
    label=task_lst,
    color="blue",
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
)
# Each link joins two node positions and is weighted by the row's Value.
sankey_links = dict(
    source=my_weekly_data["Source_index"],
    target=my_weekly_data["Target_index"],
    value=my_weekly_data["Value"],
)

fig = go.Figure(data=[go.Sankey(node=sankey_nodes, link=sankey_links)])
fig.update_layout(title_text="My weekly data Sankey Chart", font_size=10)
fig.show()
# Country-of-birth breakdown for each census year.  Every record is
# [region label, country label, population_percent, value]: 'value' sizes the
# sunburst wedge and 'population_percent' drives its colour.
# NOTE(review): the 2011 "Outside 36.66" label does not equal the sum of its
# three country figures (0.56 + 9.22 + 26.90 = 36.68) - confirm the source data.
_SUNBURST_COLUMNS = ['region', 'country', 'population_percent', 'value']
_SUNBURST_ROWS_BY_YEAR = {
    "2001": [
        ["United Kingdom 76.95", "England 74.80", 74.80, 50],
        ["United Kingdom 76.95", "Scotland 1.06", 1.06, 4],
        ["United Kingdom 76.95", "Wales 0.72", 0.72, 3],
        ["United Kingdom 76.95", "N Ireland 0.37", 0.37, 3],
        ["Outside 23.06", "R Ireland 0.89", 0.89, 3],
        ["Outside 23.06", "Other EU 1.14", 1.14, 4],
        ["Outside 23.06", "Elsewhere 21.03", 21.03, 15],
    ],
    "2011": [
        ["United Kingdom 63.33", "England 62.03", 62.03, 50],
        ["United Kingdom 63.33", "Scotland 0.67", 0.67, 5],
        ["United Kingdom 63.33", "Wales 0.40", 0.40, 3],
        ["United Kingdom 63.33", "N Ireland 0.23", 0.23, 3],
        ["Outside 36.66", "R Ireland 0.56", 0.56, 3],
        ["Outside 36.66", "Other EU 9.22", 9.22, 5],
        ["Outside 36.66", "Elsewhere 26.90", 26.90, 20],
    ],
    "2021": [
        ["United Kingdom 54.04", "England 53.17", 53.17, 40],
        ["United Kingdom 54.04", "Scotland 0.45", 0.45, 5],
        ["United Kingdom 54.04", "Wales 0.27", 0.27, 3],
        ["United Kingdom 54.04", "N Ireland 0.15", 0.15, 3],
        ["Outside 45.96", "R Ireland 0.31", 0.31, 3],
        ["Outside 45.96", "Other EU 15.87", 15.87, 5],
        ["Outside 45.96", "Elsewhere 29.78", 29.78, 20],
    ],
}


def _sunburst_for_year(rows, year):
    """Build the region -> country sunburst for one census year.

    Args:
        rows: list of [region, country, population_percent, value] records.
        year: census year as a string; used as the figure title.

    Returns:
        A ``(frame, figure)`` pair: the DataFrame built from *rows* and the
        sunburst figure drawn from it.
    """
    frame = pd.DataFrame(rows, columns=_SUNBURST_COLUMNS)
    figure = px.sunburst(
        frame,
        path=['region', 'country'],
        values='value',
        color='population_percent',
        title=year,
    )
    # Same colour-bar x position and canvas size for every year.
    figure.update_layout(coloraxis_colorbar_x=-0.35, width=500, height=500)
    return frame, figure


# Draw one chart per year, in chronological order.  `df` and `fig` stay bound
# at module level and end up holding the 2021 frame and figure.
for _year, _rows in _SUNBURST_ROWS_BY_YEAR.items():
    df, fig = _sunburst_for_year(_rows, _year)
    fig.show()
# National-identity figures per census year.  Each record is
# (Nationality, value, population_percent); the bar chart that follows uses
# population_percent as the bar length and value as the printed label.
_IDENTITY_COLUMNS = ['Nationality', 'value', 'population_percent', 'year']
_IDENTITY_BY_YEAR = {
    '2021': [
        ('British only identity', 55.78, 50),
        ('Other identity only', 23.78, 25),
        ('English and British only identity', 8.22, 8),
        ('English only identity', 8.18, 8),
        ('Other identity and at least one UK identity', 2.98, 5),
        ('Minority Nationalities', 1.04, 4),
    ],
    '2011': [
        ('British only identity', 33.52, 30),
        ('Other identity only', 17.10, 15),
        ('English and British only identity', 5.99, 5),
        ('English only identity', 40.52, 40),
        ('Other identity and at least one UK identity', 1.19, 5),
        ('Minority Nationalities', 1.65, 5),
    ],
}
# Flatten to one row per (year, nationality), with the 2021 rows first.
df = pd.DataFrame(
    [
        [nationality, share, bar_length, census_year]
        for census_year, records in _IDENTITY_BY_YEAR.items()
        for nationality, share, bar_length in records
    ],
    columns=_IDENTITY_COLUMNS,
)
# Bar chart with the census year on the y-axis and one coloured segment per
# nationality: population_percent gives the segment length, value the text
# printed on it.
fig = px.bar(
    df,
    x="population_percent",
    y="year",
    color='Nationality',
    text='value',
    title='Nationality',
    height=400,
)
fig.update_layout(width=1000)
fig.show()
def _birth_pie(year, values, x_domain):
    """Return the country-of-birth pie trace for one census year.

    Args:
        year: census year as a string; used as both trace name and pie title.
        values: slice sizes in the order UK, Rest of Europe, Elsewhere.
        x_domain: [left, right] horizontal extent of this pie in the figure.
    """
    return go.Pie(
        values=values,
        labels=["UK", "Rest of Europe", "Elsewhere"],
        domain=dict(x=x_domain),
        name=year,
        title=year,
        hoverinfo="label+percent+name",
    )


# Three pies side by side, each given its own horizontal slot of the figure.
# NOTE(review): the 2011 values sum to 98.7 and the 2021 values to 119.22, and
# their UK figures (66.4 / 65.04) differ from the 63.33 / 54.04 used by the
# sunburst data earlier in this file - confirm these inputs.
trace1 = _birth_pie("2001", [76.95, 2.02, 21.03], [0, 0.30])
trace2 = _birth_pie("2011", [66.4, 5.4, 26.90], [0.35, 0.65])
trace3 = _birth_pie("2021", [65.04, 24.4, 29.78], [0.70, 1.0])

layout = go.Layout(title="Country of birth")
data = [trace1, trace2, trace3]
fig = go.Figure(data=data, layout=layout)
fig.show()
# Headings for the five health categories, in the same order as the numbers
# in each x_data row ('<br>' splits a heading over two lines).
top_labels = [
    'Very good<br> health',
    'good<br> health',
    'fair<br> health',
    'bad <br> health',
    'Very bad<br> health',
]

# One fill colour per health category; colors[k] is paired with top_labels[k].
colors = [
    'rgba(38, 24, 74, 0.8)',
    'rgba(71, 58, 131, 0.8)',
    'rgba(122, 120, 168, 0.8)',
    'rgba(164, 163, 204, 0.85)',
    'rgba(190, 192, 213, 1)',
]

# Percentage of each health category per area, keyed by the row label.
_health_shares_by_area = {
    'Leicester 2011': [21, 19, 22, 18, 20],
    'Blaby 2011': [20, 19, 21, 22, 18],
    'Charnwood': [19, 18, 23, 20, 20],
    'Harborough': [17, 21, 20, 22, 20],
    'Hinckley and Bosworth': [19, 20, 18, 21, 22],
    'Melton': [20, 19, 20, 21, 20],
    'North West Leicestershire': [18, 21, 20, 20, 21],
    'Oadby and Wigston': [19, 22, 19, 21, 19],
}
# Parallel lists used by the plotting code: y_data[k] labels the row x_data[k].
y_data = list(_health_shares_by_area)
x_data = list(_health_shares_by_area.values())
fig = go.Figure()

# Add one single-bar trace per (health category, area) cell, category by
# category; barmode='stack' below lays the segments of each area end to end.
for level in range(len(x_data[0])):
    for shares, area in zip(x_data, y_data):
        segment = go.Bar(
            x=[shares[level]],
            y=[area],
            orientation='h',
            marker=dict(
                color=colors[level],
                line=dict(color='rgb(248, 248, 249)', width=1),
            ),
        )
        fig.add_trace(segment)

# Both axes are drawn without grid, axis line, tick labels or zero line.
bare_axis = dict(
    showgrid=False,
    showline=False,
    showticklabels=False,
    zeroline=False,
)
fig.update_layout(
    # The x-axis starts at 15% of the figure width, leaving a strip on the left.
    xaxis=dict(bare_axis, domain=[0.15, 1]),
    yaxis=dict(bare_axis),
    barmode='stack',
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248, 248, 255)',
    margin=dict(l=120, r=10, t=140, b=80),
    showlegend=False,
)
def _text_label(x, y, text, color, **placement):
    """Return an arrow-less Arial 14 text annotation at (x, y).

    *placement* carries the per-label settings (xref, yref, anchors, alignment).
    """
    return dict(
        x=x,
        y=y,
        text=text,
        font=dict(family='Arial', size=14, color=color),
        showarrow=False,
        **placement,
    )


_DARK_TEXT = 'rgb(67, 67, 67)'
_LIGHT_TEXT = 'rgb(248, 248, 255)'

annotations = []
for area, shares in zip(y_data, x_data):
    # Row name, right-anchored at x=0.14 in paper coordinates.
    annotations.append(_text_label(
        0.14, area, str(area), _DARK_TEXT,
        xref='paper', yref='y', xanchor='right', align='right',
    ))

    # Category headings are emitted once, using the last row's segment centres.
    is_heading_row = area == y_data[-1]

    left_edge = 0  # running total of the segments already placed in this row
    for position, share in enumerate(shares):
        centre = left_edge + share / 2
        # Percentage text centred on its segment.
        annotations.append(_text_label(
            centre, area, str(share) + '%', _LIGHT_TEXT,
            xref='x', yref='y',
        ))
        if is_heading_row:
            # Category heading above the plot, over the same segment centre.
            annotations.append(_text_label(
                centre, 1.1, top_labels[position], _DARK_TEXT,
                xref='x', yref='paper',
            ))
        left_edge += share

fig.update_layout(annotations=annotations)
fig.show()
Please visit my portfolio to see my other projects: https://venkatasubramani.github.io/
My Kaggle profile: https://www.kaggle.com/code/venkatasubramani/
My GitHub profile: https://github.com/venkatasubramani