# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objects as go
import plotly.express as px
from plotly import tools
import plotly.offline as py
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
# os.walk yields nothing when the directory does not exist, so this is a
# no-op outside the Kaggle environment.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


# Load the weekly time table; later cells read its Source, Target and Value
# columns.
my_weekly_data = pd.read_csv(
    filepath_or_buffer="../input/datasciencelub/myweeklydata.csv",
)
# Preview of the first five rows (only displayed when run as a notebook cell).
my_weekly_data.head(n=5)


# manipulating the data for plotting a Sankey chart
#
# Every distinct Source/Target name gets an integer index in order of first
# appearance (each row contributes its Source before its Target). task_lst
# holds the names in that order and the *_index columns hold the positions.
src_lst = my_weekly_data['Source'].to_list()
tgt_lst = my_weekly_data['Target'].to_list()

# A dict keeps insertion order, so one mapping provides both the ordered label
# list and a constant-time name -> index lookup (no repeated list scans).
task_index = {}
for src, tgt in zip(src_lst, tgt_lst):
    # len() is evaluated before insertion, so a new name gets the next free slot
    # and an already-seen name keeps its original index.
    task_index.setdefault(src, len(task_index))
    task_index.setdefault(tgt, len(task_index))
task_lst = list(task_index)

my_weekly_data['Source_index'] = my_weekly_data['Source'].apply(lambda x: task_index[x])
my_weekly_data['Target_index'] = my_weekly_data['Target'].apply(lambda x: task_index[x])


# plotting the Sankey Chart

# Node appearance; the labels are the task names collected in task_lst.
sankey_node = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": task_lst,
    "color": "blue",
}

# One link per table row; source/target are indices into the label list.
sankey_link = {
    "source": my_weekly_data["Source_index"],
    "target": my_weekly_data["Target_index"],
    "value": my_weekly_data["Value"],
}

fig = go.Figure(data=[go.Sankey(node=sankey_node, link=sankey_link)])
fig.update_layout(title_text="My weekly data Sankey Chart", font_size=10)
fig.show()


# Sunburst charts (region -> country), one per census year. The three charts
# differ only in their rows and title, so they share one helper.
SUNBURST_COLUMNS = ['region', 'country', 'population_percent', 'value']


def _show_sunburst(rows, year):
    """Build, display and return one region -> country sunburst chart.

    Args:
        rows: list of ``[region, country, population_percent, value]`` rows.
            Sector sizes are taken from ``value`` and sector colours from
            ``population_percent``.
        year: chart title.

    Returns:
        ``(frame, figure)``: the DataFrame built from ``rows`` and the
        displayed plotly figure.
    """
    frame = pd.DataFrame(rows, columns=SUNBURST_COLUMNS)
    figure = px.sunburst(frame, path=['region', 'country'], values='value',
                         color='population_percent', title=year)
    # Shift the colour bar to the left of the chart and fix the canvas size.
    figure.update_layout(coloraxis_colorbar_x=-0.35, width=500, height=500)
    figure.show()
    return frame, figure


# NOTE(review): the 'value' column looks like hand-picked display weights rather
# than the real shares (those are in the labels and in population_percent),
# presumably so the very small sectors stay visible -- confirm with the author.
df, fig = _show_sunburst([
    ["United Kingdom 76.95", "England 74.80", 74.80, 50],
    ["United Kingdom 76.95", "Scotland 1.06", 1.06, 4],
    ["United Kingdom 76.95", "Wales 0.72", 0.72, 3],
    ["United Kingdom 76.95", "N Ireland 0.37", 0.37, 3],
    ["Outside 23.06", "R Ireland 0.89", 0.89, 3],
    ["Outside 23.06", "Other EU 1.14", 1.14, 4],
    ["Outside 23.06", "Elsewhere 21.03", 21.03, 15],
], "2001")


df, fig = _show_sunburst([
    ["United Kingdom 63.33", "England 62.03", 62.03, 50],
    ["United Kingdom 63.33", "Scotland 0.67", 0.67, 5],
    ["United Kingdom 63.33", "Wales 0.40", 0.40, 3],
    ["United Kingdom 63.33", "N Ireland 0.23", 0.23, 3],
    ["Outside 36.66", "R Ireland 0.56", 0.56, 3],
    ["Outside 36.66", "Other EU 9.22", 9.22, 5],
    ["Outside 36.66", "Elsewhere 26.90", 26.90, 20],
], "2011")


# df and fig are left bound to the 2021 frame and figure.
df, fig = _show_sunburst([
    ["United Kingdom 54.04", "England 53.17", 53.17, 40],
    ["United Kingdom 54.04", "Scotland 0.45", 0.45, 5],
    ["United Kingdom 54.04", "Wales 0.27", 0.27, 3],
    ["United Kingdom 54.04", "N Ireland 0.15", 0.15, 3],
    ["Outside 45.96", "R Ireland 0.31", 0.31, 3],
    ["Outside 45.96", "Other EU 15.87", 15.87, 5],
    ["Outside 45.96", "Elsewhere 29.78", 29.78, 20],
], "2021")


# Stacked bar chart of national identity, one bar per census year.
# Each entry is (identity, figure shown as the bar text, drawn segment length).
# Note the column naming: 'value' carries the text figure, while
# 'population_percent' carries the rounded length that is actually plotted.
identity_by_year = {
    '2021': [
        ('British only identity', 55.78, 50),
        ('Other identity only', 23.78, 25),
        ('English and British only identity', 8.22, 8),
        ('English only identity', 8.18, 8),
        ('Other identity and at least one UK identity', 2.98, 5),
        ('Minority Nationalities', 1.04, 4),
    ],
    '2011': [
        ('British only identity', 33.52, 30),
        ('Other identity only', 17.10, 15),
        ('English and British only identity', 5.99, 5),
        ('English only identity', 40.52, 40),
        ('Other identity and at least one UK identity', 1.19, 5),
        ('Minority Nationalities', 1.65, 5),
    ],
}
# Flatten to one row per (identity, year), keeping the 2021 rows first.
nationality_rows = [
    [identity, shown, drawn, year]
    for year, entries in identity_by_year.items()
    for identity, shown, drawn in entries
]
df = pd.DataFrame(
    nationality_rows,
    columns=['Nationality', 'value', 'population_percent', 'year'],
)
fig = px.bar(
    df,
    x="population_percent",
    y="year",
    color='Nationality',
    height=400,
    text='value',
    title='Nationality',
)
fig.update_layout(width=1000)
fig.show()


# Three pie charts in one figure, one per census year; each pie is given its
# own horizontal slice of the figure through its x-domain.
# NOTE(review): the 2011 values sum to 98.7 and the 2021 values to 119.22, and
# their UK / Rest of Europe figures differ from the sunburst tables earlier in
# this file (e.g. UK 63.33 for 2011 and 54.04 for 2021) -- confirm against the
# source data before relying on these charts.
birth_labels = ["UK", "Rest of Europe", "Elsewhere"]
pie_specs = [
    # (year, values, x-domain)
    ("2001", [76.95, 2.02, 21.03], [0, 0.30]),
    ("2011", [66.4, 5.4, 26.90], [0.35, 0.65]),
    ("2021", [65.04, 24.4, 29.78], [0.70, 1.0]),
]
trace1, trace2, trace3 = [
    go.Pie(
        values=shares,
        labels=list(birth_labels),
        domain={"x": x_span},
        name=year,
        hoverinfo="label+percent+name",
        title=year,
    )
    for year, shares, x_span in pie_specs
]

layout = go.Layout(title="Country of birth")
data = [trace1, trace2, trace3]
fig = go.Figure(data=data, layout=layout)
fig.show()


# Captions for the five health ratings, in the same order as the values in each
# x_data row.
top_labels = [
    'Very good<br> health',
    'good<br> health',
    'fair<br> health',
    'bad <br> health',
    'Very bad<br> health',
]

# One fill colour per rating, same order as top_labels.
colors = [
    'rgba(38, 24, 74, 0.8)',
    'rgba(71, 58, 131, 0.8)',
    'rgba(122, 120, 168, 0.8)',
    'rgba(164, 163, 204, 0.85)',
    'rgba(190, 192, 213, 1)',
]

# Rating percentages per area; every row adds up to 100.
health_shares = {
    'Leicester 2011': [21, 19, 22, 18, 20],
    'Blaby 2011': [20, 19, 21, 22, 18],
    'Charnwood': [19, 18, 23, 20, 20],
    'Harborough': [17, 21, 20, 22, 20],
    'Hinckley and Bosworth': [19, 20, 18, 21, 22],
    'Melton': [20, 19, 20, 21, 20],
    'North West Leicestershire': [18, 21, 20, 20, 21],
    'Oadby and Wigston': [19, 22, 19, 21, 19],
}

# Parallel lists used by the plotting code: x_data[k] belongs to y_data[k].
x_data = list(health_shares.values())

y_data = list(health_shares)

fig = go.Figure()

# Add one single-segment horizontal bar per (rating, area) pair. Ratings are
# the outer loop, so all segments of the first rating are added before any of
# the second; with barmode='stack' the segments of an area line up end to end.
for level in range(len(x_data[0])):
    for shares, area in zip(x_data, y_data):
        segment = go.Bar(
            x=[shares[level]],
            y=[area],
            orientation='h',
            marker={
                'color': colors[level],
                'line': {'color': 'rgb(248, 248, 249)', 'width': 1},
            },
        )
        fig.add_trace(segment)

# Both axes are drawn without grid, axis line, tick labels or zero line.
bare_axis = {
    'showgrid': False,
    'showline': False,
    'showticklabels': False,
    'zeroline': False,
}

fig.update_layout(
    # The x axis starts at 15% of the figure width, leaving the left strip free.
    xaxis={**bare_axis, 'domain': [0.15, 1]},
    yaxis=dict(bare_axis),
    barmode='stack',
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248, 248, 255)',
    margin={'l': 120, 'r': 10, 't': 140, 'b': 80},
    showlegend=False,
)

annotations = []


def _arial(color):
    """Return a fresh 14pt Arial font spec in the given colour."""
    return {'family': 'Arial', 'size': 14, 'color': color}


for area, shares in zip(y_data, x_data):
    # The rating captions along the top are emitted once, aligned with the
    # segments of the last area in y_data.
    is_caption_row = area == y_data[-1]

    # Area name, right-aligned just left of the plotting region.
    annotations.append({
        'xref': 'paper', 'yref': 'y',
        'x': 0.14, 'y': area,
        'xanchor': 'right',
        'text': str(area),
        'font': _arial('rgb(67, 67, 67)'),
        'showarrow': False, 'align': 'right',
    })

    left_edge = 0  # running total of the segments already placed in this bar
    for level, share in enumerate(shares):
        centre = left_edge + share / 2
        # Percentage text centred inside the segment.
        annotations.append({
            'xref': 'x', 'yref': 'y',
            'x': centre, 'y': area,
            'text': str(share) + '%',
            'font': _arial('rgb(248, 248, 255)'),
            'showarrow': False,
        })
        # Rating caption above the plot, centred over the same segment.
        if is_caption_row:
            annotations.append({
                'xref': 'x', 'yref': 'paper',
                'x': centre, 'y': 1.1,
                'text': top_labels[level],
                'font': _arial('rgb(67, 67, 67)'),
                'showarrow': False,
            })
        left_edge += share

fig.update_layout(annotations=annotations)

fig.show()

	Source	Target	Value
0	Total hours in week	University	14
1	Total hours in week	Home	109
2	Total hours in week	Commute/Transport/Walk	10
3	Total hours in week	Part time	20
4	Total hours in week	Fun activities/playing/ movies	10

Data Visualization in Python using Plotly¶

Sankey Chart¶

Sunburst chart¶

Horizontal bar chart¶

multiple pie charts¶

Rating charts (multiple bar charts)¶

Thanks for reading! Please follow me on GitHub and Kaggle to have a look at my other work.¶