world-development-data/world_development_data.py at main · Amirabs7/world-development-data · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# -*- coding: utf-8 -*-
"""World Development Data

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1A6pSKsHXjG2KgiH-IZEGD4CHnMk3fDR4

1- Data Upload & Preview

1. Data Upload and Preview:
"""

# Load the dataset into `df`.
# Inside Colab the upload step runs first. Anywhere else the google.colab
# package cannot be imported, so the upload is skipped and "worlddata.csv" is
# expected to already be in the working directory.
try:
    from google.colab import files
except ImportError:
    uploaded = {}  # nothing was uploaded; keep the name defined for later cells
else:
    uploaded = files.upload()

import pandas as pd

df = pd.read_csv("worlddata.csv")
# Preview the first rows to confirm the file was parsed as expected.
print(df.head())

"""2-Missing Data Summary + Cleaning"""

# Report how many values are missing in each column, worst first. The result
# is printed explicitly because a bare expression is only echoed by a notebook
# cell; when this file runs as a script it would be silently discarded.
print(df.isnull().sum().sort_values(ascending=False))

# Columns with small missing counts: fill with the column mean.
cols_fill_mean = [
    "cpi", "alcohol_consumption", "labor_force", "labor_rate",
    "unemployment_rate", "air_pollution", "gdp", "gdp_capita",
    "water_access", "electricity_access", "population_density", "land_area"
]

# Moderate-missing columns receive the same column-mean fill.
cols_fill_mean_moderate = ["generosity", "freedom", "social_support"]

for col in cols_fill_mean + cols_fill_mean_moderate:
    df[col] = df[col].fillna(df[col].mean())

# adult_literacy: fill gaps with the mean of the row's income_class.
# fillna() only touches entries that are missing, so a row whose income_class
# is itself missing keeps the literacy value it already has. Assigning the
# output of a groupby transform directly would, on current pandas, replace
# such rows with NaN because null group keys are dropped.
income_class_means = df.groupby("income_class")["adult_literacy"].transform("mean")
df["adult_literacy"] = df["adult_literacy"].fillna(income_class_means)

print(df.columns)

"""3-Global Comparison: Life Expectancy and GDP per Capita (Norway, India, Brazil)

In this section, we analyze and visualize life expectancy and economic indicators across three representative countries:

Norway (High-income, developed economy)

India (Lower-middle-income, developing country)

Brazil (Upper-middle-income, emerging economy)

By comparing these nations, we uncover how income class correlates with life expectancy and GDP per capita. This trio showcases the economic diversity of the modern world.

Bar chart shows differences in life expectancy across these countries.

Scatter plot visualizes the relationship between GDP per capita and life expectancy, with every country drawn at the same fixed bubble size.
"""

import plotly.express as px


# ========== Filters ==========
# Countries to compare, chosen to span different income classes
selected_countries = ['Norway', 'India', 'Brazil']
# Income classes are taken from whichever rows match those countries
is_selected_country = df['country'].isin(selected_countries)
selected_classes = df.loc[is_selected_country, 'income_class'].unique().tolist()

# ========== Filter Dataset ==========
# Apply each non-empty filter in turn: income class first, then country
filtered_df = df.copy()
for filter_column, allowed_values in (
    ('income_class', selected_classes),
    ('country', selected_countries),
):
    if allowed_values:
        filtered_df = filtered_df[filtered_df[filter_column].isin(allowed_values)]

# Show what is being visualized
print("Filtered Countries and Income Classes:")
print(filtered_df[['country', 'income_class']])

# ========== Bar Chart: Life Expectancy ==========
# Rows are sorted from the highest life expectancy to the lowest before plotting.
bars_by_life_expectancy = filtered_df.sort_values("life_expectancy", ascending=False)
fig_bar = px.bar(
    bars_by_life_expectancy,
    x="country",
    y="life_expectancy",
    color="income_class",
    text="life_expectancy",
    title="Life Expectancy by Country",
)
# Label each bar with its value to one decimal place, placed outside the bar.
fig_bar.update_traces(textposition='outside', texttemplate='%{text:.1f}')
fig_bar.update_layout(xaxis_title="Country", yaxis_title="Life Expectancy (Years)")
fig_bar.show()

# ========== Scatter Plot: GDP per Capita vs Life Expectancy (Fixed Size) ==========
# Keep only the rows that have both plotted values.
plot_df = filtered_df.dropna(subset=["gdp_capita", "life_expectancy"])
scatter_options = {
    "x": "gdp_capita",
    "y": "life_expectancy",
    "color": "income_class",
    "hover_name": "country",
    "title": "GDP per Capita vs Life Expectancy (Fixed Bubble Size)",
}
fig_scatter = px.scatter(plot_df, **scatter_options)
# Every marker gets the same size; size does not encode any column.
fig_scatter.update_traces(marker={"size": 20})
fig_scatter.update_layout(yaxis_title="Life Expectancy (Years)", xaxis_title="GDP per Capita (USD)")
fig_scatter.show()

"""4-Insightful Visual Sections

Basic Needs Breakdown: Who Fails the Most Metrics?
"""

# basic_fail_score counts, per country, how many of five basic-needs
# thresholds are missed (0-5): electricity access, water access, life
# expectancy, adult literacy and GDP per capita.
adult_literacy = df['adult_literacy']
df['basic_fail_score'] = (
    (df['electricity_access'] < 70).astype(int) +
    (df['water_access'] < 70).astype(int) +
    (df['life_expectancy'] < 65).astype(int) +
    # A missing literacy value counts as a failure. It has to be tested with
    # isna(): comparing NaN with `<` already yields False, so a fillna(1)
    # applied after the comparison never changes anything.
    (adult_literacy.isna() | (adult_literacy < 70)).astype(int) +
    (df['gdp_capita'] < 1500).astype(int)
)

# Countries failing at least three of the five metrics. Printed explicitly
# because a bare expression shows nothing when the file runs as a script.
print(df.loc[df['basic_fail_score'] >= 3, ['country', 'basic_fail_score']])

"""
High Literacy, Low GDP: Underutilized Human Capital
"""

# Countries with adult literacy above 95 but GDP per capita below 3000.
# Printed explicitly: a bare expression shows nothing when run as a script.
high_literacy_low_gdp = (df['adult_literacy'] > 95) & (df['gdp_capita'] < 3000)
print(df.loc[high_literacy_low_gdp, ['country', 'adult_literacy', 'gdp_capita']])

"""What This Implies (the brutal truth):
These countries have well-educated people, but they are still poor.

That’s a red flag. Normally, high literacy is associated with higher GDP (because education leads to better jobs and innovation). But here:

📘 They’ve invested in basic education

💸 But their economies haven’t translated that into wealth

High Alcohol + Low Social Support: Recipe for Social Collapse
"""

# Countries with alcohol consumption above 7 and social support below 0.5.
# Printed explicitly: a bare expression shows nothing when run as a script.
high_alcohol_low_support = (df['alcohol_consumption'] > 7) & (df['social_support'] < 0.5)
print(df.loc[high_alcohol_low_support, ['country', 'alcohol_consumption', 'social_support']])

"""5. Correlations Between Variables

"""

import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the pairwise correlations between the numeric columns.
plt.figure(figsize=(12, 10))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
# Render the figure; without this call nothing is displayed outside a notebook.
plt.show()

"""| Variable 1         | Variable 2      | Correlation & Insight                                |
| ------------------ | --------------- | ---------------------------------------------------- |
| GDP per Capita     | Happiness Score | **Positive correlation**. More GDP = more happiness. |
| Income Class       | Happiness Score | **Strong association**. Higher income = happier.     |
| Population Density | Happiness Score | **Weak correlation**. No clear pattern.              |
| Country            | Happiness Score | **Wide variability**, even within same income class. |

"""