This Python script processes and visualizes greenhouse gas emission data from Argentina for the years 2010 to 2018. The dataset can be downloaded here. The shapefile and its associated files can be downloaded here.
The script begins by specifying a path to the data directory and then loads an Excel file containing the emissions data. This dataset, named df, captures the emissions for different jurisdictions (provinces) within Argentina.
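After loading the data, the script drops completely empty rows and columns, renames the columns to Jurisdiction plus one column per year (2010 to 2018), and converts the yearly values to floats.
It then calculates the change in emissions from 2010 to 2018 for each jurisdiction and appends this as a new column (change) to the dataset.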
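The code then loads the province shapefile with GeoPandas, shortens the name of Tierra del Fuego in both datasets so that the province names match, and left-merges the emissions table onto the province geometries.
Finally, the script draws a map of the provinces colored by the change in emissions between 2010 and 2018, adjusts the colorbar, and displays the figure (saving it to disk is optional).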
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
# Base directory of the project data. Keep the trailing slash: the paths below
# are built by plain string concatenation.
path = "E:/articulos/argentina-ghg/"

# Excel workbook with the provincial breakdown, and the sheet to read from it
file_path = path + 'input/desagregacion-provincial_(2010_2018).xlsx'
sheet_name = 'Total Pais'

# Read the whole sheet, skipping its first 5 rows
# (presumably title/header rows above the table -- TODO confirm against the workbook)
df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=5)

# Drop any completely empty rows or columns
df.dropna(how='all', inplace=True)
df.dropna(axis=1, how='all', inplace=True)

# Step 2: Rename columns
# The first column becomes 'Jurisdiction'; the remaining nine are labelled with
# their year as a string ('2010' ... '2018'). This assignment raises if the
# sheet does not have exactly ten non-empty columns.
correct_columns = ['Jurisdiction'] + [str(year) for year in range(2010, 2019)]
df.columns = correct_columns

# Step 3: Convert data types
# 'Jurisdiction' is stored as text and every year column as float
df['Jurisdiction'] = df['Jurisdiction'].astype(str)
for year in correct_columns[1:]:
    df[year] = df[year].astype(float)

# Display cleaned dataframe (optional)
print(df.head())
Jurisdiction 2010 2011 2012 \
0 Total Pais 373.477735 371.189923 407.659166
2 Sin asignar 40.405523 41.249071 39.896586
3 Ciudad Autónoma de Buenos Aires 18.551483 20.727705 20.313776
4 Buenos Aires 84.795547 82.286993 89.218730
5 Catamarca 1.412102 1.888391 2.028079
2013 2014 2015 2016 2017 2018
0 411.030979 384.023971 366.396267 359.673052 370.159724 365.889794
2 44.164627 44.641829 44.239261 43.834199 45.294989 47.272842
3 19.204610 18.006212 18.694618 18.428503 17.602730 15.693272
4 93.004934 92.249117 89.281197 94.374863 85.761024 93.792030
5 2.241525 1.242369 1.064319 1.494383 1.970846 0.835929
# Shorten the long official name of Tierra del Fuego ('Tierra del Fuego, Antártida e
# Islas del Atlántico Sur') to plain 'Tierra del Fuego'
df.loc[df['Jurisdiction'] == 'Tierra del Fuego, Antártida e Islas del Atlántico Sur', 'Jurisdiction'] = 'Tierra del Fuego'
# Remove the last 3 rows from the dataframe
# (presumably trailing non-province rows such as notes -- TODO confirm against the workbook).
# .copy() makes df an independent frame, so adding columns to it later does not
# raise pandas' SettingWithCopyWarning.
df = df.iloc[:-3].copy()
# Display the last few rows of the dataframe to confirm the changes
# (a bare expression: it is rendered only in an interactive/notebook session)
df.tail()
| | Jurisdiction | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 |
|---|---|---|---|---|---|---|---|---|---|---|
| 22 | Santa Cruz | 7.242625 | 6.933753 | 7.256118 | 7.281231 | 7.377757 | 7.466951 | 7.335986 | 7.108920 | 6.974536 |
| 23 | Santa Fe | 24.646441 | 26.944307 | 27.714709 | 29.600614 | 28.644248 | 28.292507 | 28.496881 | 27.193841 | 26.922312 |
| 24 | Santiago del Estero | 27.673284 | 30.616677 | 30.298089 | 31.288838 | 22.321433 | 18.719216 | 16.297966 | 20.135978 | 16.572504 |
| 25 | Tucumán | 7.101321 | 7.089497 | 7.922411 | 7.606891 | 7.609264 | 7.855399 | 6.977381 | 6.975885 | 6.642570 |
| 26 | Tierra del Fuego | 5.021536 | 4.925542 | 5.029836 | 4.894071 | 3.100110 | 3.061547 | 3.268451 | 3.335445 | 3.143657 |
# Net change per row over the period: the 2018 value minus the 2010 value
df['change'] = df['2018'].sub(df['2010'])
# Show the first ten rows, which now include the 'change' column
df.head(n=10)
| | Jurisdiction | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | change |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Total Pais | 373.477735 | 371.189923 | 407.659166 | 411.030979 | 384.023971 | 366.396267 | 359.673052 | 370.159724 | 365.889794 | -7.587941 |
| 2 | Sin asignar | 40.405523 | 41.249071 | 39.896586 | 44.164627 | 44.641829 | 44.239261 | 43.834199 | 45.294989 | 47.272842 | 6.867319 |
| 3 | Ciudad Autónoma de Buenos Aires | 18.551483 | 20.727705 | 20.313776 | 19.204610 | 18.006212 | 18.694618 | 18.428503 | 17.602730 | 15.693272 | -2.858211 |
| 4 | Buenos Aires | 84.795547 | 82.286993 | 89.218730 | 93.004934 | 92.249117 | 89.281197 | 94.374863 | 85.761024 | 93.792030 | 8.996484 |
| 5 | Catamarca | 1.412102 | 1.888391 | 2.028079 | 2.241525 | 1.242369 | 1.064319 | 1.494383 | 1.970846 | 0.835929 | -0.576172 |
| 6 | Córdoba | 28.762447 | 26.263621 | 23.353450 | 18.016880 | 24.502811 | 24.991885 | 21.510263 | 23.809129 | 27.710588 | -1.051859 |
| 7 | Corrientes | -1.582357 | -1.789908 | 2.382229 | 2.135111 | 1.783299 | 0.353670 | -0.913049 | 0.562175 | -1.596255 | -0.013897 |
| 8 | Chaco | 23.522462 | 25.630849 | 30.670467 | 31.287196 | 20.651086 | 20.986068 | 19.516270 | 22.734632 | 20.054211 | -3.468251 |
| 9 | Chubut | 9.574052 | 10.259267 | 9.746987 | 9.861211 | 19.037145 | 19.692655 | 19.487096 | 18.921365 | 9.536945 | -0.037107 |
| 10 | Entre Ríos | 14.368488 | 14.142667 | 14.120527 | 14.302901 | 13.036897 | 11.510080 | 10.435675 | 10.459301 | 10.005564 | -4.362924 |
# Combine with shapefile
# Read the provinces shapefile from the input folder into a GeoDataFrame
shapefile_path = path + "input/provincias.shp"
gdf = gpd.read_file(shapefile_path)
# List the 'NAM' column so the names can be compared with the emissions table
print(gdf['NAM'])
0     Ciudad Autónoma de Buenos Aires
1     Neuquén
2     La Pampa
3     Mendoza
4     San Luis
5     Córdoba
6     Santa Fe
7     Entre Ríos
8     San Juan
9     La Rioja
10    Catamarca
11    Tucumán
12    Jujuy
13    Chaco
14    Formosa
15    Santiago del Estero
16    Tierra del Fuego, Antártida e Islas del Atlánt...
17    Santa Cruz
18    Chubut
19    Río Negro
20    Buenos Aires
21    Corrientes
22    Misiones
23    Salta
Name: NAM, dtype: object
# Shorten Tierra del Fuego's long official name in the shapefile to 'Tierra del Fuego',
# the short form used as the join key for the emissions table
gdf.loc[gdf['NAM'] == 'Tierra del Fuego, Antártida e Islas del Atlántico Sur', 'NAM'] = 'Tierra del Fuego'
# Get unique values in both columns
unique_nam = set(gdf['NAM'].unique())
unique_province = set(df['Jurisdiction'].unique())
# Find names present in 'NAM' but not in 'Jurisdiction'
# (these provinces get NaN emissions in the left merge below)
in_nam_not_in_province = unique_nam - unique_province
# Find names present in 'Jurisdiction' but not in 'NAM'
# (these rows are dropped by the left merge below)
in_province_not_in_nam = unique_province - unique_nam
# Report both differences so that a name mismatch is visible instead of silently
# producing provinces without data
print("In shapefile but not in emissions data:", in_nam_not_in_province)
print("In emissions data but not in shapefile:", in_province_not_in_nam)
# Left merge: keep every shapefile row and attach the matching emissions columns
merged = gdf.merge(df, left_on="NAM", right_on="Jurisdiction", how="left")
# Tall, high-resolution canvas for the map (single axes)
fig, ax = plt.subplots(figsize=(10, 15), dpi=150)

# Province outlines, plus the provinces colored by the 'change' column;
# legend=True adds a colorbar, created here with an empty label
merged.boundary.plot(ax=ax)
merged.plot(column='change', ax=ax, legend=True, legend_kwds={'label': ""})

# Restrict the view to the region of interest (adjust these values as needed)
ax.set_xlim(-75, -50)
ax.set_ylim(-60, -20)

# Hide the tick marks and tick labels on both axes
ax.set_xticks([])
ax.set_yticks([])

# The colorbar is the second axes of the figure (index 0 is the map itself)
cb_ax = fig.axes[1]
cb_ax.tick_params(labelsize=20)

# Put the caption on the colorbar axes, then move it left by hand so it sits
# over the map; the x offset is in colorbar-axes units, hence the large value
title = cb_ax.set_title("Change in emissions 2010-2018", fontsize=20, loc='center')
title.set_position((-4, 1.0))  # (x, y)

# Uncomment to save the image (path already ends with "/")
# plt.savefig(path + "output/graphs/gei_evolution_provinces_map.png")
plt.show()