Skip to content

Commit

Permalink
Merge pull request #23 from com-480-data-visualization/casimir
Browse files Browse the repository at this point in the history
Casimir
  • Loading branch information
cmaximilian authored May 28, 2024
2 parents dc1ef8c + 9f24ea4 commit 207fe6b
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 13 deletions.
26 changes: 14 additions & 12 deletions data_processing/notebooks/undesa_data_extraction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -12,19 +12,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Define which M49 codes to use (regions such as \"Europe\" are removed; only individual countries are kept)\n",
"M49 = [108,174,262,232,231,404,450,454,480,175,508,638,646,690,706,728,800,834,894,716,24,120,140,148,178,180,226,266,678,12,818,434,504,729,788,732,72,748,426,516,710,204,854,132,384,270,288,324,624,430,466,478,562,566,654,686,694,768,398,417,762,795,860,156,344,446,158,408,392,496,410,96,116,360,418,458,104,608,702,764,626,704,4,50,64,356,364,462,524,586,144,51,31,48,196,268,368,376,400,414,422,512,634,682,275,760,792,784,887,112,100,203,348,616,498,642,643,703,804,830,208,233,234,246,352,372,833,428,440,578,752,826,8,20,70,191,292,300,336,380,470,499,807,620,674,688,705,724,40,56,250,276,438,442,492,528,756,660,28,533,44,52,535,92,136,192,531,212,214,308,312,332,388,474,500,630,652,659,662,663,670,534,780,796,850,84,188,222,320,340,484,558,591,32,68,76,152,170,218,238,254,328,600,604,740,858,862,60,124,304,666,840,36,554,242,540,598,90,548,316,296,584,583,520,580,585,16,184,258,570,882,772,776,798,876]\n",
"KeepM49 = [108,174,262,232,231,404,450,454,480,175,508,638,646,690,706,728,800,834,894,716,24,120,140,148,178,180,226,266,678,12,818,434,504,729,788,732,72,748,426,516,710,204,854,132,384,270,288,324,624,430,466,478,562,566,654,686,694,768,398,417,762,795,860,156,344,446,158,408,392,496,410,96,116,360,418,458,104,608,702,764,626,704,4,50,64,356,364,462,524,586,144,51,31,48,196,268,368,376,400,414,422,512,634,682,275,760,792,784,887,112,100,203,348,616,498,642,643,703,804,830,208,233,234,246,352,372,833,428,440,578,752,826,8,20,70,191,292,300,336,380,470,499,807,620,674,688,705,724,40,56,250,276,438,442,492,528,756,660,28,533,44,52,535,92,136,192,531,212,214,308,312,332,388,474,500,630,652,659,662,663,670,534,780,796,850,84,188,222,320,340,484,558,591,32,68,76,152,170,218,238,254,328,600,604,740,858,862,60,124,304,666,840,36,554,242,540,598,90,548,316,296,584,583,520,580,585,16,184,258,570,882,772,776,798,876]\n",
"# To keep only the rows of interest, a boolean mask can be built with .isin(), for example:\n",
"# mask = df['Subscription'].isin(active_statuses)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -37,20 +37,22 @@
"# Formatting \n",
"dfNamesCodesTypes[\"Region, development group, country or area\"] = dfNamesCodesTypes[\"Region, development group, country or area\"].str.strip().str.rstrip('*')\n",
"dfNamesCodesTypes[\"Type of data\"].fillna('NaN', inplace=True)\n",
"indxsKeepM49 = dfNamesCodesTypes['Location code'].isin(KeepM49)\n",
"dfNamesCodesTypes = dfNamesCodesTypes[indxsKeepM49]\n",
"# df1 --> \"Table 1\" --> \"International migrant stock at mid-year, both sexes combined\"\n",
"# df2 --> \"Table 2\" --> \"Total population at mid-year, both sexes combined (thousands)\"\n",
"# df3 --> \"Table 3\" --> \"International migrant stock as a percentage of the total population, both sexes combined\"\n",
"# df4 --> \"Table 5\" --> \"Annual rate of change of the migrant stock, both sexes combined\"\n",
"# df5 --> \"Table 6\" --> \"Refugee and asylum seekers at mid-year, both sexes combined\"\n",
"# df6 --> \"Table 6\" --> \"Refugee and asylum seekers as a percentage of the international migrant stock\"\n",
"# df7 --> \"Table 6\" --> \"Annual rate of change of the refugee and asylum seekers, both sexes combined\"\n",
"df1 = pd.read_excel(xls, 'Table 1', header=10, usecols=\"F:L\")\n",
"df2 = pd.read_excel(xls, 'Table 2', header=10, usecols=\"F:L\")\n",
"df3 = pd.read_excel(xls, 'Table 3', header=10, usecols=\"F:L\")\n",
"df4 = pd.read_excel(xls, 'Table 5', header=10, usecols=\"F:K\")\n",
"df5 = pd.read_excel(xls, 'Table 6', header=10, usecols=\"F:L\")\n",
"df6 = pd.read_excel(xls, 'Table 6', header=10, usecols=\"M:S\")\n",
"df7 = pd.read_excel(xls, 'Table 6', header=10, usecols=\"T:Y\")\n",
"df1 = pd.read_excel(xls, 'Table 1', header=10, usecols=\"F:L\")[indxsKeepM49]\n",
"df2 = pd.read_excel(xls, 'Table 2', header=10, usecols=\"F:L\")[indxsKeepM49]\n",
"df3 = pd.read_excel(xls, 'Table 3', header=10, usecols=\"F:L\")[indxsKeepM49]\n",
"df4 = pd.read_excel(xls, 'Table 5', header=10, usecols=\"F:K\")[indxsKeepM49]\n",
"df5 = pd.read_excel(xls, 'Table 6', header=10, usecols=\"F:L\")[indxsKeepM49]\n",
"df6 = pd.read_excel(xls, 'Table 6', header=10, usecols=\"M:S\")[indxsKeepM49]\n",
"df7 = pd.read_excel(xls, 'Table 6', header=10, usecols=\"T:Y\")[indxsKeepM49]\n",
"\n",
"data_dtfs = [df1, df2, df3, df4, df5, df6, df7]\n",
"descriptions = [\"International migrant stock at mid-year, both sexes combined\",\n",
Expand Down Expand Up @@ -82,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand Down
2 changes: 1 addition & 1 deletion website/data/undesa_data.json

Large diffs are not rendered by default.

0 comments on commit 207fe6b

Please sign in to comment.