Skip to content

Commit

Permalink
Merge pull request #56 from datapartnership/fall-second-checkin
Browse files Browse the repository at this point in the history
ais updates for 10/21
  • Loading branch information
andresfchamorro authored Oct 21, 2024
2 parents 551a887 + e4f071b commit 15cc952
Show file tree
Hide file tree
Showing 12 changed files with 551 additions and 5,027 deletions.
106 changes: 70 additions & 36 deletions notebooks/conflict/acled-persian-gulf.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
"countries_of_interest = [\n",
" \"Iran\",\n",
" \"United Arab Emirates\",\n",
" #\"Qatar\",\n",
" #\"Bahrain\",\n",
" # \"Qatar\",\n",
" # \"Bahrain\",\n",
" \"Oman\",\n",
" #\"Iraq\",\n",
" # \"Iraq\",\n",
"]\n",
"# ISO_COUNTRIES = [818, 887, 262, 232, 682, 400]\n",
"START_DATE = \"2023-10-07\""
Expand All @@ -38,7 +38,6 @@
"import logging\n",
"\n",
"import os\n",
"from os.path import join\n",
"\n",
"import pandas as pd\n",
"\n",
Expand Down Expand Up @@ -139,7 +138,16 @@
],
"source": [
"# Keep rows whose 'notes' mention any keyword in the pattern below (case-insensitive, matched at the start of a word), dropping 'Protests' events\n",
"strait_of_hormuz_attacks = data[(data['notes'].str.contains(r'\\b(Strait|vessel|ship|tanker|Persian Gulf|boat|Houthi|Hormuz)', case=False, na=False))&(data['event_type']!='Protests')]\n",
"strait_of_hormuz_attacks = data[\n",
" (\n",
" data[\"notes\"].str.contains(\n",
" r\"\\b(Strait|vessel|ship|tanker|Persian Gulf|boat|Houthi|Hormuz)\",\n",
" case=False,\n",
" na=False,\n",
" )\n",
" )\n",
" & (data[\"event_type\"] != \"Protests\")\n",
"]\n",
"strait_of_hormuz_attacks.shape"
]
},
Expand Down Expand Up @@ -167,13 +175,21 @@
"outputs": [],
"source": [
"grouped_data = analysis.convert_to_gdf(\n",
" strait_of_hormuz_attacks.groupby([\"latitude\", \"longitude\", \"notes\", \"event_type\", \"location\", \"country\", \"event_date\"])[\"fatalities\"]\n",
" strait_of_hormuz_attacks.groupby(\n",
" [\n",
" \"latitude\",\n",
" \"longitude\",\n",
" \"notes\",\n",
" \"event_type\",\n",
" \"location\",\n",
" \"country\",\n",
" \"event_date\",\n",
" ]\n",
" )[\"fatalities\"]\n",
" .agg([\"sum\", \"count\"])\n",
" .reset_index()\n",
")\n",
"grouped_data.rename(\n",
" columns={\"sum\": \"nrFatalities\", \"count\": \"nrEvents\"}, inplace=True\n",
")"
"grouped_data.rename(columns={\"sum\": \"nrFatalities\", \"count\": \"nrEvents\"}, inplace=True)"
]
},
{
Expand All @@ -189,29 +205,39 @@
"def split_in_three(text):\n",
" # Step 1: Find the length of the text and divide into thirds\n",
" third = len(text) // 3\n",
" \n",
"\n",
"    # Step 2: Find the first space at or after the one-third mark (else the last space before it)\n",
" if ' ' in text[third:]:\n",
" first_split_point = third + text[third:].find(' ')\n",
" elif ' ' in text[:third]:\n",
" first_split_point = text[:third].rfind(' ')\n",
" if \" \" in text[third:]:\n",
" first_split_point = third + text[third:].find(\" \")\n",
" elif \" \" in text[:third]:\n",
" first_split_point = text[:third].rfind(\" \")\n",
" else:\n",
" first_split_point = third # If no spaces, just split at third point\n",
" \n",
"\n",
"    # Step 3: Find the first space at least a third past the first split (else the last space after the first split)\n",
" if ' ' in text[first_split_point + third:]:\n",
" second_split_point = first_split_point + third + text[first_split_point + third:].find(' ')\n",
" elif ' ' in text[first_split_point:]:\n",
" second_split_point = first_split_point + text[first_split_point:].rfind(' ')\n",
" if \" \" in text[first_split_point + third :]:\n",
" second_split_point = (\n",
" first_split_point + third + text[first_split_point + third :].find(\" \")\n",
" )\n",
" elif \" \" in text[first_split_point:]:\n",
" second_split_point = first_split_point + text[first_split_point:].rfind(\" \")\n",
" else:\n",
" second_split_point = first_split_point + third # If no spaces, just split at second third point\n",
" \n",
" second_split_point = (\n",
" first_split_point + third\n",
" ) # If no spaces, just split at second third point\n",
"\n",
" # Step 4: Return the three parts\n",
" return text[:first_split_point], text[first_split_point:second_split_point], text[second_split_point:]\n",
" return (\n",
" text[:first_split_point],\n",
" text[first_split_point:second_split_point],\n",
" text[second_split_point:],\n",
" )\n",
"\n",
"\n",
"# Apply the split_in_three function to the 'notes' column\n",
"grouped_data[['notes_part1', 'notes_part2', 'notes_part3']] = grouped_data['notes'].apply(lambda x: split_in_three(x)).apply(pd.Series)\n",
"\n"
"grouped_data[[\"notes_part1\", \"notes_part2\", \"notes_part3\"]] = (\n",
" grouped_data[\"notes\"].apply(lambda x: split_in_three(x)).apply(pd.Series)\n",
")"
]
},
{
Expand Down Expand Up @@ -455,10 +481,15 @@
}
],
"source": [
"m = grouped_data[grouped_data['event_type']!='Protests'].explore(\n",
" column=\"nrEvents\", zoom_start=5.1, marker_kwds={\"radius\": 5}, \n",
" vmin=1, vmax=50, cmap ='viridis', tooltip=['event_date','location','notes_part1', 'notes_part2', 'notes_part3'],\n",
" tooltip_kwds={\"aliases\": ['date','location','details', '', '']}\n",
"m = grouped_data[grouped_data[\"event_type\"] != \"Protests\"].explore(\n",
" column=\"nrEvents\",\n",
" zoom_start=5.1,\n",
" marker_kwds={\"radius\": 5},\n",
" vmin=1,\n",
" vmax=50,\n",
" cmap=\"viridis\",\n",
" tooltip=[\"event_date\", \"location\", \"notes_part1\", \"notes_part2\", \"notes_part3\"],\n",
" tooltip_kwds={\"aliases\": [\"date\", \"location\", \"details\", \"\", \"\"]},\n",
")\n",
"m"
]
Expand All @@ -480,8 +511,12 @@
},
"outputs": [],
"source": [
"conflict_by_country = analysis.get_acled_by_group(strait_of_hormuz_attacks, ['country'], freq='W')\n",
"conflict_by_location = analysis.get_acled_by_group(strait_of_hormuz_attacks, ['location'], freq='W')"
"conflict_by_country = analysis.get_acled_by_group(\n",
" strait_of_hormuz_attacks, [\"country\"], freq=\"W\"\n",
")\n",
"conflict_by_location = analysis.get_acled_by_group(\n",
" strait_of_hormuz_attacks, [\"location\"], freq=\"W\"\n",
")"
]
},
{
Expand Down Expand Up @@ -548,7 +583,6 @@
"from bokeh.plotting import show, output_notebook\n",
"import bokeh\n",
"from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS\n",
"from bokeh.models import Panel, Tabs\n",
"from bokeh.palettes import Category10\n",
"from datetime import datetime\n",
"\n",
Expand All @@ -563,12 +597,12 @@
"}\n",
"measure_colors = {\"nrEvents\": \"#4E79A7\", \"nrFatalities\": \"#F28E2B\"}\n",
"\n",
"measure = 'nrEvents'\n",
"measure = \"nrEvents\"\n",
"location_types = list(conflict_by_location[\"location\"].unique())\n",
"\n",
"if len(location_types) < 10:\n",
" # If there are fewer than 10 event types, randomly pick that many colors from Category10[10]\n",
" colors = random.sample(Category10[10], num_event_types)\n",
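"    # If there are fewer than 10 locations, randomly sample num_event_types colors from Category10[10] (NOTE(review): num_event_types is not defined in the visible cells — confirm it equals len(location_types))\n",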
" colors = random.sample(Category10[10], num_event_types)\n",
"else:\n",
"    # If there are 10 or more locations, use Category10[10] directly\n",
" colors = Category10[10]\n",
Expand All @@ -581,9 +615,9 @@
" date_column=\"event_date\",\n",
" categories=location_types,\n",
" measure=measure,\n",
" category_column='location',\n",
" category_column=\"location\",\n",
" colors=colors,\n",
" #events_dict=events_dict\n",
" # events_dict=events_dict\n",
" )\n",
")"
]
Expand Down
Loading

0 comments on commit 15cc952

Please sign in to comment.