Merge pull request #56 from datapartnership/fall-second-checkin

ais updates for 10/21
datapartnership · Oct 21, 2024 · 15cc952 · 15cc952
2 parents 551a887 + e4f071b
commit 15cc952
Show file tree

Hide file tree

Showing 12 changed files with 551 additions and 5,027 deletions.
diff --git a/notebooks/conflict/acled-persian-gulf.ipynb b/notebooks/conflict/acled-persian-gulf.ipynb
@@ -14,10 +14,10 @@
     "countries_of_interest = [\n",
     "    \"Iran\",\n",
     "    \"United Arab Emirates\",\n",
-    "    #\"Qatar\",\n",
-    "    #\"Bahrain\",\n",
+    "    # \"Qatar\",\n",
+    "    # \"Bahrain\",\n",
     "    \"Oman\",\n",
-    "    #\"Iraq\",\n",
+    "    # \"Iraq\",\n",
     "]\n",
     "# ISO_COUNTRIES = [818, 887, 262, 232, 682, 400]\n",
     "START_DATE = \"2023-10-07\""
@@ -38,7 +38,6 @@
     "import logging\n",
     "\n",
     "import os\n",
-    "from os.path import join\n",
     "\n",
     "import pandas as pd\n",
     "\n",
@@ -139,7 +138,16 @@
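    ],
    "source": [
     "# Keep rows whose 'notes' match any keyword at a word start (Strait, vessel, ship, tanker, Persian Gulf, boat, Houthi, Hormuz; case-insensitive) and whose event_type is not 'Protests'\n",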
-    "strait_of_hormuz_attacks = data[(data['notes'].str.contains(r'\\b(Strait|vessel|ship|tanker|Persian Gulf|boat|Houthi|Hormuz)', case=False, na=False))&(data['event_type']!='Protests')]\n",
+    "strait_of_hormuz_attacks = data[\n",
+    "    (\n",
+    "        data[\"notes\"].str.contains(\n",
+    "            r\"\\b(Strait|vessel|ship|tanker|Persian Gulf|boat|Houthi|Hormuz)\",\n",
+    "            case=False,\n",
+    "            na=False,\n",
+    "        )\n",
+    "    )\n",
+    "    & (data[\"event_type\"] != \"Protests\")\n",
+    "]\n",
     "strait_of_hormuz_attacks.shape"
    ]
   },
@@ -167,13 +175,21 @@
    "outputs": [],
    "source": [
     "grouped_data = analysis.convert_to_gdf(\n",
-    "    strait_of_hormuz_attacks.groupby([\"latitude\", \"longitude\", \"notes\", \"event_type\", \"location\", \"country\", \"event_date\"])[\"fatalities\"]\n",
+    "    strait_of_hormuz_attacks.groupby(\n",
+    "        [\n",
+    "            \"latitude\",\n",
+    "            \"longitude\",\n",
+    "            \"notes\",\n",
+    "            \"event_type\",\n",
+    "            \"location\",\n",
+    "            \"country\",\n",
+    "            \"event_date\",\n",
+    "        ]\n",
+    "    )[\"fatalities\"]\n",
     "    .agg([\"sum\", \"count\"])\n",
     "    .reset_index()\n",
     ")\n",
-    "grouped_data.rename(\n",
-    "    columns={\"sum\": \"nrFatalities\", \"count\": \"nrEvents\"}, inplace=True\n",
-    ")"
+    "grouped_data.rename(columns={\"sum\": \"nrFatalities\", \"count\": \"nrEvents\"}, inplace=True)"
    ]
   },
   {
@@ -189,29 +205,39 @@
     "def split_in_three(text):\n",
     "    # Step 1: Find the length of the text and divide into thirds\n",
     "    third = len(text) // 3\n",
-    "    \n",
+    "\n",
     "    # Step 2: Split at the first space at or after the one-third mark (else the last space before it)\n",
-    "    if ' ' in text[third:]:\n",
-    "        first_split_point = third + text[third:].find(' ')\n",
-    "    elif ' ' in text[:third]:\n",
-    "        first_split_point = text[:third].rfind(' ')\n",
+    "    if \" \" in text[third:]:\n",
+    "        first_split_point = third + text[third:].find(\" \")\n",
+    "    elif \" \" in text[:third]:\n",
+    "        first_split_point = text[:third].rfind(\" \")\n",
     "    else:\n",
     "        first_split_point = third  # If no spaces, just split at third point\n",
-    "    \n",
+    "\n",
     "    # Step 3: Split again at the first space at least a third past the first split (else the last space from the first split onward)\n",
-    "    if ' ' in text[first_split_point + third:]:\n",
-    "        second_split_point = first_split_point + third + text[first_split_point + third:].find(' ')\n",
-    "    elif ' ' in text[first_split_point:]:\n",
-    "        second_split_point = first_split_point + text[first_split_point:].rfind(' ')\n",
+    "    if \" \" in text[first_split_point + third :]:\n",
+    "        second_split_point = (\n",
+    "            first_split_point + third + text[first_split_point + third :].find(\" \")\n",
+    "        )\n",
+    "    elif \" \" in text[first_split_point:]:\n",
+    "        second_split_point = first_split_point + text[first_split_point:].rfind(\" \")\n",
     "    else:\n",
-    "        second_split_point = first_split_point + third  # If no spaces, just split at second third point\n",
-    "    \n",
+    "        second_split_point = (\n",
+    "            first_split_point + third\n",
+    "        )  # If no spaces, just split at second third point\n",
+    "\n",
     "    # Step 4: Return the three parts\n",
-    "    return text[:first_split_point], text[first_split_point:second_split_point], text[second_split_point:]\n",
+    "    return (\n",
+    "        text[:first_split_point],\n",
+    "        text[first_split_point:second_split_point],\n",
+    "        text[second_split_point:],\n",
+    "    )\n",
+    "\n",
     "\n",
     "# Apply the split_in_three function to the 'notes' column\n",
-    "grouped_data[['notes_part1', 'notes_part2', 'notes_part3']] = grouped_data['notes'].apply(lambda x: split_in_three(x)).apply(pd.Series)\n",
-    "\n"
+    "grouped_data[[\"notes_part1\", \"notes_part2\", \"notes_part3\"]] = (\n",
+    "    grouped_data[\"notes\"].apply(lambda x: split_in_three(x)).apply(pd.Series)\n",
+    ")"
    ]
   },
   {
@@ -455,10 +481,15 @@
     }
    ],
    "source": [
-    "m = grouped_data[grouped_data['event_type']!='Protests'].explore(\n",
-    "    column=\"nrEvents\", zoom_start=5.1, marker_kwds={\"radius\": 5}, \n",
-    "    vmin=1, vmax=50, cmap ='viridis', tooltip=['event_date','location','notes_part1', 'notes_part2', 'notes_part3'],\n",
-    "    tooltip_kwds={\"aliases\": ['date','location','details', '', '']}\n",
+    "m = grouped_data[grouped_data[\"event_type\"] != \"Protests\"].explore(\n",
+    "    column=\"nrEvents\",\n",
+    "    zoom_start=5.1,\n",
+    "    marker_kwds={\"radius\": 5},\n",
+    "    vmin=1,\n",
+    "    vmax=50,\n",
+    "    cmap=\"viridis\",\n",
+    "    tooltip=[\"event_date\", \"location\", \"notes_part1\", \"notes_part2\", \"notes_part3\"],\n",
+    "    tooltip_kwds={\"aliases\": [\"date\", \"location\", \"details\", \"\", \"\"]},\n",
     ")\n",
     "m"
    ]
@@ -480,8 +511,12 @@
    },
    "outputs": [],
    "source": [
-    "conflict_by_country = analysis.get_acled_by_group(strait_of_hormuz_attacks, ['country'], freq='W')\n",
-    "conflict_by_location = analysis.get_acled_by_group(strait_of_hormuz_attacks, ['location'], freq='W')"
+    "conflict_by_country = analysis.get_acled_by_group(\n",
+    "    strait_of_hormuz_attacks, [\"country\"], freq=\"W\"\n",
+    ")\n",
+    "conflict_by_location = analysis.get_acled_by_group(\n",
+    "    strait_of_hormuz_attacks, [\"location\"], freq=\"W\"\n",
+    ")"
    ]
   },
   {
@@ -548,7 +583,6 @@
     "from bokeh.plotting import show, output_notebook\n",
     "import bokeh\n",
     "from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS\n",
-    "from bokeh.models import Panel, Tabs\n",
     "from bokeh.palettes import Category10\n",
     "from datetime import datetime\n",
     "\n",
@@ -563,12 +597,12 @@
     "}\n",
     "measure_colors = {\"nrEvents\": \"#4E79A7\", \"nrFatalities\": \"#F28E2B\"}\n",
     "\n",
-    "measure = 'nrEvents'\n",
+    "measure = \"nrEvents\"\n",
     "location_types = list(conflict_by_location[\"location\"].unique())\n",
     "\n",
     "if len(location_types) < 10:\n",
-    "        # If there are fewer than 10 event types, randomly pick that many colors from Category10[10]\n",
-    "        colors = random.sample(Category10[10], num_event_types)\n",
+    "    # Fewer than 10 locations: randomly sample num_event_types colors from Category10[10] (NOTE(review): confirm num_event_types is defined and equals len(location_types))\n",
+    "    colors = random.sample(Category10[10], num_event_types)\n",
     "else:\n",
     "    # 10 or more locations: use the full Category10[10] palette\n",
     "    colors = Category10[10]\n",
@@ -581,9 +615,9 @@
     "        date_column=\"event_date\",\n",
     "        categories=location_types,\n",
     "        measure=measure,\n",
-    "        category_column='location',\n",
+    "        category_column=\"location\",\n",
     "        colors=colors,\n",
-    "        #events_dict=events_dict\n",
+    "        # events_dict=events_dict\n",
     "    )\n",
     ")"
    ]