From 8dbb478965df2d6776cd4cfbe11e1e71bf319e61 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Sep 2024 22:59:44 +0000 Subject: [PATCH 1/2] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v4.6.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.5.0...v4.6.0) - [github.com/igorshubovych/markdownlint-cli: v0.39.0 → v0.41.0](https://github.com/igorshubovych/markdownlint-cli/compare/v0.39.0...v0.41.0) - [github.com/codespell-project/codespell: v2.2.6 → v2.3.0](https://github.com/codespell-project/codespell/compare/v2.2.6...v2.3.0) - [github.com/astral-sh/ruff-pre-commit: v0.3.3 → v0.6.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.3.3...v0.6.7) --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3474f25..024df10 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -14,7 +14,7 @@ repos: args: [--allow-missing-credentials] - id: detect-private-key - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.39.0 + rev: v0.41.0 hooks: - id: markdownlint name: Markdownlint @@ -24,13 +24,13 @@ repos: "--disable=MD033", # no-inline-html ] - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 + rev: v2.3.0 hooks: - id: codespell name: codespell description: Checks for common misspellings in text files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.3 + rev: v0.6.7 hooks: - id: ruff types_or: [python, pyi, jupyter] From fb44f0ba0c1291b5c8b8c0a24d9dd7ddd65be5d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Sep 2024 23:00:08 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/ISSUE_TEMPLATE/project_setup.yaml | 1 - .../aviation-trends/aviation_update.ipynb | 939 ++++++++++++------ 2 files changed, 618 insertions(+), 322 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/project_setup.yaml b/.github/ISSUE_TEMPLATE/project_setup.yaml index 21ae525..9196586 100644 --- a/.github/ISSUE_TEMPLATE/project_setup.yaml +++ b/.github/ISSUE_TEMPLATE/project_setup.yaml @@ -46,4 +46,3 @@ body: attributes: label: Project Assignment description: Please provide a summary of the project and assignment. - diff --git a/notebooks/aviation-trends/aviation_update.ipynb b/notebooks/aviation-trends/aviation_update.ipynb index e9a2afc..388afed 100644 --- a/notebooks/aviation-trends/aviation_update.ipynb +++ b/notebooks/aviation-trends/aviation_update.ipynb @@ -20,8 +20,7 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n" + "import matplotlib.pyplot as plt" ] }, { @@ -42,10 +41,13 @@ "outputs": [], "source": [ "from datetime import datetime, timedelta\n", + "\n", + "\n", "def daterange(start_date, end_date):\n", " for n in range(int((end_date - start_date).days) + 1):\n", " yield (start_date + timedelta(n)).strftime(\"%Y-%m-%d\")\n", "\n", + "\n", "# Define the start and end dates\n", "start_date = datetime(2024, 6, 7)\n", "end_date = datetime(2024, 6, 9)" @@ -62,7 +64,7 @@ "outputs": [], "source": [ "# Define empty dataset to concat all the arrivals and departures\n", - "#departures = pd.DataFrame()" + "# departures = pd.DataFrame()" ] }, { @@ -142,6 +144,7 @@ "source": [ "import ast\n", "\n", + "\n", "def safe_literal_eval(value):\n", " if isinstance(value, str):\n", " try:\n", @@ -150,16 +153,19 @@ " return value\n", " return value\n", "\n", + "\n", "def explode(flights):\n", - " flights['arrival'] = flights['arrival'].apply(safe_literal_eval)\n", - " flights['departure'] = flights['departure'].apply(safe_literal_eval)\n", - " \n", - " fr1 = pd.json_normalize(flights['arrival']).add_suffix('_arr')\n", - " fr2 = pd.json_normalize(flights['departure']).add_suffix('_dep')\n", + " flights[\"arrival\"] = flights[\"arrival\"].apply(safe_literal_eval)\n", + " flights[\"departure\"] = flights[\"departure\"].apply(safe_literal_eval)\n", + "\n", + " fr1 = pd.json_normalize(flights[\"arrival\"]).add_suffix(\"_arr\")\n", + " fr2 = pd.json_normalize(flights[\"departure\"]).add_suffix(\"_dep\")\n", "\n", - " flights_exploded = pd.concat([flights.drop(columns=['arrival', 'departure']), fr1, fr2], axis=1)\n", + " flights_exploded = pd.concat(\n", + " [flights.drop(columns=[\"arrival\", \"departure\"]), fr1, fr2], axis=1\n", + " )\n", "\n", - " return flights_exploded\n" + " return flights_exploded" ] }, { @@ -174,55 +180,88 @@ "source": [ "from bokeh.plotting import figure, show\n", "from bokeh.io import output_notebook\n", - "from bokeh.models import ColumnDataSource, DatetimeTickFormatter, FixedTicker, HoverTool, CustomJS, Label, Span\n", + "from bokeh.models import (\n", + " ColumnDataSource,\n", + " DatetimeTickFormatter,\n", + " FixedTicker,\n", + " HoverTool,\n", + " Label,\n", + " Span,\n", + ")\n", "from bokeh.palettes import Spectral6\n", "\n", - "def get_area_plot(df, title, source_text, acled_events_daily, reindex_freq='D', events={}):\n", "\n", - " complete_date_range = pd.date_range(start=df['flight_date'].min(), end=df['flight_date'].max(), freq=reindex_freq)\n", + "def get_area_plot(\n", + " df, title, source_text, acled_events_daily, reindex_freq=\"D\", events={}\n", + "):\n", + " complete_date_range = pd.date_range(\n", + " start=df[\"flight_date\"].min(), end=df[\"flight_date\"].max(), freq=reindex_freq\n", + " )\n", " # Pivot the data to have flight_status as columns\n", - " df_pivot = df.pivot_table(index='flight_date', columns='flight_status', values='iata_arr', aggfunc='sum')\n", + " df_pivot = df.pivot_table(\n", + " index=\"flight_date\", columns=\"flight_status\", values=\"iata_arr\", aggfunc=\"sum\"\n", + " )\n", " df_reindexed = df_pivot.reindex(complete_date_range).fillna(0)\n", - " df_reindexed = df_reindexed.reset_index().rename(columns={'index': 'flight_date'})\n", - " \n", + " df_reindexed = df_reindexed.reset_index().rename(columns={\"index\": \"flight_date\"})\n", "\n", - " df['total_flights'] = df.groupby('flight_date')['iata_arr'].transform('sum')\n", - " max_flight = df['total_flights'].max() + 50\n", + " df[\"total_flights\"] = df.groupby(\"flight_date\")[\"iata_arr\"].transform(\"sum\")\n", + " max_flight = df[\"total_flights\"].max() + 50\n", "\n", " # Create a ColumnDataSource\n", " source = ColumnDataSource(df_reindexed)\n", "\n", " # Create a Bokeh figure\n", - " p = figure(x_axis_type='datetime', height=600, width=800, title=title,\n", - " toolbar_location=None, tools=\"\", x_axis_label='Flight Date', y_axis_label='Nr Flights')\n", + " p = figure(\n", + " x_axis_type=\"datetime\",\n", + " height=600,\n", + " width=800,\n", + " title=title,\n", + " toolbar_location=None,\n", + " tools=\"\",\n", + " x_axis_label=\"Flight Date\",\n", + " y_axis_label=\"Nr Flights\",\n", + " )\n", "\n", " # Create a stacked area plot using varea_stack\n", - " status_list = list(df['flight_status'].unique())\n", - " colors = Spectral6[:len(status_list)] # Adjust the color palette to the number of flight statuses\n", - "\n", - "\n", - " p.varea_stack(stackers=status_list, x='flight_date', color=colors, source=source, legend_label=status_list)\n", - " p.line(x=acled_events_daily['event_date'], y=acled_events_daily['nrEvents'], line_width=2, line_color='black', legend_label='Number of Conflict Events w/o Protests')\n", - "\n", + " status_list = list(df[\"flight_status\"].unique())\n", + " colors = Spectral6[\n", + " : len(status_list)\n", + " ] # Adjust the color palette to the number of flight statuses\n", + "\n", + " p.varea_stack(\n", + " stackers=status_list,\n", + " x=\"flight_date\",\n", + " color=colors,\n", + " source=source,\n", + " legend_label=status_list,\n", + " )\n", + " p.line(\n", + " x=acled_events_daily[\"event_date\"],\n", + " y=acled_events_daily[\"nrEvents\"],\n", + " line_width=2,\n", + " line_color=\"black\",\n", + " legend_label=\"Number of Conflict Events w/o Protests\",\n", + " )\n", "\n", " # Customize the plot\n", " p.y_range.start = 0\n", " p.y_range.end = max_flight\n", "\n", " # Format x-axis to show only the first day of each month\n", - " p.xaxis.formatter = DatetimeTickFormatter(\n", - " months=\"%b %Y\",\n", - " days=\"%d %b %Y\"\n", - " )\n", + " p.xaxis.formatter = DatetimeTickFormatter(months=\"%b %Y\", days=\"%d %b %Y\")\n", "\n", " # Limit the number of ticks by using FixedTicker\n", - " date_range = pd.date_range(start=df['flight_date'].min(), end=df['flight_date'].max(), freq='W')\n", + " date_range = pd.date_range(\n", + " start=df[\"flight_date\"].min(), end=df[\"flight_date\"].max(), freq=\"W\"\n", + " )\n", " ticks = [pd.to_datetime(date).timestamp() * 1000 for date in date_range]\n", "\n", - " #complete_date_range = pd.date_range(start=df['flight_date'].min(), end=df['flight_date'].max())\n", + " # complete_date_range = pd.date_range(start=df['flight_date'].min(), end=df['flight_date'].max())\n", "\n", " # Only keep a few ticks for clarity\n", - " p.xaxis.ticker = FixedTicker(ticks=ticks[::2]) # Adjust the slicing (e.g., [::2], [::3]) for more or fewer ticks\n", + " p.xaxis.ticker = FixedTicker(\n", + " ticks=ticks[::2]\n", + " ) # Adjust the slicing (e.g., [::2], [::3]) for more or fewer ticks\n", "\n", " # Rotate x-axis labels for better readability\n", " p.xaxis.major_label_orientation = 1.2\n", @@ -231,35 +270,59 @@ " p.legend.click_policy = \"mute\"\n", "\n", " hover = p.select(dict(type=HoverTool))\n", - " hover.tooltips = [(\"Flight Date\", \"@flight_date{%F}\"), (\"Status\", \"$name\"), (\"Value\", \"@$name\")]\n", - " hover.formatters = {'@flight_date': 'datetime'}\n", - "\n", - " #p.legend.title = 'Flight Status'\n", - " p.legend.location = 'top_left'\n", - " p.legend.orientation = 'horizontal'\n", - "\n", - " source_label = Label(x=0, y=0, x_units='screen', y_units='screen',\n", - " text=source_text, \n", - " text_font_size='10pt', text_color='gray')\n", + " hover.tooltips = [\n", + " (\"Flight Date\", \"@flight_date{%F}\"),\n", + " (\"Status\", \"$name\"),\n", + " (\"Value\", \"@$name\"),\n", + " ]\n", + " hover.formatters = {\"@flight_date\": \"datetime\"}\n", + "\n", + " # p.legend.title = 'Flight Status'\n", + " p.legend.location = \"top_left\"\n", + " p.legend.orientation = \"horizontal\"\n", + "\n", + " source_label = Label(\n", + " x=0,\n", + " y=0,\n", + " x_units=\"screen\",\n", + " y_units=\"screen\",\n", + " text=source_text,\n", + " text_font_size=\"10pt\",\n", + " text_color=\"gray\",\n", + " )\n", "\n", " # Add the label to the plot\n", - " p.add_layout(source_label, 'below')\n", + " p.add_layout(source_label, \"below\")\n", "\n", " for event_date, event_desc in events.items():\n", " # Convert the event_date string to a datetime object\n", " event_date_dt = pd.to_datetime(event_date)\n", - " \n", + "\n", " # Create a Span object (vertical line)\n", - " vline = Span(location=event_date_dt.timestamp() * 1000, dimension='height', line_color='grey', line_width=1, line_dash='dashed')\n", + " vline = Span(\n", + " location=event_date_dt.timestamp() * 1000,\n", + " dimension=\"height\",\n", + " line_color=\"grey\",\n", + " line_width=1,\n", + " line_dash=\"dashed\",\n", + " )\n", " p.add_layout(vline)\n", - " \n", + "\n", " # Add a label near the vertical line\n", - " label = Label(x=event_date_dt.timestamp() * 1000, y=df[df['flight_date']==event_date]['total_flights'].iloc[0]+150, y_units='screen', text=event_desc, text_color='grey', text_align='left', text_font_size=\"9pt\")\n", + " label = Label(\n", + " x=event_date_dt.timestamp() * 1000,\n", + " y=df[df[\"flight_date\"] == event_date][\"total_flights\"].iloc[0] + 150,\n", + " y_units=\"screen\",\n", + " text=event_desc,\n", + " text_color=\"grey\",\n", + " text_align=\"left\",\n", + " text_font_size=\"9pt\",\n", + " )\n", " p.add_layout(label)\n", "\n", " return p\n", " # Show the plot\n", - " #show(p)" + " # show(p)" ] }, { @@ -272,10 +335,14 @@ }, "outputs": [], "source": [ - "departures = pd.concat([pd.read_csv('../../data/aviation/aviationstack_bey_14082024_01012024_dep.csv'), \n", - " pd.read_csv('../../data/aviation/aviationstack_bey_25082024_15082024_dep.csv'),\n", - " pd.read_csv('../../data/aviation/aviationstack_bey_25082024_01012024_dep2.csv')])\n", - "departures.drop(columns='Unnamed: 0', inplace=True)\n", + "departures = pd.concat(\n", + " [\n", + " pd.read_csv(\"../../data/aviation/aviationstack_bey_14082024_01012024_dep.csv\"),\n", + " pd.read_csv(\"../../data/aviation/aviationstack_bey_25082024_15082024_dep.csv\"),\n", + " pd.read_csv(\"../../data/aviation/aviationstack_bey_25082024_01012024_dep2.csv\"),\n", + " ]\n", + ")\n", + "departures.drop(columns=\"Unnamed: 0\", inplace=True)\n", "departures.reset_index(drop=True, inplace=True)" ] }, @@ -316,18 +383,24 @@ "\n", "df = departures\n", "# Assuming df is your DataFrame and 'flight_date' is the column with the dates\n", - "df['flight_date'] = pd.to_datetime(df['flight_date']) # Ensure 'flight_date' is in datetime format\n", + "df[\"flight_date\"] = pd.to_datetime(\n", + " df[\"flight_date\"]\n", + ") # Ensure 'flight_date' is in datetime format\n", "\n", "# Generate the complete date range from the minimum to the maximum date\n", - "complete_date_range = pd.date_range(start=df['flight_date'].min(), end=df['flight_date'].max())\n", + "complete_date_range = pd.date_range(\n", + " start=df[\"flight_date\"].min(), end=df[\"flight_date\"].max()\n", + ")\n", "\n", "# Find missing dates by comparing the complete date range with the dates in the DataFrame\n", - "missing_dates = complete_date_range.difference(df['flight_date'])\n", + "missing_dates = complete_date_range.difference(df[\"flight_date\"])\n", "\n", "if missing_dates.empty:\n", " print(\"All dates are present.\")\n", "else:\n", - " print(f\"The following dates are not available in the data. The reasons are unknown.: {missing_dates}\")\n" + " print(\n", + " f\"The following dates are not available in the data. The reasons are unknown.: {missing_dates}\"\n", + " )" ] }, { @@ -355,9 +428,9 @@ } ], "source": [ - "beginning = departures['flight_date'].min()\n", - "end = departures['flight_date'].max()\n", - "print(f'Data is available from {beginning} to {end}')" + "beginning = departures[\"flight_date\"].min()\n", + "end = departures[\"flight_date\"].max()\n", + "print(f\"Data is available from {beginning} to {end}\")" ] }, { @@ -391,12 +464,16 @@ ], "source": [ "before = departures_exploded.shape[0]\n", - "print(f'There were {before} flights before duplication check')\n", + "print(f\"There were {before} flights before duplication check\")\n", "# check for duplicate flights i.e., flights scheduled to take off at the exact same time from the same place to the same destination\n", - "departures_exploded = departures_exploded.drop_duplicates(subset=['flight_date', 'scheduled_arr', 'iata_arr', 'iata_dep', 'scheduled_dep'])\n", + "departures_exploded = departures_exploded.drop_duplicates(\n", + " subset=[\"flight_date\", \"scheduled_arr\", \"iata_arr\", \"iata_dep\", \"scheduled_dep\"]\n", + ")\n", "\n", "after = departures_exploded.shape[0]\n", - "print(f'There are {after} flights after duplication check. {before-after} flights were duplicated')" + "print(\n", + " f\"There are {after} flights after duplication check. {before-after} flights were duplicated\"\n", + ")" ] }, { @@ -423,10 +500,14 @@ }, "outputs": [], "source": [ - "acled_events_daily = pd.read_csv('../../data/conflict/acled_events_without_protests.csv')\n", - "acled_events_daily['event_date'] = acled_events_daily['event_date'].apply(lambda x: pd.to_datetime(x))\n", - "acled_events_daily = acled_events_daily[acled_events_daily['event_date']>'2024-01-01']\n", - "acled_events_daily.drop(columns =['Unnamed: 0', 'index'], inplace=True)" + "acled_events_daily = pd.read_csv(\n", + " \"../../data/conflict/acled_events_without_protests.csv\"\n", + ")\n", + "acled_events_daily[\"event_date\"] = acled_events_daily[\"event_date\"].apply(\n", + " lambda x: pd.to_datetime(x)\n", + ")\n", + "acled_events_daily = acled_events_daily[acled_events_daily[\"event_date\"] > \"2024-01-01\"]\n", + "acled_events_daily.drop(columns=[\"Unnamed: 0\", \"index\"], inplace=True)" ] }, { @@ -439,8 +520,20 @@ }, "outputs": [], "source": [ - "acled_events_daily = acled_events_daily.groupby([pd.Grouper(key='event_date', freq='D')])[['fatalities', 'nrEvents']].sum().reset_index()\n", - "acled_events_weekly = acled_events_daily.groupby([pd.Grouper(key='event_date', freq='W')])[['fatalities', 'nrEvents']].sum().reset_index()" + "acled_events_daily = (\n", + " acled_events_daily.groupby([pd.Grouper(key=\"event_date\", freq=\"D\")])[\n", + " [\"fatalities\", \"nrEvents\"]\n", + " ]\n", + " .sum()\n", + " .reset_index()\n", + ")\n", + "acled_events_weekly = (\n", + " acled_events_daily.groupby([pd.Grouper(key=\"event_date\", freq=\"W\")])[\n", + " [\"fatalities\", \"nrEvents\"]\n", + " ]\n", + " .sum()\n", + " .reset_index()\n", + ")" ] }, { @@ -499,8 +592,14 @@ ], "source": [ "# Test to see if any flight has more than one flight status assoctaed with it.\n", - "duplicate_status_test = departures_exploded.groupby(['flight_date', 'scheduled_arr', 'iata_arr', 'iata_dep', 'scheduled_dep'])[['flight_status']].count().reset_index()\n", - "duplicate_status_test[duplicate_status_test['flight_status']>1]" + "duplicate_status_test = (\n", + " departures_exploded.groupby(\n", + " [\"flight_date\", \"scheduled_arr\", \"iata_arr\", \"iata_dep\", \"scheduled_dep\"]\n", + " )[[\"flight_status\"]]\n", + " .count()\n", + " .reset_index()\n", + ")\n", + "duplicate_status_test[duplicate_status_test[\"flight_status\"] > 1]" ] }, { @@ -513,7 +612,7 @@ }, "outputs": [], "source": [ - "events = {'2024-04-14':'Temporary shutdown of\\nLebanese airspace between\\n7AM and 1PM'}" + "events = {\"2024-04-14\": \"Temporary shutdown of\\nLebanese airspace between\\n7AM and 1PM\"}" ] }, { @@ -579,11 +678,25 @@ "source": [ "output_notebook() # Display plots inline in a Jupyter notebook\n", "\n", - "complete_date_range = pd.date_range(start=df['flight_date'].min(), end=df['flight_date'].max())\n", - "df = departures_exploded.groupby(['flight_date', 'flight_status']).count()[['iata_arr']].reset_index()\n", - "df['flight_date'] = df['flight_date'].apply(lambda x: pd.to_datetime(x))\n", - "\n", - "show(get_area_plot(df, 'Daily Departures from BEY', 'Source: Flight data from AviationStack and conflict events from ACLED', acled_events_daily, events=events))\n" + "complete_date_range = pd.date_range(\n", + " start=df[\"flight_date\"].min(), end=df[\"flight_date\"].max()\n", + ")\n", + "df = (\n", + " departures_exploded.groupby([\"flight_date\", \"flight_status\"])\n", + " .count()[[\"iata_arr\"]]\n", + " .reset_index()\n", + ")\n", + "df[\"flight_date\"] = df[\"flight_date\"].apply(lambda x: pd.to_datetime(x))\n", + "\n", + "show(\n", + " get_area_plot(\n", + " df,\n", + " \"Daily Departures from BEY\",\n", + " \"Source: Flight data from AviationStack and conflict events from ACLED\",\n", + " acled_events_daily,\n", + " events=events,\n", + " )\n", + ")" ] }, { @@ -607,169 +720,169 @@ "outputs": [], "source": [ "iata_mapping = {\n", - " 'BEY': 'Beirut',\n", - " 'ADA': 'Adana',\n", - " 'ADD': 'Addis Ababa',\n", - " 'AMM': 'Amman',\n", - " 'AUH': 'Abu Dhabi',\n", - " 'BAH': 'Bahrain',\n", - " 'BER': 'Berlin',\n", - " 'BGW': 'Baghdad',\n", - " 'CAI': 'Cairo',\n", - " 'CDG': 'Paris (Charles de Gaulle)',\n", - " 'DOH': 'Doha',\n", - " 'DUS': 'Düsseldorf',\n", - " 'DXB': 'Dubai',\n", - " 'FCO': 'Rome (Fiumicino)',\n", - " 'FRA': 'Frankfurt',\n", - " 'GVA': 'Geneva',\n", - " 'IST': 'Istanbul',\n", - " 'JED': 'Jeddah',\n", - " 'KAC': 'Kuwait City',\n", - " 'KWI': 'Kuwait City',\n", - " 'LCA': 'Larnaca',\n", - " 'LHR': 'London (Heathrow)',\n", - " 'LXR': 'Luxor',\n", - " 'MAD': 'Madrid',\n", - " 'NJF': 'Najaf',\n", - " 'ORY': 'Paris (Orly)',\n", - " 'ATH': 'Athens',\n", - " 'AYT': 'Antalya',\n", - " 'BRU': 'Brussels',\n", - " 'BSR': 'Basra',\n", - " 'CMF': 'Chambéry',\n", - " 'CPH': 'Copenhagen',\n", - " 'DMM': 'Dammam',\n", - " 'EBL': 'Erbil',\n", - " 'ESB': 'Ankara',\n", - " 'IKA': 'Tehran',\n", - " 'ACC': 'Accra',\n", - " 'ADJ': 'Amman',\n", - " 'ALP': 'Aleppo',\n", - " 'BRE': 'Bremen',\n", - " 'DAM': 'Damascus',\n", - " 'ISU': 'Sulaymaniyah',\n", - " 'AKT': 'Akrotiri',\n", - " 'EVN': 'Yerevan',\n", - " 'ARN': 'Stockholm',\n", - " 'BZZ': 'Brize Norton',\n", - " 'AZI': 'Abu Dhabi (Al Bateen)',\n", - " 'ADB': 'Izmir',\n", - " 'HAM': 'Hamburg',\n", - " 'HKG': 'Hong Kong',\n", - " 'FIH': 'Kinshasa',\n", - " 'CMB': 'Colombo',\n", - " 'NCE': 'Nice',\n", - " 'MFM': 'Macau',\n", - " 'MNL': 'Manila',\n", - " 'OTP': 'Bucharest',\n", - " 'RUH': 'Riyadh',\n", - " 'LOS': 'Lagos',\n", - " 'MLA': 'Malta',\n", - " 'MRS': 'Marseille',\n", - " 'PSA': 'Pisa',\n", - " 'ISL': 'Istanbul (Sabiha Gökçen)',\n", - " 'ALG': 'Algiers',\n", - " 'SAW': 'Istanbul (Sabiha Gökçen)',\n", - " 'SHJ': 'Sharjah',\n", - " 'SSH': 'Sharm El Sheikh',\n", - " 'DWC': 'Dubai (Al Maktoum)',\n", - " 'PFO': 'Paphos',\n", - " 'MCT': 'Muscat',\n", - " 'CIA': 'Rome (Ciampino)',\n", - " 'LBG': 'Paris (Le Bourget)',\n", - " 'LEJ': 'Leipzig',\n", - " 'BIA': 'Bastia',\n", - " 'BUD': 'Budapest',\n", - " 'CHQ': 'Chania',\n", - " 'HAN': 'Hanoi',\n", - " 'AMS': 'Amsterdam',\n", - " 'VIY': 'Vigo',\n", - " 'PNH': 'Phnom Penh',\n", - " 'BRI': 'Bari',\n", - " 'EIN': 'Eindhoven',\n", - " 'NBO': 'Nairobi',\n", - " 'CEQ': 'Cannes',\n", - " 'LYS': 'Lyon',\n", - " 'KTM': 'Kathmandu',\n", - " 'SIR': 'Sion',\n", - " 'BEG': 'Belgrade',\n", - " 'INI': 'Niš',\n", - " 'RMS': 'Ramstein',\n", - " 'ISB': 'Islamabad',\n", - " 'ZAZ': 'Zaragoza',\n", - " 'HRG': 'Hurghada',\n", - " 'KGS': 'Kos',\n", - " 'MED': 'Medina',\n", - " 'MHD': 'Mashhad',\n", - " 'HER': 'Heraklion',\n", - " 'ASW': 'Aswan',\n", - " 'DUB': 'Dublin',\n", - " 'HBE': 'Alexandria',\n", - " 'ABJ': 'Abidjan',\n", - " 'CTA': 'Catania',\n", - " 'BOJ': 'Burgas',\n", - " 'LDE': 'Lourdes',\n", - " 'DEL': 'Delhi',\n", - " 'LAD': 'Luanda',\n", - " 'BJZ': 'Badajoz',\n", - " 'MIR': 'Monastir',\n", - " 'MXP': 'Milan (Malpensa)',\n", - " 'HYD': 'Hyderabad',\n", - " 'KRK': 'Krakow',\n", - " 'BJV': 'Bodrum',\n", - " 'GYD': 'Baku',\n", - " 'TLV': 'Tel Aviv',\n", - " 'WAW': 'Warsaw',\n", - " 'DLM': 'Dalaman',\n", - " 'IBZ': 'Ibiza',\n", - " 'BLQ': 'Bologna',\n", - " 'FAB': 'Farnborough',\n", - " 'FLR': 'Florence',\n", - " 'BRQ': 'Brno',\n", - " 'BJY': 'Belgrade (Batajnica)',\n", - " 'GOT': 'Gothenburg',\n", - " 'DBB': 'Dabaa',\n", - " 'CGN': 'Cologne',\n", - " 'FOG': 'Foggia',\n", - " 'FJR': 'Fujairah',\n", - " 'CFU': 'Corfu',\n", - " 'ABV': 'Abuja',\n", - " 'COV': 'Coventry',\n", - " 'BUS': 'Batumi',\n", - " 'ZRH': 'Zurich',\n", - " 'UAB': 'Incirlik',\n", - " 'ULH': 'Al Ula',\n", - " 'TBS': 'Tbilisi',\n", - " 'STN': 'London (Stansted)',\n", - " 'XJD': 'Al Udeid',\n", - " 'TEB': 'Teterboro',\n", - " 'SPX': 'Spangdahlem',\n", - " 'VAR': 'Varna',\n", - " 'LIN': 'Milan (Linate)',\n", - " 'TMP': 'Tampere',\n", - " 'VCE': 'Venice',\n", - " 'LTN': 'London (Luton)',\n", - " 'SKG': 'Thessaloniki',\n", - " 'TRN': 'Turin',\n", - " 'TUN': 'Tunis',\n", - " 'THR': 'Tehran',\n", - " 'OSR': 'Ostrava',\n", - " 'SOF': 'Sofia',\n", - " 'TRS': 'Trieste',\n", - " 'LIS': 'Lisbon',\n", - " 'JMK': 'Mykonos',\n", - " 'RHO': 'Rhodes',\n", - " 'KYE': 'Kiryat Shmona',\n", - " 'RKE': 'Roskilde',\n", - " 'RIX': 'Riga',\n", - " 'TIV': 'Tivat',\n", - " 'NAP': 'Naples',\n", - " 'OLB': 'Olbia',\n", - " 'MUH': 'Marsa Matruh',\n", - " 'VIE': 'Vienna',\n", - " 'SZX': 'Shenzhen'\n", - "}\n" + " \"BEY\": \"Beirut\",\n", + " \"ADA\": \"Adana\",\n", + " \"ADD\": \"Addis Ababa\",\n", + " \"AMM\": \"Amman\",\n", + " \"AUH\": \"Abu Dhabi\",\n", + " \"BAH\": \"Bahrain\",\n", + " \"BER\": \"Berlin\",\n", + " \"BGW\": \"Baghdad\",\n", + " \"CAI\": \"Cairo\",\n", + " \"CDG\": \"Paris (Charles de Gaulle)\",\n", + " \"DOH\": \"Doha\",\n", + " \"DUS\": \"Düsseldorf\",\n", + " \"DXB\": \"Dubai\",\n", + " \"FCO\": \"Rome (Fiumicino)\",\n", + " \"FRA\": \"Frankfurt\",\n", + " \"GVA\": \"Geneva\",\n", + " \"IST\": \"Istanbul\",\n", + " \"JED\": \"Jeddah\",\n", + " \"KAC\": \"Kuwait City\",\n", + " \"KWI\": \"Kuwait City\",\n", + " \"LCA\": \"Larnaca\",\n", + " \"LHR\": \"London (Heathrow)\",\n", + " \"LXR\": \"Luxor\",\n", + " \"MAD\": \"Madrid\",\n", + " \"NJF\": \"Najaf\",\n", + " \"ORY\": \"Paris (Orly)\",\n", + " \"ATH\": \"Athens\",\n", + " \"AYT\": \"Antalya\",\n", + " \"BRU\": \"Brussels\",\n", + " \"BSR\": \"Basra\",\n", + " \"CMF\": \"Chambéry\",\n", + " \"CPH\": \"Copenhagen\",\n", + " \"DMM\": \"Dammam\",\n", + " \"EBL\": \"Erbil\",\n", + " \"ESB\": \"Ankara\",\n", + " \"IKA\": \"Tehran\",\n", + " \"ACC\": \"Accra\",\n", + " \"ADJ\": \"Amman\",\n", + " \"ALP\": \"Aleppo\",\n", + " \"BRE\": \"Bremen\",\n", + " \"DAM\": \"Damascus\",\n", + " \"ISU\": \"Sulaymaniyah\",\n", + " \"AKT\": \"Akrotiri\",\n", + " \"EVN\": \"Yerevan\",\n", + " \"ARN\": \"Stockholm\",\n", + " \"BZZ\": \"Brize Norton\",\n", + " \"AZI\": \"Abu Dhabi (Al Bateen)\",\n", + " \"ADB\": \"Izmir\",\n", + " \"HAM\": \"Hamburg\",\n", + " \"HKG\": \"Hong Kong\",\n", + " \"FIH\": \"Kinshasa\",\n", + " \"CMB\": \"Colombo\",\n", + " \"NCE\": \"Nice\",\n", + " \"MFM\": \"Macau\",\n", + " \"MNL\": \"Manila\",\n", + " \"OTP\": \"Bucharest\",\n", + " \"RUH\": \"Riyadh\",\n", + " \"LOS\": \"Lagos\",\n", + " \"MLA\": \"Malta\",\n", + " \"MRS\": \"Marseille\",\n", + " \"PSA\": \"Pisa\",\n", + " \"ISL\": \"Istanbul (Sabiha Gökçen)\",\n", + " \"ALG\": \"Algiers\",\n", + " \"SAW\": \"Istanbul (Sabiha Gökçen)\",\n", + " \"SHJ\": \"Sharjah\",\n", + " \"SSH\": \"Sharm El Sheikh\",\n", + " \"DWC\": \"Dubai (Al Maktoum)\",\n", + " \"PFO\": \"Paphos\",\n", + " \"MCT\": \"Muscat\",\n", + " \"CIA\": \"Rome (Ciampino)\",\n", + " \"LBG\": \"Paris (Le Bourget)\",\n", + " \"LEJ\": \"Leipzig\",\n", + " \"BIA\": \"Bastia\",\n", + " \"BUD\": \"Budapest\",\n", + " \"CHQ\": \"Chania\",\n", + " \"HAN\": \"Hanoi\",\n", + " \"AMS\": \"Amsterdam\",\n", + " \"VIY\": \"Vigo\",\n", + " \"PNH\": \"Phnom Penh\",\n", + " \"BRI\": \"Bari\",\n", + " \"EIN\": \"Eindhoven\",\n", + " \"NBO\": \"Nairobi\",\n", + " \"CEQ\": \"Cannes\",\n", + " \"LYS\": \"Lyon\",\n", + " \"KTM\": \"Kathmandu\",\n", + " \"SIR\": \"Sion\",\n", + " \"BEG\": \"Belgrade\",\n", + " \"INI\": \"Niš\",\n", + " \"RMS\": \"Ramstein\",\n", + " \"ISB\": \"Islamabad\",\n", + " \"ZAZ\": \"Zaragoza\",\n", + " \"HRG\": \"Hurghada\",\n", + " \"KGS\": \"Kos\",\n", + " \"MED\": \"Medina\",\n", + " \"MHD\": \"Mashhad\",\n", + " \"HER\": \"Heraklion\",\n", + " \"ASW\": \"Aswan\",\n", + " \"DUB\": \"Dublin\",\n", + " \"HBE\": \"Alexandria\",\n", + " \"ABJ\": \"Abidjan\",\n", + " \"CTA\": \"Catania\",\n", + " \"BOJ\": \"Burgas\",\n", + " \"LDE\": \"Lourdes\",\n", + " \"DEL\": \"Delhi\",\n", + " \"LAD\": \"Luanda\",\n", + " \"BJZ\": \"Badajoz\",\n", + " \"MIR\": \"Monastir\",\n", + " \"MXP\": \"Milan (Malpensa)\",\n", + " \"HYD\": \"Hyderabad\",\n", + " \"KRK\": \"Krakow\",\n", + " \"BJV\": \"Bodrum\",\n", + " \"GYD\": \"Baku\",\n", + " \"TLV\": \"Tel Aviv\",\n", + " \"WAW\": \"Warsaw\",\n", + " \"DLM\": \"Dalaman\",\n", + " \"IBZ\": \"Ibiza\",\n", + " \"BLQ\": \"Bologna\",\n", + " \"FAB\": \"Farnborough\",\n", + " \"FLR\": \"Florence\",\n", + " \"BRQ\": \"Brno\",\n", + " \"BJY\": \"Belgrade (Batajnica)\",\n", + " \"GOT\": \"Gothenburg\",\n", + " \"DBB\": \"Dabaa\",\n", + " \"CGN\": \"Cologne\",\n", + " \"FOG\": \"Foggia\",\n", + " \"FJR\": \"Fujairah\",\n", + " \"CFU\": \"Corfu\",\n", + " \"ABV\": \"Abuja\",\n", + " \"COV\": \"Coventry\",\n", + " \"BUS\": \"Batumi\",\n", + " \"ZRH\": \"Zurich\",\n", + " \"UAB\": \"Incirlik\",\n", + " \"ULH\": \"Al Ula\",\n", + " \"TBS\": \"Tbilisi\",\n", + " \"STN\": \"London (Stansted)\",\n", + " \"XJD\": \"Al Udeid\",\n", + " \"TEB\": \"Teterboro\",\n", + " \"SPX\": \"Spangdahlem\",\n", + " \"VAR\": \"Varna\",\n", + " \"LIN\": \"Milan (Linate)\",\n", + " \"TMP\": \"Tampere\",\n", + " \"VCE\": \"Venice\",\n", + " \"LTN\": \"London (Luton)\",\n", + " \"SKG\": \"Thessaloniki\",\n", + " \"TRN\": \"Turin\",\n", + " \"TUN\": \"Tunis\",\n", + " \"THR\": \"Tehran\",\n", + " \"OSR\": \"Ostrava\",\n", + " \"SOF\": \"Sofia\",\n", + " \"TRS\": \"Trieste\",\n", + " \"LIS\": \"Lisbon\",\n", + " \"JMK\": \"Mykonos\",\n", + " \"RHO\": \"Rhodes\",\n", + " \"KYE\": \"Kiryat Shmona\",\n", + " \"RKE\": \"Roskilde\",\n", + " \"RIX\": \"Riga\",\n", + " \"TIV\": \"Tivat\",\n", + " \"NAP\": \"Naples\",\n", + " \"OLB\": \"Olbia\",\n", + " \"MUH\": \"Marsa Matruh\",\n", + " \"VIE\": \"Vienna\",\n", + " \"SZX\": \"Shenzhen\",\n", + "}" ] }, { @@ -782,11 +895,53 @@ }, "outputs": [], "source": [ - "departures_exploded['airportcity'] = departures_exploded['iata_arr'].map(iata_mapping)\n", - "most_changed_departures = pd.DataFrame(departures_exploded[departures_exploded['flight_status'].isin(['scheduled', 'cancelled', 'diverted'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)\n", - "most_cancelled_departures = pd.DataFrame(departures_exploded[departures_exploded['flight_status'].isin(['cancelled'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)\n", - "most_scheduled_departures = pd.DataFrame(departures_exploded[departures_exploded['flight_status'].isin(['scheduled'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)\n", - "most_diverted_departures = pd.DataFrame(departures_exploded[departures_exploded['flight_status'].isin(['diverted'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)" + "departures_exploded[\"airportcity\"] = departures_exploded[\"iata_arr\"].map(iata_mapping)\n", + "most_changed_departures = (\n", + " pd.DataFrame(\n", + " departures_exploded[\n", + " departures_exploded[\"flight_status\"].isin(\n", + " [\"scheduled\", \"cancelled\", \"diverted\"]\n", + " )\n", + " ][\"airportcity\"]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")\n", + "most_cancelled_departures = (\n", + " pd.DataFrame(\n", + " departures_exploded[departures_exploded[\"flight_status\"].isin([\"cancelled\"])][\n", + " \"airportcity\"\n", + " ]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")\n", + "most_scheduled_departures = (\n", + " pd.DataFrame(\n", + " departures_exploded[departures_exploded[\"flight_status\"].isin([\"scheduled\"])][\n", + " \"airportcity\"\n", + " ]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")\n", + "most_diverted_departures = (\n", + " pd.DataFrame(\n", + " departures_exploded[departures_exploded[\"flight_status\"].isin([\"diverted\"])][\n", + " \"airportcity\"\n", + " ]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")" ] }, { @@ -810,45 +965,64 @@ } ], "source": [ - "fig, axs = plt.subplots(2,2,figsize=(10,12))\n", + "fig, axs = plt.subplots(2, 2, figsize=(10, 12))\n", "ax = axs.flatten()\n", "\n", - "ax[0].barh(y='airportcity', width='count', data=most_changed_departures, color='lightblue')\n", - "ax[1].barh(y='airportcity', width='count', data=most_cancelled_departures, color='orange')\n", - "ax[2].barh(y='airportcity', width='count', data=most_scheduled_departures, color='#023436')\n", - "ax[3].barh(y='airportcity', width='count', data=most_diverted_departures, color='#00BFB3')\n", + "ax[0].barh(\n", + " y=\"airportcity\", width=\"count\", data=most_changed_departures, color=\"lightblue\"\n", + ")\n", + "ax[1].barh(\n", + " y=\"airportcity\", width=\"count\", data=most_cancelled_departures, color=\"orange\"\n", + ")\n", + "ax[2].barh(\n", + " y=\"airportcity\", width=\"count\", data=most_scheduled_departures, color=\"#023436\"\n", + ")\n", + "ax[3].barh(\n", + " y=\"airportcity\", width=\"count\", data=most_diverted_departures, color=\"#00BFB3\"\n", + ")\n", "\n", "for a in ax:\n", " a.set_xlim(0, 320)\n", - " \n", + "\n", " # Remove right and top spines\n", " for spine in a.spines.values():\n", " spine.set_visible(False)\n", - " \n", + "\n", " # Optionally, remove grid lines and ticks (if needed)\n", " a.xaxis.grid(False)\n", " a.yaxis.grid(False)\n", - " a.tick_params(axis='both', which='both', length=0)\n", + " a.tick_params(axis=\"both\", which=\"both\", length=0)\n", " a.set_xticks([]) # Remove x-axis ticks\n", "\n", " for index, value in enumerate(a.patches):\n", - " a.text(value.get_width()+20, value.get_y() + value.get_height() / 2, \n", - " f'{int(value.get_width())}', \n", - " ha='center', va='center')\n", + " a.text(\n", + " value.get_width() + 20,\n", + " value.get_y() + value.get_height() / 2,\n", + " f\"{int(value.get_width())}\",\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " )\n", "\n", "# Adding titles and labels (optional)\n", - "ax[0].set_title('Most Changed Departures', loc='left')\n", - "ax[0].text(0.64, 0.99, 'Changed includes scheduled, diverted and cancelled flights', ha='center', va='center', transform=ax[0].transAxes, fontsize=9)\n", - "\n", - "ax[1].set_title('Most Cancelled Departures', loc='left')\n", - "ax[2].set_title('Most Scheduled Departures', loc='left')\n", - "ax[3].set_title('Most Diverted Departures', loc='left')\n", - "\n", - "plt.suptitle('Top 10 Destinations with Changes in Departures from BEY in 2024')\n", - "\n", - "plt.subplots_adjust(wspace=0.4) \n", - "plt.tight_layout()\n", - "\n" + "ax[0].set_title(\"Most Changed Departures\", loc=\"left\")\n", + "ax[0].text(\n", + " 0.64,\n", + " 0.99,\n", + " \"Changed includes scheduled, diverted and cancelled flights\",\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " transform=ax[0].transAxes,\n", + " fontsize=9,\n", + ")\n", + "\n", + "ax[1].set_title(\"Most Cancelled Departures\", loc=\"left\")\n", + "ax[2].set_title(\"Most Scheduled Departures\", loc=\"left\")\n", + "ax[3].set_title(\"Most Diverted Departures\", loc=\"left\")\n", + "\n", + "plt.suptitle(\"Top 10 Destinations with Changes in Departures from BEY in 2024\")\n", + "\n", + "plt.subplots_adjust(wspace=0.4)\n", + "plt.tight_layout()" ] }, { @@ -924,10 +1098,27 @@ "source": [ "output_notebook() # Display plots inline in a Jupyter notebook\n", "\n", - "departures_exploded['flight_date'] = departures_exploded['flight_date'].apply(lambda x: pd.to_datetime(x))\n", - "df = departures_exploded.groupby([pd.Grouper(key='flight_date', freq='W'),'flight_status']).count()[['iata_arr']].reset_index()\n", - "\n", - "show(get_area_plot(df, 'Weekly Departures from BEY', 'Source: Flight data from AviationStack and conflict events from ACLED', acled_events_weekly, reindex_freq='W', events=events))\n" + "departures_exploded[\"flight_date\"] = departures_exploded[\"flight_date\"].apply(\n", + " lambda x: pd.to_datetime(x)\n", + ")\n", + "df = (\n", + " departures_exploded.groupby(\n", + " [pd.Grouper(key=\"flight_date\", freq=\"W\"), \"flight_status\"]\n", + " )\n", + " .count()[[\"iata_arr\"]]\n", + " .reset_index()\n", + ")\n", + "\n", + "show(\n", + " get_area_plot(\n", + " df,\n", + " \"Weekly Departures from BEY\",\n", + " \"Source: Flight data from AviationStack and conflict events from ACLED\",\n", + " acled_events_weekly,\n", + " reindex_freq=\"W\",\n", + " events=events,\n", + " )\n", + ")" ] }, { @@ -940,9 +1131,13 @@ }, "outputs": [], "source": [ - "arrivals = pd.concat([pd.read_csv('../../data/aviation/aviationstack_bey_25082024_01012024_arr2.csv'),\n", - " pd.read_csv('../../data/aviation/aviationstack_bey_26082024_01012024_arr.csv')])\n", - "arrivals.drop(columns='Unnamed: 0', inplace=True)\n", + "arrivals = pd.concat(\n", + " [\n", + " pd.read_csv(\"../../data/aviation/aviationstack_bey_25082024_01012024_arr2.csv\"),\n", + " pd.read_csv(\"../../data/aviation/aviationstack_bey_26082024_01012024_arr.csv\"),\n", + " ]\n", + ")\n", + "arrivals.drop(columns=\"Unnamed: 0\", inplace=True)\n", "arrivals.drop_duplicates(inplace=True)\n", "arrivals.reset_index(drop=True, inplace=True)" ] @@ -972,18 +1167,24 @@ "\n", "df = arrivals\n", "# Assuming df is your DataFrame and 'flight_date' is the column with the dates\n", - "df['flight_date'] = pd.to_datetime(df['flight_date']) # Ensure 'flight_date' is in datetime format\n", + "df[\"flight_date\"] = pd.to_datetime(\n", + " df[\"flight_date\"]\n", + ") # Ensure 'flight_date' is in datetime format\n", "\n", "# Generate the complete date range from the minimum to the maximum date\n", - "complete_date_range = pd.date_range(start=df['flight_date'].min(), end=df['flight_date'].max())\n", + "complete_date_range = pd.date_range(\n", + " start=df[\"flight_date\"].min(), end=df[\"flight_date\"].max()\n", + ")\n", "\n", "# Find missing dates by comparing the complete date range with the dates in the DataFrame\n", - "missing_dates = complete_date_range.difference(df['flight_date'])\n", + "missing_dates = complete_date_range.difference(df[\"flight_date\"])\n", "\n", "if missing_dates.empty:\n", " print(\"All dates are present.\")\n", "else:\n", - " print(f\"Flights for the following dates are missing in the data, reasons are unknown.: {missing_dates}\")\n" + " print(\n", + " f\"Flights for the following dates are missing in the data, reasons are unknown.: {missing_dates}\"\n", + " )" ] }, { @@ -1019,12 +1220,16 @@ ], "source": [ "before = arrivals_exploded.shape[0]\n", - "print(f'There were {before} flights before duplication check')\n", + "print(f\"There were {before} flights before duplication check\")\n", "# check for duplicate flights i.e., flights scheduled to take off at the exact same time from the same place to the same destination\n", - "arrivals_exploded = arrivals_exploded.drop_duplicates(subset=['flight_date', 'scheduled_arr', 'iata_arr', 'iata_dep', 'scheduled_dep'])\n", + "arrivals_exploded = arrivals_exploded.drop_duplicates(\n", + " subset=[\"flight_date\", \"scheduled_arr\", \"iata_arr\", \"iata_dep\", \"scheduled_dep\"]\n", + ")\n", "\n", "after = arrivals_exploded.shape[0]\n", - "print(f'There are {after} flights after duplication check. {before-after} flights were duplicated')" + "print(\n", + " f\"There are {after} flights after duplication check. {before-after} flights were duplicated\"\n", + ")" ] }, { @@ -1083,8 +1288,14 @@ ], "source": [ "# Test to see if any flight has more than one flight status assoctaed with it.\n", - "duplicate_status_test = arrivals_exploded.groupby(['flight_date', 'scheduled_arr', 'iata_arr', 'iata_dep', 'scheduled_dep'])[['flight_status']].count().reset_index()\n", - "duplicate_status_test[duplicate_status_test['flight_status']>1]" + "duplicate_status_test = (\n", + " arrivals_exploded.groupby(\n", + " [\"flight_date\", \"scheduled_arr\", \"iata_arr\", \"iata_dep\", \"scheduled_dep\"]\n", + " )[[\"flight_status\"]]\n", + " .count()\n", + " .reset_index()\n", + ")\n", + "duplicate_status_test[duplicate_status_test[\"flight_status\"] > 1]" ] }, { @@ -1150,10 +1361,22 @@ "source": [ "output_notebook() # Display plots inline in a Jupyter notebook\n", "\n", - "df = df = arrivals_exploded.groupby(['flight_date', 'flight_status']).count()[['iata_arr']].reset_index()\n", - "df['flight_date'] = df['flight_date'].apply(lambda x: pd.to_datetime(x))\n", - "\n", - "show(get_area_plot(df, 'Daily Arrivals to BEY', 'Source: Flight data from AviationStack and conflict events from ACLED', acled_events_daily, events=events))\n" + "df = df = (\n", + " arrivals_exploded.groupby([\"flight_date\", \"flight_status\"])\n", + " .count()[[\"iata_arr\"]]\n", + " .reset_index()\n", + ")\n", + "df[\"flight_date\"] = df[\"flight_date\"].apply(lambda x: pd.to_datetime(x))\n", + "\n", + "show(\n", + " get_area_plot(\n", + " df,\n", + " \"Daily Arrivals to BEY\",\n", + " \"Source: Flight data from AviationStack and conflict events from ACLED\",\n", + " acled_events_daily,\n", + " events=events,\n", + " )\n", + ")" ] }, { @@ -1229,10 +1452,27 @@ "source": [ "output_notebook() # Display plots inline in a Jupyter notebook\n", "\n", - "arrivals_exploded['flight_date'] = arrivals_exploded['flight_date'].apply(lambda x: pd.to_datetime(x))\n", - "df = arrivals_exploded.groupby([pd.Grouper(key='flight_date', freq='W'),'flight_status']).count()[['iata_arr']].reset_index()\n", - "\n", - "show(get_area_plot(df, 'Weekly Arrivals to BEY', 'Source: Flight data from AviationStack and conflict events from ACLED', acled_events_weekly, reindex_freq='W', events=events))\n" + "arrivals_exploded[\"flight_date\"] = arrivals_exploded[\"flight_date\"].apply(\n", + " lambda x: pd.to_datetime(x)\n", + ")\n", + "df = (\n", + " arrivals_exploded.groupby(\n", + " [pd.Grouper(key=\"flight_date\", freq=\"W\"), \"flight_status\"]\n", + " )\n", + " .count()[[\"iata_arr\"]]\n", + " .reset_index()\n", + ")\n", + "\n", + "show(\n", + " get_area_plot(\n", + " df,\n", + " \"Weekly Arrivals to BEY\",\n", + " \"Source: Flight data from AviationStack and conflict events from ACLED\",\n", + " acled_events_weekly,\n", + " reindex_freq=\"W\",\n", + " events=events,\n", + " )\n", + ")" ] }, { @@ -1255,11 +1495,53 @@ }, "outputs": [], "source": [ - "arrivals_exploded['airportcity'] = arrivals_exploded['iata_dep'].map(iata_mapping)\n", - "most_changed_arrivals = pd.DataFrame(arrivals_exploded[arrivals_exploded['flight_status'].isin(['scheduled', 'cancelled', 'diverted'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)\n", - "most_cancelled_arrivals = pd.DataFrame(arrivals_exploded[arrivals_exploded['flight_status'].isin(['cancelled'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)\n", - "most_scheduled_arrivals = pd.DataFrame(arrivals_exploded[arrivals_exploded['flight_status'].isin(['scheduled'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)\n", - "most_diverted_arrivals = pd.DataFrame(arrivals_exploded[arrivals_exploded['flight_status'].isin(['diverted'])]['airportcity'].value_counts().head(10)).reset_index().sort_values(by='count', ascending=True)" + "arrivals_exploded[\"airportcity\"] = arrivals_exploded[\"iata_dep\"].map(iata_mapping)\n", + "most_changed_arrivals = (\n", + " pd.DataFrame(\n", + " arrivals_exploded[\n", + " arrivals_exploded[\"flight_status\"].isin(\n", + " [\"scheduled\", \"cancelled\", \"diverted\"]\n", + " )\n", + " ][\"airportcity\"]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")\n", + "most_cancelled_arrivals = (\n", + " pd.DataFrame(\n", + " arrivals_exploded[arrivals_exploded[\"flight_status\"].isin([\"cancelled\"])][\n", + " \"airportcity\"\n", + " ]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")\n", + "most_scheduled_arrivals = (\n", + " pd.DataFrame(\n", + " arrivals_exploded[arrivals_exploded[\"flight_status\"].isin([\"scheduled\"])][\n", + " \"airportcity\"\n", + " ]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")\n", + "most_diverted_arrivals = (\n", + " pd.DataFrame(\n", + " arrivals_exploded[arrivals_exploded[\"flight_status\"].isin([\"diverted\"])][\n", + " \"airportcity\"\n", + " ]\n", + " .value_counts()\n", + " .head(10)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"count\", ascending=True)\n", + ")" ] }, { @@ -1283,45 +1565,60 @@ } ], "source": [ - "fig, axs = plt.subplots(2,2,figsize=(10,12))\n", + "fig, axs = plt.subplots(2, 2, figsize=(10, 12))\n", "ax = axs.flatten()\n", "\n", - "ax[0].barh(y='airportcity', width='count', data=most_changed_arrivals, color='lightblue')\n", - "ax[1].barh(y='airportcity', width='count', data=most_cancelled_arrivals, color='orange')\n", - "ax[2].barh(y='airportcity', width='count', data=most_scheduled_arrivals, color='#023436')\n", - "ax[3].barh(y='airportcity', width='count', data=most_diverted_arrivals, color='#00BFB3')\n", + "ax[0].barh(\n", + " y=\"airportcity\", width=\"count\", data=most_changed_arrivals, color=\"lightblue\"\n", + ")\n", + "ax[1].barh(y=\"airportcity\", width=\"count\", data=most_cancelled_arrivals, color=\"orange\")\n", + "ax[2].barh(\n", + " y=\"airportcity\", width=\"count\", data=most_scheduled_arrivals, color=\"#023436\"\n", + ")\n", + "ax[3].barh(y=\"airportcity\", width=\"count\", data=most_diverted_arrivals, color=\"#00BFB3\")\n", "\n", "for a in ax:\n", " a.set_xlim(0, 320)\n", - " \n", + "\n", " # Remove right and top spines\n", " for spine in a.spines.values():\n", " spine.set_visible(False)\n", - " \n", + "\n", " # Optionally, remove grid lines and ticks (if needed)\n", " a.xaxis.grid(False)\n", " a.yaxis.grid(False)\n", - " a.tick_params(axis='both', which='both', length=0)\n", + " a.tick_params(axis=\"both\", which=\"both\", length=0)\n", " a.set_xticks([]) # Remove x-axis ticks\n", "\n", " for index, value in enumerate(a.patches):\n", - " a.text(value.get_width()+20, value.get_y() + value.get_height() / 2, \n", - " f'{int(value.get_width())}', \n", - " ha='center', va='center')\n", + " a.text(\n", + " value.get_width() + 20,\n", + " value.get_y() + value.get_height() / 2,\n", + " f\"{int(value.get_width())}\",\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " )\n", "\n", "# Adding titles and labels (optional)\n", - "ax[0].set_title('Most Changed Arrivals', loc='left')\n", - "ax[0].text(0.73, 0.99, 'Changed includes scheduled, diverted and cancelled flights', ha='center', va='center', transform=ax[0].transAxes, fontsize=9)\n", - "\n", - "ax[1].set_title('Most Cancelled Arrivals', loc='left')\n", - "ax[2].set_title('Most Scheduled Arrivals', loc='left')\n", - "ax[3].set_title('Most Diverted Arrivals', loc='left')\n", - "\n", - "plt.suptitle('Top 10 Destinations with Changes in Arrivals to BEY in 2024')\n", - "\n", - "plt.subplots_adjust(wspace=0.4) \n", - "plt.tight_layout()\n", - "\n" + "ax[0].set_title(\"Most Changed Arrivals\", loc=\"left\")\n", + "ax[0].text(\n", + " 0.73,\n", + " 0.99,\n", + " \"Changed includes scheduled, diverted and cancelled flights\",\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " transform=ax[0].transAxes,\n", + " fontsize=9,\n", + ")\n", + "\n", + "ax[1].set_title(\"Most Cancelled Arrivals\", loc=\"left\")\n", + "ax[2].set_title(\"Most Scheduled Arrivals\", loc=\"left\")\n", + "ax[3].set_title(\"Most Diverted Arrivals\", loc=\"left\")\n", + "\n", + "plt.suptitle(\"Top 10 Destinations with Changes in Arrivals to BEY in 2024\")\n", + "\n", + "plt.subplots_adjust(wspace=0.4)\n", + "plt.tight_layout()" ] }, {