Skip to content

Commit

Permalink
🥅 Do some more error handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
jh0ker committed Nov 2, 2023
1 parent 97e1330 commit e0cd84e
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 43 deletions.
4 changes: 2 additions & 2 deletions ddj_cloud/scrapers/talsperren/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_data(
T2 = TypeVar("T2")


def skip_errors_and_none(
def apply_guarded(
func: Callable[[T2], Optional[T1]],
data: Iterable[T2],
) -> Generator[T1, None, None]:
Expand All @@ -49,6 +49,6 @@ def skip_errors_and_none(
if result is not None:
yield result
except Exception as e:
print("Skipping reservoir due to error:")
print("Skipping due to error:")
print(e)
sentry_sdk.capture_exception(e)
12 changes: 5 additions & 7 deletions ddj_cloud/scrapers/talsperren/federations/agger.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime as dt
from typing import Generator, Iterable

import requests

from ..common import ReservoirRecord, Federation, TZ_UTC
from ..common import ReservoirRecord, Federation, TZ_UTC, apply_guarded


class AggerFederation(Federation):
Expand Down Expand Up @@ -46,9 +47,6 @@ def _get_reservoir_records(self, name: str) -> list[ReservoirRecord]:
if row[value_idx] >= 0 # Negative values seem to be errors
]

def get_data(self, **kwargs) -> list[ReservoirRecord]:
return [
record
for name in self.reservoirs.keys()
for record in self._get_reservoir_records(name)
]
def get_data(self, **kwargs) -> Iterable[ReservoirRecord]:
for records in apply_guarded(self._get_reservoir_records, self.reservoirs.keys()):
yield from records
16 changes: 8 additions & 8 deletions ddj_cloud/scrapers/talsperren/federations/eifel_rur.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import datetime as dt
from typing import Optional
from typing import Iterable, Optional

import bs4
import requests

from ..common import ReservoirRecord, Federation, TZ_BERLIN
from ..common import ReservoirRecord, Federation, TZ_BERLIN, apply_guarded


class EifelRurFederation(Federation):
Expand Down Expand Up @@ -95,9 +95,9 @@ def get_data(
*,
start: Optional[dt.datetime] = None,
end: Optional[dt.datetime] = None,
) -> list[ReservoirRecord]:
return [
record
for name in self.reservoirs.keys()
for record in self._get_reservoir_records(name, start, end)
]
) -> Iterable[ReservoirRecord]:
for records in apply_guarded(
lambda name: self._get_reservoir_records(name, start=start, end=end),
self.reservoirs.keys(),
):
yield from records
13 changes: 5 additions & 8 deletions ddj_cloud/scrapers/talsperren/federations/gelsenwasser.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Generator
from typing import Generator, Iterable
import re

import bs4
import dateparser
import requests
import sentry_sdk

from ..common import ReservoirRecord, Federation, TZ_BERLIN
from ..common import ReservoirRecord, Federation, TZ_BERLIN, apply_guarded


class GelsenwasserFederation(Federation):
Expand Down Expand Up @@ -76,9 +76,6 @@ def _get_reservoir_records(
content_mio_m3,
)

def get_data(self, **kwargs) -> list[ReservoirRecord]:
return [
record
for name in self.reservoirs.keys()
for record in self._get_reservoir_records(name)
]
def get_data(self, **kwargs) -> Iterable[ReservoirRecord]:
for records in apply_guarded(self._get_reservoir_records, self.reservoirs.keys()):
yield from records
4 changes: 2 additions & 2 deletions ddj_cloud/scrapers/talsperren/federations/ruhr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import requests
import re

from ..common import ReservoirRecord, Federation, TZ_BERLIN, skip_errors_and_none
from ..common import ReservoirRecord, Federation, TZ_BERLIN, apply_guarded


class RuhrFederation(Federation):
Expand Down Expand Up @@ -78,4 +78,4 @@ def get_data(self, **kwargs) -> Iterable[ReservoirRecord]:

coord_divs: bs4.ResultSet[bs4.Tag] = coords_div.find_all("div", recursive=False) # type: ignore

return skip_errors_and_none(self._parse_coord_div, coord_divs)
return apply_guarded(self._parse_coord_div, coord_divs)
13 changes: 5 additions & 8 deletions ddj_cloud/scrapers/talsperren/federations/wahnbach.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from typing import Generator
from typing import Generator, Iterable
import re

import bs4
import dateparser
import requests

from ..common import ReservoirRecord, Federation, TZ_BERLIN
from ..common import ReservoirRecord, Federation, TZ_BERLIN, apply_guarded


class WahnbachReservoirFederation(Federation):
Expand Down Expand Up @@ -82,12 +82,9 @@ def _get_reservoir_records(
# fill_ratio=percentage / 100.0,
)

def get_data(self, **kwargs) -> list[ReservoirRecord]:
return [
record
for name in self.reservoirs.keys()
for record in self._get_reservoir_records(name)
]
def get_data(self, **kwargs) -> Iterable[ReservoirRecord]:
for records in apply_guarded(self._get_reservoir_records, self.reservoirs.keys()):
yield from records


"""
Expand Down
12 changes: 5 additions & 7 deletions ddj_cloud/scrapers/talsperren/federations/wupper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime as dt
from typing import Iterable

import requests

from ..common import TZ_UTC, ReservoirRecord, Federation
from ..common import TZ_UTC, ReservoirRecord, Federation, apply_guarded


class WupperFederation(Federation):
Expand Down Expand Up @@ -113,9 +114,6 @@ def _get_reservoir_records(self, name: str) -> list[ReservoirRecord]:
if row[value_idx] is not None
]

def get_data(self, **kwargs) -> list[ReservoirRecord]:
return [
record
for name in self.reservoirs.keys()
for record in self._get_reservoir_records(name)
]
def get_data(self, **kwargs) -> Iterable[ReservoirRecord]:
for records in apply_guarded(self._get_reservoir_records, self.reservoirs.keys()):
yield from records
8 changes: 7 additions & 1 deletion ddj_cloud/scrapers/talsperren/talsperren.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pandas as pd
import datetime as dt
import sentry_sdk

from ddj_cloud.utils.storage import (
DownloadFailedException,
Expand Down Expand Up @@ -37,7 +38,12 @@ def run():
# Get data from all federations
data = []
for federation in federations:
data.extend(federation.get_data(start=start))
try:
data.extend(federation.get_data(start=start))
except Exception as e:
print("Skipping federation due to error:")
print(e)
sentry_sdk.capture_exception(e)

# Parse into data frame
df_new = pd.DataFrame(data)
Expand Down

0 comments on commit e0cd84e

Please sign in to comment.