Skip to content

Commit

Permalink
Extract update hook running in a class (#218).
Browse files Browse the repository at this point in the history
  • Loading branch information
lemon24 committed Aug 14, 2023
1 parent 7826842 commit 47b1aa7
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 221 deletions.
80 changes: 80 additions & 0 deletions src/reader/_types.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
from __future__ import annotations

import logging
from collections.abc import Callable
from collections.abc import Iterable
from collections.abc import Mapping
from collections.abc import Sequence
from dataclasses import dataclass
from dataclasses import field
from datetime import datetime
from datetime import timezone
from functools import cached_property
from types import MappingProxyType
from types import SimpleNamespace
from typing import Any
from typing import Generic
from typing import get_args
from typing import Literal
from typing import NamedTuple
from typing import TypeVar
from typing import Union

from ._hash_utils import get_hash
from .exceptions import SingleUpdateHookError
from .exceptions import UpdateHookError
from .exceptions import UpdateHookErrorGroup
from .types import _entry_argument
from .types import _feed_argument
from .types import _namedtuple_compat
Expand All @@ -25,12 +32,15 @@
from .types import Entry
from .types import EntryAddedBy
from .types import EntryInput
from .types import EntryUpdateStatus
from .types import ExceptionInfo
from .types import Feed
from .types import FeedInput
from .types import TagFilterInput
from .types import TristateFilterInput

log = logging.getLogger("reader")

# Private API
# https://github.com/lemon24/reader/issues/111

Expand Down Expand Up @@ -556,3 +566,73 @@ def fix_datetime_tzinfo(
assert _old is False or value.tzinfo == _old, value
kwargs[name] = value.replace(tzinfo=_new)
return obj._replace(**kwargs)


UpdateHook = Callable[..., None]
AfterEntryUpdateHook = Callable[[_T, EntryData, EntryUpdateStatus], None]
FeedUpdateHook = Callable[[_T, str], None]
FeedsUpdateHook = Callable[[_T], None]
UpdateHookType = Literal[
'before_feeds_update',
'before_feed_update',
'after_entry_update',
'after_feed_update',
'after_feeds_update',
]


@dataclass(frozen=True)
class UpdateHooks(Generic[_T]):
target: _T
before_feeds_update: list[FeedsUpdateHook[_T]] = field(default_factory=list)
before_feed_update: list[FeedUpdateHook[_T]] = field(default_factory=list)
after_entry_update: list[AfterEntryUpdateHook[_T]] = field(default_factory=list)
after_feed_update: list[FeedUpdateHook[_T]] = field(default_factory=list)
after_feeds_update: list[FeedsUpdateHook[_T]] = field(default_factory=list)

def run(
self, when: UpdateHookType, resource_id: tuple[str, ...] | None, *args: Any
) -> None:
for hook in getattr(self, when):
try:
hook(self.target, *args)
except Exception as e:
raise SingleUpdateHookError(when, hook, resource_id) from e

def group(self, message: str) -> _UpdateHookErrorGrouper:
return _UpdateHookErrorGrouper(self, message)


@dataclass(frozen=True)
class _UpdateHookErrorGrouper:
hooks: UpdateHooks[Any]
message: str
exceptions: list[UpdateHookError] = field(default_factory=list)
seen_dedupe_keys: set[Any] = field(default_factory=set)

def run(
self,
when: UpdateHookType,
resource_id: tuple[str, ...] | None,
*args: Any,
limit: int = 0,
) -> None:
for hook in getattr(self.hooks, when):
try:
hook(self.hooks.target, *args)
except Exception as e:
exc = SingleUpdateHookError(when, hook, resource_id)
exc.__cause__ = e
self.add(exc, resource_id, limit)

def add(self, exc: UpdateHookError, dedupe_key: Any = None, limit: int = 0) -> None:
if limit and dedupe_key not in self.seen_dedupe_keys: # pragma: no cover
if len(self.seen_dedupe_keys) >= limit:
log.error("too many hook errors; discarding exception", exc_info=exc)
return
self.seen_dedupe_keys.add(dedupe_key)
self.exceptions.append(exc)

def close(self) -> None:
if self.exceptions:
raise UpdateHookErrorGroup(self.message, self.exceptions)
69 changes: 16 additions & 53 deletions src/reader/_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@
from ._types import FeedForUpdate
from ._types import FeedUpdateIntent
from ._types import ParsedFeed
from ._types import UpdateHooks
from ._utils import count_consumed
from ._utils import PrefixLogger
from .exceptions import FeedNotFoundError
from .exceptions import ParseError
from .exceptions import SingleUpdateHookError
from .exceptions import UpdateError
from .exceptions import UpdateHookError
from .exceptions import UpdateHookErrorGroup
from .types import EntryUpdateStatus
from .types import ExceptionInfo
from .types import UpdatedFeed
Expand Down Expand Up @@ -321,20 +319,19 @@ class Pipeline:

storage: Storage
parser: Parser
hooks: UpdateHooks[Any]
now: Callable[[], datetime]
map: MapFunction[Any, Any]
# for hooks' usage *only*
reader: Reader
decider = Decider

@classmethod
def from_reader(cls, reader: Reader, map: MapFunction[Any, Any]) -> Pipeline:
return cls(
storage=reader._storage,
parser=reader._parser,
hooks=reader._update_hooks,
now=reader._now,
map=map,
reader=reader,
)

def update(self, filter_options: FeedFilterOptions) -> Iterable[UpdateResult]:
Expand Down Expand Up @@ -445,15 +442,7 @@ def update_feed(
feed: FeedUpdateIntent | None,
entries: Iterable[EntryUpdateIntent],
) -> tuple[int, int]:
# FIXME: log hook errors; cannot warn, because they'd be raised

for feed_hook in self.reader.before_feed_update_hooks:
try:
feed_hook(self.reader, url)
except Exception as e:
raise SingleUpdateHookError(
'before_feed_update', feed_hook, (url,)
) from e
self.hooks.run('before_feed_update', (url,), url)

if feed:
if entries:
Expand All @@ -463,53 +452,27 @@ def update_feed(
# if feed_for_update.url != parsed_feed.feed.url, the feed was redirected.
# TODO: Maybe handle redirects somehow else (e.g. change URL if permanent).

hook_errors = self.hooks.group("got unexpected after-update hook errors")

new_count = 0
updated_count = 0
entry_hook_errors: dict[tuple[str, str], list[UpdateHookError]] = {}
for entry in entries:
if entry.new:
new_count += 1
entry_status = EntryUpdateStatus.NEW
else:
updated_count += 1
entry_status = EntryUpdateStatus.MODIFIED
for entry_hook in self.reader.after_entry_update_hooks:
try:
entry_hook(self.reader, entry.entry, entry_status)
except Exception as e:
resource_id = entry.entry.resource_id
if (
resource_id not in entry_hook_errors
and len(entry_hook_errors) >= 5
): # pragma: no cover
log.exception(
"update feed %r: more than 5 entries had hook errors; "
"discarding exception for hook %r and entry %r",
url,
entry_hook,
resource_id,
)
continue
# FIXME: remove the "pragma: no cover" above
exc = SingleUpdateHookError(
'after_entry_update', entry_hook, resource_id
)
exc.__cause__ = e
entry_hook_errors.setdefault(resource_id, []).append(exc)

hook_errors = [e for es in entry_hook_errors.values() for e in es]

for feed_hook in self.reader.after_feed_update_hooks:
try:
feed_hook(self.reader, url)
except Exception as e:
exc = SingleUpdateHookError('after_feed_update', feed_hook, (url,))
exc.__cause__ = e
hook_errors.append(exc)

if hook_errors:
raise UpdateHookErrorGroup(
"got unexpected after-update hook errors", hook_errors

hook_errors.run(
'after_entry_update',
entry.entry.resource_id,
entry.entry,
entry_status,
limit=5,
)

hook_errors.run('after_feed_update', (url,), url)
hook_errors.close()

return new_count, updated_count
Loading

0 comments on commit 47b1aa7

Please sign in to comment.