Skip to content

Commit

Permalink
Merge pull request #2876 from cal-itp/fetch-rt-secrets
Browse files Browse the repository at this point in the history
Fetch latest RT secrets every minute
  • Loading branch information
atvaccaro committed Aug 9, 2023
2 parents fedc12f + bc0821b commit 6ff447e
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
- name: app
image: gtfs-rt-archiver
command: ["python"]
args: ["-m", "gtfs_rt_archiver_v3.consumer", "--load-env-secrets"]
args: ["-m", "gtfs_rt_archiver_v3.consumer"]
envFrom:
- configMapRef:
name: archiver-app-vars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
- name: app
image: gtfs-rt-archiver
command: ["python"]
args: ["-m", "gtfs_rt_archiver_v3.ticker", "--load-env-secrets"]
args: ["-m", "gtfs_rt_archiver_v3.ticker"]
envFrom:
- configMapRef:
name: archiver-app-vars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ resources:
images:
- name: 'gtfs-rt-archiver'
newName: 'ghcr.io/cal-itp/data-infra/gtfs-rt-archiver-v3'
newTag: '2023.7.20'
newTag: '2023.8.9'
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ patches:
images:
- name: 'gtfs-rt-archiver'
newName: 'ghcr.io/cal-itp/data-infra/gtfs-rt-archiver-v3'
newTag: '2023.7.20'
newTag: '2023.8.9'
4 changes: 2 additions & 2 deletions services/gtfs-rt-archiver-v3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Code changes require building and pushing a new Docker image, as well as applyin
4. Finally, apply changes using `kubectl` as described above.
1. Currently, the image is built/pushed on merges to main but the Kubernetes manifests are not applied.

### Fixing download configurations
### Changing download configurations
GTFS download configurations (for both Schedule and RT) are sourced from the [GTFS Dataset table](https://airtable.com/appPnJWrQ7ui4UmIl/tbl5V6Vjs4mNQgYbc) in the California Transit Airtable base, and we have [specific documentation](https://docs.google.com/document/d/1IO8x9-31LjwmlBDH0Jri-uWI7Zygi_IPc9nqd7FPEQM/edit#heading=h.b2yta6yeugar) for modifying the table. (Both of these Airtable links require authentication/access to Airtable.) You may need to make URL or authentication adjustments in this table. This data is downloaded daily into our infrastructure and will propagate to the GTFS Schedule and RT downloads; you may execute the [Airtable download job](https://o1d2fa0877cf3fb10p-tp.appspot.com/dags/airtable_loader_v2/grid) manually after making edits to "deploy" the changes more quickly.

Another possible intervention is updating or adding authentication information in [Secret Manager](https://console.cloud.google.com/security/secret-manager). You may create new versions. **As of 2023-04-10 the archiver does not automatically pick up new/modified secrets; you must restart the archiver for changes to take effect.**
Another possible intervention is updating or adding authentication information in [Secret Manager](https://console.cloud.google.com/security/secret-manager). You may create new versions of existing secrets, or add entirely new secrets. Secrets must be tagged with `gtfs_rt: true` to be loaded as secrets in the archiver; secrets are refreshed every minute by the ticker.
4 changes: 2 additions & 2 deletions services/gtfs-rt-archiver-v3/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ services:

gtfs-rt-archiver-v3-ticker:
<<: *gtfs-rt-archiver-v3-common
command: python -m gtfs_rt_archiver_v3.ticker --load-env-secrets
command: python -m gtfs_rt_archiver_v3.ticker

gtfs-rt-archiver-v3-consumer:
<<: *gtfs-rt-archiver-v3-common
command: python -m gtfs_rt_archiver_v3.consumer --load-env-secrets
command: python -m gtfs_rt_archiver_v3.consumer
10 changes: 1 addition & 9 deletions services/gtfs-rt-archiver-v3/gtfs_rt_archiver_v3/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import sentry_sdk
import typer
from calitp_data_infra.auth import get_secrets_by_label # type: ignore
from huey.constants import WORKER_THREAD # type: ignore
from huey.consumer_options import ConsumerConfig # type: ignore
from prometheus_client import start_http_server
Expand Down Expand Up @@ -42,17 +41,10 @@ def set_exception_fingerprint(event, hint):
return event


def main(
port: int = int(os.getenv("CONSUMER_PROMETHEUS_PORT", 9102)),
load_env_secrets: bool = False,
):
def main(port: int = int(os.getenv("CONSUMER_PROMETHEUS_PORT", 9102))):
sentry_sdk.init(before_send=set_exception_fingerprint)
start_http_server(port)

if load_env_secrets:
for key, value in get_secrets_by_label("gtfs_rt").items():
os.environ[key] = value

config = ConsumerConfig(
workers=int(
os.getenv("CALITP_HUEY_CONSUMER_WORKERS", 16)
Expand Down
11 changes: 4 additions & 7 deletions services/gtfs-rt-archiver-v3/gtfs_rt_archiver_v3/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime
from functools import wraps
from pathlib import Path
from typing import Optional
from typing import Mapping, Optional

import humanize
import pendulum
Expand Down Expand Up @@ -98,15 +98,12 @@ def increment_task_signals_counter(
).inc()


auth_dict = None
last_fetch_file = None


@huey.on_startup()
def load_auth_dict():
global auth_dict, last_fetch_file
# TODO: this isn't ideal, we probably could store the keys from get_secrets_by_label() in consumer.py
auth_dict = os.environ
def load_globals():
global last_fetch_file
last_fetch_file = os.environ["LAST_FETCH_FILE"]


Expand All @@ -129,7 +126,7 @@ def inner(*args, **kwargs):

@huey.task(expires=int(os.getenv("CALITP_FETCH_EXPIRATION_SECONDS", 5)))
@scoped
def fetch(tick: datetime, config: GTFSDownloadConfig):
def fetch(tick: datetime, config: GTFSDownloadConfig, auth_dict: Mapping[str, str]):
labels = dict(
record_name=config.name,
record_uri=config.url,
Expand Down
34 changes: 22 additions & 12 deletions services/gtfs-rt-archiver-v3/gtfs_rt_archiver_v3/ticker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Tuple
from typing import List, Mapping, Optional

import pendulum
import schedule # type: ignore
import sentry_sdk
import typer
from cachetools.func import ttl_cache
from calitp_data_infra.auth import get_secrets_by_label # type: ignore
from calitp_data_infra.storage import ( # type: ignore
GTFSDownloadConfig,
Expand All @@ -25,9 +24,12 @@
from .metrics import AIRTABLE_CONFIGURATION_AGE, TICKS
from .tasks import fetch, huey

configs: Optional[List[GTFSDownloadConfig]] = None
secrets: Optional[Mapping[str, str]] = None

@ttl_cache(ttl=300)
def get_configs() -> Tuple[pendulum.DateTime, List[GTFSDownloadConfig]]:

def get_configs():
global configs
typer.secho("pulling updated configs from airtable")
latest = get_latest(GTFSDownloadConfigExtract)
fs = get_fs()
Expand All @@ -51,22 +53,28 @@ def get_configs() -> Tuple[pendulum.DateTime, List[GTFSDownloadConfig]]:
f"found {len(configs)} configs in airtable {latest.path} {age} seconds old"
)
AIRTABLE_CONFIGURATION_AGE.set(age)
return latest.ts, configs


def get_secrets():
    """Reload the module-level ``secrets`` and log how long the load took.

    Rebinds the global ``secrets`` to whatever
    ``get_secrets_by_label("gtfs_rt")`` returns (presumably a mapping of
    secret name to value, per the module-level annotation -- confirm against
    calitp_data_infra.auth). Returns nothing; callers read ``secrets``.
    """
    global secrets
    began_at = pendulum.now()
    secrets = get_secrets_by_label("gtfs_rt")
    # Measure elapsed time only after the fetch has completed.
    elapsed = (pendulum.now() - began_at).in_words()
    typer.secho(f"took {elapsed} to load {len(secrets)} secrets")


def main(
port: int = int(os.getenv("TICKER_PROMETHEUS_PORT", 9102)),
load_env_secrets: bool = False,
touch_file: Path = Path(os.environ["LAST_TICK_FILE"]),
):
assert isinstance(touch_file, Path)
sentry_sdk.init(environment=os.getenv("AIRFLOW_ENV"))
start_http_server(port)

if load_env_secrets:
for key, value in get_secrets_by_label("gtfs_rt").items():
os.environ[key] = value

get_configs()
assert configs is not None
get_secrets()
assert secrets is not None
typer.secho("flushing huey")
huey.flush()

Expand All @@ -76,12 +84,12 @@ def tick(second: int) -> None:
dt = datetime.now(timezone.utc).replace(second=second, microsecond=0)
typer.secho(f"ticking {dt}")
TICKS.inc()
extracted_at, configs = get_configs()
random.shuffle(configs)
for config in configs:
fetch(
tick=dt,
config=config,
auth_dict=secrets,
)
typer.secho(
f"took {(pendulum.now() - start).in_words()} to enqueue {len(configs)} fetches"
Expand All @@ -90,6 +98,8 @@ def tick(second: int) -> None:
schedule.every().minute.at(":00").do(tick, second=0)
schedule.every().minute.at(":20").do(tick, second=20)
schedule.every().minute.at(":40").do(tick, second=40)
schedule.every().minute.at(":45").do(get_configs)
schedule.every().minute.at(":45").do(get_secrets)

typer.secho(f"ticking starting at {pendulum.now()}!")
while True:
Expand Down
2 changes: 1 addition & 1 deletion services/gtfs-rt-archiver-v3/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gtfs-rt-archiver"
version = "2023.7.20"
version = "2023.8.9"
description = ""
authors = ["Andrew Vaccaro <[email protected]>"]

Expand Down

0 comments on commit 6ff447e

Please sign in to comment.