Skip to content

Commit

Permalink
Merge pull request #70 from openzim/update-archive-metadata
Browse files Browse the repository at this point in the history
Basic Archive management
  • Loading branch information
rgaudin authored Aug 23, 2024
2 parents 1e23ced + 3de4f75 commit bd70033
Show file tree
Hide file tree
Showing 27 changed files with 2,059 additions and 977 deletions.
20 changes: 14 additions & 6 deletions backend/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import humanfriendly
from rq import Retry

logging.basicConfig()


def determine_mandatory_environment_variables():
for variable in ("POSTGRES_URI", "S3_URL_WITH_CREDENTIALS", "PRIVATE_SALT"):
Expand Down Expand Up @@ -59,14 +61,16 @@ class BackendConf:

# Deployment
public_url: str = os.getenv("PUBLIC_URL") or "http://localhost"
# /!\ this must match the region/bucket on s3 credentials
download_url: str = (
os.getenv("DOWNLOAD_URL")
or "https://s3.us-west-1.wasabisys.com/org-kiwix-zimit/zim"
or "https://s3.eu-west-2.wasabisys.com/org-kiwix-nautilus"
)
allowed_origins = os.getenv(
"ALLOWED_ORIGINS",
"http://localhost",
).split("|")
debug: bool = bool(os.getenv("DEBUG") or "")

# Zimfarm (3rd party API creating ZIMs and calling back with feedback)
zimfarm_api_url: str = (
Expand All @@ -80,10 +84,16 @@ class BackendConf:
zimfarm_task_cpu: int = int(os.getenv("ZIMFARM_TASK_CPU") or "3")
zimfarm_task_memory: int = 0
zimfarm_task_disk: int = 0
zimfarm_callback_base_url = os.getenv("ZIMFARM_CALLBACK_BASE_URL", "")
zimfarm_callback_base_url = (
os.getenv("ZIMFARM_CALLBACK_BASE_URL") or "https://api.nautilus.openzim.org/v1"
)
zimfarm_callback_token = os.getenv("ZIMFARM_CALLBACK_TOKEN", uuid.uuid4().hex)
zimfarm_task_worker: str = os.getenv("ZIMFARM_TASK_WORKDER") or "-"
zimfarm_task_worker: str = os.getenv("ZIMFARM_TASK_WORKER") or "-"
zimfarm_request_timeout_sec: int = 10
zim_download_url: str = (
os.getenv("ZIM_DOWNLOAD_URL")
or "https://s3.us-west-1.wasabisys.com/org-kiwix-zimit"
)

# Mailgun (3rd party API to send emails)
mailgun_api_url: str = os.getenv("MAILGUN_API_URL") or ""
Expand All @@ -95,6 +105,7 @@ class BackendConf:

def __post_init__(self):
self.logger = logging.getLogger(Path(__file__).parent.name)
self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)
self.transient_storage_path.mkdir(exist_ok=True)
self.job_retry = Retry(max=self.s3_max_tries, interval=int(self.s3_retry_wait))

Expand All @@ -119,9 +130,6 @@ def __post_init__(self):
os.getenv("ZIMFARM_TASK_DISK") or "200MiB"
)

if not self.zimfarm_callback_base_url:
self.zimfarm_callback_base_url = f"{self.zimfarm_api_url}/requests/hook"


constants = BackendConf()
logger = constants.logger
20 changes: 18 additions & 2 deletions backend/api/database/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
from enum import Enum
from typing import Any, ClassVar, TypeVar
from uuid import UUID

Expand Down Expand Up @@ -36,6 +37,7 @@ class ArchiveConfig(BaseModel):
tags: list[str]
illustration: str
filename: str
main_logo: str | None = None

@classmethod
def init_with(cls: type[T], filename: str, **kwargs) -> T:
Expand All @@ -49,7 +51,8 @@ def init_with(cls: type[T], filename: str, **kwargs) -> T:
def is_ready(self) -> bool:
try:
for key in self.model_fields.keys():
validate_required_values(key.title(), getattr(self, key, ""))
if key != "main_logo":
validate_required_values(key.title(), getattr(self, key, ""))
validate_title("Title", self.title)
validate_description("Description", self.description)
validate_language("Language", self.languages)
Expand All @@ -60,6 +63,18 @@ def is_ready(self) -> bool:
return True


class ArchiveStatus(str, Enum):
    """Possible states of an archive.

    Members are also `str` instances (the class mixes in `str`), each one
    valued with its own upper-case name.
    """

    # The archive is in the database but has not been requested yet;
    # it can still be modified.
    PENDING = "PENDING"
    # The archive has been requested from Zimfarm: it cannot be modified by
    # the user anymore and is awaiting the callback from Zimfarm.
    REQUESTED = "REQUESTED"
    # The Zimfarm task succeeded: the archive now has a download_url and
    # a filesize.
    READY = "READY"
    # The Zimfarm task failed: the archive cannot be downloaded.
    FAILED = "FAILED"


class ArchiveConfigType(types.TypeDecorator):
cache_ok = True
impl = JSONB
Expand Down Expand Up @@ -89,6 +104,7 @@ class Base(MappedAsDataclass, DeclarativeBase):
# timezone below)
type_annotation_map: ClassVar = {
ArchiveConfig: ArchiveConfigType,
ArchiveStatus: String,
dict[str, Any]: JSONB, # transform Python Dict[str, Any] into PostgreSQL JSONB
list[dict[str, Any]]: JSONB,
datetime: DateTime(
Expand Down Expand Up @@ -207,7 +223,7 @@ class Archive(Base):
completed_on: Mapped[datetime | None]
download_url: Mapped[str | None]
collection_json_path: Mapped[str | None]
status: Mapped[str]
status: Mapped[ArchiveStatus]
zimfarm_task_id: Mapped[UUID | None]
email: Mapped[str | None]
config: Mapped[ArchiveConfig]
5 changes: 3 additions & 2 deletions backend/api/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from api.database.models import Archive

jinja_env = Environment(
loader=FileSystemLoader("templates"),
loader=FileSystemLoader(Path(__file__).parent.joinpath("templates")),
autoescape=select_autoescape(["html", "txt"]),
)
jinja_env.filters["short_id"] = lambda value: str(value)[:5]
Expand Down Expand Up @@ -69,7 +69,8 @@ def get_context(task: dict[str, Any], archive: Archive):
"""Jinja context dict for email notifications"""
return {
"base_url": constants.public_url,
"download_url": constants.download_url,
"zim_download_url": constants.zim_download_url,
"task": task,
"file": next(iter(task["files"].values())) if task.get("files") else None,
"archive": archive,
}
12 changes: 12 additions & 0 deletions backend/api/routes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,15 @@ async def validated_project(
if not project:
raise HTTPException(HTTPStatus.NOT_FOUND, f"Project not found: {project_id}")
return project


async def userless_validated_project(
    project_id: UUID,
    session: Session = Depends(gen_session),
) -> Project:
    """Depends()-able lookup of a Project by its id, ensuring it exists.

    The query filters on the project id only; no user criterion is part of
    the statement.

    Args:
        project_id: identifier of the requested project.
        session: database session, injected through `Depends(gen_session)`.

    Returns:
        The Project matching `project_id`.

    Raises:
        HTTPException: with `HTTPStatus.NOT_FOUND` when the query yields
            no project.
    """
    query = select(Project).filter_by(id=project_id)
    found = session.execute(query).scalar()
    if found:
        return found
    # Nothing matched: answer with a 404 instead of handing back None.
    raise HTTPException(HTTPStatus.NOT_FOUND, f"Project not found: {project_id}")
Loading

0 comments on commit bd70033

Please sign in to comment.