Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/zenodo uploader #214

Merged
merged 41 commits into from
Dec 22, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
ef1d395
Started implementation with basic test to understand better the requi…
jsmatias Nov 21, 2023
525233e
Endpoint created
jsmatias Nov 22, 2023
242a35b
Tests were refactored and the logic for the zenodo uploader included.
jsmatias Nov 27, 2023
8b81edc
more tests added to zenodo uploader
jsmatias Nov 27, 2023
392f776
Clean up and minor rearrangement of the files.
jsmatias Nov 27, 2023
893bc3f
Some docstrings added
jsmatias Nov 27, 2023
ede20bf
Parameter as an option to publish included and more tests written.
jsmatias Nov 29, 2023
f9b3965
Merge branch 'develop' into feature/zenodo_uploader
jsmatias Dec 8, 2023
0459759
Resolving PR comments: Replaced engine and with DbSession on tests.
jsmatias Dec 8, 2023
f397043
Resolving PR comments: corrected and improved description of the zeno…
jsmatias Dec 8, 2023
bf46cdb
Included a validator for zenodo id and refactored part of the code fo…
jsmatias Dec 11, 2023
dbc48fd
Corrected statuds code 423 -> 409
jsmatias Dec 11, 2023
3055689
Created an abstract class for general functionalities of the uploaders.
jsmatias Dec 12, 2023
acec6f1
corrected code duplication
jsmatias Dec 12, 2023
06bcaf0
improved readability of the tests of the zenodo uploader
jsmatias Dec 13, 2023
0e7ef9e
replaced set_up function for pytest.fixtures
jsmatias Dec 13, 2023
e181286
improved error handling logic
jsmatias Dec 14, 2023
6347b10
improved metadata
jsmatias Dec 16, 2023
986afb9
made the method in the abstract
jsmatias Dec 16, 2023
6572200
included a license validator to upload content to zenodo
jsmatias Dec 17, 2023
b631288
validation of the license value was moved to the beginning of the upl…
jsmatias Dec 17, 2023
eada74c
included validation for the metadata contact name, required to publis…
jsmatias Dec 18, 2023
d8aae43
minor corretion on docstring
jsmatias Dec 18, 2023
042742f
corrected the url for browser access of the dataset on zenodo
jsmatias Dec 18, 2023
243066d
Merge branch 'develop' into feature/zenodo_uploader
jsmatias Dec 19, 2023
c0a47a4
added zenodo validator for platform id to concept.py
jsmatias Dec 20, 2023
c94faa0
renamed error_handling function
jsmatias Dec 20, 2023
3c49371
Minor changes
jsmatias Dec 20, 2023
f3e9c37
added a version requirement check
jsmatias Dec 20, 2023
2cd454e
improved documentation
jsmatias Dec 20, 2023
9896c7f
added the functionality of auto generating repo_id for hugging face
jsmatias Dec 20, 2023
ff81fca
created abstract method for the validation of the platform_resource_i…
jsmatias Dec 20, 2023
f98de88
Improved error tracing back for an unexpected zenodo response status …
jsmatias Dec 21, 2023
7518cc7
removed token and repo_id from class instance
jsmatias Dec 21, 2023
729ece7
minor changes on docstrings
jsmatias Dec 21, 2023
2d5816f
Added authorisation to the upload endpoints
jsmatias Dec 21, 2023
a33725b
renamed error_handlers -> error_handling
jsmatias Dec 21, 2023
c303f22
Merge branch 'develop' into feature/zenodo_uploader
jsmatias Dec 21, 2023
d837e21
improved documentation and renamed function
jsmatias Dec 21, 2023
4665fbe
correct regex and added tests for platform_resource_identifier
jsmatias Dec 22, 2023
c552cd6
extra tests for zenodo and hugging face uploaders
jsmatias Dec 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
# data/ is intended for database data from the mysql container
data/

# MacOS
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
5 changes: 2 additions & 3 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@
from sqlalchemy.engine import Engine
from sqlmodel import Session, select

import routers
from authentication import get_current_user
from config import KEYCLOAK_CONFIG
from database.deletion.triggers import add_delete_triggers
from database.model.concept.concept import AIoDConcept
from database.model.platform.platform import Platform
from database.model.platform.platform_names import PlatformName
from database.setup import sqlmodel_engine
from routers import resource_routers, parent_routers, enum_routers
from routers import resource_routers, parent_routers, enum_routers, uploader_routers


def _parse_args() -> argparse.Namespace:
Expand Down Expand Up @@ -78,9 +77,9 @@ def counts() -> dict:

for router in (
resource_routers.router_list
+ routers.other_routers
+ parent_routers.router_list
+ enum_routers.router_list
+ uploader_routers.router_list
):
app.include_router(router.create(engine, url_prefix))

Expand Down
4 changes: 0 additions & 4 deletions src/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
from .resource_router import ResourceRouter # noqa:F401
from .upload_router_huggingface import UploadRouterHuggingface


other_routers = [UploadRouterHuggingface()]
11 changes: 11 additions & 0 deletions src/routers/uploader_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import abc

from sqlalchemy.engine import Engine
from fastapi import APIRouter


class UploaderRouter(abc.ABC):
    """Common base class of the routers that expose upload endpoints."""

    def create(self, engine: Engine, url_prefix: str) -> APIRouter:
        """Return a new, empty `APIRouter`.

        `engine` and `url_prefix` are not used by this base implementation; they are
        part of the signature so that subclasses can override `create`, obtain the
        router through `super().create(engine, url_prefix)` and attach endpoints to it.
        """
        router = APIRouter()

        return router
5 changes: 5 additions & 0 deletions src/routers/uploader_routers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from routers.uploader_router import UploaderRouter
from .upload_router_zenodo import UploadRouterZenodo
from .upload_router_huggingface import UploadRouterHuggingface

# One router instance per upload platform; the application iterates over this list
# and includes the result of each router's `create(engine, url_prefix)`.
router_list: list[UploaderRouter] = [UploadRouterZenodo(), UploadRouterHuggingface()]
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
from fastapi import File, Query, UploadFile
from sqlalchemy.engine import Engine

from uploader.hugging_face_uploader import handle_upload
from uploaders.hugging_face_uploader import handle_upload
from routers.uploader_router import UploaderRouter


class UploadRouterHuggingface:
class UploadRouterHuggingface(UploaderRouter):
def create(self, engine: Engine, url_prefix: str) -> APIRouter:
router = APIRouter()
router = super().create(engine, url_prefix)

@router.post(url_prefix + "/upload/datasets/{identifier}/huggingface", tags=["upload"])
def huggingFaceUpload(
Expand Down
37 changes: 37 additions & 0 deletions src/routers/uploader_routers/upload_router_zenodo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Annotated

from fastapi import APIRouter
from fastapi import File, Query, UploadFile, Path
from sqlalchemy.engine import Engine

from uploaders.zenodo_uploader import ZenodoUploader
from routers.uploader_router import UploaderRouter


class UploadRouterZenodo(UploaderRouter):
    """Router exposing the endpoint that uploads a file of an AIoD dataset to Zenodo."""

    def create(self, engine: Engine, url_prefix: str) -> APIRouter:
        """Build the router holding the Zenodo upload endpoint.

        The endpoint is registered as
        POST `<url_prefix>/upload/datasets/{identifier}/zenodo` under the "upload" tag.
        """
        router = super().create(engine, url_prefix)

        # Created once per router; the endpoint below closes over this instance.
        zenodo_uploader = ZenodoUploader()

        @router.post(url_prefix + "/upload/datasets/{identifier}/zenodo", tags=["upload"])
        def zenodo_upload(
            identifier: int = Path(
                description="The AIoD dataset identifier",
            ),
            file: UploadFile = File(
                title="File", description="This file will be uploaded to Zenodo"
            ),
            publish: Annotated[
                bool,
                Query(
                    title="Publish dataset",
                    description="When published, the dataset and files will be publicly accessible "
                    "and you will no longer be able to upload more files!",
                ),
            ] = False,
            # NOTE(review): the token is sent as a query parameter and is therefore part
            # of the request URL — consider whether a header would be more appropriate.
            token: str = Query(title="Zenodo Token", description="The access token of Zenodo"),
        ) -> int:
            """Delegate the upload to `ZenodoUploader.handle_upload` and return its result."""
            return zenodo_uploader.handle_upload(engine, identifier, publish, token, file)

        return router
139 changes: 139 additions & 0 deletions src/tests/uploader/zenodo/mock_zenodo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""
Generates mocked responses for zenodo endpoints.
- POST to BASE_URL: Creates an empty record
- GET to BASE_URL/RESOURCE_ID: Gets the info of the record.
- PUT to BASE_URL/RESOURCE_ID: Updates metadata
- PUT to REPO_URL/FILE_NAME: Uploads a file
- POST to BASE_URL/RESOURCE_ID/actions/publish: Publishes the dataset with all content
- GET to BASE_URL/RESOURCE_ID/files: Gets the list of files in draft mode
- GET to RECORDS_URL/RESOURCE_ID/files: Gets the list of published data
"""

import responses

# Deposit endpoint: a POST here is mocked as record creation, and individual records
# are addressed as BASE_URL/RESOURCE_ID.
BASE_URL = "https://zenodo.org/api/deposit/depositions"
# Fake bucket URL, exposed as `links.bucket` of the mocked record; file uploads are
# mocked as PUT to REPO_URL/FILE_NAME.
REPO_URL = "https://zenodo.org/api/files/fake-bucket-id00"
# Endpoint used for the published record and its files.
RECORDS_URL = "https://zenodo.org/api/records"
# Identifier of the single record used by all mocked responses.
RESOURCE_ID = 100


def mock_create_repo(mocked_requests: responses.RequestsMock) -> responses.RequestsMock:
    """Register a mocked POST to BASE_URL answering 201 with a record payload.

    Returns the same `mocked_requests` object that was passed in.
    """
    new_record = record_response()
    mocked_requests.add(responses.POST, BASE_URL, json=new_record, status=201)
    return mocked_requests
jsmatias marked this conversation as resolved.
Show resolved Hide resolved


def mock_get_repo_metadata(
    mocked_requests: responses.RequestsMock, is_published: bool = False
) -> responses.RequestsMock:
    """Register a mocked GET to BASE_URL/RESOURCE_ID answering 200 with the record info.

    `is_published` is forwarded to `record_response` to build the JSON payload.
    Returns the same `mocked_requests` object that was passed in.
    """
    record_url = f"{BASE_URL}/{RESOURCE_ID}"
    record_info = record_response(is_published)
    mocked_requests.add(responses.GET, record_url, json=record_info, status=200)
    return mocked_requests


def mock_update_metadata(mocked_requests: responses.RequestsMock) -> responses.RequestsMock:
    """Register a mocked PUT to BASE_URL/RESOURCE_ID answering 200 with an empty JSON object.

    Returns the same `mocked_requests` object that was passed in.
    """
    record_url = f"{BASE_URL}/{RESOURCE_ID}"
    mocked_requests.add(responses.PUT, record_url, json={}, status=200)
    return mocked_requests


def mock_upload_file(
    mocked_requests: responses.RequestsMock, new_file: str
) -> responses.RequestsMock:
    """Register a mocked PUT to REPO_URL/<new_file> answering 201 with an empty JSON object.

    Returns the same `mocked_requests` object that was passed in.
    """
    upload_url = f"{REPO_URL}/{new_file}"
    mocked_requests.add(responses.PUT, upload_url, json={}, status=201)
    return mocked_requests


def mock_publish_resource(mocked_requests: responses.RequestsMock) -> responses.RequestsMock:
    """Register a mocked POST to BASE_URL/RESOURCE_ID/actions/publish answering 202.

    The JSON payload comes from `publish_response`.
    Returns the same `mocked_requests` object that was passed in.
    """
    publish_url = f"{BASE_URL}/{RESOURCE_ID}/actions/publish"
    published = publish_response()
    mocked_requests.add(responses.POST, publish_url, json=published, status=202)
    return mocked_requests


def mock_get_draft_files(
    mocked_requests: responses.RequestsMock, files: list[str]
) -> responses.RequestsMock:
    """Register a mocked GET to BASE_URL/RESOURCE_ID/files answering 200.

    The JSON payload lists `files` in the draft format built by `draft_files_response`.
    Returns the same `mocked_requests` object that was passed in.
    """
    draft_files_url = f"{BASE_URL}/{RESOURCE_ID}/files"
    listing = draft_files_response(files)
    mocked_requests.add(responses.GET, draft_files_url, json=listing, status=200)
    return mocked_requests


def mock_get_published_files(
    mocked_requests: responses.RequestsMock, files: list[str]
) -> responses.RequestsMock:
    """Register a mocked GET to RECORDS_URL/RESOURCE_ID/files answering 200.

    The JSON payload lists `files` in the format built by `published_files_reponse`.
    Returns the same `mocked_requests` object that was passed in.
    """
    published_files_url = f"{RECORDS_URL}/{RESOURCE_ID}/files"
    listing = published_files_reponse(files)
    mocked_requests.add(responses.GET, published_files_url, json=listing, status=200)
    return mocked_requests


def record_response(is_published: bool = False) -> dict:
    """Build the mocked record payload: its id, state and links (bucket and record)."""
    # Only the value `done` is meaningful here; it is used when `is_published` is true.
    state = "done" if is_published else "unsubmitted/inprogress"
    links = {"bucket": REPO_URL, "record": f"{RECORDS_URL}/{RESOURCE_ID}"}
    return {"id": RESOURCE_ID, "state": state, "links": links}


def publish_response() -> dict:
    """Build the mocked payload of the publish action: only the link to the record."""
    record_link = f"{RECORDS_URL}/{RESOURCE_ID}"
    return {"links": {"record": record_link}}


def draft_files_response(filenames: list[str]) -> list[dict]:
    """Truncated response from zenodo when a request is made to the draft repo url."""
    entries = []
    for filename in filenames:
        entry = {
            "id": f"123-{filename}",
            "filename": filename,
            "filesize": 20,
            "checksum": "12345abcd",
        }
        entries.append(entry)
    return entries


def published_files_reponse(filenames: list[str]) -> dict[str, list[dict]]:
    """Truncated response from zenodo when a request is made to the public repo url."""
    files_url = f"{RECORDS_URL}/{RESOURCE_ID}/files"
    entries = [
        {
            "key": filename,
            "file_id": f"123-{filename}",
            "checksum": "12345abcd",
            "size": 20,
            "links": {"content": f"{files_url}/{filename}/content"},
        }
        for filename in filenames
    ]
    return {"entries": entries}
Loading