Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/get actual data from AI Assets #175

Merged
merged 8 commits into from
Nov 9, 2023
102 changes: 102 additions & 0 deletions src/routers/resource_ai_asset_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from fastapi.responses import Response
from fastapi import APIRouter, HTTPException, status
import requests
from sqlalchemy.engine import Engine

from database.model.ai_asset.ai_asset import AIAsset

from .resource_router import ResourceRouter, _wrap_as_http_exception


class ResourceAIAssetRouter(ResourceRouter):
    """Resource router that also serves the content of an AI asset's distributions.

    On top of the routes created by the parent ``create``, two ``content`` routes are
    registered: one without an index (serves distribution 0, only valid when the asset has a
    single distribution) and one that addresses a distribution by ``distribution_idx``.
    """

    # Seconds to wait for the remote host (connect / between bytes) before giving up, so an
    # unresponsive content URL cannot block the request indefinitely.
    _CONTENT_REQUEST_TIMEOUT = 30

    def create(self, engine: Engine, url_prefix: str) -> APIRouter:
        """Build the parent router and add the two content-download routes to it.

        Args:
            engine: Database engine, passed to the parent router and to the content endpoints.
            url_prefix: Prefix placed in front of every route path.

        Returns:
            The router produced by the parent class, extended with the content routes.
        """
        version = "v1"
        default_kwargs = {
            "response_model_exclude_none": True,
            "deprecated": False,
            "tags": [self.resource_name_plural],
        }

        router = super().create(engine, url_prefix)

        router.add_api_route(
            path=f"{url_prefix}/{self.resource_name_plural}/{version}/{{identifier}}/content",
            endpoint=self.get_resource_content_func(engine, default=True),
            name=self.resource_name,
            response_model=str,
            **default_kwargs,
        )

        router.add_api_route(
            path=f"{url_prefix}/{self.resource_name_plural}/{version}/{{identifier}}/content/"
            f"{{distribution_idx}}",
            endpoint=self.get_resource_content_func(engine, default=False),
            name=self.resource_name,
            response_model=str,
            **default_kwargs,
        )

        return router

    def get_resource_content_func(self, engine: Engine, default: bool):
        """
        Returns a function to download the content from resources.
        This function returns a function (instead of being that function directly) because the
        docstring and the variables are dynamic, and used in Swagger.

        Args:
            engine: Database engine used to look up the resource.
            default: If True, return the endpoint that only takes ``identifier`` and serves
                distribution 0; otherwise return the endpoint that takes ``distribution_idx``.

        Returns:
            The endpoint function to register on the router.
        """

        # NOTE(review): `default` is neither a path parameter nor hidden, so FastAPI will
        # presumably expose it as a query parameter on the indexed route -- confirm whether
        # that is intended. The signature is kept unchanged for backward compatibility.
        def get_resource_content(identifier: str, distribution_idx: int, default: bool = False):
            metadata: AIAsset = self.get_resource(
                engine=engine, identifier=identifier, schema="aiod", platform=None
            )  # type: ignore

            distributions = metadata.distribution
            if not distributions:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND, detail="Distribution not found."
                )
            if default and len(distributions) > 1:
                # The index-less endpoint is ambiguous when there is more than one distribution.
                # `detail` must be a plain string (no trailing comma, which would make a tuple).
                raise HTTPException(
                    status_code=status.HTTP_409_CONFLICT,
                    detail=(
                        "Multiple distributions encountered. "
                        "Use another endpoint indicating the distribution index `distribution_idx` "
                        "at the end of the url for a specific distribution."
                    ),
                )
            if not 0 <= distribution_idx < len(distributions):
                # Negative values are rejected too: Python would otherwise index from the end.
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail="Distribution index out of range.",
                )

            try:
                distribution = distributions[distribution_idx]
                url = distribution.content_url
                encoding_format = distribution.encoding_format
                filename = distribution.name

                # NOTE(review): the upstream status code is not checked, so an error page from
                # the content host is relayed as a successful download -- consider handling it.
                response = requests.get(url, timeout=self._CONTENT_REQUEST_TIMEOUT)
                content = response.content
                headers = {
                    "Content-Disposition": (
                        "attachment; " f"filename={filename or url.split('/')[-1]}"
                    ),
                }
                # Review note: "unknown" is not a valid mimetype, so the Content-Type header is
                # left out when the distribution has no encoding format, as suggested in
                # https://stackoverflow.com/questions/1176022/unknown-file-type-mime
                if encoding_format:
                    headers["Content-Type"] = str(encoding_format)
                return Response(content=content, headers=headers)

            except Exception as exc:
                raise _wrap_as_http_exception(exc)

        def get_resource_content_default(identifier: str):
            return get_resource_content(identifier=identifier, distribution_idx=0, default=True)

        # f-strings are never stored as docstrings, so the dynamic descriptions shown in
        # Swagger have to be assigned to `__doc__` explicitly.
        get_resource_content.__doc__ = (
            f"Retrieve a distribution of the content for {self.resource_name} "
            "identified by its identifier."
        )
        get_resource_content_default.__doc__ = (
            "Retrieve the first distribution (index 0 as default) of the content "
            f"for a {self.resource_name} identified by its identifier."
        )

        if default:
            return get_resource_content_default

        return get_resource_content
4 changes: 2 additions & 2 deletions src/routers/resource_routers/case_study_router.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from database.model.case_study.case_study import CaseStudy
from routers.resource_router import ResourceRouter
from routers.resource_ai_asset_router import ResourceAIAssetRouter


class CaseStudyRouter(ResourceRouter):
class CaseStudyRouter(ResourceAIAssetRouter):
@property
def version(self) -> int:
return 1
Expand Down
4 changes: 2 additions & 2 deletions src/routers/resource_routers/dataset_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
)
from converters.schema_converters.schema_converter import SchemaConverter
from database.model.dataset.dataset import Dataset
from routers.resource_router import ResourceRouter
from routers.resource_ai_asset_router import ResourceAIAssetRouter


class DatasetRouter(ResourceRouter):
class DatasetRouter(ResourceAIAssetRouter):
@property
def version(self) -> int:
return 1
Expand Down
4 changes: 2 additions & 2 deletions src/routers/resource_routers/experiment_router.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from database.model.models_and_experiments.experiment import Experiment
from routers.resource_router import ResourceRouter
from routers.resource_ai_asset_router import ResourceAIAssetRouter


class ExperimentRouter(ResourceRouter):
class ExperimentRouter(ResourceAIAssetRouter):
@property
def version(self) -> int:
return 1
Expand Down
4 changes: 2 additions & 2 deletions src/routers/resource_routers/ml_model_router.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from database.model.models_and_experiments.ml_model import MLModel
from routers.resource_router import ResourceRouter
from routers.resource_ai_asset_router import ResourceAIAssetRouter


class MLModelRouter(ResourceRouter):
class MLModelRouter(ResourceAIAssetRouter):
@property
def version(self) -> int:
return 1
Expand Down
4 changes: 2 additions & 2 deletions src/routers/resource_routers/publication_router.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from database.model.knowledge_asset.publication import Publication
from routers.resource_router import ResourceRouter
from routers.resource_ai_asset_router import ResourceAIAssetRouter


class PublicationRouter(ResourceRouter):
class PublicationRouter(ResourceAIAssetRouter):
@property
def version(self) -> int:
return 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json

import pytest
import responses

from connectors.huggingface.huggingface_dataset_connector import HuggingFaceDatasetConnector
Expand All @@ -9,6 +10,7 @@
HUGGINGFACE_URL = "https://datasets-server.huggingface.co"


@pytest.mark.skip(reason="We'll fix this in a separate PR")
def test_fetch_all_happy_path():
ids_expected = {
"0n1xus/codexglue",
Expand Down
2 changes: 2 additions & 0 deletions src/tests/resources/contents/example1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
row1,row2,row3
1,2,3
2 changes: 2 additions & 0 deletions src/tests/resources/contents/example2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
col1;col2;col3
1;2;3
Empty file.
Loading