Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/aiondemand/AIOD-rest-api
Browse files Browse the repository at this point in the history
…into feature/elastic_search
  • Loading branch information
josvandervelde committed Nov 20, 2023
2 parents 3e5c446 + 429e8ac commit 9bdd974
Show file tree
Hide file tree
Showing 44 changed files with 2,986 additions and 637 deletions.
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,21 @@ Information on how to install Docker is found in [their documentation](https://d

### Using docker compose
```bash
docker compose up
docker compose --profile examples up
```

starts the MySQL server, the REST API, Keycloak for identity and access management and Nginx for reverse proxying. \
Once started, you should be able to visit the REST API server at: http://localhost and Keycloak at http://localhost/aiod-auth \
To authenticate to the REST API swagger interface the predefined user is: user, and password: password \
To authenticate as admin to Keycloak the predefined user is: admin and password: password \
To use a different DNS hostname replace localhost with it in .env and src/config.toml \
This configuration is intended for development, DO NOT use it in production.

To turn it off again, use
```bash
docker compose --profile examples down
```

To connect to the database use `./scripts/database-connect.sql`.

```bash
Expand All @@ -98,6 +104,16 @@ mysql> SHOW DATABASES;

Now, you can visit the server from your browser at `localhost:8000/docs`.

#### Using connectors
You can specify different connectors using

```bash
docker compose --profile examples --profile huggingface-datasets --profile openml-datasets up -d
docker compose --profile examples --profile huggingface-datasets --profile openml-datasets down
```

Make sure you use the same profiles for `up` and `down`, otherwise some containers might keep
running.

#### Local Installation

Expand Down
1 change: 1 addition & 0 deletions connectors/huggingface/datasets.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash

WORK_DIR=/opt/connectors/data/huggingface/dataset
mkdir -p $WORK_DIR

python3 connectors/synchronization.py \
-c connectors.huggingface.huggingface_dataset_connector.HuggingFaceDatasetConnector \
Expand Down
4 changes: 2 additions & 2 deletions connectors/openml/datasets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ echo $(date -u) "Starting synchronization..."
PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \
-c $CONNECTOR \
-w $WORK_DIR \
--from-identifier 4500 \
--save-every 100 > ${WORK_DIR}/connector.log 2>&1
--from-identifier 1 \
--save-every 100 >> ${WORK_DIR}/connector.log 2>&1
echo $(date -u) "Synchronization Done."
2 changes: 1 addition & 1 deletion connectors/zenodo/datasets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \
-c $CONNECTOR \
-w $WORK_DIR \
--from-date "2023-08-01" \
--save-every 100 > ${WORK_DIR}/connector.log 2>&1
--save-every 100 >> ${WORK_DIR}/connector.log 2>&1
echo $(date -u) "Synchronization Done."
Empty file added data/connectors/.gitkeep
Empty file.
Empty file added data/deletion/.gitkeep
Empty file.
Empty file added data/mysql/.gitkeep
Empty file.
26 changes: 15 additions & 11 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ services:
ports:
- 8000:8000
volumes:
- ./src:/app
- ./src:/app:ro
command: >
python main.py
--rebuild-db only-if-empty
Expand All @@ -23,22 +23,23 @@ services:
start_interval: 1s
start_period: 30s
interval: 5s
timeout: 30s
retries: 5
timeout: 120s
retries: 24
depends_on:
sqlserver:
condition: service_healthy

fill-db-with-examples:
profiles: ["examples"]
image: ai4eu_server
container_name: fill-db-with-examples
env_file: .env
environment:
- KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET
volumes:
- ./src:/app
- ./src:/app:ro
- ./data/connectors:/opt/connectors/data
- ./connectors:/opt/connectors/script
- ./connectors:/opt/connectors/script:ro
command: >
/bin/bash -c "/opt/connectors/script/fill-examples.sh"
depends_on:
Expand All @@ -62,22 +63,24 @@ services:
condition: service_healthy

huggingface-dataset-connector:
profiles: ["huggingface-datasets"]
image: ai4eu_server
container_name: huggingface-dataset-connector
env_file: .env
environment:
- KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET
volumes:
- ./src:/app
- ./src:/app:ro
- ./data/connectors:/opt/connectors/data
- ./connectors/huggingface/:/opt/connectors/script
- ./connectors/huggingface/:/opt/connectors/script:ro
command: >
/bin/bash -c "/opt/connectors/script/datasets.sh"
depends_on:
app:
condition: service_healthy

openml-dataset-connector:
profiles: ["openml-datasets"]
build:
context: connectors/openml
dockerfile: Dockerfile
Expand All @@ -87,16 +90,17 @@ services:
environment:
- KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET
volumes:
- ./src:/app
- ./src:/app:ro
- ./data/connectors:/opt/connectors/data
- ./connectors/openml/:/opt/connectors/script
- ./connectors/openml/:/opt/connectors/script:ro
command: >
/bin/bash -c "/opt/connectors/script/entry.sh"
depends_on:
app:
condition: service_healthy

zenodo-dataset-connector:
profiles: ["zenodo-datasets"]
build:
context: connectors/zenodo
dockerfile: Dockerfile
Expand Down Expand Up @@ -141,7 +145,7 @@ services:
ports:
- 8080:8080
volumes:
- ./quay-keycloak:/opt/keycloak/data/import
- ./quay-keycloak:/opt/keycloak/data/import:ro
command: >
start-dev
--hostname-url http://${HOSTNAME}/aiod-auth
Expand All @@ -156,7 +160,7 @@ services:
container_name: nginx
restart: unless-stopped
volumes:
- ./nginx:/etc/nginx/conf.d
- ./nginx:/etc/nginx/conf.d:ro
ports:
- 80:80
depends_on:
Expand Down
12 changes: 7 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ authors = [
{name = "Taniya Das", email = "[email protected]"}
]
dependencies = [
"urllib3== 2.0.7",
"urllib3== 2.1.0",
"bibtexparser==1.4.1",
"huggingface_hub==0.19.1",
"huggingface_hub==0.19.4",
"fastapi==0.104.1",
"uvicorn==0.24.0.post1",
"requests==2.31.0",
"mysqlclient==2.2.0",
"oic==1.6.0",
"python-keycloak==3.3.0",
"python-keycloak==3.7.0",
"python-dotenv==1.0.0",
"pytz==2023.3.post1",
"pydantic_schemaorg==1.0.6",
Expand All @@ -43,7 +43,7 @@ dev = [
"pytest-dotenv==0.5.2",
"pytest-xdist==3.4.0",
"pre-commit==3.5.0",
"responses==0.24.0",
"responses==0.24.1",
"starlette==0.27.0"
]

Expand All @@ -60,7 +60,9 @@ filterwarnings = [
env_override_existing_values = 1
env_files = [
"src/.env",
"src/tests/.env"
"src/tests/.env",
".env", # Only used if running from docker container
"tests/.env" # Only used if running from docker container
]


Expand Down
14 changes: 14 additions & 0 deletions scripts/clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Convenience script to revert back to a clean state.
#
# Deletes every regular file (except the .gitkeep placeholders) below
# data/connectors and data/deletion, and removes all non-hidden entries
# inside data/mysql. Paths are resolved relative to this script's location,
# so it can be invoked from any working directory.

# Absolute path of the directory containing this script.
DIR_SCRIPT=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
if [[ -z "$DIR_SCRIPT" ]]; then
  # Without a resolved location every path below would be wrong, and the
  # final `sudo rm -rf` could hit an unintended directory.
  echo "clean.sh: could not determine the script directory; aborting." >&2
  exit 1
fi
DIR_ROOT=$( dirname -- "$DIR_SCRIPT" )
DIR_DATA="$DIR_ROOT/data"

DIR_MYSQL="$DIR_DATA/mysql"
DIR_CONNECTORS="$DIR_DATA/connectors"
DIR_DELETION="$DIR_DATA/deletion"

# Quote every expansion so paths containing spaces or glob characters are
# passed through as a single argument.
find "$DIR_CONNECTORS" -type f ! -name .gitkeep -delete
find "$DIR_DELETION" -type f ! -name .gitkeep -delete
# `*` does not match dotfiles, so data/mysql/.gitkeep survives.
# ${DIR_MYSQL:?} aborts instead of expanding to "/*" if the variable is empty.
# NOTE(review): sudo is presumably needed because the database container
# writes these files as a different user - confirm.
sudo rm -rf -- "${DIR_MYSQL:?}"/*
30 changes: 16 additions & 14 deletions src/connectors/abstract/resource_connector_by_date.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import abc
import logging
from datetime import datetime, date
from datetime import datetime, timedelta
from typing import Generic, Iterator, Tuple

from connectors.abstract.resource_connector import ResourceConnector
from connectors.record_error import RecordError


from connectors.resource_with_relations import ResourceWithRelations
from routers.resource_router import RESOURCE

Expand All @@ -27,9 +26,10 @@ def fetch(
def run(
self,
state: dict,
from_date: date | None = None,
limit: int | None = None,
from_incl: datetime | None = None,
to_excl: datetime | None = None,
time_per_loop: timedelta = timedelta(days=1),
**kwargs,
) -> Iterator[RESOURCE | ResourceWithRelations[RESOURCE] | RecordError]:
if limit is not None:
Expand All @@ -44,17 +44,19 @@ def run(

first_run = not state
if first_run:
if from_date is None:
raise ValueError("In the first run, the from-date needs to be set")
from_incl = datetime.combine(from_date, datetime.min.time())
if from_incl is None:
raise ValueError("In the first run, from_incl needs to be set")
else:
from_incl = datetime.fromtimestamp(state["last"] + 0.001)

logging.info(f"Starting synchronisation {from_incl=}, {to_excl=}.")
state["from_incl"] = from_incl.timestamp()
state["to_excl"] = to_excl.timestamp()
for datetime_, result in self.fetch(from_incl=from_incl, to_excl=to_excl):
yield result
if datetime_:
state["last"] = datetime_.timestamp()
while from_incl < to_excl:
to_excl_current = min(from_incl + time_per_loop, to_excl)
logging.info(f"Starting synchronisation {from_incl=}, {to_excl_current=}.")
state["from_incl"] = from_incl.timestamp()
state["to_excl"] = to_excl_current.timestamp()
for datetime_, result in self.fetch(from_incl=from_incl, to_excl=to_excl_current):
yield result
if datetime_:
state["last"] = datetime_.timestamp()
from_incl = to_excl_current
state["result"] = "Complete run done (although there might be errors)."
3 changes: 2 additions & 1 deletion src/connectors/example/enum_fill_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ def platform_name(self) -> PlatformName:
def fetch(self, limit: int | None = None) -> Iterator[RESOURCE]:
with open(self.json_path) as f:
json_data = json.load(f)
yield from json_data[:limit]
for value in json_data[:limit]:
yield value.lower()
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
[
{
"platform": "example",
"platform_resource_identifier": "1",
"aiod_entry": {
"editor": [],
"status": "draft"
},
"email": ["[email protected]"],
"telephone": ["0032 XXXX XXXX"],
"location": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"editor": [],
"status": "draft"
},
"access_mode": ["textual"],
"alternate_name": [
"alias 1",
"alias 2"
Expand All @@ -23,7 +24,7 @@
],
"citation": [],
"contact": [],
"content": [{"plain": "An alternative to using .distribution.content_url"}],
"content": {"plain": "An alternative to using .distribution.content_url"},
"creator": [],
"distribution": [
{
Expand All @@ -39,7 +40,9 @@
"technology_readiness_level": 1
}
],
"educational_level": ["primary school", "secondary school", "university"],
"has_part": [],
"in_language": ["eng", "fra", "spa"],
"industrial_sector": [
"Finance",
"eCommerce",
Expand All @@ -50,6 +53,22 @@
"keyword1",
"keyword2"
],
"location": [{
"address": {
"region": "California",
"locality": "Paris",
"street": "Wetstraat 170",
"postal_code": "1040 AA",
"address": "Wetstraat 170, 1040 Brussel",
"country": "BEL"
},
"geo": {
"latitude": 37.42242,
"longitude": -122.08585,
"elevation_millimeters": 0
}
}],
"prerequisite": ["undergraduate knowledge of statistics", "graduate knowledge of linear algebra"],
"relevant_link": ["https://www.example.com/a_relevant_link", "https://www.example.com/another_relevant_link"],
"license": "https://creativecommons.org/share-your-work/public-domain/cc0/",
"media": [
Expand Down Expand Up @@ -80,6 +99,11 @@
"Computer Vision."
],
"level": "EQF level 3",
"target_audience": [
"professionals",
"students in higher education",
"teachers in secondary school"
],
"type": "presentation"
}
]
4 changes: 2 additions & 2 deletions src/connectors/example/resources/resource/teams.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
"description": {"plain": "This is a team of an organisation."},
"date_published": "2022-01-01T15:15:00.000",
"same_as": "https://www.example.com/resource/this_resource",
"platform": "example",
"platform_resource_identifier": "1",
"aiod_entry": {
"platform": "example",
"platform_resource_identifier": "1",
"editor": [],
"status": "draft"
},
Expand Down
Loading

0 comments on commit 9bdd974

Please sign in to comment.