From f4613f0b76c7338925a86caf520f9863ed46cb3c Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Tue, 14 Nov 2023 11:36:43 +0100 Subject: [PATCH 01/21] Update example-connector resources --- .../resource/{contact.json => contacts.json} | 0 .../resource/educational_resources.json | 26 ++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) rename src/connectors/example/resources/resource/{contact.json => contacts.json} (100%) diff --git a/src/connectors/example/resources/resource/contact.json b/src/connectors/example/resources/resource/contacts.json similarity index 100% rename from src/connectors/example/resources/resource/contact.json rename to src/connectors/example/resources/resource/contacts.json diff --git a/src/connectors/example/resources/resource/educational_resources.json b/src/connectors/example/resources/resource/educational_resources.json index 224b0da1..95d8882d 100644 --- a/src/connectors/example/resources/resource/educational_resources.json +++ b/src/connectors/example/resources/resource/educational_resources.json @@ -12,6 +12,7 @@ "editor": [], "status": "draft" }, + "access_mode": ["textual"], "alternate_name": [ "alias 1", "alias 2" @@ -23,7 +24,7 @@ ], "citation": [], "contact": [], - "content": [{"plain": "An alternative to using .distribution.content_url"}], + "content": {"plain": "An alternative to using .distribution.content_url"}, "creator": [], "distribution": [ { @@ -39,7 +40,9 @@ "technology_readiness_level": 1 } ], + "educational_level": ["primary school", "secondary school", "university"], "has_part": [], + "in_language": ["eng", "fra", "spa"], "industrial_sector": [ "Finance", "eCommerce", @@ -50,6 +53,22 @@ "keyword1", "keyword2" ], + "location": [{ + "address": { + "region": "California", + "locality": "Paris", + "street": "Wetstraat 170", + "postal_code": "1040 AA", + "address": "Wetstraat 170, 1040 Brussel", + "country": "BEL" + }, + "geo": { + "latitude": 37.42242, + "longitude": -122.08585, + "elevation_millimeters": 0 + } + }], + "prerequisite": ["undergraduate knowledge of statistics", "graduate knowledge of linear algebra"], "relevant_link": ["https://www.example.com/a_relevant_link", "https://www.example.com/another_relevant_link"], "license": "https://creativecommons.org/share-your-work/public-domain/cc0/", "media": [ @@ -80,6 +99,11 @@ "Computer Vision." 
], "level": "EQF level 3", + "target_audience": [ + "professionals", + "students in higher education", + "teachers in secondary school" + ], "type": "presentation" } ] \ No newline at end of file From 559c86c25bfd3a118b04a44ad94ecef37460136f Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Tue, 14 Nov 2023 13:44:41 +0100 Subject: [PATCH 02/21] Huggingface dataset connector: bugfixes --- connectors/huggingface/datasets.sh | 1 + docker-compose.yaml | 18 ++-- .../huggingface_dataset_connector.py | 47 +++++++--- src/routers/resource_router.py | 2 - .../test_huggingface_dataset_connector.py | 87 ++++++++++++++++++ .../data_list_incorrect_citation.json | 91 +++++++++++++++++++ .../huggingface/parquet_bigIR_ar_cov19.json | 15 +++ 7 files changed, 236 insertions(+), 25 deletions(-) create mode 100644 src/tests/resources/connectors/huggingface/data_list_incorrect_citation.json create mode 100644 src/tests/resources/connectors/huggingface/parquet_bigIR_ar_cov19.json diff --git a/connectors/huggingface/datasets.sh b/connectors/huggingface/datasets.sh index 99ba48c7..814958f5 100755 --- a/connectors/huggingface/datasets.sh +++ b/connectors/huggingface/datasets.sh @@ -1,6 +1,7 @@ #!/bin/bash WORK_DIR=/opt/connectors/data/huggingface/dataset +mkdir -p $WORK_DIR python3 connectors/synchronization.py \ -c connectors.huggingface.huggingface_dataset_connector.HuggingFaceDatasetConnector \ diff --git a/docker-compose.yaml b/docker-compose.yaml index 44ed5783..475f6f15 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -13,7 +13,7 @@ services: ports: - 8000:8000 volumes: - - ./src:/app + - ./src:/app:ro command: > python main.py --rebuild-db only-if-empty @@ -36,9 +36,9 @@ services: environment: - KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET volumes: - - ./src:/app + - ./src:/app:ro - ./data/connectors:/opt/connectors/data - - ./connectors:/opt/connectors/script + - ./connectors:/opt/connectors/script:ro command: > /bin/bash -c "/opt/connectors/script/fill-examples.sh" depends_on: @@ -69,9 +69,9 @@ services: environment: - KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET volumes: - - ./src:/app + - ./src:/app:ro - ./data/connectors:/opt/connectors/data - - ./connectors/huggingface/:/opt/connectors/script + - ./connectors/huggingface/:/opt/connectors/script:ro command: > /bin/bash -c "/opt/connectors/script/datasets.sh" depends_on: @@ -88,9 +88,9 @@ services: environment: - KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET volumes: - - ./src:/app + - ./src:/app:ro - ./data/connectors:/opt/connectors/data - - ./connectors/openml/:/opt/connectors/script + - ./connectors/openml/:/opt/connectors/script:ro command: > /bin/bash -c "/opt/connectors/script/entry.sh" depends_on: @@ -142,7 +142,7 @@ services: ports: - 8080:8080 volumes: - - ./quay-keycloak:/opt/keycloak/data/import + - ./quay-keycloak:/opt/keycloak/data/import:ro command: > start-dev --hostname-url http://${HOSTNAME}/aiod-auth @@ -157,7 +157,7 @@ services: container_name: nginx restart: unless-stopped volumes: - - ./nginx:/etc/nginx/conf.d + - ./nginx:/etc/nginx/conf.d:ro ports: - 80:80 depends_on: diff --git a/src/connectors/huggingface/huggingface_dataset_connector.py b/src/connectors/huggingface/huggingface_dataset_connector.py index 6ce86463..c1caa71d 100644 --- a/src/connectors/huggingface/huggingface_dataset_connector.py +++ b/src/connectors/huggingface/huggingface_dataset_connector.py @@ -1,6 +1,5 @@ import logging import typing - import bibtexparser import requests from huggingface_hub import list_datasets @@ -59,15 +58,24 @@ def 
fetch( def fetch_dataset(self, dataset: DatasetInfo, pydantic_class, pydantic_class_publication): citations = [] - if hasattr(dataset, "citation") and dataset.citation: - parsed_citations = bibtexparser.loads(dataset.citation).entries - if len(parsed_citations) == 0: - if dataset.citation: - citations = [ - pydantic_class_publication( - name=dataset.citation, - ) - ] + raw_citation = getattr(dataset, "citation", None) + if raw_citation is not None: + parsed_citations = bibtexparser.loads(raw_citation).entries + if len(parsed_citations) == 0 and raw_citation.startswith("@"): + # Ugly fix: many HF datasets have a wrong citation (see testcase) + parsed_citations = bibtexparser.loads(raw_citation + "}").entries + if len(parsed_citations) == 0 and ( + raw_citation.startswith("@") or len(raw_citation) > field_length.NORMAL + ): + # incorrect bibtex. There are many mistakes in the HF citations. E.g., + # @Inproceedings(Conference) instead of @inproceedings (note the capitals). + pass + elif len(parsed_citations) == 0: + citations = [ + pydantic_class_publication( + name=raw_citation, aiod_entry=AIoDEntryCreate(status="published") + ) + ] else: citations = [ pydantic_class_publication( @@ -76,6 +84,10 @@ def fetch_dataset(self, dataset: DatasetInfo, pydantic_class, pydantic_class_pub name=citation["title"], same_as=citation["link"] if "link" in citation else None, type=citation["ENTRYTYPE"], + description=Text(plain=f"By {citation['author']}") + if "author" in citation + else None, + aiod_entry=AIoDEntryCreate(status="published"), ) for citation in parsed_citations ] @@ -96,11 +108,18 @@ def fetch_dataset(self, dataset: DatasetInfo, pydantic_class, pydantic_class_pub ] size = None ds_license = None - if dataset.card_data is not None and "license" in dataset.card_data: + if ( + dataset.card_data is not None + and "license" in dataset.card_data + and dataset.card_data["license"] + ): if isinstance(dataset.card_data["license"], str): ds_license = dataset.card_data["license"] else: - (ds_license,) = dataset.card_data["license"] + # There can be more than one license in HF, e.g., ['cc-by-sa-3.0', 'gfdl']. This + # seems weird, what does it mean to have two different licenses? That's why we're + # only saving the first. 
+ ds_license = dataset.card_data["license"][0] # TODO(issue 8): implement # if "dataset_info" in dataset.cardData: @@ -129,10 +148,10 @@ def fetch_dataset(self, dataset: DatasetInfo, pydantic_class, pydantic_class_pub description=description, date_published=dataset.createdAt if hasattr(dataset, "createdAt") else None, license=ds_license, - distributions=distributions, + distribution=distributions, is_accessible_for_free=True, size=size, - keywords=dataset.tags, + keyword=dataset.tags, ), related_resources=related_resources, ) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index ad336ca1..a43b5e72 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -360,8 +360,6 @@ def create_resource(self, session: Session, resource_create_instance: SQLModel): deserialize_resource_relationships( session, self.resource_class, resource, resource_create_instance ) - # if isinstance(resource, AbstractAIResource) and resource.ai_: - # resource.ai_resource.type = self.resource_name session.add(resource) session.commit() return resource diff --git a/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py b/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py index c1df2f73..f8455e20 100644 --- a/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py +++ b/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py @@ -4,6 +4,8 @@ from connectors.huggingface.huggingface_dataset_connector import HuggingFaceDatasetConnector from connectors.resource_with_relations import ResourceWithRelations +from database.model.ai_resource.text import Text +from database.model.platform.platform_names import PlatformName from tests.testutils.paths import path_test_resources HUGGINGFACE_URL = "https://datasets-server.huggingface.co" @@ -45,6 +47,91 @@ def test_fetch_all_happy_path(): assert all(len(r.related_resources) in (1, 2) for r in resources_with_relations) assert all(len(r.related_resources["citation"]) == 1 for r in resources_with_relations[:5]) + dataset = datasets[0] + assert dataset.platform_resource_identifier == "acronym_identification" + assert dataset.platform == PlatformName.huggingface + assert dataset.description == Text( + plain="Acronym identification training and development " + "sets for the acronym identification task at SDU@AAAI-21." 
+ ) + assert dataset.name == "acronym_identification" + assert dataset.same_as == "https://huggingface.co/datasets/acronym_identification" + assert dataset.license == "mit" + assert len(dataset.distribution) == 3 + expected_url_base = ( + "https://huggingface.co/datasets/acronym_identification/resolve" + "/refs%2Fconvert%2Fparquet/default/" + ) + assert {dist.content_url for dist in dataset.distribution} == { + (expected_url_base + "acronym_identification-test.parquet"), + (expected_url_base + "acronym_identification-train.parquet"), + (expected_url_base + "acronym_identification-validation.parquet"), + } + assert dataset.is_accessible_for_free + assert set(dataset.keyword) == { + "acronym-identification", + "annotations_creators:expert-generated", + "arxiv:2010.14678", + "language:en", + "language_creators:found", + "license:mit", + "multilinguality:monolingual", + "region:us", + "size_categories:10K Date: Tue, 14 Nov 2023 16:32:25 +0100 Subject: [PATCH 03/21] Making NamedRelation and Platform lowercase (to avoid collisions between same name but one capitalized, which gave sql integrity errors), bugfixes for HuggingFace bibtex parsing --- .../huggingface_dataset_connector.py | 71 ++++++++++--------- src/connectors/synchronization.py | 2 - src/database/model/concept/concept.py | 5 ++ src/database/model/named_relation.py | 19 ++++- src/database/model/serializers.py | 11 +-- src/tests/.env | 1 + .../test_huggingface_dataset_connector.py | 1 - .../model/agent/test_person_delete.py | 6 +- .../resource_routers/test_router_contact.py | 4 +- .../test_router_dataset_generic_fields.py | 37 +++++++--- .../resource_routers/test_router_ml_model.py | 2 +- .../resource_routers/test_router_news.py | 8 +-- .../test_router_organisation.py | 4 +- 13 files changed, 106 insertions(+), 65 deletions(-) diff --git a/src/connectors/huggingface/huggingface_dataset_connector.py b/src/connectors/huggingface/huggingface_dataset_connector.py index c1caa71d..19e2969d 100644 --- a/src/connectors/huggingface/huggingface_dataset_connector.py +++ b/src/connectors/huggingface/huggingface_dataset_connector.py @@ -57,40 +57,7 @@ def fetch( yield RecordError(identifier=dataset.id, error=e) def fetch_dataset(self, dataset: DatasetInfo, pydantic_class, pydantic_class_publication): - citations = [] - raw_citation = getattr(dataset, "citation", None) - if raw_citation is not None: - parsed_citations = bibtexparser.loads(raw_citation).entries - if len(parsed_citations) == 0 and raw_citation.startswith("@"): - # Ugly fix: many HF datasets have a wrong citation (see testcase) - parsed_citations = bibtexparser.loads(raw_citation + "}").entries - if len(parsed_citations) == 0 and ( - raw_citation.startswith("@") or len(raw_citation) > field_length.NORMAL - ): - # incorrect bibtex. There are many mistakes in the HF citations. E.g., - # @Inproceedings(Conference) instead of @inproceedings (note the capitals). 
-                pass
-            elif len(parsed_citations) == 0:
-                citations = [
-                    pydantic_class_publication(
-                        name=raw_citation, aiod_entry=AIoDEntryCreate(status="published")
-                    )
-                ]
-            else:
-                citations = [
-                    pydantic_class_publication(
-                        platform=self.platform_name,
-                        platform_resource_identifier=citation["ID"],
-                        name=citation["title"],
-                        same_as=citation["link"] if "link" in citation else None,
-                        type=citation["ENTRYTYPE"],
-                        description=Text(plain=f"By {citation['author']}")
-                        if "author" in citation
-                        else None,
-                        aiod_entry=AIoDEntryCreate(status="published"),
-                    )
-                    for citation in parsed_citations
-                ]
+        citations = self._parse_citations(dataset, pydantic_class_publication)

         parquet_info = HuggingFaceDatasetConnector._get(
             url="https://datasets-server.huggingface.co/parquet",
@@ -155,3 +122,39 @@ def fetch_dataset(self, dataset: DatasetInfo, pydantic_class, pydantic_class_pub
             ),
             related_resources=related_resources,
         )
+
+    def _parse_citations(self, dataset, pydantic_class_publication) -> list:
+        """Best-effort parsing of the citations. There are many mistakes in the HF citations."""
+        raw_citation = getattr(dataset, "citation", None)
+        if raw_citation is None:
+            return []
+
+        try:
+            parsed_citations = bibtexparser.loads(raw_citation).entries
+            if len(parsed_citations) == 0 and raw_citation.startswith("@"):
+                # Ugly fix: many HF datasets have a wrong citation (see testcase)
+                parsed_citations = bibtexparser.loads(raw_citation + "}").entries
+            elif len(parsed_citations) == 0 and len(raw_citation) <= field_length.NORMAL:
+                return [
+                    pydantic_class_publication(
+                        name=raw_citation, aiod_entry=AIoDEntryCreate(status="published")
+                    )
+                ]
+            return [
+                pydantic_class_publication(
+                    platform=self.platform_name,
+                    platform_resource_identifier=citation["ID"],
+                    name=citation["title"],
+                    same_as=citation["link"] if "link" in citation else None,
+                    type=citation["ENTRYTYPE"],
+                    description=Text(plain=f"By {citation['author']}")
+                    if "author" in citation
+                    else None,
+                    aiod_entry=AIoDEntryCreate(status="published"),
+                )
+                for citation in parsed_citations
+            ]
+        except Exception:
+            # Probably an incorrect bibtex. There are many mistakes in the HF citations. E.g.,
+            # @Inproceedings(Conference) instead of @inproceedings (note the capitals).
+            return []
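The retry-with-a-closing-brace trick in `_parse_citations` is easiest to see in isolation. A minimal sketch follows; the citation string is made up, and it assumes bibtexparser v1's lenient behaviour of returning zero entries for an incomplete entry rather than raising:

```python
import bibtexparser

# A malformed citation as found on many HF datasets: the closing brace is missing.
raw_citation = "@inproceedings{example-2020,\n  title={An Example Title}\n"

entries = bibtexparser.loads(raw_citation).entries  # [] - no complete entry found
if not entries and raw_citation.startswith("@"):
    # The same "ugly fix" as above: append the missing brace and parse again.
    entries = bibtexparser.loads(raw_citation + "}").entries

print([(e["ENTRYTYPE"], e["ID"], e["title"]) for e in entries])
# Expected: [('inproceedings', 'example-2020', 'An Example Title')]
```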
diff --git a/src/connectors/synchronization.py b/src/connectors/synchronization.py index 94ec3192..284030b5 100644 --- a/src/connectors/synchronization.py +++ b/src/connectors/synchronization.py @@ -183,9 +183,7 @@ def main(): logging.info(f"Saving state after handling {i}th result: {json.dumps(state)}") with open(state_path, "w") as f: json.dump(state, f, indent=4) - session.commit() with open(state_path, "w") as f: - session.commit() json.dump(state, f, indent=4) logging.info("Done") diff --git a/src/database/model/concept/concept.py b/src/database/model/concept/concept.py index b53516af..8a4c499e 100644 --- a/src/database/model/concept/concept.py +++ b/src/database/model/concept/concept.py @@ -1,5 +1,6 @@ import copy import datetime +import os from typing import Optional, Tuple from sqlalchemy import CheckConstraint, Index @@ -13,6 +14,9 @@ from database.model.relationships import OneToOne from database.model.serializers import CastDeserializer +IS_SQLITE = os.getenv("DB") == "SQLite" +CONSTRAINT_LOWERCASE = f"{'platform' if IS_SQLITE else 'BINARY(platform)'} = LOWER(platform)" + class AIoDConceptBase(SQLModel): """The AIoDConcept is the top-level (abstract) class in AIoD.""" @@ -82,4 +86,5 @@ def __table_args__(cls) -> Tuple: "(platform IS NULL) <> (platform_resource_identifier IS NOT NULL)", name=f"{cls.__name__}_platform_xnor_platform_id_null", ), + CheckConstraint(CONSTRAINT_LOWERCASE, name=f"{cls.__name__}_platform_lowercase"), ) diff --git a/src/database/model/named_relation.py b/src/database/model/named_relation.py index 3ab5b4bb..4183fcad 100644 --- a/src/database/model/named_relation.py +++ b/src/database/model/named_relation.py @@ -1,10 +1,27 @@ +import os +from typing import Tuple + +from sqlalchemy import CheckConstraint +from sqlalchemy.orm import declared_attr from sqlmodel import SQLModel, Field from database.model.field_length import NORMAL +IS_SQLITE = os.getenv("DB") == "SQLite" +CONSTRAINT_LOWERCASE_NAME = f"{'name' if IS_SQLITE else 'BINARY(name)'} = LOWER(name)" + class NamedRelation(SQLModel): - """An enumerable-type string""" + """An enumerable-type string (lowercase)""" identifier: int = Field(default=None, primary_key=True) name: str = Field(index=True, unique=True, description="The string value", max_length=NORMAL) + + @declared_attr + def __table_args__(cls) -> Tuple: + return ( + CheckConstraint( + CONSTRAINT_LOWERCASE_NAME, + name=f"{cls.__name__}_name_lowercase", + ), + ) diff --git a/src/database/model/serializers.py b/src/database/model/serializers.py index e10b28b2..3199445c 100644 --- a/src/database/model/serializers.py +++ b/src/database/model/serializers.py @@ -102,10 +102,10 @@ def deserialize(self, session: Session, input_: int | list[int]) -> SQLModel | l @dataclasses.dataclass class FindByNameDeserializer(DeSerializer[NamedRelation]): """ - Deserialization of NamedValues: uniquely identified by their name. + Deserialization of NamedRelations: uniquely identified by their name. In case of a single name, this deserializer returns the identifier. In case of a list of - names, it returns the list of NamedValues. + names, it returns the list of NamedRelations. 
""" clazz: type[NamedRelation] @@ -114,6 +114,7 @@ def deserialize( self, session: Session, name: str | list[str] ) -> NamedRelation | list[NamedRelation]: if not isinstance(name, list): + name = name.lower() query = select(self.clazz.identifier).where(self.clazz.name == name) identifier = session.scalars(query).first() if identifier is None: @@ -122,10 +123,10 @@ def deserialize( session.flush() identifier = new_object.identifier return identifier - - query = select(self.clazz).where(self.clazz.name.in_(name)) # type: ignore[attr-defined] + names = [n.lower() for n in name] + query = select(self.clazz).where(self.clazz.name.in_(names)) # type: ignore[attr-defined] existing = session.scalars(query).all() - names_not_found = set(name) - {e.name for e in existing} + names_not_found = set(names) - {e.name for e in existing} new_objects = [self.clazz(name=name) for name in names_not_found] if any(names_not_found): session.add_all(new_objects) diff --git a/src/tests/.env b/src/tests/.env index 803f6c1e..3cdc4b9b 100644 --- a/src/tests/.env +++ b/src/tests/.env @@ -1 +1,2 @@ KEYCLOAK_CLIENT_SECRET="mocked_secret" +DB="SQLite" diff --git a/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py b/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py index f8455e20..613194e8 100644 --- a/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py +++ b/src/tests/connectors/huggingface/test_huggingface_dataset_connector.py @@ -117,7 +117,6 @@ def test_incorrect_citation(): for dataset_id in ids_expected: mock_parquet(mocked_requests, dataset_id) resources_with_relations = list(connector.fetch()) - (dataset,) = [r.resource for r in resources_with_relations] (related_resources,) = [r.related_resources for r in resources_with_relations] (citation,) = related_resources["citation"] assert citation.aiod_entry.status == "published" diff --git a/src/tests/database/model/agent/test_person_delete.py b/src/tests/database/model/agent/test_person_delete.py index 6038913e..53692023 100644 --- a/src/tests/database/model/agent/test_person_delete.py +++ b/src/tests/database/model/agent/test_person_delete.py @@ -10,9 +10,9 @@ def test_happy_path( client: TestClient, engine: Engine, ): - expertise_a = Expertise(name="Just") - expertise_b = Expertise(name="An") - expertise_c = Expertise(name="Example") + expertise_a = Expertise(name="just") + expertise_b = Expertise(name="an") + expertise_c = Expertise(name="example") person_a = Person( name="person a", expertise=[expertise_a, expertise_b], diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index 64a0cf14..45d1637c 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -15,7 +15,7 @@ def test_happy_path(client: TestClient, mocked_privileged_token: Mock, body_asse body = copy.deepcopy(body_asset) body["name"] = "Contact name" body["email"] = ["a@b.com"] - body["telephone"] = ["0032 XXXX XXXX"] + body["telephone"] = ["0032 xxxx xxxx"] body["location"] = [ { "address": {"country": "NED", "street": "Street Name 10", "postal_code": "1234AB"}, @@ -33,7 +33,7 @@ def test_happy_path(client: TestClient, mocked_privileged_token: Mock, body_asse response_json = response.json() assert response_json["name"] == "Contact name" assert response_json["email"] == ["a@b.com"] - assert response_json["telephone"] == ["0032 XXXX XXXX"] + assert response_json["telephone"] == ["0032 xxxx 
xxxx"] assert response_json["location"] == [ { "address": {"country": "NED", "street": "Street Name 10", "postal_code": "1234AB"}, diff --git a/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py b/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py index e78e6049..7974b4f2 100644 --- a/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py +++ b/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py @@ -79,10 +79,10 @@ def test_happy_path( } assert response_json["is_accessible_for_free"] - assert response_json["application_area"] == ["Voice Assistance"] - assert response_json["industrial_sector"] == ["eCommerce"] - assert response_json["research_area"] == ["Explainable AI"] - assert response_json["scientific_domain"] == ["Voice Recognition"] + assert response_json["application_area"] == ["voice assistance"] + assert response_json["industrial_sector"] == ["ecommerce"] + assert response_json["research_area"] == ["explainable ai"] + assert response_json["scientific_domain"] == ["voice recognition"] assert response_json["contact"] == [1] assert response_json["creator"] == [1] assert response_json["citation"] == [1] @@ -192,8 +192,8 @@ def create_body(i: int, *keywords): assert response.status_code == 200, response.json() response = client.get("/news/v1/2") assert set(response.json()["keyword"]) == { - "AI", - "ArtificialIntelligence", + "ai", + "artificialintelligence", "digitaltransformation", "smartcities", "mobility", @@ -207,18 +207,35 @@ def create_body(i: int, *keywords): client.post("/news/v1", json=body4, headers={"Authorization": "Fake token"}) response = client.get("/news/v1/4") assert set(response.json()["keyword"]) == { - "AI4EU Experiments", + "ai4eu experiments", "solutions", "pipelines", - "hybrid AI", - "modular AI", + "hybrid ai", + "modular ai", "reliability", "explainability", "trustworthiness", - "ArtificialIntelligence", + "artificialintelligence", } +def test_post_duplicate_named_relations_with_different_capitals( + client: TestClient, + engine: Engine, + mocked_privileged_token: Mock, +): + keycloak_openid.userinfo = mocked_privileged_token + + def create_body(i: int, *keywords): + return {"name": f"dataset{i}", "keyword": keywords} + + body1 = create_body(1, "AI") + body2 = create_body(2, "ai") + client.post("/news/v1", json=body1, headers={"Authorization": "Fake token"}) + response = client.post("/news/v1", json=body2, headers={"Authorization": "Fake token"}) + assert response.status_code == 200, response.json() + + def test_post_editors( client: TestClient, engine: Engine, diff --git a/src/tests/routers/resource_routers/test_router_ml_model.py b/src/tests/routers/resource_routers/test_router_ml_model.py index eaa94359..fdc20204 100644 --- a/src/tests/routers/resource_routers/test_router_ml_model.py +++ b/src/tests/routers/resource_routers/test_router_ml_model.py @@ -57,6 +57,6 @@ def test_happy_path( response_json = response.json() assert response_json["pid"] == "https://doi.org/10.1000/182" - assert response_json["type"] == "Large Language Model" + assert response_json["type"] == "large language model" assert response_json["related_experiment"] == [1] assert response_json["distribution"] == [distribution] diff --git a/src/tests/routers/resource_routers/test_router_news.py b/src/tests/routers/resource_routers/test_router_news.py index 252b6ded..ee82c152 100644 --- a/src/tests/routers/resource_routers/test_router_news.py +++ b/src/tests/routers/resource_routers/test_router_news.py @@ -15,7 
+15,7 @@ def test_happy_path(
     body = copy.deepcopy(body_resource)
     body["headline"] = "A headline to show on top of the page."
     body["alternative_headline"] = "An alternative headline."
-    body["category"] = ["Research: Education", "Research: Awards", "Business: Health"]
+    body["category"] = ["research: education", "research: awards", "business: health"]
     body["content"] = {"plain": "plain content"}

     response = client.post("/news/v1", json=body, headers={"Authorization": "Fake token"})
@@ -28,8 +28,8 @@ def test_happy_path(
     assert response_json["headline"] == "A headline to show on top of the page."
     assert response_json["alternative_headline"] == "An alternative headline."
     assert set(response_json["category"]) == {
-        "Research: Education",
-        "Research: Awards",
-        "Business: Health",
+        "research: education",
+        "research: awards",
+        "business: health",
     }
     assert response_json["content"] == {"plain": "plain content"}
diff --git a/src/tests/routers/resource_routers/test_router_organisation.py b/src/tests/routers/resource_routers/test_router_organisation.py
index 975a5788..4532f13e 100644
--- a/src/tests/routers/resource_routers/test_router_organisation.py
+++ b/src/tests/routers/resource_routers/test_router_organisation.py
@@ -48,7 +48,7 @@ def test_happy_path(
     assert response_json["date_founded"] == "2023-01-01"
     assert response_json["legal_name"] == "A name for the organisation"
     assert response_json["ai_relevance"] == "Part of CLAIRE"
-    assert response_json["type"] == "Research Institute"
+    assert response_json["type"] == "research institute"
     assert response_json["member"] == [1]
     assert response_json["contact_details"] == 1

@@ -63,7 +63,7 @@ def test_happy_path(
     response = client.put("organisations/v1/2", json=body, headers={"Authorization": "Fake token"})
     assert response.status_code == 200, response.json()
     response = client.get("organisations/v1/2")
-    assert response.json()["type"] == "Association"
+    assert response.json()["type"] == "association"

     response = client.delete("/organisations/v1/2", headers={"Authorization": "Fake token"})
     assert response.status_code == 200, response.json()

From 338e6d85e76ca6a864bc67983a125b04d945590a Mon Sep 17 00:00:00 2001
From: Jos van der Velde
Date: Tue, 14 Nov 2023 16:49:24 +0100
Subject: [PATCH 04/21] Docker compose profiles

---
 README.md           | 9 ++++++++-
 docker-compose.yaml | 4 ++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 261a1177..35dd52fa 100644
--- a/README.md
+++ b/README.md
@@ -67,7 +67,7 @@ Information on how to install Docker is found in [their documentation](https://d
 ### Using docker compose
 ```bash
-docker compose up
+docker compose --profile examples up
 ```
 starts the MySQL server, the REST API, Keycloak for identity and access management and Nginx for reverse proxying. \
 Once started, you should be able to visit the REST API server at: http://localhost and Keycloak at http://localhost/aiod-auth \
@@ -93,6 +93,13 @@ mysql> SHOW DATABASES;
 Now, you can visit the server from your browser at `localhost:8000/docs`.
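Once the stack is up, you can also smoke-test the REST API directly from the shell. A sketch: the `datasets` route below is an assumption, inferred from the versioned `/news/v1` and `/organisations/v1` routes exercised in the tests above, so adjust it to the routes your deployment exposes:

```bash
# Assumed endpoint path; lists datasets through the REST API
curl -s http://localhost:8000/datasets/v1
```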
+#### Using connectors +You can specify different connectors using + +```bash +docker compose --profile examples --profile huggingface-datasets --profile openml-datasets up -d +docker compose --profile examples --profile huggingface-datasets --profile openml-datasets down +``` #### Local Installation diff --git a/docker-compose.yaml b/docker-compose.yaml index 475f6f15..f821de6a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -30,6 +30,7 @@ services: condition: service_healthy fill-db-with-examples: + profiles: ["examples"] image: ai4eu_server container_name: fill-db-with-examples env_file: .env @@ -63,6 +64,7 @@ services: condition: service_healthy huggingface-dataset-connector: + profiles: ["huggingface-datasets"] image: ai4eu_server container_name: huggingface-dataset-connector env_file: .env @@ -79,6 +81,7 @@ services: condition: service_healthy openml-dataset-connector: + profiles: ["openml-datasets"] build: context: connectors/openml dockerfile: Dockerfile @@ -98,6 +101,7 @@ services: condition: service_healthy zenodo-dataset-connector: + profiles: ["zenodo-datasets"] build: context: connectors/zenodo dockerfile: Dockerfile From 1771018cfa687828fe352d59e4e9a21e92486a5f Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Tue, 14 Nov 2023 17:49:11 +0100 Subject: [PATCH 05/21] bugfix for openml --- connectors/openml/cron | 2 +- connectors/openml/datasets.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/connectors/openml/cron b/connectors/openml/cron index b3961434..02c82ff7 100644 --- a/connectors/openml/cron +++ b/connectors/openml/cron @@ -1 +1 @@ -25 * * * * bash /opt/connectors/script/datasets.sh >> /opt/connectors/data/openml/dataset/cron.log 2>&1 +* * * * * bash /opt/connectors/script/datasets.sh >> /opt/connectors/data/cron.log 2>&1 diff --git a/connectors/openml/datasets.sh b/connectors/openml/datasets.sh index fffc9297..4d143041 100755 --- a/connectors/openml/datasets.sh +++ b/connectors/openml/datasets.sh @@ -15,6 +15,6 @@ echo $(date -u) "Starting synchronization..." PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \ -c $CONNECTOR \ -w $WORK_DIR \ - --from-identifier 4500 \ + --from-identifier 1 \ --save-every 100 > ${WORK_DIR}/connector.log 2>&1 echo $(date -u) "Synchronization Done." 
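For reference, the `datasets.sh` script above boils down to a single `synchronization.py` invocation. A hand-run equivalent, as a sketch: `$CONNECTOR` is defined earlier in the script and elided here, and the paths assume the container layout from `docker-compose.yaml`:

```bash
# Illustrative one-off run inside the connector container; the flags match
# the ones used by connectors/openml/datasets.sh above.
PYTHONPATH=/app python3 /app/connectors/synchronization.py \
    -c "$CONNECTOR" \
    -w /opt/connectors/data/openml/dataset \
    --from-identifier 1 \
    --save-every 100
```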
From e5fe0d1680b15be3a6300a593ab443048e89b6a7 Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Tue, 14 Nov 2023 17:49:53 +0100 Subject: [PATCH 06/21] Rerun connectors on empty db --- src/connectors/synchronization.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/connectors/synchronization.py b/src/connectors/synchronization.py index 284030b5..f42192d4 100644 --- a/src/connectors/synchronization.py +++ b/src/connectors/synchronization.py @@ -8,7 +8,7 @@ from datetime import datetime from typing import Optional -from sqlmodel import Session +from sqlmodel import Session, select from connectors.abstract.resource_connector import ResourceConnector, RESOURCE from connectors.record_error import RecordError @@ -136,18 +136,22 @@ def main(): module = importlib.import_module(module_path) connector: ResourceConnector = getattr(module, connector_cls_name)() + working_dir.mkdir(parents=True, exist_ok=True) error_path = working_dir / RELATIVE_PATH_ERROR_CSV state_path = working_dir / RELATIVE_PATH_STATE_JSON - error_path.parents[0].mkdir(parents=True, exist_ok=True) - state_path.parents[0].mkdir(parents=True, exist_ok=True) first_run = not state_path.exists() - if first_run: + engine = sqlmodel_engine(rebuild_db="never") + with Session(engine) as session: + db_empty = session.scalars(select(connector.resource_class)).first() is None + + if first_run or db_empty: state = {} + state_path.unlink(missing_ok=True) + error_path.unlink(missing_ok=True) else: with open(state_path, "r") as f: state = json.load(f) - items = connector.run( state=state, from_identifier=args.from_identifier, @@ -161,8 +165,6 @@ def main(): if router.resource_class == connector.resource_class ] - engine = sqlmodel_engine(rebuild_db="never") - with Session(engine) as session: for i, item in enumerate(items): error = save_to_database(router=router, connector=connector, session=session, item=item) From 23497e95dd8260b8fb36766aae1fdebd7a8e4e4f Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Wed, 15 Nov 2023 12:29:03 +0100 Subject: [PATCH 07/21] Fixes for openml and zenodo --- connectors/openml/cron | 2 +- connectors/openml/datasets.sh | 2 +- connectors/zenodo/datasets.sh | 2 +- .../abstract/resource_connector_by_date.py | 30 +- .../huggingface_dataset_connector.py | 1 + src/connectors/synchronization.py | 4 +- .../zenodo/zenodo_dataset_connector.py | 99 +- .../zenodo/test_get_datasets_zenodo.py | 114 +- .../resources/connectors/zenodo/6884943.json | 1 + .../resources/connectors/zenodo/7199024.json | 1 + .../resources/connectors/zenodo/7555467.json | 1 + .../resources/connectors/zenodo/7793917.json | 1 + .../resources/connectors/zenodo/7902673.json | 1 + .../resources/connectors/zenodo/7947283.json | 1 + .../resources/connectors/zenodo/data_1.json | 104 - .../resources/connectors/zenodo/dataset.json | 129 - .../connectors/zenodo/list_records.xml | 2523 +++++++++++++++-- 17 files changed, 2473 insertions(+), 543 deletions(-) create mode 100644 src/tests/resources/connectors/zenodo/6884943.json create mode 100644 src/tests/resources/connectors/zenodo/7199024.json create mode 100644 src/tests/resources/connectors/zenodo/7555467.json create mode 100644 src/tests/resources/connectors/zenodo/7793917.json create mode 100644 src/tests/resources/connectors/zenodo/7902673.json create mode 100644 src/tests/resources/connectors/zenodo/7947283.json delete mode 100644 src/tests/resources/connectors/zenodo/data_1.json delete mode 100644 src/tests/resources/connectors/zenodo/dataset.json diff --git 
a/connectors/openml/cron b/connectors/openml/cron index 02c82ff7..b3961434 100644 --- a/connectors/openml/cron +++ b/connectors/openml/cron @@ -1 +1 @@ -* * * * * bash /opt/connectors/script/datasets.sh >> /opt/connectors/data/cron.log 2>&1 +25 * * * * bash /opt/connectors/script/datasets.sh >> /opt/connectors/data/openml/dataset/cron.log 2>&1 diff --git a/connectors/openml/datasets.sh b/connectors/openml/datasets.sh index 4d143041..3c186d03 100755 --- a/connectors/openml/datasets.sh +++ b/connectors/openml/datasets.sh @@ -16,5 +16,5 @@ PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \ -c $CONNECTOR \ -w $WORK_DIR \ --from-identifier 1 \ - --save-every 100 > ${WORK_DIR}/connector.log 2>&1 + --save-every 100 >> ${WORK_DIR}/connector.log 2>&1 echo $(date -u) "Synchronization Done." diff --git a/connectors/zenodo/datasets.sh b/connectors/zenodo/datasets.sh index 810bba01..dfcd90f7 100755 --- a/connectors/zenodo/datasets.sh +++ b/connectors/zenodo/datasets.sh @@ -16,5 +16,5 @@ PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \ -c $CONNECTOR \ -w $WORK_DIR \ --from-date "2023-08-01" \ - --save-every 100 > ${WORK_DIR}/connector.log 2>&1 + --save-every 100 >> ${WORK_DIR}/connector.log 2>&1 echo $(date -u) "Synchronization Done." diff --git a/src/connectors/abstract/resource_connector_by_date.py b/src/connectors/abstract/resource_connector_by_date.py index e655995d..77b06c9d 100644 --- a/src/connectors/abstract/resource_connector_by_date.py +++ b/src/connectors/abstract/resource_connector_by_date.py @@ -1,11 +1,10 @@ import abc import logging -from datetime import datetime, date +from datetime import datetime, timedelta from typing import Generic, Iterator, Tuple + from connectors.abstract.resource_connector import ResourceConnector from connectors.record_error import RecordError - - from connectors.resource_with_relations import ResourceWithRelations from routers.resource_router import RESOURCE @@ -27,9 +26,10 @@ def fetch( def run( self, state: dict, - from_date: date | None = None, limit: int | None = None, + from_incl: datetime | None = None, to_excl: datetime | None = None, + time_per_loop: timedelta = timedelta(days=1), **kwargs, ) -> Iterator[RESOURCE | ResourceWithRelations[RESOURCE] | RecordError]: if limit is not None: @@ -44,17 +44,19 @@ def run( first_run = not state if first_run: - if from_date is None: - raise ValueError("In the first run, the from-date needs to be set") - from_incl = datetime.combine(from_date, datetime.min.time()) + if from_incl is None: + raise ValueError("In the first run, from_incl needs to be set") else: from_incl = datetime.fromtimestamp(state["last"] + 0.001) - logging.info(f"Starting synchronisation {from_incl=}, {to_excl=}.") - state["from_incl"] = from_incl.timestamp() - state["to_excl"] = to_excl.timestamp() - for datetime_, result in self.fetch(from_incl=from_incl, to_excl=to_excl): - yield result - if datetime_: - state["last"] = datetime_.timestamp() + while from_incl < to_excl: + to_excl_current = min(from_incl + time_per_loop, to_excl) + logging.info(f"Starting synchronisation {from_incl=}, {to_excl_current=}.") + state["from_incl"] = from_incl.timestamp() + state["to_excl"] = to_excl_current.timestamp() + for datetime_, result in self.fetch(from_incl=from_incl, to_excl=to_excl_current): + yield result + if datetime_: + state["last"] = datetime_.timestamp() + from_incl = to_excl_current state["result"] = "Complete run done (although there might be errors)." 
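The reworked `run` above slices the requested range into `time_per_loop`-sized windows so that the state file can be check-pointed after every slice instead of only at the end. A standalone sketch of just that slicing logic (dates are illustrative):

```python
from datetime import datetime, timedelta

from_incl = datetime(2023, 8, 1)
to_excl = datetime(2023, 8, 3, 12)
time_per_loop = timedelta(days=1)

while from_incl < to_excl:
    # The last window is clamped to to_excl, so it may be shorter than time_per_loop.
    to_excl_current = min(from_incl + time_per_loop, to_excl)
    # fetch(from_incl=from_incl, to_excl=to_excl_current) would run here,
    # and the state would be persisted before moving to the next window.
    print(f"window: {from_incl} .. {to_excl_current}")
    from_incl = to_excl_current
# window: 2023-08-01 00:00:00 .. 2023-08-02 00:00:00
# window: 2023-08-02 00:00:00 .. 2023-08-03 00:00:00
# window: 2023-08-03 00:00:00 .. 2023-08-03 12:00:00
```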
diff --git a/src/connectors/huggingface/huggingface_dataset_connector.py b/src/connectors/huggingface/huggingface_dataset_connector.py index 19e2969d..a2cf5d52 100644 --- a/src/connectors/huggingface/huggingface_dataset_connector.py +++ b/src/connectors/huggingface/huggingface_dataset_connector.py @@ -135,6 +135,7 @@ def _parse_citations(self, dataset, pydantic_class_publication) -> list: # Ugly fix: many HF datasets have a wrong citation (see testcase) parsed_citations = bibtexparser.loads(raw_citation + "}").entries elif len(parsed_citations) == 0 and len(raw_citation) <= field_length.NORMAL: + # Sometimes dataset.citation is not a bibtex field, but just the title of an article return [ pydantic_class_publication( name=raw_citation, aiod_entry=AIoDEntryCreate(status="published") diff --git a/src/connectors/synchronization.py b/src/connectors/synchronization.py index f42192d4..866f595f 100644 --- a/src/connectors/synchronization.py +++ b/src/connectors/synchronization.py @@ -50,7 +50,7 @@ def _parse_args() -> argparse.Namespace: ) parser.add_argument( "--from-date", - type=lambda d: datetime.strptime(d, "%Y-%m-%d").date(), + type=lambda d: datetime.strptime(d, "%Y-%m-%d"), help="The start date. Only relevant for the first run of date-based connectors. " "In subsequent runs, date-based connectors will synchronize from the previous " "end-time. Format: YYYY-MM-DD", @@ -155,7 +155,7 @@ def main(): items = connector.run( state=state, from_identifier=args.from_identifier, - from_date=args.from_date, + from_incl=args.from_date, limit=args.limit, ) diff --git a/src/connectors/zenodo/zenodo_dataset_connector.py b/src/connectors/zenodo/zenodo_dataset_connector.py index 30a41998..46d9c22d 100644 --- a/src/connectors/zenodo/zenodo_dataset_connector.py +++ b/src/connectors/zenodo/zenodo_dataset_connector.py @@ -6,12 +6,14 @@ import xmltodict from sickle import Sickle from sqlmodel import SQLModel +from starlette import status from connectors.abstract.resource_connector_by_date import ResourceConnectorByDate from connectors.record_error import RecordError from connectors.resource_with_relations import ResourceWithRelations from database.model import field_length from database.model.agent.person import Person +from database.model.ai_asset.distribution import Distribution from database.model.ai_resource.text import Text from database.model.concept.aiod_entry import AIoDEntryCreate from database.model.dataset.dataset import Dataset @@ -40,44 +42,10 @@ def retry(self, _id: int) -> ResourceWithRelations[Dataset] | RecordError: code shows no similarities. 
""" - response = requests.get(f"https://zenodo.org/api/records/{_id}") - if not response.ok: - msg = response.json()["error"]["message"] - return RecordError( - identifier=str(_id), - error=f"Error while fetching data from Zenodo: '{msg}'.", - ) - - record = response.json() - creator_names = [item["name"] for item in record["metadata"]["creators"]] - creators = [] - for name in creator_names: - name_splits = name.split(", ") - if len(name_splits) == 2: - creators.append(Person(given_name=name_splits[1], surname=name_splits[0])) - else: - creators.append(Person(name=name)) - - description = record.get("metadata").get("description") - if len(description) > field_length.LONG: - text_break = " [...]" - description = description[: field_length.LONG - len(text_break)] + text_break - if description: - description = Text(plain=description) - - pydantic_class = resource_create(Dataset) - dataset = pydantic_class( - aiod_entry=AIoDEntryCreate(status="published"), - platform="zenodo", - platform_resource_identifier=_id, - date_published=record.get("created"), - name=record.get("metadata").get("title"), - description=description, - license=record.get("metadata").get("license").get("id"), - keyword=record.get("metadata").get("keywords"), - ) - return ResourceWithRelations[Dataset]( - resource=dataset, related_resources={"creator": creators} + raise NotImplementedError( + "Currently not implemented. See git history for an earlier " + "implementation, that needs to be brought up-to-date (ideally " + "using the same code as fetch)." ) @staticmethod @@ -90,9 +58,9 @@ def _dataset_from_record( ) -> ResourceWithRelations[Dataset] | RecordError: error_fmt = ZenodoDatasetConnector._error_msg_bad_format if isinstance(record["creators"]["creator"], list): - creator_names = [item["creatorName"] for item in record["creators"]["creator"]] - elif isinstance(record["creators"]["creator"]["creatorName"], str): - creator_names = [record["creators"]["creator"]["creatorName"]] + creator_names = [item["creatorName"]["#text"] for item in record["creators"]["creator"]] + elif isinstance(record["creators"]["creator"]["creatorName"]["#text"], str): + creator_names = [record["creators"]["creator"]["creatorName"]["#text"]] else: error_fmt("") return RecordError(identifier=identifier, error=error_fmt("creator")) @@ -142,17 +110,21 @@ def _dataset_from_record( else: return RecordError(identifier=identifier, error=error_fmt("date_published")) - if isinstance(record["publisher"], str): - publisher = record["publisher"] - else: - return RecordError(identifier=identifier, error=error_fmt("publisher")) - - if isinstance(record["rightsList"]["rights"], list): - license_ = record["rightsList"]["rights"][0]["@rightsURI"] - elif isinstance(record["rightsList"]["rights"]["@rightsURI"], str): - license_ = record["rightsList"]["rights"]["@rightsURI"] - else: - return RecordError(identifier=identifier, error=error_fmt("license")) + publisher = None + if "publisher" in record: + if isinstance(record["publisher"], str): + publisher = record["publisher"] + else: + return RecordError(identifier=identifier, error=error_fmt("publisher")) + + license_ = None + if "rightsList" in record: + if isinstance(record["rightsList"]["rights"], list): + license_ = record["rightsList"]["rights"][0]["#text"] + elif isinstance(record["rightsList"]["rights"]["#text"], str): + license_ = record["rightsList"]["rights"]["#text"] + else: + return RecordError(identifier=identifier, error=error_fmt("license")) keywords = [] if "subjects" in record: @@ -163,6 +135,26 @@ 
def _dataset_from_record( else: return RecordError(identifier=identifier, error=error_fmt("keywords")) + response = requests.get(f"https://zenodo.org/api/records/{id_number}/files") + if response.status_code == status.HTTP_200_OK: + entries = response.json()["entries"] + distributions = [ + Distribution( + name=entry["key"], + content_url=entry["links"]["content"], + encoding_format=entry["mimetype"], + checksum_algorithm=entry["checksum"].split(":")[0] + if "checksum" in entry + else None, + checksum=entry["checksum"].split(":")[1] if "checksum" in entry else None, + ) + for entry in entries + ] + elif response.status_code in (status.HTTP_403_FORBIDDEN, status.HTTP_410_GONE): + distributions = [] # Private files, or deleted files + else: + response.raise_for_status() + pydantic_class = resource_create(Dataset) dataset = pydantic_class( aiod_entry=AIoDEntryCreate(status="published"), @@ -175,6 +167,7 @@ def _dataset_from_record( publisher=publisher, license=license_, keyword=keywords, + distribution=distributions, ) return ResourceWithRelations[Dataset]( @@ -205,7 +198,7 @@ def fetch( } ) - while record := next(records, None): + for record in records: id_ = None datetime_ = None resource_type = ZenodoDatasetConnector._resource_type(record) diff --git a/src/tests/connectors/zenodo/test_get_datasets_zenodo.py b/src/tests/connectors/zenodo/test_get_datasets_zenodo.py index 39b1424b..069d21e8 100644 --- a/src/tests/connectors/zenodo/test_get_datasets_zenodo.py +++ b/src/tests/connectors/zenodo/test_get_datasets_zenodo.py @@ -13,79 +13,49 @@ def test_fetch_happy_path(): with responses.RequestsMock() as mocked_requests: mock_zenodo_responses(mocked_requests) - from_incl = datetime.datetime(2000, 1, 1, 12, 0, 0) - to_excl = datetime.datetime(2000, 1, 2, 12, 0, 0) - resources = list(connector.run(state={}, from_date=from_incl, to_excl=to_excl)) + from_incl = datetime.datetime(2023, 5, 23, 8, 0, 0) + to_excl = datetime.datetime(2023, 5, 23, 9, 0, 0) + resources = list(connector.run(state={}, from_incl=from_incl, to_excl=to_excl)) datasets = [r for r in resources if not isinstance(r, RecordError)] - assert len(datasets) == 1 + errors = [r for r in resources if isinstance(r, RecordError)] + assert {error.error for error in errors} == {"Wrong type"} + assert len(datasets) == 6 + assert len(errors) == 20 dataset = datasets[0].resource - assert dataset.name == "THE FIELD'S MALL MASS SHOOTING: EMERGENCY MEDICAL SERVICES RESPONSE" - assert dataset.description.plain == "This is a description paragraph" - assert dataset.date_published == datetime.datetime(2023, 5, 6) - assert dataset.license == "https://creativecommons.org/licenses/by/4.0/legalcode" + assert dataset.name == "kogalab21/all-alpha_design" + expected = ( + "Source data and demos for the research article entitled “Design of " + "complicated all-α protein structures” by Koya Sakuma, Naohiro Kobayashi, " + "Toshihiko Sugiki, Toshio Nagashima, Toshimichi Fujiwara, Kano Suzuki, Naoya " + "Kobayashi, Takeshi Murata, Takahiro Kosugi, Rie Koga, and Nobuyasu Koga." 
+ ) + assert dataset.description.plain == expected + assert dataset.date_published == datetime.datetime(2023, 5, 18) + assert dataset.license == "Other (Open)" assert dataset.platform == "zenodo" - assert dataset.platform_resource_identifier == "zenodo.org:7961614" - assert set(dataset.keyword) == { - "Mass casualty", - "Major incident", - "Management and leadership", - "Disaster", - "Mass shooting", - } + assert dataset.platform_resource_identifier == "zenodo.org:7947283" + assert set(dataset.keyword) == set() creators: list[Person] = datasets[0].related_resources["creator"] - assert len(creators) == 4 - for given, sur in [ - ("Peter Martin", "Hansen"), - ("henrik", "Alstrøm"), - ("Anders", "Damm-Hejmdal"), - ("Søren", "Mikkelsen"), - ]: - assert any(c for c in creators if c.given_name == given and c.surname == sur) - + assert len(creators) == 1 + assert creators[0].name == "Nobuyasu Koga" -def test_retry_happy_path(): - connector = ZenodoDatasetConnector() - with responses.RequestsMock() as mocked_requests: - with open(path_test_resources() / "connectors" / "zenodo" / "dataset.json", "r") as f: - dataset = f.read() - mocked_requests.add( - responses.GET, - "https://zenodo.org/api/records/7902672", # noqa E501 - body=dataset, - status=200, - ) - id_ = "7902672" - resource_with_relations = connector.retry(id_) - dataset = resource_with_relations.resource - assert dataset.name == "THE FIELD'S MALL MASS SHOOTING: EMERGENCY MEDICAL SERVICES RESPONSE" - assert dataset.description.plain == "This is a description paragraph" - assert dataset.date_published == datetime.datetime( - 2023, 5, 23, 7, 56, 17, 414652, tzinfo=datetime.timezone.utc + (dataset_7902673,) = [ + d.resource + for d in datasets + if d.resource.platform_resource_identifier == "zenodo.org:7902673" + ] + distributions = dataset_7902673.distribution + assert len(distributions) == 3 + distribution = distributions[0] + assert distribution.name == "FIELDS_CONFIDE_CHECLIST.docx" + assert distribution.encoding_format == "application/octet-stream" + assert distribution.checksum == "97f511d24f8867405a8f87afbc76939d" + assert distribution.checksum_algorithm == "md5" + assert ( + distribution.content_url + == "https://zenodo.org/api/records/7902673/files/FIELDS_CONFIDE_CHECLIST.docx/content" ) - assert dataset.license == "CC-BY-4.0" - assert dataset.platform == "zenodo" - assert dataset.platform_resource_identifier == "7902672" - - assert len(dataset.keyword) == 5 - assert set(dataset.keyword) == { - "Mass casualty", - "Major incident", - "Management and leadership", - "Disaster", - "Mass shooting", - } - creators: list[Person] = resource_with_relations.related_resources["creator"] - assert len(creators) == 6 - for given, sur in [ - ("Peter Martin", "Hansen"), - ("henrik", "Alstrøm"), - ("Anders", "Damm-Hejmdal"), - ("Søren", "Mikkelsen"), - ("Marius", "Rehn"), - ("Peter Anthony", "Berlac"), - ]: - assert any(c for c in creators if c.given_name == given and c.surname == sur) def mock_zenodo_responses(mocked_requests: responses.RequestsMock): @@ -96,7 +66,17 @@ def mock_zenodo_responses(mocked_requests: responses.RequestsMock): records_list = f.read() mocked_requests.add( responses.GET, - "https://zenodo.org/oai2d?metadataPrefix=oai_datacite&from=2000-01-01T00%3A00%3A00&until=2000-01-02T12%3A00%3A00&verb=ListRecords", # noqa E501 + "https://zenodo.org/oai2d?" 
+ "metadataPrefix=oai_datacite&" + "from=2023-05-23T08%3A00%3A00&" + "until=2023-05-23T09%3A00%3A00&" + "verb=ListRecords", body=records_list, status=200, ) + for id_ in (6884943, 7793917, 7199024, 7947283, 7555467, 7902673): + with open(path_test_resources() / "connectors" / "zenodo" / f"{id_}.json", "r") as f: + body = f.read() + mocked_requests.add( + responses.GET, f"https://zenodo.org/api/records/{id_}/files", body=body, status=200 + ) diff --git a/src/tests/resources/connectors/zenodo/6884943.json b/src/tests/resources/connectors/zenodo/6884943.json new file mode 100644 index 00000000..8677316f --- /dev/null +++ b/src/tests/resources/connectors/zenodo/6884943.json @@ -0,0 +1 @@ +{"enabled": true, "links": {"self": "https://zenodo.org/api/records/6884943/files", "archive": "https://zenodo.org/api/records/6884943/files-archive"}, "entries": [{"key": "README.docx", "storage_class": "L", "checksum": "md5:1518228f7a74510bd88eaa052cc9d604", "size": 16259, "created": "2022-07-22T16:05:04.099090+00:00", "updated": "2023-05-23T08:50:26.216375+00:00", "status": "completed", "metadata": null, "mimetype": "application/octet-stream", "version_id": "b5ed6bbd-9633-4a21-a973-08e92f097eb9", "file_id": "b297f148-8095-4575-a7f4-d9651302dc83", "bucket_id": "e993febe-32fa-4373-a79c-b5218b20070c", "links": {"self": "https://zenodo.org/api/records/6884943/files/README.docx", "content": "https://zenodo.org/api/records/6884943/files/README.docx/content"}}, {"key": "AoA_3DVA_Measurements.zip", "storage_class": "L", "checksum": "md5:1662553f77508162d8d8a1bcf6d606dc", "size": 22402412, "created": "2022-07-22T16:05:04.099090+00:00", "updated": "2023-05-23T08:50:26.216375+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", "version_id": "dfe314ff-aa81-49d5-b6cd-268aed4c8772", "file_id": "7a498936-abf3-4d45-9de1-4e54f2d0b0b4", "bucket_id": "e993febe-32fa-4373-a79c-b5218b20070c", "links": {"self": "https://zenodo.org/api/records/6884943/files/AoA_3DVA_Measurements.zip", "content": "https://zenodo.org/api/records/6884943/files/AoA_3DVA_Measurements.zip/content"}}, {"key": "README.pdf", "storage_class": "L", "checksum": "md5:51182b01c79ed3464941512a066d5ad5", "size": 173120, "created": "2022-07-22T16:05:04.099090+00:00", "updated": "2023-05-23T08:50:26.216375+00:00", "status": "completed", "metadata": null, "mimetype": "application/pdf", "version_id": "e42a1d7c-0968-4e74-b81d-518a20130313", "file_id": "7301da5d-ebf7-4c9a-8aca-8a42c0fb3782", "bucket_id": "e993febe-32fa-4373-a79c-b5218b20070c", "links": {"self": "https://zenodo.org/api/records/6884943/files/README.pdf", "content": "https://zenodo.org/api/records/6884943/files/README.pdf/content"}}], "default_preview": null, "order": []} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/7199024.json b/src/tests/resources/connectors/zenodo/7199024.json new file mode 100644 index 00000000..9c82ba8e --- /dev/null +++ b/src/tests/resources/connectors/zenodo/7199024.json @@ -0,0 +1 @@ +{"enabled": true, "links": {"self": "https://zenodo.org/api/records/7199024/files", "archive": "https://zenodo.org/api/records/7199024/files-archive"}, "entries": [{"key": "IQ measurements with 5G SRS signals and receiver 4-ports 3D Vector Antenna for positioning studies.zip", "storage_class": "L", "checksum": "md5:ea0a169bb372f2a02ef754eca515f54a", "size": 15370369, "created": "2022-10-14T14:32:06.711360+00:00", "updated": "2023-05-23T08:49:11.464265+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", 
"version_id": "6911e4d3-621f-414e-9961-4be55b94a38d", "file_id": "d263f978-e268-4f89-b52b-b088b0ddfe49", "bucket_id": "02922c60-4fca-489b-99f2-bf41719b3b75", "links": {"self": "https://zenodo.org/api/records/7199024/files/IQ%20measurements%20with%205G%20SRS%20signals%20and%20receiver%204-ports%203D%20Vector%20Antenna%20for%20positioning%20studies.zip", "content": "https://zenodo.org/api/records/7199024/files/IQ%20measurements%20with%205G%20SRS%20signals%20and%20receiver%204-ports%203D%20Vector%20Antenna%20for%20positioning%20studies.zip/content"}}, {"key": "README.pdf", "storage_class": "L", "checksum": "md5:52269b598fe8ba5fc1e6ffec997dac22", "size": 316506, "created": "2022-10-14T14:32:06.711360+00:00", "updated": "2023-05-23T08:49:11.464265+00:00", "status": "completed", "metadata": null, "mimetype": "application/pdf", "version_id": "fd03b564-cf1c-4e8d-8d68-5bbd50d99254", "file_id": "2888a644-3c44-44c0-90fb-522f9939c487", "bucket_id": "02922c60-4fca-489b-99f2-bf41719b3b75", "links": {"self": "https://zenodo.org/api/records/7199024/files/README.pdf", "content": "https://zenodo.org/api/records/7199024/files/README.pdf/content"}}, {"key": "README.docx", "storage_class": "L", "checksum": "md5:23c02d78fb6f5d6179007d19bbb1b060", "size": 257235, "created": "2022-10-14T14:32:06.711360+00:00", "updated": "2023-05-23T08:49:11.464265+00:00", "status": "completed", "metadata": null, "mimetype": "application/octet-stream", "version_id": "de8ecfc3-5187-40de-87c4-21be7049ee2b", "file_id": "fd26e2db-b7a1-4d1a-9ecb-51852090af54", "bucket_id": "02922c60-4fca-489b-99f2-bf41719b3b75", "links": {"self": "https://zenodo.org/api/records/7199024/files/README.docx", "content": "https://zenodo.org/api/records/7199024/files/README.docx/content"}}], "default_preview": null, "order": []} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/7555467.json b/src/tests/resources/connectors/zenodo/7555467.json new file mode 100644 index 00000000..64fae02f --- /dev/null +++ b/src/tests/resources/connectors/zenodo/7555467.json @@ -0,0 +1 @@ +{"enabled": true, "links": {"self": "https://zenodo.org/api/records/7555467/files", "archive": "https://zenodo.org/api/records/7555467/files-archive"}, "entries": [{"key": "training-data.zip", "storage_class": "L", "checksum": "md5:a29158c848e9c692743314f1e8dbd84c", "size": 1105442067, "created": "2023-01-20T18:41:15.900444+00:00", "updated": "2023-05-23T08:09:18.335085+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", "version_id": "a82d660a-a912-4eae-b1cf-29075f29a27d", "file_id": "8e68076b-f489-4cab-be36-7eea795b5fb4", "bucket_id": "7ca20386-74b7-4ebc-ba45-54c35f29e8dd", "links": {"self": "https://zenodo.org/api/records/7555467/files/training-data.zip", "content": "https://zenodo.org/api/records/7555467/files/training-data.zip/content"}}], "default_preview": null, "order": []} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/7793917.json b/src/tests/resources/connectors/zenodo/7793917.json new file mode 100644 index 00000000..2b21387b --- /dev/null +++ b/src/tests/resources/connectors/zenodo/7793917.json @@ -0,0 +1 @@ +{"enabled": true, "links": {"self": "https://zenodo.org/api/records/7793917/files", "archive": "https://zenodo.org/api/records/7793917/files-archive"}, "entries": [{"key": "2016.zip", "storage_class": "L", "checksum": "md5:c21f4419e8048d2e1826ad0102211349", "size": 100507806, "created": "2023-04-03T06:36:11.580036+00:00", "updated": "2023-05-23T08:37:37.936347+00:00", "status": 
"completed", "metadata": null, "mimetype": "application/zip", "version_id": "d8153966-3381-44a6-9701-1d345bad3c94", "file_id": "3f7fe7bc-cd78-4f5b-b11a-2b79999cad51", "bucket_id": "cf24bd54-fbc8-4764-b840-ade7a726a221", "links": {"self": "https://zenodo.org/api/records/7793917/files/2016.zip", "content": "https://zenodo.org/api/records/7793917/files/2016.zip/content"}}, {"key": "2015.zip", "storage_class": "L", "checksum": "md5:41bca25822301d094e359dcba1b2e6ed", "size": 99600754, "created": "2023-04-03T06:36:11.580036+00:00", "updated": "2023-05-23T08:37:37.936347+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", "version_id": "9500223a-c052-4c20-9a63-35c67d251114", "file_id": "7b9c8c58-f502-4d30-ac12-ccd9ea36dc3e", "bucket_id": "cf24bd54-fbc8-4764-b840-ade7a726a221", "links": {"self": "https://zenodo.org/api/records/7793917/files/2015.zip", "content": "https://zenodo.org/api/records/7793917/files/2015.zip/content"}}, {"key": "2017.zip", "storage_class": "L", "checksum": "md5:8dd2dd17682260f84d5d82375b32146f", "size": 99840240, "created": "2023-04-03T06:36:11.580036+00:00", "updated": "2023-05-23T08:37:37.936347+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", "version_id": "6882164e-9266-4593-a752-ce2cb99047ff", "file_id": "11b42ab0-1136-464a-a3b9-c1afed69f746", "bucket_id": "cf24bd54-fbc8-4764-b840-ade7a726a221", "links": {"self": "https://zenodo.org/api/records/7793917/files/2017.zip", "content": "https://zenodo.org/api/records/7793917/files/2017.zip/content"}}, {"key": "2019.zip", "storage_class": "L", "checksum": "md5:43f042dfc7638c6a4e86538f25a695ef", "size": 98951914, "created": "2023-04-03T06:36:11.580036+00:00", "updated": "2023-05-23T08:37:37.936347+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", "version_id": "5f33f4cd-92ce-4bec-ad46-325466e7ae28", "file_id": "0cf09011-b294-4b22-b6d1-9364a7277be2", "bucket_id": "cf24bd54-fbc8-4764-b840-ade7a726a221", "links": {"self": "https://zenodo.org/api/records/7793917/files/2019.zip", "content": "https://zenodo.org/api/records/7793917/files/2019.zip/content"}}, {"key": "2020.zip", "storage_class": "L", "checksum": "md5:2dbfdfb41ebac38be63accb3450efc46", "size": 98656349, "created": "2023-04-03T06:36:11.580036+00:00", "updated": "2023-05-23T08:37:37.936347+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", "version_id": "1ad3ee50-b89a-4de5-9dd5-e77c4af5b28c", "file_id": "ae5db600-3769-42fb-a724-337851e2e076", "bucket_id": "cf24bd54-fbc8-4764-b840-ade7a726a221", "links": {"self": "https://zenodo.org/api/records/7793917/files/2020.zip", "content": "https://zenodo.org/api/records/7793917/files/2020.zip/content"}}, {"key": "2018.zip", "storage_class": "L", "checksum": "md5:2e40687b861f86ad26d31d9908ccf2ad", "size": 99355029, "created": "2023-04-03T06:36:11.580036+00:00", "updated": "2023-05-23T08:37:37.936347+00:00", "status": "completed", "metadata": null, "mimetype": "application/zip", "version_id": "8cc559a9-c8d5-4e26-b2f7-4b3c04970159", "file_id": "cdd65c6b-b992-4d4d-be39-690ca8465ab2", "bucket_id": "cf24bd54-fbc8-4764-b840-ade7a726a221", "links": {"self": "https://zenodo.org/api/records/7793917/files/2018.zip", "content": "https://zenodo.org/api/records/7793917/files/2018.zip/content"}}], "default_preview": null, "order": []} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/7902673.json b/src/tests/resources/connectors/zenodo/7902673.json new file mode 100644 index 00000000..2ac69544 --- 
/dev/null +++ b/src/tests/resources/connectors/zenodo/7902673.json @@ -0,0 +1 @@ +{"enabled": true, "links": {"self": "https://zenodo.org/api/records/7902673/files", "archive": "https://zenodo.org/api/records/7902673/files-archive"}, "entries": [{"key": "FIELDS_CONFIDE_CHECLIST.docx", "storage_class": "L", "checksum": "md5:97f511d24f8867405a8f87afbc76939d", "size": 15600, "created": "2023-05-06T12:35:20.936527+00:00", "updated": "2023-05-23T08:05:45.007085+00:00", "status": "completed", "metadata": null, "mimetype": "application/octet-stream", "version_id": "5ce8df6b-6e71-478d-a70c-7af3ee0b0ef0", "file_id": "ae87f52f-c4e5-45de-abaa-751c27cf1168", "bucket_id": "69b8a648-5379-479c-b9a9-93e22ebfa7e4", "links": {"self": "https://zenodo.org/api/records/7902673/files/FIELDS_CONFIDE_CHECLIST.docx", "content": "https://zenodo.org/api/records/7902673/files/FIELDS_CONFIDE_CHECLIST.docx/content"}}, {"key": "FIELDS_SINE_CFB.xlsx", "storage_class": "L", "checksum": "md5:538fb0d16d323d6904b2df3976f6a9c0", "size": 25574, "created": "2023-05-06T12:35:20.936527+00:00", "updated": "2023-05-23T08:05:45.007085+00:00", "status": "completed", "metadata": null, "mimetype": "application/octet-stream", "version_id": "34210c1b-0412-43d9-a742-6f36adb5ae95", "file_id": "045b3345-6281-41a9-82e6-9c70c3f5474e", "bucket_id": "69b8a648-5379-479c-b9a9-93e22ebfa7e4", "links": {"self": "https://zenodo.org/api/records/7902673/files/FIELDS_SINE_CFB.xlsx", "content": "https://zenodo.org/api/records/7902673/files/FIELDS_SINE_CFB.xlsx/content"}}, {"key": "FIELDS_UNITS_XL.xlsx", "storage_class": "L", "checksum": "md5:18431d2ad50b7d82935a1dda6ee6db61", "size": 12369, "created": "2023-05-06T12:35:20.936527+00:00", "updated": "2023-05-23T08:05:45.007085+00:00", "status": "completed", "metadata": null, "mimetype": "application/octet-stream", "version_id": "3e23d566-d966-4c0b-868a-fb4ce85c4631", "file_id": "2a47ee87-4f6d-48de-acfa-80d32e73eeeb", "bucket_id": "69b8a648-5379-479c-b9a9-93e22ebfa7e4", "links": {"self": "https://zenodo.org/api/records/7902673/files/FIELDS_UNITS_XL.xlsx", "content": "https://zenodo.org/api/records/7902673/files/FIELDS_UNITS_XL.xlsx/content"}}], "default_preview": null, "order": []} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/7947283.json b/src/tests/resources/connectors/zenodo/7947283.json new file mode 100644 index 00000000..910b3be9 --- /dev/null +++ b/src/tests/resources/connectors/zenodo/7947283.json @@ -0,0 +1 @@ +{"enabled": true, "links": {"self": "https://zenodo.org/api/records/7947283/files", "archive": "https://zenodo.org/api/records/7947283/files-archive"}, "entries": [{"key": "ExtFig3.cc4.txt", "storage_class": "L", "checksum": "md5:c86bd8be85412da69145fb56eddf3cd4", "size": 77421, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "5d2cd8ca-440d-44de-99e2-0d155cf72cbf", "file_id": "ac3dac84-e4e6-4b50-b8d0-bf7e13ab1927", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc4.txt", "content": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc4.txt/content"}}, {"key": "ExtFig4_5.txt", "storage_class": "L", "checksum": "md5:88b2eda5dd497926fcdd380aacf6fc9a", "size": 979221, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": 
"d5ef9cfc-e74d-43a4-8eef-7bafcb02724d", "file_id": "c2f57290-f31b-44e4-b349-95f1de8dbfd8", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig4_5.txt", "content": "https://zenodo.org/api/records/7947283/files/ExtFig4_5.txt/content"}}, {"key": "ExtFig3.cc5.txt", "storage_class": "L", "checksum": "md5:eb31d3b8f871ebe9c03497410d1c5ad1", "size": 67391, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "5c9442b5-2d73-4e37-a80c-dcea4b6628bb", "file_id": "1be442a8-c6c0-415c-a738-55f6c7c9e894", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc5.txt", "content": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc5.txt/content"}}, {"key": "ExtFig7.tar.gz", "storage_class": "L", "checksum": "md5:3b18cc12f9080dd8792cf3014d991a0e", "size": 360936212, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "application/gzip", "version_id": "40a70c81-3309-43ac-9494-f82e0d4ea3ac", "file_id": "6188d5f7-f094-4d19-88c5-66d9286fb37a", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig7.tar.gz", "content": "https://zenodo.org/api/records/7947283/files/ExtFig7.tar.gz/content"}}, {"key": "Fig1d.txt", "storage_class": "L", "checksum": "md5:682624e69e1fe9d7808453eea2a68380", "size": 152830, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "14c1d478-bed3-4cb4-96ed-33c0d90c4fd4", "file_id": "43277486-8cae-49b6-b05c-80d6d37413e0", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/Fig1d.txt", "content": "https://zenodo.org/api/records/7947283/files/Fig1d.txt/content"}}, {"key": "Fig3b.txt", "storage_class": "L", "checksum": "md5:945c7f9b765fd4f6761145a79c1760f0", "size": 49569398, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "6ab2a342-8d51-46f2-a592-74257d1af29a", "file_id": "b01f8078-5c87-46e2-9b13-0b1510134b30", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/Fig3b.txt", "content": "https://zenodo.org/api/records/7947283/files/Fig3b.txt/content"}}, {"key": "ExtFig3.cc3.txt", "storage_class": "L", "checksum": "md5:da658031ae26a015938fcd446da9e6f0", "size": 99720, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "cc344ac3-d385-4bd1-95b3-86a1b5e5541d", "file_id": "14939f29-eacb-49d2-b250-c85ec62d1841", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc3.txt", "content": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc3.txt/content"}}, {"key": "ExtFig3.cc2.txt", "storage_class": "L", "checksum": "md5:24f0324bc91a2ae4a45e1ee72182314c", "size": 84608, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": 
"text/plain", "version_id": "c9eceb46-e9a7-46a8-a3fc-fc9f48a62da6", "file_id": "34d5aa85-2ab5-4c19-9c3e-3441b91c7321", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc2.txt", "content": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc2.txt/content"}}, {"key": "ExtFig3.cc1.txt", "storage_class": "L", "checksum": "md5:c06d1f93a38b4dee83cb2476af93a31f", "size": 87306, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "132677ad-0163-46bb-b83c-5e58305a6d00", "file_id": "69628d86-090b-4df1-81a7-3113d56db89c", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc1.txt", "content": "https://zenodo.org/api/records/7947283/files/ExtFig3.cc1.txt/content"}}, {"key": "ExtFig18.txt", "storage_class": "L", "checksum": "md5:2e2c63dd13e078dda58e4a05cafc4fee", "size": 297327, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "af11fabc-181f-4303-b5ab-9f7df85c4af1", "file_id": "8f44b2b1-08b7-466b-a7d3-b595225bb13c", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/ExtFig18.txt", "content": "https://zenodo.org/api/records/7947283/files/ExtFig18.txt/content"}}, {"key": "demos.tgz", "storage_class": "L", "checksum": "md5:9661ff82dbdd9b600d97057b62c29d8c", "size": 1559831951, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "application/gzip", "version_id": "e390004a-8714-4586-9506-82de812917b1", "file_id": "051c04f6-c1ac-490b-bed8-f8b975838001", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/demos.tgz", "content": "https://zenodo.org/api/records/7947283/files/demos.tgz/content"}}, {"key": "compact_scfree_H6.whiteprints", "storage_class": "L", "checksum": "md5:d6251797748fba2e8b3a3653af66a174", "size": 12449633, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "application/octet-stream", "version_id": "a2bc1708-100b-45cf-bbd7-786a8ffcb773", "file_id": "49884d73-e895-4db4-a6e9-07e61836778e", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/compact_scfree_H6.whiteprints", "content": "https://zenodo.org/api/records/7947283/files/compact_scfree_H6.whiteprints/content"}}, {"key": "compact_scfree_H6.txt", "storage_class": "L", "checksum": "md5:bab66f2dd299f904e0fe6cb629d5112d", "size": 23777057, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "e16aa122-957e-48c3-b029-747fcb571e85", "file_id": "32a11485-c973-4cf9-860e-a35f2a7ef0d4", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/compact_scfree_H6.txt", "content": "https://zenodo.org/api/records/7947283/files/compact_scfree_H6.txt/content"}}, {"key": "compact_scfree_H5.whiteprints", "storage_class": "L", "checksum": "md5:ba0b4a61c8e90540f4f30147c2630272", "size": 51302858, 
"created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "application/octet-stream", "version_id": "08fec92d-5511-417d-ba90-ac0c09cf014e", "file_id": "8b05d851-e6ca-4454-94fb-8ee1b786e164", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/compact_scfree_H5.whiteprints", "content": "https://zenodo.org/api/records/7947283/files/compact_scfree_H5.whiteprints/content"}}, {"key": "compact_scfree_H5.txt", "storage_class": "L", "checksum": "md5:564d0ae4a2bce5fd7609beca1a590522", "size": 108235838, "created": "2023-05-19T05:48:16.685458+00:00", "updated": "2023-05-23T08:38:27.735409+00:00", "status": "completed", "metadata": null, "mimetype": "text/plain", "version_id": "346c9d9d-c626-4617-8e9d-b3922ab9703e", "file_id": "28ec4727-6adb-4298-8723-6149440a849e", "bucket_id": "e995bb40-6b8b-4911-9e89-edfeca311b31", "links": {"self": "https://zenodo.org/api/records/7947283/files/compact_scfree_H5.txt", "content": "https://zenodo.org/api/records/7947283/files/compact_scfree_H5.txt/content"}}], "default_preview": null, "order": []} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/data_1.json b/src/tests/resources/connectors/zenodo/data_1.json deleted file mode 100644 index ef98914f..00000000 --- a/src/tests/resources/connectors/zenodo/data_1.json +++ /dev/null @@ -1,104 +0,0 @@ - -{ - "conceptdoi":"10.5281/zenodo.7712946", - "conceptrecid":"7712946", - "created":"2023-03-09T14:41:29.188193+00:00", - "doi":"10.5281/zenodo.7712947", - "files":[ - { - "bucket":"cf7fc7fa-b298-46ac-9fe6-87ebc1d0edc1", - "checksum":"md5:c1e06dd66c8feebd110696b5ca895bf8", - "key":"09-09-2023.pdf", - "links":{ - "self":"https://zenodo.org/api/files/cf7fc7fa-b298-46ac-9fe6-87ebc1d0edc1/09-09-2023.pdf" - }, - "size":694897, - "type":"pdf" - } - ], - "id":7712947, - "links":{ - "badge":"https://zenodo.org/badge/doi/10.5281/zenodo.7712947.svg", - "bucket":"https://zenodo.org/api/files/cf7fc7fa-b298-46ac-9fe6-87ebc1d0edc1", - "conceptbadge":"https://zenodo.org/badge/doi/10.5281/zenodo.7712946.svg", - "conceptdoi":"https://doi.org/10.5281/zenodo.7712946", - "doi":"https://doi.org/10.5281/zenodo.7712947", - "html":"https://zenodo.org/record/7712947", - "latest":"https://zenodo.org/api/records/7712947", - "latest_html":"https://zenodo.org/record/7712947", - "self":"https://zenodo.org/api/records/7712947" - }, - "metadata":{ - "access_right":"open", - "access_right_category":"success", - "creators":[ - { - "affiliation":"School of Built Environment, University of Technology Sarawak (UTS), Malaysia", - "name":"Chih Siong Wong" - } - ], - "description":"

ABSTRACT: Architectural education has long placed a strong emphasis on the studio, which serves as a crucial place for teaching and learning. The architecture studio is a dynamic and lively place where students can engage in design discussions, exchange ideas, and receive feedback from instructors and peers. Despite recognising the vital role architecture studios play in shaping the learning experience of architecture students, limited research has explored which specific attributes of the studio environment are most important to students. This research aims to address this gap by determining the priorities of architecture students regarding key studio attributes. The research employs a quantitative approach using the Best-Worst Scaling (BWS) model and a questionnaire survey. A pilot study was conducted with undergraduate architecture students. The results reveal the ranking and relative importance of key studio attributes, organised into five categories: Physiological Facilities, Information and Communications Technology, Indoor Environment Quality, Territoriality, Furniture, and Reference. This research provides a valuable reference for designing student-centred studio environments and demonstrates the viability of using the BWS method to determine students’ priorities for studio attributes.

", - "doi":"10.5281/zenodo.7712947", - "journal":{ - "issue":"03", - "pages":"1913-1921", - "title":"International Journal of Current Science Research and Review", - "volume":"06" - }, - "keywords":[ - "Architecture Education, Best-Worst Scaling, Studio, Studio Attributes, Studio Environment." - ], - "language":"eng", - "license":{ - "id":"CC-BY-4.0" - }, - "publication_date":"2023-03-09", - "related_identifiers":[ - { - "identifier":"10.5281/zenodo.7712946", - "relation":"isVersionOf", - "scheme":"doi" - } - ], - "relations":{ - "version":[ - { - "count":1, - "index":0, - "is_last":true, - "last_child":{ - "pid_type":"recid", - "pid_value":"7712947" - }, - "parent":{ - "pid_type":"recid", - "pid_value":"7712946" - } - } - ] - }, - "resource_type":{ - "subtype":"article", - "title":"Journal article", - "type":"publication" - }, - "title":"Student-Centred Studio Environments: A Deep Dive into Architecture Students' Needs" - }, - "owners":[ - 190048 - ], - "revision":1, - "stats":{ - "downloads":0.0, - "unique_downloads":0.0, - "unique_views":0.0, - "version_downloads":0.0, - "version_unique_downloads":0.0, - "version_unique_views":0.0, - "version_views":0.0, - "version_volume":0.0, - "views":0.0, - "volume":0.0 - }, - "updated":"2023-03-09T14:41:30.542276+00:00" -} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/dataset.json b/src/tests/resources/connectors/zenodo/dataset.json deleted file mode 100644 index 3f63ea9a..00000000 --- a/src/tests/resources/connectors/zenodo/dataset.json +++ /dev/null @@ -1,129 +0,0 @@ -{ - "conceptdoi": "10.5281/zenodo.7902672", - "conceptrecid": "7902672", - "created": "2023-05-23T07:56:17.414652+00:00", - "doi": "10.5281/zenodo.7961614", - "files": [ - { - "bucket": "ec86c6d8-aa64-4eea-aa9b-b2ca0880a9c4", - "checksum": "md5:97f511d24f8867405a8f87afbc76939d", - "key": "FIELDS_CONFIDE_CHECLIST.docx", - "links": { - "self": "https://zenodo.org/api/files/ec86c6d8-aa64-4eea-aa9b-b2ca0880a9c4/FIELDS_CONFIDE_CHECLIST.docx" - }, - "size": 15600, - "type": "docx" - }, - { - "bucket": "ec86c6d8-aa64-4eea-aa9b-b2ca0880a9c4", - "checksum": "md5:18431d2ad50b7d82935a1dda6ee6db61", - "key": "FIELDS_UNITS_XL.xlsx", - "links": { - "self": "https://zenodo.org/api/files/ec86c6d8-aa64-4eea-aa9b-b2ca0880a9c4/FIELDS_UNITS_XL.xlsx" - }, - "size": 12369, - "type": "xlsx" - } - ], - "id": 7961614, - "links": { - "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.7961614.svg", - "bucket": "https://zenodo.org/api/files/ec86c6d8-aa64-4eea-aa9b-b2ca0880a9c4", - "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.7902672.svg", - "conceptdoi": "https://doi.org/10.5281/zenodo.7902672", - "doi": "https://doi.org/10.5281/zenodo.7961614", - "html": "https://zenodo.org/record/7961614", - "latest": "https://zenodo.org/api/records/7961614", - "latest_html": "https://zenodo.org/record/7961614", - "self": "https://zenodo.org/api/records/7961614" - }, - "metadata": { - "access_right": "open", - "access_right_category": "success", - "creators": [ - { - "affiliation": "Odense University Hospital", - "name": "Hansen, Peter Martin" - }, - { - "affiliation": "Copenhagen University Hopsital", - "name": "Alstrøm, henrik" - }, - { - "affiliation": "Copenhagen Emergency Medical Services", - "name": "Damm-Hejmdal, Anders" - }, - { - "affiliation": "Odense University Hospital", - "name": "Mikkelsen, Søren" - }, - { - "affiliation": "Oslo University Hospital", - "name": "Rehn, Marius" - }, - { - "affiliation": "Copenhagen University Hospital", - "name": "Berlac, Peter 
Anthony" - } - ], - "description": "This is a description paragraph", - "doi": "10.5281/zenodo.7961614", - "keywords": [ - "Major incident", - "Disaster", - "Mass shooting", - "Mass casualty", - "Management and leadership" - ], - "license": { - "id": "CC-BY-4.0" - }, - "publication_date": "2023-05-06", - "related_identifiers": [ - { - "identifier": "10.5281/zenodo.7902672", - "relation": "isVersionOf", - "scheme": "doi" - } - ], - "relations": { - "version": [ - { - "count": 2, - "index": 1, - "is_last": true, - "last_child": { - "pid_type": "recid", - "pid_value": "7961614" - }, - "parent": { - "pid_type": "recid", - "pid_value": "7902672" - } - } - ] - }, - "resource_type": { - "title": "Dataset", - "type": "dataset" - }, - "title": "THE FIELD'S MALL MASS SHOOTING: EMERGENCY MEDICAL SERVICES RESPONSE" - }, - "owners": [ - 543260 - ], - "revision": 4, - "stats": { - "downloads": 2.0, - "unique_downloads": 1.0, - "unique_views": 3.0, - "version_downloads": 3.0, - "version_unique_downloads": 2.0, - "version_unique_views": 29.0, - "version_views": 30.0, - "version_volume": 53543.0, - "views": 3.0, - "volume": 27969.0 - }, - "updated": "2023-05-25T02:28:52.350812+00:00" -} \ No newline at end of file diff --git a/src/tests/resources/connectors/zenodo/list_records.xml b/src/tests/resources/connectors/zenodo/list_records.xml index 47722c30..b6750b3d 100644 --- a/src/tests/resources/connectors/zenodo/list_records.xml +++ b/src/tests/resources/connectors/zenodo/list_records.xml @@ -1,173 +1,2354 @@ - - + - 2023-05-25T12:30:03Z - https://zenodo.org/oai2d - - -
- oai:zenodo.org:7961614 - 2023-05-23T08:05:46Z -
- - - true - 3.1 - CERN.ZENODO - - - 10.5281/zenodo.7961614 - - - Hansen, Peter Martin - Odense University Hospital - - - Alstrøm, henrik - Copenhagen University Hopsital - - - Damm-Hejmdal, Anders - Copenhagen Emergency Medical Services - - - Mikkelsen, Søren - Odense University Hospital - - - - THE FIELD'S MALL MASS SHOOTING: EMERGENCY MEDICAL SERVICES RESPONSE - - Zenodo - 2023 - - Major incident - Disaster - Mass shooting - Mass casualty - Management and leadership - - - 2023-05-06 - - - - 10.5281/zenodo.7902672 - - - Creative Commons Attribution 4.0 International - Open Access - - - This is a description paragraph - - - - -
- -
- oai:zenodo.org:7960898 - 2023-05-23T06:03:15Z -
- - - true - 3.1 - CERN.ZENODO - - - 10.5281/zenodo.7960898 - - - Azamova Barno - - - - THE USE OF A MULTIMEDIA BOARD AS A FACTOR INFLUENCING THE ASSIMILATION OF GEOMETRIC MATERIAL IN ELEMENTARY SCHOOL - - Zenodo - 2023 - - 2023-05-23 - - - - 10.5281/zenodo.7960897 - - - Creative Commons Attribution 4.0 International - Open Access - - - <p>The use of various information tools in teaching makes the lesson non-standard, increases the motivation of children, allows them to be involved in various forms of work and, of course, teaches them to learn and acquire knowledge on their own.</p> - - - - - -
- -
- oai:zenodo.org:7947746 - 2023-05-23T08:03:55Z - software -
- - - true - 3.1 - CERN.ZENODO - - - 10.5281/zenodo.7947746 - - - Jonge, Niek N.F. - 0000-0002-3054-6210 - Wageningen University and Research - - - Louwen, Joris J. R. - Wageningen University and Research - - - Huber, Florian - 0000-0002-3535-9406 - Netherlands eScience Center - - - Hooft, Justin J. J. - 0000-0002-9340-5511 - Wageningen University and Research - - - - MS2Query - - Zenodo - 2023 - - 2023-05-18 - - - - https://github.com/iomega/ms2query/tree/1.0.1 - 10.5281/zenodo.7695011 - - 1.0.1 - - Apache License 2.0 - Open Access - - - Machine learning assisted library querying of MS/MS spectra. - If you use this software, please cite it using these metadata. - - - - - -
-
+ 2023-11-14T18:31:59Z + https://zenodo.org/oai2d + + +
+ oai:zenodo.org:7961720 + 2023-05-23T08:43:15Z +
+ + + 4.3 + + + + 10.5281/zenodo.7961720 + + oai:zenodo.org:7961720 + + + + Yoko Ito + Yoko Ito + Tokai University School o Medicine + + + + Supplementary Figure 1 + + Zenodo + 2023 + + 2023-05-23 + + + + 10.5281/zenodo.7961719 + + + Supplementary Figure 1A and B to submit our research manuscript into AJP Lung + + + + + +
+ +
+ oai:zenodo.org:7961708 + 2023-05-23T08:26:48Z +
+ + + 4.3 + + + + 10.58473/JBS0022 + + oai:zenodo.org:7961708 + + + + Liu Huan + Liu Huan + 0000-0003-4881-8509 + 江西省诚筑环保工程有限公司 + + + + Original review of specificity in the interaction between pathogen invasion and host organism + + Zenodo + 2023 + + 2023-05-23 + + + + Article 14. Original review of specificity in the interaction between pathogen invasion and host organism + + +Author: Liu Huan (1983- ), Master of Science (First Class Honours), The University of Auckland. + + + + + +
+ +
+ oai:zenodo.org:7947283 + 2023-05-23T08:38:27Z +
+ + + 4.3 + + + + 10.5281/zenodo.7947283 + + oai:zenodo.org:7947283 + + + + Nobuyasu Koga + Nobuyasu Koga + Institute for Protein Research, Osaka University + + + + kogalab21/all-alpha_design + + Zenodo + 2023 + + 2023-05-18 + + + + https://github.com/kogalab21/all-alpha_design/tree/v0.1.0 + 10.5281/zenodo.7947261 + + v1.0.0 + + Other (Open) + + + Source data and demos for the research article entitled “Design of complicated all-α protein structures” by Koya Sakuma, Naohiro Kobayashi, Toshihiko Sugiki, Toshio Nagashima, Toshimichi Fujiwara, Kano Suzuki, Naoya Kobayashi, Takeshi Murata, Takahiro Kosugi, Rie Koga, and Nobuyasu Koga. + + + + + +
+ +
+ oai:zenodo.org:7902673 + 2023-05-23T08:05:45Z +
+ + + 4.3 + + + + 10.5281/zenodo.7902673 + + oai:zenodo.org:7902673 + + + + Hansen, Peter Martin + Peter Martin + Hansen + Odense University Hospital + + + Alstrøm, henrik + henrik + Alstrøm + Copenhagen University Hopsital + + + Damm-Hejmdal, Anders + Anders + Damm-Hejmdal + Copenhagen Emergency Medical Services + + + Mikkelsen, Søren + Søren + Mikkelsen + Odense University Hospital + + + Rehn, Marius + Marius + Rehn + Oslo University Hospital + + + Berlac, Peter Anthony + Peter Anthony + Berlac + Copenhagen University Hospital + + + + THE FIELD'S MALL MASS SHOOTING: EMERGENCY MEDICAL SERVICES RESPONSE + + Zenodo + 2023 + + Major incident + Disaster + Mass shooting + Mass casualty + Management and leadership + + + 2023-05-06 + + + + 10.5281/zenodo.7902672 + + + Creative Commons Attribution 4.0 International + + + Case report describing the mass shooting at the Field's shopping mall in Copenhagen, July 3rd, 2022. + + +Dataset to support the findings of the case report, hopefully to be published after peer-review in Scandinavian Journal of Trauma, Resuscitation and Emergency Medicine. + + + + + +
+ +
+ oai:zenodo.org:7555467 + 2023-05-23T08:09:18Z +
+ + + 4.3 + + + + 10.5281/zenodo.7555467 + + oai:zenodo.org:7555467 + + + + Akshay Akshay + Akshay Akshay + 0000-0003-3186-7478 + + + Mitali Katoch + Mitali Katoch + + + Masoud Abedi + Masoud Abedi + + + Mustafa Besic + Mustafa Besic + + + Navid Shekarchizadeh + Navid Shekarchizadeh + + + Fiona C. Burkhard + Fiona C. Burkhard + + + Alex Bigger-Allen + Alex Bigger-Allen + + + Rosalyn M. Adam + Rosalyn M. Adam + + + Katia Monastyrskaya + Katia Monastyrskaya + + + Ali Hashemi Gheinani + Ali Hashemi Gheinani + + + + Supporting data for "SpheroScan: A User-Friendly Deep Learning Tool for Spheroid Image Analysis" + + Zenodo + 2023 + + 2023-01-20 + + + + 10.5281/zenodo.7555466 + + + Creative Commons Zero v1.0 Universal + + + In recent years, three-dimensional (3D) spheroid models have become increasingly popular in scientific research as they provide a more physiologically relevant microenvironment that mimics in vivo conditions. The use of 3D spheroid assays has proven to be advantageous as it offers a better understanding of the cellular behavior, drug efficacy, and toxicity as compared to traditional two-dimensional cell culture methods. However, the use of 3D spheroid assays is impeded by the absence of automated and user-friendly tools for spheroid image analysis, which adversely affects the reproducibility and throughput of these assays. + + +To address these issues, we have developed a fully automated, web-based tool called SpheroScan, which uses the deep learning framework called Mask Regions with Convolutional Neural Networks (R-CNN) for image detection and segmentation. To develop a deep learning model that could be applied to spheroid images from a range of experimental conditions, we trained the model using spheroid images captured using IncuCyte Live-Cell Analysis System and a conventional microscope. Performance evaluation of the trained model using validation and test datasets shows promising results. + + +SpheroScan allows for easy analysis of large numbers of images and provides interactive visualization features for a more in-depth understanding of the data. Our tool represents a significant advancement in the analysis of spheroid images and will facilitate the widespread adoption of 3D spheroid models in scientific research. The source code and a detailed tutorial for SpheroScan are available at https://github.com/FunctionalUrology/SpheroScan. + + + + +
+ +
+ oai:zenodo.org:7525202 + 2023-05-23T08:29:23Z +
+ + + 4.3 + + + + 10.5281/zenodo.7525202 + + oai:zenodo.org:7525202 + + + + Hofmann, Fabian + Fabian + Hofmann + 0000-0002-6604-5450 + + + Hampp, Johannes + Johannes + Hampp + 0000-0002-1776-116X + + + Neumann, Fabian + Fabian + Neumann + 0000-0001-8551-1480 + + + Brown, Tom + Tom + Brown + 0000-0001-5898-1911 + + + Hörsch, Jonas + Jonas + Hörsch + 0000-0001-9438-767X + + + + atlite: A Lightweight Python Package for Calculating Renewable Power Potentials and Time Series + + Zenodo + 2021 + + 2021-06-24 + + + + https://github.com/PyPSA/atlite/tree/v0.2.10 + 10.5281/zenodo.5026364 + + 0.2.5 + + GNU General Public License v3.0 or later + + + What's Changed + + +[pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/251 + +[pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/258 + +Always shift solar influx by 30 minutes by @zoltanmaric in https://github.com/PyPSA/atlite/pull/257 + +hydro inflow unit conversion corrected by @hailiangliu89 in https://github.com/PyPSA/atlite/pull/254 + +Issue227 - Turbine, Panel and CSP Installation Configs from local path by @LukasFrankenQ in https://github.com/PyPSA/atlite/pull/250 + +add discord reference by @pz-max in https://github.com/PyPSA/atlite/pull/260 + +ease rasterio restriction by @pz-max in https://github.com/PyPSA/atlite/pull/262 + +[pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/264 + +Combine ERA5 and ERA5T data by @zoltanmaric in https://github.com/PyPSA/atlite/pull/261 + +[pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/266 + +[pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/268 + +gis: use wkt for object identifyer (shapely >= v2.0 compat) by @FabianHofmann in https://github.com/PyPSA/atlite/pull/270 + +New Contributors + + + +@zoltanmaric made their first contribution in https://github.com/PyPSA/atlite/pull/257 + +@LukasFrankenQ made their first contribution in https://github.com/PyPSA/atlite/pull/250 + + +Full Changelog: https://github.com/PyPSA/atlite/compare/v0.2.9...v0.2.10 + If you use this package, please cite our corresponding paper in JOSS (The Journal of Open Source Software). + + + + + +
+ +
+ oai:zenodo.org:7901584 + 2023-05-23T08:12:44Z +
+ + + 4.3 + + + + 10.5281/zenodo.7901584 + + oai:zenodo.org:7901584 + + + + Chao Gao + Chao Gao + + + + Inter-comparison of multiple two-way coupled meteorology and air quality models (WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1) in eastern China + + Zenodo + 2023 + + Two-way coupled meteorology and air quality model + WRF-CMAQ + WRF-CHIMERE + WRF-Chem + Inter-comparisons + + + 2023-05-05 + + eng + + + 10.5281/zenodo.7900754 + + 0.0.1 + + Creative Commons Attribution 4.0 International + + + Here presented all related source codes for the two-way coupled WRF-CMAQ, WRF-Chem and WRF-CHIMERE models, and even the pre- and post- processing scripts used for paper of "Inter-comparison of multiple two-way coupled meteorology and air quality models (WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1) in eastern China", https://doi.org/10.5194/gmd-2023-21. + + + + + +
+ +
+ oai:zenodo.org:7947262 + 2023-05-23T08:38:27Z +
+ + + 4.3 + + + + 10.5281/zenodo.7947262 + + oai:zenodo.org:7947262 + + + + Nobuyasu Koga + Nobuyasu Koga + + + + kogalab21/all-alpha_design: Initial release + + Zenodo + 2023 + + 2023-05-18 + + + + https://github.com/kogalab21/all-alpha_design/tree/v0.1.0 + 10.5281/zenodo.7947261 + + v0.1.0 + + Other (Open) + + + - + + + + + +
+ +
+ oai:zenodo.org:7961678 + 2023-05-23T08:24:21Z +
+ + + 4.3 + + + + 10.5281/zenodo.7961678 + + oai:zenodo.org:7961678 + + + + S. Gnanam, K. Jaya Surya, G. Hasina Beevi, V. Uma Mageswari, J. Gajendiran, S. Rajagopal, J. Ramana Ramya, S. Gokul Raj + S. Gnanam, K. Jaya Surya, G. Hasina Beevi, V. Uma Mageswari, J. Gajendiran, S. Rajagopal, J. Ramana Ramya, S. Gokul Raj + Department of Physics, School of Basic Sciences, Vels Institute of Science, Technology & Advanced Studies (VISTAS), Pallavaram, Chennai-600 117, India. + + + + Study of Structural and Optical Properties of Azadirachta Indica assisted Green Synthesized Silver Nanoparticles + + Scientific Research Reports + 2023 + + Nanoparticles; Silver; Green synthesis process; Structural studies; Optical studies + + + 2023-05-28 + + eng + + + 10.5281/zenodo.7961677 + + + Creative Commons Attribution 4.0 International + + + In this chapter, Azadirachta indica-assisted nanostructured silver particles were attempted via green route followed by testing their crystalline structure, crystallite size, vibrational bands, optical energy gap values using standard tools powder XRD, FT-IR and UV spectrum characterization. + + + + + +
+ +
+ oai:zenodo.org:6302891 + 2023-05-23T08:29:23Z +
+ + + 4.3 + + + + 10.5281/zenodo.6302891 + + oai:zenodo.org:6302891 + + + + Hofmann, Fabian + Fabian + Hofmann + 0000-0002-6604-5450 + + + Hampp, Johannes + Johannes + Hampp + 0000-0002-1776-116X + + + Neumann, Fabian + Fabian + Neumann + 0000-0001-8551-1480 + + + Brown, Tom + Tom + Brown + 0000-0001-5898-1911 + + + Hörsch, Jonas + Jonas + Hörsch + 0000-0001-9438-767X + + + + atlite: A Lightweight Python Package for Calculating Renewable Power Potentials and Time Series + + Zenodo + 2021 + + 2021-06-24 + + + + https://github.com/PyPSA/atlite/tree/v0.2.7 + 10.5281/zenodo.5026364 + + 0.2.5 + + GNU General Public License v3.0 or later + + + See Release Notes for details. + If you use this package, please cite our corresponding paper in JOSS (The Journal of Open Source Software). + + + + + +
+ +
+ oai:zenodo.org:6884943 + 2023-05-23T08:50:26Z +
+ + + 4.3 + + + + 10.5281/zenodo.6884943 + + oai:zenodo.org:6884943 + + + + Obaid, Hadeel S. + Hadeel S. + Obaid + 0000-0003-0859-6315 + Tampere University + + + Sun, Bo + Bo + Sun + 0000-0002-5803-4778 + Tampere University + + + Morlaas, Christophe + Christophe + Morlaas + 0000-0003-4533-1711 + ENAC + + + Tan, Bo + Bo + Tan + 0000-0002-6855-6270 + Tampere University + + + Lohan, Elena-Simona + Elena-Simona + Lohan + 0000-0003-1718-6924 + Tampere University + + + + Angle measurements with 3D Vector antenna for localization purposes – open-access datasets + + Zenodo + 2022 + + AoA, DoA, Positioning, 3D Vector Antenna, MUSIC Algorithm + + + 2022-07-22 + + + + 10.5281/zenodo.6884942 + https://zenodo.org/communities/tau_wireless + + + Creative Commons Attribution 4.0 International + + + This dataset contains data on positioning measurements of the angle of arrival (AoA) as well as the azimuth angle estimation using the MUSIC Algorithm. The data was collected from four ports (p1,p2,p3,p4) of a 3D Vector Antenna (3D VA) provided by ENAC. Data were captured in a laboratory environment with conditions that affect the positioning performance.  + + + + European Commission + 00k4n6c32 + 893917 + Evaluation of 5G Network and mmWave Radar Sensors to Enhance Surveillance of the Airport Surface + + + Academy of Finland + 05k73zm37 + 328226 + Ubiquitous Localization, communication, and sensing infrastrucTuRe for Autonomous systems (ULTRA) / Consortium: ULTRA + + + + + + +
+ +
+ oai:zenodo.org:7793917 + 2023-05-23T08:37:37Z +
+ + + 4.3 + + + + 10.5281/zenodo.7793917 + + oai:zenodo.org:7793917 + + + + Tongwen Li + Tongwen Li + Sun Yat-sen University + + + Jingan Wu + Jingan Wu + Sun Yat-sen University + + + + Daily Gapless 0.1° XCO2 Dataset for China + + Zenodo + 2023 + + XCO2, gapless, deep learning + + + 2023-04-03 + + + + 10.5281/zenodo.7793916 + + V1 + + Creative Commons Attribution 4.0 International + + + This is the daily gapless column-averaged dry-air mole fraction of CO2 (XCO2) dataset with a high spatial resolution of 0.1° in China from 2015 to 2020. This dataset was generated by the deep learning-based multisource data fusion, including satellite XCO2, reanalyzed XCO2, satellite vegetation data, and meteorological fields. This dataset yields a high accuracy in terms of cross-validation and ground-based validation. + + +This data set is stored in Geotiff format and can be opened with ArcGIS, ENVI, etc. + + + + + +
+ +
+ oai:zenodo.org:7961633 + 2023-05-23T08:05:43Z +
+ + + 4.3 + + + + 10.5281/zenodo.7961633 + + oai:zenodo.org:7961633 + + + + S. Santhi, S. Sandihya, K. Srinivasan + S. Santhi, S. Sandihya, K. Srinivasan + Department of Mathematics, Vels Institute of Science Technology and Advanced Studies, Pallavaram, Tamil Nadu, India. + + + + Analysis of Transportation Problem under Fuzzy Environment + + Scientific Research Reports + 2023 + + fuzzy algorithms, Transportation Problem, Operation Research + + + 2023-05-28 + + eng + + + 10.5281/zenodo.7961632 + + + Creative Commons Attribution 4.0 International + + + Transportation model provides an essential service of relating a company to its suppliers and customers. These transportation models make certain efficient movement and timely availability of raw materials and finished goods. Transportation model has broad practical applications, not only in transportation problem but also in such problems as production planning, communication network, transportation scheduling and allotment etc. The parameters of a transportation problem are unit costs, capacity (product, supply) and demand (requirement) values. In practice, these parameters are not always precisely known. Fuzzy sets and fuzzy logic are powerful mathematical tools for controlling uncertain systems in industry, humanity and nature; they are facilitators of approximate reasoning with imprecise and incomplete information. Here propose a new kind of approach to solve the fuzzy transportation problem with an imprecise environment, where the transportation costs are in the form of trapezoidal fuzzy numbers. In real life situation, because of many reasons, supply, demand, and unit transportation costs may become inconsistent. These inaccurate data can be represented as fuzzy numbers. The fuzzy numbers and values were majorly used in various fields such as experimental sciences, artificial intelligence, etc. Here, converted the  trapezoidal fuzzy numbers into crisp values by using the magnitude ranking function and by applying Max-min method the initial basic feasible solution to the fuzzy transportation problem were obtained. The numerical illustration demonstrates the new projected way for managing transportation problems on fuzzy algorithms. + + + + + +
+ +
+ oai:zenodo.org:7958493 + 2023-05-23T08:36:50Z +
+ + + 4.3 + + + + 10.5281/zenodo.7958493 + + oai:zenodo.org:7958493 + + + + Hale, Sarah H + Sarah H + Hale + 0000-0002-7743-9199 + + + Arp, Hans Peter H. + Hans Peter H. + Arp + 0000-0002-0747-8838 + + + Cousins, Ian + Ian + Cousins + + + Figuière, Romain + Romain + Figuière + + + Lennquist, Anna + Anna + Lennquist + + + Pahl, Sabine + Sabine + Pahl + + + White, Mathew + Mathew + White + + + Suffill, Ellise + Ellise + Suffill + + + Schymanski, Emma + Emma + Schymanski + + + Peters, Greg + Greg + Peters + + + Aggarwal, Rahul + Rahul + Aggarwal + + + + Making the essential-use concept enforceable, effective and understandable + + Zenodo + 2022 + + PFAS + essential-use + chemical policy + regulation + + + 2022-11-23 + + eng + + + 10.5281/zenodo.7958492 + https://zenodo.org/communities/zeropm-h2020 + + + Creative Commons Attribution 4.0 International + + + Submitted to CARACAL regarding feedback for implementing the Essential-Use concept within European Chemical legislation.  + + +The concept of essential use in policy can be traced back to the Montreal Protocol signed in 1987. The agreement protects the Earth’s ozone layer by phasing out chemicals that deplete it and by limiting the use of ozone-depleting chemicals to only essential uses. The scientific community picked up on this concept in 2019 when a peer-reviewed publication applied the essential use concept to per- and polyfluoroalkyl substances (PFASs). + + +  + + +When is it justified to use the most harmful substances? + + +The essential-use concept gained further regulatory momentum in 2020 with the publication of the Chemicals Strategy for Sustainability towards a Toxic free environment (CSS). Here, the essential-use concept was extended to restrict the use of "the most harmful chemicals" which "are only allowed if their use is necessary for health, safety or is critical for the functioning of society and if there are no alternatives available from the standpoint of environment and health."  + + +  + + +The “essential-use” concept versus broader concepts of “essentiality” + + +From a theoretical point of view, there are several levels of “essentiality” that involve different stakeholder groups such as industry, policy makers, consumers and academics. These levels also include the essentiality of a given chemical, the essentiality of a given chemical's function in a product (or end use), and the essentiality of a given product (or end use). In addition, function can be considered from the perspective of the chemical, the end use, or as a service. + + +  + + +This communication aims to disentangle the complexity caused by the different levels of “essentiality” and move towards making the essential-use approach enforceable, effective and understandable. + + + + European Commission + 00k4n6c32 + 101036756 + ZeroPM: Zero pollution of Persistent, Mobile substances + + + + + + +
+ +
+ oai:zenodo.org:7901682 + 2023-05-23T08:12:45Z +
+ + + 4.3 + + + + 10.5281/zenodo.7901682 + + oai:zenodo.org:7901682 + + + + Chao Gao + Chao Gao + + + Xuelei Zhang + Xuelei Zhang + + + Aijun Xiu + Aijun Xiu + + + Qingqing Tong + Qingqing Tong + + + Hongmei Zhao + Hongmei Zhao + + + Guangyi Yang + Guangyi Yang + + + Mengduo Zhang + Mengduo Zhang + + + Shengjin Xie + Shengjin Xie + + + + Source codes of WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1 + + Zenodo + 2023 + + Two-way coupled meteorology and air quality model + WRF-CMAQ + WRF-CHIMERE + WRF-Chem + Inter-comparisons + + + 2023-05-05 + + eng + + + 10.5281/zenodo.7900754 + + 0.0.1 + + Creative Commons Attribution 4.0 International + + + Here presented all related source codes for the two-way coupled WRF-CMAQ, WRF-Chem and WRF-CHIMERE models, and the pre- and post-processing scripts used for the paper of "Inter-comparison of multiple two-way coupled meteorology and air quality models (WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1) in eastern China".  We investigate the performance of each coupled model over eastern China during 2017, and related configurations of each coupled model regarding aerosol-radiation interactions (ARI) and/or aerosol-cloud interactions (ACI) effects as presented in the following Table1. + + + + Table 1. Summary of scenarios setting in three coupled models. + + + Model + Scenario + Configuration option + Description + + + WRF-CMAQ + (1) WRF-CMAQ_NO + DO_SW_CAL=F + Without aerosol feedbacks + + +   + (2) WRF-CMAQ_ARI + DO_SW_CAL=T + ARI + + + WRF-Chem + (3) WRF-Chem_NO + + +aer_ra_feedback=0 + + +wetscav_onoff=0 + cldchem_onoff=0 + Without aerosol feedbacks + + +   + (4) WRF-Chem_ARI + + +aer_ra_feedback=1 + + +wetscav_onoff=0 + cldchem_onoff=0 + ARI + + +   + (5) WRF-Chem_BOTH + + +aer_ra_feedback=1 + + +wetscav_onoff=1 + cldchem_onoff=1 + ARI and ACI + + + WRF-CHIMERE + (6) WRF-CHIMERE_NO + + +direct_feed_chimere=0 + indirect_feed_chimere=0 + Without aerosol feedbacks + + +   + (7) WRF-CHIMERE_ARI + + +direct_feed_chimere=1 + indirect_feed_chimere=0 + ARI + + +   + (8) WRF-CHIMERE_BOTH + + +direct_feed_chimere=1 + indirect_feed_chimere=1 + ARI and ACI + + + + + +More detailed information on model configuration of WRF-CMAQ, WRF-Chem and WRF-CHIMERE are in 201701icbc.csh, namelist.input and chimere.par files, respectively. Table 2 lists several configurations of these three coupled models, as follows. + + +Table 2. Model configurations and parameterization schemes. 
+ + + + + + + +Configurations + + + +WRF-CMAQ + + + +WRF-Chem + + + +WRF-CHIMERE + + + + + +Horizontal grid spacing + + + +27 km (110 × 150) + + + +27 km (120 × 160) + + + +27 km (120 × 170) + + + + + +Vertical resolution + + + +29 layers from surface (23.2 m) to 100 hPa (16 km) with 11 layers in the bottom 1 km + + 29 layers from surface (23.2 m) to 100 hPa (16 km) with 11 layers in the bottom 1 km + 29 layers from surface (23.2 m) to 100 hPa (16 km) with 11 layers in the bottom 1 km + + + + +Shortwave radiation + + + +RRTMG + + + +RRTMG + + + +RRTMG + + + + + +Longwave radiation + + + +RRTMG + + + +RRTMG + + + +RRTMG + + + + + +Aerosol mixing state + + + +Core-Shell + + + +Core-Shell + + + +Core-Shell + + + + + +Cloud microphysics + + + +Morrison + + + +Morrison + + + +Thompson + + + + + +PBL + + + +ACM2 + + + +YSU + + + +YSU + + + + + +Cumulus + + + +Kain-Fritsch + + + +Grell-Freitas + + + +Grell-Freitas + + + + + +Surface + + + +Pleim-Xiu + + + +Monin-Obukhov + + + +Monin-Obukhov + + + + + +Land surface + + + +Pleim-Xiu LSM + + + +Noah LSM + + + +Noah LSM + + + + + +Gas-phase chemistry + + + +CB6 + + + +CBMZ + + + +MELCHIOR2 + + + + + +Photolysis + + + +Fast-JX + + + +Fast-JX + + + +Fast-JX + + + + + +Aerosol mechanism + + + +AERO6 + + + +MOSAIC + + + +SAM + + + + + +Aerosol size distribution + + + +Modal (3 modes) + + + +Sectional (4 bins) + + + +Sectional (10 bins) + + + + + +Biogenic emission + + + +MEGAN v3.0 + + + +MEGAN v3.0 + + + +MEGAN v3.0 + + + + + +Biomass burning emission + + + +FINN v1.5 + + + +FINN v1.5 + + + +FINN v1.5 + + + + + +Dust emission + + + +Foroutan + + + +GOCART + + + +Menut + + + + + +Sea-salt emission + + + +Gong + + + +Gong + + + +Monahan + + + + + +Meteorological ICs and lateral BCs + + + +FNL + + + +FNL + + + +FNL + + + + + +Chemical ICs and lateral BCs + + + +MOZART + + + +MOZART + + + +LMDZ-INCA + + + + + + +Automated run scripts: + + +WRF-CMAQ: ${YYYMM}icbc.csh + + +WRF-Chem: batch.sh + + +WRF-CHIMERE: ${YYYYMM}.sh + + +  + + + + + +
+ +
+ oai:zenodo.org:7900755 + 2023-05-23T08:12:44Z +
+ + + 4.3 + + + + 10.5281/zenodo.7900755 + + oai:zenodo.org:7900755 + + + + Chao Gao + Chao Gao + + + + Inter-comparison of multiple two-way coupled meteorology and air quality models (WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1) in eastern China + + Zenodo + 2023 + + Two-way coupled meteorology and air quality model + WRF-CMAQ + WRF-CHIMERE + WRF-Chem + Inter-comparisons + + + 2023-05-05 + + eng + + + 10.5281/zenodo.7900754 + + 0.0.1 + + Creative Commons Attribution 4.0 International + + + Here presented all related source codes for the two-way coupled WRF-CMAQ, WRF-Chem and WRF-CHIMERE models, and even the pre- and post- processing scripts used for paper of "Inter-comparison of multiple two-way coupled meteorology and air quality models (WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1) in eastern China", https://doi.org/10.5194/gmd-2023-21. + + + + + +
+ +
+ oai:zenodo.org:7901630 + 2023-05-23T08:12:44Z +
+ + + 4.3 + + + + 10.5281/zenodo.7901630 + + oai:zenodo.org:7901630 + + + + Chao Gao + Chao Gao + + + + Inter-comparison of multiple two-way coupled meteorology and air quality models (WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1) in eastern China + + Zenodo + 2023 + + Two-way coupled meteorology and air quality model + WRF-CMAQ + WRF-CHIMERE + WRF-Chem + Inter-comparisons + + + 2023-05-05 + + eng + + + 10.5281/zenodo.7900754 + + 0.0.1 + + Creative Commons Attribution 4.0 International + + + Here presented all related source codes for the two-way coupled WRF-CMAQ, WRF-Chem and WRF-CHIMERE models, and even the pre- and post- processing scripts used for paper of "Inter-comparison of multiple two-way coupled meteorology and air quality models (WRF v4.1.1-CMAQ v5.3.1, WRF-Chem v4.1.1 and WRF v3.7.1-CHIMERE v2020r1) in eastern China", https://doi.org/10.5194/gmd-2023-21. + + + + + +
+ +
+ oai:zenodo.org:7949712 + 2023-05-23T08:38:28Z +
+ + + 4.3 + + + + 10.5281/zenodo.7949712 + + oai:zenodo.org:7949712 + + + + Nobuyasu Koga + Nobuyasu Koga + Osaka Univeristy + + + + kogalab21/all-alpha_design: Second release + + Zenodo + 2023 + + 2023-05-19 + + + + https://github.com/kogalab21/all-alpha_design/tree/v1.0.0 + 10.5281/zenodo.7947261 + + v1.0.0 + + Other (Open) + + + No description provided. + + + + + +
+ +
+ oai:zenodo.org:7552508 + 2023-05-23T08:10:53Z +
+ + + 4.3 + + + + 10.5281/zenodo.7552508 + + oai:zenodo.org:7552508 + + + + Akshay Akshay + Akshay Akshay + 0000-0003-3186-7478 + University of Bern + + + Mitali Katoch + Mitali Katoch + + + Masoud Abedi + Masoud Abedi + + + Mustafa Besic + Mustafa Besic + + + Navid Shekarchizadeh + Navid Shekarchizadeh + + + Fiona C. Burkhard + Fiona C. Burkhard + + + Alex Bigger-Allen + Alex Bigger-Allen + + + Rosalyn M. Adam + Rosalyn M. Adam + + + Katia Monastyrskaya + Katia Monastyrskaya + + + Ali Hashemi Gheinani + Ali Hashemi Gheinani + + + + Trained Model Weights for "SpheroScan: A User-Friendly Deep Learning Tool for Spheroid Image Analysis" + + Zenodo + 2023 + + 2023-01-19 + + + + 10.5281/zenodo.7552507 + + 0.0.1 + + Creative Commons Zero v1.0 Universal + + + In recent years, three-dimensional (3D) spheroid models have become increasingly popular in scientific research as they provide a more physiologically relevant microenvironment that mimics in vivo conditions. The use of 3D spheroid assays has proven to be advantageous as it offers a better understanding of the cellular behavior, drug efficacy, and toxicity as compared to traditional two-dimensional cell culture methods. However, the use of 3D spheroid assays is impeded by the absence of automated and user-friendly tools for spheroid image analysis, which adversely affects the reproducibility and throughput of these assays. + + +To address these issues, we have developed a fully automated, web-based tool called SpheroScan, which uses the deep learning framework called Mask Regions with Convolutional Neural Networks (R-CNN) for image detection and segmentation. To develop a deep learning model that could be applied to spheroid images from a range of experimental conditions, we trained the model using spheroid images captured using IncuCyte Live-Cell Analysis System and a conventional microscope. Performance evaluation of the trained model using validation and test datasets shows promising results. + + +SpheroScan allows for easy analysis of large numbers of images and provides interactive visualization features for a more in-depth understanding of the data. Our tool represents a significant advancement in the analysis of spheroid images and will facilitate the widespread adoption of 3D spheroid models in scientific research. The source code and a detailed tutorial for SpheroScan are available at https://github.com/FunctionalUrology/SpheroScan. + + + + + +
+ +
+ oai:zenodo.org:7157434 + 2023-05-23T08:29:23Z +
+ + + 4.3 + + + + 10.5281/zenodo.7157434 + + oai:zenodo.org:7157434 + + + + Hofmann, Fabian + Fabian + Hofmann + 0000-0002-6604-5450 + + + Hampp, Johannes + Johannes + Hampp + 0000-0002-1776-116X + + + Neumann, Fabian + Fabian + Neumann + 0000-0001-8551-1480 + + + Brown, Tom + Tom + Brown + 0000-0001-5898-1911 + + + Hörsch, Jonas + Jonas + Hörsch + 0000-0001-9438-767X + + + + atlite: A Lightweight Python Package for Calculating Renewable Power Potentials and Time Series + + Zenodo + 2021 + + 2021-06-24 + + + + https://github.com/PyPSA/atlite/tree/v0.2.9 + 10.5281/zenodo.5026364 + + 0.2.5 + + GNU General Public License v3.0 or later + + + What's Changed + + +gis: extend allowed time frame to 1959 by @fneum in https://github.com/PyPSA/atlite/pull/247 + +[pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/248 + +Rasterio update to 1.3.0 by @pz-max in https://github.com/PyPSA/atlite/pull/249 + + +Full Changelog: https://github.com/PyPSA/atlite/compare/v0.2.8...v0.2.9 + If you use this package, please cite our corresponding paper in JOSS (The Journal of Open Source Software). + + + + + +
+ +
+ oai:zenodo.org:5026365 + 2023-05-23T08:29:22Z +
+record 1 (DataCite 4.3):
+  identifier (DOI): 10.5281/zenodo.5026365
+  alternate identifier (OAI): oai:zenodo.org:5026365
+  creators:
+    Fabian Hofmann (ORCID 0000-0002-6604-5450)
+    Johannes Hampp (ORCID 0000-0002-1776-116X)
+    Fabian Neumann (ORCID 0000-0001-8551-1480)
+    Tom Brown (ORCID 0000-0001-5898-1911)
+    Jonas Hörsch (ORCID 0000-0001-9438-767X)
+  title: atlite: A Lightweight Python Package for Calculating Renewable Power Potentials and Time Series
+  publisher: Zenodo, 2021
+  date issued: 2021-06-24
+  language: eng
+  related identifiers: https://github.com/PyPSA/atlite/tree/v0.2.5.0; 10.5281/zenodo.5026364
+  version: v0.2.5.0
+  description: Atlite is a free-software, xarray-based Python library for converting weather data (such as wind speeds and solar influx) into energy-systems data. It has a lightweight design and works with big weather datasets while keeping the resource requirements, especially on CPU and RAM, low. Atlite can process weather-data fields such as solar influx, wind speed and temperature, and convert them into power-system-relevant time series (solar PV power output, wind turbine power output, etc.) for any subset of a full weather database.
+
+record 2, oai:zenodo.org:6642180 (datestamp 2023-05-23T08:29:23Z; DataCite 4.3):
+  identifier (DOI): 10.5281/zenodo.6642180
+  creators:
+    Hofmann, Fabian (ORCID 0000-0002-6604-5450)
+    Hampp, Johannes (ORCID 0000-0002-1776-116X)
+    Neumann, Fabian (ORCID 0000-0001-8551-1480)
+    Brown, Tom (ORCID 0000-0001-5898-1911)
+    Hörsch, Jonas (ORCID 0000-0001-9438-767X)
+  title: atlite: A Lightweight Python Package for Calculating Renewable Power Potentials and Time Series
+  publisher: Zenodo, 2021
+  date issued: 2021-06-24
+  related identifiers: https://github.com/PyPSA/atlite/tree/v0.2.8; 10.5281/zenodo.5026364
+  version: 0.2.5
+  rights: GNU General Public License v3.0 or later
+  description: What's Changed
+    - Fix bad comparison for sarah datafiles. by @euronion in https://github.com/PyPSA/atlite/pull/224
+    - Fix/exclusioncalculator filehandling by @euronion in https://github.com/PyPSA/atlite/pull/226
+    - [pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/229
+    - [pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/234
+    - Add pretty-format-yaml pre-commit hook. by @euronion in https://github.com/PyPSA/atlite/pull/235
+    - Add NREL turbine data to atlite by @thesethtruth in https://github.com/PyPSA/atlite/pull/233
+    - [pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/237
+    - fix: use explicit rasterio resample method for averaging by @FabianHofmann in https://github.com/PyPSA/atlite/pull/239
+    - Fix rasterio reprojection bug. Appears with 1.2.10 by @pz-max in https://github.com/PyPSA/atlite/pull/240
+    - Fix NaN values into inflow by @davide-f in https://github.com/PyPSA/atlite/pull/242
+    - [pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/PyPSA/atlite/pull/246
+    - Improve memory load efficiency for shape_availability calculation by @calvintr in https://github.com/PyPSA/atlite/pull/243
+    New Contributors
+    - @thesethtruth made their first contribution in https://github.com/PyPSA/atlite/pull/233
+    - @pz-max made their first contribution in https://github.com/PyPSA/atlite/pull/240
+    - @davide-f made their first contribution in https://github.com/PyPSA/atlite/pull/242
+    - @calvintr made their first contribution in https://github.com/PyPSA/atlite/pull/243
+    Full Changelog: https://github.com/PyPSA/atlite/compare/v0.2.7...v0.2.8
+  additional description: If you use this package, please cite our corresponding paper in JOSS (The Journal of Open Source Software).
+
+record 3, oai:zenodo.org:7956832 (datestamp 2023-05-23T08:26:28Z; DataCite 4.3):
+  identifier (DOI): 10.1109/LWC.2022.3233475
+  alternate identifier (OAI): oai:zenodo.org:7956832
+  creators:
+    Steven Rivetti (Chalmers University of Technology)
+    José Miguel Mateos-Ramos (Chalmers University of Technology)
+    Yibo Wu (Chalmers University of Technology)
+    Jinxiang Song (Chalmers University of Technology)
+    Musa Furkan Keskin (Chalmers University of Technology)
+    Vijaya Yajnanarayana (Ericsson Research)
+    Christian Häger (Chalmers University of Technology)
+    Henk Wymeersch (Chalmers University of Technology)
+  title: Spatial Signal Design for Positioning via End-to-End Learning
+  publisher: Zenodo, 2023
+  subjects: mmWave positioning, precoder optimization, end-to-end learning
+  date issued: 2023-01-02
+  related identifier: https://zenodo.org/communities/hexa-x
+  rights: Creative Commons Attribution 4.0 International
+  description: This letter considers the problem of end-to-end (E2E) learning for joint optimization of transmitter precoding and receiver processing for mmWave downlink positioning. Considering a multiple-input single-output (MISO) scenario, we propose a novel autoencoder (AE) architecture to estimate user equipment (UE) position with multiple base stations (BSs) and demonstrate that E2E learning can match model-based design, both for angle-of-departure (AoD) and position estimation, under ideal conditions without model deficits, and outperform it in the presence of hardware impairments.
+  funding:
+    European Commission (00k4n6c32), grant 101015956: A flagship for B5G/6G vision and intelligent fabric of technology enablers connecting human, physical, and digital worlds
+    European Commission (00k4n6c32), grant 888913: A New Waveform for Joint Radar and Communications Beyond 5G
+
+record 4, oai:zenodo.org:7199024 (datestamp 2023-05-23T08:49:11Z; DataCite 4.3):
+  identifier (DOI): 10.5281/zenodo.7199024
+  creators:
+    Obaid, Hadeel (ORCID 0000-0003-0859-6315)
+    Tan, Bo (ORCID 0000-0002-6855-6270)
+    Morlaas, Christophe (ORCID 0000-0003-4533-1711)
+    Lohan, Elena Simona (ORCID 0000-0003-1718-6924)
+  title: I/Q measurements with 5G SRS signals and receiver 4-port 3D Vector Antenna for positioning studies
+  publisher: Zenodo, 2022
+  date issued: 2022-10-14
+  related identifiers: 10.5281/zenodo.7199023; https://zenodo.org/communities/tau_wireless
+  rights: Creative Commons Attribution 4.0 International
+  description: This dataset contains the I/Q data of four received signals from a 4-port 3D Vector Antenna (3D VA), as well as *.fig and *.png examples of angle-of-arrival (AoA)/azimuth angle estimation using the MUSIC algorithm on the raw data. The data was collected from four ports (p5, p6, p7, p8) of a 3D VA provided by ENAC. A single Yagi antenna was used as transmitter at a 2.1 GHz carrier frequency with horizontal polarization.
+  funding:
+    European Commission (00k4n6c32), grant 893917: Evaluation of 5G Network and mmWave Radar Sensors to Enhance Surveillance of the Airport Surface
+    Academy of Finland (05k73zm37), grant 328226: Ubiquitous Localization, communication, and sensing infrastrucTuRe for Autonomous systems (ULTRA) / Consortium: ULTRA
+
+record 5, oai:zenodo.org:7915849 (datestamp 2023-05-23T08:56:15Z; DataCite 4.3):
+  identifier (DOI): 10.5281/zenodo.7915849
+  creators:
+    Rücknagel, Jesko (ORCID 0000-0001-8824-8390; Technische Informationsbibliothek)
+    Schmeja, Stefan (ORCID 0000-0001-6130-9472; Technische Informationsbibliothek)
+  title: Good open access: minimum requirements vs best practice
+  publisher: Zenodo, 2023
+  subject: open access
+  date issued: 2023-05-09
+  language: eng
+  resource type: Presentation
+  related identifiers: 10.5281/zenodo.7915848; https://zenodo.org/communities/open-access_network
+  rights: Creative Commons Attribution 4.0 International
+  description: Open Access aims to enable people worldwide to access scholarly information without barriers. To achieve this, various publication channels exist, and the number of possible publication venues continues to grow. The presentation will focus on explaining which selection criteria should be applied when choosing a publication venue in order to best achieve the goals of the Open Access movement. For this purpose, we take a closer look at the myths that still exist regarding Open Access. The following questions will be discussed:
+    - What best practices should I follow in publishing open access?
+    - What initiatives are there to push forward the publishing system's transformation?
+    - Which tools can help me to find a suitable publication venue?
+
+record 6, oai:zenodo.org:7878530 (datestamp 2023-05-23T08:00:47Z; DataCite 4.3):
+  identifier (DOI): 10.5281/zenodo.7878530
+  creators:
+    Akhmetzyanova Diana Ratmirovna
+    Sheikin Sergey Dmitrievich
+  title: Oribatida
+  publisher: Zenodo, 2023
+  date issued: 2023-04-29
+  resource type: Photo
+  related identifier: 10.5281/zenodo.7878529
+  description: Oribatid mites photo
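Fixtures like the records above come from Zenodo's public OAI-PMH interface, so they can be refreshed from the live service. A minimal sketch (the base URL `https://zenodo.org/oai2d` and the `GetRecord` verb are standard OAI-PMH; it assumes Zenodo still exposes the `oai_datacite` metadata prefix, and the identifier is taken from the last record above):

```bash
# Fetch a single record in the oai_datacite format used by these fixtures,
# then pretty-print the XML (xmllint is optional, shipped with libxml2).
curl -s 'https://zenodo.org/oai2d?verb=GetRecord&metadataPrefix=oai_datacite&identifier=oai:zenodo.org:7878530' \
  | xmllint --format -
```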
From 96b36a271833d4fafa31123e51c5153666530b6a Mon Sep 17 00:00:00 2001
From: Jos van der Velde
Date: Wed, 15 Nov 2023 15:38:11 +0100
Subject: [PATCH 08/21] Create the data subdirectories up front, to avoid
 permission denied errors

---
 data/connectors/.gitkeep | 0
 data/deletion/.gitkeep   | 0
 data/mysql/.gitkeep      | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 data/connectors/.gitkeep
 create mode 100644 data/deletion/.gitkeep
 create mode 100644 data/mysql/.gitkeep

diff --git a/data/connectors/.gitkeep b/data/connectors/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/data/deletion/.gitkeep b/data/deletion/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/data/mysql/.gitkeep b/data/mysql/.gitkeep
new file mode 100644
index 00000000..e69de29b

From 996a5271842e19b79750c382d68cf187c7385855 Mon Sep 17 00:00:00 2001
From: Jos van der Velde
Date: Wed, 15 Nov 2023 15:53:53 +0100
Subject: [PATCH 09/21] Convenience script for reverting to a clean state

---
 scripts/clean.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100755 scripts/clean.sh

diff --git a/scripts/clean.sh b/scripts/clean.sh
new file mode 100755
index 00000000..5df76083
--- /dev/null
+++ b/scripts/clean.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Convenience script to revert to a clean state.
+
+DIR_SCRIPT=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+DIR_ROOT=$( dirname "$DIR_SCRIPT" )
+DIR_DATA=$DIR_ROOT/data
+
+DIR_MYSQL=$DIR_DATA/mysql
+DIR_CONNECTORS=$DIR_DATA/connectors
+DIR_DELETION=$DIR_DATA/deletion
+
+find "$DIR_CONNECTORS" -type f ! -name .gitkeep -delete
+find "$DIR_DELETION" -type f ! -name .gitkeep -delete
+sudo rm -rf "$DIR_MYSQL"/*

From bfb0d5e21c521a964a33649cd77244a6fb78c5ce Mon Sep 17 00:00:00 2001
From: Jos van der Velde
Date: Wed, 15 Nov 2023 16:09:51 +0100
Subject: [PATCH 10/21] Update README: describe profiles

---
 README.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/README.md b/README.md
index 35dd52fa..2a4dfad0 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,7 @@ Information on how to install Docker is found in [their documentation](https://d
 ```bash
 docker compose --profile examples up
 ```
+
 starts the MySQL server, the REST API, Keycloak for identity and access management, and Nginx for reverse proxying. \
 Once started, you should be able to visit the REST API server at: http://localhost and Keycloak at http://localhost/aiod-auth \
 To authenticate to the REST API Swagger interface, the predefined user is: user, and password: password \
 To authenticate as admin to Keycloak, the predefined user is: admin and password: password \
 To use a different DNS hostname, replace localhost with it in .env and src/config.toml \
 This configuration is intended for development, DO NOT use it in production.
 
+To turn it off again, use
+```bash
+docker compose --profile examples down
+```
+
 To connect to the database use `./scripts/database-connect.sql`.
 
 ```bash
 docker compose --profile examples --profile huggingface-datasets --profile openml-datasets up
 docker compose --profile examples --profile huggingface-datasets --profile openml-datasets down
 ```
 
+Make sure you use the same profile for `up` and `down`, otherwise some containers might keep
+running.
+
 #### Local Installation
 
 If you want to run the server locally, you need **Python 3.11**.
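The profile caveat documented above is easy to verify from the shell. A small sketch (not part of the patch series; the profile names come from the README section above, and `up -d`, `down`, and `docker ps` are standard Docker and Docker Compose commands):

```bash
# Start the stack with all connector profiles enabled.
docker compose --profile examples --profile huggingface-datasets --profile openml-datasets up -d

# A `down` with fewer profiles only stops the services in those profiles ...
docker compose --profile examples down

# ... so any connector containers that were left behind will still be listed here.
docker ps --format '{{.Names}}: {{.Status}}'
```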
From dff06934a2cfe5a7311ac325cc15d2cc3b0d0738 Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Wed, 15 Nov 2023 16:12:40 +0100 Subject: [PATCH 11/21] Make sure enums that are used as examples are lowercase --- src/connectors/example/enum_fill_connector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/connectors/example/enum_fill_connector.py b/src/connectors/example/enum_fill_connector.py index 353e08c6..d25651b1 100644 --- a/src/connectors/example/enum_fill_connector.py +++ b/src/connectors/example/enum_fill_connector.py @@ -29,4 +29,5 @@ def platform_name(self) -> PlatformName: def fetch(self, limit: int | None = None) -> Iterator[RESOURCE]: with open(self.json_path) as f: json_data = json.load(f) - yield from json_data[:limit] + for value in json_data[:limit]: + yield value.lower() From 7c85d47951830231289a136e3865003efe5b1ba1 Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Wed, 15 Nov 2023 16:25:38 +0100 Subject: [PATCH 12/21] Some machines were throwing errors because it took too long for the API to go up --- docker-compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index f821de6a..8dbfc548 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -23,8 +23,8 @@ services: start_interval: 1s start_period: 30s interval: 5s - timeout: 30s - retries: 5 + timeout: 120s + retries: 24 depends_on: sqlserver: condition: service_healthy From a9db5ad7e9c477c0b6a7c93f6834ef145172c4b2 Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Wed, 15 Nov 2023 16:45:24 +0100 Subject: [PATCH 13/21] Fixing .env file locations --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e6a0c1bf..81b37940 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,9 @@ filterwarnings = [ env_override_existing_values = 1 env_files = [ "src/.env", - "src/tests/.env" + "src/tests/.env", + ".env", # If running from docker container + "tests/.env" # For running from docker container ] From a4e0b697ea7eb8550f8fde7b5fdcfd28b3646fed Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Wed, 15 Nov 2023 16:53:30 +0100 Subject: [PATCH 14/21] Better comments on environment files --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 81b37940..40c841f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,8 +60,8 @@ env_override_existing_values = 1 env_files = [ "src/.env", "src/tests/.env", - ".env", # If running from docker container - "tests/.env" # For running from docker container + ".env", # Only used if running from docker container + "tests/.env" # Only used if running from docker container ] From b4198f05789d838fff79225cb2aff8a36c24a184 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 Nov 2023 15:58:13 +0000 Subject: [PATCH 15/21] Bump python-keycloak from 3.3.0 to 3.7.0 Bumps [python-keycloak](https://github.com/marcospereirampj/python-keycloak) from 3.3.0 to 3.7.0. - [Changelog](https://github.com/marcospereirampj/python-keycloak/blob/master/CHANGELOG.md) - [Commits](https://github.com/marcospereirampj/python-keycloak/compare/v3.3.0...v3.7.0) --- updated-dependencies: - dependency-name: python-keycloak dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 81b37940..9bbdbad2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "requests==2.31.0", "mysqlclient==2.2.0", "oic==1.6.0", - "python-keycloak==3.3.0", + "python-keycloak==3.7.0", "python-dotenv==1.0.0", "pytz==2023.3.post1", "pydantic_schemaorg==1.0.6", From 7b5b928499e1a2dcd538a0bab1bca3f8f7cdf9c6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 Nov 2023 16:01:17 +0000 Subject: [PATCH 16/21] Bump urllib3 from 2.0.7 to 2.1.0 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.0.7 to 2.1.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.0.7...2.1.0) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 40c841f4..09bc782a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ authors = [ {name = "Taniya Das", email = "t.das@tue.nl"} ] dependencies = [ - "urllib3== 2.0.7", + "urllib3== 2.1.0", "bibtexparser==1.4.1", "huggingface_hub==0.19.1", "fastapi==0.104.1", From 3c45e0305916a913a6b0f1bb64d3c95a977d8dee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 Nov 2023 16:01:23 +0000 Subject: [PATCH 17/21] Bump responses from 0.24.0 to 0.24.1 Bumps [responses](https://github.com/getsentry/responses) from 0.24.0 to 0.24.1. - [Release notes](https://github.com/getsentry/responses/releases) - [Changelog](https://github.com/getsentry/responses/blob/master/CHANGES) - [Commits](https://github.com/getsentry/responses/compare/0.24.0...0.24.1) --- updated-dependencies: - dependency-name: responses dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 40c841f4..b19a90c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dev = [ "pytest-dotenv==0.5.2", "pytest-xdist==3.4.0", "pre-commit==3.5.0", - "responses==0.24.0", + "responses==0.24.1", "starlette==0.27.0" ] From e4dca157b395acca23ed23b5c6501783a385250d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 Nov 2023 16:01:32 +0000 Subject: [PATCH 18/21] Bump huggingface-hub from 0.19.1 to 0.19.3 Bumps [huggingface-hub](https://github.com/huggingface/huggingface_hub) from 0.19.1 to 0.19.3. - [Release notes](https://github.com/huggingface/huggingface_hub/releases) - [Commits](https://github.com/huggingface/huggingface_hub/compare/v0.19.1...v0.19.3) --- updated-dependencies: - dependency-name: huggingface-hub dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 40c841f4..49803259 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ authors = [ dependencies = [ "urllib3== 2.0.7", "bibtexparser==1.4.1", - "huggingface_hub==0.19.1", + "huggingface_hub==0.19.3", "fastapi==0.104.1", "uvicorn==0.24.0.post1", "requests==2.31.0", From 1091a32f66988a882851b76716bb1c404791d1c9 Mon Sep 17 00:00:00 2001 From: Jos van der Velde Date: Fri, 17 Nov 2023 10:22:23 +0100 Subject: [PATCH 19/21] Total counts and detailed counts per resource --- src/main.py | 9 +++ src/routers/resource_router.py | 31 ++++++-- .../routers/generic/test_router_get_count.py | 74 +++++++++++++++++++ 3 files changed, 106 insertions(+), 8 deletions(-) diff --git a/src/main.py b/src/main.py index a676eb52..c407a9c5 100644 --- a/src/main.py +++ b/src/main.py @@ -67,6 +67,15 @@ def test_authorization(user: Json = Depends(get_current_user)) -> dict: """ return {"msg": "success", "user": user} + @app.get(url_prefix + "/counts/v1") + def counts() -> dict: + return { + router.resource_name_plural: count + for router in resource_routers.router_list + if issubclass(router.resource_class, AIoDConcept) + and (count := router.get_resource_count_func(engine)(detailed=True)) + } + for router in ( resource_routers.router_list + routers.other_routers diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index a43b5e72..24289190 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -9,7 +9,7 @@ from fastapi import APIRouter, Depends, HTTPException, status from fastapi.encoders import jsonable_encoder from pydantic import BaseModel -from sqlalchemy import and_ +from sqlalchemy import and_, func from sqlalchemy.engine import Engine from sqlalchemy.sql.operators import is_ from sqlmodel import SQLModel, Session, select @@ -128,7 +128,7 @@ def create(self, engine: Engine, url_prefix: str) -> APIRouter: router.add_api_route( path=f"{url_prefix}/counts/{self.resource_name_plural}/v1", endpoint=self.get_resource_count_func(engine), - response_model=int, # type: ignore + response_model=int | dict[str, int], name=f"Count of {self.resource_name_plural}", **default_kwargs, ) @@ -250,15 +250,30 @@ def get_resource_count_func(self, engine: Engine): docstring and the variables are dynamic, and used in Swagger. 
""" - def get_resource_count(): + def get_resource_count(detailed=False): f"""Retrieve the number of {self.resource_name_plural}.""" try: with Session(engine) as session: - return ( - session.query(self.resource_class) - .where(is_(self.resource_class.date_deleted, None)) - .count() - ) + if not detailed: + return ( + session.query(self.resource_class) + .where(is_(self.resource_class.date_deleted, None)) + .count() + ) + else: + count_list = ( + session.query( + self.resource_class.platform, + func.count(self.resource_class.identifier), + ) + .where(is_(self.resource_class.date_deleted, None)) + .group_by(self.resource_class.platform) + .all() + ) + return { + platform if platform else "aiod": count + for platform, count in count_list + } except Exception as e: raise _wrap_as_http_exception(e) diff --git a/src/tests/routers/generic/test_router_get_count.py b/src/tests/routers/generic/test_router_get_count.py index db70abd2..7571b200 100644 --- a/src/tests/routers/generic/test_router_get_count.py +++ b/src/tests/routers/generic/test_router_get_count.py @@ -4,7 +4,11 @@ from sqlmodel import Session from starlette.testclient import TestClient +from database.model.agent.contact import Contact +from database.model.agent.person import Person +from database.model.concept.aiod_entry import AIoDEntryORM from database.model.concept.status import Status +from database.model.knowledge_asset.publication import Publication from tests.testutils.test_resource import test_resource_factory @@ -35,3 +39,73 @@ def test_get_count_happy_path( assert response_json == 2 assert "deprecated" not in response.headers + + +def test_get_count_detailed_happy_path( + client_test_resource: TestClient, engine_test_resource: Engine, draft: Status +): + with Session(engine_test_resource) as session: + session.add_all( + [ + test_resource_factory( + title="my_test_resource_1", status=draft, platform_resource_identifier="1" + ), + test_resource_factory( + title="My second test resource", status=draft, platform_resource_identifier="2" + ), + test_resource_factory( + title="My third test resource", + status=draft, + platform_resource_identifier="3", + date_deleted=datetime.datetime.now(), + platform="openml", + ), + test_resource_factory( + title="My third test resource", + status=draft, + platform_resource_identifier="4", + platform="openml", + ), + test_resource_factory( + title="My fourth test resource", + status=draft, + platform=None, + platform_resource_identifier=None, + ), + ] + ) + session.commit() + response = client_test_resource.get("/counts/test_resources/v1?detailed=true") + assert response.status_code == 200, response.json() + response_json = response.json() + + assert response_json == {"aiod": 1, "example": 2, "openml": 1} + assert "deprecated" not in response.headers + + +def test_get_count_total( + client: TestClient, + engine: Engine, + person: Person, + publication: Publication, + contact: Contact, +): + + with Session(engine) as session: + session.add(person) + session.merge(publication) + session.add(Publication(name="2", aiod_enty=AIoDEntryORM(type="publication"))) + session.add(Publication(name="3", aiod_enty=AIoDEntryORM(type="publication"))) + session.add(contact) + session.commit() + + response = client.get("/counts/v1") + assert response.status_code == 200, response.json() + response_json = response.json() + + assert response_json == { + "contacts": {"example": 1}, + "persons": {"example": 1}, + "publications": {"aiod": 2, "example": 1}, + } + assert "deprecated" not in response.headers From 
From da2eebe3d406bba8b6950a94a403dc09b616a3bb Mon Sep 17 00:00:00 2001
From: Jos van der Velde
Date: Fri, 17 Nov 2023 10:25:22 +0100
Subject: [PATCH 20/21] Make sure all example-resources have platform=example

---
 src/connectors/example/resources/resource/contacts.json | 6 ++++++
 src/connectors/example/resources/resource/teams.json    | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/connectors/example/resources/resource/contacts.json b/src/connectors/example/resources/resource/contacts.json
index a34927e1..261535d4 100644
--- a/src/connectors/example/resources/resource/contacts.json
+++ b/src/connectors/example/resources/resource/contacts.json
@@ -1,5 +1,11 @@
 [
   {
+    "platform": "example",
+    "platform_resource_identifier": "1",
+    "aiod_entry": {
+      "editor": [],
+      "status": "draft"
+    },
     "email": ["a@b.com"],
     "telephone": ["0032 XXXX XXXX"],
     "location": [
diff --git a/src/connectors/example/resources/resource/teams.json b/src/connectors/example/resources/resource/teams.json
index 7d4b9601..a5e9be1a 100644
--- a/src/connectors/example/resources/resource/teams.json
+++ b/src/connectors/example/resources/resource/teams.json
@@ -4,9 +4,9 @@
     "description": {"plain": "This is a team of an organisation."},
     "date_published": "2022-01-01T15:15:00.000",
     "same_as": "https://www.example.com/resource/this_resource",
+    "platform": "example",
+    "platform_resource_identifier": "1",
     "aiod_entry": {
-        "platform": "example",
-        "platform_resource_identifier": "1",
         "editor": [],
         "status": "draft"
     },

From 9d1498f7f6ffef3ae958a646dbf0cb8249d3cf15 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 17 Nov 2023 11:23:28 +0000
Subject: [PATCH 21/21] Bump huggingface-hub from 0.19.3 to 0.19.4

Bumps [huggingface-hub](https://github.com/huggingface/huggingface_hub) from 0.19.3 to 0.19.4.
- [Release notes](https://github.com/huggingface/huggingface_hub/releases)
- [Commits](https://github.com/huggingface/huggingface_hub/compare/v0.19.3...v0.19.4)

---
updated-dependencies:
- dependency-name: huggingface-hub
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index bb4a49e7..703ca15c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ authors = [
 dependencies = [
     "urllib3== 2.1.0",
     "bibtexparser==1.4.1",
-    "huggingface_hub==0.19.3",
+    "huggingface_hub==0.19.4",
     "fastapi==0.104.1",
     "uvicorn==0.24.0.post1",
     "requests==2.31.0",