diff --git a/.gitignore b/.gitignore index 588f399..97276f5 100644 --- a/.gitignore +++ b/.gitignore @@ -191,3 +191,6 @@ featureCodes_en.tsv listperson.xml schubert_pmb.csv brahms_pmb.csv +hanslick-werke-wikidata.csv +Untitled1.ipynb +event-types.csv diff --git a/apis_core/apis_entities/detail_views.py b/apis_core/apis_entities/detail_views.py index ee3bb3a..c3bb6e6 100644 --- a/apis_core/apis_entities/detail_views.py +++ b/apis_core/apis_entities/detail_views.py @@ -76,7 +76,7 @@ def get(self, request, *args, **kwargs): "".join([x.title() for x in match]), tb_object_open, link_to_relations, - rel_type + rel_type, ) ) object_lod = Uri.objects.filter(entity=instance) diff --git a/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html b/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html index e68bafb..c329808 100644 --- a/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html +++ b/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html @@ -207,7 +207,7 @@

Beziehungen

{% block relations %} {% for obj in right_card %} {% if obj.1.data|length > 0 %} -

{{ obj.0 }}

+

{{ obj.0 }} {% if obj.5 %}{% endif %}

{% render_table obj.1 %} diff --git a/apis_core/apis_entities/tests.py b/apis_core/apis_entities/tests.py index e816910..80c8a13 100644 --- a/apis_core/apis_entities/tests.py +++ b/apis_core/apis_entities/tests.py @@ -14,6 +14,7 @@ from normdata.utils import ( get_or_create_person_from_wikidata, get_or_create_place_from_wikidata, + get_or_create_work_from_wikidata, import_from_normdata, ) @@ -253,6 +254,27 @@ def test_013_import_normdata_no_wikidata(self): response = client.post(url, payload, follow=True) self.assertEqual(response.status_code, 200) + def test_013a_import_normdata_for_work(self): + client.login(**USER) + payload = { + "normdata_url": "https://www.wikidata.org/wiki/Q105745657", + "entity_type": "work", + } + url = reverse( + "normdata:import_from_normdata", + ) + response = client.post(url, payload, follow=True) + self.assertEqual(response.status_code, 200) + + def test_013b_wikidata_work_exist(self): + entity = get_or_create_work_from_wikidata( + "https://www.wikidata.org/wiki/Q105745657" + ) + ic(entity) + for x in entity.uri_set.all(): + entity = get_or_create_work_from_wikidata(x.uri) + self.assertTrue(entity) + def test_014_wikidata_place_exist(self): entity = get_or_create_place_from_wikidata( "http://www.wikidata.org/entity/Q1741" diff --git a/apis_core/apis_relations/forms2.py b/apis_core/apis_relations/forms2.py index c81e7ba..f94e663 100644 --- a/apis_core/apis_relations/forms2.py +++ b/apis_core/apis_relations/forms2.py @@ -108,11 +108,15 @@ def get_html_table(self, entity_type, request, site_instance, form_match): if form_match.group(1) == form_match.group(2): dic_a = {"related_" + entity_type.lower() + "a": site_instance} dic_b = {"related_" + entity_type.lower() + "b": site_instance} - objects = self.relation_form.objects.filter(Q(**dic_a) | Q(**dic_b)).order_by("-updated")[:10] + objects = self.relation_form.objects.filter( + Q(**dic_a) | Q(**dic_b) + ).order_by("-updated")[:10] table_html = table(data=objects, prefix=prefix) else: tab_query = {"related_" + entity_type.lower(): site_instance} - ttab = self.relation_form.objects.filter(**tab_query).order_by("-updated")[:10] + ttab = self.relation_form.objects.filter(**tab_query).order_by("-updated")[ + :10 + ] table_html = table(data=ttab, prefix=prefix) return table_html diff --git a/issue__230_hanslicks_werke.ipynb b/issue__230_hanslicks_werke.ipynb new file mode 100644 index 0000000..038414f --- /dev/null +++ b/issue__230_hanslicks_werke.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "16556ed2-f178-4edd-9c4c-5639fbdcc851", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import requests\n", + "from acdh_id_reconciler import gnd_to_wikidata\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dcbb6ef-dfa6-4ff9-b256-c3a4ce35a5bd", + "metadata": {}, + "outputs": [], + "source": [ + "orig_data = requests.get(\"https://raw.githubusercontent.com/Hanslick-Online/hsl-entities/refs/heads/main/json_dumps/Werke.json\").json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95db218e-6b72-4451-aa80-4b66af239d31", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "data = []\n", + "for key, value in tqdm(orig_data.items(), total=len(orig_data)):\n", + " gnd = value[\"gnd\"]\n", + " if gnd:\n", + " try:\n", + " wikidata = gnd_to_wikidata(gnd)\n", + " wikidata[\"id\"] = key\n", + " data.append(wikidata)\n", + " except IndexError:\n", + " continue\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67be47c8-5a4f-4d94-98b2-61d2cd6364bd", + "metadata": {}, + "outputs": [], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4be7625f-010e-4730-b4f7-0cce11d86b80", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fae7f32c-405a-4dc9-ab5d-c6d0737e3620", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"hanslick-werke-wikidata.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72f574ac-259b-4d7d-84d9-33da01a47ef1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/normdata/forms.py b/normdata/forms.py index 06b206c..bde4f87 100644 --- a/normdata/forms.py +++ b/normdata/forms.py @@ -14,5 +14,6 @@ class NormDataImportForm(forms.Form): ("person", "Person"), ("place", "Ort"), ("institution", "Institution"), + ("work", "Werk"), ), ) diff --git a/normdata/utils.py b/normdata/utils.py index 0436789..51588af 100644 --- a/normdata/utils.py +++ b/normdata/utils.py @@ -1,25 +1,32 @@ from acdh_geonames_utils.gn_client import gn_as_object from acdh_id_reconciler import geonames_to_wikidata, gnd_to_wikidata -from acdh_wikidata_pyutils import WikiDataPerson, WikiDataPlace, WikiDataOrg +from acdh_wikidata_pyutils import ( + WikiDataPerson, + WikiDataPlace, + WikiDataOrg, + WikiDataEntity, +) from AcdhArcheAssets.uri_norm_rules import get_normalized_uri from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.db.utils import IntegrityError from icecream import ic -from pylobid.pylobid import PyLobidPerson, PyLobidPlace, PyLobidOrg +from pylobid.pylobid import PyLobidPerson, PyLobidPlace, PyLobidOrg, PyLobidWork -from apis_core.apis_entities.models import Person, Place, Institution +from apis_core.apis_entities.models import Person, Place, Institution, Work from apis_core.apis_metainfo.models import Uri -from apis_core.apis_relations.models import PersonPlace, InstitutionPlace +from apis_core.apis_relations.models import PersonPlace, InstitutionPlace, PersonWork from apis_core.apis_vocabularies.models import ( PersonPlaceRelation, InstitutionPlaceRelation, + PersonWorkRelation, ) DOMAIN_MAPPING = settings.DOMAIN_MAPPING BIRTH_REL = getattr(settings, "BIRTH_REL") DEATH_REL = getattr(settings, "DEATH_REL") LOCATED_REL = getattr(settings, "ORG_LOCATED_IN") +CREATED_REL = getattr(settings, "AUTHOR_RELS") def get_uri_domain(uri): @@ -133,6 +140,7 @@ def get_or_create_place_from_wikidata(uri): def get_or_create_person_from_gnd(uri): + uri = get_normalized_uri(uri) try: entity = Uri.objects.get(uri=uri).entity entity = Person.objects.get(id=entity.id) @@ -162,6 +170,7 @@ def get_or_create_person_from_gnd(uri): def get_or_create_org_from_gnd(uri): + uri = get_normalized_uri(uri.strip()) try: entity = Uri.objects.get(uri=uri).entity entity = Institution.objects.get(id=entity.id) @@ -182,6 +191,7 @@ def get_or_create_org_from_gnd(uri): def get_or_create_person_from_wikidata(uri): + uri = get_normalized_uri(uri) try: entity = Uri.objects.get(uri=uri).entity entity = Person.objects.get(id=entity.id) @@ -240,6 +250,93 @@ def get_or_create_person_from_wikidata(uri): return entity +def get_or_create_work_from_gnd(uri): + try: + entity = Uri.objects.get(uri=uri).entity + entity = Work.objects.get(id=entity.id) + return entity + except ObjectDoesNotExist: + fetched_item = PyLobidWork(uri) + try: + start_date_written, end_date_written = fetched_item.date_of_creation.split( + "-" + ) + except ValueError: + start_date_written, end_date_written = fetched_item.date_of_creation, "" + apis_entity = { + "name": fetched_item.pref_name, + "start_date_written": start_date_written, + "end_date_written": end_date_written, + } + entity = Work.objects.create(**apis_entity) + Uri.objects.create( + uri=uri, + domain="gnd", + entity=entity, + ) + try: + start_date_written, end_date_written = ( + fetched_item.date_of_production.split("-") + ) + except ValueError: + pass + for x in fetched_item.creators: + if x["role"] in ["firstAuthor", "author", "firstComposer"]: + print(x) + try: + wikidata_url = gnd_to_wikidata(x["id"])["wikidata"] + print(wikidata_url) + creator = get_or_create_person_from_wikidata(wikidata_url) + except IndexError: + creator = get_or_create_person_from_gnd(x["id"]) + try: + relation_type = PersonWorkRelation.objects.get(id=CREATED_REL[0]) + except ObjectDoesNotExist: + relation_type, _ = PersonWorkRelation.objects.get_or_create( + name="hat geschaffen" + ) + rel, _ = PersonWork.objects.get_or_create( + related_person=creator, + related_work=entity, + relation_type=relation_type, + start_date_written=start_date_written, + end_date_written=end_date_written, + ) + print(rel) + return entity + + +def get_or_create_work_from_wikidata(uri): + try: + entity = Uri.objects.get(uri=uri).entity + entity = Work.objects.get(id=entity.id) + return entity + except ObjectDoesNotExist: + wd_entity = WikiDataEntity(uri) + if wd_entity.gnd_uri: + try: + entity = Uri.objects.get(uri=wd_entity.gnd_uri).entity + entity = Work.objects.get(id=entity.id) + return entity + except ObjectDoesNotExist: + entity = get_or_create_work_from_gnd(wd_entity.gnd_uri) + Uri.objects.create( + uri=get_normalized_uri(uri), + domain="wikidata", + entity=entity, + ) + return entity + else: + apis_entity = wd_entity.get_apis_entity() + entity = Work.objects.create(**apis_entity) + Uri.objects.create( + uri=get_normalized_uri(uri), + domain="wikidata", + entity=entity, + ) + return entity + + def get_or_create_org_from_wikidata(uri): try: entity = Uri.objects.get(uri=uri).entity @@ -299,6 +396,13 @@ def import_from_normdata(raw_url, entity_type): try: wikidata_url = gnd_to_wikidata(normalized_url)["wikidata"] except (IndexError, KeyError): + if entity_type == "work": + try: + entity = get_or_create_work_from_gnd(normalized_url) + return entity + except Exception as e: + ic(e) + wikidata_url = False if entity_type == "place": try: entity = get_or_create_place_from_gnd(normalized_url) @@ -344,6 +448,8 @@ def import_from_normdata(raw_url, entity_type): entity = get_or_create_place_from_wikidata(wikidata_url) elif entity_type == "person": entity = get_or_create_person_from_wikidata(wikidata_url) + elif entity_type == "work": + entity = get_or_create_work_from_wikidata(wikidata_url) else: entity = get_or_create_org_from_wikidata(wikidata_url) else: diff --git a/notebooks/issue__233_eventtypes.ipynb b/notebooks/issue__233_eventtypes.ipynb new file mode 100644 index 0000000..0f3731a --- /dev/null +++ b/notebooks/issue__233_eventtypes.ipynb @@ -0,0 +1,70 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "f18c074c-f0be-4591-a38c-b701afb4129b", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6a07e35-4799-47fd-a09f-b0a0c5c61f79", + "metadata": {}, + "outputs": [], + "source": [ + "data = []\n", + "for x in EventType.objects.all():\n", + " item = {}\n", + " item[\"id\"] = x.id\n", + " item[\"name\"] = x.name\n", + " item[\"used\"] = Event.objects.filter(kind=x).count()\n", + " data.append(item)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "396ebbff-1e4e-462c-a9b5-5002f4f7a53e", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data)\n", + "df.to_csv(\"event-types.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0aa9d7f-2e09-48e6-8730-6e44e863952c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt index e9257f9..9ecb480 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ django-tables2 django-vite>=3.0.3,<4 djangorestframework pandas -pylobid +pylobid>=2.3.0,<3 psycopg2-binary==2.9.9 pyocclient==0.6 networkx>=3.2.1,<4 diff --git a/templates/partials/head.html b/templates/partials/head.html index 0ecda4c..2fb727e 100644 --- a/templates/partials/head.html +++ b/templates/partials/head.html @@ -2,13 +2,13 @@ {% block metaDescription %} {% endblock %} - - + + - + @@ -20,10 +20,20 @@ + integrity="sha256-p4NxAoJBhIIN+hmNHrzRCf9tD/miZyoHS5obTRR9BMY=" crossorigin="" /> + integrity="sha256-20nQCchB9co0qIjJZRGuk2/Z9VM+kNiyxNV1lvTlZBo=" crossorigin=""> + {% block scriptHeader %} {% endblock scriptHeader %} \ No newline at end of file