From 35ff8186dc6c7b3684497030a06eb3cbedae62e9 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Mon, 28 Oct 2024 12:07:53 +0100 Subject: [PATCH 1/6] wip --- .gitignore | 1 + issue__230_hanslicks_werke.ipynb | 108 +++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 issue__230_hanslicks_werke.ipynb diff --git a/.gitignore b/.gitignore index 588f399..cf4a96b 100644 --- a/.gitignore +++ b/.gitignore @@ -191,3 +191,4 @@ featureCodes_en.tsv listperson.xml schubert_pmb.csv brahms_pmb.csv +hanslick-werke-wikidata.csv diff --git a/issue__230_hanslicks_werke.ipynb b/issue__230_hanslicks_werke.ipynb new file mode 100644 index 0000000..038414f --- /dev/null +++ b/issue__230_hanslicks_werke.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "16556ed2-f178-4edd-9c4c-5639fbdcc851", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import requests\n", + "from acdh_id_reconciler import gnd_to_wikidata\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dcbb6ef-dfa6-4ff9-b256-c3a4ce35a5bd", + "metadata": {}, + "outputs": [], + "source": [ + "orig_data = requests.get(\"https://raw.githubusercontent.com/Hanslick-Online/hsl-entities/refs/heads/main/json_dumps/Werke.json\").json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95db218e-6b72-4451-aa80-4b66af239d31", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "data = []\n", + "for key, value in tqdm(orig_data.items(), total=len(orig_data)):\n", + " gnd = value[\"gnd\"]\n", + " if gnd:\n", + " try:\n", + " wikidata = gnd_to_wikidata(gnd)\n", + " wikidata[\"id\"] = key\n", + " data.append(wikidata)\n", + " except IndexError:\n", + " continue\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67be47c8-5a4f-4d94-98b2-61d2cd6364bd", + "metadata": {}, + "outputs": [], + "source": [ + "data" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "4be7625f-010e-4730-b4f7-0cce11d86b80", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fae7f32c-405a-4dc9-ab5d-c6d0737e3620", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"hanslick-werke-wikidata.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72f574ac-259b-4d7d-84d9-33da01a47ef1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 74673405ee0ef4f40fab82e91581d7a71d9d82ba Mon Sep 17 00:00:00 2001 From: csae8092 Date: Mon, 28 Oct 2024 14:36:17 +0100 Subject: [PATCH 2/6] basically works, ToDo: tests --- .gitignore | 1 + normdata/forms.py | 1 + normdata/utils.py | 108 ++++++++++++++++++++++++++++++++++++++++++++-- requirements.txt | 2 +- 4 files changed, 107 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index cf4a96b..dd826f4 100644 --- a/.gitignore +++ b/.gitignore @@ -192,3 +192,4 @@ listperson.xml schubert_pmb.csv brahms_pmb.csv hanslick-werke-wikidata.csv +Untitled1.ipynb diff --git a/normdata/forms.py b/normdata/forms.py index 06b206c..bde4f87 100644 --- a/normdata/forms.py +++ b/normdata/forms.py @@ -14,5 +14,6 @@ class NormDataImportForm(forms.Form): ("person", "Person"), ("place", "Ort"), ("institution", "Institution"), + ("work", "Werk"), ), ) diff --git a/normdata/utils.py b/normdata/utils.py index 0436789..dfdbddd 100644 --- a/normdata/utils.py +++ b/normdata/utils.py @@ -1,25 
+1,32 @@ from acdh_geonames_utils.gn_client import gn_as_object from acdh_id_reconciler import geonames_to_wikidata, gnd_to_wikidata -from acdh_wikidata_pyutils import WikiDataPerson, WikiDataPlace, WikiDataOrg +from acdh_wikidata_pyutils import ( + WikiDataPerson, + WikiDataPlace, + WikiDataOrg, + WikiDataEntity, +) from AcdhArcheAssets.uri_norm_rules import get_normalized_uri from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.db.utils import IntegrityError from icecream import ic -from pylobid.pylobid import PyLobidPerson, PyLobidPlace, PyLobidOrg +from pylobid.pylobid import PyLobidPerson, PyLobidPlace, PyLobidOrg, PyLobidWork -from apis_core.apis_entities.models import Person, Place, Institution +from apis_core.apis_entities.models import Person, Place, Institution, Work from apis_core.apis_metainfo.models import Uri -from apis_core.apis_relations.models import PersonPlace, InstitutionPlace +from apis_core.apis_relations.models import PersonPlace, InstitutionPlace, PersonWork from apis_core.apis_vocabularies.models import ( PersonPlaceRelation, InstitutionPlaceRelation, + PersonWorkRelation ) DOMAIN_MAPPING = settings.DOMAIN_MAPPING BIRTH_REL = getattr(settings, "BIRTH_REL") DEATH_REL = getattr(settings, "DEATH_REL") LOCATED_REL = getattr(settings, "ORG_LOCATED_IN") +CREATED_REL = getattr(settings, "AUTHOR_RELS") def get_uri_domain(uri): @@ -133,6 +140,7 @@ def get_or_create_place_from_wikidata(uri): def get_or_create_person_from_gnd(uri): + uri = get_normalized_uri(uri) try: entity = Uri.objects.get(uri=uri).entity entity = Person.objects.get(id=entity.id) @@ -162,6 +170,7 @@ def get_or_create_person_from_gnd(uri): def get_or_create_org_from_gnd(uri): + uri = get_normalized_uri(uri.strip()) try: entity = Uri.objects.get(uri=uri).entity entity = Institution.objects.get(id=entity.id) @@ -182,6 +191,7 @@ def get_or_create_org_from_gnd(uri): def get_or_create_person_from_wikidata(uri): + uri = get_normalized_uri(uri) 
try: entity = Uri.objects.get(uri=uri).entity entity = Person.objects.get(id=entity.id) @@ -240,6 +250,87 @@ def get_or_create_person_from_wikidata(uri): return entity +def get_or_create_work_from_gnd(uri): + try: + entity = Uri.objects.get(uri=uri).entity + entity = Work.objects.get(id=entity.id) + return entity + except ObjectDoesNotExist: + fetched_item = PyLobidWork(uri) + try: + start_date_written, end_date_written = fetched_item.date_of_creation.split( + "-" + ) + except ValueError: + start_date_written, end_date_written = fetched_item.date_of_creation, "" + apis_entity = { + "name": fetched_item.pref_name, + "start_date_written": start_date_written, + "end_date_written": end_date_written, + } + entity = Work.objects.create(**apis_entity) + Uri.objects.create( + uri=uri, + domain="gnd", + entity=entity, + ) + for x in fetched_item.creators: + if x["role"] in ["firstAuthor", "author", "firstComposer"]: + print(x) + try: + wikidata_url = gnd_to_wikidata(x["id"])["wikidata"] + print(wikidata_url) + creator = get_or_create_person_from_wikidata(wikidata_url) + except IndexError: + creator = get_or_create_person_from_gnd(x["id"]) + try: + relation_type = PersonWorkRelation.objects.get(id=CREATED_REL[0]) + except ObjectDoesNotExist: + relation_type, _ = PersonWorkRelation.objects.get_or_create( + name="hat geschaffen" + ) + rel, _ = PersonWork.objects.get_or_create( + related_person=creator, + related_work=entity, + relation_type=relation_type, + start_date_written=start_date_written, + end_date_written=end_date_written, + ) + print(rel) + return entity + + +def get_or_create_work_from_wikidata(uri): + try: + entity = Uri.objects.get(uri=uri).entity + entity = Work.objects.get(id=entity.id) + return entity + except ObjectDoesNotExist: + wd_entity = WikiDataEntity(uri) + if wd_entity.gnd_uri: + try: + entity = Uri.objects.get(uri=wd_entity.gnd_uri).entity + entity = Work.objects.get(id=entity.id) + return entity + except ObjectDoesNotExist: + entity = 
get_or_create_work_from_gnd(wd_entity.gnd_uri) + Uri.objects.create( + uri=get_normalized_uri(uri), + domain="wikidata", + entity=entity, + ) + return entity + else: + apis_entity = wd_entity.get_apis_entity() + entity = Work.objects.create(**apis_entity) + Uri.objects.create( + uri=get_normalized_uri(uri), + domain="wikidata", + entity=entity, + ) + return entity + + def get_or_create_org_from_wikidata(uri): try: entity = Uri.objects.get(uri=uri).entity @@ -299,6 +390,13 @@ def import_from_normdata(raw_url, entity_type): try: wikidata_url = gnd_to_wikidata(normalized_url)["wikidata"] except (IndexError, KeyError): + if entity_type == "work": + try: + entity = get_or_create_work_from_gnd(normalized_url) + return entity + except Exception as e: + ic(e) + wikidata_url = False if entity_type == "place": try: entity = get_or_create_place_from_gnd(normalized_url) @@ -344,6 +442,8 @@ def import_from_normdata(raw_url, entity_type): entity = get_or_create_place_from_wikidata(wikidata_url) elif entity_type == "person": entity = get_or_create_person_from_wikidata(wikidata_url) + elif entity_type == "work": + entity = get_or_create_work_from_wikidata(wikidata_url) else: entity = get_or_create_org_from_wikidata(wikidata_url) else: diff --git a/requirements.txt b/requirements.txt index e9257f9..5296eb9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ django-tables2 django-vite>=3.0.3,<4 djangorestframework pandas -pylobid +pylobid>=2.0.0,<3 psycopg2-binary==2.9.9 pyocclient==0.6 networkx>=3.2.1,<4 From 8c74e6453c6d143df68b4e8197c30fc966ed8261 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 29 Oct 2024 10:30:31 +0100 Subject: [PATCH 3/6] closes #233 [skip ci] --- .gitignore | 1 + notebooks/issue__233_eventtypes.ipynb | 69 +++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 notebooks/issue__233_eventtypes.ipynb diff --git a/.gitignore b/.gitignore index dd826f4..97276f5 100644 --- a/.gitignore +++ b/.gitignore @@ -193,3 +193,4 
@@ schubert_pmb.csv brahms_pmb.csv hanslick-werke-wikidata.csv Untitled1.ipynb +event-types.csv diff --git a/notebooks/issue__233_eventtypes.ipynb b/notebooks/issue__233_eventtypes.ipynb new file mode 100644 index 0000000..328d38f --- /dev/null +++ b/notebooks/issue__233_eventtypes.ipynb @@ -0,0 +1,69 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f18c074c-f0be-4591-a38c-b701afb4129b", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c6a07e35-4799-47fd-a09f-b0a0c5c61f79", + "metadata": {}, + "outputs": [], + "source": [ + "data = []\n", + "for x in EventType.objects.all():\n", + " item = {}\n", + " item[\"id\"] = x.id\n", + " item[\"name\"] = x.name\n", + " data.append(item)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "396ebbff-1e4e-462c-a9b5-5002f4f7a53e", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data)\n", + "df.to_csv(\"event-types.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "822c81d4-31ad-43e1-9239-6ca11ab7d335", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 5732eb02cfbd59abaa883cea246bf347c3205194 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 29 Oct 2024 11:02:09 +0100 Subject: [PATCH 4/6] return of matomo [skip ci] --- notebooks/issue__233_eventtypes.ipynb | 9 ++++---- templates/partials/head.html | 30 ++++++++++++++++++--------- 2 files changed, 25 insertions(+), 14 deletions(-) diff 
--git a/notebooks/issue__233_eventtypes.ipynb b/notebooks/issue__233_eventtypes.ipynb index 328d38f..0f3731a 100644 --- a/notebooks/issue__233_eventtypes.ipynb +++ b/notebooks/issue__233_eventtypes.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "f18c074c-f0be-4591-a38c-b701afb4129b", "metadata": {}, "outputs": [], @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "c6a07e35-4799-47fd-a09f-b0a0c5c61f79", "metadata": {}, "outputs": [], @@ -22,12 +22,13 @@ " item = {}\n", " item[\"id\"] = x.id\n", " item[\"name\"] = x.name\n", + " item[\"used\"] = Event.objects.filter(kind=x).count()\n", " data.append(item)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "396ebbff-1e4e-462c-a9b5-5002f4f7a53e", "metadata": {}, "outputs": [], @@ -39,7 +40,7 @@ { "cell_type": "code", "execution_count": null, - "id": "822c81d4-31ad-43e1-9239-6ca11ab7d335", + "id": "a0aa9d7f-2e09-48e6-8730-6e44e863952c", "metadata": {}, "outputs": [], "source": [] diff --git a/templates/partials/head.html b/templates/partials/head.html index 0ecda4c..2fb727e 100644 --- a/templates/partials/head.html +++ b/templates/partials/head.html @@ -2,13 +2,13 @@ {% block metaDescription %} {% endblock %} - - + + - + @@ -20,10 +20,20 @@ + integrity="sha256-p4NxAoJBhIIN+hmNHrzRCf9tD/miZyoHS5obTRR9BMY=" crossorigin="" /> + integrity="sha256-20nQCchB9co0qIjJZRGuk2/Z9VM+kNiyxNV1lvTlZBo=" crossorigin=""> + {% block scriptHeader %} {% endblock scriptHeader %} \ No newline at end of file From 13f467ad9b0ce44a6c68541ddff774f94209fa17 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 29 Oct 2024 11:20:11 +0100 Subject: [PATCH 5/6] no link to relation-list-view for labels [skip ci] --- .../apis_entities/detail_views/entity_detail_generic.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html b/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html index e68bafb..c329808 100644 --- a/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html +++ b/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html @@ -207,7 +207,7 @@

Beziehungen

{% block relations %} {% for obj in right_card %} {% if obj.1.data|length > 0 %} -

{{ obj.0 }}

+

{{ obj.0 }} {% if obj.5 %}{% endif %}

{% render_table obj.1 %} From 307cfb7c009ddab81195fe213dd6520dd62042b7 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 29 Oct 2024 12:18:31 +0100 Subject: [PATCH 6/6] adds test for work importer --- apis_core/apis_entities/detail_views.py | 2 +- apis_core/apis_entities/tests.py | 22 ++++++++++++++++++++++ apis_core/apis_relations/forms2.py | 8 ++++++-- normdata/utils.py | 8 +++++++- requirements.txt | 2 +- 5 files changed, 37 insertions(+), 5 deletions(-) diff --git a/apis_core/apis_entities/detail_views.py b/apis_core/apis_entities/detail_views.py index ee3bb3a..c3bb6e6 100644 --- a/apis_core/apis_entities/detail_views.py +++ b/apis_core/apis_entities/detail_views.py @@ -76,7 +76,7 @@ def get(self, request, *args, **kwargs): "".join([x.title() for x in match]), tb_object_open, link_to_relations, - rel_type + rel_type, ) ) object_lod = Uri.objects.filter(entity=instance) diff --git a/apis_core/apis_entities/tests.py b/apis_core/apis_entities/tests.py index e816910..80c8a13 100644 --- a/apis_core/apis_entities/tests.py +++ b/apis_core/apis_entities/tests.py @@ -14,6 +14,7 @@ from normdata.utils import ( get_or_create_person_from_wikidata, get_or_create_place_from_wikidata, + get_or_create_work_from_wikidata, import_from_normdata, ) @@ -253,6 +254,27 @@ def test_013_import_normdata_no_wikidata(self): response = client.post(url, payload, follow=True) self.assertEqual(response.status_code, 200) + def test_013a_import_normdata_for_work(self): + client.login(**USER) + payload = { + "normdata_url": "https://www.wikidata.org/wiki/Q105745657", + "entity_type": "work", + } + url = reverse( + "normdata:import_from_normdata", + ) + response = client.post(url, payload, follow=True) + self.assertEqual(response.status_code, 200) + + def test_013b_wikidata_work_exist(self): + entity = get_or_create_work_from_wikidata( + "https://www.wikidata.org/wiki/Q105745657" + ) + ic(entity) + for x in entity.uri_set.all(): + entity = get_or_create_work_from_wikidata(x.uri) + 
self.assertTrue(entity) + def test_014_wikidata_place_exist(self): entity = get_or_create_place_from_wikidata( "http://www.wikidata.org/entity/Q1741" diff --git a/apis_core/apis_relations/forms2.py b/apis_core/apis_relations/forms2.py index c81e7ba..f94e663 100644 --- a/apis_core/apis_relations/forms2.py +++ b/apis_core/apis_relations/forms2.py @@ -108,11 +108,15 @@ def get_html_table(self, entity_type, request, site_instance, form_match): if form_match.group(1) == form_match.group(2): dic_a = {"related_" + entity_type.lower() + "a": site_instance} dic_b = {"related_" + entity_type.lower() + "b": site_instance} - objects = self.relation_form.objects.filter(Q(**dic_a) | Q(**dic_b)).order_by("-updated")[:10] + objects = self.relation_form.objects.filter( + Q(**dic_a) | Q(**dic_b) + ).order_by("-updated")[:10] table_html = table(data=objects, prefix=prefix) else: tab_query = {"related_" + entity_type.lower(): site_instance} - ttab = self.relation_form.objects.filter(**tab_query).order_by("-updated")[:10] + ttab = self.relation_form.objects.filter(**tab_query).order_by("-updated")[ + :10 + ] table_html = table(data=ttab, prefix=prefix) return table_html diff --git a/normdata/utils.py b/normdata/utils.py index dfdbddd..51588af 100644 --- a/normdata/utils.py +++ b/normdata/utils.py @@ -19,7 +19,7 @@ from apis_core.apis_vocabularies.models import ( PersonPlaceRelation, InstitutionPlaceRelation, - PersonWorkRelation + PersonWorkRelation, ) DOMAIN_MAPPING = settings.DOMAIN_MAPPING @@ -274,6 +274,12 @@ def get_or_create_work_from_gnd(uri): domain="gnd", entity=entity, ) + try: + start_date_written, end_date_written = ( + fetched_item.date_of_production.split("-") + ) + except ValueError: + pass for x in fetched_item.creators: if x["role"] in ["firstAuthor", "author", "firstComposer"]: print(x) diff --git a/requirements.txt b/requirements.txt index 5296eb9..9ecb480 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ django-tables2 django-vite>=3.0.3,<4 
djangorestframework pandas -pylobid>=2.0.0,<3 +pylobid>=2.3.0,<3 psycopg2-binary==2.9.9 pyocclient==0.6 networkx>=3.2.1,<4