Skip to content

Commit

Permalink
Merge pull request #235 from arthur-schnitzler/155-create-wikidata-im…
Browse files Browse the repository at this point in the history
…porter-for-work

155 create wikidata importer for work
  • Loading branch information
csae8092 authored Oct 29, 2024
2 parents 35ff818 + 307cfb7 commit b7c3e46
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 19 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,5 @@ listperson.xml
schubert_pmb.csv
brahms_pmb.csv
hanslick-werke-wikidata.csv
Untitled1.ipynb
event-types.csv
2 changes: 1 addition & 1 deletion apis_core/apis_entities/detail_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def get(self, request, *args, **kwargs):
"".join([x.title() for x in match]),
tb_object_open,
link_to_relations,
rel_type
rel_type,
)
)
object_lod = Uri.objects.filter(entity=instance)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ <h3 class="text-center mt-5">Beziehungen</h3>
{% block relations %}
{% for obj in right_card %}
{% if obj.1.data|length > 0 %}
<h4>{{ obj.0 }} <a class="apis-{{ obj.5 }}" title="Gehe zu {{ object }}–{{ obj.0 }} Relationen" href="{{ obj.4 }}"><i class="bi bi-box-arrow-up-right" aria-hidden="true"><span class="visually-hidden">Gehe zu {{ object }} - {{ obj.5 }} Relationen</span></i></a></h4>
<h4>{{ obj.0 }} {% if obj.5 %}<a class="apis-{{ obj.5 }}" title="Gehe zu {{ object }}–{{ obj.0 }} Relationen" href="{{ obj.4 }}"><i class="bi bi-box-arrow-up-right" aria-hidden="true"><span class="visually-hidden">Gehe zu {{ object }} - {{ obj.5 }} Relationen</span></i></a>{% endif %}</h4>

<div id="tab_{{ obj.2 }}" class="mb-5">
{% render_table obj.1 %}
Expand Down
22 changes: 22 additions & 0 deletions apis_core/apis_entities/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from normdata.utils import (
get_or_create_person_from_wikidata,
get_or_create_place_from_wikidata,
get_or_create_work_from_wikidata,
import_from_normdata,
)

Expand Down Expand Up @@ -253,6 +254,27 @@ def test_013_import_normdata_no_wikidata(self):
response = client.post(url, payload, follow=True)
self.assertEqual(response.status_code, 200)

def test_013a_import_normdata_for_work(self):
    """POST a Wikidata work URL to the normdata import view and expect HTTP 200.

    The request is sent as the logged-in test user with ``entity_type`` set to
    ``"work"``; redirects are followed before the status code is checked.
    """
    client.login(**USER)
    import_url = reverse("normdata:import_from_normdata")
    form_data = {
        "normdata_url": "https://www.wikidata.org/wiki/Q105745657",
        "entity_type": "work",
    }
    response = client.post(import_url, form_data, follow=True)
    self.assertEqual(response.status_code, 200)

def test_013b_wikidata_work_exist(self):
    """Import a work from Wikidata, then look it up again via every stored URI.

    Each URI attached to the imported work must resolve back to that same
    work. The check runs inside the loop so that every lookup is verified,
    not only the last one.
    """
    entity = get_or_create_work_from_wikidata(
        "https://www.wikidata.org/wiki/Q105745657"
    )
    ic(entity)
    self.assertTrue(entity)
    for x in entity.uri_set.all():
        # Use a separate name so the originally imported work stays available
        # for comparison.
        found = get_or_create_work_from_wikidata(x.uri)
        self.assertEqual(found.pk, entity.pk)

def test_014_wikidata_place_exist(self):
entity = get_or_create_place_from_wikidata(
"http://www.wikidata.org/entity/Q1741"
Expand Down
8 changes: 6 additions & 2 deletions apis_core/apis_relations/forms2.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,15 @@ def get_html_table(self, entity_type, request, site_instance, form_match):
if form_match.group(1) == form_match.group(2):
dic_a = {"related_" + entity_type.lower() + "a": site_instance}
dic_b = {"related_" + entity_type.lower() + "b": site_instance}
objects = self.relation_form.objects.filter(Q(**dic_a) | Q(**dic_b)).order_by("-updated")[:10]
objects = self.relation_form.objects.filter(
Q(**dic_a) | Q(**dic_b)
).order_by("-updated")[:10]
table_html = table(data=objects, prefix=prefix)
else:
tab_query = {"related_" + entity_type.lower(): site_instance}
ttab = self.relation_form.objects.filter(**tab_query).order_by("-updated")[:10]
ttab = self.relation_form.objects.filter(**tab_query).order_by("-updated")[
:10
]
table_html = table(data=ttab, prefix=prefix)
return table_html

Expand Down
1 change: 1 addition & 0 deletions normdata/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ class NormDataImportForm(forms.Form):
("person", "Person"),
("place", "Ort"),
("institution", "Institution"),
("work", "Werk"),
),
)
114 changes: 110 additions & 4 deletions normdata/utils.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
from acdh_geonames_utils.gn_client import gn_as_object
from acdh_id_reconciler import geonames_to_wikidata, gnd_to_wikidata
from acdh_wikidata_pyutils import WikiDataPerson, WikiDataPlace, WikiDataOrg
from acdh_wikidata_pyutils import (
WikiDataPerson,
WikiDataPlace,
WikiDataOrg,
WikiDataEntity,
)
from AcdhArcheAssets.uri_norm_rules import get_normalized_uri
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db.utils import IntegrityError
from icecream import ic
from pylobid.pylobid import PyLobidPerson, PyLobidPlace, PyLobidOrg
from pylobid.pylobid import PyLobidPerson, PyLobidPlace, PyLobidOrg, PyLobidWork

from apis_core.apis_entities.models import Person, Place, Institution
from apis_core.apis_entities.models import Person, Place, Institution, Work
from apis_core.apis_metainfo.models import Uri
from apis_core.apis_relations.models import PersonPlace, InstitutionPlace
from apis_core.apis_relations.models import PersonPlace, InstitutionPlace, PersonWork
from apis_core.apis_vocabularies.models import (
PersonPlaceRelation,
InstitutionPlaceRelation,
PersonWorkRelation,
)

DOMAIN_MAPPING = settings.DOMAIN_MAPPING
BIRTH_REL = getattr(settings, "BIRTH_REL")
DEATH_REL = getattr(settings, "DEATH_REL")
LOCATED_REL = getattr(settings, "ORG_LOCATED_IN")
CREATED_REL = getattr(settings, "AUTHOR_RELS")


def get_uri_domain(uri):
Expand Down Expand Up @@ -133,6 +140,7 @@ def get_or_create_place_from_wikidata(uri):


def get_or_create_person_from_gnd(uri):
uri = get_normalized_uri(uri)
try:
entity = Uri.objects.get(uri=uri).entity
entity = Person.objects.get(id=entity.id)
Expand Down Expand Up @@ -162,6 +170,7 @@ def get_or_create_person_from_gnd(uri):


def get_or_create_org_from_gnd(uri):
uri = get_normalized_uri(uri.strip())
try:
entity = Uri.objects.get(uri=uri).entity
entity = Institution.objects.get(id=entity.id)
Expand All @@ -182,6 +191,7 @@ def get_or_create_org_from_gnd(uri):


def get_or_create_person_from_wikidata(uri):
uri = get_normalized_uri(uri)
try:
entity = Uri.objects.get(uri=uri).entity
entity = Person.objects.get(id=entity.id)
Expand Down Expand Up @@ -240,6 +250,93 @@ def get_or_create_person_from_wikidata(uri):
return entity


def get_or_create_work_from_gnd(uri):
    """Return the ``Work`` registered for a GND URI, importing it if missing.

    If a ``Uri`` row for the (normalized) URI exists and points to a ``Work``,
    that work is returned. Otherwise the record is fetched through
    ``PyLobidWork``, a new ``Work`` plus a ``Uri`` with domain ``"gnd"`` are
    created, and every creator whose role is ``firstAuthor``, ``author`` or
    ``firstComposer`` is linked to the work through a ``PersonWork`` relation.

    :param uri: GND URI of the work.
    :return: the existing or newly created ``Work`` instance.
    """
    # Normalize first, like the sibling get_or_create_*_from_gnd helpers, so
    # the lookup and the stored Uri always use the same canonical form.
    uri = get_normalized_uri(uri)
    try:
        entity = Uri.objects.get(uri=uri).entity
        return Work.objects.get(id=entity.id)
    except ObjectDoesNotExist:
        # Unknown URI (or URI attached to a non-Work entity): import below.
        pass

    fetched_item = PyLobidWork(uri)
    # A value with exactly one "-" is treated as a "start-end" range; any other
    # shape is stored unchanged as the start date.
    try:
        start_date_written, end_date_written = fetched_item.date_of_creation.split(
            "-"
        )
    except ValueError:
        start_date_written, end_date_written = fetched_item.date_of_creation, ""
    entity = Work.objects.create(
        name=fetched_item.pref_name,
        start_date_written=start_date_written,
        end_date_written=end_date_written,
    )
    Uri.objects.create(
        uri=uri,
        domain="gnd",
        entity=entity,
    )

    # The creator relations use the production range when it splits cleanly
    # into two parts; otherwise they keep the creation dates computed above.
    try:
        start_date_written, end_date_written = fetched_item.date_of_production.split(
            "-"
        )
    except ValueError:
        pass

    relation_type = None  # resolved once, and only if a creator is linked
    for creator_data in fetched_item.creators:
        if creator_data["role"] not in ("firstAuthor", "author", "firstComposer"):
            continue
        ic(creator_data)
        try:
            wikidata_url = gnd_to_wikidata(creator_data["id"])["wikidata"]
            ic(wikidata_url)
            creator = get_or_create_person_from_wikidata(wikidata_url)
        except (IndexError, KeyError):
            # No Wikidata match for this GND id: fall back to a GND import,
            # mirroring the exception handling in import_from_normdata.
            creator = get_or_create_person_from_gnd(creator_data["id"])
        if relation_type is None:
            try:
                relation_type = PersonWorkRelation.objects.get(id=CREATED_REL[0])
            except ObjectDoesNotExist:
                relation_type, _ = PersonWorkRelation.objects.get_or_create(
                    name="hat geschaffen"
                )
        rel, _ = PersonWork.objects.get_or_create(
            related_person=creator,
            related_work=entity,
            relation_type=relation_type,
            start_date_written=start_date_written,
            end_date_written=end_date_written,
        )
        ic(rel)
    return entity


def get_or_create_work_from_wikidata(uri):
    """Return the ``Work`` registered for a Wikidata URI, importing it if missing.

    Lookup order:

    1. a ``Uri`` row matching the normalized Wikidata URI;
    2. if the Wikidata entity exposes a GND URI, a ``Uri`` row matching that;
    3. otherwise a new ``Work`` is created, via the GND importer when a GND
       URI is available, else from ``WikiDataEntity.get_apis_entity()``.

    A ``Uri`` with domain ``"wikidata"`` is stored for every newly created work.

    :param uri: Wikidata URL of the work (wiki page or entity form).
    :return: the existing or newly created ``Work`` instance.
    """
    # The Uri row is stored in normalized form, so the lookup must use the
    # normalized form too; otherwise a repeated call with e.g. a
    # ".../wiki/Q..." URL never finds the row written by the first call.
    uri = get_normalized_uri(uri)
    try:
        entity = Uri.objects.get(uri=uri).entity
        return Work.objects.get(id=entity.id)
    except ObjectDoesNotExist:
        pass

    wd_entity = WikiDataEntity(uri)
    if wd_entity.gnd_uri:
        try:
            entity = Uri.objects.get(uri=wd_entity.gnd_uri).entity
            # NOTE(review): a work found through its GND URI is returned
            # without a Wikidata Uri being attached - confirm this is intended.
            return Work.objects.get(id=entity.id)
        except ObjectDoesNotExist:
            entity = get_or_create_work_from_gnd(wd_entity.gnd_uri)
    else:
        entity = Work.objects.create(**wd_entity.get_apis_entity())

    Uri.objects.create(
        uri=uri,
        domain="wikidata",
        entity=entity,
    )
    return entity


def get_or_create_org_from_wikidata(uri):
try:
entity = Uri.objects.get(uri=uri).entity
Expand Down Expand Up @@ -299,6 +396,13 @@ def import_from_normdata(raw_url, entity_type):
try:
wikidata_url = gnd_to_wikidata(normalized_url)["wikidata"]
except (IndexError, KeyError):
if entity_type == "work":
try:
entity = get_or_create_work_from_gnd(normalized_url)
return entity
except Exception as e:
ic(e)
wikidata_url = False
if entity_type == "place":
try:
entity = get_or_create_place_from_gnd(normalized_url)
Expand Down Expand Up @@ -344,6 +448,8 @@ def import_from_normdata(raw_url, entity_type):
entity = get_or_create_place_from_wikidata(wikidata_url)
elif entity_type == "person":
entity = get_or_create_person_from_wikidata(wikidata_url)
elif entity_type == "work":
entity = get_or_create_work_from_wikidata(wikidata_url)
else:
entity = get_or_create_org_from_wikidata(wikidata_url)
else:
Expand Down
70 changes: 70 additions & 0 deletions notebooks/issue__233_eventtypes.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f18c074c-f0be-4591-a38c-b701afb4129b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6a07e35-4799-47fd-a09f-b0a0c5c61f79",
"metadata": {},
"outputs": [],
"source": [
"data = []\n",
"for x in EventType.objects.all():\n",
" item = {}\n",
" item[\"id\"] = x.id\n",
" item[\"name\"] = x.name\n",
" item[\"used\"] = Event.objects.filter(kind=x).count()\n",
" data.append(item)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "396ebbff-1e4e-462c-a9b5-5002f4f7a53e",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(data)\n",
"df.to_csv(\"event-types.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0aa9d7f-2e09-48e6-8730-6e44e863952c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ django-tables2
django-vite>=3.0.3,<4
djangorestframework
pandas
pylobid
pylobid>=2.3.0,<3
psycopg2-binary==2.9.9
pyocclient==0.6
networkx>=3.2.1,<4
Expand Down
30 changes: 20 additions & 10 deletions templates/partials/head.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
{% block metaDescription %}
{% endblock %}
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.1/jquery.min.js" integrity="sha512-v2CJ7UaYy4JwqLDIrZUI/4hqeoQieOmAZNXBeQyjo21dadnwR+8ZaIJVT8EE2iyI61OV8e6M8PP2/4hpQINQ/g==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.1/jquery.min.js"
integrity="sha512-v2CJ7UaYy4JwqLDIrZUI/4hqeoQieOmAZNXBeQyjo21dadnwR+8ZaIJVT8EE2iyI61OV8e6M8PP2/4hpQINQ/g=="
crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/font/bootstrap-icons.css">
<link
rel="stylesheet"
href="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.css"
/>
<link rel="stylesheet" href="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.css" />
<link rel="stylesheet" href="{% static 'css/style.css' %}" />
<link rel="apple-touch-icon" sizes="180x180" href="{% static 'img/apple-touch-icon.png' %}">
<link rel="icon" type="image/png" sizes="32x32" href="{% static 'img/favicon-32x32.png' %}">
Expand All @@ -20,10 +20,20 @@
<meta name="msapplication-config" content="{% static 'img/browserconfig.xml' %}">
<meta name="theme-color" content="#ffffff">
<link rel="stylesheet" href="https://unpkg.com/[email protected]/dist/leaflet.css"
integrity="sha256-p4NxAoJBhIIN+hmNHrzRCf9tD/miZyoHS5obTRR9BMY="
crossorigin=""/>
integrity="sha256-p4NxAoJBhIIN+hmNHrzRCf9tD/miZyoHS5obTRR9BMY=" crossorigin="" />
<script src="https://unpkg.com/[email protected]/dist/leaflet.js"
integrity="sha256-20nQCchB9co0qIjJZRGuk2/Z9VM+kNiyxNV1lvTlZBo="
crossorigin=""></script>
integrity="sha256-20nQCchB9co0qIjJZRGuk2/Z9VM+kNiyxNV1lvTlZBo=" crossorigin=""></script>
<script type="text/javascript">
var _paq = _paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
var u = "https://matomo.acdh.oeaw.ac.at/";
_paq.push(['setTrackerUrl', u + 'piwik.php']);
_paq.push(['setSiteId', '124']);
var d = document, g = d.createElement('script'), s = d.getElementsByTagName('script')[0];
g.type = 'text/javascript'; g.async = true; g.defer = true; g.src = u + 'piwik.js'; s.parentNode.insertBefore(g, s);
})();</script>
{% block scriptHeader %}
{% endblock scriptHeader %}

0 comments on commit b7c3e46

Please sign in to comment.