Skip to content

Commit

Permalink
Merge pull request #85 from arthur-schnitzler/main
Browse files (browse the repository at this point in the history)
updates
  • Loading branch information
csae8092 authored Jun 4, 2024
2 parents 4d06e7e + be58446 commit 510b25d
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 1 deletion.
8 changes: 7 additions & 1 deletion dumper/management/commands/wikidata_minter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,9 +16,15 @@ class Command(BaseCommand):
help = "mint WikiData IDs for GND-URIs"

def handle(self, *args, **kwargs):
LIMIT = 100
LIMIT = 2
USER_AGENT_PMB = "pmb (https://pmb.acdh.oeaw.ac.at)"
col, _ = Collection.objects.get_or_create(name="No WikiData-ID found")
ents = TempEntityClass.objects.filter(uri__uri__icontains="wikidata").filter(collection=col)
print(f"found {ents.count()} entities with wikidata-ids but related to {col}")
if ents:
print(f"remove relation to {col}")
for x in tqdm(ents, total=ents.count()):
x.collection.remove(col)
types = ["d-nb.info", "geonames"]
for uri_type in types:
print(f"processing URIS with type: {uri_type}")
Expand Down
127 changes: 127 additions & 0 deletions issue__195_deletenotusedprofessions.ipynb
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,127 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1e1c119a",
"metadata": {},
"outputs": [],
"source": [
"# 2024-05-28 run against production\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d95d08e9",
"metadata": {},
"outputs": [],
"source": [
"ProfessionType.objects.all().count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5543f8e0",
"metadata": {},
"outputs": [],
"source": [
"ProfessionType.objects.filter(person=None).count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b071e98",
"metadata": {},
"outputs": [],
"source": [
"data = []\n",
"for x in ProfessionType.objects.filter(person=None).distinct():\n",
" data.append([x.id, x.name])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30beda86",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4fcb8186",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(data, columns=[\"id\", \"name\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d7acc58e",
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"hansi.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18d88961",
"metadata": {},
"outputs": [],
"source": [
"for x in ProfessionType.objects.filter(person=None).distinct():\n",
" x.delete()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6bac8d6e",
"metadata": {},
"outputs": [],
"source": [
"ProfessionType.objects.all().count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17dc6357",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 510b25d

Please sign in to comment.