Skip to content

Commit

Permalink
Merge pull request #79 from arthur-schnitzler/main
Browse files Browse the repository at this point in the history
updates
  • Loading branch information
csae8092 authored Apr 26, 2024
2 parents 84d58b8 + 3c6cbc5 commit df79426
Show file tree
Hide file tree
Showing 22 changed files with 362 additions and 9 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -185,4 +185,6 @@ hansi.*
media/relations.gexf
edges.csv
nodes.csv
node_modules/
node_modules/
featureCodes_en.csv
featureCodes_en.tsv
11 changes: 9 additions & 2 deletions apis_core/apis_entities/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,26 +140,33 @@ def test_009_merge_view(self):
after = Person.objects.all().count()
self.assertTrue(before > after)

def test_009a_merge_notesandreferences_andgedner(self):
    """Check what a merge target holds after absorbing other entities.

    Two persons are merged into a third one: the first by primary key,
    the second as a model instance. The target starts without a gender,
    is expected to take "female" from the first source, and to keep it
    when the "male" source is merged afterwards. Notes and references of
    the first source must show up next to the target's own.
    """
    merged_by_id = Person.objects.create(
        name="Person which will be merged",
        notes="notes_one",
        references="references_one",
        gender="female",
    )
    merged_by_instance = Person.objects.create(
        name="Person two which will be merged",
        gender="male",
    )
    target = Person.objects.create(
        name="Person which will be kept",
        notes="target_notes",
        references="target_references",
    )

    # First merge: source handed over as a primary key.
    target.merge_with(merged_by_id.id)
    self.assertEqual("female", target.gender)
    for expected_note in ("notes_one", "target_notes"):
        self.assertTrue(expected_note in target.notes)
    for expected_reference in ("references_one", "target_references"):
        self.assertTrue(expected_reference in target.references)

    # Second merge: source handed over as an instance; the gender that is
    # already set on the target must not be replaced.
    target.merge_with(merged_by_instance)
    self.assertEqual("female", target.gender)

    # No assertion follows: the Place merge only has to run through.
    # NOTE(review): presumably covers models without a gender field - confirm.
    place_target = Place.objects.create(name="Sumsi")
    place_source = Place.objects.create(name="Dumsi")
    place_target.merge_with(place_source.id)

def test_010_delete_views(self):
client.login(**USER)
Expand Down
22 changes: 17 additions & 5 deletions apis_core/apis_metainfo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,14 +183,14 @@ def clean_start_date_written(self):
if self.start_date_written:
if "<" in self.start_date_written:
clean_date = self.start_date_written.split("<")[0]
return clean_date
return clean_date.strip()

def clean_end_date_written(self):
    """Return ``end_date_written`` cut off at the first ``<`` and stripped.

    Everything from the first ``<`` onwards is dropped and surrounding
    whitespace is removed from what is left.

    Returns:
        The cleaned text, or the unchanged falsy value (``None`` or ``""``)
        when no end date was written.
    """
    written = self.end_date_written
    if not written:
        # Nothing to clean. Calling ``.strip()`` on ``None`` would raise
        # AttributeError, so the empty value is handed back as it is.
        return written
    # ``partition`` yields the whole string when no "<" is present.
    return written.partition("<")[0].strip()

@classmethod
def get_listview_url(self):
Expand Down Expand Up @@ -298,17 +298,23 @@ def merge_with(self, entities):
if not isinstance(entities, list) and not isinstance(entities, QuerySet):
entities = [entities]
entities = [
self_model_class.objects.get(pk=ent)
if type(ent) == int
else ent # noqa: E721
self_model_class.objects.get(pk=ent) if type(ent) == int else ent # noqa: E721
for ent in entities
]
rels = ContentType.objects.filter(
app_label="apis_relations", model__icontains=e_a
)
try:
self_gender = self.gender
except AttributeError:
self_gender = False
notes = []
references = []
for ent in entities:
try:
ent_gender = ent.gender
except AttributeError:
ent_gender = False
if isinstance(ent.notes, str):
notes.append(ent.notes)
if isinstance(ent.references, str):
Expand Down Expand Up @@ -357,6 +363,12 @@ def merge_with(self, entities):
setattr(t, "related_{}".format(e_a.lower()), self)
t.save()
ent.delete()
if self_gender:
pass
elif ent_gender:
self.gender = ent_gender
else:
pass
save_target = False
if len(notes) > 0:
additional_notes = " ".join(notes)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"metadata": {},
"outputs": [],
"source": [
"# rerun 2024-04-12\n",
"# rerun 2024-04-09\n",
"# run 2024-04-05\n",
"from tqdm import tqdm\n",
Expand Down Expand Up @@ -39,6 +40,14 @@
" except Exception as e:\n",
" print(keep.id, remove.id, e)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9f368fb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
85 changes: 85 additions & 0 deletions notebooks/issue__171_gnfeature_codes.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "51cca613",
"metadata": {},
"outputs": [],
"source": [
"# run against production 2024-04-18\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"from django.core.exceptions import MultipleObjectsReturned"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "733e8089",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_table(\"./featureCodes_en.tsv\", sep=\"\\t\", header=None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b41f79d4",
"metadata": {},
"outputs": [],
"source": [
"print(PlaceType.objects.all().count())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f380fb44",
"metadata": {},
"outputs": [],
"source": [
"for i, row in tqdm(df.iterrows()):\n",
" exists = PlaceType.objects.filter(name=row[0])\n",
" if exists:\n",
" pass\n",
" else:\n",
" try:\n",
" new_place = PlaceType.objects.create(name=row[0], description=row[1])\n",
" except:\n",
" pass\n",
"print(PlaceType.objects.all().count()) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb3f96c3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
130 changes: 130 additions & 0 deletions notebooks/issue__180_merge_place_types.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f47bcd68",
"metadata": {},
"outputs": [],
"source": [
"# run against production 2024-04-18\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f820cd7f",
"metadata": {},
"outputs": [],
"source": [
"props = [\"id\", \"name\"] "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe06fca6",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(\n",
" PlaceType.objects.all().values_list(*props),\n",
" columns=props,\n",
").astype(\"str\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6c74cfd",
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c0cca8a",
"metadata": {},
"outputs": [],
"source": [
"to_process = []\n",
"for g, ndf in df.groupby(\"name\"):\n",
" if len(ndf) > 1:\n",
" to_process.append([int(x) for x in ndf['id'].tolist()])\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a73dbb6",
"metadata": {},
"outputs": [],
"source": [
"len(to_process)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd48e0e6",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Nr. of place types before: {PlaceType.objects.all().count()}\")\n",
"for x in to_process:\n",
" good_type = PlaceType.objects.get(id=x[0])\n",
" bad_type = PlaceType.objects.get(id=x[1])\n",
" bad_type_base = VocabsBaseClass.objects.get(id=x[1])\n",
" for place in Place.objects.filter(kind=bad_type):\n",
" place.kind = good_type\n",
" place.save()\n",
" bad_type_base.delete()\n",
"print(f\"Nr. of place types after: {PlaceType.objects.all().count()}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd2993ae",
"metadata": {},
"outputs": [],
"source": [
"PlaceType.objects.filter(name=\"A.ADM1\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "313cea39",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit df79426

Please sign in to comment.