Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add concept to reference dt node migration #24 #28

Merged
merged 13 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 152 additions & 3 deletions arches_references/management/commands/controlled_lists.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
from arches.app.models.models import Value, Language
from arches.app.models.fields.i18n import I18n_JSONField
from arches.app.models.models import (
CardXNodeXWidget,
GraphModel,
Language,
Node,
Value,
Widget,
)
from arches.app.models.graph import Graph
from arches_references.models import List
from django.core.exceptions import ValidationError
from django.core.management.base import BaseCommand, CommandError
from django.db import models, transaction
from django.db.models.expressions import CombinedExpression
from django.db.models.fields.json import KT
from django.db.models.functions import Cast


class Command(BaseCommand):
Expand All @@ -16,7 +30,10 @@ def add_arguments(self, parser):
action="store",
dest="operation",
required=True,
choices=["migrate_collections_to_controlled_lists"],
choices=[
"migrate_collections_to_controlled_lists",
"migrate_concept_nodes_to_reference_datatype",
],
help="The operation to perform",
)

Expand All @@ -26,7 +43,6 @@ def add_arguments(self, parser):
action="store",
dest="collections_to_migrate",
nargs="*",
required=True,
johnatawnclementawn marked this conversation as resolved.
Show resolved Hide resolved
help="One or more collections to migrate to controlled lists",
)

Expand Down Expand Up @@ -57,6 +73,14 @@ def add_arguments(self, parser):
help="The language to use for sorting preferred labels. Default 'en'",
)

parser.add_argument(
"-g",
"--graph",
action="store",
dest="graph",
help="The graphid which associated concept nodes will be migrated to use the reference datatype",
)

def handle(self, *args, **options):
if options["operation"] == "migrate_collections_to_controlled_lists":
psl = options["preferred_sort_language"]
Expand All @@ -69,6 +93,9 @@ def handle(self, *args, **options):
)
)

if options["collections_to_migrate"] is None:
raise CommandError("No collections provided to migrate.")

if not options["overwrite"]:
for collection_name in options["collections_to_migrate"]:
if List.objects.filter(name=collection_name).exists():
Expand All @@ -82,6 +109,8 @@ def handle(self, *args, **options):
overwrite=options["overwrite"],
preferred_sort_language=psl,
)
elif options["operation"] == "migrate_concept_nodes_to_reference_datatype":
self.migrate_concept_nodes_to_reference_datatype(options["graph"])

def migrate_collections_to_controlled_lists(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of scope for this PR, but I just ran into a case sensitivity thing, so it might have been nicer to do a looser match, e.g. value__lower__in=collections_to_migrate and then on the python side lower() everything in collections_to_migrate.

self,
Expand Down Expand Up @@ -139,3 +168,123 @@ def migrate_collections_to_controlled_lists(
)
result = cursor.fetchone()
self.stdout.write(result[0])

def migrate_concept_nodes_to_reference_datatype(self, graph_id):
try:
source_graph = GraphModel.objects.get(pk=graph_id)
except (GraphModel.DoesNotExist, ValidationError) as e:
raise CommandError(e)

nodes = (
Node.objects.filter(
graph_id=source_graph.graphid,
datatype__in=["concept", "concept-list"],
is_immutable=False,
)
.annotate(
collection_id=Cast(
KT("config__rdmCollection"),
output_field=models.UUIDField(),
)
)
.prefetch_related("cardxnodexwidget_set")
)

if len(nodes) == 0:
raise CommandError(
"No concept/concept-list nodes found for the {0} graph".format(
source_graph.name
)
)

REFERENCE_SELECT_WIDGET = Widget.objects.get(name="reference-select-widget")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering if we should make this more resilient against widget renames. Should we use id? Or just filter on datatype and use the hardcoded one if present else an aribtrary one?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, but I think for now it's okay. Currently the reference select widget handles both single and multi-select. The forecasts for folding other core datatypes into Arches-References are far away - I say we cross this bridge when we get there.

controlled_list_ids = List.objects.all().values_list("id", flat=True)

errors = []
with transaction.atomic():
for node in nodes:
if node.collection_id in controlled_list_ids:
if node.datatype == "concept":
node.config = {
"multiValue": False,
"controlledList": str(node.collection_id),
}
elif node.datatype == "concept-list":
node.config = {
"multiValue": True,
"controlledList": str(node.collection_id),
}
node.datatype = "reference"
node.full_clean()
node.save()

cross_records = (
node.cardxnodexwidget_set.annotate(
config_without_i18n=Cast(
models.F("config"),
output_field=models.JSONField(),
)
)
.annotate(
without_default=CombinedExpression(
models.F("config_without_i18n"),
"-",
models.Value(
"defaultValue", output_field=models.CharField()
),
output_field=models.JSONField(),
)
)
.annotate(
without_default_and_options=CombinedExpression(
models.F("without_default"),
"-",
models.Value(
"options", output_field=models.CharField()
),
output_field=I18n_JSONField(),
)
)
)
for cross_record in cross_records:
# work around for i18n as_sql method issue detailed here: https://github.com/archesproject/arches/issues/11473
cross_record.config = {}
cross_record.save()
johnatawnclementawn marked this conversation as resolved.
Show resolved Hide resolved

cross_record.config = cross_record.without_default_and_options
cross_record.widget = REFERENCE_SELECT_WIDGET
cross_record.full_clean()
cross_record.save()

elif node.collection_id not in controlled_list_ids:
errors.append(
{"node_alias": node.alias, "collection_id": node.collection_id}
)

if errors:
self.stderr.write(
"The following collections for the associated nodes have not been migrated to controlled lists:"
)
for error in errors:
self.stderr.write(
"Node alias: {0}, Collection ID: {1}".format(
error["node_alias"], error["collection_id"]
)
)
else:
source_graph = Graph.objects.get(pk=graph_id)

# Refresh the nodes to ensure the changes are reflected in the serialized graph
for node in source_graph.nodes.values():
node.refresh_from_db()

source_graph.create_editable_future_graph()
source_graph.publish(
notes="Migrated concept/concept-list nodes to reference datatype"
)

self.stdout.write(
"All concept/concept-list nodes for the {0} graph have been successfully migrated to reference datatype".format(
source_graph.name
)
)
80 changes: 79 additions & 1 deletion tests/cli_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from django.test.utils import captured_stdout
from django.core.management.base import CommandError

from arches_references.models import List, ListItem, ListItemValue
from arches.app.models.models import Node
from arches.app.utils.skos import SKOSReader
from arches_references.models import List, ListItem, ListItemValue

from .test_settings import PROJECT_TEST_ROOT

Expand Down Expand Up @@ -155,3 +156,80 @@ def test_no_matching_language_error(self):
stderr=output,
)
self.assertEqual(expected_output, str(e.exception))


class MigrateConceptNodesToReferenceDatatypeTests(TestCase):
# Test data has three models:
# - `Concept Node Migration Test`, with four concept nodes
# - `Collection Not Migrated`, with one concept node but the collection hasn't been migrated
# - `No concept nodes`, only has a string and a number node
# Contains a Collection "Top Concept", which has been migrated to a controlled list
fixtures = ["concept_node_migration_test_data"]

def test_migrate_concept_nodes_to_reference_datatype(self):
output = io.StringIO()
TEST_GRAPH_ID = "8f7cfa3c-d0e0-4a66-8608-43dd726a1b81"

management.call_command(
"controlled_lists",
operation="migrate_concept_nodes_to_reference_datatype",
graph=TEST_GRAPH_ID,
stdout=output,
)

nodes = Node.objects.filter(graph_id=TEST_GRAPH_ID).prefetch_related(
"cardxnodexwidget_set"
)
reference_nodes = nodes.filter(datatype="reference")

self.assertEqual(len(nodes.filter(datatype__in=["concept", "concept-list"])), 0)
self.assertEqual(len(reference_nodes), 4)

expected_node_config_keys = ["multiValue", "controlledList"]
expected_widget_config_keys = ["label", "placeholder", "i18n_properties"]
for node in reference_nodes:
self.assertEqual(expected_node_config_keys, list(node.config.keys()))
for widget in node.cardxnodexwidget_set.all():
self.assertEqual(
expected_widget_config_keys, list(widget.config.keys())
)

def test_no_matching_graph_error(self):
output = io.StringIO()
expected_output = "GraphModel matching query does not exist."

with self.assertRaises(CommandError) as e:
management.call_command(
"controlled_lists",
operation="migrate_concept_nodes_to_reference_datatype",
graph="00000000-0000-0000-0000-000000000000",
stderr=output,
)
self.assertEqual(str(e.exception), expected_output)

def test_no_concept_nodes_error(self):
output = io.StringIO()
expected_output = (
"No concept/concept-list nodes found for the No concept nodes graph"
)

with self.assertRaises(CommandError) as e:
management.call_command(
"controlled_lists",
operation="migrate_concept_nodes_to_reference_datatype",
graph="fc46b399-c824-45e5-86e2-5b992b8fa619",
stderr=output,
)
self.assertEqual(str(e.exception), expected_output)

def test_collections_not_migrated_error(self):
output = io.StringIO()
expected_output = "The following collections for the associated nodes have not been migrated to controlled lists:\nNode alias: concept_not_migrated, Collection ID: 00000000-0000-0000-0000-000000000005"

management.call_command(
"controlled_lists",
operation="migrate_concept_nodes_to_reference_datatype",
graph="b974103f-73bb-4f2a-bffb-8303227ba0da",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a follow-up ticket we could accept slugs, but not now. 👍

stderr=output,
)
self.assertEqual(output.getvalue().strip(), expected_output)
Loading