Skip to content

Commit

Permalink
merge #19
Browse files Browse the repository at this point in the history
  • Loading branch information
chrabyrd committed Aug 28, 2024
2 parents 38dd2b2 + cd1b7c9 commit 3c039e5
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 125 deletions.
119 changes: 9 additions & 110 deletions arches_lingo/views/trees.py → arches_lingo/concepts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,16 @@

from django.contrib.postgres.expressions import ArraySubquery
from django.core.cache import caches
from django.core.paginator import Paginator
from django.db.models import (
CharField,
Exists,
FloatField,
F,
OuterRef,
Value,
)
from django.db.models.expressions import CombinedExpression, Func
from django.db.models import CharField, Exists, F, OuterRef, Value
from django.db.models.expressions import CombinedExpression
from django.utils.translation import gettext_lazy as _
from django.utils.decorators import method_decorator
from django.views.generic import View

from arches.app.models.models import (
Relation,
ResourceInstance,
TileModel,
Value as ConceptValue,
)
from arches.app.models.system_settings import settings
from arches.app.utils.decorators import group_required
from arches.app.utils.response import JSONResponse

from arches_lingo.const import (
SCHEMES_GRAPH_ID,
Expand All @@ -44,33 +31,16 @@
PREF_LABEL_VALUE_ID,
ALT_LABEL_VALUE_ID,
)
from arches_lingo.models import VwLabelValue
from arches_lingo.query_utils import JsonbArrayElements

TOP_CONCEPT_OF_LOOKUP = f"data__{TOP_CONCEPT_OF_NODE_AND_NODEGROUP}"
BROADER_LOOKUP = f"data__{CLASSIFICATION_STATUS_ASCRIBED_CLASSIFICATION_NODEID}"

cache = caches["lingo"]


class JsonbArrayElements(Func):
    """Query expression that renders the SQL function ``JSONB_ARRAY_ELEMENTS``.

    Accepts exactly one argument. For the reason ``contains_subquery`` is
    switched on, see:
    https://forum.djangoproject.com/t/django-4-2-behavior-change-when-using-arrayagg-on-unnested-arrayfield-postgresql-specific/21547/5
    """

    function = "JSONB_ARRAY_ELEMENTS"
    arity = 1
    contains_subquery = True


class LevenshteinLessEqual(Func):
    """Query expression that renders the SQL function ``LEVENSHTEIN_LESS_EQUAL``.

    Accepts exactly three arguments.
    """

    function = "LEVENSHTEIN_LESS_EQUAL"
    arity = 3


@method_decorator(
group_required("RDM Administrator", raise_exception=True), name="dispatch"
)
class ConceptTreeView(View):
class ConceptBuilder:
def __init__(self):
super().__init__()
self.schemes = ResourceInstance.objects.none()

# key=concept valueid (str) val=language code
Expand Down Expand Up @@ -280,11 +250,11 @@ def add_broader_concept_recursive(self, working_parent_list, conceptid):
schemes = sorted(self.schemes_by_top_concept[conceptid])
working_parent_list.insert(0, schemes[0])
return working_parent_list
else:
working_parent_list.insert(0, first_broader_conceptid)
return self.add_broader_concept_recursive(
working_parent_list, first_broader_conceptid
)

working_parent_list.insert(0, first_broader_conceptid)
return self.add_broader_concept_recursive(
working_parent_list, first_broader_conceptid
)

def serialize_concept_label(self, label_tile: dict):
lang_code = self.language_concepts[label_tile[CONCEPT_NAME_LANGUAGE_NODE][0]]
Expand All @@ -298,74 +268,3 @@ def serialize_concept_label(self, label_tile: dict):
"language": lang_code,
"value": value,
}

def get(self, request):
    """Respond with every scheme in ``self.schemes``, serialized, as JSON."""
    serialized_schemes = [self.serialize_scheme(scheme) for scheme in self.schemes]
    # Todo: filter by nodegroup permissions
    return JSONResponse({"schemes": serialized_schemes})


@method_decorator(
    group_required("RDM Administrator", raise_exception=True), name="dispatch"
)
class ValueSearchView(ConceptTreeView):
    """Paginated search over concept label values.

    Query parameters read from ``request.GET``:
        term: text to fuzzy-match against label values. When absent or empty,
            concepts are listed ordered by ``concept_id`` instead.
        maxEditDistance: largest edit distance accepted for a match; falls
            back to ``default_sensitivity()`` when missing or not an integer.
        page: page number, handed to ``Paginator.get_page``.
        items: page size; falls back to 25 when missing or not an integer and
            is never allowed below 1.
    """

    def get(self, request):
        """Return one page of serialized concepts plus pagination metadata."""
        term = request.GET.get("term")
        # Query-string values arrive as str; coerce so the database and the
        # filter below always receive an int, as they do for the default.
        max_edit_distance = self._int_param(
            request.GET.get("maxEditDistance"), self.default_sensitivity()
        )
        page_number = request.GET.get("page", 1)
        # A page size below 1 would break the paginator's page arithmetic.
        items_per_page = self._int_param(request.GET.get("items"), 25, minimum=1)

        concept_query = VwLabelValue.objects.all()
        if term:
            concept_query = (
                concept_query.annotate(
                    edit_distance=LevenshteinLessEqual(
                        F("value"),
                        Value(term),
                        Value(max_edit_distance),
                        output_field=FloatField(),
                    )
                )
                .filter(edit_distance__lte=max_edit_distance)
                .order_by("edit_distance")
            )
        else:
            concept_query = concept_query.order_by("concept_id")
        concept_query = concept_query.values_list("concept_id", flat=True).distinct()

        paginator = Paginator(concept_query, items_per_page)
        page = paginator.get_page(page_number)

        data = [
            self.serialize_concept(str(concept_uuid), parents=True, children=False)
            for concept_uuid in page
        ]

        # Todo: filter by nodegroup permissions
        return JSONResponse(
            {
                "current_page": page.number,
                "total_pages": paginator.num_pages,
                "results_per_page": paginator.per_page,
                "total_results": paginator.count,
                "data": data,
            }
        )

    @staticmethod
    def _int_param(raw, default, minimum=None):
        """Parse a query-string integer, tolerating missing or malformed input.

        Returns ``int(raw)`` when that succeeds, otherwise ``int(default)``;
        the result is raised to ``minimum`` when one is given.
        """
        try:
            value = int(raw)
        except (TypeError, ValueError):
            value = int(default)
        if minimum is not None and value < minimum:
            return minimum
        return value

    @staticmethod
    def default_sensitivity():
        """Remains to be seen whether the existing elastic sensitivity setting
        should be the fallback, but stub something out for now.
        This sensitivity setting is actually inversely related to edit distance,
        because it's prefix_length in elastic, not fuzziness, so invert it.
        """
        elastic_prefix_length = settings.SEARCH_TERM_SENSITIVITY
        if elastic_prefix_length <= 0:
            return 5
        if elastic_prefix_length >= 5:
            return 0
        return int(5 - elastic_prefix_length)
2 changes: 1 addition & 1 deletion arches_lingo/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class FuzzyStrMatchExtension(CreateExtension):
    """``CreateExtension`` operation preconfigured for ``fuzzystrmatch``."""

    def __init__(self):
        # The extension name is handed to the parent constructor, so it is not
        # also assigned to self.name by hand here.
        super().__init__("fuzzystrmatch")


class Migration(migrations.Migration):
Expand Down
4 changes: 4 additions & 0 deletions arches_lingo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from arches.app.models.models import ResourceInstance

from arches_lingo.querysets import LabelValueQuerySet


class VwLabelValue(models.Model):
concept = models.ForeignKey(
Expand All @@ -12,6 +14,8 @@ class VwLabelValue(models.Model):
)
value = models.CharField(db_column="value")

objects = LabelValueQuerySet.as_manager()

class Meta:
managed = False
db_table = f"arches_lingo__vw_label_values"
14 changes: 14 additions & 0 deletions arches_lingo/query_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from django.db.models.expressions import Func


class JsonbArrayElements(Func):
    """Query expression that renders the SQL function ``JSONB_ARRAY_ELEMENTS``.

    Accepts exactly one argument. For the reason ``contains_subquery`` is
    switched on, see:
    https://forum.djangoproject.com/t/django-4-2-behavior-change-when-using-arrayagg-on-unnested-arrayfield-postgresql-specific/21547/5
    """

    function = "JSONB_ARRAY_ELEMENTS"
    arity = 1
    contains_subquery = True


class LevenshteinLessEqual(Func):
    """Query expression that renders the SQL function ``LEVENSHTEIN_LESS_EQUAL``.

    Accepts exactly three arguments.
    """

    function = "LEVENSHTEIN_LESS_EQUAL"
    arity = 3
23 changes: 23 additions & 0 deletions arches_lingo/querysets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from django.db import models

from arches_lingo.query_utils import LevenshteinLessEqual


class LabelValueQuerySet(models.QuerySet):
    """QuerySet that adds fuzzy matching on the ``value`` column."""

    def fuzzy_search(self, term, max_edit_distance):
        """Narrow this queryset to rows whose ``value`` is close to ``term``.

        Each row is annotated with ``edit_distance`` (computed with
        ``LevenshteinLessEqual``), rows whose distance exceeds
        ``max_edit_distance`` are dropped, and the remainder is ordered by
        ``edit_distance``, closest first.

        The search is built on ``self`` rather than on a fresh
        ``VwLabelValue.objects.all()``, so filters applied before this call
        are kept, and no import of the model module is needed here.

        Args:
            term: text to compare each ``value`` against.
            max_edit_distance: largest edit distance still counted as a match.

        Returns:
            The annotated, filtered and ordered queryset.
        """
        return (
            self.annotate(
                edit_distance=LevenshteinLessEqual(
                    models.F("value"),
                    models.Value(term),
                    models.Value(max_edit_distance),
                    output_field=models.FloatField(),
                )
            )
            .filter(edit_distance__lte=max_edit_distance)
            .order_by("edit_distance")
        )
2 changes: 1 addition & 1 deletion arches_lingo/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@
"oauth2_provider.middleware.OAuth2TokenMiddleware",
"django.contrib.auth.middleware.AuthenticationMiddleware",
"django.contrib.messages.middleware.MessageMiddleware",
# "django.middleware.clickjacking.XFrameOptionsMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"arches.app.utils.middleware.SetAnonymousUser",
# "silk.middleware.SilkyMiddleware",
]
Expand Down
13 changes: 3 additions & 10 deletions arches_lingo/src/arches_lingo/App.vue
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,13 @@ const { $gettext } = useGettext();
router.beforeEach(async (to, _from, next) => {
try {
let userData = user.value;
if (!userData || userData.username === ANONYMOUS) {
userData = await fetchUser();
setUser(userData);
}
let userData = await fetchUser();
setUser(userData);
const requiresAuthentication = to.matched.some(
(record) => record.meta.requiresAuthentication,
);
if (
requiresAuthentication &&
(!userData || userData.username === ANONYMOUS)
) {
if (requiresAuthentication && userData.username === ANONYMOUS) {
throw new Error();
} else {
next();
Expand Down
5 changes: 3 additions & 2 deletions arches_lingo/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
from django.conf.urls.i18n import i18n_patterns
from django.urls import include, path

from arches_lingo.views import LingoRootView, ConceptTreeView, ValueSearchView
from arches_lingo.views.root import LingoRootView
from arches_lingo.views.api.concepts import ConceptTreeView, ValueSearchView

urlpatterns = [
path("", LingoRootView.as_view(), name="root"),
path("login", LingoRootView.as_view(), name="login"),
path("search", LingoRootView.as_view(), name="search"),
path("advanced-search", LingoRootView.as_view(), name="advanced-search"),
path("schemes", LingoRootView.as_view(), name="schemes"),
path("api/concept_trees", ConceptTreeView.as_view(), name="concept_trees"),
path("api/concepts", ConceptTreeView.as_view(), name="api_concepts"),
path("api/search", ValueSearchView.as_view(), name="api_search"),
path("", include("arches_references.urls")),
]
Expand Down
70 changes: 70 additions & 0 deletions arches_lingo/views/api/concepts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from django.core.paginator import Paginator
from django.utils.decorators import method_decorator
from django.views.generic import View

from arches.app.models.system_settings import settings
from arches.app.utils.decorators import group_required
from arches.app.utils.response import JSONResponse

from arches_lingo.models import VwLabelValue
from arches_lingo.concepts import ConceptBuilder


@method_decorator(
    group_required("RDM Administrator", raise_exception=True), name="dispatch"
)
class ConceptTreeView(View):
    """JSON endpoint listing every scheme produced by a ``ConceptBuilder``."""

    def get(self, request):
        """Build a fresh ``ConceptBuilder`` and return its serialized schemes."""
        builder = ConceptBuilder()
        serialized_schemes = [
            builder.serialize_scheme(scheme) for scheme in builder.schemes
        ]
        return JSONResponse({"schemes": serialized_schemes})


@method_decorator(
    group_required("RDM Administrator", raise_exception=True), name="dispatch"
)
class ValueSearchView(ConceptTreeView):
    """Search concept labels and return one page of serialized concepts.

    Query parameters read from ``request.GET``:
        term: text to fuzzy-match against label values. When absent or empty,
            concepts are listed ordered by ``concept_id`` instead.
        maxEditDistance: largest edit distance accepted for a match; falls
            back to ``default_sensitivity()`` when missing or not an integer.
        page: page number, handed to ``Paginator.get_page``.
        items: page size; falls back to 25 when missing or not an integer and
            is never allowed below 1.
    """

    def get(self, request):
        """Return the requested page of matching concepts as a JSON list."""
        term = request.GET.get("term")
        # Malformed numbers fall back to defaults instead of raising, in the
        # same lenient spirit as Paginator.get_page() below.
        max_edit_distance = self._int_param(
            request.GET.get("maxEditDistance"), self.default_sensitivity()
        )
        page_number = request.GET.get("page", 1)
        # A page size below 1 would break the paginator's page arithmetic.
        items_per_page = self._int_param(request.GET.get("items"), 25, minimum=1)

        if term:
            concept_query = VwLabelValue.objects.fuzzy_search(term, max_edit_distance)
        else:
            concept_query = VwLabelValue.objects.all().order_by("concept_id")
        # NOTE(review): with the edit_distance ordering still in place,
        # distinct() may not collapse a concept that matched through several
        # labels at different distances -- confirm against the generated SQL.
        concept_query = concept_query.values_list("concept_id", flat=True).distinct()

        paginator = Paginator(concept_query, items_per_page)
        page = paginator.get_page(page_number)

        data = []
        # Only construct a ConceptBuilder when the page has rows to serialize.
        if page:
            builder = ConceptBuilder()
            data = [
                builder.serialize_concept(
                    str(concept_uuid), parents=True, children=False
                )
                for concept_uuid in page
            ]

        return JSONResponse(data)

    @staticmethod
    def _int_param(raw, default, minimum=None):
        """Parse a query-string integer, tolerating missing or malformed input.

        Returns ``int(raw)`` when that succeeds, otherwise ``int(default)``;
        the result is raised to ``minimum`` when one is given.
        """
        try:
            value = int(raw)
        except (TypeError, ValueError):
            value = int(default)
        if minimum is not None and value < minimum:
            return minimum
        return value

    @staticmethod
    def default_sensitivity():
        """Remains to be seen whether the existing elastic sensitivity setting
        should be the fallback, but stub something out for now.
        This sensitivity setting is actually inversely related to edit distance,
        because it's prefix_length in elastic, not fuzziness, so invert it.
        """
        elastic_prefix_length = settings.SEARCH_TERM_SENSITIVITY
        if elastic_prefix_length <= 0:
            return 5
        if elastic_prefix_length >= 5:
            return 0
        return 5 - elastic_prefix_length
2 changes: 1 addition & 1 deletion tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def test_get_concept_trees(self):
# 4: select broader tiles, subquery for labels
# 5: select top concept tiles, subquery for labels
# 6: select schemes, subquery for labels
response = self.client.get(reverse("concept_trees"))
response = self.client.get(reverse("api_concepts"))

self.assertEqual(response.status_code, 200)
result = json.loads(response.content)
Expand Down

0 comments on commit 3c039e5

Please sign in to comment.