Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

firebreak: Remove ML dependencies and use related barriers api #838

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ __timestamp = $(shell date +%F_%H-%M)
pip-install: ## Install pip requirements inside the container.
@echo "$$(tput setaf 3)🙈 Installing Pip Packages 🙈$$(tput sgr 0)"
@docker-compose exec web poetry lock
@docker-compose exec web poetry export --without-hashes -f requirements.txt -o requirements-app.txt
@docker-compose exec web poetry export --without-hashes -f requirements.txt -o requirements.txt
@docker-compose exec web poetry export --with dev --without-hashes -f requirements.txt -o requirements-dev.txt
@docker-compose exec web pip install -r requirements-dev.txt
@docker-compose exec web sed -i '1i# ======\n# DO NOT EDIT - use pyproject.toml instead!\n# Generated: $(__timestamp)\n# ======' requirements.txt
Expand Down
18 changes: 7 additions & 11 deletions api/barriers/signals/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
send_top_priority_notification,
)
from api.metadata.constants import TOP_PRIORITY_BARRIER_STATUS
from api.related_barriers import manager
from api.related_barriers.constants import BarrierEntry
from api.related_barriers.manager import BARRIER_UPDATE_FIELDS
from api.related_barriers import client

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -204,21 +202,19 @@ def related_barrier_update_embeddings(sender, instance, *args, **kwargs):

changed = any(
getattr(current_barrier_object, field) != getattr(instance, field)
for field in BARRIER_UPDATE_FIELDS
for field in ["title", "summary"]
)
logger.info(
f"(Handler) Updating related barrier embeddings for {instance.pk}: {changed}"
)

if changed and not current_barrier_object.draft:
if not manager.manager:
manager.init()
try:
manager.manager.update_barrier(
BarrierEntry(
id=str(current_barrier_object.id),
barrier_corpus=manager.barrier_to_corpus(current_barrier_object),
)
# Fail gracefully
client.get_related_barriers(
pk=str(current_barrier_object.id),
title=instance.title,
summary=instance.summary,
)
except Exception as e:
# We don't want barrier embedding updates to break worker so just log error
Expand Down
43 changes: 43 additions & 0 deletions api/related_barriers/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os

import requests


def seed_data():
    """Return the barrier rows used to seed the related-barriers service.

    Selects barriers with ``archived=False``, excludes drafts, and annotates
    each row with ``barrier_corpus`` (title, then ". ", then summary).

    Returns:
        A ``values()`` queryset yielding dicts with the keys ``"id"`` and
        ``"barrier_corpus"``.
    """
    # Imports are kept function-local, as in the original.
    # NOTE(review): presumably this avoids loading Django models at module
    # import time -- confirm before hoisting them.
    from django.db.models import CharField, Value
    from django.db.models.functions import Concat

    from api.barriers.models import Barrier

    corpus_expression = Concat(
        "title", Value(". "), "summary", output_field=CharField()
    )
    candidates = Barrier.objects.filter(archived=False).exclude(draft=True)
    annotated = candidates.annotate(barrier_corpus=corpus_expression)
    return annotated.values("id", "barrier_corpus")


def seed(timeout=30):
    """Send every row from ``seed_data()`` to the service's ``/seed`` endpoint.

    The base URL is read from the ``RELATED_BARRIERS_BASE_URL`` environment
    variable.

    Args:
        timeout: Seconds to wait for the service before giving up. Passed
            straight to ``requests.post``; without it the call could block
            indefinitely on an unresponsive service.

    Raises:
        KeyError: If ``RELATED_BARRIERS_BASE_URL`` is not set.
        requests.RequestException: On connection failure, timeout, or a
            non-2xx response from the service.
    """
    payload = {
        "data": [
            {"barrier_id": str(row["id"]), "corpus": row["barrier_corpus"]}
            for row in seed_data()
        ]
    }

    response = requests.post(
        f"{os.environ['RELATED_BARRIERS_BASE_URL']}/seed",
        json=payload,
        timeout=timeout,
    )
    # Surface a rejected seed instead of silently discarding the response.
    response.raise_for_status()


def get_related_barriers(pk, title, summary, timeout=10):
    """Ask the related-barriers service for barriers similar to the given one.

    Posts the barrier id and its corpus (title, then ". ", then summary) to
    the ``/related-barriers`` endpoint under ``RELATED_BARRIERS_BASE_URL``.

    Args:
        pk: Barrier identifier; it is stringified before sending.
        title: Barrier title. ``None`` is treated as an empty string.
        summary: Barrier summary. ``None`` is treated as an empty string.
        timeout: Seconds to wait for the service before giving up. Passed
            straight to ``requests.post``.

    Returns:
        A list with the ``"barrier_id"`` of each entry in the response's
        ``"results"`` array, in the order the service returned them.

    Raises:
        KeyError: If ``RELATED_BARRIERS_BASE_URL`` is not set, or the response
            body has no ``"results"`` key.
        requests.RequestException: On connection failure, timeout, or a
            non-2xx response from the service.
    """
    # Avoid sending the literal text "None" for missing fields; an empty
    # string matches how Django's Concat treats NULL when seed_data() builds
    # the same corpus.
    corpus = f"{title or ''}. {summary or ''}"

    response = requests.post(
        f"{os.environ['RELATED_BARRIERS_BASE_URL']}/related-barriers",
        json={"barrier_id": f"{pk}", "corpus": corpus},
        timeout=timeout,
    )
    # Fail with a clear HTTP error rather than a JSON/KeyError on an error page.
    response.raise_for_status()

    return [match["barrier_id"] for match in response.json()["results"]]
9 changes: 0 additions & 9 deletions api/related_barriers/constants.py

This file was deleted.

236 changes: 0 additions & 236 deletions api/related_barriers/manager.py

This file was deleted.

21 changes: 5 additions & 16 deletions api/related_barriers/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,7 @@
from rest_framework.response import Response

from api.barriers.models import Barrier
from api.related_barriers import manager
from api.related_barriers.constants import (
SIMILAR_BARRIERS_LIMIT,
SIMILARITY_THRESHOLD,
BarrierEntry,
)
from api.related_barriers import client
from api.related_barriers.serializers import BarrierRelatedListSerializer

logger = logging.getLogger(__name__)
Expand All @@ -24,20 +19,14 @@ def related_barriers(request, pk) -> Response:
logger.info(f"Getting related barriers for {pk}")
barrier = get_object_or_404(Barrier, pk=pk)

if manager.manager is None:
manager.init()
# client.seed()

similar_barrier_ids = manager.manager.get_similar_barriers(
barrier=BarrierEntry(
id=str(barrier.id),
barrier_corpus=manager.barrier_to_corpus(barrier),
),
similarity_threshold=SIMILARITY_THRESHOLD,
quantity=SIMILAR_BARRIERS_LIMIT,
barrier_ids = client.get_related_barriers(
pk=str(barrier.pk), title=barrier.title, summary=barrier.summary
)

return Response(
BarrierRelatedListSerializer(
Barrier.objects.filter(id__in=similar_barrier_ids), many=True
Barrier.objects.filter(id__in=barrier_ids), many=True
).data
)
4 changes: 4 additions & 0 deletions docker-compose.local-template.env
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ FAKE_METADATA=False # For local docker testing, set to True
# It's used to generate links for the given environment.
DMAS_BASE_URL=http://localhost:9880

# Related Barriers Service
# ========================
RELATED_BARRIERS_BASE_URL=not-set

# Django
# ========================
DEBUG=True
Expand Down
Loading