diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3a60d0041771..3f9abcc671fb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,12 +2,12 @@ # Ensure that the team responsible for upgrades sees any PRs that would # add GitHub-hosted dependencies to that platform. -requirements/edx/github.in @openedx/arbi-bom +requirements/edx/github.in @openedx/2u-arbi-bom # Core common/djangoapps/student/ -common/djangoapps/student/models/__init__.py @openedx/teaching-and-learning -common/djangoapps/student/models/course_enrollment.py @openedx/teaching-and-learning +common/djangoapps/student/models/__init__.py @openedx/2u-tnl +common/djangoapps/student/models/course_enrollment.py @openedx/2u-tnl common/djangoapps/third_party_auth/ lms/djangoapps/course_api/blocks lms/djangoapps/courseware/ @@ -47,8 +47,8 @@ openedx/features/discounts/ # Ping Axim On-call if someone uses the QuickStart # https://docs.openedx.org/en/latest/developers/quickstarts/first_openedx_pr.html -lms/templates/dashboard.html @openedx/axim-oncall +lms/templates/dashboard.html @openedx/axim-oncall # Ensure minimal.yml stays minimal, this could be a team in the future # but it's just me for now, others can sign up if they care as well. -lms/envs/minimal.yml @feanil +lms/envs/minimal.yml @feanil diff --git a/Makefile b/Makefile index 0bb53d1119f4..6fc019290088 100644 --- a/Makefile +++ b/Makefile @@ -110,7 +110,7 @@ shell: ## launch a bash shell in a Docker container with all edx-platform depend REQ_FILES = \ requirements/edx/coverage \ requirements/edx/paver \ - requirements/edx-sandbox/py38 \ + requirements/edx-sandbox/base \ requirements/edx/base \ requirements/edx/doc \ requirements/edx/testing \ @@ -130,7 +130,7 @@ endef COMMON_CONSTRAINTS_TXT=requirements/common_constraints.txt .PHONY: $(COMMON_CONSTRAINTS_TXT) $(COMMON_CONSTRAINTS_TXT): - wget -O "$(@)" https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt + curl -L https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt > "$(@)" printf "$(COMMON_CONSTRAINTS_TEMP_COMMENT)" | cat - $(@) > temp && mv temp $(@) compile-requirements: export CUSTOM_COMPILE_COMMAND=make upgrade diff --git a/docs/hooks/events.rst b/docs/hooks/events.rst index 461f484a58d6..7f9584c9e8e1 100644 --- a/docs/hooks/events.rst +++ b/docs/hooks/events.rst @@ -244,3 +244,6 @@ Content Authoring Events - org.openedx.content_authoring.content_library.deleted.v1 - 2023-07-20 + * - `CONTENT_OBJECT_TAGS_CHANGED `_ + - org.openedx.content_authoring.content.object.tags.changed.v1 + - 2024-03-31 diff --git a/lms/djangoapps/certificates/apis/v0/tests/test_views.py b/lms/djangoapps/certificates/apis/v0/tests/test_views.py index fd31e3456961..efff97f54d5a 100644 --- a/lms/djangoapps/certificates/apis/v0/tests/test_views.py +++ b/lms/djangoapps/certificates/apis/v0/tests/test_views.py @@ -309,7 +309,7 @@ def test_no_certificate(self): def test_query_counts(self): # Test student with no certificates student_no_cert = UserFactory.create(password=self.user_password) - with self.assertNumQueries(21, table_ignorelist=WAFFLE_TABLES): + with self.assertNumQueries(20, table_ignorelist=WAFFLE_TABLES): resp = self.get_response( AuthType.jwt, requesting_user=self.global_staff, diff --git a/lms/djangoapps/course_api/tests/test_views.py b/lms/djangoapps/course_api/tests/test_views.py index 4065c54a983b..aab9769661a2 100644 --- a/lms/djangoapps/course_api/tests/test_views.py +++ b/lms/djangoapps/course_api/tests/test_views.py @@ -434,7 +434,7 @@ def test_too_many_courses(self): self.setup_user(self.audit_user) # These query counts were found empirically - query_counts = [53, 46, 46, 46, 46, 46, 46, 46, 46, 46, 16] + query_counts = [52, 46, 46, 46, 46, 46, 46, 46, 46, 46, 16] ordered_course_ids = sorted([str(cid) for cid in (course_ids + [c.id for c in self.courses])]) self.clear_caches() diff --git a/lms/djangoapps/courseware/tests/test_views.py b/lms/djangoapps/courseware/tests/test_views.py index 1d6c358ffd19..ef3d4741769e 100644 --- a/lms/djangoapps/courseware/tests/test_views.py +++ b/lms/djangoapps/courseware/tests/test_views.py @@ -365,7 +365,7 @@ def test_index_query_counts(self): self.client.login(username=self.user.username, password=self.user_password) CourseEnrollment.enroll(self.user, course.id) - with self.assertNumQueries(178, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): + with self.assertNumQueries(177, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): with check_mongo_calls(3): url = reverse( 'courseware_section', @@ -1496,8 +1496,8 @@ def test_view_certificate_link(self): self.assertContains(resp, "earned a certificate for this course.") @ddt.data( - (True, 54), - (False, 54), + (True, 53), + (False, 53), ) @ddt.unpack def test_progress_queries_paced_courses(self, self_paced, query_count): @@ -1512,7 +1512,7 @@ def test_progress_queries(self): ContentTypeGatingConfig.objects.create(enabled=True, enabled_as_of=datetime(2018, 1, 1)) self.setup_course() with self.assertNumQueries( - 54, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST + 53, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST ), check_mongo_calls(2): self._get_progress_page() diff --git a/lms/djangoapps/discussion/django_comment_client/base/tests.py b/lms/djangoapps/discussion/django_comment_client/base/tests.py index 7eb99020d608..6aa9c3e8e9fd 100644 --- a/lms/djangoapps/discussion/django_comment_client/base/tests.py +++ b/lms/djangoapps/discussion/django_comment_client/base/tests.py @@ -410,7 +410,7 @@ def inner(self, default_store, block_count, mongo_calls, sql_queries, *args, **k return inner @ddt.data( - (ModuleStoreEnum.Type.split, 3, 8, 43), + (ModuleStoreEnum.Type.split, 3, 8, 42), ) @ddt.unpack @count_queries @@ -418,7 +418,7 @@ def test_create_thread(self, mock_request): self.create_thread_helper(mock_request) @ddt.data( - (ModuleStoreEnum.Type.split, 3, 6, 42), + (ModuleStoreEnum.Type.split, 3, 6, 41), ) @ddt.unpack @count_queries diff --git a/lms/djangoapps/instructor/tests/test_api.py b/lms/djangoapps/instructor/tests/test_api.py index 00b0fc56b05f..0cb7e009454e 100644 --- a/lms/djangoapps/instructor/tests/test_api.py +++ b/lms/djangoapps/instructor/tests/test_api.py @@ -713,7 +713,7 @@ def test_bad_file_upload_type(self): """ Try uploading some non-CSV file and verify that it is rejected """ - uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \x00\x01").read()) + uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \xC3\x01").read()) response = self.client.post(self.url, {'students_list': uploaded_file}) assert response.status_code == 200 data = json.loads(response.content.decode('utf-8')) @@ -920,15 +920,16 @@ def test_users_created_and_enrolled_successfully_if_others_fail(self): manual_enrollments = ManualEnrollmentAudit.objects.all() assert manual_enrollments.count() == 2 - @patch('lms.djangoapps.instructor.views.api', 'generate_random_string', - Mock(side_effect=['first', 'first', 'second'])) def test_generate_unique_password_no_reuse(self): """ generate_unique_password should generate a unique password string that hasn't been generated before. """ - generated_password = ['first'] - password = generate_unique_password(generated_password, 12) - assert password != 'first' + with patch('lms.djangoapps.instructor.views.api.generate_random_string') as mock: + mock.side_effect = ['first', 'first', 'second'] + + generated_password = ['first'] + password = generate_unique_password(generated_password, 12) + assert password != 'first' @patch.dict(settings.FEATURES, {'ALLOW_AUTOMATED_SIGNUPS': False}) def test_allow_automated_signups_flag_not_set(self): diff --git a/lms/djangoapps/instructor/tests/test_certificates.py b/lms/djangoapps/instructor/tests/test_certificates.py index 9921ae6d6831..ce3433f312d8 100644 --- a/lms/djangoapps/instructor/tests/test_certificates.py +++ b/lms/djangoapps/instructor/tests/test_certificates.py @@ -888,7 +888,7 @@ def test_bad_file_upload_type(self): """ Try uploading CSV file with invalid binary data and verify that it is rejected """ - uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \x00\x01").read()) + uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \xC3\x01").read()) response = self.client.post(self.url, {'students_list': uploaded_file}) assert response.status_code == 200 data = json.loads(response.content.decode('utf-8')) diff --git a/lms/envs/common.py b/lms/envs/common.py index 30ba71f62aad..4fd9ba08ada5 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -2286,8 +2286,6 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring 'openedx.core.djangoapps.cors_csrf.middleware.CorsCSRFMiddleware', 'openedx.core.djangoapps.cors_csrf.middleware.CsrfCrossDomainCookieMiddleware', - 'splash.middleware.SplashMiddleware', - 'openedx.core.djangoapps.geoinfo.middleware.CountryMiddleware', 'openedx.core.djangoapps.embargo.middleware.EmbargoMiddleware', @@ -3162,9 +3160,6 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring # Notes 'lms.djangoapps.edxnotes', - # Splash screen - 'splash', - # User API 'rest_framework', diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py new file mode 100644 index 000000000000..24d69dfd9fcf --- /dev/null +++ b/openedx/core/djangoapps/content/search/api.py @@ -0,0 +1,520 @@ +""" +Content index and search API using Meilisearch +""" +from __future__ import annotations + +import logging +import time +from contextlib import contextmanager +from datetime import datetime, timedelta, timezone +from functools import wraps +from typing import Callable, Generator + +from django.conf import settings +from django.contrib.auth import get_user_model +from django.core.cache import cache +from meilisearch import Client as MeilisearchClient +from meilisearch.errors import MeilisearchError +from meilisearch.models.task import TaskInfo +from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.locator import LibraryLocatorV2 +from common.djangoapps.student.roles import GlobalStaff +from rest_framework.request import Request +from common.djangoapps.student.role_helpers import get_course_roles +from openedx.core.djangoapps.content.search.models import get_access_ids_for_request + +from openedx.core.djangoapps.content_libraries import api as lib_api +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import modulestore + +from .documents import ( + Fields, + meili_id_from_opaque_key, + searchable_doc_for_course_block, + searchable_doc_for_library_block, + searchable_doc_tags +) + +log = logging.getLogger(__name__) + +User = get_user_model() + +STUDIO_INDEX_SUFFIX = "studio_content" + +if hasattr(settings, "MEILISEARCH_INDEX_PREFIX"): + STUDIO_INDEX_NAME = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_SUFFIX +else: + STUDIO_INDEX_NAME = STUDIO_INDEX_SUFFIX + + +_MEILI_CLIENT = None +_MEILI_API_KEY_UID = None + +LOCK_EXPIRE = 24 * 60 * 60 # Lock expires in 24 hours + +MAX_ACCESS_IDS_IN_FILTER = 1_000 +MAX_ORGS_IN_FILTER = 1_000 + + +@contextmanager +def _index_rebuild_lock() -> Generator[str, None, None]: + """ + Lock to prevent that more than one rebuild is running at the same time + """ + lock_id = f"lock-meilisearch-index-{STUDIO_INDEX_NAME}" + new_index_name = STUDIO_INDEX_NAME + "_new" + + status = cache.add(lock_id, new_index_name, LOCK_EXPIRE) + + if not status: + # Lock already acquired + raise RuntimeError("Rebuild already in progress") + + # Lock acquired + try: + yield new_index_name + finally: + # Release the lock + cache.delete(lock_id) + + +def _get_running_rebuild_index_name() -> str | None: + lock_id = f"lock-meilisearch-index-{STUDIO_INDEX_NAME}" + + return cache.get(lock_id) + + +def _get_meilisearch_client(): + """ + Get the Meiliesearch client + """ + global _MEILI_CLIENT # pylint: disable=global-statement + + # Connect to Meilisearch + if not is_meilisearch_enabled(): + raise RuntimeError("MEILISEARCH_ENABLED is not set - search functionality disabled.") + + if _MEILI_CLIENT is not None: + return _MEILI_CLIENT + + _MEILI_CLIENT = MeilisearchClient(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + try: + _MEILI_CLIENT.health() + except MeilisearchError as err: + _MEILI_CLIENT = None + raise ConnectionError("Unable to connect to Meilisearch") from err + return _MEILI_CLIENT + + +def clear_meilisearch_client(): + global _MEILI_CLIENT # pylint: disable=global-statement + + _MEILI_CLIENT = None + + +def _get_meili_api_key_uid(): + """ + Helper method to get the UID of the API key we're using for Meilisearch + """ + global _MEILI_API_KEY_UID # pylint: disable=global-statement + if _MEILI_API_KEY_UID is None: + _MEILI_API_KEY_UID = _get_meilisearch_client().get_key(settings.MEILISEARCH_API_KEY).uid + return _MEILI_API_KEY_UID + + +def _wait_for_meili_task(info: TaskInfo) -> None: + """ + Simple helper method to wait for a Meilisearch task to complete + This method will block until the task is completed, so it should only be used in celery tasks + or management commands. + """ + client = _get_meilisearch_client() + current_status = client.get_task(info.task_uid) + while current_status.status in ("enqueued", "processing"): + time.sleep(0.5) + current_status = client.get_task(info.task_uid) + if current_status.status != "succeeded": + try: + err_reason = current_status.error['message'] + except (TypeError, KeyError): + err_reason = "Unknown error" + raise MeilisearchError(err_reason) + + +def _wait_for_meili_tasks(info_list: list[TaskInfo]) -> None: + """ + Simple helper method to wait for multiple Meilisearch tasks to complete + """ + while info_list: + info = info_list.pop() + _wait_for_meili_task(info) + + +def _index_exists(index_name: str) -> bool: + """ + Check if an index exists + """ + client = _get_meilisearch_client() + try: + client.get_index(index_name) + except MeilisearchError as err: + if err.code == "index_not_found": + return False + else: + raise err + return True + + +@contextmanager +def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generator[str, None, None]: + """ + Create a new temporary Meilisearch index, populate it, then swap it to + become the active index. + + Args: + status_cb (Callable): A callback function to report status messages + """ + if status_cb is None: + status_cb = log.info + + client = _get_meilisearch_client() + status_cb("Checking index...") + with _index_rebuild_lock() as temp_index_name: + if _index_exists(temp_index_name): + status_cb("Temporary index already exists. Deleting it...") + _wait_for_meili_task(client.delete_index(temp_index_name)) + + status_cb("Creating new index...") + _wait_for_meili_task( + client.create_index(temp_index_name, {'primaryKey': 'id'}) + ) + new_index_created = client.get_index(temp_index_name).created_at + + yield temp_index_name + + if not _index_exists(STUDIO_INDEX_NAME): + # We have to create the "target" index before we can successfully swap the new one into it: + status_cb("Preparing to swap into index (first time)...") + _wait_for_meili_task(client.create_index(STUDIO_INDEX_NAME)) + status_cb("Swapping index...") + client.swap_indexes([{'indexes': [temp_index_name, STUDIO_INDEX_NAME]}]) + # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status + # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 + while True: + time.sleep(1) + if client.get_index(STUDIO_INDEX_NAME).created_at != new_index_created: + status_cb("Waiting for swap completion...") + else: + break + status_cb("Deleting old index...") + _wait_for_meili_task(client.delete_index(temp_index_name)) + + +def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None: + """ + Recurse the children of an XBlock and call the given function for each + + The main purpose of this is just to wrap the loading of each child in + try...except. Otherwise block.get_children() would do what we need. + """ + if block.has_children: + for child_id in block.children: + try: + child = block.get_child(child_id) + except Exception as err: # pylint: disable=broad-except + log.exception(err) + if status_cb is not None: + status_cb(f"Unable to load block {child_id}") + else: + fn(child) + + +def _update_index_docs(docs) -> None: + """ + Helper function that updates the documents in the search index + + If there is a rebuild in progress, the document will also be added to the new index. + """ + if not docs: + return + + client = _get_meilisearch_client() + current_rebuild_index_name = _get_running_rebuild_index_name() + + tasks = [] + if current_rebuild_index_name: + # If there is a rebuild in progress, the document will also be added to the new index. + tasks.append(client.index(current_rebuild_index_name).update_documents(docs)) + tasks.append(client.index(STUDIO_INDEX_NAME).update_documents(docs)) + + _wait_for_meili_tasks(tasks) + + +def only_if_meilisearch_enabled(f): + """ + Only call `f` if meilisearch is enabled + """ + @wraps(f) + def wrapper(*args, **kwargs): + """Wraps the decorated function.""" + if is_meilisearch_enabled(): + return f(*args, **kwargs) + return wrapper + + +def is_meilisearch_enabled() -> bool: + """ + Returns whether Meilisearch is enabled + """ + if hasattr(settings, "MEILISEARCH_ENABLED"): + return settings.MEILISEARCH_ENABLED + + return False + + +# pylint: disable=too-many-statements +def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None: + """ + Rebuild the Meilisearch index from scratch + """ + if status_cb is None: + status_cb = log.info + + client = _get_meilisearch_client() + store = modulestore() + + # Get the lists of libraries + status_cb("Counting libraries...") + lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] + num_libraries = len(lib_keys) + + # Get the list of courses + status_cb("Counting courses...") + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + all_courses = store.get_courses() + num_courses = len(all_courses) + + # Some counters so we can track our progress as indexing progresses: + num_contexts = num_courses + num_libraries + num_contexts_done = 0 # How many courses/libraries we've indexed + num_blocks_done = 0 # How many individual components/XBlocks we've indexed + + status_cb(f"Found {num_courses} courses and {num_libraries} libraries.") + with _using_temp_index(status_cb) as temp_index_name: + ############## Configure the index ############## + + # The following index settings are best changed on an empty index. + # Changing them on a populated index will "re-index all documents in the index, which can take some time" + # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. + + # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): + client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) + # Mark which attributes can be used for filtering/faceted search: + client.index(temp_index_name).update_filterable_attributes([ + Fields.block_type, + Fields.context_key, + Fields.org, + Fields.tags, + Fields.tags + "." + Fields.tags_taxonomy, + Fields.tags + "." + Fields.tags_level0, + Fields.tags + "." + Fields.tags_level1, + Fields.tags + "." + Fields.tags_level2, + Fields.tags + "." + Fields.tags_level3, + Fields.type, + Fields.access_id, + ]) + # Mark which attributes are used for keyword search, in order of importance: + client.index(temp_index_name).update_searchable_attributes([ + Fields.display_name, + Fields.block_id, + Fields.content, + Fields.tags, + # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. + ]) + + ############## Libraries ############## + status_cb("Indexing libraries...") + for lib_key in lib_keys: + status_cb(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}") + try: + docs = [] + for component in lib_api.get_library_components(lib_key): + metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) + doc = {} + doc.update(searchable_doc_for_library_block(metadata)) + doc.update(searchable_doc_tags(metadata.usage_key)) + docs.append(doc) + + num_blocks_done += 1 + if docs: + # Add all the docs in this library at once (usually faster than adding one at a time): + _wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + except Exception as err: # pylint: disable=broad-except + status_cb(f"Error indexing library {lib_key}: {err}") + finally: + num_contexts_done += 1 + + ############## Courses ############## + status_cb("Indexing courses...") + for course in all_courses: + status_cb( + f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})" + ) + docs = [] + + # Pre-fetch the course with all of its children: + course = store.get_course(course.id, depth=None) + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block) + doc.update(searchable_doc_tags(block.usage_key)) + docs.append(doc) # pylint: disable=cell-var-from-loop + _recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop + + _recurse_children(course, add_with_children) + + if docs: + # Add all the docs in this course at once (usually faster than adding one at a time): + _wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_contexts_done += 1 + num_blocks_done += len(docs) + + status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.") + + +def upsert_xblock_index_doc(usage_key: UsageKey, recursive: bool = True) -> None: + """ + Creates or updates the document for the given XBlock in the search index + + + Args: + usage_key (UsageKey): The usage key of the XBlock to index + recursive (bool): If True, also index all children of the XBlock + """ + xblock = modulestore().get_item(usage_key) + + docs = [] + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block) + docs.append(doc) + if recursive: + _recurse_children(block, add_with_children) + + add_with_children(xblock) + + _update_index_docs(docs) + + +def delete_index_doc(usage_key: UsageKey) -> None: + """ + Deletes the document for the given XBlock from the search index + + Args: + usage_key (UsageKey): The usage key of the XBlock to be removed from the index + """ + current_rebuild_index_name = _get_running_rebuild_index_name() + + client = _get_meilisearch_client() + + tasks = [] + if current_rebuild_index_name: + # If there is a rebuild in progress, the document will also be deleted from the new index. + tasks.append(client.index(current_rebuild_index_name).delete_document(meili_id_from_opaque_key(usage_key))) + tasks.append(client.index(STUDIO_INDEX_NAME).delete_document(meili_id_from_opaque_key(usage_key))) + + _wait_for_meili_tasks(tasks) + + +def upsert_library_block_index_doc(usage_key: UsageKey) -> None: + """ + Creates or updates the document for the given Library Block in the search index + """ + + library_block = lib_api.get_component_from_usage_key(usage_key) + library_block_metadata = lib_api.LibraryXBlockMetadata.from_component(usage_key.context_key, library_block) + + docs = [ + searchable_doc_for_library_block(library_block_metadata) + ] + + _update_index_docs(docs) + + +def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None: + """ + Creates or updates the documents for the given Content Library in the search index + """ + docs = [] + for component in lib_api.get_library_components(library_key): + metadata = lib_api.LibraryXBlockMetadata.from_component(library_key, component) + doc = searchable_doc_for_library_block(metadata) + docs.append(doc) + + _update_index_docs(docs) + + +def upsert_block_tags_index_docs(usage_key: UsageKey): + """ + Updates the tags data in documents for the given Course/Library block + """ + doc = {Fields.id: meili_id_from_opaque_key(usage_key)} + doc.update(searchable_doc_tags(usage_key)) + _update_index_docs([doc]) + + +def _get_user_orgs(request: Request) -> list[str]: + """ + Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole. + + Note: org-level roles have course_id=None to distinguish them from course-level roles. + """ + course_roles = get_course_roles(request.user) + return list(set( + role.org + for role in course_roles + if role.course_id is None and role.role in ['staff', 'instructor'] + )) + + +def _get_meili_access_filter(request: Request) -> dict: + """ + Return meilisearch filter based on the requesting user's permissions. + """ + # Global staff can see anything, so no filters required. + if GlobalStaff().has_user(request.user): + return {} + + # Everyone else is limited to their org staff roles... + user_orgs = _get_user_orgs(request)[:MAX_ORGS_IN_FILTER] + + # ...or the N most recent courses and libraries they can access. + access_ids = get_access_ids_for_request(request, omit_orgs=user_orgs)[:MAX_ACCESS_IDS_IN_FILTER] + return { + "filter": f"org IN {user_orgs} OR access_id IN {access_ids}", + } + + +def generate_user_token_for_studio_search(request): + """ + Returns a Meilisearch API key that only allows the user to search content that they have permission to view + """ + expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) + + search_rules = { + STUDIO_INDEX_NAME: _get_meili_access_filter(request), + } + # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. + restricted_api_key = _get_meilisearch_client().generate_tenant_token( + api_key_uid=_get_meili_api_key_uid(), + search_rules=search_rules, + expires_at=expires_at, + ) + + return { + "url": settings.MEILISEARCH_PUBLIC_URL, + "index_name": STUDIO_INDEX_NAME, + "api_key": restricted_api_key, + } diff --git a/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst index 1f355409da0e..f7430121605d 100644 --- a/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst +++ b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst @@ -104,6 +104,8 @@ Decision new ``content/search`` Django app, so it's relatively easy to swap out later if this experiment doesn't pan out. 4. We will not use ``edx-search`` for the new search functionality. +5. For the experiment, we won't use Meilisearch during tests, but we expect to + add that in the future if we move forward with replacing Elasticsearch completely. Consequences diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index d32956c688d0..062c5c44a827 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -2,11 +2,12 @@ Utilities related to indexing content for search """ from __future__ import annotations -from hashlib import blake2b + import logging +from hashlib import blake2b from django.utils.text import slugify -from opaque_keys.edx.keys import UsageKey, LearningContextKey +from opaque_keys.edx.keys import LearningContextKey, UsageKey from openedx.core.djangoapps.content.search.models import SearchAccess from openedx.core.djangoapps.content_libraries import api as lib_api @@ -14,7 +15,6 @@ from openedx.core.djangoapps.xblock import api as xblock_api log = logging.getLogger(__name__) -STUDIO_INDEX_NAME = "studio_content" class Fields: @@ -64,7 +64,7 @@ class DocType: library_block = "library_block" -def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: +def meili_id_from_opaque_key(usage_key: UsageKey) -> str: """ Meilisearch requires each document to have a primary key that's either an integer or a string composed of alphanumeric characters (a-z A-Z 0-9), @@ -98,7 +98,6 @@ class implementation returns only: {"content": {"display_name": "..."}, "content_type": "..."} """ block_data = { - Fields.id: _meili_id_from_opaque_key(block.usage_key), Fields.usage_key: str(block.usage_key), Fields.block_id: str(block.usage_key.block_id), Fields.display_name: xblock_api.get_block_display_name(block), @@ -171,9 +170,11 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: # Note that we could improve performance for indexing many components from the same library/course, # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the # tags for each component separately. - all_tags = tagging_api.get_object_tags(object_id).all() + all_tags = tagging_api.get_object_tags(str(object_id)).all() if not all_tags: - return {} + # Clear out tags in the index when unselecting all tags for the block, otherwise + # it would remain the last value if a cleared Fields.tags field is not included + return {Fields.tags: {}} result = { Fields.tags_taxonomy: [], Fields.tags_level0: [], @@ -207,23 +208,38 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: return {Fields.tags: result} -def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: +def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict: """ Generate a dictionary document suitable for ingestion into a search engine like Meilisearch or Elasticsearch, so that the given library block can be found using faceted search. """ - library_name = lib_api.get_library(metadata.usage_key.context_key).title - doc = {} - try: - block = xblock_api.load_block(metadata.usage_key, user=None) - except Exception as err: # pylint: disable=broad-except - log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}") + library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title + block = xblock_api.load_block(xblock_metadata.usage_key, user=None) + + doc = { + Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key), + Fields.type: DocType.library_block, + } + doc.update(_fields_from_block(block)) - doc.update(_tags_for_content_object(metadata.usage_key)) - doc[Fields.type] = DocType.library_block + # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: doc[Fields.breadcrumbs] = [{"display_name": library_name}] + + return doc + + +def searchable_doc_tags(usage_key: UsageKey) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, with the tags data for the given content object. + """ + doc = { + Fields.id: meili_id_from_opaque_key(usage_key), + } + doc.update(_tags_for_content_object(usage_key)) + return doc @@ -233,7 +249,11 @@ def searchable_doc_for_course_block(block) -> dict: like Meilisearch or Elasticsearch, so that the given course block can be found using faceted search. """ - doc = _fields_from_block(block) - doc.update(_tags_for_content_object(block.usage_key)) - doc[Fields.type] = DocType.course_block + doc = { + Fields.id: meili_id_from_opaque_key(block.usage_key), + Fields.type: DocType.course_block, + } + + doc.update(_fields_from_block(block)) + return doc diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index 9184b2b2e273..1a80b2215781 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -1,14 +1,38 @@ """ Signal/event handlers for content search """ + +import logging + from django.db.models.signals import post_delete from django.dispatch import receiver -from openedx_events.content_authoring.data import ContentLibraryData -from openedx_events.content_authoring.signals import CONTENT_LIBRARY_DELETED +from openedx_events.content_authoring.data import ContentLibraryData, ContentObjectData, LibraryBlockData, XBlockData +from openedx_events.content_authoring.signals import ( + CONTENT_LIBRARY_DELETED, + CONTENT_LIBRARY_UPDATED, + LIBRARY_BLOCK_CREATED, + LIBRARY_BLOCK_DELETED, + XBLOCK_CREATED, + XBLOCK_DELETED, + XBLOCK_UPDATED, + CONTENT_OBJECT_TAGS_CHANGED, +) +from openedx.core.djangoapps.content_tagging.utils import get_content_key_from_string from openedx.core.djangoapps.content.course_overviews.models import CourseOverview from openedx.core.djangoapps.content.search.models import SearchAccess +from .api import only_if_meilisearch_enabled, upsert_block_tags_index_docs +from .tasks import ( + delete_library_block_index_doc, + delete_xblock_index_doc, + update_content_library_index_docs, + upsert_library_block_index_doc, + upsert_xblock_index_doc +) + +log = logging.getLogger(__name__) + # Using post_delete here because there is no COURSE_DELETED event defined. @receiver(post_delete, sender=CourseOverview) @@ -21,3 +45,114 @@ def delete_course_search_access(sender, instance, **kwargs): # pylint: disable= def delete_library_search_access(content_library: ContentLibraryData, **kwargs): """Deletes the SearchAccess instance for deleted content libraries""" SearchAccess.objects.filter(context_key=content_library.library_key).delete() + + +@receiver(XBLOCK_CREATED) +@only_if_meilisearch_enabled +def xblock_created_handler(**kwargs) -> None: + """ + Create the index for the XBlock + """ + xblock_info = kwargs.get("xblock_info", None) + if not xblock_info or not isinstance(xblock_info, XBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + upsert_xblock_index_doc.delay( + str(xblock_info.usage_key), + recursive=False, + ) + + +@receiver(XBLOCK_UPDATED) +@only_if_meilisearch_enabled +def xblock_updated_handler(**kwargs) -> None: + """ + Update the index for the XBlock and its children + """ + xblock_info = kwargs.get("xblock_info", None) + if not xblock_info or not isinstance(xblock_info, XBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + upsert_xblock_index_doc.delay( + str(xblock_info.usage_key), + recursive=True, # Update all children because the breadcrumb may have changed + ) + + +@receiver(XBLOCK_DELETED) +@only_if_meilisearch_enabled +def xblock_deleted_handler(**kwargs) -> None: + """ + Delete the index for the XBlock + """ + xblock_info = kwargs.get("xblock_info", None) + if not xblock_info or not isinstance(xblock_info, XBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + delete_xblock_index_doc.delay(str(xblock_info.usage_key)) + + +@receiver(LIBRARY_BLOCK_CREATED) +@only_if_meilisearch_enabled +def library_block_updated_handler(**kwargs) -> None: + """ + Create or update the index for the content library block + """ + library_block_data = kwargs.get("library_block", None) + if not library_block_data or not isinstance(library_block_data, LibraryBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + upsert_library_block_index_doc.delay(str(library_block_data.usage_key)) + + +@receiver(LIBRARY_BLOCK_DELETED) +@only_if_meilisearch_enabled +def library_block_deleted(**kwargs) -> None: + """ + Delete the index for the content library block + """ + library_block_data = kwargs.get("library_block", None) + if not library_block_data or not isinstance(library_block_data, LibraryBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + delete_library_block_index_doc.delay(str(library_block_data.usage_key)) + + +@receiver(CONTENT_LIBRARY_UPDATED) +@only_if_meilisearch_enabled +def content_library_updated_handler(**kwargs) -> None: + """ + Update the index for the content library + """ + content_library_data = kwargs.get("content_library", None) + if not content_library_data or not isinstance(content_library_data, ContentLibraryData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + update_content_library_index_docs.delay(str(content_library_data.library_key)) + + +@receiver(CONTENT_OBJECT_TAGS_CHANGED) +@only_if_meilisearch_enabled +def content_object_tags_changed_handler(**kwargs) -> None: + """ + Update the tags data in the index for the Content Object + """ + content_object_tags = kwargs.get("content_object", None) + if not content_object_tags or not isinstance(content_object_tags, ContentObjectData): + log.error("Received null or incorrect data for event") + return + + try: + # Check if valid if course or library block + get_content_key_from_string(content_object_tags.object_id) + except ValueError: + log.error("Received invalid content object id") + return + + upsert_block_tags_index_docs(content_object_tags.object_id) diff --git a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py deleted file mode 100644 index 18c7681879cb..000000000000 --- a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Mixin for Django management commands that interact with Meilisearch -""" -from contextlib import contextmanager -import time - -from django.conf import settings -from django.core.management import CommandError -import meilisearch -from meilisearch.errors import MeilisearchError -from meilisearch.models.task import TaskInfo - - -class MeiliCommandMixin: - """ - Mixin for Django management commands that interact with Meilisearch - """ - def get_meilisearch_client(self): - """ - Get the Meiliesearch client - """ - if hasattr(self, "_meili_client"): - return self._meili_client - # Connect to Meilisearch - if not settings.MEILISEARCH_ENABLED: - raise CommandError("MEILISEARCH_ENABLED is not set - search functionality disabled.") - - self._meili_client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - try: - self._meili_client.health() - except MeilisearchError as err: - self.stderr.write(err.message) # print this because 'raise...from...' doesn't print the details - raise CommandError("Unable to connect to Meilisearch") from err - return self._meili_client - - def wait_for_meili_task(self, info: TaskInfo): - """ - Simple helper method to wait for a Meilisearch task to complete - """ - client = self.get_meilisearch_client() - current_status = client.get_task(info.task_uid) - while current_status.status in ("enqueued", "processing"): - self.stdout.write("...") - time.sleep(1) - current_status = client.get_task(info.task_uid) - if current_status.status != "succeeded": - self.stderr.write(f"Task has status: {current_status.status}") - self.stderr.write(str(current_status.error)) - try: - err_reason = current_status.error['message'] - except (TypeError, KeyError): - err_reason = "Unknown error" - raise MeilisearchError(err_reason) - - def index_exists(self, index_name: str) -> bool: - """ - Check if an index exists - """ - client = self.get_meilisearch_client() - try: - client.get_index(index_name) - except MeilisearchError as err: - if err.code == "index_not_found": - return False - else: - raise err - return True - - @contextmanager - def using_temp_index(self, target_index): - """ - Create a new temporary Meilisearch index, populate it, then swap it to - become the active index. - """ - client = self.get_meilisearch_client() - self.stdout.write("Checking index...") - temp_index_name = target_index + "_new" - if self.index_exists(temp_index_name): - self.stdout.write("Temporary index already exists. Deleting it...") - self.wait_for_meili_task(client.delete_index(temp_index_name)) - - self.stdout.write("Creating new index...") - self.wait_for_meili_task( - client.create_index(temp_index_name, {'primaryKey': 'id'}) - ) - new_index_created = client.get_index(temp_index_name).created_at - - yield temp_index_name - - if not self.index_exists(target_index): - # We have to create the "target" index before we can successfully swap the new one into it: - self.stdout.write("Preparing to swap into index (first time)...") - self.wait_for_meili_task(client.create_index(target_index)) - self.stdout.write("Swapping index...") - client.swap_indexes([{'indexes': [temp_index_name, target_index]}]) - # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status - # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 - while True: - time.sleep(1) - if client.get_index(target_index).created_at != new_index_created: - self.stdout.write("Waiting for swap completion...") - else: - break - self.stdout.write("Deleting old index...") - self.wait_for_meili_task(client.delete_index(temp_index_name)) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 8d39ef3746eb..3767ebcba6c9 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -5,28 +5,12 @@ See also cms/djangoapps/contentstore/management/commands/reindex_course.py which indexes LMS (published) courses in ElasticSearch. """ -import logging -import time - -from django.conf import settings from django.core.management import BaseCommand, CommandError -from openedx.core.djangoapps.content_libraries import api as lib_api -from openedx.core.djangoapps.content.search.documents import ( - Fields, - searchable_doc_for_course_block, - searchable_doc_for_library_block, - STUDIO_INDEX_NAME, -) -from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.django import modulestore -from .meili_mixin import MeiliCommandMixin - - -log = logging.getLogger(__name__) +from ... import api -class Command(MeiliCommandMixin, BaseCommand): +class Command(BaseCommand): """ Build or re-build the search index for courses and libraries (in Studio, i.e. Draft mode) @@ -41,120 +25,13 @@ def handle(self, *args, **options): """ Build a new search index for Studio, containing content from courses and libraries """ + if not api.is_meilisearch_enabled(): + raise CommandError("Meilisearch is not enabled. Please set MEILISEARCH_ENABLED to True in your settings.") + if not options["experimental"]: raise CommandError( "This command is experimental and not recommended for production. " "Use the --experimental argument to acknowledge and run it." ) - start_time = time.perf_counter() - client = self.get_meilisearch_client() - store = modulestore() - - # Get the lists of libraries - self.stdout.write("Counting libraries...") - lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] - num_libraries = len(lib_keys) - - # Get the list of courses - self.stdout.write("Counting courses...") - with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): - all_courses = store.get_courses() - num_courses = len(all_courses) - - # Some counters so we can track our progress as indexing progresses: - num_contexts = num_courses + num_libraries - num_contexts_done = 0 # How many courses/libraries we've indexed - num_blocks_done = 0 # How many individual components/XBlocks we've indexed - - self.stdout.write(f"Found {num_courses} courses and {num_libraries} libraries.") - index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME - with self.using_temp_index(index_name) as temp_index_name: - ############## Configure the index ############## - - # The following index settings are best changed on an empty index. - # Changing them on a populated index will "re-index all documents in the index, which can take some time" - # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. - - # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): - client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) - # Mark which attributes can be used for filtering/faceted search: - client.index(temp_index_name).update_filterable_attributes([ - Fields.block_type, - Fields.context_key, - Fields.org, - Fields.tags, - Fields.type, - Fields.access_id, - ]) - # Mark which attributes are used for keyword search, in order of importance: - client.index(temp_index_name).update_searchable_attributes([ - Fields.display_name, - Fields.block_id, - Fields.content, - Fields.tags, - # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. - ]) - ############## Libraries ############## - self.stdout.write("Indexing libraries...") - for lib_key in lib_keys: - self.stdout.write(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}") - docs = [] - for component in lib_api.get_library_components(lib_key): - metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) - doc = searchable_doc_for_library_block(metadata) - docs.append(doc) - num_blocks_done += 1 - if docs: - # Add all the docs in this library at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_contexts_done += 1 - - ############## Courses ############## - self.stdout.write("Indexing courses...") - for course in all_courses: - self.stdout.write( - f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})" - ) - docs = [] - - # Pre-fetch the course with all of its children: - course = store.get_course(course.id, depth=None) - - def add_with_children(block): - """ Recursively index the given XBlock/component """ - doc = searchable_doc_for_course_block(block) - docs.append(doc) # pylint: disable=cell-var-from-loop - self.recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop - - self.recurse_children(course, add_with_children) - - if docs: - # Add all the docs in this course at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_contexts_done += 1 - num_blocks_done += len(docs) - - elapsed_time = time.perf_counter() - start_time - self.stdout.write( - f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses " - f"and libraries in {elapsed_time:.0f}s." - ) - - def recurse_children(self, block, fn): - """ - Recurse the children of an XBlock and call the given function for each - - The main purpose of this is just to wrap the loading of each child in - try...except. Otherwise block.get_children() would do what we need. - """ - if block.has_children: - for child_id in block.children: - try: - child = block.get_child(child_id) - except Exception as err: # pylint: disable=broad-except - log.exception(err) - self.stderr.write(f"Unable to load block {child_id}") - else: - fn(child) + api.rebuild_index(self.stdout.write) diff --git a/openedx/core/djangoapps/content/search/tasks.py b/openedx/core/djangoapps/content/search/tasks.py new file mode 100644 index 000000000000..06ea3e777c61 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tasks.py @@ -0,0 +1,83 @@ +""" +Defines asynchronous celery task for content indexing +""" + +from __future__ import annotations + +import logging + +from celery import shared_task +from celery_utils.logged_task import LoggedTask +from edx_django_utils.monitoring import set_code_owner_attribute +from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2 +from meilisearch.errors import MeilisearchError + +from . import api + +log = logging.getLogger(__name__) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def upsert_xblock_index_doc(usage_key_str: str, recursive: bool) -> None: + """ + Celery task to update the content index document for an XBlock + """ + usage_key = UsageKey.from_string(usage_key_str) + + log.info("Updating content index document for XBlock with id: %s", usage_key) + + api.upsert_xblock_index_doc(usage_key, recursive) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def delete_xblock_index_doc(usage_key_str: str) -> None: + """ + Celery task to delete the content index document for an XBlock + """ + usage_key = UsageKey.from_string(usage_key_str) + + log.info("Updating content index document for XBlock with id: %s", usage_key) + + api.delete_index_doc(usage_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def upsert_library_block_index_doc(usage_key_str: str) -> None: + """ + Celery task to update the content index document for a library block + """ + usage_key = LibraryUsageLocatorV2.from_string(usage_key_str) + + log.info("Updating content index document for library block with id: %s", usage_key) + + api.upsert_library_block_index_doc(usage_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def delete_library_block_index_doc(usage_key_str: str) -> None: + """ + Celery task to delete the content index document for a library block + """ + usage_key = LibraryUsageLocatorV2.from_string(usage_key_str) + + log.info("Deleting content index document for library block with id: %s", usage_key) + + api.delete_index_doc(usage_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def update_content_library_index_docs(library_key_str: str) -> None: + """ + Celery task to update the content index documents for all library blocks in a library + """ + library_key = LibraryLocatorV2.from_string(library_key_str) + + log.info("Updating content index documents for library with id: %s", library_key) + + api.upsert_content_library_index_docs(library_key) diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py new file mode 100644 index 000000000000..89913dee41dc --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -0,0 +1,284 @@ +""" +Tests for the Studio content search API. +""" +from __future__ import annotations + +import copy + +from unittest.mock import MagicMock, call, patch + +import ddt +from django.test import override_settings +from organizations.tests.factories import OrganizationFactory + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangoapps.content_libraries import api as library_api +from openedx.core.djangoapps.content_tagging import api as tagging_api +from openedx.core.djangolib.testing.utils import skip_unless_cms +from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase + + +try: + # This import errors in the lms because content.search is not an installed app there. + from .. import api + from ..models import SearchAccess +except RuntimeError: + SearchAccess = {} + +STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" + + +@ddt.ddt +@skip_unless_cms +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestSearchApi(ModuleStoreTestCase): + """ + Tests for the Studio content search and index API. + """ + + MODULESTORE = TEST_DATA_SPLIT_MODULESTORE + + def setUp(self): + super().setUp() + self.user = UserFactory.create() + self.user_id = self.user.id + + self.modulestore_patcher = patch( + "openedx.core.djangoapps.content.search.api.modulestore", return_value=self.store + ) + self.addCleanup(self.modulestore_patcher.stop) + self.modulestore_patcher.start() + + # Clear the Meilisearch client to avoid side effects from other tests + api.clear_meilisearch_client() + + # Create course + self.course = self.store.create_course( + "org1", + "test_course", + "test_run", + self.user_id, + fields={"display_name": "Test Course"}, + ) + course_access, _ = SearchAccess.objects.get_or_create(context_key=self.course.id) + + # Create XBlocks + self.sequential = self.store.create_child(self.user_id, self.course.location, "sequential", "test_sequential") + self.doc_sequential = { + 'id': 'block-v1org1test_coursetest_runtypesequentialblocktest_sequential-f702c144', + 'type': 'course_block', + 'usage_key': 'block-v1:org1+test_course+test_run+type@sequential+block@test_sequential', + 'block_id': 'test_sequential', + 'display_name': 'sequential', + 'block_type': 'sequential', + 'context_key': 'course-v1:org1+test_course+test_run', + 'org': 'org1', + 'breadcrumbs': [{'display_name': 'Test Course'}], + 'content': {}, + 'access_id': course_access.id, + } + self.store.create_child(self.user_id, self.sequential.location, "vertical", "test_vertical") + self.doc_vertical = { + 'id': 'block-v1org1test_coursetest_runtypeverticalblocktest_vertical-e76a10a4', + 'type': 'course_block', + 'usage_key': 'block-v1:org1+test_course+test_run+type@vertical+block@test_vertical', + 'block_id': 'test_vertical', + 'display_name': 'vertical', + 'block_type': 'vertical', + 'context_key': 'course-v1:org1+test_course+test_run', + 'org': 'org1', + 'breadcrumbs': [ + {'display_name': 'Test Course'}, + {'display_name': 'sequential'} + ], + 'content': {}, + 'access_id': course_access.id, + } + + # Create a content library: + self.library = library_api.create_library( + library_type=library_api.COMPLEX, + org=OrganizationFactory.create(short_name="org1"), + slug="lib", + title="Library", + ) + lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key) + # Populate it with a problem: + self.problem = library_api.create_library_block(self.library.key, "problem", "p1") + self.doc_problem = { + "id": "lborg1libproblemp1-a698218e", + "usage_key": "lb:org1:lib:problem:p1", + "block_id": "p1", + "display_name": "Blank Problem", + "block_type": "problem", + "context_key": "lib:org1:lib", + "org": "org1", + "breadcrumbs": [{"display_name": "Library"}], + "content": {"problem_types": [], "capa_content": " "}, + "type": "library_block", + "access_id": lib_access.id, + } + + # Create a couple of taxonomies with tags + self.taxonomyA = tagging_api.create_taxonomy(name="A", export_id="A") + self.taxonomyB = tagging_api.create_taxonomy(name="B", export_id="B") + tagging_api.set_taxonomy_orgs(self.taxonomyA, all_orgs=True) + tagging_api.set_taxonomy_orgs(self.taxonomyB, all_orgs=True) + tagging_api.add_tag_to_taxonomy(self.taxonomyA, "one") + tagging_api.add_tag_to_taxonomy(self.taxonomyA, "two") + tagging_api.add_tag_to_taxonomy(self.taxonomyB, "three") + tagging_api.add_tag_to_taxonomy(self.taxonomyB, "four") + + @override_settings(MEILISEARCH_ENABLED=False) + def test_reindex_meilisearch_disabled(self, mock_meilisearch): + with self.assertRaises(RuntimeError): + api.rebuild_index() + + mock_meilisearch.return_value.swap_indexes.assert_not_called() + + @override_settings(MEILISEARCH_ENABLED=True) + def test_reindex_meilisearch(self, mock_meilisearch): + + # Add tags field to doc, since reindex calls includes tags + doc_sequential = copy.deepcopy(self.doc_sequential) + doc_sequential["tags"] = {} + doc_vertical = copy.deepcopy(self.doc_vertical) + doc_vertical["tags"] = {} + doc_problem = copy.deepcopy(self.doc_problem) + doc_problem["tags"] = {} + + api.rebuild_index() + mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls( + [ + call([doc_sequential, doc_vertical]), + call([doc_problem]), + ], + any_order=True, + ) + + @ddt.data( + True, + False + ) + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_xblock_metadata(self, recursive, mock_meilisearch): + """ + Test indexing an XBlock. + """ + api.upsert_xblock_index_doc(self.sequential.usage_key, recursive=recursive) + + if recursive: + expected_docs = [self.doc_sequential, self.doc_vertical] + else: + expected_docs = [self.doc_sequential] + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with(expected_docs) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_xblock_tags(self, mock_meilisearch): + """ + Test indexing an XBlock with tags. + """ + + # Tag XBlock (these internally call `upsert_block_tags_index_docs`) + tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyA, ["one", "two"]) + tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyB, ["three", "four"]) + + # Build expected docs with tags at each stage + doc_sequential_with_tags1 = { + "id": self.doc_sequential["id"], + "tags": { + 'taxonomy': ['A'], + 'level0': ['A > one', 'A > two'] + } + } + doc_sequential_with_tags2 = { + "id": self.doc_sequential["id"], + "tags": { + 'taxonomy': ['A', 'B'], + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + } + } + + mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls( + [ + call([doc_sequential_with_tags1]), + call([doc_sequential_with_tags2]), + ], + any_order=True, + ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_delete_index_xblock(self, mock_meilisearch): + """ + Test deleting an XBlock doc from the index. + """ + api.delete_index_doc(self.sequential.usage_key) + + mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( + self.doc_sequential['id'] + ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_library_block_metadata(self, mock_meilisearch): + """ + Test indexing a Library Block. + """ + api.upsert_library_block_index_doc(self.problem.usage_key) + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([self.doc_problem]) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_library_block_tags(self, mock_meilisearch): + """ + Test indexing an Library Block with tags. + """ + + # Tag XBlock (these internally call `upsert_block_tags_index_docs`) + tagging_api.tag_object(str(self.problem.usage_key), self.taxonomyA, ["one", "two"]) + tagging_api.tag_object(str(self.problem.usage_key), self.taxonomyB, ["three", "four"]) + + # Build expected docs with tags at each stage + doc_problem_with_tags1 = { + "id": self.doc_problem["id"], + "tags": { + 'taxonomy': ['A'], + 'level0': ['A > one', 'A > two'] + } + } + doc_problem_with_tags2 = { + "id": self.doc_problem["id"], + "tags": { + 'taxonomy': ['A', 'B'], + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + } + } + + mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls( + [ + call([doc_problem_with_tags1]), + call([doc_problem_with_tags2]), + ], + any_order=True, + ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_delete_index_library_block(self, mock_meilisearch): + """ + Test deleting a Library Block doc from the index. + """ + api.delete_index_doc(self.problem.usage_key) + + mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( + self.doc_problem['id'] + ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_content_library_metadata(self, mock_meilisearch): + """ + Test indexing a whole content library. + """ + api.upsert_content_library_index_docs(self.library.key) + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([self.doc_problem]) diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 914cae880a41..79efda11177b 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -2,6 +2,7 @@ Tests for the Studio content search documents (what gets stored in the index) """ from organizations.models import Organization + from openedx.core.djangoapps.content_tagging import api as tagging_api from openedx.core.djangolib.testing.utils import skip_unless_cms from xmodule.modulestore.django import modulestore @@ -10,7 +11,7 @@ try: # This import errors in the lms because content.search is not an installed app there. - from ..documents import searchable_doc_for_course_block + from ..documents import searchable_doc_for_course_block, searchable_doc_tags from ..models import SearchAccess except RuntimeError: searchable_doc_for_course_block = lambda x: x @@ -78,7 +79,10 @@ def test_problem_block(self): Test how a problem block gets represented in the search index """ block = self.store.get_item(self.problem_block.usage_key) - doc = searchable_doc_for_course_block(block) + doc = {} + doc.update(searchable_doc_for_course_block(block)) + doc.update(searchable_doc_tags(block.usage_key)) + assert doc == { # Note the 'id' has been stripped of special characters to meet Meilisearch requirements. # The '-8516ed8' suffix is deterministic based on the original usage key. @@ -115,7 +119,9 @@ def test_html_block(self): Test how an HTML block gets represented in the search index """ block = self.store.get_item(self.html_block_key) - doc = searchable_doc_for_course_block(block) + doc = {} + doc.update(searchable_doc_for_course_block(block)) + doc.update(searchable_doc_tags(block.usage_key)) assert doc == { "id": "block-v1edxtoy2012_falltypehtmlblocktoyjumpto-efb9c601", "type": "course_block", diff --git a/openedx/core/djangoapps/content/search/tests/test_handlers.py b/openedx/core/djangoapps/content/search/tests/test_handlers.py new file mode 100644 index 000000000000..1a1134547666 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_handlers.py @@ -0,0 +1,156 @@ +""" +Tests for the search index update handlers +""" +from unittest.mock import MagicMock, patch + +from django.test import LiveServerTestCase, override_settings +from organizations.tests.factories import OrganizationFactory + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangoapps.content_libraries import api as library_api +from openedx.core.djangolib.testing.utils import skip_unless_cms +from openedx.core.lib.blockstore_api.tests.base import BlockstoreAppTestMixin +from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase + + +try: + # This import errors in the lms because content.search is not an installed app there. + from .. import api + from ..models import SearchAccess +except RuntimeError: + SearchAccess = {} + + +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +@override_settings(MEILISEARCH_ENABLED=True) +@skip_unless_cms +class TestUpdateIndexHandlers( + ModuleStoreTestCase, + BlockstoreAppTestMixin, + LiveServerTestCase, +): + """ + Test that the search index is updated when XBlocks and Library Blocks are modified + """ + + MODULESTORE = TEST_DATA_SPLIT_MODULESTORE + + def setUp(self): + super().setUp() + # Create user + self.user = UserFactory.create() + self.user_id = self.user.id + + self.orgA = OrganizationFactory.create(short_name="orgA") + + self.patcher = patch("openedx.core.djangoapps.content_tagging.tasks.modulestore", return_value=self.store) + self.addCleanup(self.patcher.stop) + self.patcher.start() + + api.clear_meilisearch_client() # Clear the Meilisearch client to avoid leaking state from other tests + + def test_create_delete_xblock(self, meilisearch_client): + # Create course + course = self.store.create_course( + self.orgA.short_name, + "test_course", + "test_run", + self.user_id, + fields={"display_name": "Test Course"}, + ) + course_access, _ = SearchAccess.objects.get_or_create(context_key=course.id) + + # Create XBlocks + sequential = self.store.create_child(self.user_id, course.location, "sequential", "test_sequential") + doc_sequential = { + "id": "block-v1orgatest_coursetest_runtypesequentialblocktest_sequential-0cdb9395", + "type": "course_block", + "usage_key": "block-v1:orgA+test_course+test_run+type@sequential+block@test_sequential", + "block_id": "test_sequential", + "display_name": "sequential", + "block_type": "sequential", + "context_key": "course-v1:orgA+test_course+test_run", + "org": "orgA", + "breadcrumbs": [{"display_name": "Test Course"}], "content": {}, + "access_id": course_access.id, + + } + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_sequential]) + vertical = self.store.create_child(self.user_id, sequential.location, "vertical", "test_vertical") + doc_vertical = { + "id": "block-v1orgatest_coursetest_runtypeverticalblocktest_vertical-011f143b", + "type": "course_block", + "usage_key": "block-v1:orgA+test_course+test_run+type@vertical+block@test_vertical", + "block_id": "test_vertical", + "display_name": "vertical", + "block_type": "vertical", + "context_key": "course-v1:orgA+test_course+test_run", + "org": "orgA", + "breadcrumbs": [{"display_name": "Test Course"}, {"display_name": "sequential"}], + "content": {}, + "access_id": course_access.id, + } + + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_vertical]) + + # Update the XBlock + sequential = self.store.get_item(sequential.location, self.user_id) # Refresh the XBlock + sequential.display_name = "Updated Sequential" + self.store.update_item(sequential, self.user_id) + + # The display name and the child's breadcrumbs should be updated + doc_sequential["display_name"] = "Updated Sequential" + doc_vertical["breadcrumbs"][1]["display_name"] = "Updated Sequential" + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([ + doc_sequential, + doc_vertical, + ]) + + # Delete the XBlock + self.store.delete_item(vertical.location, self.user_id) + + meilisearch_client.return_value.index.return_value.delete_document.assert_called_with( + "block-v1orgatest_coursetest_runtypeverticalblocktest_vertical-011f143b" + ) + + def test_create_delete_library_block(self, meilisearch_client): + # Create library + library = library_api.create_library( + org=self.orgA, + slug="lib_a", + title="Library Org A", + description="This is a library from Org A", + ) + lib_access, _ = SearchAccess.objects.get_or_create(context_key=library.key) + + problem = library_api.create_library_block(library.key, "problem", "Problem1") + doc_problem = { + "id": "lborgalib_aproblemproblem1-ca3186e9", + "type": "library_block", + "usage_key": "lb:orgA:lib_a:problem:Problem1", + "block_id": "Problem1", + "display_name": "Blank Problem", + "block_type": "problem", + "context_key": "lib:orgA:lib_a", + "org": "orgA", + "breadcrumbs": [{"display_name": "Library Org A"}], + "content": {"problem_types": [], "capa_content": " "}, + "access_id": lib_access.id, + } + + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem]) + + # Rename the content library + library_api.update_library(library.key, title="Updated Library Org A") + + # The breadcrumbs should be updated + doc_problem["breadcrumbs"][0]["display_name"] = "Updated Library Org A" + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem]) + + # Delete the Library Block + library_api.delete_library_block(problem.usage_key) + + meilisearch_client.return_value.index.return_value.delete_document.assert_called_with( + "lborgalib_aproblemproblem1-ca3186e9" + ) diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py index 853f5b5149a6..7c1b8e99850a 100644 --- a/openedx/core/djangoapps/content/search/tests/test_views.py +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -1,8 +1,10 @@ """ Tests for the Studio content search REST API. """ +from __future__ import annotations + import functools -from unittest import mock +from unittest.mock import ANY, MagicMock, Mock, patch import ddt from django.test import override_settings @@ -17,7 +19,8 @@ try: # This import errors in the lms because content.search is not an installed app there. - from openedx.core.djangoapps.content.search.models import SearchAccess + from .. import api + from ..models import SearchAccess except RuntimeError: SearchAccess = {} @@ -28,7 +31,7 @@ def mock_meilisearch(enabled=True): """ - Decorator that mocks the required parts of content.search.views to simulate a running Meilisearch instance. + Decorator that mocks the required parts of content.search.api to simulate a running Meilisearch instance. """ def decorator(func): """ @@ -40,17 +43,19 @@ def wrapper(*args, **kwargs): MEILISEARCH_ENABLED=enabled, MEILISEARCH_PUBLIC_URL="http://meilisearch.url", ): - with mock.patch( - 'openedx.core.djangoapps.content.search.views._get_meili_api_key_uid', + with patch( + 'openedx.core.djangoapps.content.search.api._get_meili_api_key_uid', return_value=MOCK_API_KEY_UID, ): return func(*args, **kwargs) + return wrapper return decorator @ddt.ddt @skip_unless_cms +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) class StudioSearchViewTest(StudioSearchTestMixin, SharedModuleStoreTestCase): """ General tests for the Studio search REST API. @@ -59,6 +64,9 @@ def setUp(self): super().setUp() self.client = APIClient() + # Clear the Meilisearch client to avoid side effects from other tests + api.clear_meilisearch_client() + @mock_meilisearch(enabled=False) def test_studio_search_unathenticated_disabled(self): """ @@ -84,31 +92,33 @@ def test_studio_search_disabled(self): result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 404 + def _mock_generate_tenant_token(self, mock_search_client): + """ + Return a mocked meilisearch.Client.generate_tenant_token method. + """ + mock_generate_tenant_token = Mock(return_value='restricted_api_key') + mock_search_client.return_value = Mock( + generate_tenant_token=mock_generate_tenant_token, + ) + return mock_generate_tenant_token + @mock_meilisearch(enabled=True) - def test_studio_search_enabled(self): + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') + def test_studio_search_enabled(self, mock_search_client): """ We've implement fine-grained permissions on the meilisearch content, so any logged-in user can get a restricted API key for Meilisearch using the REST API. """ self.client.login(username='student', password='student_pass') + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 200 assert result.data["index_name"] == "studio_content" assert result.data["url"] == "http://meilisearch.url" assert result.data["api_key"] and isinstance(result.data["api_key"], str) - def _mock_generate_tenant_token(self, mock_search_client): - """ - Return a mocked meilisearch.Client.generate_tenant_token method. - """ - mock_generate_tenant_token = mock.Mock(return_value='restricted_api_key') - mock_search_client.return_value = mock.Mock( - generate_tenant_token=mock_generate_tenant_token, - ) - return mock_generate_tenant_token - @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_student_no_access(self, mock_search_client): """ Users without access to any courses or libraries will have all documents filtered out. @@ -124,11 +134,11 @@ def test_studio_search_student_no_access(self, mock_search_client): "filter": "org IN [] OR access_id IN []", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_staff(self, mock_search_client): """ Users with global staff access can search any document. @@ -142,11 +152,11 @@ def test_studio_search_staff(self, mock_search_client): search_rules={ "studio_content": {} }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_course_staff_access(self, mock_search_client): """ Users with staff or instructor access to a course or library will be limited to these courses/libraries. @@ -168,7 +178,7 @@ def test_studio_search_course_staff_access(self, mock_search_client): "filter": f"org IN [] OR access_id IN {expected_access_ids}", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @ddt.data( @@ -176,7 +186,7 @@ def test_studio_search_course_staff_access(self, mock_search_client): 'org_instr', ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_org_access(self, username, mock_search_client): """ Users with org access to any courses or libraries will use the org filter. @@ -192,11 +202,11 @@ def test_studio_search_org_access(self, username, mock_search_client): "filter": "org IN ['org1'] OR access_id IN []", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_omit_orgs(self, mock_search_client): """ Grant org access to our staff user to ensure that org's access_ids are omitted from the search filter. @@ -219,13 +229,13 @@ def test_studio_search_omit_orgs(self, mock_search_client): "filter": f"org IN ['org1'] OR access_id IN {expected_access_ids}", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views._get_user_orgs') - @mock.patch('openedx.core.djangoapps.content.search.views.get_access_ids_for_request') - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api._get_user_orgs') + @patch('openedx.core.djangoapps.content.search.api.get_access_ids_for_request') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_limits(self, mock_search_client, mock_get_access_ids, mock_get_user_orgs): """ Users with access to many courses/libraries or orgs will only be able to search content @@ -254,5 +264,5 @@ def test_studio_search_limits(self, mock_search_client, mock_get_access_ids, moc "filter": f"org IN {expected_user_orgs} OR access_id IN {expected_access_ids}", } }, - expires_at=mock.ANY, + expires_at=ANY, ) diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py index 5b29a8db1b94..f747fe77e8b5 100644 --- a/openedx/core/djangoapps/content/search/views.py +++ b/openedx/core/djangoapps/content/search/views.py @@ -3,69 +3,19 @@ """ from __future__ import annotations -from datetime import datetime, timedelta, timezone import logging -from django.conf import settings from django.contrib.auth import get_user_model -import meilisearch from rest_framework.exceptions import NotFound -from rest_framework.request import Request from rest_framework.response import Response from rest_framework.views import APIView -from common.djangoapps.student.role_helpers import get_course_roles -from common.djangoapps.student.roles import GlobalStaff from openedx.core.lib.api.view_utils import view_auth_classes -from openedx.core.djangoapps.content.search.documents import STUDIO_INDEX_NAME -from openedx.core.djangoapps.content.search.models import get_access_ids_for_request + +from . import api User = get_user_model() log = logging.getLogger(__name__) -MAX_ACCESS_IDS_IN_FILTER = 1_000 -MAX_ORGS_IN_FILTER = 1_000 - - -def _get_meili_api_key_uid(): - """ - Helper method to get the UID of the API key we're using for Meilisearch - """ - if not hasattr(_get_meili_api_key_uid, "uid"): - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - _get_meili_api_key_uid.uid = client.get_key(settings.MEILISEARCH_API_KEY).uid - return _get_meili_api_key_uid.uid - - -def _get_meili_access_filter(request: Request) -> dict: - """ - Return meilisearch filter based on the requesting user's permissions. - """ - # Global staff can see anything, so no filters required. - if GlobalStaff().has_user(request.user): - return {} - - # Everyone else is limited to their org staff roles... - user_orgs = _get_user_orgs(request)[:MAX_ORGS_IN_FILTER] - - # ...or the N most recent courses and libraries they can access. - access_ids = get_access_ids_for_request(request, omit_orgs=user_orgs)[:MAX_ACCESS_IDS_IN_FILTER] - return { - "filter": f"org IN {user_orgs} OR access_id IN {access_ids}", - } - - -def _get_user_orgs(request: Request) -> list[str]: - """ - Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole. - - Note: org-level roles have course_id=None to distinguish them from course-level roles. - """ - course_roles = get_course_roles(request.user) - return list(set( - role.org - for role in course_roles - if role.course_id is None and role.role in ['staff', 'instructor'] - )) @view_auth_classes(is_authenticated=True) @@ -78,25 +28,9 @@ def get(self, request): """ Give user details on how they can search studio content """ - if not settings.MEILISEARCH_ENABLED: + if not api.is_meilisearch_enabled(): raise NotFound("Meilisearch features are not enabled.") - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME - # Create an API key that only allows the user to search content that they have permission to view: - expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) - search_rules = { - index_name: _get_meili_access_filter(request), - } - # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. - restricted_api_key = client.generate_tenant_token( - api_key_uid=_get_meili_api_key_uid(), - search_rules=search_rules, - expires_at=expires_at, - ) + response_data = api.generate_user_token_for_studio_search(request) - return Response({ - "url": settings.MEILISEARCH_PUBLIC_URL, - "index_name": index_name, - "api_key": restricted_api_key, - }) + return Response(response_data) diff --git a/openedx/core/djangoapps/content_tagging/api.py b/openedx/core/djangoapps/content_tagging/api.py index 1a6662c52911..47b157c1a34e 100644 --- a/openedx/core/djangoapps/content_tagging/api.py +++ b/openedx/core/djangoapps/content_tagging/api.py @@ -11,12 +11,15 @@ import openedx_tagging.core.tagging.api as oel_tagging from django.db.models import Exists, OuterRef, Q, QuerySet +from django.utils.timezone import now from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.locator import LibraryLocatorV2 from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy from openedx_tagging.core.tagging.models.utils import TAGS_CSV_SEPARATOR from organizations.models import Organization from .helpers.objecttag_export_helpers import build_object_tree_with_objecttags, iterate_with_level +from openedx_events.content_authoring.data import ContentObjectData +from openedx_events.content_authoring.signals import CONTENT_OBJECT_TAGS_CHANGED from .models import TaxonomyOrg from .types import ContentKey, TagValuesByObjectIdDict, TagValuesByTaxonomyIdDict, TaxonomyDict @@ -298,6 +301,10 @@ def set_exported_object_tags( create_invalid=True, taxonomy_export_id=str(taxonomy_export_id), ) + CONTENT_OBJECT_TAGS_CHANGED.send_event( + time=now(), + content_object=ContentObjectData(object_id=content_key_str) + ) def import_course_tags_from_csv(csv_path, course_id) -> None: @@ -371,6 +378,9 @@ def tag_object( Replaces the existing ObjectTag entries for the given taxonomy + object_id with the given list of tags, if the taxonomy can be used by the given object_id. + This is a wrapper around oel_tagging.tag_object that adds emitting the `CONTENT_OBJECT_TAGS_CHANGED` event + when tagging an object. + tags: A list of the values of the tags from this taxonomy to apply. object_tag_class: Optional. Use a proxy subclass of ObjectTag for additional @@ -389,7 +399,10 @@ def tag_object( taxonomy=taxonomy, tags=tags, ) - + CONTENT_OBJECT_TAGS_CHANGED.send_event( + time=now(), + content_object=ContentObjectData(object_id=object_id) + ) # Expose the oel_tagging APIs diff --git a/openedx/core/djangoapps/content_tagging/rest_api/v1/views.py b/openedx/core/djangoapps/content_tagging/rest_api/v1/views.py index fc14037ef16f..71b210b9e561 100644 --- a/openedx/core/djangoapps/content_tagging/rest_api/v1/views.py +++ b/openedx/core/djangoapps/content_tagging/rest_api/v1/views.py @@ -5,16 +5,18 @@ from django.db.models import Count from django.http import StreamingHttpResponse -from openedx_tagging.core.tagging.rest_api.v1.views import ObjectTagView, TaxonomyView from openedx_tagging.core.tagging import rules as oel_tagging_rules +from openedx_tagging.core.tagging.rest_api.v1.views import ObjectTagView, TaxonomyView from rest_framework import status from rest_framework.decorators import action from rest_framework.exceptions import PermissionDenied, ValidationError from rest_framework.request import Request from rest_framework.response import Response from rest_framework.views import APIView -from ...auth import has_view_object_tags_access +from openedx_events.content_authoring.data import ContentObjectData +from openedx_events.content_authoring.signals import CONTENT_OBJECT_TAGS_CHANGED +from ...auth import has_view_object_tags_access from ...api import ( create_taxonomy, generate_csv_rows, @@ -138,12 +140,25 @@ def orgs(self, request, **_kwargs) -> Response: class ObjectTagOrgView(ObjectTagView): """ View to create and retrieve ObjectTags for a provided Object ID (object_id). - This view extends the ObjectTagView to add Organization filters for the results. + This view extends the ObjectTagView to add Organization filters for the results, + and fires events when the tags are updated. Refer to ObjectTagView docstring for usage details. """ filter_backends = [ObjectTagTaxonomyOrgFilterBackend] + def update(self, request, *args, **kwargs) -> Response: + """ + Extend the update method to fire CONTENT_OBJECT_TAGS_CHANGED event + """ + response = super().update(request, *args, **kwargs) + if response.status_code == 200: + object_id = kwargs.get('object_id') + CONTENT_OBJECT_TAGS_CHANGED.send_event( + content_object=ContentObjectData(object_id=object_id) + ) + return response + class ObjectTagExportView(APIView): """" diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_views.py index 0496f45ae2fc..ff0fb7abe4eb 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_views.py @@ -232,7 +232,7 @@ def test_get_username(self): Test that a client (logged in) can get her own username. """ self.client.login(username=self.user.username, password=TEST_PASSWORD) - self._verify_get_own_username(19) + self._verify_get_own_username(18) def test_get_username_inactive(self): """ @@ -242,7 +242,7 @@ def test_get_username_inactive(self): self.client.login(username=self.user.username, password=TEST_PASSWORD) self.user.is_active = False self.user.save() - self._verify_get_own_username(19) + self._verify_get_own_username(18) def test_get_username_not_logged_in(self): """ @@ -251,7 +251,7 @@ def test_get_username_not_logged_in(self): """ # verify that the endpoint is inaccessible when not logged in - self._verify_get_own_username(12, expected_status=401) + self._verify_get_own_username(11, expected_status=401) @skip_unless_lms @@ -358,7 +358,7 @@ class TestAccountsAPI(FilteredQueryCountMixin, CacheIsolationTestCase, UserAPITe """ ENABLED_CACHES = ['default'] - TOTAL_QUERY_COUNT = 27 + TOTAL_QUERY_COUNT = 26 FULL_RESPONSE_FIELD_COUNT = 29 def setUp(self): @@ -811,7 +811,7 @@ def verify_get_own_information(queries): assert data['time_zone'] is None self.client.login(username=self.user.username, password=TEST_PASSWORD) - verify_get_own_information(self._get_num_queries(25)) + verify_get_own_information(self._get_num_queries(24)) # Now make sure that the user can get the same information, even if not active self.user.is_active = False @@ -831,7 +831,7 @@ def test_get_account_empty_string(self): legacy_profile.save() self.client.login(username=self.user.username, password=TEST_PASSWORD) - with self.assertNumQueries(self._get_num_queries(25), table_ignorelist=WAFFLE_TABLES): + with self.assertNumQueries(self._get_num_queries(24), table_ignorelist=WAFFLE_TABLES): response = self.send_get(self.client) for empty_field in ("level_of_education", "gender", "country", "state", "bio",): assert response.data[empty_field] is None diff --git a/openedx/core/lib/cache_utils.py b/openedx/core/lib/cache_utils.py index 1b3b65f378a4..c379ab2d961a 100644 --- a/openedx/core/lib/cache_utils.py +++ b/openedx/core/lib/cache_utils.py @@ -126,7 +126,7 @@ def __init__(self, func): self.cache = {} def __call__(self, *args): - if not isinstance(args, collections.Hashable): + if not isinstance(args, collections.abc.Hashable): # uncacheable. a list, for instance. # better to not cache than blow up. return self.func(*args) @@ -147,7 +147,10 @@ def __get__(self, obj, objtype): """ Support instance methods. """ - return functools.partial(self.__call__, obj) + partial = functools.partial(self.__call__, obj) + # Make the cache accessible on the wrapped object so it can be cleared if needed. + partial.cache = self.cache + return partial class CacheInvalidationManager: diff --git a/openedx/features/course_experience/tests/views/test_course_updates.py b/openedx/features/course_experience/tests/views/test_course_updates.py index f26e1cd35ba2..379be52ed40f 100644 --- a/openedx/features/course_experience/tests/views/test_course_updates.py +++ b/openedx/features/course_experience/tests/views/test_course_updates.py @@ -49,7 +49,7 @@ def test_queries(self): # Fetch the view and verify that the query counts haven't changed # TODO: decrease query count as part of REVO-28 - with self.assertNumQueries(53, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): + with self.assertNumQueries(52, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): with check_mongo_calls(3): url = course_updates_url(self.course) self.client.get(url) diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 9c0484d69209..25ac994b3245 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -23,7 +23,7 @@ click>=8.0,<9.0 # The team that owns this package will manually bump this package rather than having it pulled in automatically. # This is to allow them to better control its deployment and to do it in a process that works better # for them. -edx-enterprise==4.15.3 +edx-enterprise==4.15.8 # Stay on LTS version, remove once this is added to common constraint Django<5.0 diff --git a/requirements/edx-sandbox/README.rst b/requirements/edx-sandbox/README.rst new file mode 100644 index 000000000000..6129aa865b31 --- /dev/null +++ b/requirements/edx-sandbox/README.rst @@ -0,0 +1,59 @@ +edx-sandbox: a Python environment for sandboxed execution with CodeJail +####################################################################### + +The requirements in this directory describe a Python environment separate from +the general edx-platform environment. When correctly configured with +`CodeJail `_, edx-platform can use +it to execute untrusted code, particularly instructor-authored Python code +within ``