diff --git a/portal-backend/depmap/interactive/config/models.py b/portal-backend/depmap/interactive/config/models.py
index cac0dbd1..9361461a 100644
--- a/portal-backend/depmap/interactive/config/models.py
+++ b/portal-backend/depmap/interactive/config/models.py
@@ -506,6 +506,15 @@ def _get_nonstandard_noncustom_datasets(self):
             nonstandard_datasets[key] = Config(**nonstandard_dict)
         return nonstandard_datasets
 
+    def is_legacy_private_dataset(self, dataset_id: str) -> bool:
+        """
+        Return True if dataset_id matches the dataset_id of any record returned by PrivateDatasetMetadata.get_all()
+        """
+        return any(
+            dataset.dataset_id == dataset_id
+            for dataset in PrivateDatasetMetadata.get_all()
+        )
+
     def get_allowed_private_datasets(self) -> Dict[str, Config]:
         """
         WARNING: This CANNOT BE CACHED and must be re-retrieved on every request.
diff --git a/portal-backend/depmap/interactive/config/utils.py b/portal-backend/depmap/interactive/config/utils.py
index d7a40303..cbd097c5 100755
--- a/portal-backend/depmap/interactive/config/utils.py
+++ b/portal-backend/depmap/interactive/config/utils.py
@@ -18,6 +18,7 @@
 from depmap.taiga_id import utils as taiga_utils
 from depmap.utilities import entity_utils
 from depmap.utilities.exception import InteractiveDatasetNotFound
+from depmap.utilities.data_access_log import log_legacy_private_dataset_access
 
 
 def __get_config() -> InteractiveConfig:
@@ -73,6 +74,8 @@ def get_dataset_label(dataset_id) -> str:
     """
     Returns label of dataset
     """
+    if __get_config().is_legacy_private_dataset(dataset_id):
+        log_legacy_private_dataset_access("get_dataset_label", dataset_ids=[dataset_id])
     return __get_config().get(dataset_id).label
 
 
diff --git a/portal-backend/depmap/interactive/interactive_utils/get_and_process_data.py b/portal-backend/depmap/interactive/interactive_utils/get_and_process_data.py
index 61f29979..387bfa4f 100644
--- a/portal-backend/depmap/interactive/interactive_utils/get_and_process_data.py
+++ b/portal-backend/depmap/interactive/interactive_utils/get_and_process_data.py
@@ -43,6 +43,7 @@
 from depmap.vector_catalog.nodes.categorical_tree_nodes import (
     MUTATION_DETAILS_DATASET_ID,
 )
+from depmap.utilities.data_access_log import log_legacy_private_dataset_access
 
 
 def get_matrix(dataset_id):
@@ -207,6 +208,11 @@ def get_subsetted_df_by_labels(
     """
     Get a filtered dataframe with rows indexed by entity labels and columns indexed by depmap ids.
     """
+    if __get_config().is_legacy_private_dataset(dataset_id):
+        log_legacy_private_dataset_access(
+            "get_subsetted_df_by_labels", dataset_ids=[dataset_id]
+        )
+
     row_index_to_entity_label = {}
     col_index_to_depmap_id = {}
     feature_row_labels_set = set(feature_row_labels) if feature_row_labels else set()
@@ -271,6 +277,11 @@ def get_subsetted_df_by_ids(
     :param cell_line_ids: depmap ids of cell lines to return. If None, return all cell lines
     :return: dataframe where rows are entities and columns are cell lines
     """
+    if __get_config().is_legacy_private_dataset(dataset_id):
+        log_legacy_private_dataset_access(
+            "get_subsetted_df_by_ids", dataset_ids=[dataset_id]
+        )
+
     row_index_to_entity_label = {}
     col_index_to_depmap_id = {}
     entity_ids_set = set(entity_ids) if entity_ids else set()
@@ -318,6 +329,10 @@ def get_subsetted_df(dataset_id, row_indices, col_indices):
     if is_standard(dataset_id):
         df = standard_utils.get_subsetted_df(dataset_id, row_indices, col_indices)
     else:
+        if __get_config().is_legacy_private_dataset(dataset_id):
+            log_legacy_private_dataset_access(
+                "get_subsetted_df", dataset_ids=[dataset_id]
+            )
         df = nonstandard_utils.get_subsetted_df(dataset_id, row_indices, col_indices)
     return df
 
diff --git a/portal-backend/depmap/private_dataset/views.py b/portal-backend/depmap/private_dataset/views.py
index 3b6f12e2..61ce491d 100644
--- a/portal-backend/depmap/private_dataset/views.py
+++ b/portal-backend/depmap/private_dataset/views.py
@@ -14,6 +14,7 @@
 )
 from depmap.vector_catalog.trees import OTHER_DATASET_NON_PREPOPULATE_ID_BASE
 from depmap.interactive.nonstandard import nonstandard_utils
+from depmap.utilities.data_access_log import log_legacy_private_dataset_access
 
 blueprint = Blueprint(
     "private_dataset",
@@ -29,6 +30,9 @@ def home():
         abort(404)
 
     datasets = data_access.get_private_datasets()
+    dataset_ids = list(datasets.keys())
+    log_legacy_private_dataset_access("home", dataset_ids=dataset_ids)
+
     datasets = [
         {
             "dataset_id": dataset_id,
diff --git a/portal-backend/depmap/utilities/data_access_log.py b/portal-backend/depmap/utilities/data_access_log.py
index 6e90bab2..fe06ca15 100644
--- a/portal-backend/depmap/utilities/data_access_log.py
+++ b/portal-backend/depmap/utilities/data_access_log.py
@@ -68,3 +68,37 @@ def log_bulk_download_csv():
             )
         ),
     )
+
+
+def log_legacy_private_dataset_access(function_name, dataset_ids):
+    """
+    In theory, once we switch to the new private dataset UI,
+    private datasets should no longer be accessed through the legacy system.
+    If we see logs where they are being accessed, then we'll know there are other
+    features that need to be updated.
+
+    function_name: name recorded in the "function" field of the log entry
+    dataset_ids: list of dataset ids recorded in the "dataset_ids" field
+    """
+    # Imported locally because only this helper needs it.
+    # NOTE(review): assumes `request` in this module is flask.request -- confirm.
+    from flask import has_request_context
+
+    # Callers include low-level data access helpers, which may run without a
+    # request context; flask's `request` proxy raises RuntimeError there, and a
+    # log line must not break the data access it is observing.
+    endpoint = request.endpoint if has_request_context() else None
+
+    log.info(
+        "%s",
+        json.dumps(
+            dict(
+                timestamp=datetime.datetime.now().isoformat(),
+                type="legacy-private-dataset-access",
+                endpoint=endpoint,
+                function=function_name,
+                dataset_ids=dataset_ids,
+                user=get_authenticated_user(),
+            )
+        ),
+    )
diff --git a/portal-backend/tests/conftest.py b/portal-backend/tests/conftest.py
index 69e0c7d1..15100daa 100755
--- a/portal-backend/tests/conftest.py
+++ b/portal-backend/tests/conftest.py
@@ -178,6 +178,9 @@ def __format_all_private_dataset_settings(self):
             "private-b4d7094196889fa4614409570bb12ab5c09c9cc00388deb7c13ec57fd2996461": None
         }
 
+    def is_legacy_private_dataset(self, dataset_id: str) -> bool:
+        return False
+
     @classmethod
     def _get_mutations_taiga_id(cls):
         return "this-is-test-nonsense-that-should-never-be-checked-against.1/except-for-taiga-alias-loading"