Add logging for legacy private dataset access (#13)
snwessel authored Jul 23, 2024
1 parent 8f9b457 commit 219b5e0
Showing 6 changed files with 53 additions and 0 deletions.
6 changes: 6 additions & 0 deletions portal-backend/depmap/interactive/config/models.py
@@ -506,6 +506,12 @@ def _get_nonstandard_noncustom_datasets(self):
            nonstandard_datasets[key] = Config(**nonstandard_dict)
        return nonstandard_datasets

    def is_legacy_private_dataset(self, dataset_id: str) -> bool:
        all_private_dataset_ids = [
            dataset.dataset_id for dataset in PrivateDatasetMetadata.get_all()
        ]
        return dataset_id in all_private_dataset_ids
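For illustration, the new check behaves roughly as follows, assuming PrivateDatasetMetadata.get_all() returns records exposing a dataset_id attribute (as the list comprehension above implies) and that config is an InteractiveConfig instance; the ids below are placeholders, not real dataset ids:

config.is_legacy_private_dataset("private-abc123")       # True when a PrivateDatasetMetadata row exists for the id
config.is_legacy_private_dataset("some-public-dataset")  # False, so callers skip the access log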

    def get_allowed_private_datasets(self) -> Dict[str, Config]:
        """
        WARNING: This CANNOT BE CACHED and must be re-retrieved on every request.
3 changes: 3 additions & 0 deletions portal-backend/depmap/interactive/config/utils.py
@@ -18,6 +18,7 @@
from depmap.taiga_id import utils as taiga_utils
from depmap.utilities import entity_utils
from depmap.utilities.exception import InteractiveDatasetNotFound
from depmap.utilities.data_access_log import log_legacy_private_dataset_access


def __get_config() -> InteractiveConfig:
@@ -73,6 +74,8 @@ def get_dataset_label(dataset_id) -> str:
"""
Returns label of dataset
"""
if __get_config().is_legacy_private_dataset(dataset_id):
log_legacy_private_dataset_access("get_dataset_label", dataset_ids=[dataset_id])
return __get_config().get(dataset_id).label


@@ -43,6 +43,7 @@
from depmap.vector_catalog.nodes.categorical_tree_nodes import (
    MUTATION_DETAILS_DATASET_ID,
)
from depmap.utilities.data_access_log import log_legacy_private_dataset_access


def get_matrix(dataset_id):
@@ -207,6 +208,11 @@ def get_subsetted_df_by_labels(
"""
Get a filtered dataframe with rows indexed by entity labels and columns indexed by depmap ids.
"""
if __get_config().is_legacy_private_dataset(dataset_id):
log_legacy_private_dataset_access(
"get_subsetted_df_by_labels", dataset_ids=[dataset_id]
)

row_index_to_entity_label = {}
col_index_to_depmap_id = {}
feature_row_labels_set = set(feature_row_labels) if feature_row_labels else set()
@@ -271,6 +277,11 @@ def get_subsetted_df_by_ids(
    :param cell_line_ids: depmap ids of cell lines to return. If None, return all cell lines
    :return: dataframe where rows are entities and columns are cell lines
    """
    if __get_config().is_legacy_private_dataset(dataset_id):
        log_legacy_private_dataset_access(
            "get_subsetted_df_by_ids", dataset_ids=[dataset_id]
        )

    row_index_to_entity_label = {}
    col_index_to_depmap_id = {}
    entity_ids_set = set(entity_ids) if entity_ids else set()
@@ -318,6 +329,10 @@ def get_subsetted_df(dataset_id, row_indices, col_indices):
    if is_standard(dataset_id):
        df = standard_utils.get_subsetted_df(dataset_id, row_indices, col_indices)
    else:
        if __get_config().is_legacy_private_dataset(dataset_id):
            log_legacy_private_dataset_access(
                "get_subsetted_df", dataset_ids=[dataset_id]
            )
        df = nonstandard_utils.get_subsetted_df(dataset_id, row_indices, col_indices)

    return df
4 changes: 4 additions & 0 deletions portal-backend/depmap/private_dataset/views.py
@@ -14,6 +14,7 @@
)
from depmap.vector_catalog.trees import OTHER_DATASET_NON_PREPOPULATE_ID_BASE
from depmap.interactive.nonstandard import nonstandard_utils
from depmap.utilities.data_access_log import log_legacy_private_dataset_access

blueprint = Blueprint(
    "private_dataset",
@@ -29,6 +30,9 @@ def home():
        abort(404)

    datasets = data_access.get_private_datasets()
    dataset_ids = list(datasets.keys())
    log_legacy_private_dataset_access("get_private_datasets", dataset_ids)

    datasets = [
        {
            "dataset_id": dataset_id,
22 changes: 22 additions & 0 deletions portal-backend/depmap/utilities/data_access_log.py
@@ -68,3 +68,25 @@ def log_bulk_download_csv():
            )
        ),
    )


def log_legacy_private_dataset_access(function_name, dataset_ids):
    """
    In theory, once we switch to the new private dataset UI, private datasets
    should no longer be accessed through the legacy system. If these entries
    keep showing up in the logs, we'll know there are other features that
    still need to be updated.
    """
    log.info(
        "%s",
        json.dumps(
            dict(
                timestamp=datetime.datetime.now().isoformat(),
                type="legacy-private-dataset-access",
                endpoint=request.endpoint,
                function=function_name,
                dataset_ids=dataset_ids,
                user=get_authenticated_user(),
            )
        ),
    )
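For reference, each record emitted by this helper is a single JSON object on one log line, roughly of the following shape (the endpoint, dataset id, and user values here are illustrative, not taken from the commit):

{"timestamp": "2024-07-23T12:00:00.000000", "type": "legacy-private-dataset-access", "endpoint": "private_dataset.home", "function": "get_private_datasets", "dataset_ids": ["private-abc123"], "user": "someone@example.com"}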
3 changes: 3 additions & 0 deletions portal-backend/tests/conftest.py
@@ -178,6 +178,9 @@ def __format_all_private_dataset_settings(self):
            "private-b4d7094196889fa4614409570bb12ab5c09c9cc00388deb7c13ec57fd2996461": None
        }

    def is_legacy_private_dataset(self, dataset_id: str) -> bool:
        return False

    @classmethod
    def _get_mutations_taiga_id(cls):
        return "this-is-test-nonsense-that-should-never-be-checked-against.1/except-for-taiga-alias-loading"
