From b631b62e281f3d4bde0c8c95718f0b63ffe4f60d Mon Sep 17 00:00:00 2001 From: sarafiller <84543217+sarafiller@users.noreply.github.com> Date: Fri, 17 May 2024 10:29:45 -0400 Subject: [PATCH 1/5] Update classify.py --- scimap/helpers/classify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scimap/helpers/classify.py b/scimap/helpers/classify.py index 77f360e7..529bbcb2 100644 --- a/scimap/helpers/classify.py +++ b/scimap/helpers/classify.py @@ -131,7 +131,7 @@ def classify (adata, classify_idx=data.index classified = pd.DataFrame(non_summary.loc[data.index]) #subsets phenotype rows to only classified cells - if showPhenotypeLabel: + if showPhenotypeLabel is True: classified[phenotype_label] = classified[phenotype]+"_"+classify_label # add phenotype_label col classified[label]=pd.DataFrame(np.repeat(classify_label, len(classify_idx)), index = classify_idx) # add label col classified.drop([phenotype], axis='columns', inplace=True) # drop phenotype col, for merge From 289400f28cbc0476d94fb5d7ea23c8a50da05f93 Mon Sep 17 00:00:00 2001 From: sarafiller <84543217+sarafiller@users.noreply.github.com> Date: Tue, 21 May 2024 12:46:24 -0400 Subject: [PATCH 2/5] Update _classify.py --- scimap/helpers/_classify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scimap/helpers/_classify.py b/scimap/helpers/_classify.py index dc0817c5..bbbabe2c 100644 --- a/scimap/helpers/_classify.py +++ b/scimap/helpers/_classify.py @@ -131,7 +131,7 @@ def classify (adata, else: # create new naming scheme for label and phenotype_label cols in classified classify_idx=data.index - if showPhenotypeLabel: + if showPhenotypeLabel is True: non_summary = pd.DataFrame({phenotype: adata.obs[phenotype]}) # gets the index and phenotype non_summary[phenotype] = non_summary[phenotype].astype(str) @@ -145,7 +145,7 @@ def classify (adata, if collapse_failed is True: - if showPhenotypeLabel: + if showPhenotypeLabel is True: meta = non_summary # has index and phenotype col else: meta = pd.DataFrame(index= adata.obs.index) From e7f29f9be222a43090ea53d93443ecd90636d255 Mon Sep 17 00:00:00 2001 From: sarafiller <84543217+sarafiller@users.noreply.github.com> Date: Tue, 21 May 2024 14:08:46 -0400 Subject: [PATCH 3/5] Most up to date version of classsify. Get rid of classify.py and keep _classify.py, then change name --- scimap/helpers/_classify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scimap/helpers/_classify.py b/scimap/helpers/_classify.py index bbbabe2c..6b2844eb 100644 --- a/scimap/helpers/_classify.py +++ b/scimap/helpers/_classify.py @@ -101,7 +101,7 @@ def classify (adata, subclassify_phenotype = [subclassify_phenotype] if (showPhenotypeLabel): phenotype_label=phenotype+"_"+label - elif phenotype==None: + elif phenotype is None: if isinstance(subclassify_phenotype, str) or (showPhenotypeLabel): raise TypeError("You must pass a column name to the PHENOTYPE argument in order to use `subclassify_phenotype` or to set `showPhenotypeLabel = True`") From 4184da2a138186a56c3d533605afc128db2e8473 Mon Sep 17 00:00:00 2001 From: sarafiller <84543217+sarafiller@users.noreply.github.com> Date: Tue, 21 May 2024 14:13:19 -0400 Subject: [PATCH 4/5] Delete scimap/helpers/classify.py --- scimap/helpers/classify.py | 175 ------------------------------------- 1 file changed, 175 deletions(-) delete mode 100644 scimap/helpers/classify.py diff --git a/scimap/helpers/classify.py b/scimap/helpers/classify.py deleted file mode 100644 index 529bbcb2..00000000 --- a/scimap/helpers/classify.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Created on Mon Oct 26 12:04:17 2020 -# @author: Ajit Johnson Nirmal -""" -!!! abstract "Short Description" - `sm.hl.classify`: This utility function enables users to annotate cells by assessing - the presence or absence of specific markers. It offers flexibility to apply classifications - across the entire dataset or within previously defined subsets, such as phenotyped or - clustered cell groups, facilitating targeted analyses based on marker expression. - -## Function -""" - - -#Library -import pandas as pd -import numpy as np - - -# Functions -def classify (adata, - pos=None, - neg=None, - classify_label='passed_classify', - failed_label='failed_classify', - phenotype=None, - subclassify_phenotype=None, - threshold = 0.5, - collapse_failed=True, - label="classify", - showPhenotypeLabel=False, - verbose=True): - - """ -Parameters: - adata (anndata.AnnData): - The annotated data matrix for classification. - - pos (list, optional): - Markers that should be expressed in the cells of interest. - - neg (list, optional): - Markers that should not be expressed in the cells of interest. - - classify_label (str, optional): - Label for cells that meet the classification criteria. - - failed_label (str, optional): - Label for cells that do not meet the classification criteria. - - phenotype (str, required if subclassify_phenotype or collapse_failed is used): - Column in `adata.obs` containing the phenotype information. - - subclassify_phenotype (list, optional): - Phenotypes within which classification should be performed. - - threshold (float, optional): - Threshold for determining positive or negative expression. - - collapse_failed (bool, optional): - If True, unclassified cells are grouped under a single failed label. - - label (str, optional): - Key under which classification results are stored in `adata.obs`. - - showPhenotypeLabel (bool, optional): - If True, appends classification status to existing phenotype labels in the results. - - verbose (bool, optional): - If True, prints progress and informational messages during the classification process. - -Returns: - adata (anndata.AnnData): - The input AnnData object, updated with classification results in `adata.obs[label]`. - -Example: - ```python - - # Basic classification with positive and negative markers - adata = sm.hl.classify(adata, pos=['CD3D', 'CD8A'], neg=['PDGFRB'], label='T_cell_classification') - - # Classify specific phenotypes, preserving original phenotype labels for unclassified cells - adata = sm.hl.classify(adata, pos=['CD19'], neg=['CD3D'], subclassify_phenotype=['B cells'], - phenotype='cell_type', collapse_failed=False, label='B_cell_subclassification') - - # Use showPhenotypeLabel to append classification status to existing phenotype labels - adata = sm.hl.classify(adata, pos=['CD34'], neg=['CD45'], phenotype='cell_type', - showPhenotypeLabel=True, label='stem_cell_classification', verbose=True) - - ``` - """ - - # clean the input - if isinstance(pos, str): - pos = [pos] - if isinstance(neg, str): - neg = [neg] - if isinstance(subclassify_phenotype, str): - subclassify_phenotype = [subclassify_phenotype] - if (showPhenotypeLabel): - phenotype_label=phenotype+"_"+label - - - # Create a dataFrame with the necessary inforamtion - data = pd.DataFrame(adata.X, index= adata.obs.index, columns = adata.var.index) - - # if user requests to subset a specific phenotype - if subclassify_phenotype is not None: - meta = pd.DataFrame(adata.obs[phenotype]) - subset_index = meta[meta[phenotype].isin(subclassify_phenotype)].index - data = data.loc[subset_index] - - # Subset cells that pass the pos criteria - if pos is not None: - for i in pos: - data = data[data[i] >= threshold] - - # Subset cells that pass the neg criteria - if neg is not None and not data.empty: - for j in neg: - data = data[data[j] < threshold] - - # Cells that passed the classify criteria - if data.empty: - raise TypeError("No cells were found to satisfy your `classify` criteria") - else: - # create new naming scheme for label and phenotype_label cols in classified - non_summary = pd.DataFrame({phenotype: adata.obs[phenotype]}) # gets the index and phenotype - non_summary[phenotype] = non_summary[phenotype].astype(str) - - classify_idx=data.index - classified = pd.DataFrame(non_summary.loc[data.index]) #subsets phenotype rows to only classified cells - if showPhenotypeLabel is True: - classified[phenotype_label] = classified[phenotype]+"_"+classify_label # add phenotype_label col - classified[label]=pd.DataFrame(np.repeat(classify_label, len(classify_idx)), index = classify_idx) # add label col - classified.drop([phenotype], axis='columns', inplace=True) # drop phenotype col, for merge - - - - if collapse_failed is True: - meta = non_summary # has index and phenotype col - meta = meta.merge(classified, how='outer', left_index=True, right_index=True) # gain classified col(s) and NaNs for non-matches - if showPhenotypeLabel is True: - meta[phenotype_label]= meta[phenotype_label].fillna(meta[phenotype].astype(str)+"_"+failed_label) - meta=meta[phenotype_label] - else: - meta[label]=meta[label].fillna(failed_label) - meta=meta[label] - - - else: - if phenotype is None: - raise ValueError("Please pass a column name to the PHENOTYPE argument") - - if showPhenotypeLabel is True: - meta=non_summary # phenotype col - classified=pd.DataFrame({phenotype: classified[phenotype_label]}) # takes phenotype_label col and renames to phenotype, ensures it's a df - meta.update(classified) # updates with phenotype_label for only the classified cells - else: - meta= pd.DataFrame(adata.obs[phenotype]) - classified = pd.DataFrame(np.repeat(classify_label, len(classify_idx)), index = classify_idx, columns = [phenotype]) - meta.update(classified) # updates with label for only the classified cells - - - # Add to Anndata - meta = meta.reindex(adata.obs.index) - if showPhenotypeLabel is True: - adata.obs[phenotype_label]=meta - else: - adata.obs[label]=meta - - # return - return adata - From fe182f2b40c9fc7d751f05ba6de2d8636680087f Mon Sep 17 00:00:00 2001 From: sarafiller <84543217+sarafiller@users.noreply.github.com> Date: Tue, 21 May 2024 14:14:08 -0400 Subject: [PATCH 5/5] Rename _classify.py to classify.py --- scimap/helpers/{_classify.py => classify.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scimap/helpers/{_classify.py => classify.py} (100%) diff --git a/scimap/helpers/_classify.py b/scimap/helpers/classify.py similarity index 100% rename from scimap/helpers/_classify.py rename to scimap/helpers/classify.py