Merge pull request aristoteleo#691 from Sichao25/sijie-july24-fix

Replace the deprecated `.A` attribute of sparse matrices with `.toarray()` (Sijie july24 fix)
chansigit · Jul 10, 2024 · c9fa676 · c9fa676
2 parents 7e464dc + 87422f0
commit c9fa676
Show file tree

Hide file tree

Showing 38 changed files with 165 additions and 165 deletions.
diff --git a/dynamo/estimation/csc/utils_velocity.py b/dynamo/estimation/csc/utils_velocity.py
@@ -215,8 +215,8 @@ def fit_linreg(
         the extreme data points (r2), the r2 calculated using all data points (all_r2). If argument r2 is False, r2 and
         all_r2 will not be returned.
     """
-    x = x.A if issparse(x) else x
-    y = y.A if issparse(y) else y
+    x = x.toarray() if issparse(x) else x
+    y = y.toarray() if issparse(y) else y
 
     _mask = np.logical_and(~np.isnan(x), ~np.isnan(y))
     if mask is not None:
@@ -279,8 +279,8 @@ def fit_linreg_robust(
         all_r2 will not be returned.
     """
 
-    x = x.A if issparse(x) else x
-    y = y.A if issparse(y) else y
+    x = x.toarray() if issparse(x) else x
+    y = y.toarray() if issparse(y) else y
 
     _mask = np.logical_and(~np.isnan(x), ~np.isnan(y))
     if mask is not None:
@@ -405,7 +405,7 @@ def fit_first_order_deg_lsq(
     Returns:
         The estimated value for beta (beta) and the estimated value for the initial spliced, labeled mRNA count (l0).
     """
-    l = l.A.flatten() if issparse(l) else l
+    l = l.toarray().flatten() if issparse(l) else l
 
     tau = t - np.min(t)
     l0 = np.nanmean(l[tau == 0])
@@ -434,7 +434,7 @@ def solve_first_order_deg(t: np.ndarray, l: Union[csr_matrix, np.ndarray]) -> Tu
         The initial counts of the species (for example, labeled mRNA), degradation rate constant and half-life of the species.
     """
 
-    x = l.A.flatten() if issparse(l) else l
+    x = l.toarray().flatten() if issparse(l) else l
 
     t_uniq = np.unique(t)
     x_stra = strat_mom(x, t, np.nanmean)
@@ -468,7 +468,7 @@ def fit_gamma_lsq(
     Returns:
         The estimated value for gamma and the estimated value for the initial spliced mRNA count.
     """
-    s = s.A.flatten() if issparse(s) else s
+    s = s.toarray().flatten() if issparse(s) else s
 
     tau = t - np.min(t)
     s0 = np.mean(s[tau == 0])
@@ -501,7 +501,7 @@ def fit_alpha_synthesis(t: np.ndarray, u: Union[csr_matrix, np.ndarray], beta: f
     Returns:
         The estimated value for alpha.
     """
-    u = u.A if issparse(u) else u
+    u = u.toarray() if issparse(u) else u
 
     # fit alpha assuming u=0 at t=0
     expt = np.exp(-beta * t)
@@ -533,7 +533,7 @@ def fit_alpha_degradation(
         The estimated value for alpha (alpha), the initial unspliced mRNA count (u0), and coefficient of determination
     or r square (r2).
     """
-    x = u.A if issparse(u) else u
+    x = u.toarray() if issparse(u) else u
 
     tau = t - np.min(t)
 
@@ -569,7 +569,7 @@ def solve_alpha_degradation(
         The estimated value for alpha (alpha), the initial unspliced mRNA count (b), coefficient of determination or r
         square.
     """
-    u = u.A if issparse(u) else u
+    u = u.toarray() if issparse(u) else u
 
     n = u.size
     tau = t - np.min(t)
@@ -617,7 +617,7 @@ def fit_alpha_beta_synthesis(
     Returns:
         The estimated value for alpha and the estimated value for beta.
     """
-    l = l.A if issparse(l) else l
+    l = l.toarray() if issparse(l) else l
 
     tau = np.hstack((0, t))
     x = np.hstack((0, l))
@@ -649,7 +649,7 @@ def fit_all_synthesis(
     Returns:
         The estimated value for alpha, beta and gamma.
     """
-    l = l.A if issparse(l) else l
+    l = l.toarray() if issparse(l) else l
 
     tau = np.hstack((0, t))
     x = np.hstack((0, l))

diff --git a/dynamo/estimation/csc/velocity.py b/dynamo/estimation/csc/velocity.py
@@ -1578,8 +1578,8 @@ def fit_gamma_steady_state(self, u, s, intercept=True, perc_left=None, perc_righ
         """
         if intercept and perc_left is None:
             perc_left = perc_right
-        u = u.A.flatten() if issparse(u) else u.flatten()
-        s = s.A.flatten() if issparse(s) else s.flatten()
+        u = u.toarray().flatten() if issparse(u) else u.flatten()
+        s = s.toarray().flatten() if issparse(s) else s.flatten()
 
         mask = find_extreme(
             s,
@@ -1660,10 +1660,10 @@ def fit_gamma_stochastic(
             all_r2: float
                 Coefficient of determination or r square for all data points.
         """
-        u = u.A.flatten() if issparse(u) else u.flatten()
-        s = s.A.flatten() if issparse(s) else s.flatten()
-        us = us.A.flatten() if issparse(us) else us.flatten()
-        ss = ss.A.flatten() if issparse(ss) else ss.flatten()
+        u = u.toarray().flatten() if issparse(u) else u.flatten()
+        s = s.toarray().flatten() if issparse(s) else s.flatten()
+        us = us.toarray().flatten() if issparse(us) else us.flatten()
+        ss = ss.toarray().flatten() if issparse(ss) else ss.flatten()
 
         mask = find_extreme(
             s,
@@ -1795,7 +1795,7 @@ def solve_alpha_mix_std_stm(self, t, ul, beta, clusters=None, alpha_time_depende
             range(ul.shape[0]),
             desc="solving steady state alpha and induction alpha",
         ):
-            l = ul[i].A.flatten() if issparse(ul) else ul[i]
+            l = ul[i].toarray().flatten() if issparse(ul) else ul[i]
             for t_ind in np.arange(1, len(t_uniq)):
                 alpha_stm[i, t_ind] = solve_alpha_2p(
                     t_max - t_uniq[t_ind],
@@ -1870,7 +1870,7 @@ def get_n_genes(self, key=None, data=None):
             else:
                 data = self.data[key]
         if type(data) is list:
-            ret = len(data[0].A) if issparse(data[0]) else len(data[0])
+            ret = len(data[0].toarray()) if issparse(data[0]) else len(data[0])
         else:
             ret = data.shape[0]
         return ret

diff --git a/dynamo/estimation/tsc/twostep.py b/dynamo/estimation/tsc/twostep.py
@@ -37,10 +37,10 @@ def fit_slope_stochastic(
         zip(np.arange(n_var), S, U, US, S2),
         "Estimate slope k via linear regression.",
     ):
-        u = u.A.flatten() if issparse(u) else u.flatten()
-        s = s.A.flatten() if issparse(s) else s.flatten()
-        us = us.A.flatten() if issparse(us) else us.flatten()
-        s2 = s2.A.flatten() if issparse(s2) else s2.flatten()
+        u = u.toarray().flatten() if issparse(u) else u.flatten()
+        s = s.toarray().flatten() if issparse(s) else s.flatten()
+        us = us.toarray().flatten() if issparse(us) else us.flatten()
+        s2 = s2.toarray().flatten() if issparse(s2) else s2.flatten()
 
         mask = find_extreme(u, s, perc_left=perc_left, perc_right=perc_right)
         k[i] = fit_stochastic_linreg(u[mask], s[mask], us[mask], s2[mask])
@@ -77,8 +77,8 @@ def lin_reg_gamma_synthesis(
         zip(np.arange(n_var), R, N),
         "Estimate gamma via linear regression of t vs. -ln(1-K)",
     ):
-        r = r.A.flatten() if issparse(r) else r.flatten()
-        n = n.A.flatten() if issparse(n) else n.flatten()
+        r = r.toarray().flatten() if issparse(r) else r.flatten()
+        n = n.toarray().flatten() if issparse(n) else n.flatten()
 
         K_list[i], R2 = fit_labeling_synthesis(n, r, time, perc_right=perc_right)
         gamma[i], r2[i] = compute_gamma_synthesis(K_list[i], np.unique(time))

diff --git a/dynamo/external/scifate.py b/dynamo/external/scifate.py
@@ -193,9 +193,9 @@ def adata_processing_TF_link(
 
     # normalize data (size factor correction, log transform and the scaling)
     if issparse(new):
-        new = new.A
+        new = new.toarray()
     if issparse(total):
-        total = total.A
+        total = total.toarray()
     new_mat = normalize_data(new, szfactors, pseudo_expr=0.1)
     tot_mat = normalize_data(total, szfactors, pseudo_expr=0.1)
     new_mat = pd.DataFrame(new_mat, index=adata.obs_names, columns=adata.var_names)
@@ -207,7 +207,7 @@ def adata_processing_TF_link(
     var.loc[:, "gene_short_name"] = make_index_unique(var.loc[:, "gene_short_name"].astype("str"))
     ntr = adata.layers["new"].sum(1).A1 / adata.layers["total"].sum(1).A1
     if issparse(ntr):
-        obs.loc[:, "labeling_rate"] = ntr.A1
+        obs.loc[:, "labeling_rate"] = ntr.toarray().ravel()
     else:
         obs.loc[:, "labeling_rate"] = ntr
 

diff --git a/dynamo/external/scribe.py b/dynamo/external/scribe.py
@@ -252,7 +252,7 @@ def coexp_measure(
     pearson = np.zeros(len(genes))
 
     X, Y = adata[:, genes].layers[layer_x].T, adata[:, genes].layers[layer_y].T
-    X, Y = X.A if issparse(X) else X, Y.A if issparse(Y) else Y
+    X, Y = X.toarray() if issparse(X) else X, Y.toarray() if issparse(Y) else Y
 
     k = min(5, int(adata.n_obs / 5 + 1))
     for i in tqdm(
@@ -424,7 +424,7 @@ def pool_mi(x, y, k):
                     return mi(x, y, k)
 
                 X = np.repeat(x[:, None], len(Targets), axis=1)
-                Y = t1_df[:, Targets] if issparse(t1_df) else t1_df[:, Targets].A
+                Y = t1_df[:, Targets].toarray() if issparse(t1_df) else t1_df[:, Targets]
                 pool = ThreadPool(cores)
                 res = pool.starmap(pool_mi, zip(X, Y, itertools.repeat(k)))
                 pool.close()

diff --git a/dynamo/plot/dynamics.py b/dynamo/plot/dynamics.py
@@ -341,8 +341,8 @@ def phase_portraits(
             raise ValueError("adata has no vkey {} in either the layers or the obsm slot".format(vkey))
 
     E_vec, V_vec = (
-        E_vec.A if issparse(E_vec) else E_vec,
-        V_vec.A if issparse(V_vec) else V_vec,
+        E_vec.toarray() if issparse(E_vec) else E_vec,
+        V_vec.toarray() if issparse(V_vec) else V_vec,
     )
 
     if k_name in vel_params_df.columns:
@@ -368,11 +368,11 @@ def phase_portraits(
             index_gene(adata, adata.layers[mapper["X_total"]], genes),
         )
 
-        new_mat, tot_mat = (new_mat.A, tot_mat.A) if issparse(new_mat) else (new_mat, tot_mat)
+        new_mat, tot_mat = (new_mat.toarray(), tot_mat.toarray()) if issparse(new_mat) else (new_mat, tot_mat)
 
         vel_u, vel_s = (
-            index_gene(adata, adata.layers["velocity_N"].A, genes),
-            index_gene(adata, adata.layers["velocity_T"].A, genes),
+            index_gene(adata, adata.layers["velocity_N"].toarray(), genes),
+            index_gene(adata, adata.layers["velocity_T"].toarray(), genes),
         )
 
         df = pd.DataFrame(
@@ -398,12 +398,12 @@ def phase_portraits(
         )
 
         unspliced_mat, spliced_mat = (
-            (unspliced_mat.A, spliced_mat.A) if issparse(unspliced_mat) else (unspliced_mat, spliced_mat)
+            (unspliced_mat.toarray(), spliced_mat.toarray()) if issparse(unspliced_mat) else (unspliced_mat, spliced_mat)
         )
 
         vel_u, vel_s = (
-            np.zeros_like(index_gene(adata, adata.layers["velocity_S"].A, genes)),
-            index_gene(adata, adata.layers["velocity_S"].A, genes),
+            np.zeros_like(index_gene(adata, adata.layers["velocity_S"].toarray(), genes)),
+            index_gene(adata, adata.layers["velocity_S"].toarray(), genes),
         )
 
         df = pd.DataFrame(
@@ -429,17 +429,17 @@ def phase_portraits(
             index_gene(adata, adata.layers[mapper["X_new"]], genes),
             index_gene(adata, adata.layers[mapper["X_total"]], genes),
         )
-        U, S, N, T = (U.A, S.A, N.A, T.A) if issparse(U) else (U, S, N, T)
+        U, S, N, T = (U.toarray(), S.toarray(), N.toarray(), T.toarray()) if issparse(U) else (U, S, N, T)
 
         vel_u, vel_s = (
             (
-                index_gene(adata, adata.layers["velocity_U"].A, genes) if "velocity_U" in adata.layers.keys() else None,
-                index_gene(adata, adata.layers["velocity_S"].A, genes),
+                index_gene(adata, adata.layers["velocity_U"].toarray(), genes) if "velocity_U" in adata.layers.keys() else None,
+                index_gene(adata, adata.layers["velocity_S"].toarray(), genes),
             )
             if vkey == "velocity_S"
             else (
-                index_gene(adata, adata.layers["velocity_N"].A, genes) if "velocity_U" in adata.layers.keys() else None,
-                index_gene(adata, adata.layers["velocity_T"].A, genes),
+                index_gene(adata, adata.layers["velocity_N"].toarray(), genes) if "velocity_U" in adata.layers.keys() else None,
+                index_gene(adata, adata.layers["velocity_T"].toarray(), genes),
             )
         )
         if "protein" in adata.obsm.keys():
@@ -461,9 +461,9 @@ def phase_portraits(
                 if (["X_protein"] in adata.obsm.keys() or [mapper["X_protein"]] in adata.obsm.keys())
                 else index_gene(adata, adata.obsm["protein"], genes)
             )
-            P = P.A if issparse(P) else P
+            P = P.toarray() if issparse(P) else P
             if issparse(P_vec):
-                P_vec = P_vec.A
+                P_vec = P_vec.toarray()
 
             vel_p = np.zeros_like(adata.obsm["velocity_P"][:, :])
 
@@ -1683,16 +1683,16 @@ def dynamics(
                     if has_splicing:
                         tmp = (
                             [
-                                valid_adata[:, gene_name].layers["M_ul"].A.T,
-                                valid_adata.layers["M_sl"].A.T,
+                                valid_adata[:, gene_name].layers["M_ul"].toarray().T,
+                                valid_adata.layers["M_sl"].toarray().T,
                             ]
                             if "M_ul" in valid_adata.layers.keys()
                             else [
-                                valid_adata[:, gene_name].layers["ul"].A.T,
-                                valid_adata.layers["sl"].A.T,
+                                valid_adata[:, gene_name].layers["ul"].toarray().T,
+                                valid_adata.layers["sl"].toarray().T,
                             ]
                         )
-                        x_data = [tmp[0].A, tmp[1].A] if issparse(tmp[0]) else tmp
+                        x_data = [tmp[0].toarray(), tmp[1].toarray()] if issparse(tmp[0]) else tmp
                         if log_unnormalized and "X_ul" not in valid_adata.layers.keys():
                             x_data = [np.log1p(tmp[0]), np.log1p(tmp[1])]
 
@@ -1717,7 +1717,7 @@ def dynamics(
                             if "X_new" in valid_adata.layers.keys()
                             else valid_adata[:, gene_name].layers["new"].T
                         )
-                        x_data = [tmp.A] if issparse(tmp) else [tmp]
+                        x_data = [tmp.toarray()] if issparse(tmp) else [tmp]
 
                         if log_unnormalized and "X_new" not in valid_adata.layers.keys():
                             x_data = [np.log1p(x_data[0])]

diff --git a/dynamo/plot/markers.py b/dynamo/plot/markers.py
@@ -199,7 +199,7 @@ def bubble(
 
     cells_df = adata.obs.get(group)
     gene_df = adata[:, genes].layers[layer]
-    gene_df = gene_df.A if issparse(gene_df) else gene_df
+    gene_df = gene_df.toarray() if issparse(gene_df) else gene_df
     gene_df = pd.DataFrame(gene_df.T, index=genes, columns=adata.obs_names)
 
     xmin, xmax = gene_df.quantile(vmin / 100, axis=1), gene_df.quantile(vmax / 100, axis=1)

diff --git a/dynamo/plot/networks.py b/dynamo/plot/networks.py
@@ -103,10 +103,10 @@ def nxvizPlot(
         # data has to be float
         if cluster is not None:
             network.nodes[n]["size"] = (
-                adata[adata.obs[cluster].isin(cluster_names), n].layers[layer].A.mean().astype(float)
+                adata[adata.obs[cluster].isin(cluster_names), n].layers[layer].toarray().mean().astype(float)
             )
         else:
-            network.nodes[n]["size"] = adata[:, n].layers[layer].A.mean().astype(float)
+            network.nodes[n]["size"] = adata[:, n].layers[layer].toarray().mean().astype(float)
 
         network.nodes[n]["label"] = n
     for e in network.edges():

diff --git a/dynamo/plot/preprocess.py b/dynamo/plot/preprocess.py
@@ -734,7 +734,7 @@ def exp_by_groups(
         raise ValueError(f"The layer {layer} is not existed in your adata object!")
 
     exprs = adata[:, valid_genes].X if layer == "X" else adata[:, valid_genes].layers[layer]
-    exprs = exprs.A if issparse(exprs) else exprs
+    exprs = exprs.toarray() if issparse(exprs) else exprs
     if use_ratio:
         (
             has_splicing,
@@ -749,7 +749,7 @@ def exp_by_groups(
                     if use_smoothed
                     else adata[:, valid_genes].layers["X_total"]
                 )
-                tot = tot.A if issparse(tot) else tot
+                tot = tot.toarray() if issparse(tot) else tot
                 exprs = exprs / tot
             else:
                 exprs = exprs
@@ -761,7 +761,7 @@ def exp_by_groups(
                     if use_smoothed
                     else adata[:, valid_genes].layers["X_unspliced"] + adata[:, valid_genes].layers["X_spliced"]
                 )
-                tot = tot.A if issparse(tot) else tot
+                tot = tot.toarray() if issparse(tot) else tot
                 exprs = exprs / tot
             else:
                 exprs = exprs

diff --git a/dynamo/plot/scatters.py b/dynamo/plot/scatters.py
@@ -2847,8 +2847,8 @@ def _map_cur_axis_to_title(
         else:
             x_points_df_data, x_points_column = _map_cur_axis_to_title(axis_x, _adata, cur_b, cur_l_smoothed)
             y_points_df_data, y_points_column = _map_cur_axis_to_title(axis_y, _adata, cur_b, cur_l_smoothed)
-            x_points_df_data = x_points_df_data.A.flatten() if issparse(x_points_df_data) else x_points_df_data
-            y_points_df_data = y_points_df_data.A.flatten() if issparse(y_points_df_data) else y_points_df_data
+            x_points_df_data = x_points_df_data.toarray().flatten() if issparse(x_points_df_data) else x_points_df_data
+            y_points_df_data = y_points_df_data.toarray().flatten() if issparse(y_points_df_data) else y_points_df_data
             points = pd.DataFrame(
                 {
                     axis_x: x_points_df_data,

diff --git a/dynamo/plot/time_series.py b/dynamo/plot/time_series.py
@@ -117,7 +117,7 @@ def kinetic_curves(
         color = list(set(color).intersection(adata.obs.keys()))
         Color = adata.obs[color].values.T.flatten() if len(color) > 0 else np.empty((0, 1))
 
-    exprs = exprs.A if issparse(exprs) else exprs
+    exprs = exprs.toarray() if issparse(exprs) else exprs
     if len(set(genes).intersection(valid_genes)) > 0:
         # by default, expression values are log1p transformed if using the expression from adata.
         exprs = np.expm1(exprs) if not log else exprs
@@ -310,7 +310,7 @@ def kinetic_heatmap(
 
         valid_genes = [x for x in genes if x in valid_genes]
 
-        exprs = exprs.A if issparse(exprs) else exprs
+        exprs = exprs.toarray() if issparse(exprs) else exprs
         if mode != "pseudotime":
             exprs = np.log1p(exprs) if log else exprs