From 11a0f8caa583e7bffb58cdab5d73917472f8edd3 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 8 Jun 2024 20:00:29 -0400 Subject: [PATCH 01/29] Added option to use the inner product as a dissimilarity measure. --- src/rsatoolbox/rdm/calc.py | 57 +++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/src/rsatoolbox/rdm/calc.py b/src/rsatoolbox/rdm/calc.py index 227c1635..76240908 100644 --- a/src/rsatoolbox/rdm/calc.py +++ b/src/rsatoolbox/rdm/calc.py @@ -94,6 +94,8 @@ def calc_rdm( elif method == 'crossnobis': rdm = calc_rdm_crossnobis(dataset, descriptor, noise, cv_descriptor, remove_mean) + elif method == 'dotproduct': + rdm = calc_rdm_dotproduct(dataset, descriptor) elif method == 'poisson': rdm = calc_rdm_poisson(dataset, descriptor, prior_lambda=prior_lambda, @@ -209,9 +211,9 @@ def calc_rdm_euclidean( rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM """ measurements, desc = _parse_input(dataset, descriptor, remove_mean) - sum_sq_measurements = np.sum(measurements**2, axis=1, keepdims=True) + sum_sq_measurements = np.sum(measurements ** 2, axis=1, keepdims=True) rdm = sum_sq_measurements + sum_sq_measurements.T \ - - 2 * np.dot(measurements, measurements.T) + - 2 * np.dot(measurements, measurements.T) rdm = _extract_triu_(rdm) / measurements.shape[1] return _build_rdms(rdm, dataset, 'squared euclidean', descriptor, desc) @@ -269,7 +271,7 @@ def calc_rdm_mahalanobis(dataset, descriptor=None, noise=None, remove_mean: bool noise = _check_noise(noise, dataset.n_channel) kernel = measurements @ noise @ measurements.T rdm = np.expand_dims(np.diag(kernel), 0) + \ - np.expand_dims(np.diag(kernel), 1) - 2 * kernel + np.expand_dims(np.diag(kernel), 1) - 2 * kernel rdm = _extract_triu_(rdm) / measurements.shape[1] return _build_rdms( rdm, @@ -281,6 +283,29 @@ def calc_rdm_mahalanobis(dataset, descriptor=None, noise=None, remove_mean: bool ) +def calc_rdm_dotproduct( + dataset: DatasetBase, + descriptor: Optional[str] = None, + remove_mean: bool = False): + """ + Args: + dataset (rsatoolbox.data.DatasetBase): + The dataset the RDM is computed from + descriptor (String): + obs_descriptor used to define the rows/columns of the RDM + defaults to one row/column per row in the dataset + remove_mean (bool): + whether the mean of each pattern shall be removed + before calculating dotproducts. + Returns: + rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM + """ + measurements, desc = _parse_input(dataset, descriptor, remove_mean) + rdm = measurements @ measurements.T + rdm = _extract_triu_(rdm) + return _build_rdms(rdm, dataset, 'dotproduct', descriptor, desc) + + def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None, remove_mean: bool = False): """ @@ -371,7 +396,7 @@ def calc_rdm_crossnobis(dataset, descriptor, noise=None, measurements[i_fold], measurements[j_fold], np.linalg.inv( (variances[i_fold] + variances[j_fold]) / 2) - ) + ) rdms.append(rdm) rdms = np.array(rdms) rdm = np.einsum('ij->j', rdms) / rdms.shape[0] @@ -406,10 +431,10 @@ def calc_rdm_poisson(dataset, descriptor=None, prior_lambda=1, """ measurements, desc = _parse_input(dataset, descriptor) measurements = (measurements + prior_lambda * prior_weight) \ - / (1 + prior_weight) + / (1 + prior_weight) kernel = measurements @ np.log(measurements).T rdm = np.expand_dims(np.diag(kernel), 0) + \ - np.expand_dims(np.diag(kernel), 1) - kernel - kernel.T + np.expand_dims(np.diag(kernel), 1) - kernel - kernel.T rdm = _extract_triu_(rdm) / measurements.shape[1] return _build_rdms(rdm, dataset, 'poisson', descriptor, desc) @@ -455,13 +480,13 @@ def calc_rdm_poisson_cv(dataset, descriptor=None, prior_lambda=1, measurements_test, _, _ = average_dataset_by(data_test, descriptor) measurements_train = (measurements_train + prior_lambda * prior_weight) \ - / (1 + prior_weight) + / (1 + prior_weight) measurements_test = (measurements_test + prior_lambda * prior_weight) \ - / (1 + prior_weight) + / (1 + prior_weight) kernel = measurements_train @ np.log(measurements_test).T rdm = np.expand_dims(np.diag(kernel), 0) + \ - np.expand_dims(np.diag(kernel), 1) - kernel - kernel.T + np.expand_dims(np.diag(kernel), 1) - kernel - kernel.T rdm = _extract_triu_(rdm) / measurements_train.shape[1] return _build_rdms(rdm, dataset, 'poisson_cv', descriptor) @@ -469,7 +494,7 @@ def calc_rdm_poisson_cv(dataset, descriptor=None, prior_lambda=1, def _calc_rdm_crossnobis_single(meas1, meas2, noise) -> NDArray: kernel = meas1 @ noise @ meas2.T rdm = np.expand_dims(np.diag(kernel), 0) + \ - np.expand_dims(np.diag(kernel), 1) - kernel - kernel.T + np.expand_dims(np.diag(kernel), 1) - kernel - kernel.T return _extract_triu_(rdm) / meas1.shape[1] @@ -481,8 +506,8 @@ def _gen_default_cv_descriptor(dataset, descriptor) -> np.ndarray: desc = dataset.obs_descriptors[descriptor] values, counts = np.unique(desc, return_counts=True) assert np.all(counts == counts[0]), ( - 'cv_descriptor generation failed:\n' - + 'different number of observations per pattern') + 'cv_descriptor generation failed:\n' + + 'different number of observations per pattern') n_repeats = counts[0] cv_descriptor = np.zeros_like(desc) for i_val in values: @@ -491,10 +516,10 @@ def _gen_default_cv_descriptor(dataset, descriptor) -> np.ndarray: def _parse_input( - dataset: DatasetBase, - descriptor: Optional[str], - remove_mean: bool = False - ) -> Tuple[np.ndarray, Optional[np.ndarray]]: + dataset: DatasetBase, + descriptor: Optional[str], + remove_mean: bool = False +) -> Tuple[np.ndarray, Optional[np.ndarray]]: if descriptor is None: measurements = dataset.measurements desc = None From 9e8f18e0499b77367e0e0811ab754a43e929fbf5 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 13 Jun 2024 12:23:03 -0400 Subject: [PATCH 02/29] Added option to use the inner product as a dissimilarity measure. --- src/rsatoolbox/rdm/calc.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/rsatoolbox/rdm/calc.py b/src/rsatoolbox/rdm/calc.py index 76240908..d408083a 100644 --- a/src/rsatoolbox/rdm/calc.py +++ b/src/rsatoolbox/rdm/calc.py @@ -96,6 +96,8 @@ def calc_rdm( cv_descriptor, remove_mean) elif method == 'dotproduct': rdm = calc_rdm_dotproduct(dataset, descriptor) + elif method == 'mean_profile': + rdm = calc_mean_profile(dataset, descriptor) elif method == 'poisson': rdm = calc_rdm_poisson(dataset, descriptor, prior_lambda=prior_lambda, @@ -306,6 +308,27 @@ def calc_rdm_dotproduct( return _build_rdms(rdm, dataset, 'dotproduct', descriptor, desc) +def calc_mean_profile( + dataset: DatasetBase, + descriptor: Optional[str] = None): + """ + Args: + dataset (rsatoolbox.data.DatasetBase): + The dataset the RDM is computed from + descriptor (String): + obs_descriptor used to define the rows/columns of the RDM + defaults to one row/column per row in the dataset + remove_mean (bool): + whether the mean of each pattern shall be removed + before calculating dotproducts. + Returns: + rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM + """ + measurements, desc = _parse_input(dataset, descriptor, remove_mean=False) + vals = measurements.mean(axis=1) + return _build_rdms(vals, dataset, 'mean_activation_profile', descriptor, desc) + + def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None, remove_mean: bool = False): """ From 58fd5a99bea38098d980bcf0fd81f0acacf50318 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 13 Jun 2024 12:34:28 -0400 Subject: [PATCH 03/29] Added option to use the inner product as a dissimilarity measure. --- src/rsatoolbox/rdm/calc.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/rsatoolbox/rdm/calc.py b/src/rsatoolbox/rdm/calc.py index d408083a..76240908 100644 --- a/src/rsatoolbox/rdm/calc.py +++ b/src/rsatoolbox/rdm/calc.py @@ -96,8 +96,6 @@ def calc_rdm( cv_descriptor, remove_mean) elif method == 'dotproduct': rdm = calc_rdm_dotproduct(dataset, descriptor) - elif method == 'mean_profile': - rdm = calc_mean_profile(dataset, descriptor) elif method == 'poisson': rdm = calc_rdm_poisson(dataset, descriptor, prior_lambda=prior_lambda, @@ -308,27 +306,6 @@ def calc_rdm_dotproduct( return _build_rdms(rdm, dataset, 'dotproduct', descriptor, desc) -def calc_mean_profile( - dataset: DatasetBase, - descriptor: Optional[str] = None): - """ - Args: - dataset (rsatoolbox.data.DatasetBase): - The dataset the RDM is computed from - descriptor (String): - obs_descriptor used to define the rows/columns of the RDM - defaults to one row/column per row in the dataset - remove_mean (bool): - whether the mean of each pattern shall be removed - before calculating dotproducts. - Returns: - rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM - """ - measurements, desc = _parse_input(dataset, descriptor, remove_mean=False) - vals = measurements.mean(axis=1) - return _build_rdms(vals, dataset, 'mean_activation_profile', descriptor, desc) - - def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None, remove_mean: bool = False): """ From b458b059cbabb83b3c74ddab33fbbd6a7e3afc63 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 13 Jun 2024 13:12:24 -0400 Subject: [PATCH 04/29] Added option to use the inner product as a dissimilarity measure. --- src/rsatoolbox/rdm/calc.py | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/rsatoolbox/rdm/calc.py b/src/rsatoolbox/rdm/calc.py index 76240908..c1a9d2d3 100644 --- a/src/rsatoolbox/rdm/calc.py +++ b/src/rsatoolbox/rdm/calc.py @@ -96,6 +96,10 @@ def calc_rdm( cv_descriptor, remove_mean) elif method == 'dotproduct': rdm = calc_rdm_dotproduct(dataset, descriptor) + elif method == 'mean_profile': + rdm = calc_rdm_mean_profile(dataset, descriptor) + elif method == 'norm_profile': + rdm = calc_rdm_norm_profile(dataset, descriptor) elif method == 'poisson': rdm = calc_rdm_poisson(dataset, descriptor, prior_lambda=prior_lambda, @@ -306,6 +310,48 @@ def calc_rdm_dotproduct( return _build_rdms(rdm, dataset, 'dotproduct', descriptor, desc) +def calc_rdm_mean_profile( + dataset: DatasetBase, + descriptor: Optional[str] = None): + """ + Args: + dataset (rsatoolbox.data.DatasetBase): + The dataset the RDM is computed from + descriptor (String): + obs_descriptor used to define the rows/columns of the RDM + defaults to one row/column per row in the dataset + remove_mean (bool): + whether the mean of each pattern shall be removed + before calculating dotproducts. + Returns: + rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM + """ + measurements, desc = _parse_input(dataset, descriptor, remove_mean=False) + measurements = measurements.mean(axis=1) + return measurements + + +def calc_rdm_norm_profile( + dataset: DatasetBase, + descriptor: Optional[str] = None): + """ + Args: + dataset (rsatoolbox.data.DatasetBase): + The dataset the RDM is computed from + descriptor (String): + obs_descriptor used to define the rows/columns of the RDM + defaults to one row/column per row in the dataset + remove_mean (bool): + whether the mean of each pattern shall be removed + before calculating dotproducts. + Returns: + rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM + """ + measurements, desc = _parse_input(dataset, descriptor, remove_mean=False) + measurements = np.linalg.norm(measurements, axis=1) + return measurements + + def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None, remove_mean: bool = False): """ From 6f2c473d031a981503b9eb25b442abcd054f2173 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 13 Jun 2024 13:32:50 -0400 Subject: [PATCH 05/29] Added option to use the inner product as a dissimilarity measure. --- src/rsatoolbox/rdm/calc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rsatoolbox/rdm/calc.py b/src/rsatoolbox/rdm/calc.py index c1a9d2d3..00adffd9 100644 --- a/src/rsatoolbox/rdm/calc.py +++ b/src/rsatoolbox/rdm/calc.py @@ -111,8 +111,12 @@ def calc_rdm( prior_weight=prior_weight) else: raise NotImplementedError - if descriptor is not None: + if (descriptor is not None) and (method not in ['mean_profile', 'norm_profile']): rdm.sort_by(**{descriptor: 'alpha'}) + else: + desc = np.array(rdm.pattern_descriptors[descriptor]) + inds = desc.argsort() + rdm = rdm[inds] return rdm From b282f9648c61f9219fd7629785e68df2b7d3ea15 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 13 Jun 2024 13:36:34 -0400 Subject: [PATCH 06/29] Added option to use the inner product as a dissimilarity measure. --- src/rsatoolbox/rdm/calc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rsatoolbox/rdm/calc.py b/src/rsatoolbox/rdm/calc.py index 00adffd9..1e607c48 100644 --- a/src/rsatoolbox/rdm/calc.py +++ b/src/rsatoolbox/rdm/calc.py @@ -114,7 +114,7 @@ def calc_rdm( if (descriptor is not None) and (method not in ['mean_profile', 'norm_profile']): rdm.sort_by(**{descriptor: 'alpha'}) else: - desc = np.array(rdm.pattern_descriptors[descriptor]) + desc = np.unique(np.array(dataset.obs_descriptors[descriptor])) inds = desc.argsort() rdm = rdm[inds] return rdm From 0910d896dfacf19e18bf754f25d748a1845606b9 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Tue, 16 Jul 2024 18:49:04 -0400 Subject: [PATCH 07/29] Experimenting with using all-ones and all-zeros with the whitened cosine comparator, after correcting the distance covariance matrix. --- src/rsatoolbox/rdm/compare.py | 74 ++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 9a3e1d55..432748e8 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -3,12 +3,14 @@ """ Comparison methods for comparing two RDMs objects """ +import itertools as it import numpy as np import scipy.stats from scipy import linalg from scipy.optimize import minimize from scipy.stats._stats import _kendall_dis from scipy.spatial.distance import squareform +from statistics import mode from rsatoolbox.util.matrix import pairwise_contrast_sparse from rsatoolbox.util.matrix import pairwise_contrast from rsatoolbox.util.rdm_utils import _get_n_from_reduced_vectors @@ -132,7 +134,9 @@ def compare_cosine_cov_weighted(rdm1, rdm2, sigma_k=None): """ vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) - sim = _cosine_cov_weighted(vector1, vector2, sigma_k, nan_idx) + stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) + frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) + sim = _cosine_cov_weighted(vector1, vector2, frozen_inds, sigma_k, nan_idx) return sim @@ -259,16 +263,16 @@ def compare_neg_riemannian_distance(rdm1, rdm2, sigma_k=None): n_cond = _get_n_from_length(vector1.shape[1]) if sigma_k is None: sigma_k = np.eye(n_cond) - P = np.block([-1*np.ones((n_cond - 1, 1)), np.eye(n_cond - 1)]) - sigma_k_hat = P@sigma_k@P.T + P = np.block([-1 * np.ones((n_cond - 1, 1)), np.eye(n_cond - 1)]) + sigma_k_hat = P @ sigma_k @ P.T # construct RDM to 2nd-moment (G) transformation - pairs = pairwise_contrast(np.arange(n_cond-1)) + pairs = pairwise_contrast(np.arange(n_cond - 1)) pairs[pairs == -1] = 1 T = np.block([ - [np.eye(n_cond - 1), np.zeros((n_cond-1, vector1.shape[1] - n_cond + 1))], + [np.eye(n_cond - 1), np.zeros((n_cond - 1, vector1.shape[1] - n_cond + 1))], [0.5 * pairs, np.diag(-0.5 * np.ones(vector1.shape[1] - n_cond + 1))]]) - vec_G1 = vector1@np.transpose(T) - vec_G2 = vector2@np.transpose(T) + vec_G1 = vector1 @ np.transpose(T) + vec_G2 = vector2 @ np.transpose(T) sim = _all_combinations(vec_G1, vec_G2, _riemannian_distance, sigma_k_hat) return sim @@ -301,7 +305,7 @@ def _all_combinations(vectors1, vectors2, func, *args, **kwargs): return value -def _cosine_cov_weighted_slow(vector1, vector2, sigma_k=None, nan_idx=None): +def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, nan_idx=None): """computes the cosine similarities between two sets of vectors after whitening by their covariance. @@ -327,6 +331,9 @@ def _cosine_cov_weighted_slow(vector1, vector2, sigma_k=None, nan_idx=None): else: n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) + # Now adjust v to account for any frozen patterns. + v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] for i in range(vector1.shape[0])]) @@ -343,7 +350,37 @@ def _cosine_cov_weighted_slow(vector1, vector2, sigma_k=None, nan_idx=None): return cos -def _cosine_cov_weighted(vector1, vector2, sigma_k=None, nan_idx=None): +def _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds): + '''Rules: + 1. If a shared pattern is frozen, set covariance to zero + 2. If one pattern in a pair is frozen, set variance to half. + 3. If both patterns frozen, set variance to zero. + ''' + frozen_inds = set(frozen_inds) + if len(frozen_inds) == 0: + return v + for (i, (x1, y1)), (j, (x2, y2)) in it.product(enumerate(it.combinations(range(n_cond), 2)), + enumerate(it.combinations(range(n_cond), 2))): + if len(np.unique([x1, y1, x2, y2])) == 4: # if no shared patterns, skip + continue + + if len(np.unique([x1, y1, x2, y2])) == 2: # if the same pair: + num_frozen = np.sum([num in frozen_inds for num in [x1, y1]]) + if num_frozen == 2: + v[i, j] = 0 + elif num_frozen == 1: + v[i, j] /= 2 + + if len(np.unique([x1, y1, x2, y2])) == 3: # if one shared pattern + # Check if the shared pattern is frozen, and if so, set the entry to zero. + rep_ind = mode([x1, y1, x2, y2]) + if rep_ind in frozen_inds: + v[i, j] = 0 + + return v + + +def _cosine_cov_weighted(vector1, vector2, frozen_inds=[], sigma_k=None, nan_idx=None): """computes the cosine angles between two sets of vectors weighted by the covariance If no covariance is given this is computed using the linear CKA, @@ -365,7 +402,7 @@ def _cosine_cov_weighted(vector1, vector2, sigma_k=None, nan_idx=None): """ if (sigma_k is not None) and (sigma_k.ndim >= 2): cos = _cosine_cov_weighted_slow( - vector1, vector2, sigma_k=sigma_k, nan_idx=nan_idx) + vector1, vector2, frozen_inds=frozen_inds, sigma_k=sigma_k, nan_idx=nan_idx) else: if nan_idx is None: nan_idx = np.ones(vector1[0].shape, bool) @@ -428,8 +465,8 @@ def _cov_weighting(vector, nan_idx, sigma_k=None): diag = np.concatenate((np.ones((n_dist, 1)) / 2, np.ones((n_cond, 1)))) # one line version much faster here! vector_w = vector_w - ( - vector_w - @ sumI @ np.linalg.inv(sumI.T @ (diag * sumI)) @ (diag * sumI).T) + vector_w + @ sumI @ np.linalg.inv(sumI.T @ (diag * sumI)) @ (diag * sumI).T) if sigma_k is not None: if sigma_k.ndim == 1: sigma_k_sqrt = np.sqrt(sigma_k) @@ -492,12 +529,13 @@ def _riemannian_distance(vec_G1, vec_G2, sigma_k): negative riemannian distance """ n_cond = _get_n_from_length(len(vec_G1)) - G1 = np.diag(vec_G1[0:(n_cond-1)])+squareform(vec_G1[(n_cond-1):len(vec_G1)]) - G2 = np.diag(vec_G2[0:(n_cond-1)])+squareform(vec_G2[(n_cond-1):len(vec_G2)]) + G1 = np.diag(vec_G1[0:(n_cond - 1)]) + squareform(vec_G1[(n_cond - 1):len(vec_G1)]) + G2 = np.diag(vec_G2[0:(n_cond - 1)]) + squareform(vec_G2[(n_cond - 1):len(vec_G2)]) def fun(theta): return np.sqrt((np.log(linalg.eigvalsh( - np.exp(theta[0]) * G1 + np.exp(theta[1]) * sigma_k, G2))**2).sum()) + np.exp(theta[0]) * G1 + np.exp(theta[1]) * sigma_k, G2)) ** 2).sum()) + theta = minimize(fun, (0, 0), method='Nelder-Mead') neg_riem = -1 * theta.fun return neg_riem @@ -542,8 +580,8 @@ def _tau_a(vector1, vector2): (vector2[1:] != vector2[:-1]), True] cnt = np.diff(np.nonzero(obs)[0]).astype('int64', copy=False) ntie = (cnt * (cnt - 1) // 2).sum() # joint ties - xtie, _, _ = _count_rank_tie(vector1) # ties in x, stats - ytie, _, _ = _count_rank_tie(vector2) # ties in y, stats + xtie, _, _ = _count_rank_tie(vector1) # ties in x, stats + ytie, _, _ = _count_rank_tie(vector2) # ties in y, stats tot = (size * (size - 1)) // 2 # Note that tot = con + dis + (xtie - ntie) + (ytie - ntie) + ntie # = con + dis + xtie + ytie - ntie @@ -569,7 +607,7 @@ def _count_rank_tie(ranks): cnt = cnt[cnt > 1] return ((cnt * (cnt - 1) // 2).sum(), (cnt * (cnt - 1.) * (cnt - 2)).sum(), - (cnt * (cnt - 1.) * (2*cnt + 5)).sum()) + (cnt * (cnt - 1.) * (2 * cnt + 5)).sum()) def _get_v(n_cond, sigma_k): From d524aab9aba7a3d7ee1fa0aa60dae0ee0215ad69 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Tue, 16 Jul 2024 20:33:23 -0400 Subject: [PATCH 08/29] Now removing the all-zero rows from the covariance matrix (corresponding to distances between fixed patterns) --- src/rsatoolbox/rdm/compare.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 432748e8..718aa094 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -333,6 +333,11 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # Omit any all-zero rows and columns. + nonzero_rows = np.where(~np.all(v == 0, axis=1))[0] + v = v[nonzero_rows][:, nonzero_rows] + vector1 = vector1[:, nonzero_rows] + vector2 = vector2[:, nonzero_rows] # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] From 8e613e731bb4d7d844099cfff9dc618181d6fac0 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Tue, 16 Jul 2024 20:40:36 -0400 Subject: [PATCH 09/29] Now removing the all-zero rows from the covariance matrix (corresponding to distances between fixed patterns) --- src/rsatoolbox/rdm/compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 718aa094..9e6b2239 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -334,7 +334,7 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na # Now adjust v to account for any frozen patterns. v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns. - nonzero_rows = np.where(~np.all(v == 0, axis=1))[0] + nonzero_rows = np.where(~np.all(v.toarray() == 0, axis=1))[0] v = v[nonzero_rows][:, nonzero_rows] vector1 = vector1[:, nonzero_rows] vector2 = vector2[:, nonzero_rows] From 0af82606b6f6ae6d7fa4d132cf1924b9495c7eff Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Tue, 30 Jul 2024 17:38:06 -0400 Subject: [PATCH 10/29] Commit before changing branch --- src/rsatoolbox/rdm/compare.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 9e6b2239..104e6d99 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -136,6 +136,8 @@ def compare_cosine_cov_weighted(rdm1, rdm2, sigma_k=None): vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) + sigma_k[frozen_inds, :] = 0 + sigma_k[:, frozen_inds] = 0 sim = _cosine_cov_weighted(vector1, vector2, frozen_inds, sigma_k, nan_idx) return sim @@ -156,10 +158,14 @@ def compare_correlation_cov_weighted(rdm1, rdm2, sigma_k=None): """ vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) + stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) + frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) + sigma_k[frozen_inds, :] = 0 + sigma_k[:, frozen_inds] = 0 # compute by subtracting the mean and then calculating cosine similarity vector1 = vector1 - np.mean(vector1, 1, keepdims=True) vector2 = vector2 - np.mean(vector2, 1, keepdims=True) - sim = _cosine_cov_weighted(vector1, vector2, sigma_k, nan_idx) + sim = _cosine_cov_weighted(vector1, vector2, frozen_inds, sigma_k, nan_idx) return sim From 044505161065e31773904bfd2ea88d912ba76185 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Tue, 30 Jul 2024 19:01:48 -0400 Subject: [PATCH 11/29] Commit before changing branch --- src/rsatoolbox/rdm/compare.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 104e6d99..3200e428 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -136,8 +136,8 @@ def compare_cosine_cov_weighted(rdm1, rdm2, sigma_k=None): vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) - sigma_k[frozen_inds, :] = 0 - sigma_k[:, frozen_inds] = 0 + # sigma_k[frozen_inds, :] = 0 + # sigma_k[:, frozen_inds] = 0 sim = _cosine_cov_weighted(vector1, vector2, frozen_inds, sigma_k, nan_idx) return sim @@ -160,8 +160,8 @@ def compare_correlation_cov_weighted(rdm1, rdm2, sigma_k=None): vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) - sigma_k[frozen_inds, :] = 0 - sigma_k[:, frozen_inds] = 0 + # sigma_k[frozen_inds, :] = 0 + # sigma_k[:, frozen_inds] = 0 # compute by subtracting the mean and then calculating cosine similarity vector1 = vector1 - np.mean(vector1, 1, keepdims=True) vector2 = vector2 - np.mean(vector2, 1, keepdims=True) From 2eedeebec01fd82ec02e6b5c43d01cc394506f26 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 1 Aug 2024 16:28:18 -0400 Subject: [PATCH 12/29] Trying just the diagonals --- src/rsatoolbox/rdm/compare.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 3200e428..7ffb5567 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -338,12 +338,12 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns. - nonzero_rows = np.where(~np.all(v.toarray() == 0, axis=1))[0] - v = v[nonzero_rows][:, nonzero_rows] - vector1 = vector1[:, nonzero_rows] - vector2 = vector2[:, nonzero_rows] + # nonzero_rows = np.where(~np.all(v.toarray() == 0, axis=1))[0] + # v = v[nonzero_rows][:, nonzero_rows] + # vector1 = vector1[:, nonzero_rows] + # vector2 = vector2[:, nonzero_rows] # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] From aba8898d3ffe7d4a644c76ab22daf2f32fd12bbc Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 3 Aug 2024 11:05:34 -0400 Subject: [PATCH 13/29] Now trying the framed RSA again, using diagonal covariance matrix. --- src/rsatoolbox/rdm/compare.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 7ffb5567..08fe2dc1 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -338,12 +338,12 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) - # Omit any all-zero rows and columns. - # nonzero_rows = np.where(~np.all(v.toarray() == 0, axis=1))[0] - # v = v[nonzero_rows][:, nonzero_rows] - # vector1 = vector1[:, nonzero_rows] - # vector2 = vector2[:, nonzero_rows] + v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # Omit any all-zero rows and columns, keeping as a sparse matrix. + nonzero_rows = np.where(v.sum(axis=1) != 0)[0] + v = v[nonzero_rows][:, nonzero_rows] + vector1 = vector1[:, nonzero_rows] + vector2 = vector2[:, nonzero_rows] # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] From 5736b8a43e7dec45c7d8a9e63abcd63d543d103b Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 3 Aug 2024 11:22:06 -0400 Subject: [PATCH 14/29] Now trying the framed RSA again, using diagonal covariance matrix. --- src/rsatoolbox/rdm/compare.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 08fe2dc1..cee1b7eb 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -338,12 +338,12 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns, keeping as a sparse matrix. - nonzero_rows = np.where(v.sum(axis=1) != 0)[0] - v = v[nonzero_rows][:, nonzero_rows] - vector1 = vector1[:, nonzero_rows] - vector2 = vector2[:, nonzero_rows] + # nonzero_rows = np.where(v.sum(axis=1) != 0)[0] + # v = v[nonzero_rows][:, nonzero_rows] + # vector1 = vector1[:, nonzero_rows] + # vector2 = vector2[:, nonzero_rows] # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] From b2c341acf91e489914d7d49808b3b28bfd39dceb Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 3 Aug 2024 11:52:39 -0400 Subject: [PATCH 15/29] Now trying the framed RSA again, using diagonal covariance matrix. --- src/rsatoolbox/rdm/compare.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index cee1b7eb..6f938599 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -5,6 +5,7 @@ """ import itertools as it import numpy as np +import os import scipy.stats from scipy import linalg from scipy.optimize import minimize @@ -338,12 +339,12 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns, keeping as a sparse matrix. - # nonzero_rows = np.where(v.sum(axis=1) != 0)[0] - # v = v[nonzero_rows][:, nonzero_rows] - # vector1 = vector1[:, nonzero_rows] - # vector2 = vector2[:, nonzero_rows] + nonzero_rows = np.where(v.sum(axis=1) != 0)[0] + v = v[nonzero_rows][:, nonzero_rows] + vector1 = vector1[:, nonzero_rows] + vector2 = vector2[:, nonzero_rows] # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] @@ -370,6 +371,13 @@ def _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds): frozen_inds = set(frozen_inds) if len(frozen_inds) == 0: return v + + # Fetch cached value if it exists. + fname = "_".join(sorted(frozen_inds)) + f"_{n_cond}conds" + "_cov_matrix.npz" + if os.path.exists(fname): + v = scipy.sparse.load_npz(fname) + return v + for (i, (x1, y1)), (j, (x2, y2)) in it.product(enumerate(it.combinations(range(n_cond), 2)), enumerate(it.combinations(range(n_cond), 2))): if len(np.unique([x1, y1, x2, y2])) == 4: # if no shared patterns, skip @@ -387,7 +395,7 @@ def _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds): rep_ind = mode([x1, y1, x2, y2]) if rep_ind in frozen_inds: v[i, j] = 0 - + scipy.sparse.save_npz(fname, v) return v From 00c5539ff4c269449386dd553d414958bf813b9c Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 3 Aug 2024 11:58:37 -0400 Subject: [PATCH 16/29] Now trying the framed RSA again, using diagonal covariance matrix. --- src/rsatoolbox/rdm/compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 6f938599..220d6d7a 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -373,7 +373,7 @@ def _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds): return v # Fetch cached value if it exists. - fname = "_".join(sorted(frozen_inds)) + f"_{n_cond}conds" + "_cov_matrix.npz" + fname = "_".join(str(num) for num in sorted(frozen_inds)) + f"_{n_cond}conds" + "_cov_matrix.npz" if os.path.exists(fname): v = scipy.sparse.load_npz(fname) return v From 34a32a085fcabf338630ec19c6ab61c78bd6069b Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 3 Aug 2024 12:14:17 -0400 Subject: [PATCH 17/29] Now trying the framed RSA again, using diagonal covariance matrix. --- src/rsatoolbox/rdm/compare.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 220d6d7a..62b1c7cb 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -6,6 +6,7 @@ import itertools as it import numpy as np import os +import pickle import scipy.stats from scipy import linalg from scipy.optimize import minimize @@ -373,9 +374,10 @@ def _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds): return v # Fetch cached value if it exists. - fname = "_".join(str(num) for num in sorted(frozen_inds)) + f"_{n_cond}conds" + "_cov_matrix.npz" + fname = "_".join(str(num) for num in sorted(frozen_inds)) + f"_{n_cond}conds" + "_cov_matrix.pkl" if os.path.exists(fname): - v = scipy.sparse.load_npz(fname) + with open(fname, 'rb') as f: + v = pickle.load(f) return v for (i, (x1, y1)), (j, (x2, y2)) in it.product(enumerate(it.combinations(range(n_cond), 2)), @@ -395,7 +397,8 @@ def _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds): rep_ind = mode([x1, y1, x2, y2]) if rep_ind in frozen_inds: v[i, j] = 0 - scipy.sparse.save_npz(fname, v) + with open(fname, 'wb') as f: + pickle.dump(v, f, pickle.HIGHEST_PROTOCOL) return v From d4f0d7f83889d662120db5ce81ebde2a33c19ec7 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 10 Aug 2024 20:13:53 -0400 Subject: [PATCH 18/29] Now trying the framed RSA again, using diagonal covariance matrix. --- src/rsatoolbox/rdm/compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 62b1c7cb..99327c60 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -138,8 +138,8 @@ def compare_cosine_cov_weighted(rdm1, rdm2, sigma_k=None): vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) - # sigma_k[frozen_inds, :] = 0 - # sigma_k[:, frozen_inds] = 0 + sigma_k[frozen_inds, :] = 0 + sigma_k[:, frozen_inds] = 0 sim = _cosine_cov_weighted(vector1, vector2, frozen_inds, sigma_k, nan_idx) return sim From 917c9a9d558be03dc720d815b179891caaa37927 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Mon, 12 Aug 2024 16:00:59 -0400 Subject: [PATCH 19/29] Trying another way of correcting the covariance. --- src/rsatoolbox/rdm/compare.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 99327c60..fb0d5b1d 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -334,13 +334,13 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na """ if nan_idx is not None: n_cond = _get_n_from_reduced_vectors(nan_idx.reshape(1, -1)) - v = _get_v(n_cond, sigma_k) + v = _get_v(n_cond, sigma_k, frozen_inds) v = v[nan_idx][:, nan_idx] else: n_cond = _get_n_from_reduced_vectors(vector1) - v = _get_v(n_cond, sigma_k) + v = _get_v(n_cond, sigma_k, frozen_inds) # Now adjust v to account for any frozen patterns. - v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns, keeping as a sparse matrix. nonzero_rows = np.where(v.sum(axis=1) != 0)[0] v = v[nonzero_rows][:, nonzero_rows] @@ -632,10 +632,11 @@ def _count_rank_tie(ranks): (cnt * (cnt - 1.) * (2 * cnt + 5)).sum()) -def _get_v(n_cond, sigma_k): +def _get_v(n_cond, sigma_k, frozen_inds=[]): """ get the rdm covariance from sigma_k """ # calculate Xi c_mat = pairwise_contrast_sparse(np.arange(n_cond)) + c_mat[:, frozen_inds] = 0 # Testing whether this is the right way to treat the frozen indices. if sigma_k is None: xi = c_mat @ c_mat.transpose() elif sigma_k.ndim == 1: From 22633145c7b0ce1bd970da214e5b68a0114ad65a Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Mon, 12 Aug 2024 16:07:38 -0400 Subject: [PATCH 20/29] Trying another way of correcting the covariance. --- src/rsatoolbox/rdm/compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index fb0d5b1d..eb762463 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -138,8 +138,8 @@ def compare_cosine_cov_weighted(rdm1, rdm2, sigma_k=None): vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) - sigma_k[frozen_inds, :] = 0 - sigma_k[:, frozen_inds] = 0 + # sigma_k[frozen_inds, :] = 0 + # sigma_k[:, frozen_inds] = 0 sim = _cosine_cov_weighted(vector1, vector2, frozen_inds, sigma_k, nan_idx) return sim From 93232b0e7945ba93dbeccbf956d1ce6eb95f5e73 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 15 Aug 2024 09:54:22 -0400 Subject: [PATCH 21/29] Trying another way of correcting the covariance. --- src/rsatoolbox/rdm/compare.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index eb762463..62b1c7cb 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -334,13 +334,13 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na """ if nan_idx is not None: n_cond = _get_n_from_reduced_vectors(nan_idx.reshape(1, -1)) - v = _get_v(n_cond, sigma_k, frozen_inds) + v = _get_v(n_cond, sigma_k) v = v[nan_idx][:, nan_idx] else: n_cond = _get_n_from_reduced_vectors(vector1) - v = _get_v(n_cond, sigma_k, frozen_inds) + v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns, keeping as a sparse matrix. nonzero_rows = np.where(v.sum(axis=1) != 0)[0] v = v[nonzero_rows][:, nonzero_rows] @@ -632,11 +632,10 @@ def _count_rank_tie(ranks): (cnt * (cnt - 1.) * (2 * cnt + 5)).sum()) -def _get_v(n_cond, sigma_k, frozen_inds=[]): +def _get_v(n_cond, sigma_k): """ get the rdm covariance from sigma_k """ # calculate Xi c_mat = pairwise_contrast_sparse(np.arange(n_cond)) - c_mat[:, frozen_inds] = 0 # Testing whether this is the right way to treat the frozen indices. if sigma_k is None: xi = c_mat @ c_mat.transpose() elif sigma_k.ndim == 1: From ff56b3ce9cd7d90e02bee126840d0886d9c41314 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Wed, 21 Aug 2024 14:06:22 -0700 Subject: [PATCH 22/29] Trying all the framed RSA options with no covariance adjustment: does it actually improve identification performance? --- src/rsatoolbox/rdm/compare.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 62b1c7cb..a4d221b7 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -340,12 +340,12 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns, keeping as a sparse matrix. - nonzero_rows = np.where(v.sum(axis=1) != 0)[0] - v = v[nonzero_rows][:, nonzero_rows] - vector1 = vector1[:, nonzero_rows] - vector2 = vector2[:, nonzero_rows] + # nonzero_rows = np.where(v.sum(axis=1) != 0)[0] + # v = v[nonzero_rows][:, nonzero_rows] + # vector1 = vector1[:, nonzero_rows] + # vector2 = vector2[:, nonzero_rows] # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] From 6c1f1ebf049ce23de38264b7000cad985985e748 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sat, 31 Aug 2024 12:55:30 -0400 Subject: [PATCH 23/29] Comparing constant c values across a wide range --- src/rsatoolbox/rdm/compare.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index a4d221b7..62b1c7cb 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -340,12 +340,12 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns, keeping as a sparse matrix. - # nonzero_rows = np.where(v.sum(axis=1) != 0)[0] - # v = v[nonzero_rows][:, nonzero_rows] - # vector1 = vector1[:, nonzero_rows] - # vector2 = vector2[:, nonzero_rows] + nonzero_rows = np.where(v.sum(axis=1) != 0)[0] + v = v[nonzero_rows][:, nonzero_rows] + vector1 = vector1[:, nonzero_rows] + vector2 = vector2[:, nonzero_rows] # compute V^-1 vector1/2 for all vectors by solving Vx = vector1/2 vector1_m = np.array([scipy.sparse.linalg.cg(v, vector1[i], atol=0)[0] From b2bd269b71396854223074a8c715f89dc7b6941a Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Wed, 4 Sep 2024 12:38:41 -0400 Subject: [PATCH 24/29] Testing whether fixing the dof for the noise covariance fixes our problem. --- src/rsatoolbox/data/noise.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/rsatoolbox/data/noise.py b/src/rsatoolbox/data/noise.py index ebc90eac..07541565 100755 --- a/src/rsatoolbox/data/noise.py +++ b/src/rsatoolbox/data/noise.py @@ -33,7 +33,8 @@ def _check_demean(matrix): dof = matrix.shape[0] - 1 elif matrix.ndim == 3: matrix -= np.mean(matrix, axis=2, keepdims=True) - dof = (matrix.shape[0] - 1) * matrix.shape[2] + # dof = (matrix.shape[0] - 1) * matrix.shape[2] JohnMark checking + dof = matrix.shape[0] * (matrix.shape[2] - 1) matrix = matrix.transpose(0, 2, 1).reshape( matrix.shape[0] * matrix.shape[2], matrix.shape[1]) else: @@ -147,13 +148,13 @@ def _covariance_eye(matrix, dof): b2 = min(d2, b2) # shrink covariance matrix s_shrink = b2 / d2 * m * np.eye(s.shape[0]) \ - + (d2-b2) / d2 * s + + (d2 - b2) / d2 * s # correction for degrees of freedom s_shrink = s_shrink * matrix.shape[0] / dof return s_shrink -def _covariance_diag(matrix, dof, mem_threshold=(10**9)/8): +def _covariance_diag(matrix, dof, mem_threshold=(10 ** 9) / 8): """ computes the sample covariance matrix from a 2d-array. matrix should be demeaned before! @@ -189,11 +190,11 @@ def _covariance_diag(matrix, dof, mem_threshold=(10**9)/8): s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / (matrix.shape[0] - 1) s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / (matrix.shape[0] - 1) var_hat = matrix.shape[0] / dof ** 2 \ - * (s2_mean - s_mean ** 2) + * (s2_mean - s_mean ** 2) mask = ~np.eye(s.shape[0], dtype=bool) lamb = np.sum(var_hat[mask]) / np.sum(s_mean[mask] ** 2) lamb = max(min(lamb, 1), 0) - scaling = np.eye(s.shape[0]) + (1-lamb) * mask + scaling = np.eye(s.shape[0]) + (1 - lamb) * mask s_shrink = s * scaling return s_shrink From 7e42934bd813092860cdd8098670447a31c7f0bb Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Wed, 4 Sep 2024 18:38:52 -0400 Subject: [PATCH 25/29] Testing whether fixing the dof for the noise covariance fixes our problem. --- src/rsatoolbox/data/noise.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/rsatoolbox/data/noise.py b/src/rsatoolbox/data/noise.py index 07541565..9423cbad 100755 --- a/src/rsatoolbox/data/noise.py +++ b/src/rsatoolbox/data/noise.py @@ -187,8 +187,10 @@ def _covariance_diag(matrix, dof, mem_threshold=(10 ** 9) / 8): s = s_sum / dof var = np.diag(s) std = np.sqrt(var) - s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / (matrix.shape[0] - 1) - s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / (matrix.shape[0] - 1) + # s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / (matrix.shape[0] - 1) JohnMark check + # s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / (matrix.shape[0] - 1) + s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / dof + s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / dof var_hat = matrix.shape[0] / dof ** 2 \ * (s2_mean - s_mean ** 2) mask = ~np.eye(s.shape[0], dtype=bool) @@ -196,6 +198,13 @@ def _covariance_diag(matrix, dof, mem_threshold=(10 ** 9) / 8): lamb = max(min(lamb, 1), 0) scaling = np.eye(s.shape[0]) + (1 - lamb) * mask s_shrink = s * scaling + + mean_eigenvalue = np.mean(np.linalg.eigvals(s_shrink)) + print(f"data shape: {matrix.shape}") + print(f"dof: {dof}") + print(f"lambda: {lamb}") + print(f"mean var: {np.mean(var)}") + print(f"mean eigenvalue: {mean_eigenvalue}") return s_shrink From fb8acbedcd158c797e67144d03d39db303dede1f Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 5 Sep 2024 09:07:04 -0400 Subject: [PATCH 26/29] Running more of the tuning c-norm tests for framed RSA. --- src/rsatoolbox/data/noise.py | 43 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/rsatoolbox/data/noise.py b/src/rsatoolbox/data/noise.py index 9423cbad..75c50460 100755 --- a/src/rsatoolbox/data/noise.py +++ b/src/rsatoolbox/data/noise.py @@ -42,7 +42,7 @@ def _check_demean(matrix): return matrix, dof -def _estimate_covariance(matrix, dof, method): +def _estimate_covariance(matrix, dof, method, lamb_opt=None): """ calls the right covariance estimation function based on the ""method" argument Args: @@ -67,7 +67,7 @@ def _estimate_covariance(matrix, dof, method): if method == 'shrinkage_eye': cov_mat = _covariance_eye(matrix, dof) elif method == 'shrinkage_diag': - cov_mat = _covariance_diag(matrix, dof) + cov_mat = _covariance_diag(matrix, dof, lamb_opt) elif method == 'diag': cov_mat = _variance(matrix, dof) elif method == 'full': @@ -154,7 +154,7 @@ def _covariance_eye(matrix, dof): return s_shrink -def _covariance_diag(matrix, dof, mem_threshold=(10 ** 9) / 8): +def _covariance_diag(matrix, dof, lamb_opt=None, mem_threshold=(10 ** 9) / 8): """ computes the sample covariance matrix from a 2d-array. matrix should be demeaned before! @@ -185,26 +185,31 @@ def _covariance_diag(matrix, dof, mem_threshold=(10 ** 9) / 8): s_sum += xt_x s2_sum += xt_x ** 2 s = s_sum / dof - var = np.diag(s) - std = np.sqrt(var) - # s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / (matrix.shape[0] - 1) JohnMark check - # s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / (matrix.shape[0] - 1) - s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / dof - s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / dof - var_hat = matrix.shape[0] / dof ** 2 \ - * (s2_mean - s_mean ** 2) mask = ~np.eye(s.shape[0], dtype=bool) - lamb = np.sum(var_hat[mask]) / np.sum(s_mean[mask] ** 2) - lamb = max(min(lamb, 1), 0) + if lamb_opt is None: + var = np.diag(s) + std = np.sqrt(var) + # s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / (matrix.shape[0] - 1) JohnMark check + # s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / (matrix.shape[0] - 1) + s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / dof + s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / dof + var_hat = matrix.shape[0] / dof ** 2 \ + * (s2_mean - s_mean ** 2) + lamb = np.sum(var_hat[mask]) / np.sum(s_mean[mask] ** 2) + lamb = max(min(lamb, 1), 0) + else: + lamb = lamb_opt scaling = np.eye(s.shape[0]) + (1 - lamb) * mask s_shrink = s * scaling - mean_eigenvalue = np.mean(np.linalg.eigvals(s_shrink)) + mean_shrunk_eigenvalue = np.mean(np.linalg.eigvals(s_shrink)) + mean_full_eigenvalue = np.mean(np.linalg.eigvals(s)) print(f"data shape: {matrix.shape}") print(f"dof: {dof}") print(f"lambda: {lamb}") print(f"mean var: {np.mean(var)}") - print(f"mean eigenvalue: {mean_eigenvalue}") + print(f"mean full eigenvalue: {mean_full_eigenvalue}") + print(f"mean shrunk eigenvalue: {mean_shrunk_eigenvalue}") return s_shrink @@ -282,7 +287,7 @@ def prec_from_residuals(residuals, dof=None, method='shrinkage_diag'): return prec -def cov_from_measurements(dataset, obs_desc, dof=None, method='shrinkage_diag'): +def cov_from_measurements(dataset, obs_desc, dof=None, method='shrinkage_diag', lamb_opt=None): """ Estimates a covariance matrix from measurements. Allows for shrinkage estimates. Use 'method' to choose which estimation method is used. @@ -321,11 +326,11 @@ def cov_from_measurements(dataset, obs_desc, dof=None, method='shrinkage_diag'): "obs_desc not contained in the dataset's obs_descriptors" tensor, _ = dataset.get_measurements_tensor(obs_desc) # calculate sample covariance matrix s - cov_mat = _estimate_covariance(tensor, dof, method) + cov_mat = _estimate_covariance(tensor, dof, method, lamb_opt) return cov_mat -def prec_from_measurements(dataset, obs_desc, dof=None, method='shrinkage_diag'): +def prec_from_measurements(dataset, obs_desc, dof=None, method='shrinkage_diag', lamb_opt=None): """ Estimates the covariance matrix from measurements and finds its multiplicative inverse (= the precision matrix) @@ -347,7 +352,7 @@ def prec_from_measurements(dataset, obs_desc, dof=None, method='shrinkage_diag') numpy.ndarray (or list): sigma_p: precision matrix over channels """ - cov = cov_from_measurements(dataset, obs_desc, dof=dof, method=method) + cov = cov_from_measurements(dataset, obs_desc, dof=dof, method=method, lamb_opt=lamb_opt) if not isinstance(cov, np.ndarray): prec = [None] * len(cov) for i, cov_i in enumerate(cov): From e56add1b0ecc26af2f0db3e0a44eb3c70dba9e22 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Thu, 5 Sep 2024 09:11:09 -0400 Subject: [PATCH 27/29] Running more of the tuning c-norm tests for framed RSA. --- src/rsatoolbox/data/noise.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rsatoolbox/data/noise.py b/src/rsatoolbox/data/noise.py index 75c50460..e032e46a 100755 --- a/src/rsatoolbox/data/noise.py +++ b/src/rsatoolbox/data/noise.py @@ -185,10 +185,10 @@ def _covariance_diag(matrix, dof, lamb_opt=None, mem_threshold=(10 ** 9) / 8): s_sum += xt_x s2_sum += xt_x ** 2 s = s_sum / dof + var = np.diag(s) + std = np.sqrt(var) mask = ~np.eye(s.shape[0], dtype=bool) if lamb_opt is None: - var = np.diag(s) - std = np.sqrt(var) # s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / (matrix.shape[0] - 1) JohnMark check # s2_mean = s2_sum / np.expand_dims(var, 0) / np.expand_dims(var, 1) / (matrix.shape[0] - 1) s_mean = s_sum / np.expand_dims(std, 0) / np.expand_dims(std, 1) / dof From 95c218a77932645f2e39a51189c4229087fe8577 Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Sun, 15 Sep 2024 17:01:08 -0400 Subject: [PATCH 28/29] Computing covariance using all available data for each subject. --- src/rsatoolbox/data/noise.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rsatoolbox/data/noise.py b/src/rsatoolbox/data/noise.py index e032e46a..bd961be8 100755 --- a/src/rsatoolbox/data/noise.py +++ b/src/rsatoolbox/data/noise.py @@ -32,11 +32,12 @@ def _check_demean(matrix): matrix = matrix - np.mean(matrix, axis=0, keepdims=True) dof = matrix.shape[0] - 1 elif matrix.ndim == 3: - matrix -= np.mean(matrix, axis=2, keepdims=True) + matrix -= np.nanmean(matrix, axis=2, keepdims=True) # dof = (matrix.shape[0] - 1) * matrix.shape[2] JohnMark checking dof = matrix.shape[0] * (matrix.shape[2] - 1) matrix = matrix.transpose(0, 2, 1).reshape( matrix.shape[0] * matrix.shape[2], matrix.shape[1]) + matrix = matrix[~np.isnan(matrix).any(axis=1)] else: raise ValueError('Matrix for covariance estimation has wrong # of dimensions!') return matrix, dof From 2b80b50916d746b8744593e317a878d712210adb Mon Sep 17 00:00:00 2001 From: JohnMark Taylor Date: Mon, 2 Dec 2024 21:56:20 -0500 Subject: [PATCH 29/29] Fixed the calculation of the RDM covariance matrix for the frozen patterns. --- src/rsatoolbox/rdm/compare.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rsatoolbox/rdm/compare.py b/src/rsatoolbox/rdm/compare.py index 62b1c7cb..7f74a120 100644 --- a/src/rsatoolbox/rdm/compare.py +++ b/src/rsatoolbox/rdm/compare.py @@ -138,8 +138,8 @@ def compare_cosine_cov_weighted(rdm1, rdm2, sigma_k=None): vector1, vector2, nan_idx = _parse_input_rdms(rdm1, rdm2) stim_nums = np.array(rdm1.pattern_descriptors['stimulus']) frozen_inds = list(np.where(stim_nums == -1)[0]) + list(np.where(stim_nums == -2)[0]) - # sigma_k[frozen_inds, :] = 0 - # sigma_k[:, frozen_inds] = 0 + sigma_k[frozen_inds, :] = 0 + sigma_k[:, frozen_inds] = 0 sim = _cosine_cov_weighted(vector1, vector2, frozen_inds, sigma_k, nan_idx) return sim @@ -340,7 +340,7 @@ def _cosine_cov_weighted_slow(vector1, vector2, frozen_inds=[], sigma_k=None, na n_cond = _get_n_from_reduced_vectors(vector1) v = _get_v(n_cond, sigma_k) # Now adjust v to account for any frozen patterns. - v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) + # v = _correct_covariance_for_frozen_patterns(v, n_cond, frozen_inds) # Omit any all-zero rows and columns, keeping as a sparse matrix. nonzero_rows = np.where(v.sum(axis=1) != 0)[0] v = v[nonzero_rows][:, nonzero_rows]