diff --git a/cityseer/algos/data.py b/cityseer/algos/data.py index 419a87d0..3e40c7fd 100644 --- a/cityseer/algos/data.py +++ b/cityseer/algos/data.py @@ -420,18 +420,15 @@ def aggregate_landuses(node_data: np.ndarray, checks.check_data_map(data_map, check_assigned=True) # raises ValueError data points are not assigned to a network checks.check_distances_and_betas(distances, betas) # check landuse encodings - compute_landuses = False if len(landuse_encodings) == 0: - if len(mixed_use_hill_keys) != 0 or len(mixed_use_other_keys) != 0 or len(accessibility_keys) != 0: - raise ValueError('Mixed use metrics or land-use accessibilities require an array of landuse labels.') + raise ValueError('Mixed use metrics or land-use accessibilities require an array of landuse labels.') elif len(landuse_encodings) != len(data_map): raise ValueError('The number of landuse encodings does not match the number of data points.') else: checks.check_categorical_data(landuse_encodings) # catch completely missing metrics if len(mixed_use_hill_keys) == 0 and len(mixed_use_other_keys) == 0 and len(accessibility_keys) == 0: - raise ValueError( - 'No metrics specified, please specify at least one metric to compute.') + raise ValueError('No metrics specified, please specify at least one metric to compute.') # catch missing qs if len(mixed_use_hill_keys) != 0 and len(qs) == 0: raise ValueError('Hill diversity measures require that at least one value of q is specified.') @@ -491,18 +488,16 @@ def disp_check(disp_matrix): netw_nodes_live = node_data[:, 2] # setup data structures # hill mixed uses are structured separately to take values of q into account - mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), np.nan) # 4 dim - mixed_use_other_data = np.full((3, d_n, netw_n), np.nan) # 3 dim + mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), 0.0) # 4 dim + mixed_use_other_data = np.full((3, d_n, netw_n), 0.0) # 3 dim accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0) accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n), 0.0) # iterate through each vert and aggregate + # parallelise over n nodes: + # each distance or stat array index is therefore only touched by one thread at a time + # i.e. 
no need to use inner array deductions as with centralities steps = int(netw_n / 10000) for netw_src_idx in prange(netw_n): - # shadowed arrays for non-race reductions - _mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), np.nan) # 4 dim - _mixed_use_other_data = np.full((3, d_n, netw_n), np.nan) # 3 dim - _accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0) - _accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n), 0.0) if not suppress_progress: checks.progress_bar(netw_src_idx, netw_n, steps) # only compute for live nodes @@ -543,8 +538,8 @@ def disp_check(disp_matrix): # if within distance, and if in accessibility keys, then aggregate accessibility too for ac_idx, ac_code in enumerate(accessibility_keys): if ac_code == cl_code: - _accessibility_data[ac_idx, d_idx, netw_src_idx] += 1 - _accessibility_data_wt[ac_idx, d_idx, netw_src_idx] += np.exp(-b * data_dist) + accessibility_data[ac_idx, d_idx, netw_src_idx] += 1 + accessibility_data_wt[ac_idx, d_idx, netw_src_idx] += np.exp(-b * data_dist) # if a match was found, then no need to check others break # mixed uses can be calculated now that the local class counts are aggregated @@ -558,37 +553,32 @@ def disp_check(disp_matrix): for mu_hill_key in mixed_use_hill_keys: for q_idx, q_key in enumerate(qs): if mu_hill_key == 0: - _mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \ + mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \ diversity.hill_diversity(cl_counts, q_key) elif mu_hill_key == 1: - _mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \ + mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \ diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b) elif mu_hill_key == 2: - _mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \ + mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \ diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b) # land-use classification disparity hill diversity # the wt matrix can be used without mapping because cl_counts is based on all classes # regardless of whether they are reachable elif mu_hill_key == 3: - _mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \ + mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \ diversity.hill_diversity_pairwise_matrix_wt(cl_counts, wt_matrix=cl_disparity_wt_matrix, q=q_key) for mu_other_key in mixed_use_other_keys: if mu_other_key == 0: - _mixed_use_other_data[0, d_idx, netw_src_idx] = \ + mixed_use_other_data[0, d_idx, netw_src_idx] = \ diversity.shannon_diversity(cl_counts) elif mu_other_key == 1: - _mixed_use_other_data[1, d_idx, netw_src_idx] = \ + mixed_use_other_data[1, d_idx, netw_src_idx] = \ diversity.gini_simpson_diversity(cl_counts) elif mu_other_key == 2: - _mixed_use_other_data[2, d_idx, netw_src_idx] = \ + mixed_use_other_data[2, d_idx, netw_src_idx] = \ diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix) - # reduce - mixed_use_hill_data += _mixed_use_hill_data - mixed_use_other_data += _mixed_use_other_data - accessibility_data += _accessibility_data - accessibility_data_wt += _accessibility_data_wt # send the data back in the same types and same order as the original keys - convert to int for indexing mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0) for i, k in enumerate(mixed_use_hill_keys): @@ -597,9 +587,9 @@ def disp_check(disp_matrix): for i, k in enumerate(mixed_use_other_keys): mu_other_k_int[i] = k - return mixed_use_hill_data[mu_hill_k_int], \ + return mixed_use_hill_data[mu_hill_k_int],\ 
mixed_use_other_data[mu_other_k_int], \ - accessibility_data, \ + accessibility_data,\ accessibility_data_wt @@ -636,46 +626,33 @@ def aggregate_stats(node_data: np.ndarray, checks.check_network_maps(node_data, edge_data, node_edge_map) checks.check_data_map(data_map, check_assigned=True) # raises ValueError data points are not assigned to a network checks.check_distances_and_betas(distances, betas) - # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan)) if numerical_arrays.shape[1] != len(data_map): raise ValueError('The length of the numerical data arrays do not match the length of the data map.') checks.check_numerical_data(numerical_arrays) - # establish variables netw_n = len(node_data) d_n = len(distances) n_n = len(numerical_arrays) global_max_dist = float(np.nanmax(distances)) netw_nodes_live = node_data[:, 2] - # setup data structures - stats_sum = np.full((n_n, d_n, netw_n), np.nan) - stats_sum_wt = np.full((n_n, d_n, netw_n), np.nan) + stats_sum = np.full((n_n, d_n, netw_n), 0.0) + stats_sum_wt = np.full((n_n, d_n, netw_n), 0.0) stats_mean = np.full((n_n, d_n, netw_n), np.nan) stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan) - stats_count = np.full((n_n, d_n, netw_n), np.nan) # use np.nan instead of 0 to avoid division by zero issues - stats_count_wt = np.full((n_n, d_n, netw_n), np.nan) + stats_count = np.full((n_n, d_n, netw_n), 0.0) + stats_count_wt = np.full((n_n, d_n, netw_n), 0.0) stats_variance = np.full((n_n, d_n, netw_n), np.nan) stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan) stats_max = np.full((n_n, d_n, netw_n), np.nan) stats_min = np.full((n_n, d_n, netw_n), np.nan) - # iterate through each vert and aggregate steps = int(netw_n / 10000) + # parallelise over n nodes: + # each distance or stat array index is therefore only touched by one thread at a time + # i.e. 
no need to use inner array deductions as with centralities for netw_src_idx in prange(netw_n): - # setup shadow arrays for reductions - _stats_sum = np.full((n_n, d_n, netw_n), np.nan) - _stats_sum_wt = np.full((n_n, d_n, netw_n), np.nan) - _stats_mean = np.full((n_n, d_n, netw_n), np.nan) - _stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan) - _stats_count = np.full((n_n, d_n, netw_n), np.nan) # use np.nan instead of 0 to avoid division by zero issues - _stats_count_wt = np.full((n_n, d_n, netw_n), np.nan) - _stats_variance = np.full((n_n, d_n, netw_n), np.nan) - _stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan) - _stats_max = np.full((n_n, d_n, netw_n), np.nan) - _stats_min = np.full((n_n, d_n, netw_n), np.nan) - if not suppress_progress: checks.progress_bar(netw_src_idx, netw_n, steps) # only compute for live nodes @@ -711,33 +688,28 @@ def aggregate_stats(node_data: np.ndarray, # increment mean aggregations at respective distances if the distance is less than current d if data_dist <= d: # aggregate - if np.isnan(_stats_sum[num_idx, d_idx, netw_src_idx]): - _stats_sum[num_idx, d_idx, netw_src_idx] = num - _stats_count[num_idx, d_idx, netw_src_idx] = 1 - _stats_sum_wt[num_idx, d_idx, netw_src_idx] = num * np.exp(-b * data_dist) - _stats_count_wt[num_idx, d_idx, netw_src_idx] = np.exp(-b * data_dist) - else: - _stats_sum[num_idx, d_idx, netw_src_idx] += num - _stats_count[num_idx, d_idx, netw_src_idx] += 1 - _stats_sum_wt[num_idx, d_idx, netw_src_idx] += num * np.exp(-b * data_dist) - _stats_count_wt[num_idx, d_idx, netw_src_idx] += np.exp(-b * data_dist) - - if np.isnan(_stats_max[num_idx, d_idx, netw_src_idx]): - _stats_max[num_idx, d_idx, netw_src_idx] = num - elif num > _stats_max[num_idx, d_idx, netw_src_idx]: - _stats_max[num_idx, d_idx, netw_src_idx] = num - - if np.isnan(_stats_min[num_idx, d_idx, netw_src_idx]): - _stats_min[num_idx, d_idx, netw_src_idx] = num - elif num < _stats_min[num_idx, d_idx, netw_src_idx]: - _stats_min[num_idx, d_idx, netw_src_idx] = num + stats_sum[num_idx, d_idx, netw_src_idx] += num + stats_count[num_idx, d_idx, netw_src_idx] += 1 + stats_sum_wt[num_idx, d_idx, netw_src_idx] += num * np.exp(-b * data_dist) + stats_count_wt[num_idx, d_idx, netw_src_idx] += np.exp(-b * data_dist) + # max + if np.isnan(stats_max[num_idx, d_idx, netw_src_idx]): + stats_max[num_idx, d_idx, netw_src_idx] = num + elif num > stats_max[num_idx, d_idx, netw_src_idx]: + stats_max[num_idx, d_idx, netw_src_idx] = num + # min + if np.isnan(stats_min[num_idx, d_idx, netw_src_idx]): + stats_min[num_idx, d_idx, netw_src_idx] = num + elif num < stats_min[num_idx, d_idx, netw_src_idx]: + stats_min[num_idx, d_idx, netw_src_idx] = num # finalise mean calculations - this is happening for a single netw_src_idx, so fairly fast for num_idx in range(n_n): for d_idx in range(d_n): - _stats_mean[num_idx, d_idx, netw_src_idx] = \ - _stats_sum[num_idx, d_idx, netw_src_idx] / _stats_count[num_idx, d_idx, netw_src_idx] - _stats_mean_wt[num_idx, d_idx, netw_src_idx] = \ - _stats_sum_wt[num_idx, d_idx, netw_src_idx] / _stats_count_wt[num_idx, d_idx, netw_src_idx] + # use divide so that division through zero doesn't trigger + stats_mean[num_idx, d_idx, netw_src_idx] = np.divide(stats_sum[num_idx, d_idx, netw_src_idx], + stats_count[num_idx, d_idx, netw_src_idx]) + stats_mean_wt[num_idx, d_idx, netw_src_idx] = np.divide(stats_sum_wt[num_idx, d_idx, netw_src_idx], + stats_count_wt[num_idx, d_idx, netw_src_idx]) # calculate variances - counts are already computed per above # weighted version is 
IDW by division through equivalently weighted counts above # iterate the reachable indices and related distances @@ -757,41 +729,31 @@ def aggregate_stats(node_data: np.ndarray, # increment variance aggregations at respective distances if the distance is less than current d if data_dist <= d: # aggregate - if np.isnan(_stats_variance[num_idx, d_idx, netw_src_idx]): - _stats_variance[num_idx, d_idx, netw_src_idx] = \ - np.square(num - _stats_mean[num_idx, d_idx, netw_src_idx]) - _stats_variance_wt[num_idx, d_idx, netw_src_idx] = \ - np.square(num - _stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp( - -b * data_dist) + if np.isnan(stats_variance[num_idx, d_idx, netw_src_idx]): + stats_variance[num_idx, d_idx, netw_src_idx] = \ + np.square(num - stats_mean[num_idx, d_idx, netw_src_idx]) + stats_variance_wt[num_idx, d_idx, netw_src_idx] = \ + np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(-b * data_dist) else: - _stats_variance[num_idx, d_idx, netw_src_idx] += \ - np.square(num - _stats_mean[num_idx, d_idx, netw_src_idx]) - _stats_variance_wt[num_idx, d_idx, netw_src_idx] += \ - np.square(num - _stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp( - -b * data_dist) + stats_variance[num_idx, d_idx, netw_src_idx] += \ + np.square(num - stats_mean[num_idx, d_idx, netw_src_idx]) + stats_variance_wt[num_idx, d_idx, netw_src_idx] += \ + np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(-b * data_dist) # finalise variance calculations for num_idx in range(n_n): for d_idx in range(d_n): - _stats_variance[num_idx, d_idx, netw_src_idx] = \ - _stats_variance[num_idx, d_idx, netw_src_idx] / _stats_count[num_idx, d_idx, netw_src_idx] - _stats_variance_wt[num_idx, d_idx, netw_src_idx] = \ - _stats_variance_wt[num_idx, d_idx, netw_src_idx] / _stats_count_wt[num_idx, d_idx, netw_src_idx] - # reductions - stats_sum += _stats_sum - stats_sum_wt += _stats_sum_wt - stats_mean += _stats_mean - stats_mean_wt += _stats_mean_wt - stats_count += _stats_count - stats_count_wt += _stats_count_wt - stats_variance += _stats_variance - stats_variance_wt += _stats_variance_wt - stats_max += _stats_max - stats_min += _stats_min + # use divide so that division through zero doesn't trigger + stats_variance[num_idx, d_idx, netw_src_idx] = np.divide( + stats_variance[num_idx, d_idx, netw_src_idx], + stats_count[num_idx, d_idx, netw_src_idx]) + stats_variance_wt[num_idx, d_idx, netw_src_idx] = np.divide( + stats_variance_wt[num_idx, d_idx, netw_src_idx], + stats_count_wt[num_idx, d_idx, netw_src_idx]) return stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min -@njit(cache=True, nogil=True) +@njit(cache=True, nogil=True, parallel=True) def singly_constrained(node_data: np.ndarray, edge_data: np.ndarray, node_edge_map: Dict, @@ -835,28 +797,22 @@ def singly_constrained(node_data: np.ndarray, if len(i_weights) != len(i_data_map): raise ValueError('The i_weights array must be the same length as the i_data_map.') - if len(j_weights) != len(j_data_map): raise ValueError('The j_weights array must be the same length as the j_data_map.') - # establish variables netw_n = len(node_data) d_n = len(distances) global_max_dist = np.max(distances) - netw_flows = np.full((d_n, netw_n), 0.0) - i_n = len(i_data_map) k_agg = np.full((d_n, i_n), 0.0) - - j_n = len(j_data_map) - j_assigned = np.full((d_n, j_n), 0.0) - # iterate all i nodes # filter all reachable nodes k and aggregate k attractiveness * negative exponential of distance steps = 
int(i_n / 10000) - for i_idx in range(i_n): + for i_idx in prange(i_n): if not suppress_progress: checks.progress_bar(i_idx, i_n, steps) + # setup shadowed array for reductions + _k_agg = np.full((d_n, i_n), 0.0) # get the nearest node i_assigned_netw_idx = int(i_data_map[i_idx, 2]) # calculate the base distance from the data point to the nearest assigned node @@ -872,7 +828,6 @@ def singly_constrained(node_data: np.ndarray, j_data_map, global_max_dist, angular) - # aggregate the weighted j (all k) nodes # iterate the reachable indices and related distances for j_idx, (j_reachable, j_dist) in enumerate(zip(reachable_j, reachable_j_dist)): @@ -883,23 +838,28 @@ def singly_constrained(node_data: np.ndarray, total_dist = j_dist + i_door_dist # increment weighted k aggregations at respective distances if the distance is less than current d if total_dist <= d: - k_agg[d_idx, i_idx] += j_weights[j_idx] * np.exp(-b * total_dist) - + _k_agg[d_idx, i_idx] += j_weights[j_idx] * np.exp(-b * total_dist) + # array reductions (non-race) + k_agg += _k_agg # this is the second step # this time, filter all reachable j vertices and aggregate the proportion of flow from i to j # this is done by dividing i-j flow through i-k_agg flow from previous step + netw_flows = np.full((d_n, netw_n), 0.0) + j_n = len(j_data_map) + j_assigned = np.full((d_n, j_n), 0.0) steps = int(i_n / 10000) - for i_idx in range(i_n): + for i_idx in prange(i_n): if not suppress_progress: checks.progress_bar(i_idx, i_n, steps) - + # setup shadowed arrays for reductions + _netw_flows = np.full((d_n, netw_n), 0.0) + _j_assigned = np.full((d_n, j_n), 0.0) # get the nearest node i_assigned_netw_idx = int(i_data_map[i_idx, 2]) # calculate the base distance from the data point to the nearest assigned node i_x, i_y = i_data_map[i_idx, :2] n_x, n_y = node_data[i_assigned_netw_idx, :2] i_door_dist = np.hypot(i_x - n_x, i_y - n_y) - # find the reachable j data points and their respective points from the closest node reachable_j, reachable_j_dist, tree_preds = aggregate_to_src_idx(i_assigned_netw_idx, node_data, @@ -908,10 +868,10 @@ def singly_constrained(node_data: np.ndarray, j_data_map, global_max_dist, angular) - # aggregate j divided through all k nodes # iterate the reachable indices and related distances for j_idx, (j_reachable, j_dist) in enumerate(zip(reachable_j, reachable_j_dist)): + # if not j_reachable: continue # iterate the distance dimensions @@ -925,15 +885,15 @@ def singly_constrained(node_data: np.ndarray, if k_agg[d_idx, i_idx] == 0: assigned = 0 else: - assigned = i_weights[i_idx] * j_weights[j_idx] * np.exp(-b * total_dist) / k_agg[d_idx, - i_idx] - j_assigned[d_idx, j_idx] += assigned + assigned = np.divide(i_weights[i_idx] * j_weights[j_idx] * np.exp(-b * total_dist), + k_agg[d_idx, i_idx]) + _j_assigned[d_idx, j_idx] += assigned # assign trips to network if assigned != 0: # get the j assigned node j_assigned_netw_idx = int(j_data_map[j_idx, 2]) # in this case start and end nodes are counted...! 
- netw_flows[d_idx, j_assigned_netw_idx] += assigned + _netw_flows[d_idx, j_assigned_netw_idx] += assigned # skip if same start / end node if j_assigned_netw_idx == i_assigned_netw_idx: continue @@ -941,11 +901,14 @@ def singly_constrained(node_data: np.ndarray, inter_idx = np.int(tree_preds[j_assigned_netw_idx]) while True: # end nodes counted, so place above break - netw_flows[d_idx, inter_idx] += assigned + _netw_flows[d_idx, inter_idx] += assigned # break out of while loop if the intermediary has reached the source node if inter_idx == i_assigned_netw_idx: break # follow the chain inter_idx = np.int(tree_preds[inter_idx]) + # array reductions (non-race) + netw_flows += _netw_flows + j_assigned += _j_assigned return j_assigned, netw_flows diff --git a/cityseer/metrics/layers.py b/cityseer/metrics/layers.py index d9cc95dd..9a5eca6b 100644 --- a/cityseer/metrics/layers.py +++ b/cityseer/metrics/layers.py @@ -347,11 +347,6 @@ def compute_aggregated(self, **kwargs): """ This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use [`compute_landuses`](#datalayercompute_landuses) or [`compute_stats`](#datalayercompute_stats) instead. - - Raises - ------ - DeprecationWarning - """ raise DeprecationWarning('The compute_aggregated method has been deprecated. ' 'It has been split into two: ' @@ -662,23 +657,95 @@ def compute_landuses(self, for d_idx, d_key in enumerate(self.Network.distances): self.Network.metrics['accessibility'][k][ac_label][d_key] = ac_data[ac_idx][d_idx] + def hill_diversity(self, + landuse_labels: Union[list, tuple, np.ndarray], + qs: Union[list, tuple, np.ndarray] = None): + """ + Compute hill diversity for the provided `landuse_labels` at the specified values of `q`. See + [`DataLayer.compute_landuses`](#datalayercompute_landuses) for additional information. + + Parameters + ---------- + landuse_labels + A set of land-use labels corresponding to the length and order of the data points. The labels should + correspond to descriptors from the land-use schema, such as "retail" or "commercial". + qs + The values of `q` for which to compute Hill diversity, by default None + + Notes + ----- + The data key is `hill`, e.g.: + + `NetworkLayer.metrics['mixed_uses']['hill'][<>][<>][<>]` + """ + return self.compute_landuses(landuse_labels, mixed_use_keys=['hill'], qs=qs) + + def hill_branch_wt_diversity(self, + landuse_labels: Union[list, tuple, np.ndarray], + qs: Union[list, tuple, np.ndarray] = None): + """ + Compute distance-weighted hill diversity for the provided `landuse_labels` at the specified values of `q`. See + [`DataLayer.compute_landuses`](#datalayercompute_landuses) for additional information. + + Parameters + ---------- + landuse_labels + A set of land-use labels corresponding to the length and order of the data points. The labels should + correspond to descriptors from the land-use schema, such as "retail" or "commercial". + qs + The values of `q` for which to compute Hill diversity, by default None + + Notes + ----- + The data key is `hill_branch_wt`, e.g.: + + `NetworkLayer.metrics['mixed_uses']['hill_branch_wt'][<>][<>][<>]` + """ + return self.compute_landuses(landuse_labels, mixed_use_keys=['hill_branch_wt'], qs=qs) + + def compute_accessibilities(self, + landuse_labels: Union[list, tuple, np.ndarray], + accessibility_keys: Union[list, tuple]): + """ + Compute land-use accessibilities for the specified land-use classification keys. See + [`DataLayer.compute_landuses`](#datalayercompute_landuses) for additional information. 
+
+        Parameters
+        ----------
+        landuse_labels
+            A set of land-use labels corresponding to the length and order of the data points. The labels should
+            correspond to descriptors from the land-use schema, such as "retail" or "commercial".
+        accessibility_keys
+            The land-use keys for which to compute accessibilities. The keys should be selected from the same land-use
+            schema used for the `landuse_labels` parameter, e.g. "retail". The calculations will be performed in both
+            `weighted` and `non_weighted` variants.
+
+        Notes
+        -----
+        The data keys will correspond to the `accessibility_keys` specified, e.g. where computing `retail`
+        accessibility:
+
+        `NetworkLayer.metrics['accessibility']['weighted']['retail'][<>][<>]`
+ `NetworkLayer.metrics['accessibility']['non_weighted']['retail'][<>][<>]` + """ + return self.compute_landuses(landuse_labels, accessibility_keys=accessibility_keys) + def compute_stats(self, - stats_keys: Union[list, tuple], - stats_data_arrs: Union[List[Union[list, tuple, np.ndarray]], - Tuple[Union[list, tuple, np.ndarray]], - np.ndarray], + stats_keys: Union[str, list, tuple], + stats_data_arrs: Union[ + List[Union[list, tuple, np.ndarray]], + Tuple[Union[list, tuple, np.ndarray]], + list, + tuple, + np.ndarray], angular: bool = False): """ - This method wraps the underlying `numba` optimised functions for computing statistical measures. Situations - requiring only a single measure can instead make use of the simplified - [`DataLayer.compute_stats_single`](#datalayercompute_stats_single), and - [`DataLayer.compute_stats_multiple`](#datalayercompute_stats_multiple) methods. - - The data is aggregated and computed over the street network relative to the `Network Layer` nodes, with the - implication that statistical aggregations are generated from the same locations as for centrality computations, - which can therefore be correlated or otherwise compared. The outputs of the calculations are written to the - corresponding node indices in the same `NetworkLayer.metrics` dictionary used for centrality methods, and will - be categorised by the respective keys and parameters. + This method wraps the underlying `numba` optimised functions for computing statistical measures. The data is + aggregated and computed over the street network relative to the `Network Layer` nodes, with the implication + that statistical aggregations are generated from the same locations as for centrality computations, which can + therefore be correlated or otherwise compared. The outputs of the calculations are written to the corresponding + node indices in the same `NetworkLayer.metrics` dictionary used for centrality methods, and will be categorised + by the respective keys and parameters. For example, if a `valuations` stats key is computed on a `Network Layer` instantiated with 800m and 1600m distance thresholds, then the dictionary would assume the following structure: @@ -729,15 +796,17 @@ def compute_stats(self, Parameters ---------- stats_keys - A `list` or `tuple` of keys corresponding to the number of nested arrays passed to the `stats_data_arrs` - parameter. The computed stats will be saved to the `N.metrics` dictionary using these keys. This parameter - is only required if computing stats for a `stats_data_arrs` parameter. + If computing a single stat: a `str` key describing the stats computed for the `stats_data_arr` parameter. + If computing multiple stats: a `list` or `tuple` of keys. Computed stats will be saved under the supplied + key to the `N.metrics` dictionary. stats_data_arrs - A 2d `list`, `tuple` or `numpy` array of numerical data, where the first dimension corresponds to the number - of keys in the `stats_keys` parameter and the second dimension corresponds to number of data points in the - `DataLayer`. e.g: + If computing a single stat: a 1d `list`, `tuple` or `numpy` array of numerical data, where the length + corresponds to the number of data points in the `DataLayer`. + If computing multiple stats keys: a 2d `list`, `tuple`, or `numpy` array of numerical data, where the first + dimension corresponds to the number of keys in the `stats_keys` parameter and the second dimension + corresponds to number of data points in the `DataLayer`. 
e.g:
             ```python
-            # for a DataLayer containg 5 data points
+            # if computing three keys for a DataLayer containing 5 data points
             stats_keys = ['valuations', 'floors', 'occupants']
             stats_data_arrs = [
                 [50000, 60000, 55000, 42000, 46000], # valuations
                 [3, 3, 2, 3, 5], # floors
                 [420, 300, 220, 250, 600] # occupants
             ]
@@ -752,6 +821,12 @@
         Notes
         -----
+        The data keys will correspond to the `stats_keys` parameter, e.g.:
+
+        `NetworkLayer.metrics['stats']['valuations'][<>][<>][<>]`
+ `NetworkLayer.metrics['stats']['floors'][<>][<>][<>]`
+ `NetworkLayer.metrics['stats']['occupants'][<>][<>][<>]` + A worked example: ```python from cityseer.metrics import networks, layers @@ -773,16 +848,9 @@ def compute_stats(self, L = layers.DataLayerFromDict(data_dict) # assign to the network L.assign_to_network(N, max_dist=500) - # compute some metrics - here we'll use the full interface, see below for simplified interfaces - # FULL INTERFACE - # ============== - L.compute_stats(stats_keys=['mock_stat'], + # compute some metrics + L.compute_stats(stats_keys='mock_stat', stats_data_arrs=stats_data) - # note that the above measures can optionally be run individually using simplified interfaces, e.g. - # SIMPLIFIED INTERFACES - # ===================== - # L.compute_stats_single('mock_stat', stats_data[0]) # this method requires a 1d array - # let's prepare some keys for accessing the computational outputs # distance idx: any of the distances with which the NetworkLayer was initialised distance_idx = 200 @@ -794,21 +862,39 @@ def compute_stats(self, # prints: 71297.82967202332 ``` - Note that the data can also be unpacked to a dictionary using [`NetworkLayer.metrics_to_dict`](/metrics/networks/#networklayermetrics_to_dict), or transposed to a `networkX` graph using [`NetworkLayer.to_networkX`](/metrics/networks/#networklayerto_networkx). + Note that the data can also be unpacked to a dictionary using + [`NetworkLayer.metrics_to_dict`](/metrics/networks/#networklayermetrics_to_dict), or transposed to a `networkX` + graph using [`NetworkLayer.to_networkX`](/metrics/networks/#networklayerto_networkx). + :::tip Comment + Per the above worked example, the following stat types will be available for each `stats_key` for each of the + computed distances: + - `max` and `min` + - `sum` and `sum_weighted` + - `mean` and `mean_weighted` + - `variance` and `variance_weighted` + ::: """ if self.Network is None: raise ValueError('Assign this data layer to a network prior to computing mixed-uses or accessibilities.') - elif len(stats_data_arrs) != len(stats_keys): + # check keys + if not isinstance(stats_keys, (str, list, tuple)): + raise TypeError('Stats keys should be a string else a list or tuple of strings.') + # wrap single keys + if isinstance(stats_keys, str): + stats_keys = [stats_keys] + # check data arrays + if not isinstance(stats_data_arrs, (list, tuple, np.ndarray)): + raise TypeError('Stats data must be in the form of a list, tuple, or numpy array.') + stats_data_arrs = np.array(stats_data_arrs) + # check for single dimensional arrays and change to 2d if necessary + if stats_data_arrs.ndim == 1: + stats_data_arrs = np.expand_dims(stats_data_arrs, axis=0) + # lengths of keys and array dims should match + if len(stats_data_arrs) != len(stats_keys): raise ValueError('An equal number of stats labels and stats data arrays is required.') - elif not isinstance(stats_data_arrs, (list, tuple, np.ndarray)): - raise ValueError('Stats data must be in the form of a list, tuple, or numpy array.') - else: - stats_data_arrs = np.array(stats_data_arrs) - if stats_data_arrs.ndim == 1: - stats_data_arrs = np.array([stats_data_arrs]) - if stats_data_arrs.shape[1] != len(self._data): - raise ValueError('The length of all data arrays must match the number of data points.') + if stats_data_arrs.shape[1] != len(self._data): + raise ValueError('The length of data arrays must match the number of data points.') if not checks.quiet_mode: logger.info(f'Computing stats for: {", ".join(stats_keys)}') # call the underlying method @@ -847,150 +933,23 @@ def 
compute_stats(self, for d_idx, d_key in enumerate(self.Network.distances): self.Network.metrics['stats'][stats_key][k][d_key] = stats_data[num_idx][d_idx] - def hill_diversity(self, - landuse_labels: Union[list, tuple, np.ndarray], - qs: Union[list, tuple, np.ndarray] = None): - """ - Compute hill diversity for the provided `landuse_labels` at the specified values of `q`. See - [`DataLayer.compute_aggregated`](#datalayercompute_aggregated) for additional information. - - Parameters - ---------- - landuse_labels - A set of land-use labels corresponding to the length and order of the data points. The labels should - correspond to descriptors from the land-use schema, such as "retail" or "commercial". - qs - The values of `q` for which to compute Hill diversity, by default None - - Notes - ----- - The data key is `hill`, e.g.: - - `NetworkLayer.metrics['mixed_uses']['hill'][<>][<>][<>]` - """ - return self.compute_landuses(landuse_labels, mixed_use_keys=['hill'], qs=qs) - - def hill_branch_wt_diversity(self, - landuse_labels: Union[list, tuple, np.ndarray], - qs: Union[list, tuple, np.ndarray] = None): - """ - Compute distance-weighted hill diversity for the provided `landuse_labels` at the specified values of `q`. See - [`DataLayer.compute_aggregated`](#datalayercompute_aggregated) for additional information. - - Parameters - ---------- - landuse_labels - A set of land-use labels corresponding to the length and order of the data points. The labels should - correspond to descriptors from the land-use schema, such as "retail" or "commercial". - qs - The values of `q` for which to compute Hill diversity, by default None - - Notes - ----- - The data key is `hill_branch_wt`, e.g.: - - `NetworkLayer.metrics['mixed_uses']['hill_branch_wt'][<>][<>][<>]` - """ - return self.compute_landuses(landuse_labels, mixed_use_keys=['hill_branch_wt'], qs=qs) - - def compute_accessibilities(self, - landuse_labels: Union[list, tuple, np.ndarray], - accessibility_keys: Union[list, tuple]): - """ - Compute land-use accessibilities for the specified land-use classification keys. See - [`DataLayer.compute_aggregated`](#datalayercompute_aggregated) for additional information. - - Parameters - ---------- - landuse_labels - A set of land-use labels corresponding to the length and order of the data points. The labels should - correspond to descriptors from the land-use schema, such as "retail" or "commercial". - accessibility_keys - The land-use keys for which to compute accessibilies. The keys should be selected from the same land-use - schema used for the `landuse_labels` parameter, e.g. "retail". The calculations will be performed in both - `weighted` and `non_weighted` variants. - - Notes - ----- - The data keys will correspond to the `accessibility_keys` specified, e.g. where computing `retail` - accessibility: - - `NetworkLayer.metrics['accessibility']['weighted']['retail'][<>][<>]`
- `NetworkLayer.metrics['accessibility']['non_weighted']['retail'][<>][<>]` + # deprecated method + def compute_stats_single(self, **kwargs): """ - return self.compute_landuses(landuse_labels, accessibility_keys=accessibility_keys) - - def compute_stats_single(self, - stats_key: str, - stats_data_arr: Union[list, tuple, np.ndarray]): + This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use + [`compute_stats`](#datalayercompute_stats) instead. """ - Compute stats for a single `stats_key` parameter. As with the landuse and mixed-use measures: stats will be - computed for each distance that was specified when initialising the `NetworkLayer`. - - Parameters - ---------- - stats_key - A `str` key describing the stats computed for the `stats_data_arr` parameter. The computed stats will be - saved to the `N.metrics` dictionary under this key. - stats_data_arr - A 1d `list`, `tuple` or `numpy` array of numerical data, where the length corresponds to the number of data - points in the `DataLayer`. - - Notes - ----- - The data key will correspond to the `stats_key` parameter, e.g. where using `occupants` as the key: - - `NetworkLayer.metrics['stats']['occupants'][<>][<>][<>]` + raise DeprecationWarning('The compute_stats_single method has been deprecated. ' + 'Please use the compute_stats method instead.') - :::tip Comment - Per the above worked example, the following stat types will be available for each `stats_key` for each of the computed distances: - - `max` and `min` - - `sum` and `sum_weighted` - - `mean` and `mean_weighted` - - `variance` and `variance_weighted` - - ::: - """ - if stats_data_arr.ndim != 1: - raise ValueError( - 'The stats_data_arr must be a single dimensional array with a length corresponding to the number of data points in the DataLayer.') - return self.compute_stats(stats_keys=[stats_key], stats_data_arrs=[stats_data_arr]) - - def compute_stats_multiple(self, - stats_keys: Union[list, tuple], - stats_data_arrs: Union[list, tuple, np.ndarray]): + # deprecated method + def compute_stats_multiple(self, **kwargs): """ - - - Parameters - ---------- - stats_keys - A `list` or `tuple` of keys corresponding to the number of nested arrays passed to the `stats_data_arrs` - parameter. The computed stats will be saved to the `N.metrics` dictionary under these keys. - stats_data_arrs - A 2d `list`, `tuple` or `numpy` array of numerical data, where the first dimension corresponds to the number - of keys in the `stats_keys` parameter and the second dimension corresponds to number of data points in the - `DataLayer`. See the below example. - - Notes - ----- - ```python - # for a DataLayer containg 5 data points - stats_keys = ['valuations', 'floors', 'occupants'] - stats_data_arrs = [ - [50000, 60000, 55000, 42000, 46000], # valuations - [3, 3, 2, 3, 5], # floors - [420, 300, 220, 250, 600] # occupants - ] - ``` - The data keys will correspond to the `stats_keys` parameter: - - `NetworkLayer.metrics['stats']['valuations'][<>][<>][<>]`
- `NetworkLayer.metrics['stats']['floors'][<>][<>][<>]`
- `NetworkLayer.metrics['stats']['occupants'][<>][<>][<>]` + This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use + [`compute_stats`](#datalayercompute_stats) instead. """ - - return self.compute_stats(stats_keys=stats_keys, stats_data_arrs=stats_data_arrs) + raise DeprecationWarning('The compute_stats_multiple method has been deprecated. ' + 'Please use the compute_stats method instead.') def model_singly_constrained(self, key: str, @@ -1002,7 +961,6 @@ def model_singly_constrained(self, """ Undocumented method for computing singly-constrained interactions. """ - j_assigned, netw_flows = data.singly_constrained(self.Network._node_data, self.Network._edge_data, self.Network._node_edge_map, @@ -1032,7 +990,7 @@ class DataLayerFromDict(DataLayer): [`DataLayer`](#class-datalayer) class, which can be referenced for more information. """ - def __init__(self, data_dict: dict) -> DataLayer: + def __init__(self, data_dict: dict): """ Parameters ---------- diff --git a/cityseer/metrics/networks.py b/cityseer/metrics/networks.py index e52e73fb..a0844743 100644 --- a/cityseer/metrics/networks.py +++ b/cityseer/metrics/networks.py @@ -655,11 +655,6 @@ def compute_centrality(self, **kwargs): This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use [`node_centrality`](#networklayernode_centrality) or [`segment_centrality`](#networklayersegment_centrality) instead. - - Raises - ------ - DeprecationWarning - """ raise DeprecationWarning('The compute_centrality method has been deprecated. ' 'It has been split into two: ' diff --git a/docs/content/intro.md b/docs/content/intro.md index 75668eea..94ee86f1 100644 --- a/docs/content/intro.md +++ b/docs/content/intro.md @@ -93,7 +93,6 @@ The [`NetworkLayer.node_centrality`](/metrics/networks/#networklayernode_central ```python from cityseer.metrics import networks - # create a Network layer from the networkX graph N = networks.NetworkLayerFromNX(G_decomp, distances=[200, 400, 800, 1600]) # the underlying method allows the computation of various centralities simultaneously, e.g. @@ -106,7 +105,6 @@ A [`DataLayer`](/metrics/layers/#class-datalayer) represents data points. A `Dat ```python from cityseer.metrics import layers - # a mock data dictionary representing the 'x', 'y' attributes for data points data_dict = mock.mock_data_dict(G_decomp, random_seed=25) print(data_dict[0], data_dict[1], 'etc.') @@ -125,14 +123,13 @@ _Data points assigned to a Network Layer._ ![DataLayer assigned to a decomposed NetworkLayer](../src/assets/plots/images/assignment_decomposed.png) _Data assignment becomes more precise on a decomposed Network Layer._ -Once the data has been assigned, the [`DataLayer.compute_aggregated`](/metrics/layers/#datalayercompute_aggregated) method is used for the calculation of mixed-use, accessibility, and statistical measures. As with the centrality methods, the measures are all computed simultaneously (and for all distances); however, simpler stand-alone methods are also available, including: [`DataLayer.hill_diversity`](/metrics/layers/#datalayerhill_diversity) , [`DataLayer.hill_branch_wt_diversity`](/metrics/layers/#datalayerhill_branch_wt_diversity) , [`DataLayer.compute_accessibilities`](/metrics/layers/#datalayercompute_accessibilities) , [`DataLayer.compute_stats_single`](/metrics/layers/#datalayercompute_stats_single), and [`DataLayer.compute_stats_multiple`](/metrics/layers/#datalayercompute_stats_multiple). 
+Once the data has been assigned, the [`DataLayer.compute_landuses`](/metrics/layers/#datalayercompute_landuses) method is used for the calculation of mixed-use and land-use accessibility measures whereas [`DataLayer.compute_stats`](/metrics/layers/#datalayercompute_stats) can likewise be used for statistical measures. As with the centrality methods, the measures are all computed simultaneously (and for all distances); however, simpler stand-alone methods are also available, including [`DataLayer.hill_diversity`](/metrics/layers/#datalayerhill_diversity), [`DataLayer.hill_branch_wt_diversity`](/metrics/layers/#datalayerhill_branch_wt_diversity), and [`DataLayer.compute_accessibilities`](/metrics/layers/#datalayercompute_accessibilities). Landuse labels can be used to generate mixed-use and land-use accessibility measures. Let's create mock landuse labels for the points in our data dictionary and compute mixed-uses and land-use accessibilities: ```python landuse_labels = mock.mock_categorical_data(len(data_dict), random_seed=25) print(landuse_labels) -# prints: ['e' 'g' 'h' 'c' 'i' 'e' 'j' 'e' 'f' 'b' etc. # example easy-wrapper method for computing mixed-uses D.hill_branch_wt_diversity(landuse_labels, qs=[0, 1, 2]) # example easy-wrapper method for computing accessibilities @@ -140,7 +137,7 @@ D.hill_branch_wt_diversity(landuse_labels, qs=[0, 1, 2]) # for which accessibilities will be computed D.compute_accessibilities(landuse_labels, accessibility_keys=['a', 'c']) # or compute multiple measures at once, e.g.: -D.compute_aggregated(landuse_labels, +D.compute_landuses(landuse_labels, mixed_use_keys=['hill', 'hill_branch_wt', 'shannon'], accessibility_keys=['a', 'c'], qs=[0, 1, 2]) @@ -152,7 +149,7 @@ We can do the same thing with numerical data. Let's generate some mock numerical mock_valuations_data = mock.mock_numerical_data(len(data_dict), random_seed=25) print(mock_valuations_data) # compute max, min, mean, mean-weighted, variance, and variance-weighted -D.compute_stats_single(stats_key='valuations', stats_data_arr=mock_valuations_data[0]) +D.compute_stats('valuations', mock_valuations_data) ``` The data is aggregated and computed over the street network relative to the `NetworkLayer` (i.e. street) nodes. The mixed-use, accessibility, and statistical aggregations can therefore be compared directly to centrality computations from the same locations, and can be correlated or otherwise compared. The outputs of the calculations are written to the corresponding node indices in the same `NetworkLayer.metrics` dictionary used for centrality methods, and will be categorised by the respective keys and parameters. @@ -188,7 +185,6 @@ The data can then be passed to data analysis or plotting methods. For example, t ```python # plot centrality from matplotlib import colors - segment_harmonic_vals = [] mixed_uses_vals = [] for node, data in G_metrics.nodes(data=True): diff --git a/docs/content/metrics/layers.md b/docs/content/metrics/layers.md index a71d3408..91d03a2f 100644 --- a/docs/content/metrics/layers.md +++ b/docs/content/metrics/layers.md @@ -290,22 +290,30 @@ _Assignment of data to network nodes becomes more contextually precise on decomp
-DataLayer.compute_aggregated(landuse_labels=None,
-                             mixed_use_keys=None,
-                             accessibility_keys=None,
-                             cl_disparity_wt_matrix=None,
-                             qs=None,
-                             stats_keys=None,
-                             stats_data_arrs=None,
-                             angular=False)
+DataLayer.compute_aggregated(**kwargs)
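Given the deprecation described below, a hedged migration sketch may help: the former single call splits into `compute_landuses` for mixed-uses and accessibilities and `compute_stats` for numerical aggregations. The variable names (`L`, `landuses`, `stats_data`) follow the worked examples elsewhere in this document and are assumed to be prepared as shown there:

```python
# previously (now raises DeprecationWarning):
# L.compute_aggregated(landuse_labels=landuses,
#                      mixed_use_keys=['hill'], qs=[0, 1],
#                      accessibility_keys=['c', 'd', 'e'],
#                      stats_keys=['mock_stat'], stats_data_arrs=stats_data)
# the same measures are now computed via two separate calls:
L.compute_landuses(landuse_labels=landuses,
                   mixed_use_keys=['hill'],
                   qs=[0, 1],
                   accessibility_keys=['c', 'd', 'e'])
L.compute_stats(stats_keys='mock_stat', stats_data_arrs=stats_data)
```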
 
-This method wraps the underlying `numba` optimised functions for aggregating and computing various mixed-use, land-use accessibility, and statistical measures. These are computed simultaneously for any required combinations of measures (and distances), which can have significant speed implications. Situations requiring only a single measure can instead make use of the simplified [`DataLayer.hill_diversity`](#datalayerhill_diversity), [`DataLayer.hill_branch_wt_diversity`](#datalayerhill_branch_wt_diversity), [`DataLayer.compute_accessibilities`](#datalayercompute_accessibilities), [`DataLayer.compute_stats_single`](#datalayercompute_stats_single), and [`DataLayer.compute_stats_multiple`](#datalayercompute_stats_multiple) methods. +This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use [`compute_landuses`](#datalayercompute_landuses) or [`compute_stats`](#datalayercompute_stats) instead. -The data is aggregated and computed over the street network relative to the `Network Layer` nodes, with the implication that mixed-use, accessibility, and statistical aggregations are generated from the same locations as for centrality computations, which can therefore be correlated or otherwise compared. The outputs of the calculations are written to the corresponding node indices in the same `NetworkLayer.metrics` dictionary used for centrality methods, and will be categorised by the respective keys and parameters. +## DataLayer.compute\_landuses -For example, if `hill` and `shannon` mixed-use keys; `shops` and `factories` accessibility keys; and a `valuations` stats keys are computed on a `Network Layer` instantiated with 800m and 1600m distance thresholds, then the dictionary would assume the following structure: + +
+DataLayer.compute_landuses(landuse_labels,
+                           mixed_use_keys=None,
+                           accessibility_keys=None,
+                           cl_disparity_wt_matrix=None,
+                           qs=None,
+                           angular=False)
+
+
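For orientation, a minimal sketch of a call against the signature above, using the `hill` and `shannon` mixed-use keys and the illustrative `shops` and `factories` accessibility keys referenced in the description below; it assumes a `DataLayer` `L` with matching `landuses` labels already assigned to a `NetworkLayer` `N`, per the worked example further down:

```python
L.compute_landuses(landuse_labels=landuses,
                   mixed_use_keys=['hill', 'shannon'],
                   accessibility_keys=['shops', 'factories'],
                   qs=[0, 1, 2])
# hill results are additionally nested by the value of q, e.g. q=0 at the 800m threshold:
print(N.metrics['mixed_uses']['hill'][0][800])
# shannon results are keyed by distance directly:
print(N.metrics['mixed_uses']['shannon'][800])
```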
+ +This method wraps the underlying `numba` optimised functions for aggregating and computing various mixed-use and land-use accessibility measures. These are computed simultaneously for any required combinations of measures (and distances), which can have significant speed implications. Situations requiring only a single measure can instead make use of the simplified [`DataLayer.hill_diversity`](#datalayerhill_diversity), [`DataLayer.hill_branch_wt_diversity`](#datalayerhill_branch_wt_diversity), and [`DataLayer.compute_accessibilities`](#datalayercompute_accessibilities) methods. + +The data is aggregated and computed over the street network relative to the `Network Layer` nodes, with the implication that mixed-use and land-use accessibility aggregations are generated from the same locations as for centrality computations, which can therefore be correlated or otherwise compared. The outputs of the calculations are written to the corresponding node indices in the same `NetworkLayer.metrics` dictionary used for centrality methods, and will be categorised by the respective keys and parameters. + +For example, if `hill` and `shannon` mixed-use keys; `shops` and `factories` accessibility keys are computed on a `Network Layer` instantiated with 800m and 1600m distance thresholds, then the dictionary would assume the following structure: ```python NetworkLayer.metrics = { @@ -351,44 +359,6 @@ NetworkLayer.metrics = { 1600: [...] } } - }, - 'stats': { - # stats grouped by each stats key - 'valuations': { - # each stat will have the following key-value pairs - 'max': { - 800: [...], - 1600: [...] - }, - 'min': { - 800: [...], - 1600: [...] - }, - 'sum': { - 800: [...], - 1600: [...] - }, - 'sum_weighted': { - 800: [...], - 1600: [...] - }, - 'mean': { - 800: [...], - 1600: [...] - }, - 'mean_weighted': { - 800: [...], - 1600: [...] - }, - 'variance': { - 800: [...], - 1600: [...] - }, - 'variance_weighted': { - 800: [...], - 1600: [...] - } - } } } ``` @@ -397,7 +367,7 @@ NetworkLayer.metrics = { -A set of land-use labels corresponding to the length and order of the data points. The labels should correspond to descriptors from the land-use schema, such as "retail" or "commercial". This parameter is only required if computing mixed-uses or land-use accessibilities, by default None. +A set of land-use labels corresponding to the length and order of the data points. The labels should correspond to descriptors from the land-use schema, such as "retail" or "commercial". This parameter is only required if computing mixed-uses or land-use accessibilities. @@ -425,30 +395,6 @@ The values of `q` for which to compute Hill diversity. This parameter is only re - - -A `list` or `tuple` of keys corresponding to the number of nested arrays passed to the `stats_data_arrs` parameter. The computed stats will be saved to the `N.metrics` dictionary using these keys. This parameter is only required if computing stats for a `stats_data_arrs` parameter, by default None - - - - - -A 2d `list`, `tuple` or `numpy` array of numerical data, where the first dimension corresponds to the number of keys in the `stats_keys` parameter and the second dimension corresponds to number of data points in the `DataLayer`, by default None. 
e.g: - -```python -# for a DataLayer containg 5 data points -stats_keys = ['valuations', 'floors', 'occupants'] -stats_data_arrs = [ - [50000, 60000, 55000, 42000, 46000], # valuations - [3, 3, 2, 3, 5], # floors - [420, 300, 220, 250, 600] # occupants -] -``` - - - Whether to use a simplest-path heuristic in-lieu of a shortest-path heuristic when calculating aggregations and distances, by default False @@ -489,8 +435,6 @@ N = networks.NetworkLayerFromNX(G, distances=[200, 400, 800, 1600]) data_dict = mock.mock_data_dict(G, random_seed=25) # prepare some mock land-use classifications landuses = mock.mock_categorical_data(len(data_dict), random_seed=25) -# let's also prepare some numerical data -stats_data = mock.mock_numerical_data(len(data_dict), num_arrs=1, random_seed=25) # generate a data layer L = layers.DataLayerFromDict(data_dict) @@ -499,18 +443,15 @@ L.assign_to_network(N, max_dist=500) # compute some metrics - here we'll use the full interface, see below for simplified interfaces # FULL INTERFACE # ============== -L.compute_aggregated(landuse_labels=landuses, - mixed_use_keys=['hill'], - qs=[0, 1], - accessibility_keys=['c', 'd', 'e'], - stats_keys=['mock_stat'], - stats_data_arrs=stats_data) +L.compute_landuses(landuse_labels=landuses, + mixed_use_keys=['hill'], + qs=[0, 1], + accessibility_keys=['c', 'd', 'e']) # note that the above measures can optionally be run individually using simplified interfaces, e.g. # SIMPLIFIED INTERFACES # ===================== # L.hill_diversity(landuses, qs=[0]) # L.compute_accessibilities(landuses, ['a', 'b']) -# L.compute_stats_single('mock_stat', stats_data[0]) # this method requires a 1d array # let's prepare some keys for accessing the computational outputs # distance idx: any of the distances with which the NetworkLayer was initialised @@ -527,8 +468,6 @@ print(N.metrics['accessibility']['weighted']['d'][distance_idx][node_idx]) # prints: 0.019168843947614676 print(N.metrics['accessibility']['non_weighted']['d'][distance_idx][node_idx]) # prints: 1.0 -print(N.metrics['stats']['mock_stat']['mean_weighted'][distance_idx][node_idx]) -# prints: 71297.82967202332 ``` Note that the data can also be unpacked to a dictionary using [`NetworkLayer.metrics_to_dict`](/metrics/networks/#networklayermetrics_to_dict), or transposed to a `networkX` graph using [`NetworkLayer.to_networkX`](/metrics/networks/#networklayerto_networkx). @@ -547,7 +486,7 @@ DataLayer.hill_diversity(landuse_labels, -Compute hill diversity for the provided `landuse_labels` at the specified values of `q`. See [`DataLayer.compute_aggregated`](#datalayercompute_aggregated) for additional information. +Compute hill diversity for the provided `landuse_labels` at the specified values of `q`. See [`DataLayer.compute_landuses`](#datalayercompute_landuses) for additional information. Parameters @@ -578,7 +517,7 @@ DataLayer.hill_branch_wt_diversity(landuse_labels, -Compute distance-weighted hill diversity for the provided `landuse_labels` at the specified values of `q`. See [`DataLayer.compute_aggregated`](#datalayercompute_aggregated) for additional information. +Compute distance-weighted hill diversity for the provided `landuse_labels` at the specified values of `q`. See [`DataLayer.compute_landuses`](#datalayercompute_landuses) for additional information. Parameters @@ -609,7 +548,7 @@ DataLayer.compute_accessibilities(landuse_labels, -Compute land-use accessibilities for the specified land-use classification keys. 
See [`DataLayer.compute_aggregated`](#datalayercompute_aggregated) for additional information. +Compute land-use accessibilities for the specified land-use classification keys. See [`DataLayer.compute_landuses`](#datalayercompute_landuses) for additional information. Parameters @@ -632,37 +571,144 @@ The data keys will correspond to the `accessibility_keys` specified, e.g. where `NetworkLayer.metrics['accessibility']['weighted']['retail'][<>][<>]`
`NetworkLayer.metrics['accessibility']['non_weighted']['retail'][<>][<>]` -## DataLayer.compute\_stats\_single +## DataLayer.compute\_stats
-DataLayer.compute_stats_single(stats_key,
-                               stats_data_arr)
+DataLayer.compute_stats(stats_keys,
+                        stats_data_arrs,
+                        angular=False)
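A minimal sketch of the two calling patterns that the signature above accepts — a single key with a 1d data array, or multiple keys with a matching 2d array — using the same mock-based setup as the worked example below (the `stat_a` / `stat_b` keys and the `num_arrs=2` argument are illustrative assumptions):

```python
from cityseer.metrics import networks, layers
from cityseer.tools import mock, graphs

# mock setup per the worked example below
G = mock.mock_graph()
G = graphs.nX_simple_geoms(G)
N = networks.NetworkLayerFromNX(G, distances=[200, 400, 800, 1600])
data_dict = mock.mock_data_dict(G, random_seed=25)
L = layers.DataLayerFromDict(data_dict)
L.assign_to_network(N, max_dist=500)
# single stat: a str key with a 1d data array
stats_data = mock.mock_numerical_data(len(data_dict), num_arrs=1, random_seed=25)
L.compute_stats(stats_keys='mock_stat', stats_data_arrs=stats_data[0])
# multiple stats: a list of keys with a 2d data array (one row per key)
multi_data = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=25)
L.compute_stats(stats_keys=['stat_a', 'stat_b'], stats_data_arrs=multi_data)
```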
 
-Compute stats for a single `stats_key` parameter. As with the landuse and mixed-use measures: stats will be computed for each distance that was specified when initialising the `NetworkLayer`. +This method wraps the underlying `numba` optimised functions for computing statistical measures. The data is aggregated and computed over the street network relative to the `Network Layer` nodes, with the implication that statistical aggregations are generated from the same locations as for centrality computations, which can therefore be correlated or otherwise compared. The outputs of the calculations are written to the corresponding node indices in the same `NetworkLayer.metrics` dictionary used for centrality methods, and will be categorised by the respective keys and parameters. + +For example, if a `valuations` stats key is computed on a `Network Layer` instantiated with 800m and 1600m distance thresholds, then the dictionary would assume the following structure: + +```python +NetworkLayer.metrics = { + 'stats': { + # stats grouped by each stats key + 'valuations': { + # each stat will have the following key-value pairs + 'max': { + 800: [...], + 1600: [...] + }, + 'min': { + 800: [...], + 1600: [...] + }, + 'sum': { + 800: [...], + 1600: [...] + }, + 'sum_weighted': { + 800: [...], + 1600: [...] + }, + 'mean': { + 800: [...], + 1600: [...] + }, + 'mean_weighted': { + 800: [...], + 1600: [...] + }, + 'variance': { + 800: [...], + 1600: [...] + }, + 'variance_weighted': { + 800: [...], + 1600: [...] + } + } + } +} +``` Parameters - + + +If computing a single stat: a `str` key describing the stats computed for the `stats_data_arr` parameter. If computing multiple stats: a `list` or `tuple` of keys. Computed stats will be saved under the supplied key to the `N.metrics` dictionary. + + + + -A `str` key describing the stats computed for the `stats_data_arr` parameter. The computed stats will be saved to the `N.metrics` dictionary under this key. +If computing a single stat: a 1d `list`, `tuple` or `numpy` array of numerical data, where the length corresponds to the number of data points in the `DataLayer`. If computing multiple stats keys: a 2d `list`, `tuple`, or `numpy` array of numerical data, where the first dimension corresponds to the number of keys in the `stats_keys` parameter and the second dimension corresponds to number of data points in the `DataLayer`. e.g: + +```python +# if computing three keys for a DataLayer containg 5 data points +stats_keys = ['valuations', 'floors', 'occupants'] +stats_data_arrs = [ + [50000, 60000, 55000, 42000, 46000], # valuations + [3, 3, 2, 3, 5], # floors + [420, 300, 220, 250, 600] # occupants +] +``` - + -A 1d `list`, `tuple` or `numpy` array of numerical data, where the length corresponds to the number of data points in the `DataLayer`. +Whether to use a simplest-path heuristic in-lieu of a shortest-path heuristic when calculating aggregations and distances, by default False Notes -The data key will correspond to the `stats_key` parameter, e.g. where using `occupants` as the key: +The data keys will correspond to the `stats_keys` parameter, e.g.: +`NetworkLayer.metrics['stats']['valuations'][<>][<>][<>]`
+`NetworkLayer.metrics['stats']['floors'][<>][<>][<>]`
`NetworkLayer.metrics['stats']['occupants'][<>][<>][<>]` +A worked example: + +```python +from cityseer.metrics import networks, layers +from cityseer.tools import mock, graphs + +# prepare a mock graph +G = mock.mock_graph() +G = graphs.nX_simple_geoms(G) + +# generate the network layer +N = networks.NetworkLayerFromNX(G, distances=[200, 400, 800, 1600]) + +# prepare a mock data dictionary +data_dict = mock.mock_data_dict(G, random_seed=25) +# let's prepare some numerical data +stats_data = mock.mock_numerical_data(len(data_dict), num_arrs=1, random_seed=25) + +# generate a data layer +L = layers.DataLayerFromDict(data_dict) +# assign to the network +L.assign_to_network(N, max_dist=500) +# compute some metrics +L.compute_stats(stats_keys='mock_stat', + stats_data_arrs=stats_data) +# let's prepare some keys for accessing the computational outputs +# distance idx: any of the distances with which the NetworkLayer was initialised +distance_idx = 200 +# a node idx +node_idx = 0 + +# the data is available at N.metrics +print(N.metrics['stats']['mock_stat']['mean_weighted'][distance_idx][node_idx]) +# prints: 71297.82967202332 +``` + +Note that the data can also be unpacked to a dictionary using [`NetworkLayer.metrics_to_dict`](/metrics/networks/#networklayermetrics_to_dict), or transposed to a `networkX` graph using [`NetworkLayer.to_networkX`](/metrics/networks/#networklayerto_networkx). + :::tip Comment Per the above worked example, the following stat types will be available for each `stats_key` for each of the computed distances: @@ -671,49 +717,27 @@ Per the above worked example, the following stat types will be available for eac - `sum` and `sum_weighted` - `mean` and `mean_weighted` - `variance` and `variance_weighted` - ::: -## DataLayer.compute\_stats\_multiple +## DataLayer.compute\_stats\_single
-DataLayer.compute_stats_multiple(stats_keys,
-                                 stats_data_arrs)
+DataLayer.compute_stats_single(**kwargs)
 
-Parameters - - - -A `list` or `tuple` of keys corresponding to the number of nested arrays passed to the `stats_data_arrs` parameter. The computed stats will be saved to the `N.metrics` dictionary under these keys. +This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use [`compute_stats`](#datalayercompute_stats) instead. - - - - -A 2d `list`, `tuple` or `numpy` array of numerical data, where the first dimension corresponds to the number of keys in the `stats_keys` parameter and the second dimension corresponds to number of data points in the `DataLayer`. See the below example. - - - -Notes - -```python -# for a DataLayer containg 5 data points -stats_keys = ['valuations', 'floors', 'occupants'] -stats_data_arrs = [ - [50000, 60000, 55000, 42000, 46000], # valuations - [3, 3, 2, 3, 5], # floors - [420, 300, 220, 250, 600] # occupants -] -``` +## DataLayer.compute\_stats\_multiple - The data keys will correspond to the `stats_keys` parameter: + +
+DataLayer.compute_stats_multiple(**kwargs)
+
+
-`NetworkLayer.metrics['stats']['valuations'][<>][<>][<>]`
-`NetworkLayer.metrics['stats']['floors'][<>][<>][<>]`
-`NetworkLayer.metrics['stats']['occupants'][<>][<>][<>]` +This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use [`compute_stats`](#datalayercompute_stats) instead. ## DataLayer.model\_singly\_constrained @@ -739,7 +763,6 @@ Directly transposes an appropriately prepared data dictionary into a `DataLayer`
 DataLayerFromDict(data_dict)
-                  -> DataLayer
 
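Both `compute_stats_single` and `compute_stats_multiple` now simply raise a `DeprecationWarning` and defer to `compute_stats`. A minimal migration sketch, mirroring the patterns shown in the worked example and tests above; the `'boo'` / `'baa'` keys, distances, and mock setup are illustrative only and not part of this patch:

```python
from cityseer.metrics import networks, layers
from cityseer.tools import mock, graphs

# prepare a mock network and data layer
G = graphs.nX_simple_geoms(mock.mock_graph())
N = networks.NetworkLayerFromNX(G, distances=[400, 800])
data_dict = mock.mock_data_dict(G)
numeric_data = mock.mock_numerical_data(len(data_dict), num_arrs=2)
D = layers.DataLayerFromDict(data_dict)
D.assign_to_network(N, max_dist=500)

# deprecated calls - these now raise a DeprecationWarning:
# D.compute_stats_single('boo', numeric_data[0])
# D.compute_stats_multiple(['boo', 'baa'], numeric_data)

# replacement - compute_stats handles both cases:
D.compute_stats(stats_keys='boo', stats_data_arrs=numeric_data[0])  # single key with a 1d array
D.compute_stats(stats_keys=['boo', 'baa'], stats_data_arrs=numeric_data)  # multiple keys with a 2d array
```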
diff --git a/docs/content/metrics/networks.md b/docs/content/metrics/networks.md index 9fd8c64d..10ee86f4 100644 --- a/docs/content/metrics/networks.md +++ b/docs/content/metrics/networks.md @@ -485,8 +485,6 @@ NetworkLayer.compute_centrality(**kwargs) This method is deprecated and, if invoked, will raise a DeprecationWarning. Please use [`node_centrality`](#networklayernode_centrality) or [`segment_centrality`](#networklayersegment_centrality) instead. -Raises - ## NetworkLayer.node\_centrality diff --git a/docs/generateDocs.js b/docs/generateDocs.js index 8eedafe4..e02d4e19 100644 --- a/docs/generateDocs.js +++ b/docs/generateDocs.js @@ -1,3 +1,4 @@ +// this script stopped working and no idea why or how to debug. const path = require('path') const { PythonShell } = require('python-shell') @@ -9,6 +10,7 @@ const options = { args: ['--config', '../.yap_config.yaml'], } -PythonShell.run('__init__.py', options, function (err) { +PythonShell.run('__init__.py', options, function (err, results) { if (err) throw err + console.log(results) }) diff --git a/docs/package.json b/docs/package.json index c2bb1de3..4bc5ef70 100644 --- a/docs/package.json +++ b/docs/package.json @@ -1,10 +1,10 @@ { "scripts": { - "generate": "node generateDocs.js", + "NOTE": "echo NOTE: Run yapper from terminal in project root directory to build files.", "lint": "markdownlint 'content/**/*.md' --fix", "validateLinks": "remark -u validate-links .", - "dev": "yarn run generate && yarn run lint && gridsome develop", - "build": "yarn run generate && yarn run lint && gridsome build", + "dev": "yarn run NOTE && yarn run lint && gridsome develop", + "build": "yarn run NOTE && yarn run lint && gridsome build", "ci": "yarn run lint && gridsome build", "explore": "yarn run generate && yarn run lint && gridsome explore" }, @@ -28,7 +28,7 @@ "@silvenon/remark-smartypants": "^1.0.0", "@tailwindcss/postcss7-compat": "^2.1.2", "autoprefixer": "^9", - "eslint": "^7.26.0", + "eslint": "^7.27.0", "eslint-config-prettier": "^8.3.0", "eslint-plugin-gridsome": "^1.6.2", "eslint-plugin-prettier": "^3.4.0", diff --git a/docs/src/assets/plots/images/assignment.png b/docs/src/assets/plots/images/assignment.png index 738f24a8..361d587b 100644 Binary files a/docs/src/assets/plots/images/assignment.png and b/docs/src/assets/plots/images/assignment.png differ diff --git a/docs/src/assets/plots/images/assignment_decomposed.png b/docs/src/assets/plots/images/assignment_decomposed.png index e138f246..9a367e26 100644 Binary files a/docs/src/assets/plots/images/assignment_decomposed.png and b/docs/src/assets/plots/images/assignment_decomposed.png differ diff --git a/docs/src/assets/plots/images/assignment_plot.png b/docs/src/assets/plots/images/assignment_plot.png index e362165c..359ea4be 100644 Binary files a/docs/src/assets/plots/images/assignment_plot.png and b/docs/src/assets/plots/images/assignment_plot.png differ diff --git a/docs/src/assets/plots/images/betas.png b/docs/src/assets/plots/images/betas.png index 3a39dc15..27c5c1dd 100644 Binary files a/docs/src/assets/plots/images/betas.png and b/docs/src/assets/plots/images/betas.png differ diff --git a/docs/src/assets/plots/images/graph.png b/docs/src/assets/plots/images/graph.png index f35bfacf..34204ff3 100644 Binary files a/docs/src/assets/plots/images/graph.png and b/docs/src/assets/plots/images/graph.png differ diff --git a/docs/src/assets/plots/images/graph_after.png b/docs/src/assets/plots/images/graph_after.png index 7b2e2756..2510dee0 100644 Binary files 
a/docs/src/assets/plots/images/graph_after.png and b/docs/src/assets/plots/images/graph_after.png differ diff --git a/docs/src/assets/plots/images/graph_before.png b/docs/src/assets/plots/images/graph_before.png index 7b2e2756..2510dee0 100644 Binary files a/docs/src/assets/plots/images/graph_before.png and b/docs/src/assets/plots/images/graph_before.png differ diff --git a/docs/src/assets/plots/images/graph_cleaning_1.png b/docs/src/assets/plots/images/graph_cleaning_1.png index 20a253a1..560aff0c 100644 Binary files a/docs/src/assets/plots/images/graph_cleaning_1.png and b/docs/src/assets/plots/images/graph_cleaning_1.png differ diff --git a/docs/src/assets/plots/images/graph_cleaning_2.png b/docs/src/assets/plots/images/graph_cleaning_2.png index 0720e773..6be7c1e1 100644 Binary files a/docs/src/assets/plots/images/graph_cleaning_2.png and b/docs/src/assets/plots/images/graph_cleaning_2.png differ diff --git a/docs/src/assets/plots/images/graph_cleaning_3.png b/docs/src/assets/plots/images/graph_cleaning_3.png index 4c5f0c48..c419dba2 100644 Binary files a/docs/src/assets/plots/images/graph_cleaning_3.png and b/docs/src/assets/plots/images/graph_cleaning_3.png differ diff --git a/docs/src/assets/plots/images/graph_cleaning_4.png b/docs/src/assets/plots/images/graph_cleaning_4.png index 707addaa..2658a107 100644 Binary files a/docs/src/assets/plots/images/graph_cleaning_4.png and b/docs/src/assets/plots/images/graph_cleaning_4.png differ diff --git a/docs/src/assets/plots/images/graph_cleaning_5.png b/docs/src/assets/plots/images/graph_cleaning_5.png index 678547db..e2d773b8 100644 Binary files a/docs/src/assets/plots/images/graph_cleaning_5.png and b/docs/src/assets/plots/images/graph_cleaning_5.png differ diff --git a/docs/src/assets/plots/images/graph_colour.png b/docs/src/assets/plots/images/graph_colour.png index d49d52bb..1d09753b 100644 Binary files a/docs/src/assets/plots/images/graph_colour.png and b/docs/src/assets/plots/images/graph_colour.png differ diff --git a/docs/src/assets/plots/images/graph_decomposed.png b/docs/src/assets/plots/images/graph_decomposed.png index 5ba8f4e5..cdff8e6a 100644 Binary files a/docs/src/assets/plots/images/graph_decomposed.png and b/docs/src/assets/plots/images/graph_decomposed.png differ diff --git a/docs/src/assets/plots/images/graph_dual.png b/docs/src/assets/plots/images/graph_dual.png index e22d5b6f..b4752d5a 100644 Binary files a/docs/src/assets/plots/images/graph_dual.png and b/docs/src/assets/plots/images/graph_dual.png differ diff --git a/docs/src/assets/plots/images/graph_example.png b/docs/src/assets/plots/images/graph_example.png index 7b2e2756..2510dee0 100644 Binary files a/docs/src/assets/plots/images/graph_example.png and b/docs/src/assets/plots/images/graph_example.png differ diff --git a/docs/src/assets/plots/images/graph_simple.png b/docs/src/assets/plots/images/graph_simple.png index f78f6f3f..8e14b208 100644 Binary files a/docs/src/assets/plots/images/graph_simple.png and b/docs/src/assets/plots/images/graph_simple.png differ diff --git a/docs/src/assets/plots/images/intro_mixed_uses.png b/docs/src/assets/plots/images/intro_mixed_uses.png index 53b11cd1..3a43e555 100644 Binary files a/docs/src/assets/plots/images/intro_mixed_uses.png and b/docs/src/assets/plots/images/intro_mixed_uses.png differ diff --git a/docs/src/assets/plots/images/intro_segment_harmonic.png b/docs/src/assets/plots/images/intro_segment_harmonic.png index 35b5af26..53bde3aa 100644 Binary files a/docs/src/assets/plots/images/intro_segment_harmonic.png and 
b/docs/src/assets/plots/images/intro_segment_harmonic.png differ diff --git a/docs/src/assets/plots/images/osmnx_cityseer_simplification.png b/docs/src/assets/plots/images/osmnx_cityseer_simplification.png index 2b314e4f..fcb834f3 100644 Binary files a/docs/src/assets/plots/images/osmnx_cityseer_simplification.png and b/docs/src/assets/plots/images/osmnx_cityseer_simplification.png differ diff --git a/docs/src/assets/plots/images/osmnx_simplification.png b/docs/src/assets/plots/images/osmnx_simplification.png index 670c48e2..c3bc4089 100644 Binary files a/docs/src/assets/plots/images/osmnx_simplification.png and b/docs/src/assets/plots/images/osmnx_simplification.png differ diff --git a/setup.py b/setup.py index fb9dea2d..b23aa35a 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,12 @@ ''' -DOWNLOADING FROM TEST REPO: pip install --extra-index-url https://test.pypi.org/simple/ cityseer==1.0.6b3 +DOWNLOADING FROM TEST REPO: pip install --extra-index-url https://test.pypi.org/simple/ cityseer==1.1.2b1 ''' from setuptools import setup setup( name='cityseer', - version='1.1.1', + version='1.1.2', packages=['cityseer', 'cityseer.algos', 'cityseer.metrics', 'cityseer.tools'], description='Computational tools for urban analysis', url='https://github.com/benchmark-urbanism/cityseer-api', diff --git a/tests/algos/test_data.py b/tests/algos/test_data.py index 2ecc165b..54513d77 100644 --- a/tests/algos/test_data.py +++ b/tests/algos/test_data.py @@ -250,9 +250,9 @@ def test_aggregate_to_src_idx(primal_graph): assert reachable_dist == next_nearest_dist -def test_local_aggregator_signatures(primal_graph): - node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX( - primal_graph) # generate node and edge maps +def test_aggregate_landuses_signatures(primal_graph): + # generate node and edge maps + node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph) # setup data data_dict = mock.mock_data_dict(primal_graph, random_seed=13) data_uids, data_map = layers.data_map_from_dict(data_dict) @@ -265,42 +265,42 @@ def test_local_aggregator_signatures(primal_graph): landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical) # check that empty land_use encodings are caught with pytest.raises(ValueError): - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - mixed_use_hill_keys=np.array([0])) + data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + mixed_use_hill_keys=np.array([0])) # check that unequal land_use encodings vs data map lengths are caught with pytest.raises(ValueError): - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings=landuse_encodings[:-1], - mixed_use_other_keys=np.array([0])) + data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings=landuse_encodings[:-1], + mixed_use_other_keys=np.array([0])) # check that no provided metrics flags with pytest.raises(ValueError): - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings=landuse_encodings) + data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings=landuse_encodings) # check that missing qs flags with pytest.raises(ValueError): - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - 
mixed_use_hill_keys=np.array([0]), - landuse_encodings=landuse_encodings) + data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + mixed_use_hill_keys=np.array([0]), + landuse_encodings=landuse_encodings) # check that problematic mixed use and accessibility keys are caught for mu_h_key, mu_o_key, ac_key in [ # negatives @@ -316,47 +316,47 @@ def test_local_aggregator_signatures(primal_graph): ([1], [1, 1], [1]), ([1], [1], [1, 1])]: with pytest.raises(ValueError): - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings, - qs=qs, - mixed_use_hill_keys=np.array(mu_h_key), - mixed_use_other_keys=np.array(mu_o_key), - accessibility_keys=np.array(ac_key)) + data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings, + qs=qs, + mixed_use_hill_keys=np.array(mu_h_key), + mixed_use_other_keys=np.array(mu_o_key), + accessibility_keys=np.array(ac_key)) for h_key, o_key in (([3], []), ([], [2])): # check that missing matrix is caught for disparity weighted indices with pytest.raises(ValueError): - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings=landuse_encodings, - qs=qs, - mixed_use_hill_keys=np.array(h_key), - mixed_use_other_keys=np.array(o_key)) + data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings=landuse_encodings, + qs=qs, + mixed_use_hill_keys=np.array(h_key), + mixed_use_other_keys=np.array(o_key)) # check that non-square disparity matrix is caught mock_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1) with pytest.raises(ValueError): - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings=landuse_encodings, - qs=qs, - mixed_use_hill_keys=np.array(h_key), - mixed_use_other_keys=np.array(o_key), - cl_disparity_wt_matrix=mock_matrix[:-1]) - - -def test_local_aggregator_categorical_components(primal_graph): + data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings=landuse_encodings, + qs=qs, + mixed_use_hill_keys=np.array(h_key), + mixed_use_other_keys=np.array(o_key), + cl_disparity_wt_matrix=mock_matrix[:-1]) + + +def test_aggregate_landuses_categorical_components(primal_graph): # generate node and edge maps node_uids, node_data, edge_data, node_edge_map, = graphs.graph_maps_from_nX(primal_graph) # setup data @@ -378,21 +378,19 @@ def test_local_aggregator_categorical_components(primal_graph): ac_keys = np.array([1, 2, 5]) np.random.shuffle(ac_keys) # generate - mu_data_hill, mu_data_other, ac_data, ac_data_wt, stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, \ - stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_data, - edge_data, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings=landuse_encodings, - qs=qs, - mixed_use_hill_keys=hill_keys, - mixed_use_other_keys=non_hill_keys, - accessibility_keys=ac_keys, - cl_disparity_wt_matrix=mock_matrix, - angular=False) + mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings=landuse_encodings, + qs=qs, + mixed_use_hill_keys=hill_keys, + mixed_use_other_keys=non_hill_keys, + accessibility_keys=ac_keys, + cl_disparity_wt_matrix=mock_matrix, + 
angular=False) # hill hill = mu_data_hill[np.where(hill_keys == 0)][0] hill_branch_wt = mu_data_hill[np.where(hill_keys == 1)][0] @@ -508,37 +506,34 @@ def test_local_aggregator_categorical_components(primal_graph): landuse_classes_dual, landuse_encodings_dual = layers.encode_categorical(mock_categorical) mock_matrix = np.full((len(landuse_classes_dual), len(landuse_classes_dual)), 1) - mu_hill_dual, mu_other_dual, ac_dual, ac_wt_dual, stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, \ - stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_data_dual, - edge_data_dual, - node_edge_map_dual, - data_map_dual, - distances, - betas, - landuse_encodings_dual, - qs=qs, - mixed_use_hill_keys=hill_keys, - mixed_use_other_keys=non_hill_keys, - accessibility_keys=ac_keys, - cl_disparity_wt_matrix=mock_matrix, - angular=True) - - mu_hill_dual_sidestep, mu_other_dual_sidestep, ac_dual_sidestep, ac_wt_dual_sidestep, \ - stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_data_dual, - edge_data_dual, - node_edge_map_dual, - data_map_dual, - distances, - betas, - landuse_encodings_dual, - qs=qs, - mixed_use_hill_keys=hill_keys, - mixed_use_other_keys=non_hill_keys, - accessibility_keys=ac_keys, - cl_disparity_wt_matrix=mock_matrix, - angular=False) + mu_hill_dual, mu_other_dual, ac_dual, ac_wt_dual = data.aggregate_landuses(node_data_dual, + edge_data_dual, + node_edge_map_dual, + data_map_dual, + distances, + betas, + landuse_encodings_dual, + qs=qs, + mixed_use_hill_keys=hill_keys, + mixed_use_other_keys=non_hill_keys, + accessibility_keys=ac_keys, + cl_disparity_wt_matrix=mock_matrix, + angular=True) + + mu_hill_dual_sidestep, mu_other_dual_sidestep, ac_dual_sidestep, ac_wt_dual_sidestep = \ + data.aggregate_landuses(node_data_dual, + edge_data_dual, + node_edge_map_dual, + data_map_dual, + distances, + betas, + landuse_encodings_dual, + qs=qs, + mixed_use_hill_keys=hill_keys, + mixed_use_other_keys=non_hill_keys, + accessibility_keys=ac_keys, + cl_disparity_wt_matrix=mock_matrix, + angular=False) assert not np.allclose(mu_hill_dual, mu_hill_dual_sidestep, atol=0.001, rtol=0) assert not np.allclose(mu_other_dual, mu_other_dual_sidestep, atol=0.001, rtol=0) @@ -547,9 +542,8 @@ def test_local_aggregator_categorical_components(primal_graph): def test_local_aggregator_numerical_components(primal_graph): - node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX( - primal_graph) # generate node and edge maps - + # generate node and edge maps + node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph) # setup data data_dict = mock.mock_data_dict(primal_graph, random_seed=13) data_uids, data_map = layers.data_map_from_dict(data_dict) @@ -557,23 +551,20 @@ def test_local_aggregator_numerical_components(primal_graph): # for debugging # from cityseer.tools import plot # plot.plot_graph_maps(node_uids, node_data, edge_data, data_map) - # set parameters - use a large enough distance such that simple non-weighted checks can be run for max, mean, variance betas = np.array([0.00125]) distances = networks.distance_from_beta(betas) mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0) - - mu_data_hill, mu_data_other, ac_data, ac_data_wt, stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, \ - stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_data, - edge_data, - node_edge_map, 
- data_map, - distances, - betas, - numerical_arrays=mock_numerical, - angular=False) - + # compute + stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ + data.aggregate_stats(node_data, + edge_data, + node_edge_map, + data_map, + distances, + betas, + numerical_arrays=mock_numerical, + angular=False) # non connected portions of the graph will have different stats # used manual data plots from test_assign_to_network() to see which nodes the data points are assigned to # connected graph is from 0 to 48 -> assigned data points are all except 5, 8, 17, 33, 48 @@ -604,9 +595,9 @@ def test_local_aggregator_numerical_components(primal_graph): assert np.allclose(stats_min[stats_idx, d_idx, connected_nodes_idx], mock_numerical[stats_idx, connected_data_idx].min(), atol=0.001, rtol=0) # sum - assert np.isnan(stats_sum[stats_idx, d_idx, 49]) - assert np.allclose(stats_sum[stats_idx, d_idx, [50, 51]], mock_numerical[stats_idx, [17, 33]].sum(), - atol=0.001, rtol=0) + assert stats_sum[stats_idx, d_idx, 49] == 0 + assert np.allclose(stats_sum[stats_idx, d_idx, [50, 51]], + mock_numerical[stats_idx, [17, 33]].sum(), atol=0.001, rtol=0) assert np.allclose(stats_sum[stats_idx, d_idx, isolated_nodes_idx], mock_numerical[stats_idx, isolated_data_idx].sum(), atol=0.001, rtol=0) assert np.allclose(stats_sum[stats_idx, d_idx, connected_nodes_idx], diff --git a/tests/metrics/test_layers.py b/tests/metrics/test_layers.py index 7d317515..9819198e 100644 --- a/tests/metrics/test_layers.py +++ b/tests/metrics/test_layers.py @@ -6,7 +6,7 @@ from cityseer.algos import data from cityseer.metrics import layers, networks -from cityseer.tools import mock, graphs +from cityseer.tools import mock from cityseer.tools.mock import primal_graph @@ -14,7 +14,6 @@ def test_dict_wgs_to_utm(primal_graph): # check that node coordinates are correctly converted G_utm = mock.mock_graph() data_dict_utm = mock.mock_data_dict(G_utm) - # create a test dictionary test_dict = copy.deepcopy(data_dict_utm) # cast to lat, lon @@ -26,16 +25,13 @@ def test_dict_wgs_to_utm(primal_graph): lat, lng = utm.to_latlon(easting, northing, 30, 'U') test_dict[k]['x'] = lng test_dict[k]['y'] = lat - # convert back dict_converted = layers.dict_wgs_to_utm(test_dict) - # check that round-trip converted match with reasonable proximity given rounding errors for k in data_dict_utm.keys(): # rounding can be tricky assert np.allclose(data_dict_utm[k]['x'], dict_converted[k]['x'], atol=0.1, rtol=0) # relax precision assert np.allclose(data_dict_utm[k]['y'], dict_converted[k]['y'], atol=0.1, rtol=0) # relax precision - # check that missing node attributes throw an error for attr in ['x', 'y']: G_wgs = mock.mock_graph(wgs84_coords=True) @@ -46,7 +42,6 @@ def test_dict_wgs_to_utm(primal_graph): # check that missing attribute throws an error with pytest.raises(AttributeError): layers.dict_wgs_to_utm(data_dict_wgs) - # check that non WGS coordinates throw error with pytest.raises(AttributeError): layers.dict_wgs_to_utm(data_dict_utm) @@ -55,12 +50,9 @@ def test_dict_wgs_to_utm(primal_graph): def test_encode_categorical(): # generate mock data mock_categorical = mock.mock_categorical_data(50) - classes, class_encodings = layers.encode_categorical(mock_categorical) - for cl in classes: assert cl in mock_categorical - for idx, label in enumerate(mock_categorical): assert label in classes assert classes.index(label) == class_encodings[idx] @@ -70,15 +62,12 @@ def test_data_map_from_dict(primal_graph): # generate 
mock data data_dict = mock.mock_data_dict(primal_graph) data_uids, data_map = layers.data_map_from_dict(data_dict) - assert len(data_uids) == len(data_map) == len(data_dict) - for d_label, d in zip(data_uids, data_map): assert d[0] == data_dict[d_label]['x'] assert d[1] == data_dict[d_label]['y'] assert np.isnan(d[2]) assert np.isnan(d[3]) - # check that missing attributes throw errors for attr in ['x', 'y']: for k in data_dict.keys(): @@ -92,7 +81,6 @@ def test_Data_Layer(primal_graph): data_uids, data_map = layers.data_map_from_dict(data_dict) x_arr = data_map[:, 0] y_arr = data_map[:, 1] - # test against DataLayer internal process D = layers.DataLayer(data_uids, data_map) assert D.uids == data_uids @@ -106,7 +94,6 @@ def test_Data_Layer_From_Dict(primal_graph): data_uids, data_map = layers.data_map_from_dict(data_dict) x_arr = data_map[:, 0] y_arr = data_map[:, 1] - # test against DataLayerFromDict's internal process D = layers.DataLayerFromDict(data_dict) assert D.uids == data_uids @@ -115,7 +102,7 @@ def test_Data_Layer_From_Dict(primal_graph): assert np.allclose(D.data_y_arr, y_arr, atol=0.001, rtol=0) -def test_compute_aggregated_A(primal_graph): +def test_compute_landuses(primal_graph): betas = np.array([0.01, 0.005]) distances = networks.distance_from_beta(betas) # network layer @@ -133,54 +120,49 @@ def test_compute_aggregated_A(primal_graph): landuse_labels = mock.mock_categorical_data(len(data_dict)) landuse_classes, landuse_encodings = layers.encode_categorical(landuse_labels) # compute hill mixed uses - D.compute_aggregated(landuse_labels, mixed_use_keys=['hill_branch_wt'], qs=qs) + D.compute_landuses(landuse_labels, mixed_use_keys=['hill_branch_wt'], qs=qs) # test against underlying method data_map = D._data - mu_data_hill, mu_data_other, ac_data, ac_data_wt, \ - stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_map, - edge_map, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings, - qs=qs, - mixed_use_hill_keys=np.array([1])) + mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_map, + edge_map, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings, + qs=qs, + mixed_use_hill_keys=np.array([1])) for q_idx, q_key in enumerate(qs): for d_idx, d_key in enumerate(distances): assert np.allclose(N.metrics['mixed_uses']['hill_branch_wt'][q_key][d_key], mu_data_hill[0][q_idx][d_idx], atol=0.001, rtol=0) # gini simpson - D.compute_aggregated(landuse_labels, mixed_use_keys=['gini_simpson']) + D.compute_landuses(landuse_labels, mixed_use_keys=['gini_simpson']) # test against underlying method data_map = D._data - mu_data_hill, mu_data_other, ac_data, ac_data_wt, \ - stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_map, - edge_map, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings, - mixed_use_other_keys=np.array([1])) + mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_map, + edge_map, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings, + mixed_use_other_keys=np.array([1])) for d_idx, d_key in enumerate(distances): assert np.allclose(N.metrics['mixed_uses']['gini_simpson'][d_key], mu_data_other[0][d_idx], atol=0.001, rtol=0) # accessibilities - D.compute_aggregated(landuse_labels, accessibility_keys=['c']) + D.compute_landuses(landuse_labels, accessibility_keys=['c']) # test 
against underlying method data_map = D._data - mu_data_hill, mu_data_other, ac_data, ac_data_wt, \ - stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_map, - edge_map, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings, - accessibility_keys=np.array([landuse_classes.index('c')])) + mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_map, + edge_map, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings, + accessibility_keys=np.array( + [landuse_classes.index('c')])) for d_idx, d_key in enumerate(distances): assert np.allclose(N.metrics['accessibility']['non_weighted']['c'][d_key], ac_data[0][d_idx], atol=0.001, rtol=0) @@ -194,145 +176,78 @@ def test_compute_aggregated_A(primal_graph): 'gini_simpson', 'raos_pairwise_disparity']) ac_codes = np.array(landuse_classes) - + # mixed uses hill mu_hill_random = np.arange(len(mixed_uses_hill_types)) np.random.shuffle(mu_hill_random) - + # mixed uses other mu_other_random = np.arange(len(mixed_use_other_types)) np.random.shuffle(mu_other_random) - + # accessibility ac_random = np.arange(len(landuse_classes)) np.random.shuffle(ac_random) - # mock disparity matrix mock_disparity_wt_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1) - # not necessary to do all labels, first few should do for mu_h_min in range(3): mu_h_keys = np.array(mu_hill_random[mu_h_min:]) - for mu_o_min in range(3): mu_o_keys = np.array(mu_other_random[mu_o_min:]) - for ac_min in range(3): ac_keys = np.array(ac_random[ac_min:]) - # in the final case, set accessibility to a single code otherwise an error would be raised if len(mu_h_keys) == 0 and len(mu_o_keys) == 0 and len(ac_keys) == 0: ac_keys = np.array([0]) - # randomise order of keys and metrics mu_h_metrics = mixed_uses_hill_types[mu_h_keys] mu_o_metrics = mixed_use_other_types[mu_o_keys] ac_metrics = ac_codes[ac_keys] - + # prepare network and compute N_temp = networks.NetworkLayerFromNX(primal_graph, distances=distances) D_temp = layers.DataLayerFromDict(data_dict) D_temp.assign_to_network(N_temp, max_dist=500) - D_temp.compute_aggregated(landuse_labels, - mixed_use_keys=list(mu_h_metrics) + list(mu_o_metrics), - accessibility_keys=ac_metrics, - cl_disparity_wt_matrix=mock_disparity_wt_matrix, - qs=qs) - + D_temp.compute_landuses(landuse_labels, + mixed_use_keys=list(mu_h_metrics) + list(mu_o_metrics), + accessibility_keys=ac_metrics, + cl_disparity_wt_matrix=mock_disparity_wt_matrix, + qs=qs) # test against underlying method - mu_data_hill, mu_data_other, ac_data, ac_data_wt, stats_sum, stats_sum_wt, \ - stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_map, - edge_map, - node_edge_map, - data_map, - distances, - betas, - landuse_encodings, - qs=qs, - mixed_use_hill_keys=mu_h_keys, - mixed_use_other_keys=mu_o_keys, - accessibility_keys=ac_keys, - cl_disparity_wt_matrix=mock_disparity_wt_matrix) - + mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_map, + edge_map, + node_edge_map, + data_map, + distances, + betas, + landuse_encodings, + qs=qs, + mixed_use_hill_keys=mu_h_keys, + mixed_use_other_keys=mu_o_keys, + accessibility_keys=ac_keys, + cl_disparity_wt_matrix=mock_disparity_wt_matrix) for mu_h_idx, mu_h_met in enumerate(mu_h_metrics): for q_idx, q_key in enumerate(qs): for d_idx, d_key in enumerate(distances): assert 
np.allclose(N_temp.metrics['mixed_uses'][mu_h_met][q_key][d_key], mu_data_hill[mu_h_idx][q_idx][d_idx], atol=0.001, rtol=0) - for mu_o_idx, mu_o_met in enumerate(mu_o_metrics): for d_idx, d_key in enumerate(distances): assert np.allclose(N_temp.metrics['mixed_uses'][mu_o_met][d_key], mu_data_other[mu_o_idx][d_idx], atol=0.001, rtol=0) - for ac_idx, ac_met in enumerate(ac_metrics): for d_idx, d_key in enumerate(distances): assert np.allclose(N_temp.metrics['accessibility']['non_weighted'][ac_met][d_key], ac_data[ac_idx][d_idx], atol=0.001, rtol=0) assert np.allclose(N_temp.metrics['accessibility']['weighted'][ac_met][d_key], ac_data_wt[ac_idx][d_idx], atol=0.001, rtol=0) - # most integrity checks happen in underlying method, though check here for mismatching labels length and typos with pytest.raises(ValueError): - D.compute_aggregated(landuse_labels[-1], mixed_use_keys=['shannon']) + D.compute_landuses(landuse_labels[-1], mixed_use_keys=['shannon']) with pytest.raises(ValueError): - D.compute_aggregated(landuse_labels, mixed_use_keys=['spelling_typo']) + D.compute_landuses(landuse_labels, mixed_use_keys=['spelling_typo']) # don't check accessibility_labels for typos - because only warning is triggered (not all labels will be in all data) # check that unassigned data layer flags with pytest.raises(ValueError): D_new = layers.DataLayerFromDict(data_dict) - D_new.compute_aggregated(landuse_labels, mixed_use_keys=['shannon']) - - -def test_compute_aggregated_B(primal_graph): - ''' - Test stats component - ''' - betas = np.array([0.01, 0.005]) - distances = networks.distance_from_beta(betas) - # network layer - N = networks.NetworkLayerFromNX(primal_graph, distances=distances) - node_map = N._node_data - edge_map = N._edge_data - node_edge_map = N._node_edge_map - # data layer - data_dict = mock.mock_data_dict(primal_graph) - qs = np.array([0, 1, 2]) - D = layers.DataLayerFromDict(data_dict) - # check single metrics independently against underlying for some use-cases, e.g. hill, non-hill, accessibility... 
- D.assign_to_network(N, max_dist=500) - - # generate some mock landuse data - mock_numeric = mock.mock_numerical_data(len(data_dict), num_arrs=2) - - # generate stats - D.compute_aggregated(stats_keys=['boo', 'baa'], stats_data_arrs=mock_numeric) - - # test against underlying method - data_map = D._data - mu_data_hill, mu_data_other, ac_data, ac_data_wt, \ - stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ - data.local_aggregator(node_map, - edge_map, - node_edge_map, - data_map, - distances, - betas, - numerical_arrays=mock_numeric) - - stats_keys = ['max', 'min', 'sum', 'sum_weighted', 'mean', 'mean_weighted', 'variance', 'variance_weighted'] - stats_data = [stats_max, stats_min, stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, - stats_variance_wt] - - for num_idx, num_label in enumerate(['boo', 'baa']): - for s_key, stats in zip(stats_keys, stats_data): - for d_idx, d_key in enumerate(distances): - assert np.allclose(N.metrics['stats'][num_label][s_key][d_key], stats[num_idx][d_idx], atol=0.001, - rtol=0) - - # check that mismatching label and array lengths are caught - for labels, arrs in ((['a'], mock_numeric), # mismatching lengths - (['a', 'b'], None), # missing arrays - (None, mock_numeric)): # missing labels - with pytest.raises(ValueError): - D.compute_aggregated(stats_keys=labels, stats_data_arrs=arrs) + D_new.compute_landuses(landuse_labels, mixed_use_keys=['shannon']) def network_generator(): @@ -355,7 +270,7 @@ def test_hill_diversity(primal_graph): N_full = networks.NetworkLayerFromNX(G, distances=distances) D_full = layers.DataLayerFromDict(data_dict) D_full.assign_to_network(N_full, max_dist=500) - D_full.compute_aggregated(landuse_labels, mixed_use_keys=['hill'], qs=[0, 1, 2]) + D_full.compute_landuses(landuse_labels, mixed_use_keys=['hill'], qs=[0, 1, 2]) # compare for d in distances: for q in [0, 1, 2]: @@ -377,7 +292,7 @@ def test_hill_branch_wt_diversity(primal_graph): N_full = networks.NetworkLayerFromNX(G, distances=distances) D_full = layers.DataLayerFromDict(data_dict) D_full.assign_to_network(N_full, max_dist=500) - D_full.compute_aggregated(landuse_labels, mixed_use_keys=['hill_branch_wt'], qs=[0, 1, 2]) + D_full.compute_landuses(landuse_labels, mixed_use_keys=['hill_branch_wt'], qs=[0, 1, 2]) # compare for d in distances: for q in [0, 1, 2]: @@ -399,7 +314,7 @@ def test_compute_accessibilities(primal_graph): N_full = networks.NetworkLayerFromNX(G, distances=distances) D_full = layers.DataLayerFromDict(data_dict) D_full.assign_to_network(N_full, max_dist=500) - D_full.compute_aggregated(landuse_labels, accessibility_keys=['c']) + D_full.compute_landuses(landuse_labels, accessibility_keys=['c']) # compare for d in distances: for wt in ['weighted', 'non_weighted']: @@ -407,52 +322,72 @@ def test_compute_accessibilities(primal_graph): N_full.metrics['accessibility'][wt]['c'][d], atol=0.001, rtol=0) -def test_compute_stats_single(primal_graph): - for distances, betas in network_generator(): - G = primal_graph.copy() - data_dict = mock.mock_data_dict(G) - numeric_data = mock.mock_numerical_data(len(data_dict), num_arrs=1) - # easy version - N_easy = networks.NetworkLayerFromNX(G, distances=distances) - D_easy = layers.DataLayerFromDict(data_dict) - D_easy.assign_to_network(N_easy, max_dist=500) - D_easy.compute_stats_single('boo', numeric_data[0]) - # custom version - N_full = networks.NetworkLayerFromNX(G, distances=distances) - D_full = layers.DataLayerFromDict(data_dict) - 
D_full.assign_to_network(N_full, max_dist=500) - D_full.compute_aggregated(stats_keys=['boo'], stats_data_arrs=numeric_data) - # compare - for n_label in ['boo']: - for s_label in ['max', 'min', 'mean', 'mean_weighted', 'variance', 'variance_weighted']: - for dist in distances: - assert np.allclose(N_easy.metrics['stats'][n_label][s_label][dist], - N_full.metrics['stats'][n_label][s_label][dist], equal_nan=True, atol=0.001, - rtol=0) - # check that non-single dimension arrays are caught - with pytest.raises(ValueError): - D_easy.compute_stats_single('boo', numeric_data) - - -def test_compute_stats_multiple(primal_graph): - for distances, betas in network_generator(): - G = primal_graph.copy() - data_dict = mock.mock_data_dict(G) - numeric_data = mock.mock_numerical_data(len(data_dict), num_arrs=2) - # easy version - N_easy = networks.NetworkLayerFromNX(G, distances=distances) - D_easy = layers.DataLayerFromDict(data_dict) - D_easy.assign_to_network(N_easy, max_dist=500) - D_easy.compute_stats_multiple(['boo', 'baa'], numeric_data) - # custom version - N_full = networks.NetworkLayerFromNX(G, distances=distances) - D_full = layers.DataLayerFromDict(data_dict) - D_full.assign_to_network(N_full, max_dist=500) - D_full.compute_aggregated(stats_keys=['boo', 'baa'], stats_data_arrs=numeric_data) - # compare - for n_label in ['boo', 'baa']: - for s_label in ['max', 'min', 'mean', 'mean_weighted', 'variance', 'variance_weighted']: - for dist in distances: - assert np.allclose(N_easy.metrics['stats'][n_label][s_label][dist], - N_full.metrics['stats'][n_label][s_label][dist], equal_nan=True, atol=0.001, - rtol=0) +def test_compute_stats(primal_graph): + ''' + Test stats component + ''' + betas = np.array([0.01, 0.005]) + distances = networks.distance_from_beta(betas) + # network layer + N_single = networks.NetworkLayerFromNX(primal_graph, distances=distances) + N_multi = networks.NetworkLayerFromNX(primal_graph, distances=distances) + node_map = N_multi._node_data + edge_map = N_multi._edge_data + node_edge_map = N_multi._node_edge_map + # data layer + data_dict = mock.mock_data_dict(primal_graph) + D_single = layers.DataLayerFromDict(data_dict) + D_multi = layers.DataLayerFromDict(data_dict) + # check single metrics independently against underlying for some use-cases, e.g. hill, non-hill, accessibility... 
+ D_single.assign_to_network(N_single, max_dist=500) + D_multi.assign_to_network(N_multi, max_dist=500) + # generate some mock landuse data + mock_numeric = mock.mock_numerical_data(len(data_dict), num_arrs=2) + # generate stats + D_single.compute_stats(stats_keys='boo', stats_data_arrs=mock_numeric[0]) + D_single.compute_stats(stats_keys='baa', stats_data_arrs=mock_numeric[1]) + D_multi.compute_stats(stats_keys=['boo', 'baa'], stats_data_arrs=mock_numeric) + # test against underlying method + data_map = D_single._data + stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \ + data.aggregate_stats(node_map, + edge_map, + node_edge_map, + data_map, + distances, + betas, + numerical_arrays=mock_numeric) + stats_keys = ['max', 'min', 'sum', 'sum_weighted', 'mean', 'mean_weighted', 'variance', 'variance_weighted'] + stats_data = [stats_max, stats_min, + stats_sum, stats_sum_wt, + stats_mean, stats_mean_wt, + stats_variance, stats_variance_wt] + for num_idx, num_label in enumerate(['boo', 'baa']): + for s_key, stats in zip(stats_keys, stats_data): + for d_idx, d_key in enumerate(distances): + # check one-at-a-time computed vs multiply computed + assert np.allclose(N_single.metrics['stats'][num_label][s_key][d_key], + N_multi.metrics['stats'][num_label][s_key][d_key], + atol=0.001, + rtol=0, + equal_nan=True) + # check one-at-a-time against manual + assert np.allclose(N_single.metrics['stats'][num_label][s_key][d_key], + stats[num_idx][d_idx], + atol=0.001, + rtol=0, + equal_nan=True) + # check multiply computed against manual + assert np.allclose(N_multi.metrics['stats'][num_label][s_key][d_key], + stats[num_idx][d_idx], + atol=0.001, + rtol=0, + equal_nan=True) + # check that problematic keys and data arrays are caught + for labels, arrs, err in ((['a'], mock_numeric, ValueError), # mismatching lengths + (['a', 'b'], None, TypeError), # missing arrays + (['a', 'b'], [], ValueError), # missing arrays + (None, mock_numeric, TypeError), # missing labels + ([], mock_numeric, ValueError)): # missing labels + with pytest.raises(err): + D_multi.compute_stats(stats_keys=labels, stats_data_arrs=arrs) \ No newline at end of file diff --git a/tests/metrics/test_networks.py b/tests/metrics/test_networks.py index a7d07479..b7ac6dc2 100644 --- a/tests/metrics/test_networks.py +++ b/tests/metrics/test_networks.py @@ -264,17 +264,6 @@ def test_metrics_to_dict(primal_graph): data_dict = mock.mock_data_dict(primal_graph) landuse_labels = mock.mock_categorical_data(len(data_dict)) numerical_data = mock.mock_numerical_data(len(data_dict)) - # TODO: - ''' - D = layers.DataLayerFromDict(data_dict) - D.assign_to_network(N, max_dist=400) - D.compute_aggregated(landuse_labels, - mixed_use_keys=['hill', 'shannon'], - accessibility_keys=['a', 'c'], - qs=[0, 1], - stats_keys=['boo'], - stats_data_arrs=numerical_data) - ''' metrics_dict = N.metrics_to_dict() dict_check(metrics_dict, N) diff --git a/tests/tools/test_graphs.py b/tests/tools/test_graphs.py index 51e78a7b..83c6668f 100644 --- a/tests/tools/test_graphs.py +++ b/tests/tools/test_graphs.py @@ -39,7 +39,6 @@ def test_nX_simple_geoms(): def test_add_node(diamond_graph): - new_name = graphs._add_node(diamond_graph, [0, 1], 50, 50) assert new_name == '0±1' assert list(diamond_graph.nodes) == [0, 1, 2, 3, '0±1'] @@ -289,7 +288,6 @@ def test_nX_remove_filler_nodes(primal_graph): assert 'x' in d assert 'y' in d - # lollipop test - where a looping component (all nodes == degree 2) suspends off a node with degree > 
2 G_lollipop = nx.MultiGraph() nodes = [ @@ -381,7 +379,7 @@ def test_nX_remove_filler_nodes(primal_graph): after_len += d['geom'].length assert before_len == after_len assert G_stairway_simpl['1-down']['1-up'][0]['geom'].wkt == \ - 'LINESTRING (400 750, 400 650, 500 550, 400 450, 300 550, 400 650, 500 550, 400 450, 300 550, 400 650, 400 750)' + 'LINESTRING (400 750, 400 650, 500 550, 400 450, 300 550, 400 650, 500 550, 400 450, 300 550, 400 650, 400 750)' # check that missing geoms throw an error G_k = G_messy.copy() @@ -818,10 +816,10 @@ def test_nX_from_graph_maps(primal_graph): landuse_labels = mock.mock_categorical_data(len(data_dict)) D = layers.DataLayerFromDict(data_dict) D.assign_to_network(N, max_dist=400) - D.compute_aggregated(landuse_labels, - mixed_use_keys=['hill', 'shannon'], - accessibility_keys=['a', 'c'], - qs=[0, 1]) + D.compute_landuses(landuse_labels, + mixed_use_keys=['hill', 'shannon'], + accessibility_keys=['a', 'c'], + qs=[0, 1]) metrics_dict = N.metrics_to_dict() # without backbone G_round_trip_data = graphs.nX_from_graph_maps(node_uids, @@ -886,6 +884,7 @@ def test_nX_from_graph_maps(primal_graph): node_edge_map, networkX_multigraph=corrupt_primal_graph) + def test_nX_from_OSMnx(): - #TODO: not yet implemented. + # TODO: not yet implemented. pass
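Taken together, the test updates reflect the split of the former `data.local_aggregator` into `data.aggregate_landuses` (four return arrays) and `data.aggregate_stats` (eight return arrays). A minimal sketch of the new unpacking pattern, using the same mock tooling as the tests; the specific betas, `qs`, and keys are illustrative only:

```python
import numpy as np
from cityseer.algos import data
from cityseer.metrics import networks, layers
from cityseer.tools import mock, graphs

# prepare a mock network and data layer, mirroring the test setup
G = graphs.nX_simple_geoms(mock.mock_graph())
betas = np.array([0.01, 0.005])
distances = networks.distance_from_beta(betas)
N = networks.NetworkLayerFromNX(G, distances=distances)
data_dict = mock.mock_data_dict(G)
D = layers.DataLayerFromDict(data_dict)
D.assign_to_network(N, max_dist=500)
landuse_labels = mock.mock_categorical_data(len(data_dict))
landuse_classes, landuse_encodings = layers.encode_categorical(landuse_labels)
mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2)

# land-use measures: aggregate_landuses now returns four arrays
mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(
    N._node_data, N._edge_data, N._node_edge_map, D._data,
    distances, betas,
    landuse_encodings=landuse_encodings,
    qs=np.array([0, 1, 2]),
    mixed_use_hill_keys=np.array([0]),
    accessibility_keys=np.array([landuse_classes.index('c')]))

# numerical stats: aggregate_stats returns the eight stats arrays
stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, \
    stats_variance, stats_variance_wt, stats_max, stats_min = \
    data.aggregate_stats(N._node_data, N._edge_data, N._node_edge_map, D._data,
                         distances, betas,
                         numerical_arrays=mock_numerical)
```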