diff --git a/data/validation+testing/number_check.py b/data/validation+testing/number_check.py index 5016cfe..b50f7dd 100644 --- a/data/validation+testing/number_check.py +++ b/data/validation+testing/number_check.py @@ -12,20 +12,20 @@ def main(): enriched_rui_locations = load_json("../enriched_rui_locations.jsonld") corridors = load_json("../corridors.jsonld") - - # unqiue rui locations, datasets, and cell types + # dict for corridors and associated AS unique_rui_locations = set() unique_datasets = set() unique_cell_types = set() unique_as_in_corridors = set() - + as_counts_in_corridor = {} + # get unique datasets and cell types for cell_summary in dataset_cell_summaries['@graph']: unique_datasets.add(cell_summary['cell_source']) for row in cell_summary['summary']: unique_cell_types.add(row['cell_id']) - + # get AS that collide with tissue blocks that form corridors for donor in enriched_rui_locations['@graph']: for sample in donor['samples']: @@ -33,8 +33,18 @@ def main(): for collision_item in sample['rui_location']['all_collisions']: for collision in collision_item['collisions']: unique_as_in_corridors.add(collision['as_3d_id']) - - + + # get numbers of AS per corridor + for donor in enriched_rui_locations['@graph']: + for sample in donor['samples']: + if "corridor" in sample['rui_location']: + as_counts_in_corridor[sample['rui_location'] + ['corridor']['file']] = [] + for collision_item in sample['rui_location']['all_collisions']: + for collision in collision_item['collisions']: + as_counts_in_corridor[sample['rui_location']['corridor']['file']].append(collision['as_3d_id']) + + # print results for unique counts print(f''' Unique datasets: {len(unique_datasets)} Unique cell types: {len(unique_cell_types)} @@ -42,8 +52,13 @@ def main(): Unique tissue blocks: {len(count_tissue_blocks(enriched_rui_locations))} Unique AS in corridors: {len(unique_as_in_corridors)} ''') - - # covering which unique eorgans + + # print result for AS per corridor + for key in as_counts_in_corridor: + print(f'''Corridor {key} has {len(as_counts_in_corridor[key])} AS: ''') + for anatomical_structure in as_counts_in_corridor[key]: + print(f'''\tThose AS are: {anatomical_structure} AS: ''') + def count_tissue_blocks(response): """A function to count unique tissue blocks @@ -61,9 +76,10 @@ def count_tissue_blocks(response): result.add(sample['@id']) except: continue - + return result - + + def count_rui_locations(response): """A function to count unique rui locations @@ -73,7 +89,7 @@ def count_rui_locations(response): Returns: set: A set of unique RUI location IDs """ - + result = set() for donor in response['@graph']: for sample in donor['samples']: @@ -81,9 +97,10 @@ def count_rui_locations(response): result.add(sample['rui_location']['@id']) except: continue - + return result + def load_json(file_path): """A function to load a json file and return the data as a dict