From 6730dd6caaeeb15cbad07867246d87687b19089d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 15 Feb 2024 17:50:28 +1100 Subject: [PATCH] made session_id equal to subject_visit --- xnat_ingest/cli/stage.py | 4 ++-- xnat_ingest/cli/upload.py | 32 +++++++++++++++++++---------- xnat_ingest/session.py | 34 +++++++++++++++++++------------ xnat_ingest/tests/test_cli.py | 8 ++++---- xnat_ingest/tests/test_session.py | 2 +- 5 files changed, 49 insertions(+), 31 deletions(-) diff --git a/xnat_ingest/cli/stage.py b/xnat_ingest/cli/stage.py index 2c653bd..d64fd1f 100644 --- a/xnat_ingest/cli/stage.py +++ b/xnat_ingest/cli/stage.py @@ -150,7 +150,7 @@ def stage( associated_files: AssociatedFiles, project_field: str, subject_field: str, - session_field: str, + visit_field: str, project_id: str | None, delete: bool, log_level: str, @@ -175,7 +175,7 @@ def stage( dicoms_path=dicoms_path, project_field=project_field, subject_field=subject_field, - session_field=session_field, + visit_field=visit_field, project_id=project_id, ) diff --git a/xnat_ingest/cli/upload.py b/xnat_ingest/cli/upload.py index b896276..1d9cdc1 100644 --- a/xnat_ingest/cli/upload.py +++ b/xnat_ingest/cli/upload.py @@ -129,8 +129,10 @@ "--use-manifest/--dont-use-manifest", default=None, envvar="XNAT_INGEST_REQUIRE_MANIFEST", - help=("Whether to use the manifest file in the staged sessions to load the " - "directory structure. By default it is used if present and ignore if not there"), + help=( + "Whether to use the manifest file in the staged sessions to load the " + "directory structure. By default it is used if present and ignore if not there" + ), type=bool, ) def upload( @@ -158,11 +160,13 @@ def upload( with xnat_repo.connection: - def xnat_session_exists(project_id, subject_id, session_id): + def xnat_session_exists(project_id, subject_id, visit_id): try: xnat_repo.connection.projects[project_id].subjects[ subject_id - ].experiments[session_id] + ].experiments[ + ImagingSession.make_session_id(project_id, subject_id, visit_id) + ] except KeyError: return False else: @@ -170,14 +174,16 @@ def xnat_session_exists(project_id, subject_id, session_id): "Skipping session '%s-%s-%s' as it already exists on XNAT", project_id, subject_id, - session_id, + visit_id, ) return True if staged.startswith("s3://"): # List sessions stored in s3 bucket s3 = boto3.resource( - "s3", aws_access_key_id=store_credentials[0], aws_secret_access_key=store_credentials[1] + "s3", + aws_access_key_id=store_credentials[0], + aws_secret_access_key=store_credentials[1], ) bucket_name, prefix = staged[5:].split("/", 1) bucket = s3.Bucket(bucket_name) @@ -186,7 +192,7 @@ def xnat_session_exists(project_id, subject_id, session_id): for obj in all_objects: if obj.key.endswith("/"): continue - path_parts = obj.key[len(prefix):].split("/") + path_parts = obj.key[len(prefix) :].split("/") session_ids = tuple(path_parts[:3]) session_objs[session_ids].append((path_parts[3:], obj)) @@ -212,7 +218,7 @@ def iter_staged_sessions(): session_tmp_dir.mkdir(parents=True, exist_ok=True) for relpath, obj in tqdm( objs, - desc=f"Downloading scans in {':'.join(ids)} session from S3 bucket" + desc=f"Downloading scans in {':'.join(ids)} session from S3 bucket", ): obj_path = session_tmp_dir.joinpath(*relpath) obj_path.parent.mkdir(parents=True, exist_ok=True) @@ -237,14 +243,18 @@ def iter_staged_sessions(): ): sessions.append(session_dir) num_sessions = len(sessions) - logger.info("Found %d sessions in staging directory '%s'", num_sessions, staged) + logger.info( + "Found %d sessions in staging directory '%s'", num_sessions, staged + ) for session_staging_dir in tqdm( sessions, total=num_sessions, desc=f"Processing staged sessions found in '{staged}'", ): - session = ImagingSession.load(session_staging_dir, use_manifest=use_manifest) + session = ImagingSession.load( + session_staging_dir, use_manifest=use_manifest + ) try: if "MR" in session.modalities: SessionClass = xnat_repo.connection.classes.MrSessionData @@ -282,7 +292,7 @@ def iter_staged_sessions(): ) xsession = SessionClass(label=session.session_id, parent=xsubject) session_path = ( - f"{session.project_id}:{session.subject_id}:{session.session_id}" + f"{session.project_id}:{session.subject_id}:{session.visit_id}" ) # Access Arcana dataset associated with project diff --git a/xnat_ingest/session.py b/xnat_ingest/session.py index 8707c9c..6503699 100644 --- a/xnat_ingest/session.py +++ b/xnat_ingest/session.py @@ -58,7 +58,7 @@ def scans_converter( class ImagingSession: project_id: str subject_id: str - session_id: str + visit_id: str scans: ty.Dict[str, ImagingScan] = attrs.field( factory=dict, converter=scans_converter, @@ -72,11 +72,19 @@ def __getitem__(self, fieldname: str) -> ty.Any: @property def name(self): - return f"{self.project_id}-{self.subject_id}-{self.session_id}" + return f"{self.project_id}-{self.subject_id}-{self.visit_id}" @property def staging_relpath(self): - return [self.project_id, self.subject_id, self.session_id] + return [self.project_id, self.subject_id, self.visit_id] + + @property + def session_id(self): + return self.make_session_id(self.project_id, self.subject_id, self.visit_id) + + @classmethod + def make_session_id(cls, project_id, subject_id, visit_id): + return f"{subject_id}_{visit_id}" @cached_property def modalities(self) -> ty.Set[str]: @@ -189,7 +197,7 @@ def from_dicoms( dicoms_path: str | Path, project_field: str = "StudyID", subject_field: str = "PatientID", - session_field: str = "AccessionNumber", + visit_field: str = "AccessionNumber", project_id: str | None = None, ) -> ty.List["ImagingSession"]: """Loads all imaging sessions from a list of DICOM files @@ -205,7 +213,7 @@ def from_dicoms( subject_field : str the name of the DICOM field that is to be interpreted as the corresponding XNAT project - session_field : str + visit_field : str the name of the DICOM field that is to be interpreted as the corresponding XNAT project project_id : str @@ -248,7 +256,7 @@ def from_dicoms( "SOPInstanceUID", # used in ordering the contents of the dicom series project_field.keyword, subject_field.keyword, - session_field.keyword, + visit_field.keyword, ], ): # Restrict the metadata fields that are loaded (others are ignored), @@ -298,7 +306,7 @@ def get_id(field): scans=scans, project_id=(project_id if project_id else get_id(project_field)), subject_id=get_id(subject_field), - session_id=get_id(session_field), + visit_id=get_id(visit_field), ) ) @@ -330,7 +338,7 @@ def load(cls, session_dir: Path, use_manifest: ty.Optional[bool] = None) -> "Ima """ project_id = session_dir.parent.parent.name subject_id = session_dir.parent.name - session_id = session_dir.name + visit_id = session_dir.name yaml_file = session_dir / cls.MANIFEST_FILENAME if yaml_file.exists() and use_manifest is not False: # Load session from YAML file metadata @@ -363,7 +371,7 @@ def load(cls, session_dir: Path, use_manifest: ty.Optional[bool] = None) -> "Ima session = cls( project_id=project_id, subject_id=subject_id, - session_id=session_id, + visit_id=visit_id, **dct, ) elif use_manifest is not True: @@ -387,7 +395,7 @@ def load(cls, session_dir: Path, use_manifest: ty.Optional[bool] = None) -> "Ima scans=scans, project_id=project_id, subject_id=subject_id, - session_id=session_id, + visit_id=visit_id, ) else: raise FileNotFoundError( @@ -417,7 +425,7 @@ def save(self, save_dir: Path, just_manifest: bool = False) -> "ImagingSession": scans = {} saved = deepcopy(self) session_dir = ( - save_dir / self.project_id / self.subject_id / self.session_id + save_dir / self.project_id / self.subject_id / self.visit_id ).absolute() session_dir.mkdir(parents=True, exist_ok=True) for scan in self.scans.values(): @@ -506,7 +514,7 @@ def stage( ) staged_scans = [] staged_metadata = {} - session_dir = dest_dir / self.project_id / self.subject_id / self.session_id + session_dir = dest_dir / self.project_id / self.subject_id / self.visit_id session_dir.mkdir(parents=True) for scan in tqdm( self.scans.values(), f"Staging DICOM sessions to {session_dir}" @@ -652,7 +660,7 @@ def stage( staged = type(self)( project_id=self.project_id, subject_id=self.subject_id, - session_id=self.session_id, + visit_id=self.visit_id, scans=staged_scans, ) staged.save(dest_dir, just_manifest=True) diff --git a/xnat_ingest/tests/test_cli.py b/xnat_ingest/tests/test_cli.py index 8510176..c1e1a56 100644 --- a/xnat_ingest/tests/test_cli.py +++ b/xnat_ingest/tests/test_cli.py @@ -57,7 +57,7 @@ def test_stage_and_upload( os.unlink(log_file) # Delete any existing sessions from previous test runs - session_ids = [] + visit_ids = [] with xnat4tests.connect() as xnat_login: for i, c in enumerate("abc"): first_name = f"First{c.upper()}" @@ -65,7 +65,7 @@ def test_stage_and_upload( PatientName = f"{first_name}^{last_name}" PatientID = f"subject{i}" AccessionNumber = f"98765432{i}" - session_ids.append(AccessionNumber) + visit_ids.append(AccessionNumber) StudyInstanceUID = ( f"1.3.12.2.1107.5.1.4.10016.3000002308242209356530000001{i}" @@ -227,8 +227,8 @@ def test_stage_and_upload( with xnat4tests.connect() as xnat_login: xproject = xnat_login.projects[xnat_project] - for session_id in session_ids: - xsession = xproject.experiments[session_id] + for visit_id in visit_ids: + xsession = xproject.experiments[visit_id] scan_ids = sorted(xsession.scans) assert scan_ids == [ diff --git a/xnat_ingest/tests/test_session.py b/xnat_ingest/tests/test_session.py index 2ddd2da..b0097fc 100644 --- a/xnat_ingest/tests/test_session.py +++ b/xnat_ingest/tests/test_session.py @@ -52,7 +52,7 @@ def imaging_session() -> ImagingSession: return ImagingSession( project_id="PROJECTID", subject_id="SUBJECTID", - session_id="SESSIONID", + visit_id="SESSIONID", scans=scans, )