Add --merge option to unpack (#57)
* bump to new ome-types

* updating things for ome_types=0.4.0

* first try at merge projects, stubs for others

* only missing find_dataset

* first pass at merge, find_dataset needs reviewing

* now resolving dataset refs to actual datasets

* basic merge functionality working

* added basic prepare metadata

* doing clever image matching (again?) considering annotations

* tests passing - made `conn` optional on `make_image_map`

* merge tests passing, some extra minor fixes

- merging orphaned datasets
- using `isinstance` instead of `type`

* updated readme and setup.py
erickmartins authored Aug 3, 2023
1 parent 2f3c319 commit 581996c
Showing 11 changed files with 255 additions and 67 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -131,3 +131,6 @@ dmypy.json

# Pyre type checker
.pyre/
+
+# vs code settings
+.vscode/
4 changes: 4 additions & 0 deletions README.md
@@ -73,6 +73,10 @@ Note that unpack needs to be able to identify the images it imports inequivocall

`--folder` allows the user to point to a previously-unpacked folder rather than a single file.

`--merge` will reuse existing Projects, Datasets and Screens if the current
user already owns entities with the same names as those defined in
`transfer.xml`, effectively merging the "new" unpacked entities with the
existing ones (see the second example below).

Examples:
```
omero transfer unpack transfer_pack.zip
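# assuming the same pack file as above; --merge reuses any same-named
# Projects/Datasets/Screens already owned by the current user
omero transfer unpack --merge transfer_pack.zip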
```
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,3 +1,3 @@
ezomero>=2.0.0
-ome-types>=0.3.4
+ome-types>=0.4.0
setuptools>=58.0.0
4 changes: 2 additions & 2 deletions setup.py
@@ -84,7 +84,7 @@ def read(fname):
    packages=['', 'omero.plugins'],
    package_dir={"": "src"},
    name="omero-cli-transfer",
-   version='0.6.0',
+   version='0.7.0',
    maintainer="Erick Ratamero",
    maintainer_email="[email protected]",
    description=("A set of utilities for exporting a transfer"
@@ -96,7 +96,7 @@ def read(fname):
    url="https://github.com/TheJacksonLaboratory/omero-cli-transfer",
    install_requires=[
        'ezomero==2.0.0',
-       'ome-types==0.3.4'
+       'ome-types==0.4.0'
    ],
    extras_require={
        "rocrate": ["rocrate==0.7.0"],
144 changes: 122 additions & 22 deletions src/generate_omero_objects.py
@@ -25,6 +25,20 @@
import copy


def create_or_set_projects(pjs: List[Project], conn: BlitzGateway,
                           merge: bool) -> dict:
    pj_map = {}
    if not merge:
        pj_map = create_projects(pjs, conn)
    else:
        for pj in pjs:
            pj_id = find_project(pj, conn)
            if not pj_id:
                pj_id = ezomero.post_project(conn, pj.name, pj.description)
            pj_map[pj.id] = pj_id
    return pj_map


def create_projects(pjs: List[Project], conn: BlitzGateway) -> dict:
    pj_map = {}
    for pj in pjs:
@@ -33,6 +47,29 @@ def create_projects(pjs: List[Project], conn: BlitzGateway) -> dict:
    return pj_map


def find_project(pj: Project, conn: BlitzGateway) -> int:
    id = 0
    my_exp_id = conn.getUser().getId()
    for p in conn.getObjects("Project", opts={'owner': my_exp_id}):
        if p.getName() == pj.name:
            id = p.getId()
    return id
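# note: find_project returns 0 (falsy) when no same-named project exists,
# so create_or_set_projects above falls through to ezomero.post_project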


def create_or_set_screens(scrs: List[Screen], conn: BlitzGateway, merge: bool
                          ) -> dict:
    scr_map = {}
    if not merge:
        scr_map = create_screens(scrs, conn)
    else:
        for scr in scrs:
            scr_id = find_screen(scr, conn)
            if not scr_id:
                scr_id = ezomero.post_screen(conn, scr.name, scr.description)
            scr_map[scr.id] = scr_id
    return scr_map


def create_screens(scrs: List[Screen], conn: BlitzGateway) -> dict:
    scr_map = {}
    for scr in scrs:
@@ -41,6 +78,34 @@ def create_screens(scrs: List[Screen], conn: BlitzGateway) -> dict:
    return scr_map


def find_screen(sc: Screen, conn: BlitzGateway) -> int:
    id = 0
    my_exp_id = conn.getUser().getId()
    for s in conn.getObjects("Screen", opts={'owner': my_exp_id}):
        if s.getName() == sc.name:
            id = s.getId()
    return id


def create_or_set_datasets(dss: List[Dataset], pjs: List[Project],
                           conn: BlitzGateway, merge: bool) -> dict:
    ds_map = {}
    if not merge:
        ds_map = create_datasets(dss, conn)
    else:
        for ds in dss:
            ds_id = find_dataset(ds, pjs, conn)
            if not ds_id:
                dataset = DatasetWrapper(conn, DatasetI())
                dataset.setName(ds.name)
                if ds.description is not None:
                    dataset.setDescription(ds.description)
                dataset.save()
                ds_id = dataset.getId()
            ds_map[ds.id] = ds_id
    return ds_map


def create_datasets(dss: List[Dataset], conn: BlitzGateway) -> dict:
    """
    Currently doing it the non-ezomero way because ezomero always
@@ -58,6 +123,31 @@ def create_datasets(dss: List[Dataset], conn: BlitzGateway) -> dict:
    return ds_map


def find_dataset(ds: Dataset, pjs: List[Project], conn: BlitzGateway) -> int:
    id = 0
    my_exp_id = conn.getUser().getId()
    # a dataset is an orphan if no Project in the pack file references it
    orphan = True
    for pj in pjs:
        for dsref in pj.dataset_refs:
            if dsref.id == ds.id:
                orphan = False
    if not orphan:
        # non-orphans: match by name, but only among the children of the
        # same-named Project that references this dataset
        for pj in pjs:
            for p in conn.getObjects("Project", opts={'owner': my_exp_id}):
                if p.getName() == pj.name:
                    for dsref in pj.dataset_refs:
                        if dsref.id == ds.id:
                            for ds_rem in p.listChildren():
                                if ds.name == ds_rem.getName():
                                    id = ds_rem.getId()
    else:
        # orphans: match by name against the user's orphaned datasets
        for d in conn.getObjects("Dataset", opts={'owner': my_exp_id,
                                                  'orphaned': True}):
            if d.getName() == ds.name:
                id = d.getId()
    return id


def create_annotations(ans: List[Annotation], conn: BlitzGateway, hash: str,
                       folder: str, metadata: List[str]) -> dict:
    ann_map = {}
@@ -73,7 +73,7 @@ def create_annotations(ans: List[Annotation], conn: BlitzGateway, hash: str,
            namespace = an.namespace
            map_ann.setNs(namespace)
            key_value_data = []
-           for v in an.value.m:
+           for v in an.value.ms:
                if int(an.id.split(":")[-1]) < 0:
                    if not metadata:
                        key_value_data.append(['empty_metadata', "True"])
@@ -131,7 +221,7 @@ def create_original_file(ann: FileAnnotation, ans: List[Annotation],
                         conn: BlitzGateway, folder: str
                         ) -> OriginalFileWrapper:
    curr_folder = str(Path('.').resolve())
-   for an in ann.annotation_ref:
+   for an in ann.annotation_refs:
        clean_id = int(an.id.split(":")[-1])
        if clean_id < 0:
            cmnt_id = an.id
@@ -149,7 +239,7 @@ def create_plate_map(ome: OME, img_map: dict, conn: BlitzGateway
    plate_map = {}
    map_ref_ids = []
    for plate in ome.plates:
-       ann_ids = [i.id for i in plate.annotation_ref]
+       ann_ids = [i.id for i in plate.annotation_refs]
        file_path = ""
        for ann in ome.structured_annotations:
            if (ann.id in ann_ids and
@@ -201,9 +291,9 @@ def create_plate_map(ome: OME, img_map: dict, conn: BlitzGateway
            plate_id = create_plate_from_images(plate, img_map, conn)
        plate_map[plate.id] = plate_id
    for p in newome.plates:
-       for ref in p.annotation_ref:
+       for ref in p.annotation_refs:
            if ref.id in map_ref_ids:
-               p.annotation_ref.remove(ref)
+               p.annotation_refs.remove(ref)
    return plate_map, newome


@@ -342,7 +432,7 @@ def _int_to_rgba(omero_val: int) -> Tuple[int, int, int, int]:
def create_rois(rois: List[ROI], imgs: List[Image], img_map: dict,
                conn: BlitzGateway):
    for img in imgs:
-       for roiref in img.roi_ref:
+       for roiref in img.roi_refs:
            roi = next(filter(lambda x: x.id == roiref.id, rois))
            shapes = create_shapes(roi)
            img_id_dest = img_map[img.id]
@@ -354,10 +444,15 @@ def create_rois(rois: List[ROI], imgs: List[Image], img_map: dict,
def link_datasets(ome: OME, proj_map: dict, ds_map: dict, conn: BlitzGateway):
    for proj in ome.projects:
        proj_id = proj_map[proj.id]
+       proj_obj = conn.getObject("Project", proj_id)
+       existing_ds = []
+       for dataset in proj_obj.listChildren():
+           existing_ds.append(dataset.getId())
        ds_ids = []
-       for ds in proj.dataset_ref:
+       for ds in proj.dataset_refs:
            ds_id = ds_map[ds.id]
-           ds_ids.append(ds_id)
+           if ds_id not in existing_ds:
+               ds_ids.append(ds_id)
        ezomero.link_datasets_to_project(conn, ds_ids, proj_id)
    return

@@ -366,10 +461,15 @@ def link_plates(ome: OME, screen_map: dict, plate_map: dict,
                conn: BlitzGateway):
    for screen in ome.screens:
        screen_id = screen_map[screen.id]
+       scr_obj = conn.getObject("Screen", screen_id)
+       existing_pl = []
+       for pl in scr_obj.listChildren():
+           existing_pl.append(pl.getId())
        pl_ids = []
-       for pl in screen.plate_ref:
+       for pl in screen.plate_refs:
            pl_id = plate_map[pl.id]
-           pl_ids.append(pl_id)
+           if pl_id not in existing_pl:
+               pl_ids.append(pl_id)
        ezomero.link_plates_to_screen(conn, pl_ids, screen_id)
    return

@@ -378,7 +478,7 @@ def link_images(ome: OME, ds_map: dict, img_map: dict, conn: BlitzGateway):
    for ds in ome.datasets:
        ds_id = ds_map[ds.id]
        img_ids = []
-       for img in ds.image_ref:
+       for img in ds.image_refs:
            try:
                img_id = img_map[img.id]
                img_ids.append(img_id)
@@ -395,22 +495,22 @@ def link_annotations(ome: OME, proj_map: dict, ds_map: dict, img_map: dict,
        proj_id = proj_map[proj.id]
        proj_obj = conn.getObject("Project", proj_id)
        anns = ome.structured_annotations
-       for annref in proj.annotation_ref:
+       for annref in proj.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(proj_obj, ann, ann_map, conn)
    for ds in ome.datasets:
        ds_id = ds_map[ds.id]
        ds_obj = conn.getObject("Dataset", ds_id)
        anns = ome.structured_annotations
-       for annref in ds.annotation_ref:
+       for annref in ds.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(ds_obj, ann, ann_map, conn)
    for img in ome.images:
        try:
            img_id = img_map[img.id]
            img_obj = conn.getObject("Image", img_id)
            anns = ome.structured_annotations
-           for annref in img.annotation_ref:
+           for annref in img.annotation_refs:
                ann = next(filter(lambda x: x.id == annref.id, anns))
                link_one_annotation(img_obj, ann, ann_map, conn)
        except KeyError:
@@ -419,23 +519,23 @@ def link_annotations(ome: OME, proj_map: dict, ds_map: dict, img_map: dict,
        scr_id = scr_map[scr.id]
        scr_obj = conn.getObject("Screen", scr_id)
        anns = ome.structured_annotations
-       for annref in scr.annotation_ref:
+       for annref in scr.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(scr_obj, ann, ann_map, conn)
    for pl in ome.plates:
        pl_id = pl_map[pl.id]
        pl_obj = conn.getObject("Plate", pl_id)
        anns = ome.structured_annotations
-       for annref in pl.annotation_ref:
+       for annref in pl.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(pl_obj, ann, ann_map, conn)
        anns = ome.structured_annotations
        for well in pl.wells:
-           if len(well.annotation_ref) > 0:
+           if len(well.annotation_refs) > 0:
                row, col = well.row, well.column
                well_id = ezomero.get_well_id(conn, pl_id, row, col)
                well_obj = conn.getObject("Well", well_id)
-               for annref in well.annotation_ref:
+               for annref in well.annotation_refs:
                    ann = next(filter(lambda x: x.id == annref.id, anns))
                    link_one_annotation(well_obj, ann, ann_map, conn)
    return
@@ -485,13 +585,13 @@ def rename_plates(pls: List[Plate], pl_map: dict, conn: BlitzGateway):


def populate_omero(ome: OME, img_map: dict, conn: BlitzGateway, hash: str,
-                  folder: str, metadata: List[str]):
+                  folder: str, metadata: List[str], merge: bool):
    plate_map, ome = create_plate_map(ome, img_map, conn)
    rename_images(ome.images, img_map, conn)
    rename_plates(ome.plates, plate_map, conn)
-   proj_map = create_projects(ome.projects, conn)
-   ds_map = create_datasets(ome.datasets, conn)
-   screen_map = create_screens(ome.screens, conn)
+   proj_map = create_or_set_projects(ome.projects, conn, merge)
+   ds_map = create_or_set_datasets(ome.datasets, ome.projects, conn, merge)
+   screen_map = create_or_set_screens(ome.screens, conn, merge)
    ann_map = create_annotations(ome.structured_annotations, conn,
                                 hash, folder, metadata)
    create_rois(ome.rois, ome.images, img_map, conn)