Add --merge option to unpack (#57)
* bump to new ome-types

* updating things for ome_types=0.4.0

* first try at merge projects, stubs for others

* only missing find_dataset

* first pass at merge, find_dataset needs reviewing

* now resolving dataset refs to actual datasets

* basic merge functionality working

* added basic prepare metadata

* doing clever image matching (again?) considering annotations

* tests passing - made `conn` optional on `make_image_map`

* merge tests passing, some extra minor fixes

- merging orphaned datasets
- using `isinstance` instead of `type`

* updated readme and setup.py
erickmartins authored Aug 3, 2023
1 parent 2f3c319 commit 581996c
Showing 11 changed files with 255 additions and 67 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -131,3 +131,6 @@ dmypy.json

# Pyre type checker
.pyre/
+
+# vs code settings
+.vscode/
4 changes: 4 additions & 0 deletions README.md
@@ -73,6 +73,10 @@ Note that unpack needs to be able to identify the images it imports inequivocall

`--folder` allows the user to point to a previously-unpacked folder rather than a single file.

`--merge` will reuse existing Projects, Datasets and Screens if the current
user already owns entities with the same names as those defined in
`transfer.xml`, effectively merging the "new" unpacked entities with the
existing ones (see the second example below).

Examples:
```
omero transfer unpack transfer_pack.zip
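# assuming the same pack file as above; --merge reuses any same-named
# Projects/Datasets/Screens already owned by the current user
omero transfer unpack --merge transfer_pack.zip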
```
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,3 +1,3 @@
ezomero>=2.0.0
-ome-types>=0.3.4
+ome-types>=0.4.0
setuptools>=58.0.0
4 changes: 2 additions & 2 deletions setup.py
@@ -84,7 +84,7 @@ def read(fname):
    packages=['', 'omero.plugins'],
    package_dir={"": "src"},
    name="omero-cli-transfer",
-   version='0.6.0',
+   version='0.7.0',
    maintainer="Erick Ratamero",
    maintainer_email="[email protected]",
    description=("A set of utilities for exporting a transfer"
@@ -96,7 +96,7 @@ def read(fname):
    url="https://github.com/TheJacksonLaboratory/omero-cli-transfer",
    install_requires=[
        'ezomero==2.0.0',
-       'ome-types==0.3.4'
+       'ome-types==0.4.0'
    ],
    extras_require={
        "rocrate": ["rocrate==0.7.0"],
144 changes: 122 additions & 22 deletions src/generate_omero_objects.py
@@ -25,6 +25,20 @@
import copy


def create_or_set_projects(pjs: List[Project], conn: BlitzGateway,
                           merge: bool) -> dict:
    pj_map = {}
    if not merge:
        pj_map = create_projects(pjs, conn)
    else:
        for pj in pjs:
            pj_id = find_project(pj, conn)
            if not pj_id:
                pj_id = ezomero.post_project(conn, pj.name, pj.description)
            pj_map[pj.id] = pj_id
    return pj_map


def create_projects(pjs: List[Project], conn: BlitzGateway) -> dict:
    pj_map = {}
    for pj in pjs:
@@ -33,6 +47,29 @@ def create_projects(pjs: List[Project], conn: BlitzGateway) -> dict:
    return pj_map


def find_project(pj: Project, conn: BlitzGateway) -> int:
    id = 0
    my_exp_id = conn.getUser().getId()
    for p in conn.getObjects("Project", opts={'owner': my_exp_id}):
        if p.getName() == pj.name:
            id = p.getId()
    return id
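# note: find_project returns 0 (falsy) when no same-named project exists,
# so create_or_set_projects above falls through to ezomero.post_project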


def create_or_set_screens(scrs: List[Screen], conn: BlitzGateway, merge: bool
                          ) -> dict:
    scr_map = {}
    if not merge:
        scr_map = create_screens(scrs, conn)
    else:
        for scr in scrs:
            scr_id = find_screen(scr, conn)
            if not scr_id:
                scr_id = ezomero.post_screen(conn, scr.name, scr.description)
            scr_map[scr.id] = scr_id
    return scr_map


def create_screens(scrs: List[Screen], conn: BlitzGateway) -> dict:
    scr_map = {}
    for scr in scrs:
@@ -41,6 +78,34 @@ def create_screens(scrs: List[Screen], conn: BlitzGateway) -> dict:
    return scr_map


def find_screen(sc: Screen, conn: BlitzGateway) -> int:
    id = 0
    my_exp_id = conn.getUser().getId()
    for s in conn.getObjects("Screen", opts={'owner': my_exp_id}):
        if s.getName() == sc.name:
            id = s.getId()
    return id


def create_or_set_datasets(dss: List[Dataset], pjs: List[Project],
                           conn: BlitzGateway, merge: bool) -> dict:
    ds_map = {}
    if not merge:
        ds_map = create_datasets(dss, conn)
    else:
        for ds in dss:
            ds_id = find_dataset(ds, pjs, conn)
            if not ds_id:
                dataset = DatasetWrapper(conn, DatasetI())
                dataset.setName(ds.name)
                if ds.description is not None:
                    dataset.setDescription(ds.description)
                dataset.save()
                ds_id = dataset.getId()
            ds_map[ds.id] = ds_id
    return ds_map


def create_datasets(dss: List[Dataset], conn: BlitzGateway) -> dict:
    """
    Currently doing it the non-ezomero way because ezomero always
@@ -58,6 +123,31 @@ def create_datasets(dss: List[Dataset], conn: BlitzGateway) -> dict:
    return ds_map


def find_dataset(ds: Dataset, pjs: List[Project], conn: BlitzGateway) -> int:
    id = 0
    my_exp_id = conn.getUser().getId()
    # a dataset is an orphan if no Project in the pack file references it
    orphan = True
    for pj in pjs:
        for dsref in pj.dataset_refs:
            if dsref.id == ds.id:
                orphan = False
    if not orphan:
        # non-orphans: match by name, but only among the children of the
        # same-named Project that references this dataset
        for pj in pjs:
            for p in conn.getObjects("Project", opts={'owner': my_exp_id}):
                if p.getName() == pj.name:
                    for dsref in pj.dataset_refs:
                        if dsref.id == ds.id:
                            for ds_rem in p.listChildren():
                                if ds.name == ds_rem.getName():
                                    id = ds_rem.getId()
    else:
        # orphans: match by name against the user's orphaned datasets
        for d in conn.getObjects("Dataset", opts={'owner': my_exp_id,
                                                  'orphaned': True}):
            if d.getName() == ds.name:
                id = d.getId()
    return id


def create_annotations(ans: List[Annotation], conn: BlitzGateway, hash: str,
                       folder: str, metadata: List[str]) -> dict:
    ann_map = {}
@@ -73,7 +73,7 @@ def create_annotations(ans: List[Annotation], conn: BlitzGateway, hash: str,
            namespace = an.namespace
            map_ann.setNs(namespace)
            key_value_data = []
-           for v in an.value.m:
+           for v in an.value.ms:
                if int(an.id.split(":")[-1]) < 0:
                    if not metadata:
                        key_value_data.append(['empty_metadata', "True"])
@@ -131,7 +221,7 @@ def create_original_file(ann: FileAnnotation, ans: List[Annotation],
                         conn: BlitzGateway, folder: str
                         ) -> OriginalFileWrapper:
    curr_folder = str(Path('.').resolve())
-   for an in ann.annotation_ref:
+   for an in ann.annotation_refs:
        clean_id = int(an.id.split(":")[-1])
        if clean_id < 0:
            cmnt_id = an.id
@@ -149,7 +239,7 @@ def create_plate_map(ome: OME, img_map: dict, conn: BlitzGateway
    plate_map = {}
    map_ref_ids = []
    for plate in ome.plates:
-       ann_ids = [i.id for i in plate.annotation_ref]
+       ann_ids = [i.id for i in plate.annotation_refs]
        file_path = ""
        for ann in ome.structured_annotations:
            if (ann.id in ann_ids and
@@ -201,9 +291,9 @@ def create_plate_map(ome: OME, img_map: dict, conn: BlitzGateway
            plate_id = create_plate_from_images(plate, img_map, conn)
        plate_map[plate.id] = plate_id
    for p in newome.plates:
-       for ref in p.annotation_ref:
+       for ref in p.annotation_refs:
            if ref.id in map_ref_ids:
-               p.annotation_ref.remove(ref)
+               p.annotation_refs.remove(ref)
    return plate_map, newome


@@ -342,7 +432,7 @@ def _int_to_rgba(omero_val: int) -> Tuple[int, int, int, int]:
def create_rois(rois: List[ROI], imgs: List[Image], img_map: dict,
                conn: BlitzGateway):
    for img in imgs:
-       for roiref in img.roi_ref:
+       for roiref in img.roi_refs:
            roi = next(filter(lambda x: x.id == roiref.id, rois))
            shapes = create_shapes(roi)
            img_id_dest = img_map[img.id]
@@ -354,10 +444,15 @@ def create_rois(rois: List[ROI], imgs: List[Image], img_map: dict,
def link_datasets(ome: OME, proj_map: dict, ds_map: dict, conn: BlitzGateway):
    for proj in ome.projects:
        proj_id = proj_map[proj.id]
+       proj_obj = conn.getObject("Project", proj_id)
+       existing_ds = []
+       for dataset in proj_obj.listChildren():
+           existing_ds.append(dataset.getId())
        ds_ids = []
-       for ds in proj.dataset_ref:
+       for ds in proj.dataset_refs:
            ds_id = ds_map[ds.id]
-           ds_ids.append(ds_id)
+           if ds_id not in existing_ds:
+               ds_ids.append(ds_id)
        ezomero.link_datasets_to_project(conn, ds_ids, proj_id)
    return

@@ -366,10 +461,15 @@ def link_plates(ome: OME, screen_map: dict, plate_map: dict,
                conn: BlitzGateway):
    for screen in ome.screens:
        screen_id = screen_map[screen.id]
+       scr_obj = conn.getObject("Screen", screen_id)
+       existing_pl = []
+       for pl in scr_obj.listChildren():
+           existing_pl.append(pl.getId())
        pl_ids = []
-       for pl in screen.plate_ref:
+       for pl in screen.plate_refs:
            pl_id = plate_map[pl.id]
-           pl_ids.append(pl_id)
+           if pl_id not in existing_pl:
+               pl_ids.append(pl_id)
        ezomero.link_plates_to_screen(conn, pl_ids, screen_id)
    return

@@ -378,7 +478,7 @@ def link_images(ome: OME, ds_map: dict, img_map: dict, conn: BlitzGateway):
    for ds in ome.datasets:
        ds_id = ds_map[ds.id]
        img_ids = []
-       for img in ds.image_ref:
+       for img in ds.image_refs:
            try:
                img_id = img_map[img.id]
                img_ids.append(img_id)
@@ -395,22 +495,22 @@ def link_annotations(ome: OME, proj_map: dict, ds_map: dict, img_map: dict,
        proj_id = proj_map[proj.id]
        proj_obj = conn.getObject("Project", proj_id)
        anns = ome.structured_annotations
-       for annref in proj.annotation_ref:
+       for annref in proj.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(proj_obj, ann, ann_map, conn)
    for ds in ome.datasets:
        ds_id = ds_map[ds.id]
        ds_obj = conn.getObject("Dataset", ds_id)
        anns = ome.structured_annotations
-       for annref in ds.annotation_ref:
+       for annref in ds.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(ds_obj, ann, ann_map, conn)
    for img in ome.images:
        try:
            img_id = img_map[img.id]
            img_obj = conn.getObject("Image", img_id)
            anns = ome.structured_annotations
-           for annref in img.annotation_ref:
+           for annref in img.annotation_refs:
                ann = next(filter(lambda x: x.id == annref.id, anns))
                link_one_annotation(img_obj, ann, ann_map, conn)
        except KeyError:
@@ -419,23 +519,23 @@ def link_annotations(ome: OME, proj_map: dict, ds_map: dict, img_map: dict,
        scr_id = scr_map[scr.id]
        scr_obj = conn.getObject("Screen", scr_id)
        anns = ome.structured_annotations
-       for annref in scr.annotation_ref:
+       for annref in scr.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(scr_obj, ann, ann_map, conn)
    for pl in ome.plates:
        pl_id = pl_map[pl.id]
        pl_obj = conn.getObject("Plate", pl_id)
        anns = ome.structured_annotations
-       for annref in pl.annotation_ref:
+       for annref in pl.annotation_refs:
            ann = next(filter(lambda x: x.id == annref.id, anns))
            link_one_annotation(pl_obj, ann, ann_map, conn)
        anns = ome.structured_annotations
        for well in pl.wells:
-           if len(well.annotation_ref) > 0:
+           if len(well.annotation_refs) > 0:
                row, col = well.row, well.column
                well_id = ezomero.get_well_id(conn, pl_id, row, col)
                well_obj = conn.getObject("Well", well_id)
-               for annref in well.annotation_ref:
+               for annref in well.annotation_refs:
                    ann = next(filter(lambda x: x.id == annref.id, anns))
                    link_one_annotation(well_obj, ann, ann_map, conn)
    return
@@ -485,13 +585,13 @@ def rename_plates(pls: List[Plate], pl_map: dict, conn: BlitzGateway):


def populate_omero(ome: OME, img_map: dict, conn: BlitzGateway, hash: str,
-                  folder: str, metadata: List[str]):
+                  folder: str, metadata: List[str], merge: bool):
    plate_map, ome = create_plate_map(ome, img_map, conn)
    rename_images(ome.images, img_map, conn)
    rename_plates(ome.plates, plate_map, conn)
-   proj_map = create_projects(ome.projects, conn)
-   ds_map = create_datasets(ome.datasets, conn)
-   screen_map = create_screens(ome.screens, conn)
+   proj_map = create_or_set_projects(ome.projects, conn, merge)
+   ds_map = create_or_set_datasets(ome.datasets, ome.projects, conn, merge)
+   screen_map = create_or_set_screens(ome.screens, conn, merge)
    ann_map = create_annotations(ome.structured_annotations, conn,
                                 hash, folder, metadata)
    create_rois(ome.rois, ome.images, img_map, conn)