diff --git a/src/label_studio_sdk/converter/brush.py b/src/label_studio_sdk/converter/brush.py index 68df2988f..e7d9cd454 100644 --- a/src/label_studio_sdk/converter/brush.py +++ b/src/label_studio_sdk/converter/brush.py @@ -37,6 +37,11 @@ from collections import defaultdict from itertools import groupby +from label_studio_sdk.converter.utils import ( + download, + ensure_dir, +) + logger = logging.getLogger(__name__) @@ -99,7 +104,7 @@ def decode_rle(rle, print_params: bool = False): return out -def decode_from_annotation(from_name, results): +def decode_from_annotation(results): """from LS annotation to {"tag_name + label_name": [numpy uint8 image (width x height)]}""" layers = {} counters = defaultdict(int) @@ -116,7 +121,7 @@ def decode_from_annotation(from_name, results): width = result["original_width"] height = result["original_height"] labels = result[key] if key in result else ["no_label"] - name = from_name + "-" + "-".join(labels) + name = "".join(labels) # result count i = str(counters[name]) @@ -129,37 +134,17 @@ def decode_from_annotation(from_name, results): def save_brush_images_from_annotation( - task_id, - annotation_id, - completed_by, - from_name, + image_name, results, out_dir, out_format="numpy", ): - layers = decode_from_annotation(from_name, results) - if isinstance(completed_by, dict): - email = completed_by.get("email", "") - else: - email = str(completed_by) - email = "".join( - x for x in email if x.isalnum() or x == "@" or x == "." - ) # sanitize filename + layers = decode_from_annotation(results) + image_base = ".".join(image_name.split('.')[0:-1]) for name in layers: sanitized_name = name.replace("/", "-").replace("\\", "-") - - filename = os.path.join( - out_dir, - "task-" - + str(task_id) - + "-annotation-" - + str(annotation_id) - + "-by-" - + email - + "-" - + sanitized_name, - ) + filename = os.path.join(out_dir,image_base+"-"+sanitized_name) image = layers[name] logger.debug(f"Save image to {filename}") if out_format == "numpy": @@ -175,10 +160,7 @@ def convert_task(item, out_dir, out_format="numpy"): """Task with multiple annotations to brush images, out_format = numpy | png""" for from_name, results in item["output"].items(): save_brush_images_from_annotation( - item["id"], - item["annotation_id"], - item["completed_by"], - from_name, + os.path.basename(item["input"]["image"]), results, out_dir, out_format, @@ -193,6 +175,89 @@ def convert_task_dir(items, out_dir, out_format="numpy"): # convert_task_dir('/ls/test/completions', '/ls/test/completions/output', 'numpy') +def convert_to_brush( + self, + input_data, + output_dir, + output_image_dir=None, + output_label_dir=None, + is_dir=True, + out_format="png", +): + """Convert data in a specific format to either PNG or Numpy format. + + Parameters + ---------- + input_data : str + The input data a directory. + output_dir : str + The directory to store the output files in. + output_image_dir : str, optional + The directory to store the image files in. If not provided, it will default to a subdirectory called 'images' in output_dir. + output_label_dir : str, optional + The directory to store the label files in. If not provided, it will default to a subdirectory called 'masks' in output_dir. + is_dir : bool, optional + A boolean indicating whether `input_data` is a directory (True) or a JSON file (False). + output_format : str, optional + A string either 'png' or 'numpy' indicating which mask format to use. + """ + ensure_dir(output_dir) + if output_image_dir is not None: + ensure_dir(output_image_dir) + else: + output_image_dir = os.path.join(output_dir, "images") + os.makedirs(output_image_dir, exist_ok=True) + if output_label_dir is not None: + ensure_dir(output_label_dir) + else: + output_label_dir = os.path.join(output_dir, "masks") + os.makedirs(output_label_dir, exist_ok=True) + categories, category_name_to_id = self._get_labels() + data_key = self._data_keys[0] + + # Write all segmentation PNGs or Numpy masks + items = ( + self.iter_from_dir(input_data) + if is_dir + else self.iter_from_json_file(input_data) + ) + convert_task_dir(items, output_label_dir, out_format) + + # Write all raw images to the "images" folder + item_iterator = ( + self.iter_from_dir(input_data) + if is_dir + else self.iter_from_json_file(input_data) + ) + for item_idx, item in enumerate(item_iterator): + # get image path(s) and label file path + image_paths = item["input"][data_key] + image_paths = [image_paths] if isinstance(image_paths, str) else image_paths + # download image(s) + image_path = None + # TODO: for multi-page annotation, this code won't produce correct relationships between page and annotated shapes + # fixing the issue in RND-84 + for image_path in reversed(image_paths): + if not os.path.exists(image_path): + try: + image_path = download( + image_path, + output_image_dir, + project_dir=self.project_dir, + return_relative_path=True, + upload_dir=self.upload_dir, + download_resources=self.download_resources, + ) + except: + logger.info( + "Unable to download {image_path}. The item {item} will be skipped".format( + image_path=image_path, item=item + ), + exc_info=True, + ) + if not image_path: + logger.error(f"No image path found for item #{item_idx}") + continue ### Brush Import ### diff --git a/src/label_studio_sdk/converter/converter.py b/src/label_studio_sdk/converter/converter.py index ecfeb9c2a..2ddb18279 100644 --- a/src/label_studio_sdk/converter/converter.py +++ b/src/label_studio_sdk/converter/converter.py @@ -239,19 +239,29 @@ def convert(self, input_data, output_data, format, is_dir=True, **kwargs): input_data, output_data, output_image_dir=image_dir, is_dir=is_dir ) elif format == Format.BRUSH_TO_NUMPY: - items = ( - self.iter_from_dir(input_data) - if is_dir - else self.iter_from_json_file(input_data) + image_dir = kwargs.get("image_dir") + label_dir = kwargs.get("label_dir") + brush.convert_to_brush( + self, + input_data, + output_data, + output_image_dir=image_dir, + output_label_dir=label_dir, + is_dir=is_dir, + out_format="numpy", ) - brush.convert_task_dir(items, output_data, out_format="numpy") elif format == Format.BRUSH_TO_PNG: - items = ( - self.iter_from_dir(input_data) - if is_dir - else self.iter_from_json_file(input_data) + image_dir = kwargs.get("image_dir") + label_dir = kwargs.get("label_dir") + brush.convert_to_brush( + self, + input_data, + output_data, + output_image_dir=image_dir, + output_label_dir=label_dir, + is_dir=is_dir, + out_format="png", ) - brush.convert_task_dir(items, output_data, out_format="png") elif format == Format.ASR_MANIFEST: items = ( self.iter_from_dir(input_data)