diff --git a/README.md b/README.md index a34d288..56572fd 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,9 @@ The purpose of this repository is to have one place to manage local modules for ## Included scripts: * molkart_clahe.py - Contrast-limited adjusted histogram equalization (CLAHE) on single-channel `tif` images. +* molkart_crophdf5.py - converts input to `hdf5` and creates crops and a CropOverview. * molkart_croptiff.py - creates `tiff` crops based on a provided CropOverview (To be adapted) * molkart_maskfilter.py - takes a segmentation mask and filters cells based on area from the mask. * molkart_spot2cell.py - matches a spot table to a segmentation mask to produce a cell-by-transcript matrix. +* molkart_stack.py - takes multiple input images and concatenates them to a stack - pyramidal output. (channel names not preserved) * molkartqc.py - gathers provided quality control metrices and gathers them into one `csv` file. \ No newline at end of file diff --git a/scripts/molkart_clahe.py b/scripts/molkart_clahe.py index 2514fdb..fea5bd9 100644 --- a/scripts/molkart_clahe.py +++ b/scripts/molkart_clahe.py @@ -7,7 +7,6 @@ from os.path import abspath import os import numpy as np -import tifffile as tf from skimage.exposure import equalize_adapthist from multiprocessing.spawn import import_main_path import sys @@ -17,7 +16,6 @@ import tifffile import zarr import skimage.transform -from aicsimageio import aics_image as AI from ome_types import from_tiff, to_xml from os.path import abspath from argparse import ArgumentParser as AP @@ -38,23 +36,65 @@ def get_args(): description = """Easy-to-use, large scale CLAHE""" # Add parser - parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) + parser = AP( + description=description, formatter_class=argparse.RawDescriptionHelpFormatter + ) # Sections - inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") - inputs.add_argument("-r", "--input", dest="input", action="store", required=True, help="File path to input image.") - inputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output image.") + inputs = parser.add_argument_group( + title="Required Input", description="Path to required input file" + ) + inputs.add_argument( + "-r", + "--input", + dest="input", + action="store", + required=True, + help="File path to input image.", + ) + inputs.add_argument( + "-o", + "--output", + dest="output", + action="store", + required=True, + help="Path to output image.", + ) inputs.add_argument( - "--cliplimit", dest="clip", action="store", required=True, type=float, default=0.01, help="Clip Limit for CLAHE" + "--cliplimit", + dest="clip", + action="store", + required=True, + type=float, + default=0.01, + help="Clip Limit for CLAHE", ) inputs.add_argument( - "--kernel", dest="kernel", action="store", required=False, type=int, default=25, help="Kernel size for CLAHE" + "--kernel", + dest="kernel", + action="store", + required=False, + type=int, + default=25, + help="Kernel size for CLAHE", ) inputs.add_argument( - "--nbins", dest="nbins", action="store", required=False, type=int, default=256, help="Number of bins for CLAHE" + "--nbins", + dest="nbins", + action="store", + required=False, + type=int, + default=256, + help="Number of bins for CLAHE", ) inputs.add_argument( - "-p", "--pixel-size", dest="pixel_size", action="store", type=float, required=False, help="Image pixel size" + "-p", + "--pixel-size", + dest="pixel_size", + action="store", + type=float, + required=False, + help="Image pixel size", ) inputs.add_argument( "--tile-size", @@ -130,17 +170,22 @@ def detect_pixel_size(img_path, pixel_size=None): def main(args): _version = "0.1.0" print(f"Head directory = {args.input}") - print(f"ClipLimit = {args.clip}, nbins = {args.nbins}, kernel_size = {args.kernel}, pixel_size = {args.pixel_size}") + print( + f"ClipLimit = {args.clip}, nbins = {args.nbins}, kernel_size = {args.kernel}, pixel_size = {args.pixel_size}" + ) # clahe = cv2.createCLAHE(clipLimit = int(args.clip), tileGridSize=tuple(map(int, args.grid))) - img_in = AI.AICSImage(args.input) - img_dask = img_in.get_image_dask_data("CYX").astype("uint16") - adapted = img_dask[0].compute() / 65535 + img_in = tifffile.imread(args.input).astype("uint16") + print(img_in.shape) + adapted = img_in / 65535 adapted = ( - equalize_adapthist(adapted, kernel_size=args.kernel, clip_limit=args.clip, nbins=args.nbins) * 65535 + equalize_adapthist( + adapted, kernel_size=args.kernel, clip_limit=args.clip, nbins=args.nbins + ) + * 65535 ).astype("uint16") - img_dask[0] = adapted + img_in = adapted[np.newaxis, :, :] # construct levels tile_size = args.tile_size @@ -149,9 +194,9 @@ def main(args): if pixel_size is None: pixel_size = 1 - dtype = img_dask.dtype - base_shape = img_dask[0].shape - num_channels = img_dask.shape[0] + dtype = img_in.dtype + base_shape = img_in[0].shape + num_channels = img_in.shape[0] num_levels = (np.ceil(np.log2(max(1, max(base_shape) / tile_size))) + 1).astype(int) factors = 2 ** np.arange(num_levels) shapes = (np.ceil(np.array(base_shape) / factors[:, None])).astype(int) @@ -170,7 +215,10 @@ def main(args): level_full_shapes.append((num_channels, shape[0], shape[1])) level_shapes = shapes tip_level = np.argmax(np.all(level_shapes < tile_size, axis=1)) - tile_shapes = [(tile_size, tile_size) if i <= tip_level else None for i in range(len(level_shapes))] + tile_shapes = [ + (tile_size, tile_size) if i <= tip_level else None + for i in range(len(level_shapes)) + ] software = f"molkart_clahe {_version}" pixel_size = pixel_size @@ -187,7 +235,7 @@ def main(args): # write pyramid with tifffile.TiffWriter(args.output, ome=True, bigtiff=True) as tiff: tiff.write( - data=img_dask, + data=img_in, metadata=metadata, shape=level_full_shapes[0], subifds=int(num_levels - 1), @@ -195,9 +243,13 @@ def main(args): resolution=(10000 / pixel_size, 10000 / pixel_size, "centimeter"), tile=tile_shapes[0], ) - for level, (shape, tile_shape) in enumerate(zip(level_full_shapes[1:], tile_shapes[1:]), 1): + for level, (shape, tile_shape) in enumerate( + zip(level_full_shapes[1:], tile_shapes[1:]), 1 + ): tiff.write( - data=subres_tiles(level, level_full_shapes, tile_shapes, args.output, scale), + data=subres_tiles( + level, level_full_shapes, tile_shapes, args.output, scale + ), shape=shape, subfiletype=1, dtype=dtype, diff --git a/scripts/molkart_crophdf5.py b/scripts/molkart_crophdf5.py new file mode 100644 index 0000000..0e0b803 --- /dev/null +++ b/scripts/molkart_crophdf5.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +import tifffile +import numpy as np +import h5py +import pathlib +import os +import random +from skimage import filters +import scipy.io +import math +import argparse +import time + +# Most of the code by Joshua Hess from the labsyspharm/mcmicro-ilastik repo: https://github.com/labsyspharm/mcmicro-ilastik + + +def IlastikPrepOME( + input, + output, + crop, + crop_size, + nonzero_fraction, + nuclei_index, + num_channels, + channelIDs, + ring_mask, + crop_amount, +): + """Function for exporting a large ome.tiff image as an hdf5 image for + training ilastik random forest pixel classifier for cell segmentation""" + + # Create a pathlib object for the image name + im_name = pathlib.Path(input) + # Get the input directory + im_dir = im_name.parent + # Get the image name (remove ".ome") + im_stem = im_name.stem.replace(".ome", "") + # Create hdf5 name + h5_name = im_stem + ".hdf5" + + # Check to see if ring mask is being applied + if ring_mask: + # Read the matlab file + mat = scipy.io.loadmat( + os.path.join(str(im_dir), (str(im_stem) + "-ROI-nodes.mat")) + ) + # Get the width and height indices for cropping + min_w, max_w = math.floor(abs(mat["nodes"][:, 0]).min()), math.ceil( + abs(mat["nodes"][:, 0]).max() + ) + min_h, max_h = math.floor(abs(mat["nodes"][:, 1]).min()), math.ceil( + abs(mat["nodes"][:, 1]).max() + ) + + # Check to see if the num_channels exists + if num_channels == None and channelIDs == None: + # raise an error + raise ValueError("--num_channels and --channelIDs are not specified") + + # Otherwise continue + else: + # Condition 1 + if num_channels == None and channelIDs != None: + # Set number of channels to length of channel IDs + num_channels = len(channelIDs) + # Check if number of channels and channelIDs agree + elif num_channels != None and channelIDs == None: + # Set channelIDs to be first n channels for num_channels + channelIDs = range(0, num_channels) + # infer the number of channels give the channel IDs + else: + # Check that the two agree + if num_channels != len(channelIDs): + # raise error + raise ValueError( + "--num_channels and length of --channelIDs do not agree" + ) + + # Check if the number of channels is even or odd + if (num_channels % 2) == 0: + step = 2 + else: + step = 1 + + # Read the tif image - Reads the image as cyx + print("Reading " + im_name.stem + "...") + tif = tifffile.TiffFile(im_name) + # Set the index for the loop + idx = 0 + # Add counter for channel index + chan_idx = 0 + for i in range(int(num_channels / step)): + # Get the channel indices based on the step + chan_idx = channelIDs[idx : idx + step] + # Convert the tifffile object to array + im = tif.asarray(series=0, key=chan_idx) + # Check to see what step size is (if step is 1, tiffile will not read color channel, only width and height) + if step != 1: + # Swap the axes to be in the order zyxc for ilastik + im = np.swapaxes(im, 0, 2) + # Swap the axes to be in the order zyxc for ilastik + im = np.swapaxes(im, 0, 1) + # Check if step size is 1 or two (again, if 1, then no color channel) + if step != 1: + # Reshape the array + im = im.reshape((1, im.shape[0], im.shape[1], im.shape[2])) + else: + # Add a color axis when reshaping instead + im = im.reshape((1, im.shape[0], im.shape[1], 1)) + # Check to see if ring mask is being applied + if ring_mask: + # Crop the region + im = im[:, min_h:max_h, min_w:max_w, :] + # Create an hdf5 dataset if idx is 0 plane + if idx == 0: + # Create hdf5 + h5 = h5py.File(pathlib.Path(os.path.join(output, h5_name)), "w") + h5.create_dataset( + str(im_stem), + data=im[:, :, :, :], + chunks=True, + maxshape=(1, None, None, None), + ) + h5.close() + else: + # Append hdf5 dataset + h5 = h5py.File(pathlib.Path(os.path.join(output, h5_name)), "a") + # Add step size to the z axis + h5[str(im_stem)].resize((idx + step), axis=3) + # Add the image to the new channels + h5[str(im_stem)][:, :, :, idx : idx + step] = im[:, :, :, :] + h5.close() + # Update the index + idx = idx + step + # Finished exporting the image + print("Finished exporting image") + + # Optional to crop out regions for ilastik training + if crop: + # Get the index of nuclei in channelIDs + nuclei_index = channelIDs.index(nuclei_index) + # Run through each cropping iteration + full_h5 = h5py.File(pathlib.Path(os.path.join(output, h5_name)), "r") + im_nuc = full_h5[str(im_stem)][:, :, :, nuclei_index] + im = full_h5[str(im_stem)][:, :, :, :] + indices = {} + count = 0 + thresh = filters.threshold_otsu(im_nuc[:, :, :]) + while count < crop_amount: + # Get random height value that falls within crop range of the edges + extension_h = crop_size[0] // 2 + h = random.randint(extension_h, im_nuc.shape[1] - extension_h) + h_up, h_down = h - extension_h, h + extension_h + # Get random width value that falls within crop range of the edges + extension_w = crop_size[1] // 2 + w = random.randint(extension_w, im_nuc.shape[2] - extension_w) + w_lt, w_rt = w - extension_w, w + extension_w + # Crop the image with these coordinates expanding from center + crop = im_nuc[:, h_up:h_down, w_lt:w_rt] + crop_name = pathlib.Path( + os.path.join(output, (im_stem + "_crop" + str(count) + ".hdf5")) + ) + # Check to see if the crop passes the nonzero fraction test + if ( + (crop[0, :, :] > thresh).sum() / (crop.shape[1] * crop.shape[2]) + ) >= nonzero_fraction: + # Export the image to hdf5 + print("Writing " + crop_name.stem + ".hdf5...") + crop = im[:, h_up:h_down, w_lt:w_rt, :] + h5_crop = h5py.File(crop_name, "w") + h5_crop.create_dataset( + str(im_stem) + "_" + str(count), data=crop, chunks=True + ) + h5_crop.close() + print("Finished exporting " + crop_name.stem + ".hdf5") + # Add one to the counter + count = count + 1 + # Add the indices to a table to store the cropped indices + indices.update({crop_name.stem: [(h_up, h_down), (w_lt, w_rt)]}) + # Export the indices to a text file to track the cropped regions + summary = open( + pathlib.Path(os.path.join(output, im_stem) + "_CropSummary.txt"), "w" + ) + summary.write(str(indices)) + summary.close() + + +def MultiIlastikOMEPrep( + input, + output, + crop, + crop_size, + nonzero_fraction, + nuclei_index, + num_channels, + channelIDs, + ring_mask, + crop_amount, +): + """Function for iterating over a list of files and output locations to + export large ome.tiff images in the correct hdf5 image format for ilastik + random forest pixel classification and batch processing""" + + # Iterate over each image in the list if only a single output + if len(output) < 2: + # Iterate through the images and export to the same location + for im_name in input: + # Run the IlastikPrepOME function for this image + IlastikPrepOME( + im_name, + output[0], + crop, + crop_size, + nonzero_fraction, + nuclei_index, + num_channels, + channelIDs, + ring_mask, + crop_amount, + ) + # Alternatively, iterate over output directories + else: + # Check to make sure the output directories and image paths are equal in length + if len(output) != len(input): + raise ( + ValueError( + "Detected more than one output but not as many directories as images" + ) + ) + else: + # Iterate through images and output directories + for i in range(len(input)): + # Run the IlastikPrepOME function for this image and output directory + IlastikPrepOME( + input[i], + output[i], + crop, + crop_size, + nonzero_fraction, + nuclei_index, + num_channels, + channelIDs, + ring_mask, + crop_amount, + ) + + +def ParseInputOME(): + """Function for parsing command line arguments for input to ilastik prep functions""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--input", + nargs="*", + help="enter path to images with spaces between each image (Ex: /path1/image1.ome.tiff /path2/image2.ome.tiff)", + ) + parser.add_argument("--output", nargs="*") + parser.add_argument("--crop", action="store_true", default=False) + parser.add_argument("--no-crop", dest="crop", action="store_false") + parser.add_argument("--crop_size", type=int, nargs="*") + parser.add_argument("--nonzero_fraction", type=float) + parser.add_argument("--nuclei_index", type=int) + parser.add_argument("--num_channels", type=int) + parser.add_argument("--channelIDs", type=int, nargs="*") + parser.add_argument("--ring_mask", action="store_true", default=False) + parser.add_argument("--no-ring_mask", dest="ring_mask", action="store_false") + parser.add_argument("--crop_amount", type=int) + parser.add_argument("--version", action="version", version="0.1.0") + + args = parser.parse_args() + + # Adjustment to account for user-facing 1-based indexing and the 0-based Python implementation + if args.nuclei_index != None: + nuc_idx = args.nuclei_index - 1 + else: + nuc_idx = None + if args.channelIDs != None: + chIDs = [x - 1 for x in args.channelIDs] + else: + chIDs = None + + # Create a dictionary object to pass to the next function + dict = { + "input": args.input, + "output": args.output, + "crop": args.crop, + "crop_size": args.crop_size, + "nonzero_fraction": args.nonzero_fraction, + "nuclei_index": nuc_idx, + "num_channels": args.num_channels, + "channelIDs": chIDs, + "ring_mask": args.ring_mask, + "crop_amount": args.crop_amount, + } + # Print the dictionary object + print(dict) + # Return the dictionary + return dict + + +if __name__ == "__main__": + # Parse the command line arguments + args = ParseInputOME() + + # Run script + st = time.time() + # Run the MultiIlastikOMEPrep function + MultiIlastikOMEPrep(**args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/scripts/molkart_croptiff.py b/scripts/molkart_croptiff.py index dd7b4fd..d00115f 100644 --- a/scripts/molkart_croptiff.py +++ b/scripts/molkart_croptiff.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# importing the module import ast import tifffile as tiff import os @@ -37,6 +36,7 @@ def create_crops(tiff_image, crop_dict): parser = argparse.ArgumentParser() parser.add_argument("-i", "--input", help="Input tiffile.") parser.add_argument("-c", "--crop_summary", help="Crop summary file.") + parser.add_argument("--version", action="version", version="0.1.0") args = parser.parse_args() # reading the crop information from the file diff --git a/scripts/molkart_maskfilter.py b/scripts/molkart_maskfilter.py index 5e7ecc0..afb3dba 100644 --- a/scripts/molkart_maskfilter.py +++ b/scripts/molkart_maskfilter.py @@ -18,14 +18,36 @@ def get_args(): description = """Segmentation mask filtering""" # Add parser - parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) + parser = AP( + description=description, formatter_class=argparse.RawDescriptionHelpFormatter + ) # Sections - inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") - inputs.add_argument("-r", "--input", dest="input", action="store", required=True, help="File path to input image.") - inputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output image.") + inputs = parser.add_argument_group( + title="Required Input", description="Path to required input file" + ) + inputs.add_argument( + "-r", + "--input", + dest="input", + action="store", + required=True, + help="File path to input image.", + ) inputs.add_argument( - "--output_qc", dest="output_qc", action="store", required=False, help="Path to output qc csv file." + "-o", + "--output", + dest="output", + action="store", + required=True, + help="Path to output image.", + ) + inputs.add_argument( + "--output_qc", + dest="output_qc", + action="store", + required=False, + help="Path to output qc csv file.", ) inputs.add_argument( "--min_area", @@ -115,4 +137,4 @@ def main(args): st = time.time() main(args) rt = time.time() - st - print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") \ No newline at end of file + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/scripts/molkart_spot2cell.py b/scripts/molkart_spot2cell.py index 10e97b5..7c922a3 100644 --- a/scripts/molkart_spot2cell.py +++ b/scripts/molkart_spot2cell.py @@ -96,7 +96,9 @@ def assign_spots2cell(spot_table, cell_mask): # Make Cell_ID the first column in gene_counts_df gene_counts_df = gene_counts_df.set_index("CellID").reset_index() - gene_counts_df[spot_table.gene.unique()] = gene_counts_df[spot_table.gene.unique()].astype(int) + gene_counts_df[spot_table.gene.unique()] = gene_counts_df[ + spot_table.gene.unique() + ].astype(int) # Filter out cell_ID = 0 into it's own dataframe called background background = gene_counts_df[gene_counts_df["CellID"] == 0] @@ -119,7 +121,11 @@ def assign_spots2cell(spot_table, cell_mask): ## Read in spot table spot_data = pd.read_csv( - args.spot_table, names=["x", "y", "z", "gene", "empty"], sep="\t", header=None, index_col=None + args.spot_table, + names=["x", "y", "z", "gene", "empty"], + sep="\t", + header=None, + index_col=None, ) cell_mask = tiff.imread(args.cell_mask) diff --git a/scripts/molkart_stack.py b/scripts/molkart_stack.py index c736e18..f4da474 100644 --- a/scripts/molkart_stack.py +++ b/scripts/molkart_stack.py @@ -1,56 +1,60 @@ #!/usr/bin/env python - -### This script takes a list of images and stacks them into a single image stack using Dask - import numpy as np import argparse import tifffile -from aicsimageio.writers import OmeTiffWriter -from aicsimageio import aics_image as AI -import aicsimageio -import dask import dask.array as da +from argparse import ArgumentParser as AP +import palom.pyramid +import palom.reader +import copy +import math +import time -def main(): - parser = argparse.ArgumentParser() +def get_args(): + parser = AP( + description="Stack a list of images into a single image stack using Dask" + ) parser.add_argument("-i", "--input", nargs="+", help="List of images to stack") - parser.add_argument( - "-o", - "--output", - dest="output", - type=str, + parser.add_argument("-o", "--output", dest="output", type=str) + parser.add_argument("--pixel_size", dest="pixel_size", type=float, default=0.138) + parser.add_argument("--tile_size", dest="tilesize", type=int, default=1072) + parser.add_argument("--version", action="version", version="0.1.0") + return parser.parse_args() + + +def num_levels_patch(self, base_shape): + factor = max(base_shape) / self.max_pyramid_img_size + return math.ceil(math.log(max(1, factor), self.downscale_factor)) + 1 + + +def main(args): + img = palom.reader.OmePyramidReader(args.input[0]) + mosaic = img.pyramid[0] + mosaic_out = copy.copy(mosaic) + + for i in range(1, len(args.input)): + img = palom.reader.OmePyramidReader(args.input[i]) + mosaic = img.pyramid[0] + mosaic_out = da.concatenate([mosaic_out, copy.copy(mosaic)], axis=0) + + palom.pyramid.PyramidSetting.num_levels = num_levels_patch + palom.pyramid.write_pyramid( + [mosaic_out], + args.output, + channel_names=["stack"], + downscale_factor=2, + pixel_size=0.138, + tile_size=368, ) - parser.add_argument("--num-channels", dest="num_channels", type=int) - - args = parser.parse_args() - - channel_counter = 0 - - img = AI.AICSImage(args.input[0]).get_image_dask_data("CYX") - out = da.empty(shape=[args.num_channels, img[0].shape[0], img[0].shape[1]]) - print(out.shape) - if img.shape[0] > 1: - for channel in range(img.shape[0]): - out[channel_counter] = img[channel] - channel_counter += 1 - else: - out[channel_counter] = img[0] - channel_counter += 1 - - if len(args.input) > 1: - for i in range(len(args.input[1:])): - img = AI.AICSImage(args.input[1 + i]).get_image_dask_data("CYX") - if img.shape[0] > 1: - for channel in range(img.shape[0]): - out[channel_counter] = img[channel] - channel_counter += 1 - else: - out[channel_counter] = img[0] - channel_counter += 1 - - OmeTiffWriter.save(out, args.output, dim_order="CYX") if __name__ == "__main__": - main() + # Read in arguments + args = get_args() + + # Run script + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/scripts/molkartqc.py b/scripts/molkartqc.py index 923cbcf..1c56453 100644 --- a/scripts/molkartqc.py +++ b/scripts/molkartqc.py @@ -2,9 +2,33 @@ #### This script takes regionprops_tabe output from mcquant and the raw spot tables from Resolve bioscience as input #### and calculates some QC metrics for masks and spot assignments +### If png files are provided, it combines them into one import argparse import pandas as pd +from PIL import Image, ImageDraw, ImageFont +import os + + +def combine_png_files(input_paths, output_path): + print(input_paths) + images = [] + for file_path in input_paths: + img = Image.open(file_path) + image_name = ( + os.path.basename(file_path).replace(".ome", "").replace(".crop", "_crop") + ) + draw = ImageDraw.Draw(img) + font_size = 50 + font = ImageFont.load_default(font_size) + draw.text((100, 50), image_name, fill="black", font=font) + images.append(img) + + width, height = images[0].size + combined_image = Image.new("RGB", (width, len(images) * height)) + for i, img in enumerate(images): + combined_image.paste(img, (0, i * height)) + combined_image.save(os.path.join(output_path, "crop_overview.png")) def summarize_spots(spot_table): @@ -37,7 +61,13 @@ def summarize_segmasks(cellxgene_table, spots_summary): spot_assign_percent = spot_assign_total / spots_summary[1] * 100 spot_assign_percent = round(spot_assign_percent, 2) - return (total_cells, avg_area, spot_assign_per_cell, spot_assign_total, spot_assign_percent) + return ( + total_cells, + avg_area, + spot_assign_per_cell, + spot_assign_total, + spot_assign_percent, + ) if __name__ == "__main__": @@ -49,63 +79,76 @@ def summarize_segmasks(cellxgene_table, spots_summary): parser.add_argument("-d", "--sample_id", help="Sample ID.") parser.add_argument("-g", "--segmentation_method", help="Segmentation method used.") parser.add_argument("--filterqc", required=False, help="QC from mask filter step") + parser.add_argument("--png_overview", nargs="+", help="Crop overview image paths") parser.add_argument("--version", action="version", version="0.1.0") args = parser.parse_args() - ## Read in cellxgene_table table - cellxgene_table = pd.read_csv(args.cellxgene, sep=",") - - ## Read in spot table - spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"]) - duplicated = sum(spots.gene.str.contains("Duplicated")) - spots = spots[~spots.gene.str.contains("Duplicated")] - - ## Pass on filterqc values - filterqc = pd.read_csv( - args.filterqc, - names=["below_min_area", "below_percentage", "above_max_area", "above_percentage", "total_labels"], - header=None, - ) - - ## Summarize spots table - summary_spots = summarize_spots(spots) - summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots) - - ## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame - summary_df = pd.DataFrame( - columns=[ - "sample_id", - "segmentation_method", - "total_cells", - "avg_area", - "total_spots", - "spot_assign_per_cell", - "spot_assign_total", - "spot_assign_percent", - "duplicated_total", - "labels_total", - "labels_below_thresh", - "labels_above_thresh", + if args.png_overview != None: + combine_png_files(args.png_overview, args.outdir) + + else: + ## Read in cellxgene_table table + cellxgene_table = pd.read_csv(args.cellxgene, sep=",") + + ## Read in spot table + spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"]) + duplicated = sum(spots.gene.str.contains("Duplicated")) + spots = spots[~spots.gene.str.contains("Duplicated")] + + ## Pass on filterqc values + filterqc = pd.read_csv( + args.filterqc, + names=[ + "below_min_area", + "below_percentage", + "above_max_area", + "above_percentage", + "total_labels", + ], + header=None, + ) + + ## Summarize spots table + summary_spots = summarize_spots(spots) + summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots) + + ## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame + summary_df = pd.DataFrame( + columns=[ + "sample_id", + "segmentation_method", + "total_cells", + "avg_area", + "total_spots", + "spot_assign_per_cell", + "spot_assign_total", + "spot_assign_percent", + "duplicated_total", + "labels_total", + "labels_below_thresh", + "labels_above_thresh", + ] + ) + summary_df.loc[0] = [ + ##args.sample_id, + args.sample_id + "_" + args.segmentation_method, + args.segmentation_method, + summary_segmentation[0], + summary_segmentation[1], + summary_spots[1], + summary_segmentation[2], + summary_segmentation[3], + summary_segmentation[4], + duplicated, + filterqc.total_labels[1], + filterqc.below_min_area[1], + filterqc.above_max_area[1], ] - ) - summary_df.loc[0] = [ - ##args.sample_id, - args.sample_id + "_" + args.segmentation_method, - args.segmentation_method, - summary_segmentation[0], - summary_segmentation[1], - summary_spots[1], - summary_segmentation[2], - summary_segmentation[3], - summary_segmentation[4], - duplicated, - filterqc.total_labels[1], - filterqc.below_min_area[1], - filterqc.above_max_area[1], - ] - print(args.sample_id) - # Write summary_df to a csv file - summary_df.to_csv( - f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False - ) + print(args.sample_id) + # Write summary_df to a csv file + summary_df.to_csv( + f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", + header=True, + index=False, + )