diff --git a/README.md b/README.md index 242ffdd..9321b6c 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,14 @@ Write all values of a column to a file. For example, passing *\_rlnMicrographNam Find particles that are shared between the input star file and the one provided by ```--f``` based on the column provided here. Two new star files will be output, one with the shared particles and one with the unique particles. +**```--match_mics```** + +Keep only micrographs that also exist in a second star file provided by ```--f```. + +**```--extract_min```** *```minimum-value```* + +Find the micrographs that have this minimum number of particles in them and extract all the particles belonging to them. + **```--extract_if_nearby```** *```distance```* *`--f otherfile.star`* For every particle in the input star file, check the nearest particle in a second star file provided by ```--f```; particles that have a neighbor closer than the distance (in pixels) provided here will be written to particles_close.star, and those that don't will be written to particles_far.star. Particles that couldn't be matched to a neighbor will be skipped (i.e. if the second star file lacks particles in that micrograph). It will also output a histogram of nearest distances to Particles_distances.png (use ```--t``` to change the file type; see the [*Output*](#output) options). 
diff --git a/starparser/__init__.py b/starparser/__init__.py index 07d137d..5803d55 100644 --- a/starparser/__init__.py +++ b/starparser/__init__.py @@ -1,4 +1,4 @@ import os -__version__ = '1.38' +__version__ = '1.39' _ROOT = os.path.abspath(os.path.dirname(__file__)) \ No newline at end of file diff --git a/starparser/argparser.py b/starparser/argparser.py index 4fa4174..9338b3d 100644 --- a/starparser/argparser.py +++ b/starparser/argparser.py @@ -47,6 +47,14 @@ def argparse(): action="store", dest="parser_findshared", type="string", default="", metavar='column-name', help="Find particles that are shared between the input star file and the one provided by --f based on the column provided here. Two new star files will be written, one with the shared particles and one with the unique particles.") + info_opts.add_option("--match_mics", + action="store_true", dest="parser_matchmics", default=False, + help="Keep only micrographs that also exist in a second star file provided by --f.") + + info_opts.add_option("--extract_min", + action="store", dest="parser_exractmin", type="int", default=-1, metavar='minimum-number', + help="Find the micrographs that have this minimum number of particles in them and extract all the particles belonging to them.") + info_opts.add_option("--extract_if_nearby", action="store", dest="parser_findnearby", type="float", default=-1, metavar='distance', help="Find the nearest particle in a second star file (specified by --f); particles that have a neighbor in the second star file closer than the distance provided here will be written to particles_close.star and those that don't will be written to particles_far.star. Particles that couldn't be matched to a neighbor will be skipped (i.e. if the second star file lacks particles in that micrograph). 
It will also write a histogram of nearest distances to Particles_distances.png.") diff --git a/starparser/decisiontree.py b/starparser/decisiontree.py index defcc10..b3fbf47 100644 --- a/starparser/decisiontree.py +++ b/starparser/decisiontree.py @@ -479,6 +479,26 @@ def decide(): fileparser.writestar(unsharedparticles, metadata, "unique.star", relegateflag) sys.exit() + + """ + --match_mics + """ + + if params["parser_matchmics"]: + columntocheckunique = params["parser_findshared"] + if params["parser_file2"] == "": + print("\n>> Error: provide a second file with --f to compare to.\n") + sys.exit() + file2 = params["parser_file2"] + if not os.path.isfile(file2): + print("\n>> Error: \"" + file2 + "\" does not exist.\n") + sys.exit(); + otherparticles, f2metadata = fileparser.getparticles(file2) + matchedparticles = allparticles[allparticles["_rlnMicrographName"].isin(otherparticles["_rlnMicrographName"])] + print("\n>> Kept " + str(len(set(matchedparticles["_rlnMicrographName"].tolist()))) + " micrographs that matched the second file (out of " + str(len(set(allparticles["_rlnMicrographName"].tolist()))) + ").\n") + fileparser.writestar(matchedparticles, metadata, "output.star", relegateflag) + sys.exit() + """ --extract_if_nearby """ @@ -567,6 +587,19 @@ def decide(): fileparser.writestar(clusterparticles, metadata, params["parser_outname"], relegateflag) sys.exit() + + """ + --extract_min + """ + + if params["parser_exractmin"] != -1: + extractmin = params["parser_exractmin"] + print("\n>> Extracting particles that belong to micrographs with at least " + str(extractmin) + " particles.\n") + particlesfrommin = specialparticles.extractwithmin(allparticles, extractmin) + print(">> Removed " + str(len(allparticles.index)-len(particlesfrommin.index)) + " that did not match the criteria (" + str(len(particlesfrommin.index)) + " remaining out of " + str(len(allparticles.index)) + ").") + fileparser.writestar(particlesfrommin, metadata, params["parser_outname"], 
"""
--extract_min
"""
def extractwithmin(particles, minimum):
    """
    Keep only the particles that belong to micrographs containing at
    least `minimum` particles.

    particles: dataframe of particles; must contain the
               _rlnMicrographName column, which is used to group
               particles by micrograph.
    minimum:   inclusive threshold; a micrograph is kept when its
               particle count is >= minimum.

    Returns a new dataframe with the retained particles, grouped by
    micrograph name (original order is kept within each micrograph).
    The input dataframe is not modified.

    Exits the program (sys.exit) if no particle or every particle would
    be retained, since there would be nothing useful to write.
    """

    micnames = particles["_rlnMicrographName"]

    #Number of particles per micrograph, indexed by micrograph name
    permic = micnames.value_counts()

    #Micrographs that fall short of the requested minimum
    badmics = int((permic < minimum).sum())

    """
    Look up each particle's micrograph count and compare it to the
    threshold. The comparison is inclusive so that "minimum" means
    "at least this many". This keeps whole rows with a single mask
    instead of re-scanning the dataframe once per particle, which was
    quadratic and duplicated rows when image names were not unique.
    """
    keepmask = micnames.map(permic) >= minimum
    numkept = int(keepmask.sum())

    if numkept == 0:
        print("\n>> Error: no particles were retained based on the criteria.\n")
        sys.exit()
    elif numkept == len(particles.index):
        print("\n>> Error: all particles were retained. No star file will be output.")
        sys.exit()

    print(">> " + str(badmics) + " micrographs don't meet the criteria.\n")

    """
    Boolean indexing returns a new dataframe, so the original is left
    untouched. The stable sort keeps the output grouped by micrograph
    without shuffling particles within a micrograph.
    """
    particles_purged = particles[keepmask].sort_values("_rlnMicrographName", kind="mergesort")

    return(particles_purged)