Skip to content

Commit

Permalink
Update v1.39
Browse files Browse the repository at this point in the history
  • Loading branch information
sami-chaaban committed Aug 11, 2023
1 parent 822cd94 commit f7117e3
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 1 deletion.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ Write all values of a column to a file. For example, passing *\_rlnMicrographNam

Find particles that are shared between the input star file and the one provided by ```--f``` based on the column provided here. Two new star files will be output, one with the shared particles and one with the unique particles.

**```--match_mics```**

Keep only micrographs that also exist in a second star file provided by ```--f```.

**```--extract_min```** *```minimum-value```*

Find the micrographs that have this minimum number of particles in them and extract all the particles belonging to them.

**```--extract_if_nearby```** *```distance```* *`--f otherfile.star`*

For every particle in the input star file, check the nearest particle in a second star file provided by ```--f```; particles that have a neighbor closer than the distance (in pixels) provided here will be written to particles_close.star, and those that don't will be written to particles_far.star. Particles that couldn't be matched to a neighbor will be skipped (i.e. if the second star file lacks particles in that micrograph). It will also output a histogram of nearest distances to Particles_distances.png (use ```--t``` to change the file type; see the [*Output*](#output) options).
Expand Down
2 changes: 1 addition & 1 deletion starparser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os

__version__ = '1.38'
__version__ = '1.39'
_ROOT = os.path.abspath(os.path.dirname(__file__))
8 changes: 8 additions & 0 deletions starparser/argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ def argparse():
action="store", dest="parser_findshared", type="string", default="", metavar='column-name',
help="Find particles that are shared between the input star file and the one provided by --f based on the column provided here. Two new star files will be written, one with the shared particles and one with the unique particles.")

# --match_mics is a plain on/off flag (store_true); the help text states that the
# second star file is supplied separately through --f.
info_opts.add_option("--match_mics",
action="store_true", dest="parser_matchmics", default=False,
help="Keep only micrographs that also exist in a second star file provided by --f.")

# --extract_min takes an integer; the default of -1 is the value that
# decisiontree.py compares against to detect that the option was not passed.
# NOTE(review): dest is spelled "parser_exractmin" (missing "t"). decisiontree.py
# reads the same key, so rename both places together or leave both as they are.
info_opts.add_option("--extract_min",
action="store", dest="parser_exractmin", type="int", default=-1, metavar='minimum-number',
help="Find the micrographs that have this minimum number of particles in them and extract all the particles belonging to them.")

info_opts.add_option("--extract_if_nearby",
action="store", dest="parser_findnearby", type="float", default=-1, metavar='distance',
help="Find the nearest particle in a second star file (specified by --f); particles that have a neighbor in the second star file closer than the distance provided here will be written to particles_close.star and those that don't will be written to particles_far.star. Particles that couldn't be matched to a neighbor will be skipped (i.e. if the second star file lacks particles in that micrograph). It will also write a histogram of nearest distances to Particles_distances.png.")
Expand Down
33 changes: 33 additions & 0 deletions starparser/decisiontree.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,26 @@ def decide():
fileparser.writestar(unsharedparticles, metadata, "unique.star", relegateflag)
sys.exit()


"""
--match_mics
"""

if params["parser_matchmics"]:
    # Keep only the particles whose micrograph also appears in the star file
    # passed with --f, then write them to output.star and stop.

    # A second star file is mandatory for this option.
    if params["parser_file2"] == "":
        print("\n>> Error: provide a second file with --f to compare to.\n")
        sys.exit()
    file2 = params["parser_file2"]
    if not os.path.isfile(file2):
        print("\n>> Error: \"" + file2 + "\" does not exist.\n")
        sys.exit()

    # Only the particle table of the second file is needed; its metadata is unused.
    otherparticles, _ = fileparser.getparticles(file2)

    # Row-wise membership test on the micrograph name column.
    matchedparticles = allparticles[allparticles["_rlnMicrographName"].isin(otherparticles["_rlnMicrographName"])]

    # Report distinct micrograph counts (not particle counts).
    keptmics = len(set(matchedparticles["_rlnMicrographName"].tolist()))
    totalmics = len(set(allparticles["_rlnMicrographName"].tolist()))
    print("\n>> Kept " + str(keptmics) + " micrographs that matched the second file (out of " + str(totalmics) + ").\n")

    fileparser.writestar(matchedparticles, metadata, "output.star", relegateflag)
    sys.exit()

"""
--extract_if_nearby
"""
Expand Down Expand Up @@ -567,6 +587,19 @@ def decide():
fileparser.writestar(clusterparticles, metadata, params["parser_outname"], relegateflag)
sys.exit()


"""
--extract_min
"""

# Runs only when --extract_min was given: the comparison is against -1, and the
# filtered particles are written to the file named by params["parser_outname"].
if params["parser_exractmin"] != -1:
extractmin = params["parser_exractmin"]
print("\n>> Extracting particles that belong to micrographs with at least " + str(extractmin) + " particles.\n")
# extractwithmin() calls sys.exit() itself when no particles or all particles
# would be retained, so execution only continues here for a partial selection.
particlesfrommin = specialparticles.extractwithmin(allparticles, extractmin)
# Summary: number removed, number remaining, and the original total.
print(">> Removed " + str(len(allparticles.index)-len(particlesfrommin.index)) + " that did not match the criteria (" + str(len(particlesfrommin.index)) + " remaining out of " + str(len(allparticles.index)) + ").")
fileparser.writestar(particlesfrommin, metadata, params["parser_outname"], relegateflag)
sys.exit()

"""
--plot_class_proportions
"""
Expand Down
41 changes: 41 additions & 0 deletions starparser/specialparticles.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,4 +270,45 @@ def getcluster(particles,threshold,minimum):

particles_purged = pd.concat(toconcat)

return(particles_purged)

"""
--extract_min
"""
def extractwithmin(particles,minimum):
    """
    Keep only the particles that belong to micrographs containing at least
    `minimum` particles.

    Particles are grouped by their _rlnMicrographName value. Every group
    with `minimum` or more rows is kept whole; all other groups are dropped
    and counted as micrographs that don't meet the criteria.

    Parameters:
        particles: dataframe of particles with a _rlnMicrographName column.
        minimum: smallest number of particles (inclusive) a micrograph must
            have for its particles to be kept.

    Returns:
        A new dataframe holding the kept particles, grouped by micrograph in
        sorted micrograph-name order, with their original index labels. The
        input dataframe is not modified.

    Exits the program through sys.exit() when no particles would be
    retained, or when every particle would be retained (nothing to filter).
    """

    kept_groups = []
    badmics = 0

    # Grouping by the column name (not a one-item list) yields plain
    # (name, sub-dataframe) pairs; only the sub-dataframe is needed.
    for _, micparticles in particles.groupby("_rlnMicrographName"):
        # The threshold is inclusive: the option is documented as the
        # *minimum* number of particles, i.e. "at least" this many.
        if len(micparticles.index) >= minimum:
            kept_groups.append(micparticles)
        else:
            badmics += 1

    totalkept = sum(len(group.index) for group in kept_groups)

    if totalkept == 0:
        print("\n>> Error: no particles were retained based on the criteria.\n")
        sys.exit()
    elif totalkept == len(particles.index):
        print("\n>> Error: all particles were retained. No star file will be output.")
        sys.exit()

    print(">> " + str(badmics) + " micrographs don't meet the criteria.\n")

    # Concatenating the kept groups builds a new dataframe in a single pass.
    # Selecting by row membership, rather than by matching _rlnImageName
    # values one at a time, also keeps rows with repeated image names from
    # being duplicated in the output.
    return pd.concat(kept_groups)

0 comments on commit f7117e3

Please sign in to comment.