Skip to content

Commit

Permalink
Added B-factor filtering modes for pdb_selb.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmedselim2017 committed Sep 21, 2024
1 parent cc39ec3 commit 9cee6f9
Showing 1 changed file with 100 additions and 47 deletions.
147 changes: 100 additions & 47 deletions pdbtools/pdb_selb.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Selects atoms according to their B-factor value.
Selects residues according to the B-factor values.
Valid Operations:
[g] - greater than
[l] - less than
[e] - equal to
[n] - not equal to
Valid B-Factor Filtering Modes:
[mean] - Filter by the mean of the B-factors of all atoms in a residue
[min] - Filter by the atom with the minimum B-factor in a residue
[max] - Filter by the atom with the maximum B-factor in a residue
Usage:
python pdb_selb.py -<operators> -treshold <pdb file>
python pdb_selb.py -<filtering_mode>:<operators> -treshold <pdb file>
Example:
python pdb_selb.py -g -10 1CTF.pdb # selects atoms with B-factor greater than 10
python pdb_selb.py -l -10 1CTF.pdb # selects atoms with B-factor less than 10
python pdb_selb.py -ge -10 1CTF.pdb # selects atoms with B-factor greater or equal to 10
python pdb_selb.py -le -10 1CTF.pdb # selects atoms with B-factor less or equal to 10
python pdb_selb.py -e -10 1CTF.pdb # selects atoms with B-factor equal to 10
python pdb_selb.py -n -10 1CTF.pdb # selects atoms with B-factor not equal to 10
python pdb_selb.py -mean:g -10 1CTF.pdb # selects residues with a mean B-factor greater than 10
python pdb_selb.py -min:g -10 1CTF.pdb # selects residues where the atom with the minimum B-factor has a B-factor greater than 10
python pdb_selb.py -max:g -10 1CTF.pdb # selects residues where the atom with the maximum B-factor has a B-factor greater than 10
python pdb_selb.py -mean:l -10 1CTF.pdb # selects residues with a mean B-factor less than 10
python pdb_selb.py -min:ge -10 1CTF.pdb # selects residues where the atom with the minimum B-factor has a B-factor greater than or equal to 10
python pdb_selb.py -max:le -10 1CTF.pdb # selects residues where the atom with the maximum B-factor has a B-factor less than or equal to 10
python pdb_selb.py -mean:e -10 1CTF.pdb # selects residues with a mean B-factor equal to 10
python pdb_selb.py -mean:n -10 1CTF.pdb # selects residues with a mean B-factor not equal to 10
This program is part of the `pdb-tools` suite of utilities and should not be
distributed isolatedly. The `pdb-tools` were created to quickly manipulate PDB
Expand All @@ -42,6 +50,7 @@
"""
import os
import sys
import statistics

__author__ = "Ahmed Selim Üzüm"
__email__ = "[email protected]"
Expand All @@ -51,8 +60,9 @@ def check_input(args):
"""Checks whether to read from stdin/file and validates user input/options."""

# Defaults
filtering_modes = {"min", "max", "mean"}
operators = {"g", "l", "e", "n"}
operator = ""
option = ""
operator_treshold = None
fh = sys.stdin # file handle

Expand All @@ -65,10 +75,9 @@ def check_input(args):
elif len(args) == 2:
# Two options: Options & Pipe
if args[0].startswith("-"):
operator = args[0][1:]
option = args[0][1:]
if args[1].startswith("-"):
operator_treshold = args[1][1:]

if sys.stdin.isatty(): # ensure the PDB data is streamed in
emsg = "ERROR!! No data to process!\n"
sys.stderr.write(emsg)
Expand All @@ -94,38 +103,52 @@ def check_input(args):
sys.stderr.write(__doc__)
sys.exit(1)

operator = args[0][1:]
option = args[0][1:]
operator_treshold = args[1][1:]
fh = open(args[2], "r")

else: # Whatever ...
sys.stderr.write(__doc__)
sys.exit(1)

# Validate option
# Validate options
try:
assert set(operator) <= operators
filtering_mode, operator = option.split(":")
except ValueError:
emsg = "ERROR!! You have provided filtering mode and operator in an invalid format: '{}'"
sys.stderr.write(emsg.format(option))
sys.exit(1)

if operator == "":
emsg = "ERROR!! You did not provided an operator."
sys.stderr.write(emsg.format())
sys.exit(1)
elif operator_treshold == None:
emsg = "ERROR!! You did not provided an operator treshold."
sys.stderr.write(emsg.format())
sys.exit(1)
if filtering_mode == '':
emsg = "ERROR!! You did not provided a filtering_mode."
sys.stderr.write(emsg.format())
sys.exit(1)
elif filtering_mode not in filtering_modes:
emsg = "ERROR!! You provided an invalid filtering_mode: '{}'"
sys.stderr.write(emsg.format(filtering_mode))
sys.exit(1)

operator_treshold = float(operator_treshold)
except AssertionError:
if operator == '':
emsg = "ERROR!! You did not provided an operator."
sys.stderr.write(emsg.format())
sys.exit(1)
elif not set(operator).issubset(operators):
emsg = "ERROR!! You provided an invalid operator: '{}'"
sys.stderr.write(emsg.format(operator))
sys.exit(1)

try:
if operator_treshold is None:
emsg = "ERROR!! You did not provided an operator treshold."
sys.stderr.write(emsg.format())
sys.exit(1)
operator_treshold = float(operator_treshold)
except ValueError:
emsg = "ERROR!! You provided an invalid b-factor treshold value: '{}'"
sys.stderr.write(emsg.format(operator_treshold))
sys.exit(1)

return (fh, operator, operator_treshold)
return (fh, filtering_mode, operator, operator_treshold)


def pad_line(line):
Expand All @@ -137,7 +160,7 @@ def pad_line(line):
return line[:81] # 80 + newline character


def run(fhandle, operator, operator_treshold):
def run(fhandle, filtering_mode, operator, operator_treshold):
"""
Filter according to the bfactor value using the given operator and operator
treshold.
Expand All @@ -148,6 +171,15 @@ def run(fhandle, operator, operator_treshold):
----------
fhandle : a line-by-line iterator of the original PDB file.
filtering_mode: string
The desired filtering mode that should be used to select in which
conditions a residue should be filtered.
Filtering mode values:
"mean": Filter by the mean of the B-factors of all atoms in a residue
"min": Filter by the atom with the minimum B-factor in a residue
"max": Filter by the atom with the maximum B-factor in a residue
operator : string
The desired operator that should be used to filter by bafctor. The
combination of multiple operator values such as "ge" can also be used.
Expand All @@ -167,28 +199,48 @@ def run(fhandle, operator, operator_treshold):
"""
_pad_line = pad_line
records = ("ATOM", "HETATM")
res_lines = []
prev_res_id = None
for line in fhandle:
if line.startswith(records):
line = _pad_line(line)
bfactor = float(line[60:66])

for o in operator:
if o == "g":
if bfactor > operator_treshold:
yield line
continue
elif o == "l":
if bfactor < operator_treshold:
yield line
continue
elif o == "e":
if bfactor == operator_treshold:
yield line
continue
elif o == "n":
if bfactor != operator_treshold:
yield line
continue

res_id = int(line[23:26])

if prev_res_id == res_id or prev_res_id is None:
res_lines.append(line)
else:
bfactors = [float(rl[60:66]) for rl in res_lines]

selected_bfactor = None
if filtering_mode == "mean":
selected_bfactor = statistics.fmean(bfactors)
elif filtering_mode == "min":
selected_bfactor = min(bfactors)
elif filtering_mode == "max":
selected_bfactor = max(bfactors)

for o in operator:
if o == "g":
if selected_bfactor > operator_treshold:
yield from res_lines
continue
elif o == "l":
if selected_bfactor < operator_treshold:
yield from res_lines
continue
elif o == "e":
if selected_bfactor == operator_treshold:
yield from res_lines
continue
elif o == "n":
if selected_bfactor != operator_treshold:
yield from res_lines
continue
res_lines = []

prev_res_id = res_id

else:
yield line

Expand All @@ -199,10 +251,11 @@ def run(fhandle, operator, operator_treshold):
def main():

# Check Input
pdbfh, operator, operator_treshold = check_input(sys.argv[1:])
pdbfh, filtering_mode, operator, operator_treshold = check_input(
sys.argv[1:])

# Do the job
new_pdb = run(pdbfh, operator, operator_treshold)
new_pdb = run(pdbfh, filtering_mode, operator, operator_treshold)

# Output results
try:
Expand Down

0 comments on commit 9cee6f9

Please sign in to comment.