Skip to content

Commit

Permalink
Merge branch 'changes'
Browse files Browse the repository at this point in the history
  • Loading branch information
hlasimpk committed Feb 1, 2021
2 parents c1bcc80 + d3696d3 commit ac7c969
Show file tree
Hide file tree
Showing 31 changed files with 571 additions and 247 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
Changelog
=========

0.2.3
-----

Changed
~~~~~~~
- Numerous code changes to improve Python 3 compatibility
- Updated tests
- Replaced calls to the deprecated RCSB REST API

Added
~~~~~
- Logging utility (``simbad.util.logging_util``) and ``logging.json`` to set the default logging behaviour and fix logging under Python 2 and 3

0.2.1
-----

Expand Down
23 changes: 6 additions & 17 deletions scripts/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,18 @@
"""Script to compute some SIMBAD statistics --- work in progress"""
from __future__ import print_function

__author__ = "Felix Simkovic"
__author__ = "Felix Simkovic & Adam Simpkin"
__date__ = "21 Jul 2019"
__version__ = "2.0"

import json
import logging
import os
import re
import requests

LOG = logging.getLogger(__name__)
BASE_URL = "http://www.rcsb.org/pdb/rest/"
PAYLOAD = """
<orgPdbQuery>
<queryType>org.pdb.query.simple.AdvancedKeywordQuery</queryType>
<description>Text Search for: simbad</description>
<keywords>simbad</keywords>
</orgPdbQuery>
"""
URL = 'https://search.rcsb.org/rcsbsearch/v1/query?json={"query":{"type":"terminal","service":"text","parameters":{"value":"simbad"}},"return_type":"entry"}'


def update_readme(n_sols):
Expand All @@ -37,14 +31,9 @@ def update_readme(n_sols):
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

header = {"Content-Type": "application/x-www-form-urlencoded"}
url = requests.compat.urljoin(BASE_URL, "search")
response = requests.get(URL)
out = json.loads(response.text)

response = requests.post(url, data=PAYLOAD, headers=header)
response.raise_for_status()

entries = response.content.splitlines()

n_solutions = len(entries)
n_solutions = out['total_count']
LOG.info("Found %s solutions", n_solutions)
update_readme(n_solutions)
1 change: 1 addition & 0 deletions simbad/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@
LATTICE_DB = os.path.join(SIMBAD_SHARE_STATIC_DIR, "niggli_database.npz")
CONTAMINANT_MODELS = os.path.join(SIMBAD_SHARE_STATIC_DIR, "contaminants")
MORDA_MODELS = os.path.join(SIMBAD_SHARE_STATIC_DIR, "morda")
LOGGING_CONFIG = os.path.join(SIMBAD_SHARE_STATIC_DIR, 'logging.json')
91 changes: 6 additions & 85 deletions simbad/command_line/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys
import time

from enum import Enum


from pyjob import cexec
from pyjob.factory import TASK_PLATFORMS
Expand Down Expand Up @@ -96,89 +96,6 @@ def is_valid_dir(parser, arg):
parser.error("The directory %s does not exist!" % arg)


class LogColors(Enum):
    """ANSI colour codes used to highlight console log messages.

    Member names match ``logging`` level names so that a record's
    ``levelname`` can be used directly as the lookup key. Levels without
    an entry here (e.g. ``INFO``) are printed without colouring.
    ``DEFAULT`` is the code that switches colouring off again.
    """

    CRITICAL = 31
    DEBUG = 34
    DEFAULT = 0
    # Same value as CRITICAL, so Enum makes ERROR an alias of CRITICAL.
    # Lookups by name (``LogColors["ERROR"]``) and ``__members__`` still work.
    ERROR = 31
    WARNING = 33


class LogLevels(Enum):
    """Log level container.

    Maps upper-case level names to the numeric levels of the standard
    ``logging`` module. ``CRITICAL`` is included so that every level that
    has a colour assigned can also be requested by name.
    """

    CRITICAL = logging.CRITICAL
    DEBUG = logging.DEBUG
    ERROR = logging.ERROR
    INFO = logging.INFO
    NOTSET = logging.NOTSET
    WARNING = logging.WARNING


class LogColorFormatter(logging.Formatter):
    """Formatter that wraps each line of a message in ANSI colour codes."""

    def format(self, record):
        """Return the formatted text of ``record``, coloured by its level.

        Records whose ``levelname`` has no entry in ``LogColors`` are
        formatted unchanged. For all others every line of the message is
        individually wrapped in the colour prefix and the reset suffix,
        and the lines are re-joined with ``os.linesep``.

        Parameters
        ----------
        record : logging.LogRecord
            The record to format

        Returns
        -------
        str
            The formatted (and possibly coloured) log text

        """
        if record.levelname not in LogColors.__members__:
            return logging.Formatter.format(self, record)

        prefix = "\033[1;{}m".format(LogColors[record.levelname].value)
        postfix = "\033[{}m".format(LogColors["DEFAULT"].value)
        original_msg = record.msg
        record.msg = os.linesep.join(prefix + line + postfix for line in str(original_msg).splitlines())
        try:
            return logging.Formatter.format(self, record)
        finally:
            # The same record object is passed to every handler on the
            # logger. Restore the message so the escape codes do not leak
            # into handlers that run afterwards (e.g. plain-text log files).
            record.msg = original_msg


class LogController(object):
    """Controller class for log messaging.

    All handlers are attached to the root logger. The first handler added
    through this controller clears any handlers and filters that were
    already installed on the root logger.
    """

    def __init__(self, reset=True):
        """Lower the root logger threshold so that handlers decide what is emitted.

        Parameters
        ----------
        reset : bool, optional
            Accepted for backwards compatibility; it is not consulted

        """
        logging.getLogger().setLevel(logging.NOTSET)
        self._custom_added = False

    def add_console(self, level="info", format="%(message)s", stream=sys.stdout):
        """Attach a colourised stream handler to the root logger.

        Raises
        ------
        ValueError
            If ``level`` is not a valid level name

        """
        levelno = self.get_levelname(level)
        self._install_handler(logging.StreamHandler(stream=stream), levelno, LogColorFormatter(format))

    def add_logfile(self, file, level="info", format="%(message)s"):
        """Attach a plain-text file handler writing to ``file`` to the root logger.

        Raises
        ------
        ValueError
            If ``level`` is not a valid level name

        """
        # Validate the level before the FileHandler opens the file.
        levelno = self.get_levelname(level)
        self._install_handler(logging.FileHandler(file), levelno, logging.Formatter(format))

    def _install_handler(self, handler, levelno, formatter):
        """Configure ``handler`` and register it on the root logger."""
        handler.setLevel(levelno)
        handler.setFormatter(formatter)
        if not self._custom_added:
            self.reset()
        logging.getLogger().addHandler(handler)
        self._custom_added = True

    def get_levelname(self, level):
        """Translate a case-insensitive level name into its numeric value.

        Raises
        ------
        ValueError
            If ``level`` is not a member of ``LogLevels``

        """
        level_uc = level.upper()
        if not LogController.level_valid(level_uc):
            raise ValueError("Please provide a valid log level - %s is not!" % level)
        return LogLevels[level_uc].value

    def get_logger(self):
        """Return the root logger."""
        return logging.getLogger()

    def close(self):
        """Close and detach every handler of the root logger."""
        root = logging.getLogger()
        # Iterate over a copy because removeHandler mutates the list.
        for handler in root.handlers[:]:
            handler.close()
            root.removeHandler(handler)

    def reset(self):
        """Detach all handlers and filters from the root logger."""
        root = logging.getLogger()
        # Explicit loops are required: map() is lazy on Python 3, so a bare
        # map(...) call would never actually remove anything.
        for handler in root.handlers[:]:
            root.removeHandler(handler)
        for log_filter in root.filters[:]:
            root.removeFilter(log_filter)
        self._custom_added = False

    @staticmethod
    def level_valid(level):
        """Return True if ``level`` is the (upper-case) name of a known log level."""
        return level in LogLevels.__members__


def _argparse_core_options(p):
"""Add core options to an already existing parser"""
sg = p.add_argument_group("Basic options")
Expand Down Expand Up @@ -494,13 +411,17 @@ def _simbad_lattice_search(args):

MTZ_AVAIL = args.mtz is not None
temp_mtz = None
space_group = None

logger = logging.getLogger(__name__)
if MTZ_AVAIL:
temp_mtz = os.path.join(args.work_dir, "input.mtz")
simbad.util.mtz_util.ctruncate(args.mtz, temp_mtz)
mp = MtzParser(temp_mtz)
space_group = "".join(mp.spacegroup_symbol.encode("ascii").split())
if isinstance(mp.spacegroup_symbol, str):
space_group = "".join(mp.spacegroup_symbol.split())
elif isinstance(mp.spacegroup_symbol, unicode):
space_group = "".join(mp.spacegroup_symbol.encode("ascii").split())
cell_parameters = (mp.cell.a, mp.cell.b, mp.cell.c, mp.cell.alpha, mp.cell.beta, mp.cell.gamma)
else:
space_group = args.space_group
Expand Down
8 changes: 2 additions & 6 deletions simbad/command_line/simbad_contaminant.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import simbad.command_line
import simbad.exit
import simbad.util.logging_util
import simbad.util.pyrvapi_results

logger = None
Expand All @@ -38,12 +39,8 @@ def main():

log_file = os.path.join(args.work_dir, "simbad.log")
debug_log_file = os.path.join(args.work_dir, "debug.log")
log_class = simbad.command_line.LogController()
log_class.add_console(level=args.debug_lvl)
log_class.add_logfile(log_file, level="info", format="%(message)s")
log_class.add_logfile(debug_log_file, level="notset", format="%(asctime)s\t%(name)s [%(lineno)d]\t%(levelname)s\t%(message)s")
global logger
logger = log_class.get_logger()
logger = simbad.util.logging_util.setup_logging(args.debug_lvl, logfile=log_file, debugfile=debug_log_file)

if not os.path.isfile(args.amore_exe):
raise OSError("amore executable not found")
Expand Down Expand Up @@ -75,7 +72,6 @@ def main():
gui.display_results(True, args.results_to_display)
if args.rvapi_document:
gui.save_document()
log_class.close()


if __name__ == "__main__":
Expand Down
74 changes: 34 additions & 40 deletions simbad/command_line/simbad_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
import glob
import json
import numpy as np
import pandas as pd
import morda
import os
import shutil
import ssl
import sys
import tarfile
import urllib
import uuid

from distutils.version import StrictVersion
Expand All @@ -33,16 +34,17 @@
import simbad.rotsearch.amore_search

import simbad.util
import simbad.util.logging_util
from simbad.util import submit_chunk
from simbad.util import tmp_dir
from simbad.util.pdb_util import PdbStructure

if sys.version_info.major < 3:
from urllib2 import urlopen, HTTPError
from urllib import urlretrieve
else:
from urllib.request import urlopen
HTTPError = urllib.error.HTTPError

from urllib.error import HTTPError
from urllib.request import urlopen, urlretrieve

logger = None

Expand Down Expand Up @@ -83,8 +85,8 @@ def is_readable_file(file):
try:
pdb_struct = PdbStructure.from_file(file)
# Call functions that require file to be properly read
pdb_struct.molecular_weight
pdb_struct.nres
pdb_struct.molecular_weight()
pdb_struct.nres()
except Exception:
return False
return True
Expand Down Expand Up @@ -142,36 +144,26 @@ def create_lattice_db(database):
if not is_valid_db_location(database):
raise RuntimeError("Permission denied! Cannot write to {}!".format(os.path.dirname(database)))

logger.info('Querying the RCSB Protein DataBank')

url = 'http://www.rcsb.org/pdb/rest/customReport.csv?pdbids=*&customReportColumns=lengthOfUnitCellLatticeA,'\
+ 'lengthOfUnitCellLatticeB,lengthOfUnitCellLatticeC,unitCellAngleAlpha,unitCellAngleBeta,' \
'unitCellAngleGamma,spaceGroup,experimentalTechnique&service=wsfile&format=csv'
logger.info('Querying the PDBe Protein DataBank')

crystal_data, error_count = [], 0
rcsb_report_file = os.path.join(os.environ["CCP4_SCR"], "rcsb_{}.csv".format(uuid.uuid1()))
urllib.urlretrieve(url, rcsb_report_file)
rcsb_f = open(rcsb_report_file, "r")
rcsb_lines = rcsb_f.readlines()
rcsb_f.close()

for line in rcsb_lines:
if line.startswith('structureId'):
continue
pdb_code, rest = line[1:-1].split('","', 1)
unit_cell, space_group, exp_tech = rest.rsplit('","', 2)
unit_cell = unit_cell.replace('","', ',')
space_group = space_group.replace(" ", "").strip()

if "X-RAY DIFFRACTION" not in exp_tech.strip().upper():
continue

try:
unit_cell = map(float, unit_cell.split(','))
except ValueError as e:
logger.debug('Skipping pdb entry %s\t%s', pdb_code, e)
error_count += 1
continue
url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?q=pdb_id:*' \
'&fq=experimental_method:"X-ray%20diffraction"&rows=1000000&fl=pdb_id,cell_alpha,cell_beta,cell_gamma,' \
'cell_a,cell_b,cell_c,spacegroup,experimental_method&wt=csv'

pdbe_report_file = os.path.join(os.environ["CCP4_SCR"], "pdbe_{}.csv".format(uuid.uuid1()))
ssl._create_default_https_context = ssl._create_unverified_context
urlretrieve(url, pdbe_report_file)

df = pd.read_csv(pdbe_report_file)
df = df.drop_duplicates()
df = df.dropna()

for index, row in df.iterrows():
pdb_code = row['pdb_id']
unit_cell = [row['cell_a'], row['cell_b'], row['cell_c'],
row['cell_alpha'], row['cell_beta'], row['cell_gamma']]
space_group = row['spacegroup'].replace(" ", "").strip()
space_group = CCTBX_ERROR_SG.get(space_group, space_group)
try:
symmetry = cctbx.crystal.symmetry(
Expand All @@ -182,8 +174,8 @@ def create_lattice_db(database):
continue
crystal_data.append((pdb_code, symmetry))
logger.debug('Error processing %d pdb entries', error_count)
if os.path.isfile(rcsb_report_file):
os.remove(rcsb_report_file)
if os.path.isfile(pdbe_report_file):
os.remove(pdbe_report_file)

logger.info('Calculating the Niggli cells')
niggli_data = np.zeros((len(crystal_data), 11))
Expand Down Expand Up @@ -839,9 +831,7 @@ def main():
args = p.parse_args()

global logger
log_class = simbad.command_line.LogController()
log_class.add_console(level=args.debug_lvl)
logger = log_class.get_logger()
logger = simbad.util.logging_util.setup_logging(args.debug_lvl)

simbad.command_line.print_header()

Expand Down Expand Up @@ -882,7 +872,11 @@ def main():

stopwatch.stop()
logger.info("Database creation completed in %d days, %d hours, %d minutes, and %d seconds", *stopwatch.time_pretty)
log_class.close()

if os.path.exists('simbad.log'):
os.unlink('simbad.log')
if os.path.exists('debug.log'):
os.unlink('debug.log')


if __name__ == "__main__":
Expand Down
14 changes: 6 additions & 8 deletions simbad/command_line/simbad_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import simbad.command_line
import simbad.exit
import simbad.util
import simbad.util.logging_util
import simbad.util.pyrvapi_results

logger = None
Expand Down Expand Up @@ -49,13 +50,8 @@ def main():

log_file = os.path.join(args.work_dir, 'simbad.log')
debug_log_file = os.path.join(args.work_dir, 'debug.log')
log_class = simbad.command_line.LogController()
log_class.add_console(level=args.debug_lvl)
log_class.add_logfile(log_file, level="info", format="%(message)s")
log_class.add_logfile(debug_log_file, level="notset",
format="%(asctime)s\t%(name)s [%(lineno)d]\t%(levelname)s\t%(message)s")
global logger
logger = log_class.get_logger()
logger = simbad.util.logging_util.setup_logging(args.debug_lvl, logfile=log_file, debugfile=debug_log_file)

if not os.path.isfile(args.amore_exe):
raise OSError("amore executable not found")
Expand Down Expand Up @@ -139,7 +135,10 @@ def main():
end_of_cycle = True

if len(all_results) >= 1:
sorted_results = sorted(all_results.iteritems(), key=lambda kv: (kv[1], kv))
if sys.version_info.major == 3:
sorted_results = sorted(all_results.items(), key=lambda kv: (kv[1], kv))
else:
sorted_results = sorted(all_results.iteritems(), key=lambda kv: (kv[1], kv))
result = sorted_results[0][1]
simbad.util.output_files(args.work_dir, result, args.output_pdb, args.output_mtz)

Expand All @@ -150,7 +149,6 @@ def main():
gui.display_results(True, args.results_to_display)
if args.rvapi_document:
gui.save_document()
log_class.close()


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit ac7c969

Please sign in to comment.