Skip to content

Commit

Permalink
major refactoring improving configuration system
Browse files Browse the repository at this point in the history
it now has a default file with almost all of the configuration,
and a user config where it chooses
  • Loading branch information
averissimo committed Dec 11, 2015
1 parent 042c16c commit 8e579c4
Show file tree
Hide file tree
Showing 14 changed files with 245 additions and 252 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
output/
config.yml
*~
Gemfile.lock
vendor/bundle
.bundle
5 changes: 5 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Gem dependencies for this project.
# Gems are fetched over HTTPS so that downloads cannot be altered in transit.
source 'https://rubygems.org'

gem 'bio'
gem 'deep_merge'
# configatron is pulled from its GitHub repository instead of a released gem
gem 'configatron', github: 'markbates/configatron'
132 changes: 17 additions & 115 deletions blast.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,24 @@
#
require_relative 'blast_interface'
require_relative 'reporting'
require_relative 'config_blast'
#
#
#
class Blast
#
include BlastInterface
include ConfigBlast
#
needs_implementation :blast
#
DEF_OUTPUT_DIR = 'output'
DEF_OUTPUT_EXT = '.out'
DEF_CONFIG_PATH = './config.yml'

attr_reader :logger, :out_dir
attr_writer :out_dir, :dbs, :folders
attr_reader :logger, :store

#
#
# initialize class with all necessary data
def initialize(config_path = DEF_CONFIG_PATH)
def initialize(config_path = nil)
super(config_path)
# create logger object
@logger = Logger.new(STDOUT)
logger.level = Logger::INFO
Expand All @@ -36,13 +34,13 @@ def initialize(config_path = DEF_CONFIG_PATH)
def blast_folders(folders = nil,
query_parent = nil,
db_parent = nil)
query_parent = @query_parent if query_parent.nil?
folders = @folders if folders.nil?
query_parent = @store.query.parent if query_parent.nil?
query_folders = @store.query.folders if folders.nil?
# create new queue to add all operations
call_queue = Queue.new
list = []
# run through each directory
folders.each do |query|
query_folders.each do |query|
list = blast_folders_each(query, query_parent, db_parent, call_queue)
end

Expand Down Expand Up @@ -73,12 +71,12 @@ def blast_folders_each(query, query_parent, db_parent, call_queue)
#
logger.debug "going to blast with query: '#{query_file}'"
# run query against all databases
@dbs.each do |db|
@store.db.list.each do |db|
logger.debug "using db: #{db}"
new_item = {}
new_item[:qfile] = query_file
new_item[:db] = db
new_item[:out_file] = gen_filename(query, query_file, db)
new_item[:qfile] = query_file
new_item[:db] = db
new_item[:out_file] = gen_filename(query, query_file, db)
new_item[:query_parent] = '' # empty, because it will
# already have the prefix
new_item[:db_parent] = db_parent
Expand All @@ -89,24 +87,10 @@ def blast_folders_each(query, query_parent, db_parent, call_queue)
end

def cleanup
logger.info("removing #{@out_dir}")
FileUtils.remove_dir(@out_dir)
end

def db_parent=(new_db_parent)
@db_parent = File.expand_path(new_db_parent)
logger.info("removing #{@store.output.dir}")
FileUtils.remove_dir(@store.output.dir)
end

def query_parent=(new_query_parent)
@query_parent = File.expand_path(new_query_parent)
end

def reload_config(config_path = 'config.yml')
@config = YAML.load_file(config_path)
logger.debug(@config.inspect)
set_config
logger.debug('loaded config.yml file')
end
# _ _
# (_) | |
# _ __ _ __ ___ ____ _| |_ ___
Expand All @@ -131,99 +115,17 @@ def get_nt_seq_from_blastdb(seq_id, db, qstart, qend)
spliced = seq.splice("#{qstart}..#{qend}")
spliced
end

#
#
# Generate filenames for each of the query's output
def gen_filename(prefix, query, db)
name = query.gsub(%r{[\S]+\/}, '').gsub(/[\.]query/, '').gsub(/[ ]/, '_')
list = []
list << @task
list << @store.task
list << prefix unless prefix.nil?
list << name
list << db
File.join(@out_dir, list.join('#') + @out_ext)
end

#
#
# Set config variables
def set_config
# parent directories for query and blast db
self.query_parent = get_config(@config['query_parent'], Dir.pwd)
#
self.db_parent = get_config(@config['db_parent'], Dir.pwd)
# optional arguments
self.dbs = @config['dbs']
self.folders = @config['query_folders']
@opts = @config['opts']
@task = @config['task']
@outfmt = @config['format']['outfmt']
#
@identity_threshold = @config['identity_threshold']
@identity_threshold *= 100
#
# orf options
@orf = {}
@orf[:stop] = @config['orf']['stop_codon']
@orf[:start] = @config['orf']['start_codon']
@orf[:reverse] = @config['orf']['reverse']
@orf[:direct] = @config['orf']['direct']
@orf[:min] = @config['orf']['min']
#
@verbose_out = !get_config(@config['clean_output'], false)
#
@out_dir = get_config(@config['output']['dir'], DEF_OUTPUT_DIR)
@out_ext = get_config(@config['output']['ext'], DEF_OUTPUT_EXT)

@out_dir = File.expand_path(@out_dir)
create_out_dir
#
#
logger.debug('query_parent: ' + @query_parent)
logger.debug('db_parent: ' + @db_parent)
#
fail 'Databases must be defined in config.yml.' if @dbs.nil?
fail 'Folders must be defined in config.yml.' if @folders.nil?
# set existing dbs
logger.info("loads databases (from directory '#{@query_parent}'): " +
@dbs.join(', '))

# outfmt specifiers for the blast query (we choose all)
@outfmt_spec = @config['format']['specifiers'].keys
# outfmt specifiers details to add to the report's second line
@outfmt_details = @config['format']['specifiers'].values
end

def create_out_dir
# create output dir if does not exist
begin
Dir.mkdir @out_dir unless Dir.exist?(@out_dir)
rescue
logger.error(msg = 'Could not create output directory')
raise msg
end
# create output dir with timestamp
begin
if @config['force_folder'].nil?
@out_dir = @out_dir +
File::Separator +
Time.now.strftime('%Y_%m_%d-%H_%M_%S') +
'-' + srand.to_s[3..6]
Dir.mkdir @out_dir
else
@out_dir = @out_dir + File::Separator + @config['force_folder']
Dir.mkdir(@out_dir) unless Dir.exist?(@out_dir)
end
rescue
logger.error(msg = 'Could not create output directory')
raise msg
end
end

#
#
# get default value
def get_config(yml_var, default)
yml_var.nil? ? default : yml_var
File.join(@store.output.dir, list.join('#') + @store.output.extension)
end
end # end of class
28 changes: 8 additions & 20 deletions blastn.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,20 @@
#
#
class Blastn < Blast
#
DEF_OPTIONS = '-dust no -max_target_seqs 500 -evalue 1E-100'
DEF_FORMAT = '6'
DEF_TASK = 'blastn'

def initialize(*args)
super(*args)
@opts = get_config(@config['opts'], DEF_OPTIONS)
@task = get_config(@config['task'], DEF_TASK)
@outfmt = get_config(@config['format']['outfmt'], DEF_FORMAT)
end

#
# blastn blast
def blast(qfile, db, out_file, query_parent = nil, db_parent = nil)
query_parent = @query_parent if query_parent.nil?
db_parent = @db_parent if db_parent.nil?
query_parent = @store.query.parent if query_parent.nil?
db_parent = @store.db.parent if db_parent.nil?
# create command for this call
cmd = "blastn -query \"#{File.join(query_parent, qfile)}\" -db \"#{db}\""
cmd += " #{@opts} -out #{out_file}"
cmd += " -outfmt \"#{@outfmt} #{@outfmt_spec.join(' ')}\""
logger.info "running '#{qfile}' with database '#{db}' that will \
store in '#{out_file}'"
cmd = "blastn -query \"#{File.join(query_parent, qfile)}\" -db \"#{db}\"" \
" #{@store.opts} -out #{out_file}" \
" -outfmt \"#{@store.format.outfmt}" \
" #{@store.format.specifiers.keys.join(' ')}\""
logger.info "running '#{qfile}' with database '#{db}' that will " \
"store in '#{out_file}'"
logger.debug cmd
output = `BLASTDB="#{db_parent}" #{cmd}` # actual call to blast
logger.debug output
end

end
78 changes: 78 additions & 0 deletions config_blast.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
require 'configatron/core'

require 'yaml'

#
# Mixin that loads, merges and post-processes the YAML configuration.
#
# Settings live in @store (a Configatron::RootStore). The including class
# must provide a `logger` method: every method here except `initialize`
# writes to it.
module ConfigBlast
  # Create the settings store and record where the configuration files live.
  #
  # @param args [Array] optional; when the first element is a String it is
  #   used as the path of the user configuration file, otherwise
  #   'config.yml' is used. Both the default and the user path are resolved
  #   against the current working directory.
  def initialize(*args)
    user_path = args.first
    # anything that is not a path string (nil included) selects the default
    user_path = 'config.yml' unless user_path.is_a?(String)
    # setup config defaults
    @store = Configatron::RootStore.new
    @store.config.default = File.expand_path('default.yml')
    @store.config.user = File.expand_path(user_path)
  end

  # (Re)load the configuration: the defaults file first, then the user file
  # on top of it, then post-process the result (see #process_config).
  #
  # @param config_path [String, nil] user configuration file; nil selects
  #   the path recorded by #initialize
  # @raise [RuntimeError] when mandatory settings are missing or the output
  #   directory cannot be created
  def reload_config(config_path = nil)
    config_path = @store.config.user if config_path.nil?
    # get configuration from default yml file
    logger.info("loads configuration from defaults: #{@store.config.default}")
    @store.configure_from_hash(load_yaml_hash(@store.config.default))
    logger.info("loads configuration from user: #{config_path}")
    @store.configure_from_hash(load_yaml_hash(config_path))
    # process the configuration to adjust paths and values
    process_config
    logger.debug('loaded and processed configuration files')
  end

  private

  # Read a YAML file, treating an empty document as "no settings" so that
  # an empty configuration file does not break the merge.
  def load_yaml_hash(path)
    YAML.load_file(path) || {}
  end

  # Name of a per-run output folder: timestamp plus four random digits.
  # A private generator is used so the process-wide random sequence (and any
  # seed set by the caller) is left untouched.
  def unique_run_name
    stamp = Time.now.strftime('%Y_%m_%d-%H_%M_%S')
    "#{stamp}-#{format('%04d', Random.new.rand(10_000))}"
  end

  # Create the output directory if it does not exist, then create the run
  # folder inside it and point @store.output.dir at that folder.
  #
  # The run folder is `force_folder` when that key is configured (an existing
  # folder is reused), otherwise a fresh timestamped folder.
  #
  # @raise [RuntimeError] with the underlying cause when a directory cannot
  #   be created
  def create_output_dir
    base_dir = @store.output.dir
    Dir.mkdir(base_dir) unless Dir.exist?(base_dir)
    if @store.key?(:force_folder)
      run_dir = File.join(base_dir, @store.force_folder)
      Dir.mkdir(run_dir) unless Dir.exist?(run_dir)
    else
      # create output dir with timestamp
      run_dir = File.join(base_dir, unique_run_name)
      Dir.mkdir(run_dir)
    end
    @store.output.dir = run_dir
  rescue StandardError => e
    msg = "Could not create output directory (why: #{e.message})"
    logger.error(msg)
    raise msg
  end

  #
  #
  # Adjust the loaded values: scale the identity threshold, make paths
  # absolute, validate mandatory settings and create the output directory.
  def process_config
    # presumably a 0..1 fraction turned into a percentage -- confirm against
    # default.yml
    @store.identity_threshold *= 100
    # convert paths to absolute paths
    @store.output.dir = File.expand_path(@store.output.dir)
    @store.db.parent = File.expand_path(@store.db.parent)
    @store.query.parent = File.expand_path(@store.query.parent)
    #
    logger.debug("query_parent: #{@store.query.parent}")
    logger.debug("db_parent: #{@store.db.parent}")
    # validate before touching the file system, so a rejected configuration
    # does not leave an empty output directory behind
    # NOTE(review): these checks rely on the store returning nil for a
    # missing key -- confirm against Configatron's behaviour for unset keys
    raise 'Databases must be defined in config.yml.' if @store.db.list.nil?
    raise 'Folders must be defined in config.yml.' if @store.query.folders.nil?
    # create the output directory
    create_output_dir
    # set existing dbs
    logger.info("loads databases (from directory '#{@store.db.parent}'): " \
                "#{@store.db.list.join(', ')}")
  end
end
Loading

0 comments on commit 8e579c4

Please sign in to comment.