Skip to content

Commit

Permalink
Merge pull request #56 from MeeshCompBio/master
Browse files Browse the repository at this point in the history
Two-tail GO, linkage fix, locality fix
  • Loading branch information
schae234 authored May 17, 2017
2 parents 1024d25 + 8c27c94 commit 9572e6d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 3 deletions.
8 changes: 7 additions & 1 deletion camoco/COB.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import networkx as nx
import pandas as pd
import numpy as np
from numpy import nan
import itertools
from odo import odo
from scipy.misc import comb
Expand Down Expand Up @@ -1092,8 +1093,11 @@ def locality(self, gene_list, iter_name=None, include_regression=False):
degree.columns = ['global', 'local']
degree = degree.sort_values(by='global')
if include_regression:
# Pull out the columns so they can be cast with astype(float), avoiding a pandas/sm.OLS error
loc_deg = degree['local']
glob_deg = degree['global']
# Add the regression lines
ols = sm.OLS(degree['local'], degree['global']).fit()
ols = sm.OLS(loc_deg.astype(float), glob_deg.astype(float)).fit()
degree['resid'] = ols.resid
degree['fitted'] = ols.fittedvalues
degree = degree.sort_values(by='resid',ascending=False)
Expand Down Expand Up @@ -1499,6 +1503,8 @@ def _calculate_gene_hierarchy(self,method='single'):
dists = (dists * pcc_std) + pcc_mean
dists = np.tanh(dists)
dists = 1 - dists
# Convert NaNs to 0; linkage can only use finite values
dists[np.isnan(dists)] = 0
gc.collect()
# Find the leaves from hierarchical clustering
gene_link = linkage(dists, method=method)
Expand Down
6 changes: 6 additions & 0 deletions camoco/cli/camoco
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,12 @@ if __name__ == '__main__':
default=None,
help='Perform Gene Ontology Statistics. (default: None)'
)
health.add_argument(
'--two_tailed_GO',
action='store_true',
default=False,
help='Include negative density values for GO statistics (default: False)'
)
health.add_argument(
'--min-term-size',
default=10,
Expand Down
15 changes: 13 additions & 2 deletions camoco/cli/commands/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,23 @@ def cob_health(args):
term.loci = list(filter(lambda x: x in cob, term.loci))
if len(term) < args.min_term_size or len(term) > args.max_term_size:
continue
#set density value for two tailed go so we only test it once
density = cob.density(term.loci)
#one tailed vs two tailed test
if args.two_tailed_GO is True:
#run one tail for only positive values
if density > 0:
density_emp.append(density)
#skip negative density values
else:
continue
#otherwise (two_tailed_GO flag not set), keep every density value
else:
density_emp.append(density)
term_ids.append(term.id)
term_sizes.append(len(term))
term_desc.append(str(term.desc))
# ------ Density
density = cob.density(term.loci)
density_emp.append(density)
# Calculate PVals
density_bs = np.array([
cob.density(cob.refgen.random_genes(n=len(term.loci))) \
Expand Down

0 comments on commit 9572e6d

Please sign in to comment.