Merge branch 'dev'

LinkageIO · May 17, 2017 · 6d2a1d8
2 parents 5ba6586 + 9572e6d
commit 6d2a1d8
Show file tree

Hide file tree

Showing 27 changed files with 1,247 additions and 492 deletions.
diff --git a/camoco/Annotation.py b/camoco/Annotation.py
@@ -6,142 +6,6 @@
 import pandas as pd
 from .Camoco import Camoco
 
-class RefGenFunc(Camoco):
-    def __init__(self, refgen, description=None):
-        if not isinstance(refgen, str):
-            refgen = refgen.name
-        super().__init__(refgen,type='RefGenFunc')
-        self._global('refgen',refgen)
-        self.refgen = co.RefGen(refgen)
-        self._create_tables() 
-
-    def __getitem__(self,item):
-        # Build the query from all the genes provided
-        if isinstance(item,(set,list)):
-            ls = "{}".format("','".join([str(x) for x in item]))
-            single = False
-        else:
-            ls = item
-            single = True
-        query = "SELECT * FROM func WHERE id IN ('{}');".format(ls)
-
-        # Run the query and turn the result into a list of tuples
-        cur = self.db.cursor()
-        cur.execute(query)
-        annotes = cur.fetchall()
-
-        # If a list of genes was passed in, return a dictionary of lists
-        if not single:
-            res = {}
-            for id,desc in annotes:
-                if id in res:
-                    res[id].append(desc)
-                else:
-                    res[id] = [desc]
-
-        # Otherwise just return the list annotations
-        else:
-            res = []
-            for id,desc in annotes:
-                res.append(desc)
-        return res
-
-    def to_csv(self, filename=None, sep="\t"):
-        '''
-            Make a table of all functional annotations.
-        '''
-        # Find the default filename
-        if filename == None:
-            filename = self.name + '_func.tsv'
-
-        # Pull them all from sqlite
-        cur = self.db.cursor()
-        cur.execute("SELECT * FROM func;")
-
-        # Used pandas to save it
-        df = pd.DataFrame(cur.fetchall(),columns=['gene','desc']).set_index('gene')
-        df.to_csv(filename,sep=sep)
-
-    def add_table(self, filename, sep="\t", gene_col=0, skip_cols=None):
-        ''' 
-            Imports Annotation relationships from a csv file. By default will
-            assume gene names are first column
-        '''
-        # import from file, assume right now that in correct order
-        tbl = pd.read_table(filename,sep=sep,dtype=object)
-        idx_name = tbl.columns[gene_col]
-        tbl[idx_name] = tbl[idx_name].str.upper()
-
-        # Drop columns if we need to
-        if skip_cols is not None:
-            # removing certain columns
-            tbl.drop(tbl.columns[skip_cols],axis=1,inplace=True)
-
-        # Get rid of any genes not in the refence genome
-        refcur = self.refgen.db.cursor()
-        refcur.execute('SELECT id FROM genes;')
-        rm = set(tbl[idx_name]) - set([id[0] for id in refcur.fetchall()])
-        tbl.drop(rm,axis=0,inplace=True)
-        del rm, refcur
-
-        # One Annotation per row, drop the nulls and duplicates
-        tbl = pd.melt(tbl,id_vars=idx_name,var_name='col',value_name='desc')
-        tbl.drop('col',axis=1,inplace=True)
-        tbl.dropna(axis=0,inplace=True)
-        tbl.drop_duplicates(inplace=True)
-
-        # Run the transaction to throw them in there
-        cur = self.db.cursor()
-        try:
-            cur.execute('BEGIN TRANSACTION')
-            cur.executemany(
-                'INSERT INTO func VALUES (?,?)'
-                ,tbl.itertuples(index=False))
-            cur.execute('END TRANSACTION')
-
-        except Exception as e:
-            self.log("import failed: {}",e)
-            cur.execute('ROLLBACK')
-
-        # Make sure the indices are built
-        self._build_indices()
-
-    @classmethod
-    def create(cls, refgen, description=None):
-        if not isinstance(refgen, str):
-            refgen = refgen.name
-        self = super().create(refgen, description, type='RefGenFunc')
-        return self
-
-    @classmethod
-    def from_table(cls, filename, refgen, description=None, 
-        sep="\t", gene_col=0, skip_cols=None):
-        self = cls.create(refgen, description)
-        self.add_table(filename, sep=sep, gene_col=gene_col, skip_cols=skip_cols)
-
-    # We also have the groundwork for a table of ortholog annotations
-    # Currently there is no interface to either add or access it
-    # We may add it in the future though
-    def _create_tables(self):
-        cur = self.db.cursor()
-        cur.execute('''
-            CREATE TABLE IF NOT EXISTS func (
-                id TEXT,
-                desc TEXT,
-                UNIQUE(id,desc) ON CONFLICT IGNORE
-            );
-            CREATE TABLE IF NOT EXISTS ortho_func (
-                id TEXT,
-                desc TEXT,
-                UNIQUE(id,desc) ON CONFLICT IGNORE
-            );
-        ''')
-
-    def _build_indices(self):
-        cur = self.db.cursor()
-        cur.execute('CREATE INDEX IF NOT EXISTS id ON func(id)')
-        cur.execute('CREATE INDEX IF NOT EXISTS id ON ortho_func(id)')
-
 class GWASData(Camoco):
     def __init__(self, gwas):
         if not isinstance(gwas, str):
@@ -230,7 +94,7 @@ def add_dir(self, dir, sep='\t'):
             tbl = pd.read_table(fn,sep=sep,dtype=object)
             tbl = tbl[['gene','COB','Term','WindowSize','FlankLimit','score',
                 'zscore','fdr','num_real','num_random','bs_mean','bs_std',
-                'NumSNPs','NumBootstraps']]
+                'TermLoci','NumBootstraps']]
             try:
                 cur.executemany('''
                     INSERT INTO gwas_data VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)