-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
68 lines (66 loc) · 3.58 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import luigi
from guide_design.tasks.featurize import FeaturizeTrain, FeaturizeDoenchTest
from guide_design.tasks.cross_validate import CrossValidate
from guide_design.tasks.model import BestModel, PredictModel, ModelCoefficients
from guide_design.tasks.fasta_format import Fasta
from guide_design.tasks.get_data import DoenchTestData
from guide_design.tasks.filter_data import FilteredAchillesData, FilteredRS2Data
import numpy as np
if __name__ == '__main__':
    # Which branch of the pipeline this run executes. It is a hard-coded
    # switch: edit the value to one of 'feat', 'cv', 'model', 'predict',
    # 'fasta', 'coefs' or 'filter'. Any other value runs nothing.
    stage = 'predict'

    # Feature-name -> on/off flags handed to PredictModel and
    # ModelCoefficients as their `features` parameter.
    feats = {
        'Pos. Ind. 1mer': True,
        'Pos. Ind. 2mer': True,
        'Pos. Ind. 3mer': False,
        'Pos. Ind. Zipper': True,
        'Pos. Dep. 1mer': True,
        'Pos. Dep. 2mer': True,
        'Pos. Dep. 3mer': False,
        'Pos. Dep. Zipper': True,
        'Pos. Ind. Rep.': True,
        'GC content': True,
        'Tm': True,
        'Cas9 PAM': False,
        'Physio': True,
        'OOF Mutation Rate': True,
        'Double Zipper': False,
    }

    # Guide / PAM start and length values; the same four keyword arguments
    # are passed to the 'feat', 'predict' and 'coefs' tasks.
    window = dict(guide_start=5, guide_length=20,
                  pam_start=25, pam_length=3)

    # Each branch fills in the task list and, where the stage asks for it,
    # an explicit worker count. `workers` is only forwarded to luigi when a
    # branch sets it, so stages without it keep luigi's own default.
    tasks = None
    build_opts = {'local_scheduler': True}

    if stage == 'feat':
        # Separate literal, not derived from `feats`: it has no Zipper /
        # 'Pos. Ind. Rep.' / 'Double Zipper' entries and 'Physio' is off.
        train_feats = {
            'Pos. Ind. 1mer': True,
            'Pos. Ind. 2mer': True,
            'Pos. Ind. 3mer': False,
            'Pos. Dep. 1mer': True,
            'Pos. Dep. 2mer': True,
            'Pos. Dep. 3mer': False,
            'GC content': True,
            'Tm': True,
            'Cas9 PAM': False,
            'Physio': False,
            'OOF Mutation Rate': True,
        }
        tasks = [FeaturizeTrain(activity_column='score_drug_gene_rank',
                                kmer_column='30mer',
                                features=train_feats,
                                **window)]
    elif stage == 'cv':
        # One CrossValidate task per model, in this order. The third
        # argument of logspace/linspace is the sample count, so each of
        # these numeric grids currently holds a single value.
        grids = [
            ('lasso', {'alpha': np.logspace(-1, 0, 1).tolist()}),
            ('GB', {'max_depth': [int(depth)
                                  for depth in np.linspace(2, 40, 1)],
                    'max_features': ['log2', 'sqrt'],
                    'min_samples_split': np.linspace(0.2, 0.4, 1).tolist(),
                    'subsample': [0.8]}),
        ]
        tasks = [CrossValidate(model_str=name, folds=10, param_grid=grid)
                 for name, grid in grids]
        build_opts['workers'] = 2
    elif stage == 'model':
        tasks = [BestModel()]
        build_opts['workers'] = 2
    elif stage == 'predict':
        tasks = [PredictModel(features=feats, **window)]
        build_opts['workers'] = 1
    elif stage == 'fasta':
        tasks = [Fasta(seq_col='30mer')]
    elif stage == 'coefs':
        tasks = [ModelCoefficients(features=feats, **window)]
        build_opts['workers'] = 1
    elif stage == 'filter':
        tasks = [FilteredAchillesData(), FilteredRS2Data()]
        build_opts['workers'] = 1

    # An unrecognised stage leaves `tasks` unset and nothing is built.
    if tasks is not None:
        luigi.build(tasks, **build_opts)