-
Notifications
You must be signed in to change notification settings - Fork 2
/
create.py
90 lines (73 loc) · 2.35 KB
/
create.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Thanks to Andreas Klosterman for the dask suggestion
from poretools.Fast5File import Fast5File
from dask import compute, delayed
import dask.multiprocessing
import dask.threaded
import sys
import md5
import hashlib
import re
import argparse
# Ordered SQL statements that build the database schema. run() executes them
# one at a time, in list order. Entries whose text is only an SQL "--" comment
# are index definitions that have been commented out at the SQL level; they
# remain in the list and are still passed to execute().
#
# First entry: the flowcell table, whose primary key is asic_id.
create_schema = ["""
create table flowcell (
flowcell_id varchar(50) not null,
asic_id varchar(50) primary key not null
);""",
# Unique index over the (flowcell_id, asic_id) pair.
"""create unique index flowcell_idx on flowcell ( flowcell_id, asic_id );""",
# experiment table: primary key is experiment_id; flowcell_id and asic_id are
# declared as references to the flowcell table.
# NOTE(review): flowcell.flowcell_id is not declared unique on its own (only
# the pair above is) -- confirm this reference is valid if foreign-key
# enforcement is ever enabled.
"""create table experiment (
flowcell_id varchar(50) references flowcell ( flowcell_id ) not null,
experiment_id varchar(100) primary key not null,
asic_id varchar(50) references flowcell ( asic_id ) not null,
library_name varchar(40) not null,
script_name varchar(100) not null,
host_name varchar(100) not null,
exp_start_time integer not null,
minion_id varchar(40) not null
);""",
# Unique index on experiment_id (which is also the table's primary key).
"""create unique index experiment_id on experiment ( experiment_id );""",
# trackedfiles table: primary key is filepath; experiment_id references the
# experiment table. file_id is nullable.
"""-- all files in file system
create table trackedfiles (
file_id integer null,
experiment_id varchar(100) references experiment ( experiment_id ) not null,
uuid varchar(64) not null,
md5 varchar(64) not null,
filepath text primary key not null,
sequenced_date integer not null,
channel integer not null,
read_number integer not null,
mux integer not null,
duration integer not null
);""",
# Two commented-out index definitions for trackedfiles (filepath and uuid).
"""-- create unique index trackedfiles_filepath on trackedfiles ( filepath );""",
"""-- create index trackedfiles_uuid on trackedfiles ( uuid );""",
# basecaller table: name and version keyed by basecaller_id.
"""-- basecaller
create table basecaller (
basecaller_id integer primary key not null,
name varchar(100) not null,
version varchar(100) not null
);""",
# basecall table: rows reference trackedfiles by filepath and basecaller by
# basecaller_id. No primary key is declared. The embedded SQL comment lists
# outstanding TODOs and says file_id should not be used.
"""-- basecealls
-- TODO: complement
-- TODO: 2d
-- TODO: barcoding
-- do not use file_id
create table basecall (
file_id integer null,
filepath text not null references trackedfiles ( filepath ) not null,
basecaller_id integer references basecaller ( basecaller_id ) not null,
group_id integer not null,
template text null,
template_length integer null,
num_events integer not null
);""",
# Commented-out index definition on basecall.filepath.
"""-- create index basecall_filepath on basecall ( filepath );"""]
import sqlite3
import logging
# Apply logging's default configuration at import time, then create the
# module-level logger under the name 'poretools'.
logging.basicConfig()
logger = logging.getLogger('poretools')
def run(parser, args):
    """Create the database schema in the SQLite file named by ``args.db``.

    Every statement in ``create_schema`` is executed in list order and the
    work is committed once all of them have succeeded. The connection is
    always closed before returning, including when a statement fails.

    Args:
        parser: Not used by this function.
        args: Object exposing a ``db`` attribute; its value is passed to
            ``sqlite3.connect`` as the database path.

    Raises:
        sqlite3.Error: Propagated unchanged when connecting or executing a
            statement fails (for example, a table that already exists,
            since the statements use plain ``create table``).
    """
    conn = sqlite3.connect(args.db, check_same_thread=False, timeout=30)
    try:
        cursor = conn.cursor()
        for statement in create_schema:
            cursor.execute(statement)
        conn.commit()
    finally:
        # Release the database handle even if a statement raised; closing
        # without a commit discards any uncommitted changes.
        conn.close()