-
Notifications
You must be signed in to change notification settings - Fork 0
/
fastn2random_subset.py
executable file
·47 lines (39 loc) · 1.54 KB
/
fastn2random_subset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python3.3
import sys
import argparse
import random
import fastn
import utils
# Command-line interface: an input fasta/q file, an output file, the
# percentage of reads to keep, and an optional mates file. When a mates file
# is given, its reads are consumed in lockstep with the input file and each
# kept read is written immediately followed by its mate (interleaved output).
parser = argparse.ArgumentParser(
    description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' +
        'from a mates file. Ouptut is interleaved if mates file given',
    usage = '%(prog)s [options] <fasta/q in> <outfile> <percent reads wanted in [0,100]>')
parser.add_argument('--mate_file', help='Name of fasta/q mates file')
parser.add_argument('infile', help='Name of fasta/q file to be read')
parser.add_argument('outfile', help='Name of fasta/q output file')
parser.add_argument('read_percent', type=int, help='percent of reads to take from input file')
options = parser.parse_args()

# The usage string promises a percentage in [0,100]; reject anything else up
# front instead of silently treating it as "none" or "all".
if not 0 <= options.read_percent <= 100:
    parser.error('read_percent must be in [0,100], got ' + str(options.read_percent))

# Probability of keeping each read (or read pair). random.random() returns a
# float in [0.0, 1.0), so comparing with `<` keeps nothing at 0 percent and
# everything at 100 percent. The previous randint(0, 100) <= percent test drew
# from 101 integers and therefore kept reads with probability (p + 1) / 101,
# e.g. roughly 1% of reads were still written when 0 percent was requested.
keep_probability = options.read_percent / 100

seq_reader = fastn.file_reader(options.infile)
fout = utils.open_file_write(options.outfile)
counter_in = 0
counter_out = 0

try:
    # None means "no mates file": reads are sampled individually.
    mate_seq_reader = None
    if options.mate_file:
        mate_seq_reader = fastn.file_reader(options.mate_file)

    for seq in seq_reader:
        counter_in += 1

        # Always advance the mates reader, whether or not this read is kept,
        # so the two files stay aligned read-for-read.
        if mate_seq_reader is not None:
            try:
                mate_seq = next(mate_seq_reader)
            except StopIteration:
                print('Error! Didn\'t get mate for read', seq.id, file=sys.stderr)
                sys.exit(1)

        if random.random() < keep_probability:
            counter_out += 1
            print(seq, file=fout)
            if mate_seq_reader is not None:
                print(mate_seq, file=fout)
finally:
    # Runs on normal completion and on the sys.exit(1) path above, so the
    # output handle is always closed.
    utils.close(fout)

# Summary on stdout; wording depends on whether pairs or single reads were sampled.
if options.mate_file:
    print('Used', counter_out, 'pairs from total of', counter_in)
else:
    print('Used', counter_out, 'sequences from total of', counter_in)