-
Notifications
You must be signed in to change notification settings - Fork 0
/
combineQ.py
50 lines (32 loc) · 1.05 KB
/
combineQ.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import argparse
import sys
import numpy as np
import pandas as pd
def parse_args(argv):
    """Parse the command-line options of the global-ancestry combiner.

    Args:
        argv: List of argument strings, without the program name
            (e.g. ``sys.argv[1:]``).

    Returns:
        argparse.Namespace with the string attributes ``file``, ``out``
        and ``pop``.

    Raises:
        SystemExit: Raised by argparse when an option is unknown or one of
            the required options is missing.
    """
    parser = argparse.ArgumentParser(
        description="Combine .rfmix.Q global ancestry from all chromosomes"
    )
    # All three options are needed downstream; marking them required lets
    # argparse print a usage error instead of the caller failing later on a
    # None value.
    parser.add_argument(
        "--file",
        required=True,
        help="Text file, list of global ancestry output .rfmix.Q",
    )
    parser.add_argument(
        "--out",
        required=True,
        help="Prefix of the output files",
    )
    parser.add_argument(
        "--pop",
        required=True,
        type=str,
        help="the ancestry to output, match to the header of rfmix output",
    )
    return parser.parse_args(argv)
def main(argv):
    """Average one ancestry column across several ``.rfmix.Q`` tables.

    The file named by ``--file`` is read as a list of table paths, one per
    line. Each table is loaded as tab-separated text with its first line
    skipped, the ``--pop`` column is averaged row-by-row (unweighted) across
    all tables, and the result is written to ``<out>.rfmix.Q`` as a
    tab-separated file holding the ``#sample`` column of the first table and
    the averaged ancestry column.

    Args:
        argv: List of argument strings, without the program name.

    Raises:
        ValueError: If the list file names no tables, or if a table's
            ``#sample`` column differs from that of the first table.
        KeyError: If a table lacks the ``#sample`` or the requested
            ancestry column.
    """
    args = parse_args(argv)

    with open(args.file, "r") as f:
        # Ignore blank lines (e.g. a trailing empty line) so they are not
        # handed to read_csv as empty paths.
        paths = [line.strip() for line in f if line.strip()]
    print(paths)

    if not paths:
        raise ValueError(f"no .rfmix.Q paths listed in {args.file!r}")

    # skiprows=1 drops the first line of every file so the second line is
    # used as the header (presumably the RFMix banner line -- confirm
    # against the RFMix output format).
    tables = [pd.read_csv(path, sep="\t", skiprows=1) for path in paths]

    # The mean below pairs rows purely by position, so every table must list
    # the same samples in the same order; otherwise values from different
    # individuals would be averaged together without any error.
    reference_samples = tables[0]["#sample"].tolist()
    for path, table in zip(paths[1:], tables[1:]):
        if table["#sample"].tolist() != reference_samples:
            raise ValueError(
                f"samples in {path!r} do not match those in {paths[0]!r}"
            )

    mean_q = np.mean([table[args.pop] for table in tables], axis=0)

    combined = tables[0][["#sample"]].copy()
    combined[args.pop] = mean_q
    combined.to_csv(args.out + ".rfmix.Q", index=False, sep="\t")
if __name__ == "__main__":
    # Script entry point: pass main() the command-line arguments with the
    # program name (sys.argv[0]) dropped.
    cli_args = sys.argv[1:]
    main(cli_args)