# makeSamplesheet.py
# Builds a samplesheet CSV from the paired FASTQ files in the current directory.
import os
import csv
# Name used for the first samplesheet written to a directory; later runs get
# a numeric suffix (samplesheet_1.csv, samplesheet_2.csv, ...).
BASE_OUTPUT_CSV = "samplesheet.csv"

# Column order of the generated CSV.
FIELDNAMES = ["sample", "fastq_1", "fastq_2", "strandedness"]

# Recognised read suffixes mapped to the samplesheet column they fill.
READ_SUFFIXES = {
    "_1.fastq.gz": "fastq_1",
    "_R1.fastq.gz": "fastq_1",
    "_2.fastq.gz": "fastq_2",
    "_R2.fastq.gz": "fastq_2",
}


def next_samplesheet_name(existing_names, base_name=BASE_OUTPUT_CSV):
    """Return a samplesheet file name that is not present in *existing_names*.

    Args:
        existing_names: Iterable of file names already in the output directory.
        base_name: Name to use when no samplesheet exists yet.

    Returns:
        *base_name* when no ``samplesheet*.csv`` file exists; otherwise
        ``<stem>_<n><ext>``, where ``n`` starts at the number of existing
        samplesheets and is increased until the name is unused.
    """
    taken = set(existing_names)
    stem, ext = os.path.splitext(base_name)
    count = sum(
        1 for name in taken if name.startswith(stem) and name.endswith(ext)
    )
    if count == 0:
        return base_name

    # Starting at the count keeps the usual numbering, but a gap in the
    # sequence (e.g. a deleted samplesheet_1.csv) would make that name collide
    # with an existing file, so keep advancing until the name is free.
    index = count
    while f"{stem}_{index}{ext}" in taken:
        index += 1
    return f"{stem}_{index}{ext}"


def parse_fastq_name(filename):
    """Split a FASTQ file name into its sample name and samplesheet column.

    Args:
        filename: Bare file name, e.g. ``"sample_A_R1.fastq.gz"``.

    Returns:
        A ``(sample_name, column)`` tuple, where ``column`` is ``"fastq_1"`` or
        ``"fastq_2"``, or ``None`` when the name does not end in one of the
        recognised read suffixes or has no sample name before the suffix.
    """
    for suffix, column in READ_SUFFIXES.items():
        if filename.endswith(suffix):
            # Strip the suffix instead of splitting on "_" so that sample
            # names which themselves contain underscores stay intact.
            sample_name = filename[: -len(suffix)]
            if not sample_name:
                return None
            return sample_name, column
    return None


def collect_samples(filenames):
    """Group FASTQ file names into one samplesheet row per sample.

    Args:
        filenames: Iterable of bare file names to inspect.

    Returns:
        A dict mapping each sample name to a row dict with the keys
        ``sample``, ``fastq_1``, ``fastq_2`` and ``strandedness`` (the last
        one left empty).
    """
    samples = {}
    # Sorted iteration makes the result independent of directory order.
    for filename in sorted(filenames):
        parsed = parse_fastq_name(filename)
        if parsed is None:
            continue
        sample_name, column = parsed
        row = samples.setdefault(
            sample_name,
            {"sample": sample_name, "fastq_1": "", "fastq_2": "", "strandedness": ""},
        )
        row[column] = filename
    return samples


def main():
    """Write a samplesheet CSV for the FASTQ files in the working directory."""
    input_directory = os.getcwd()
    filenames = os.listdir(input_directory)

    # Pick the output name before writing so existing sheets are not touched.
    output_csv = next_samplesheet_name(filenames)
    sample_data = collect_samples(filenames)

    # The same strandedness value is applied to every sample.
    strandedness = input("Enter strandedness: ")
    for sample_entry in sample_data.values():
        sample_entry["strandedness"] = strandedness

    # Rows are written in alphabetical order of sample name.
    sample_list = sorted(sample_data.values(), key=lambda row: row["sample"])

    with open(output_csv, "w", newline="") as csvfile:
        csv_writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
        csv_writer.writeheader()
        csv_writer.writerows(sample_list)

    print(f"Samplesheet created and saved as {output_csv}")


if __name__ == "__main__":
    main()