Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Process HA and NA alignments separately #122

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 32 additions & 28 deletions ha-na-nextstrain/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -142,20 +142,6 @@ rule seasonal_flu_reassortment_align:
--nthreads {threads}
"""

# Run concat_sequences.py on the aligned HA and NA FASTA files and write the
# result to aligned_concatenated.fasta.
rule seasonal_flu_reassortment_concat:
    input:
        sequence_ha = "ha-na-nextstrain/results/aligned_ha.fasta",
        sequence_na = "ha-na-nextstrain/results/aligned_na.fasta",
    output:
        fasta = "ha-na-nextstrain/results/aligned_concatenated.fasta"
    conda: "../cartography.yml"
    # Every continuation backslash is preceded by a space so adjacent arguments
    # can never fuse, and the final argument carries no continuation at all.
    shell:
        """
        python3 ha-na-nextstrain/scripts/concat_sequences.py \
            --sequences {input.sequence_ha} {input.sequence_na} \
            --output {output.fasta}
        """

rule seasonal_flu_reassortment_tree:
input:
alignment = "ha-na-nextstrain/results/aligned_{segment}.fasta"
Expand Down Expand Up @@ -311,9 +297,9 @@ rule seasonal_flu_reassortment_clades:

rule seasonal_flu_reassortment_create_distance_matrix:
input:
alignment = "ha-na-nextstrain/results/aligned_{ha_concatenated}.fasta"
alignment = "ha-na-nextstrain/results/aligned_{segment}.fasta"
output:
output = "ha-na-nextstrain/results/distance_matrix_{ha_concatenated}.csv"
output = "ha-na-nextstrain/results/distance_matrix_{segment}.csv"
conda: "../cartography.yml"
shell:
"""
Expand All @@ -324,22 +310,44 @@ rule seasonal_flu_reassortment_create_distance_matrix:

# Run notebooks/scripts/clean_alignment.py on one aligned FASTA file and write
# the cleaned alignment under ha-na-nextstrain/results/. The script's stdout and
# stderr are both shown and copied to the rule's log file via `tee`.
# NOTE(review): the input, output and log entries below each appear twice, once
# with the {ha_concatenated} wildcard and once with {segment}. This looks like
# the before/after sides of a diff hunk -- confirm which wildcard is intended
# and drop the other line of each pair.
rule seasonal_flu_reassortment_clean_alignment_for_pca:
input:
alignment="ha-na-nextstrain/results/aligned_{ha_concatenated}.fasta",
alignment="ha-na-nextstrain/results/aligned_{segment}.fasta",
output:
alignment="ha-na-nextstrain/results/cleaned_aligned_{ha_concatenated}.fasta",
alignment="ha-na-nextstrain/results/cleaned_aligned_{segment}.fasta",
conda: "../cartography.yml"
log:
"logs/seasonal_flu_reassortment_clean_alignment_for_pca_{ha_concatenated}.txt"
"logs/seasonal_flu_reassortment_clean_alignment_for_pca_{segment}.txt"
shell:
"""
python3 notebooks/scripts/clean_alignment.py \
--alignment {input.alignment} \
--output {output.alignment} 2>&1 | tee {log}
"""

def get_h3n2_alignments_by_wildcards(wildcards):
    """Return the cleaned alignment paths selected by the wildcards.

    The cleaned HA alignment is always included. The cleaned NA alignment is
    appended only when ``wildcards.ha_concatenated`` equals ``"concatenated"``.

    Args:
        wildcards: Object exposing an ``ha_concatenated`` attribute.

    Returns:
        A new list of alignment file paths, HA first.
    """
    alignments = [
        "ha-na-nextstrain/results/cleaned_aligned_ha.fasta"
    ]
    if wildcards.ha_concatenated == "concatenated":
        alignments.append(
            "ha-na-nextstrain/results/cleaned_aligned_na.fasta"
        )

    return alignments

def get_h3n2_distances_by_wildcards(wildcards):
    """Return the distance matrix paths selected by the wildcards.

    The HA distance matrix is always included. The NA distance matrix is
    appended only when ``wildcards.ha_concatenated`` equals ``"concatenated"``.

    Args:
        wildcards: Object exposing an ``ha_concatenated`` attribute.

    Returns:
        A new list of distance matrix file paths, HA first.
    """
    distances = [
        "ha-na-nextstrain/results/distance_matrix_ha.csv"
    ]
    if wildcards.ha_concatenated == "concatenated":
        distances.append(
            "ha-na-nextstrain/results/distance_matrix_na.csv"
        )

    return distances

rule seasonal_flu_reassortment_embed_pca:
input:
alignment = "ha-na-nextstrain/results/cleaned_aligned_{ha_concatenated}.fasta",
alignment = get_h3n2_alignments_by_wildcards,
parameters="simulations/influenza-like/no-reassortment/pca_parameters.csv",
output:
dataframe = "ha-na-nextstrain/results/embed_pca_{ha_concatenated}.csv",
Expand All @@ -363,8 +371,7 @@ rule seasonal_flu_reassortment_embed_pca:

rule seasonal_flu_reassortment_embed_mds:
input:
alignment = "ha-na-nextstrain/results/cleaned_aligned_{ha_concatenated}.fasta",
distance_matrix = rules.seasonal_flu_reassortment_create_distance_matrix.output.output,
distance_matrix = get_h3n2_distances_by_wildcards,
parameters="simulations/influenza-like/no-reassortment/mds_parameters.csv",
output:
dataframe = "ha-na-nextstrain/results/embed_mds_{ha_concatenated}.csv",
Expand All @@ -375,7 +382,6 @@ rule seasonal_flu_reassortment_embed_mds:
shell:
"""
pathogen-embed \
--alignment {input.alignment} \
--distance-matrix {input.distance_matrix} \
--embedding-parameters {input.parameters} \
--random-seed {params.random_seed} \
Expand All @@ -386,8 +392,8 @@ rule seasonal_flu_reassortment_embed_mds:

rule seasonal_flu_reassortment_embed_tsne:
input:
alignment = "ha-na-nextstrain/results/cleaned_aligned_{ha_concatenated}.fasta",
distance_matrix = rules.seasonal_flu_reassortment_create_distance_matrix.output.output,
alignment = get_h3n2_alignments_by_wildcards,
distance_matrix = get_h3n2_distances_by_wildcards,
parameters="simulations/influenza-like/no-reassortment/t-sne_parameters.csv",
output:
dataframe = "ha-na-nextstrain/results/embed_t-sne_{ha_concatenated}.csv",
Expand All @@ -410,8 +416,7 @@ rule seasonal_flu_reassortment_embed_tsne:

rule seasonal_flu_reassortment_embed_umap:
input:
alignment = "ha-na-nextstrain/results/cleaned_aligned_{ha_concatenated}.fasta",
distance_matrix = rules.seasonal_flu_reassortment_create_distance_matrix.output.output,
distance_matrix = get_h3n2_distances_by_wildcards,
parameters="simulations/influenza-like/no-reassortment/umap_parameters.csv",
output:
dataframe = "ha-na-nextstrain/results/embed_umap_{ha_concatenated}.csv",
Expand All @@ -422,7 +427,6 @@ rule seasonal_flu_reassortment_embed_umap:
shell:
"""
pathogen-embed \
--alignment {input.alignment} \
--distance-matrix {input.distance_matrix} \
--embedding-parameters {input.parameters} \
--random-seed {params.random_seed} \
Expand Down