Skip to content

Commit

Permalink
update to gtf processing with incomplete cytoband info
Browse files Browse the repository at this point in the history
  • Loading branch information
maxfieldk committed Oct 9, 2024
1 parent a47c94c commit 14bc22a
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
8 changes: 4 additions & 4 deletions workflow/aref/scripts/annotate_rtes.R
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ tryCatch(
theme(legend.position = "none") +
labs(x = "ORF Length", y = "Z-Score", title = element) +
scale_color_manual(values = c("TRUE" = "red", "FALSE" = "black"))
mysave(sprintf("%s/figures/%s_orf_lengths.pdf", outputdir, element), 4, 4)
mysave(sprintf("%s/intactness_annotation_workdir/%s_orf_lengths.pdf", outputdir, element), 4, 4)
modal_widths <- binwidth_df %>%
filter(orf_length_zscore > z_score_cutoff) %>%
arrange(-average_start) %$% width
Expand All @@ -277,8 +277,8 @@ tryCatch(
consensus_aa <- Biostrings::translate(consensus)
consensus_aa_ss <- AAStringSet(consensus_aa)
names(consensus_aa_ss) <- c("consensus")
orf_consensus_path <- sprintf("%s/figures/%s_orf_length_%s_consensus.fa", outputdir, element, modal_width)
orf_aa_consensus_path <- sprintf("%s/figures/%s_orf_length_%s_aa_consensus.fa", outputdir, element, modal_width)
orf_consensus_path <- sprintf("%s/intactness_annotation_workdir/%s_orf_length_%s_consensus.fa", outputdir, element, modal_width)
orf_aa_consensus_path <- sprintf("%s/intactness_annotation_workdir/%s_orf_length_%s_aa_consensus.fa", outputdir, element, modal_width)
writeXStringSet(consensus_ss, file = orf_consensus_path)
writeXStringSet(consensus_aa_ss, file = orf_aa_consensus_path)

Expand All @@ -291,7 +291,7 @@ tryCatch(


library(seqinr)
orf_fa_path <- sprintf("%s/figures/%s_orf_length_around_%s_aa.fa", outputdir, element, modal_width)
orf_fa_path <- sprintf("%s/intactness_annotation_workdir/%s_orf_length_around_%s_aa.fa", outputdir, element, modal_width)
writeXStringSet(c(Biostrings::translate(orf_ss), consensus_aa_ss), file = orf_fa_path)
system(sprintf("echo $(pwd); mafft --auto %s > %s.aln.fa", orf_fa_path, orf_fa_path))
aln <- read.alignment(sprintf("%s.aln.fa", orf_fa_path), format = "fasta")
Expand Down
14 changes: 11 additions & 3 deletions workflow/aref/scripts/process_gtf_tldr.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,17 @@ cytobands <- cytobandsdf %>%
dplyr::rename(seqnames = X1, start = X2, end = X3) %>%
GRanges()

overlaps <- findOverlaps(gr_for_overlap_analysis, cytobands, select = "first")
gr_for_overlap_analysis$cytobands <- mcols(cytobands[overlaps])$X4

gr_for_overlap_with_cytobands <- subsetByOverlaps(gr_for_overlap_analysis, cytobands)
gr_for_overlap_analysis_lacking_cytobands <- subsetByOverlaps(gr_for_overlap_analysis, cytobands, invert = TRUE)

overlaps <- findOverlaps(gr_for_overlap_with_cytobands, cytobands, select = "first")
gr_for_overlap_with_cytobands$cytobands <- mcols(cytobands[overlaps])$X4
if (length(gr_for_overlap_analysis_lacking_cytobands) != 0) {
gr_for_overlap_analysis_lacking_cytobands$cytobands <- "NoCytobandInfo"
gr_for_overlap_analysis <<- c(gr_for_overlap_with_cytobands, gr_for_overlap_analysis_lacking_cytobands)
} else {
gr_for_overlap_analysis <<- gr_for_overlap_with_cytobands
}
new_id_mapping_ref <- gr_for_overlap_analysis %>%
as.data.frame() %>%
tibble() %>%
Expand Down

0 comments on commit 14bc22a

Please sign in to comment.