From 402960b7b1ff3b93df6fd7ed11d895dbae288c85 Mon Sep 17 00:00:00 2001 From: ssahebkashaf Date: Fri, 22 Oct 2021 04:31:19 -0400 Subject: [PATCH] restructuring directory --- scripts/plotting/dnadiff_plot.R | 49 ------------------------------- scripts/plotting/plot_framework.R | 35 ---------------------- 2 files changed, 84 deletions(-) delete mode 100755 scripts/plotting/dnadiff_plot.R delete mode 100755 scripts/plotting/plot_framework.R diff --git a/scripts/plotting/dnadiff_plot.R b/scripts/plotting/dnadiff_plot.R deleted file mode 100755 index 6f7bd4d..0000000 --- a/scripts/plotting/dnadiff_plot.R +++ /dev/null @@ -1,49 +0,0 @@ -# This file is part of MAG Snakemake workflow. -# -# MAG Snakemake workflow is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# MAG Snakemake workflow is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with MAG Snakemake workflow. If not, see . - - -library(ggplot2) -library(gridExtra) - -args <- commandArgs(trailingOnly = TRUE) - -checkm_sr=read.csv(args[1],header=TRUE,stringsAsFactor=FALSE) -checkm_coas=read.csv(args[2],header=TRUE,stringsAsFactor=FALSE) -dnadiff=read.table(args[3],header=TRUE,stringsAsFactor=FALSE) - - -colnames(dnadiff)=c("Ref","genome","Ref_length", "Refcovered","Query_length","Queryaligned", "ANI") -dnadiff$genome= gsub(".*/","",dnadiff$genome) -dnadiff$genome=gsub(".fa","",dnadiff$genome) - -checkm_sr$`Assembly approach`="Single run" -checkm_coas$`Assembly approach`="Co-assembly" -checkm=rbind.data.frame(checkm_sr,checkm_coas) - -checkm$Quality="Medium quality" -checkm$Quality[checkm$completeness>=90&checkm$contamination<=5]="High quality" -checkm$genome=gsub(".fa","",checkm$genome) - -comb=merge(dnadiff,checkm,by="genome") -left<-ggplot(comb, aes(x = Queryaligned, y = Refcovered, color=`Assembly approach`, shape=Quality)) + - geom_point(stroke = 1,size=1)+xlab("MAG Aligned (%)")+ylab("Reference Aligned (%)") + theme_classic() +scale_color_manual(breaks=c("Single run","Co-assembly"), values=c("#BBBBBB","#4477AA"))+xlim(0,100)+ ylim(0,100)+scale_shape_manual(values = c(1,2))+guides(color=guide_legend(title="Approach")) -right<- -ggplot(comb, aes(x=ANI,color=`Assembly approach`)) + geom_density()+theme_classic()+xlab("ANI") +xlim(0,100)+ylab("Density")+scale_color_manual(breaks=c("Single run","Co-assembly"), values=c("#BBBBBB","#4477AA"))+guides(color=guide_legend(title="Approach")) - -p<-grid.arrange(left, right, nrow = 1) -ggsave("data/figures/dnadiff.png",p, width=10,height=5) - - - diff --git a/scripts/plotting/plot_framework.R b/scripts/plotting/plot_framework.R deleted file mode 100755 index 47d45e9..0000000 --- a/scripts/plotting/plot_framework.R +++ /dev/null @@ -1,35 +0,0 @@ -library(ggplot2) -library(gridExtra) - -args <- commandArgs(trailingOnly = TRUE) - -readcounts=read.table(args[1],header=TRUE,stringsAsFactor=FALSE) -df_flag=read.table(args[2],header=FALSE,stringsAsFactor=FALSE) -bwa.counts_sr=read.table(args[3],header=FALSE,stringsAsFactor=FALSE) -bwa.counts_coas=read.table(args[4],header=FALSE,stringsAsFactor=FALSE) -summary_out=args[5] - -readcounts$Run=gsub("data/00_preprocessing/processed/singlerun/","",readcounts$Run) -readcounts=readcounts[!grepl("_2.fastq", readcounts$Run),] -readcounts$Run=gsub("_1.fastq","",readcounts$Run) -readcounts$Readcount=2*readcounts$Readcount - -colnames(df_flag)=c("Run","Assembly") -df_comb=merge(df_flag,readcounts,by="Run") -df_comb$percassemb=df_comb$Assembly/df_comb$Readcount*100 - -colnames(bwa.counts_sr)=c("Run","Catalogue") -bwa.counts_sr$`Assembly Approach`="Single run" - -colnames(bwa.counts_coas)=c("Run","Catalogue") -bwa.counts_coas$`Assembly Approach`="Coassembly" - -bwa.counts=rbind.data.frame(bwa.counts_sr,bwa.counts_coas) -merged=merge(df_comb,bwa.counts, by="Run") -merged$percmags=merged$Catalogue/merged$Readcount*100 -merged$percassemb=merged$Assembly/merged$Readcount*100 - -write.csv(merged,summary_out, quote=FALSE,row.names=FALSE) - -ggplot(merged, aes(x=percmags, y=percassemb, color=`Assembly Approach`)) + geom_point() +xlab("Reads mapping to MAGs (%)") + ylab("Reads mapping to assembly (%)")+theme_classic()+scale_color_manual(breaks=c("Coassembly","Single run"), values=c("#a8ddb5","#c994c7"))+xlim(0,100)+ylim(0,100) -ggsave("data/figures/perassemb_perref.png",width=5,height=5)