Skip to content

Latest commit

 

History

History
708 lines (551 loc) · 26.6 KB

Cell_densities_test.md

File metadata and controls

708 lines (551 loc) · 26.6 KB
title author date output
Exploring microbial cell quantification
Arnau Vich
11/13/2018
html_document

Microbial quantification: total cell density effect

Load Packages

library(vegan)
library (plyr)
library (reshape2)
library (ggplot2)
library (ppcor)
library (ggforce)
library(dplyr)

Load data

#meta_sp=read.table("~/Desktop/QMP/data/metaphlan_sp.txt", sep="\t", header=T, row.names = 1, check.names = F)
#With categorical values
#phenos=read.table("~/Desktop/QMP/phenotypes/db2.txt", sep="\t", heade =T, row.names = 1)
#phenos_num=read.table("~/Desktop/QMP/phenotypes/db2_num.txt", sep="\t", heade =T, row.names = 1)
#species_select=read.table("~/Desktop/QMP/phenotypes/bacteria_of_interest.txt", sep="\t", heade =T, row.names = 1)
#Only numeric values

Alternatively load the saved R work space

load ("~/Desktop/QMP/hopefully_final_results/R_space.RData")

Calculate Bray-Curtis distances

beta_meta=vegdist(t(meta_sp), method="bray")
#Calculate the first 5 components
my_pcoa_meta=cmdscale(beta_meta, k = 5)
#Transform to dataframe
my_pcoa_meta=as.data.frame(my_pcoa_meta)
#Merge with metadata 
my_pcoa_meta=merge(my_pcoa_meta,phenos, by="row.names")

Plot first 2 components, connecting dots per individual (1 ind. = 2 time-points), and colour per individual

ggplot(my_pcoa_meta,aes(V1,V2, group=Individual_ID))  +  geom_point (size=2, aes(colour=time_point)) + theme_classic() + labs(x="PCoA1", y="PCoA2") + geom_line( alpha=0.2)

Repeat it for other phenotypes, such as location or cell density (FISH counts)


ggplot(my_pcoa_meta,aes(V1,V2, group=Individual_ID))  +  geom_point (size=2, aes(colour=as.factor(MontrealL))) + theme_classic() + labs(x="PCoA1", y="PCoA2") + geom_line( alpha=0.2)

#Remove sample 37 because FISH failed! 
my_pcoa2=my_pcoa_meta[-62,]
my_pcoa2=my_pcoa2[-62,]

ggplot(my_pcoa2,aes(V1,V2, group=Individual_ID))  +  geom_point (size=2, aes(colour=FISH_total)) + theme_classic() + labs(x="PCoA1", y="PCoA2") + geom_line( alpha=0.2) + scale_color_gradient(low="red", high = "blue")

#Also log transform FISH
ggplot(my_pcoa2,aes(V1,V2, group=Individual_ID))  +  geom_point (size=2, aes(colour=log(FISH_total))) + theme_classic() + labs(x="PCoA1", y="PCoA2") + geom_line( alpha=0.2) + scale_color_gradient(low="red", high = "blue")

Prepare phenotypes for ADONIS test, log transform FISH and Sequencing Depth and remove sample 37

#phenos_num2=phenos_num[-62,]
#phenos_num2=phenos_num2[-62,]
#phenos_num2$FISH_log=log(phenos_num2$FISH_total)
#phenos_num2$RD_log=log(phenos_num2$PF_Reads)

phenos2=phenos[-62,]
phenos2=phenos2[-62,]
phenos2$FISH_log=log(phenos_num2$FISH_total)
phenos2$RD_log=log(phenos_num2$PF_Reads)

Prepare microbiome data (species level) for ADONIS test

#taxa_data=as.data.frame(t(meta_sp))
#Check that phenotypes and microbiome tables are concordant in the number of samples
#taxa_data=taxa_data[rownames(phenos_num2),]
#Calculate (again) Bray Curtis distance matrix
#dist.matrix <- vegdist(taxa_data,method = "bray")

taxa_data=as.data.frame(t(meta_sp))
#Check that phenotypes and microbiome tables are concordant in the number of samples
taxa_data=taxa_data[rownames(phenos2),]
#Calculate (again) Bray Curtis distance matrix
dist.matrix <- vegdist(taxa_data,method = "bray")

ADONIS test

#Start an empty matrix

adonis_meta <- matrix(ncol = 7, nrow=ncol(phenos_num2))

#Calculate variance explained per phenotype after 10.000 permutations
for (i in 1:ncol(phenos2)){
  ad<-adonis(dist.matrix ~ phenos2[,i],permutations=10000)
  aov_table <- ad$aov.tab
  #Df
  adonis_meta[i,1]=aov_table[1,1]
  #SumsOfSqs
  adonis_meta[i,2]=aov_table[1,2]
  #MeanSqs
  adonis_meta[i,3]=aov_table[1,3]
  #FModel
  adonis_meta[i,4]=aov_table[1,4]
  #R2
  adonis_meta[i,5]=aov_table[1,5]
  #Pval
  adonis_meta[i,6]=aov_table[1,6]
}
adonis_meta= as.data.frame(adonis_meta)
rownames(adonis_meta) = colnames(phenos2)
adonis_meta$V7=p.adjust(adonis_meta$V6, method = "BH")
adonis_meta$V8="No"
adonis_meta$V8[adonis_meta$V7<0.05]="Yes"
# View(adonis_meta)
colnames(adonis_meta)=c("DF", "SumsOfSqs", "MeanSqs", "FModel","R2","pval","FDR(BH)","Significant")
  • First row (Pnumber) is the individual inter-variance, which explains around 80% of the variation

  • As expected, ileum resection is in the top 10

  • Cell density (FISH_data) explains ~7%

ggplot(adonis_meta, aes(reorder(row.names(adonis_meta), R2), R2, fill=Significant)) + geom_bar(stat = "identity") + coord_flip() + theme_bw() + ylab ("Explained variance") + xlab ("Factor")  + theme(text = element_text(size=8))

Normalise data (arcsine sqroot for taxa and log for FISH and Read depth)

taxa_data=taxa_data/100
taxa_data= asin(sqrt(taxa_data))

Filter taxa prevalent in less than 10% of the samples


taxa_tmp=as.data.frame(t(taxa_data))
taxa_tmp$prevalence=((rowSums(taxa_tmp!=0))/(ncol(taxa_tmp)))*100
taxa_filt=taxa_tmp[taxa_tmp$prevalence>10,]
taxa_filt$prevalence=NULL
yy=merge(phenos2,as.data.frame(t(taxa_filt)), by="row.names")

Test linear models with and without FISH covariate

associations=matrix( ncol=4,nrow=174)
associations_fish=matrix( ncol=5,nrow=174)
c=1
for (b in 73:ncol(yy)){

  v=lm(yy[,b]~Age+BMI+Sex+PPI, data=yy)
  a=summary(v)
  #pval Age
  associations[c,1]=a$coefficients[2,4]
  #pval BMI
  associations[c,2]=a$coefficients[3,4]
  #pval Sex
  associations[c,3]=a$coefficients[4,4]
  #pval PPI
  associations[c,4]=a$coefficients[5,4]

  vv=lm(yy[,b]~FISH_log+Age+BMI+Sex+PPI, data=yy)
  aa=summary(vv)

  associations_fish[c,1]=aa$coefficients[2,4]
  #pval Age
  associations_fish[c,2]=aa$coefficients[3,4]
  #pval BMI
  associations_fish[c,3]=aa$coefficients[4,4]
  #pval Sex
  associations_fish[c,4]=aa$coefficients[5,4]
  #pval PPI
  associations_fish[c,5]=aa$coefficients[6,4]
  c=c+1
}

associations=as.data.frame(associations)
rownames(associations)=colnames(yy)[73:245]
colnames(associations)=c("age","bmi","sex","PPI")
associations_fish=as.data.frame(associations_fish)
rownames(associations_fish)=colnames(yy)[73:245]
colnames(associations_fish)=c("fish","age","bmi","sex","PPI")

Predict FISH data using Shannon and read depth

#Calculate shannon
my_richness=diversity(taxa_data,index = "shannon")
yy$shannon=my_richness
#Move to the first column
yy2=select(yy,shannon,everything())

lm_pred=lm(FISH_log~shannon+RD_log, data=yy)
yy$FISH_pred=predict(lm_pred)
yy=select(yy,FISH_pred,everything())

cor.test(yy$FISH_log, yy$FISH_pred, method="spearman")

ggplot(yy, aes(FISH_log, FISH_pred)) + geom_point() + theme_bw() + xlab("Cell density") + ylab("Cell density predicted from Shannon + Read Depth")

Predict FISH data using Shannon and DNA concentration

lm_pred2=lm(FISH_total~shannon_meta+Conc, data=zz)
zz$FISH_pred2=predict(lm_pred2)

cor.test(zz$FISH_total, zz$FISH_pred2, method="spearman")

ggplot(zz, aes(FISH_total, FISH_pred2)) + geom_point() + theme_bw() + xlab("Cell density") + ylab("Cell density predicted from Shannon + DNA concentration")

Correlation bacteria ~ phenotype and partial correlations using FISH data as covariate

Subset phenotypes

##Age, Sex, BMI, Ileal resection, Minimum 2 resectins, PPI,Calprotectin, log_FISH
phenos_subset=phenos_num2[,c(18,35,34,27,26,23,59,67)]
phenos_subset=merge(phenos_subset,as.data.frame(t(taxa_filt)), by="row.names")
row.names(phenos_subset)=phenos_subset$Row.names
phenos_subset$Row.names=NULL

Start matrix for results

correlation_results=matrix( ncol=28,nrow=173)

Loop to correlate taxapheno and taxapheno+FISH

c=1
for (b in 9:ncol(phenos_subset)){

  #Correlation Age
  x1=cor.test(phenos_subset[,b],phenos_subset[,1], method="spearman")
  correlation_results[c,1]=x1$p.value
  correlation_results[c,2]=x1$estimate
  p1=pcor.test(phenos_subset[,b],phenos_subset[,1],phenos_subset[,8],method="spearman")
  correlation_results[c,3]=p1[1,2]
  correlation_results[c,4]=p1[1,1]

  #Correlation Sex
  x1=cor.test(phenos_subset[,b],phenos_subset[,2], method="spearman")
  correlation_results[c,5]=x1$p.value
  correlation_results[c,6]=x1$estimate
  p1=pcor.test(phenos_subset[,b],phenos_subset[,2],phenos_subset[,8],method="spearman")
  correlation_results[c,7]=p1[1,2]
  correlation_results[c,8]=p1[1,1]

  #Correlation BMI
  x1=cor.test(phenos_subset[,b],phenos_subset[,3], method="spearman")
  correlation_results[c,9]=x1$p.value
  correlation_results[c,10]=x1$estimate
  p1=pcor.test(phenos_subset[,b],phenos_subset[,3],phenos_subset[,8],method="spearman")
  correlation_results[c,11]=p1[1,2]
  correlation_results[c,12]=p1[1,1]

  #Ileal resections(1=colon, 2=ileum)
  x1=cor.test(phenos_subset[,b],phenos_subset[,4], method="spearman")
  correlation_results[c,13]=x1$p.value
  correlation_results[c,14]=x1$estimate
  p1=pcor.test(phenos_subset[,b],phenos_subset[,4],phenos_subset[,8],method="spearman")
  correlation_results[c,15]=p1[1,2]
  correlation_results[c,16]=p1[1,1]

  #Min. 2 resections
  x1=cor.test(phenos_subset[,b],phenos_subset[,5], method="spearman")
  correlation_results[c,17]=x1$p.value
  correlation_results[c,18]=x1$estimate
  p1=pcor.test(phenos_subset[,b],phenos_subset[,5],phenos_subset[,8],method="spearman")
  correlation_results[c,19]=p1[1,2]
  correlation_results[c,20]=p1[1,1]

  #Correlation PPI
  x1=cor.test(phenos_subset[,b],phenos_subset[,6], method="spearman")
  correlation_results[c,21]=x1$p.value
  correlation_results[c,22]=x1$estimate
  p1=pcor.test(phenos_subset[,b],phenos_subset[,6],phenos_subset[,8],method="spearman")
  correlation_results[c,23]=p1[1,2]
  correlation_results[c,24]=p1[1,1]

  #Correlation Calprotectin
  x1=cor.test(phenos_subset[,b],phenos_subset[,7], method="spearman")
  correlation_results[c,25]=x1$p.value
  correlation_results[c,26]=x1$estimate
  p1=pcor.test(phenos_subset[,b],phenos_subset[,7],phenos_subset[,8],method="spearman")
  correlation_results[c,27]=p1[1,2]
  correlation_results[c,28]=p1[1,1]

  c=c+1
}

correlation_results=as.data.frame(correlation_results)
rownames(correlation_results)=colnames(phenos_subset)[9:181]
colnames(correlation_results)=c("age_pval", "age_estimate", "age_fish_pval", "age_fish_estimate", "sex_pval", "sex_estimate", "sex_fish_pval", "sex_fish_estimate","bmi_pval", "bmi_estimate", "bmi_fish_pval", "bmi_fish_estimate","resec_pval", "resec_estimate", "resec_fish_pval", "resec_fish_estimate","min2_pval", "min2_estimate", "min2_fish_pval", "min2_fish_estimate", "PPI_pval", "PPI_estimate", "PPI_fish_pval", "PPI_fish_estimate","Calp_pval", "Calp_estimate", "Calp_fish_pval", "Calp_fish_estimate"  )
  • PPI effect is large and not influenced by cell density
correlation_results$fdr=p.adjust(correlation_results$PPI_fish_pval, method = "BH")
correlation_results$significant="No"
correlation_results[correlation_results$fdr<0.05,]$significant="Yes"
ggplot(correlation_results, aes(PPI_pval,PPI_fish_pval)) + geom_point() + theme_bw() + ylab("pval correlation PPI +FISH") + xlab("pval correlation PPI")  + facet_zoom (x=PPI_pval<0.05, y=PPI_fish_pval<0.05, zoom.size = 0.5, split = F) + scale_color_manual(values=c("black", "red"))
  • Sex effect is not influenced by cell density
correlation_results$fdr=p.adjust(correlation_results$sex_fish_pval, method = "BH")
correlation_results$significant="No"
correlation_results[correlation_results$fdr<0.05,]$significant="Yes"

ggplot(correlation_results, aes(sex_pval,sex_fish_pval, color=significant)) + geom_point() + theme_bw() + ylab("pval correlation Sex +FISH") + xlab("pval correlation Sex (male=1, female=2)")  + facet_zoom (x=sex_pval<0.05, y=sex_fish_pval<0.05, zoom.size = 0.5, split = F)+ scale_color_manual(values=c("black", "red"))
  • Age show a moderate influence
correlation_results$fdr=p.adjust(correlation_results$age_fish_pval, method = "BH")
correlation_results$significant="No"
correlation_results[correlation_results$fdr<0.05,]$significant="Yes"

ggplot(correlation_results, aes(age_pval,age_fish_pval,color=significant)) + geom_point() + theme_bw() + ylab("pval correlation Age +FISH") + xlab("pval correlation Age")  + facet_zoom (x=age_pval<0.05, y=age_fish_pval<0.05, zoom.size = 0.5, split = F) + scale_color_manual(values=c("black", "red"))
  • Big differences when taking cell density into consideration in patients with Ileal resection!
correlation_results$fdr=p.adjust(correlation_results$bmi_fish_pval, method = "BH")
correlation_results$significant="No"
correlation_results[correlation_results$fdr<0.05,]$significant="Yes"

ggplot(correlation_results, aes(bmi_pval,bmi_fish_pval,color=significant)) + geom_point() + theme_bw() + ylab("pval correlation BMI+FISH") + xlab("pval correlation BMI") + facet_zoom (x=bmi_pval<0.05, y=bmi_fish_pval<0.05, zoom.size = 0.5, split = F) + scale_color_manual(values=c("black", "red")) 
  • Ileal resection show a big influence
correlation_results$fdr=p.adjust(correlation_results$resec_fish_pval, method = "BH")
correlation_results$significant="No"
correlation_results[correlation_results$fdr<0.05,]$significant="Yes"

ggplot(correlation_results, aes(resec_pval,resec_fish_pval,color=significant)) + geom_point() + theme_bw() + ylab("pval correlation Ileal resection +FISH") + xlab("pval correlation Ileal resection") + facet_zoom (x=resec_pval<0.05, y=resec_fish_pval<0.05, zoom.size = 0.5, split = F)+ scale_color_manual(values=c("black", "red")) 
correlation_results$fdr=p.adjust(correlation_results$min2_fish_pval, method = "BH")
correlation_results$significant="No"
correlation_results[correlation_results$fdr<0.05,]$significant="Yes"

ggplot(correlation_results, aes(min2_pval,min2_fish_pval,color=significant)) + geom_point() + theme_bw() + ylab("pval correlation More than 1 intestinal resection (yes/no) +FISH") + xlab("pval correlation More than 1 intestinal resection") + facet_zoom (x=min2_pval<0.05, y=min2_fish_pval<0.05, zoom.size = 0.5, split = F)+ scale_color_manual(values=c("black", "red")) 
correlation_results$fdr=p.adjust(correlation_results$Calp_fish_pval, method = "BH")
correlation_results$significant="No"
correlation_results[correlation_results$fdr<0.05,]$significant="Yes"
ggplot(correlation_results, aes(Calp_pval,Calp_fish_pval,color=significant)) + geom_point() + theme_bw() + ylab("pval correlation Calprotectin levels +FISH") + xlab("pval correlation Calprotectin levels") + facet_zoom (x=Calp_pval<0.05, y=Calp_fish_pval<0.05, zoom.size = 0.5, split = F)+ scale_color_manual(values=c("black", "red")) 

Test each phenotype against FISH

phenos_vs_fish=phenos2
phenos_vs_fish$Individual_ID=NULL
phenos_vs_fish$FISH_total=NULL
phenos_vs_fish$PF_Reads=NULL
phenos_vs_fish$AbsEcoli=NULL
phenos_vs_fish$AbsFprau=NULL
phenos_vs_fish$FISHAveragePreRatioEcoliFprau=NULL
phenos_vs_fish$FISHAveragePreRatioErecEcoli=NULL
phenos_vs_fish$FISHEcoli=NULL
phenos_vs_fish$FISHErec=NULL
phenos_vs_fish$FISHFprau=NULL
phenos_vs_fish$FISH_Rectale=NULL
phenos_vs_fish$Faecalibacterium_prausnitzii_meta=NULL
phenos_vs_fish$Escherichia_coli_meta=NULL
phenos_vs_fish$Eubacterium_rectale_meta=NULL
phenos_vs_fish=select(phenos_vs_fish,FISH_log,everything())
phenos_vs_fish=select(phenos_vs_fish,RD_log,everything())

my_results=matrix(nrow=100, ncol=4)
a=1
for (i in 3:ncol(phenos_vs_fish)){
  if (is.numeric(phenos_vs_fish[,i])){
    my_test=cor.test(phenos_vs_fish[,2], phenos_vs_fish[,i], method="spearman")
    my_results[a,4]=my_test$p.value
    my_results[a,2]=my_test$estimate
    my_results[a,1]=colnames(phenos_vs_fish)[i]
    a=a+1
  } else {
    my_test=pairwise.wilcox.test(phenos_vs_fish[,2], phenos_vs_fish[,i] ,p.adjust.method = "none")
    my_test2=melt(my_test$p.value)
    for (x in 1:nrow(my_test2)){
      my_results[a,4]=as.character(my_test2[x,3])
      my_results[a,3]=as.character(my_test2[x,2])
      my_results[a,2]=as.character(my_test2[x,1])
      my_results[a,1]=colnames(phenos_vs_fish)[i]
      a=a+1  
    }
  }
}
my_results2=as.data.frame(my_results)
my_results2=my_results2[c(1:61),]
my_results2=my_results2[-c(22),]
my_results2=my_results2[-c(18),]
my_results2$V4=as.numeric(as.character(my_results2$V4))
my_results2$FDR=p.adjust(my_results2$V4, method="BH")

ggplot(phenos2, aes(DNA_conc,FISH_total)) + geom_point() + theme_bw()

ggplot(phenos2, aes(Min2Resection,FISH_total)) + geom_boxplot(outlier.size = NA) + theme_bw() + geom_jitter(aes(color=IlealResection)) + xlab ("Patient with at leat 2 resections") + ylab("Cell density") + scale_color_manual(values=c("black", "red")) 

ggplot(phenos2, aes(Min2Resection,FISH_total)) + geom_boxplot(outlier.size = NA) + theme_bw() + geom_jitter(aes(color=Colectomy)) + xlab ("Patient with at leat 2 resections") + ylab("Cell density") + scale_color_manual(values=c("black", "red")) 

ggplot(phenos2, aes(as.factor(liqstool1),FISH_total)) + geom_boxplot(outlier.size = NA) + theme_bw() + xlab("Number liquid stools before sampling") + ylab("Cell density") + geom_jitter()

Correlate bacteria to Read Depth and Cell density

phenos_subset=phenos_num2[,c(18,35,34,27,26,23,59,67,68)]
phenos_subset=merge(phenos_subset,as.data.frame(t(taxa_filt)), by="row.names")
row.names(phenos_subset)=phenos_subset$Row.names
phenos_subset$Row.names=NULL

species_tito=matrix( ncol=4,nrow=173)
 c=1
> for (b in 10:ncol(phenos_subset)){
 
   #Correlation RD
   x1=cor.test(phenos_subset[,b],phenos_subset[,9], method="spearman")
   species_tito[c,1]=x1$p.value
   species_tito[c,2]=x1$estimate
 
   #Correlation FISH
   x1=cor.test(phenos_subset[,b],phenos_subset[,8], method="spearman")
   species_tito[c,3]=x1$p.value
   species_tito[c,4]=x1$estimate
   c=c+1
}
 
species_tito=as.data.frame(species_tito) 
rownames(species_tito)=colnames(phenos_subset)[10:ncol(phenos_subset)]
colnames(species_tito)=c("RD_pvalue", "RD_beta", "FISH_pval", "FISH_beta")
mean_abundance=as.data.frame(colMeans(phenos_subset)[10:ncol(phenos_subset)])
colnames(mean_abundance)=("rel_abund")
species_tito=merge(species_tito,mean_abundance, by="row.names")
ggplot (species_tito, aes(FISH_beta, RD_beta))  + geom_point(aes(size=rel_abund))  + theme_bw() + geom_abline(intercept =0 , slope = 1, color="red") + xlim(c(-0.6,0.6)) + ylim (c(-0.6,0.6)) + xlab("Spearman coefficient bacteria ~ cell density (FISH)") + ylab ("Spearman coefficient bacteria ~ Sequencing depth")

Check relation cell counts and selected bacteria

other_phen=phenos[c(1,6,31)]
species_select=merge(species_select, other_phen, by="row.names")
  • Fecalibacterium prausnitzii
ggplot(species_select, aes(FISH_total,FISH_FP_rel, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("F.Prau. relative abundance (FISH)")

ggplot(species_select, aes(FISH_total,Faecalibacterium_prausnitzii_metaphlan, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("F.Prau. relative abundance (MGS)") + ylim(0,20)

ggplot(species_select, aes(FISH_total,FISH_PRAU, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("F.Prau.counts (FISH)")
  • Eubacterium rectale
ggplot(species_select, aes(FISH_total,FISH_ER_rel, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("E.Rectale relative abundance (FISH)")

ggplot(species_select, aes(FISH_total,Eubacterium_rectale_metaphlan, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("E.Rectale relative abundance (MGS)") 

ggplot(species_select, aes(FISH_total,FISH_Rectale, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("E.Rectale counts (FISH)")
  • Escherichia coli
ggplot(species_select, aes(FISH_total,FISH_EC_rel, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("E.coli relative abundance (FISH)")

ggplot(species_select, aes(FISH_total,Escherichia_coli_metaphlan, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("E.coli relative abundance (MGS)") 

ggplot(species_select, aes(FISH_total,FISH_Coli, group=Individual_ID, color=time_point)) + geom_point() + theme_bw() + geom_line( alpha=0.2, color="black")+ scale_color_manual(values=c("black", "red")) + xlab("Cell density (FISH)") + ylab("E.coli counts (FISH)")

Check p-value distribution (change the phenotype you like)

sel_correlation= correlation_results[,c("resec_fish_pval","resec_pval")]
sel_correlation$id=row.names(sel_correlation)
sel2=melt(sel_correlation, id.vars = "id")
my.pvalue.list<-list("Ileal Resection"=sel_correlation$resec_pval, "Ileal Resection FISH"=sel_correlation$resec_fish_pval)
ggplot(sel2, aes(value, fill=variable, color=variable)) + geom_histogram(bins=20,position="dodge", alpha =0.7) + theme_bw()
library(lattice)
qqunif.plot<-function(pvalues, 
                      should.thin=T, thin.obs.places=2, thin.exp.places=2, 
                      xlab=expression(paste("Expected (",-log[10], " p-value)")),
                      ylab=expression(paste("Observed (",-log[10], " p-value)")), 
                      draw.conf=TRUE, conf.points=1000, conf.col="lightgray", conf.alpha=.05,
                      already.transformed=FALSE, pch=20, aspect="iso", prepanel=prepanel.qqunif,
                      par.settings=list(superpose.symbol=list(pch=pch)), ...) {
    
    
    #error checking
    if (length(pvalues)==0) stop("pvalue vector is empty, can't draw plot")
    if(!(class(pvalues)=="numeric" || 
         (class(pvalues)=="list" && all(sapply(pvalues, class)=="numeric"))))
        stop("pvalue vector is not numeric, can't draw plot")
    if (any(is.na(unlist(pvalues)))) stop("pvalue vector contains NA values, can't draw plot")
    if (already.transformed==FALSE) {
        if (any(unlist(pvalues)==0)) stop("pvalue vector contains zeros, can't draw plot")
    } else {
        if (any(unlist(pvalues)<0)) stop("-log10 pvalue vector contains negative values, can't draw plot")
    }
    
    
    grp<-NULL
    n<-1
    exp.x<-c()
    if(is.list(pvalues)) {
        nn<-sapply(pvalues, length)
        rs<-cumsum(nn)
        re<-rs-nn+1
        n<-min(nn)
        if (!is.null(names(pvalues))) {
            grp=factor(rep(names(pvalues), nn), levels=names(pvalues))
            names(pvalues)<-NULL
        } else {
            grp=factor(rep(1:length(pvalues), nn))
        }
        pvo<-pvalues
        pvalues<-numeric(sum(nn))
        exp.x<-numeric(sum(nn))
        for(i in 1:length(pvo)) {
            if (!already.transformed) {
                pvalues[rs[i]:re[i]] <- -log10(pvo[[i]])
                exp.x[rs[i]:re[i]] <- -log10((rank(pvo[[i]], ties.method="first")-.5)/nn[i])
            } else {
                pvalues[rs[i]:re[i]] <- pvo[[i]]
                exp.x[rs[i]:re[i]] <- -log10((nn[i]+1-rank(pvo[[i]], ties.method="first")-.5)/(nn[i]+1))
            }
        }
    } else {
        n <- length(pvalues)+1
        if (!already.transformed) {
            exp.x <- -log10((rank(pvalues, ties.method="first")-.5)/n)
            pvalues <- -log10(pvalues)
        } else {
            exp.x <- -log10((n-rank(pvalues, ties.method="first")-.5)/n)
        }
    }
    
    
    #this is a helper function to draw the confidence interval
    panel.qqconf<-function(n, conf.points=1000, conf.col="gray", conf.alpha=.05, ...) {
        require(grid)
        conf.points = min(conf.points, n-1);
        mpts<-matrix(nrow=conf.points*2, ncol=2)
        for(i in seq(from=1, to=conf.points)) {
            mpts[i,1]<- -log10((i-.5)/n)
            mpts[i,2]<- -log10(qbeta(1-conf.alpha/2, i, n-i))
            mpts[conf.points*2+1-i,1]<- -log10((i-.5)/n)
            mpts[conf.points*2+1-i,2]<- -log10(qbeta(conf.alpha/2, i, n-i))
        }
        grid.polygon(x=mpts[,1],y=mpts[,2], gp=gpar(fill=conf.col, lty=0), default.units="native")
    }
    
    #reduce number of points to plot
    if (should.thin==T) {
        if (!is.null(grp)) {
            thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
                                      exp.x = round(exp.x, thin.exp.places),
                                      grp=grp))
            grp = thin$grp
        } else {
            thin <- unique(data.frame(pvalues = round(pvalues, thin.obs.places),
                                      exp.x = round(exp.x, thin.exp.places)))
        }
        pvalues <- thin$pvalues
        exp.x <- thin$exp.x
    }
    gc()
    
    prepanel.qqunif= function(x,y,...) {
        A = list()
        A$xlim = range(x, y)*1.02
        A$xlim[1]=0
        A$ylim = A$xlim
        return(A)
    }
    
    #draw the plot
    xyplot(pvalues~exp.x, groups=grp, xlab=xlab, ylab=ylab, aspect=aspect,
           prepanel=prepanel, scales=list(axs="i"), pch=pch,
           panel = function(x, y, ...) {
               if (draw.conf) {
                   panel.qqconf(n, conf.points=conf.points, 
                                conf.col=conf.col, conf.alpha=conf.alpha)
               };
               panel.xyplot(x,y, ...);
               panel.abline(0,1);
           }, par.settings=par.settings, ...
    )
}


qqunif.plot(my.pvalue.list, auto.key=list(corner=c(.95,.05)))