Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

We have a problem using the mzTab file input type #49

Open
Douerww opened this issue Jan 13, 2023 · 0 comments
Open

We have a problem using the mzTab file input type #49

Douerww opened this issue Jan 13, 2023 · 0 comments

Comments

@Douerww
Copy link

Douerww commented Jan 13, 2023

We refer to the sample code in msqrob2Example and rewrite it into a code that can input . mzTab format, and successfully run the PXD000279 dataset (data can be downloaded from here. It is worth mentioning that the file we use at runtime changed the peptide_abundance_std_error_study_variable[X] column name to the sumIntensity_X column name ).

However, we encountered an error that we could not solve when running another dataset PXD007145. (data can be downloaded from here. It is worth mentioning that the file we use at runtime changed the peptide_abundance_std_error_study_variable[X] column name to the sumIntensity_X column name ).
When running pe <- msqrob2::msqrob(object = pe, i = "protein", formula = ~condition), an error occurred Error in if (m == 0) { : Missing values cannot be used where TRUE/FALSE values are required
When we try to use msqrob2gui to run the same dataset, the following prompt appears in the lower right corner of the Model tab: The model is overparameterized. The residual degrees of freedom is 0. Variances and standard errors can not be estimated from data with this design. An error occurred on the Inference tab, but we use msqrob2gui to run PXD000279 very smoothly.

Annotations file contents are as follows
4LIT2OR7U(WNHKA$MX8TW_1

Any possible help is needed, thank you very much!

The codes to run PXD000279 is as follows

mzTab_pep = read.csv("./PXD000279/onlyPEP-filter-PXD000279.dynamic.sdrf_openms_design_openms.mzTab", sep="\t")

#mzTab_pep = mzTab_pep[mzTab_pep$opt_global_cv_MS.1002217_decoy_peptide == 0,]
mzTab_pep = mzTab_pep[!grepl("DECOY", mzTab_pep$accession),]

#peptide_abundance_study variable[1] == sumIntensity_1
mzTab_pep = mzTab_pep[,c("sequence", "accession", "sumIntensity_1", "sumIntensity_2",
                       "sumIntensity_3", "sumIntensity_4", "sumIntensity_5", "sumIntensity_6",
                       "sumIntensity_7", "sumIntensity_8")]
mzTab_pep_sum <- mzTab_pep %>% group_by(sequence, accession) %>% summarise(sumIntensity_1 = sum(sumIntensity_1),
                                                                           sumIntensity_2 = sum(sumIntensity_2),
                                                                           sumIntensity_3 = sum(sumIntensity_3),
                                                                           sumIntensity_4 = sum(sumIntensity_4),
                                                                           sumIntensity_5 = sum(sumIntensity_5),
                                                                           sumIntensity_6 = sum(sumIntensity_6),
                                                                           sumIntensity_7 = sum(sumIntensity_7),
                                                                           sumIntensity_8 = sum(sumIntensity_8))

mzTab_pep_sum <- mzTab_pep_sum[which((rowSums(mzTab_pep_sum[,3:6]) > 0)|(rowSums(mzTab_pep_sum[7:10]) > 0)),]

names(mzTab_pep_sum)[names(mzTab_pep_sum) =="accession"] <-"Proteins"

ecols <- grep("sumIntensity_", names(mzTab_pep_sum))

  pe <- readQFeatures(
  table = mzTab_pep_sum, fnames = 1, ecol = ecols,
  name = "peptideRaw", sep = "\t"
)


colData(pe)$condition <- as.factor(c("UPS1", "UPS1", "UPS1", "UPS1", "UPS2", "UPS2", "UPS2", "UPS2"))

rowData(pe[["peptideRaw"]])$nNonZero <- rowSums(assay(pe[["peptideRaw"]]) > 0)
pe <- zeroIsNA(pe, "peptideRaw") # convert 0 to NA

MSnbase::plotNA(assay(pe[["peptideRaw"]])) +
  xlab("Peptide index (ordered by data completeness)")

pe <- logTransform(pe, base = 2, i = "peptideRaw", name = "peptideLog")
limma::plotDensities(assay(pe[["peptideLog"]]))

Protein_filter <- rowData(pe[["peptideLog"]])$Proteins %in% smallestUniqueGroups(rowData(pe[["peptideLog"]])$Proteins)
pe <- pe[Protein_filter,,]


pe <- filterFeatures(pe, ~ nNonZero >= 2)


pe <- normalize(pe,
                i = "peptideLog",
                name = "peptideNorm",
                method = "center.median"
)


limma::plotDensities(assay(pe[["peptideNorm"]]))
boxplot(assay(pe[["peptideNorm"]]),
        col = palette()[-1],
        main = "Peptide distribtutions after normalisation", ylab = "intensity"
)

limma::plotMDS(assay(pe[["peptideNorm"]]), col = as.numeric(colData(pe)$condition))


# Summarization to protein level
pe <- QFeatures::aggregateFeatures(pe, i = "peptideNorm", fcol = "Proteins", na.rm = TRUE, name = "protein")


plotMDS(assay(pe[["protein"]]), col = as.numeric(colData(pe)$condition))

pe <- msqrob2::msqrob(object = pe, i = "protein", formula = ~condition)
getCoef(rowData(pe[["protein"]])$msqrobModels[[1]])

L <- makeContrast("conditionUPS2=0", parameterNames = c("conditionUPS2"))
pe <- hypothesisTest(object = pe, i = "protein", contrast = L)

volcano <- ggplot(
  rowData(pe[["protein"]])$conditionUPS2,
  aes(x = logFC, y = -log10(pval), color = adjPval < 0.05)
) +
  geom_point(cex = 2.5) +
  scale_color_manual(values = alpha(c("black", "red"), 0.5)) +
  theme_minimal() +
  ggtitle("Default workflow")

volcano

write.csv(rowData(pe[["protein"]])$conditionUPS2, file = "ups-quantms-dep-CM.csv")

The codes to run PXD007145 is as follows

mzTab_pep = read.csv("./PXD007145/only_pep_filterPXD007145-Th.sdrf_openms_design_openms.mzTab", sep="\t")
#mzTab_pep = mzTab_pep[mzTab_pep$opt_global_cv_MS.1002217_decoy_peptide == 0,]
mzTab_pep = mzTab_pep[!grepl("DECOY", mzTab_pep$accession),]

mzTab_pep = mzTab_pep[,c("sequence", "accession", "sumIntensity_1", "sumIntensity_2",
                         "sumIntensity_3")]
mzTab_pep_sum <- mzTab_pep %>% group_by(sequence, accession) %>% summarise(sumIntensity_1 = sum(sumIntensity_1),
                                                                           sumIntensity_2 = sum(sumIntensity_2),
                                                                           sumIntensity_3 = sum(sumIntensity_3))

mzTab_pep_sum <- mzTab_pep_sum[which((rowSums(mzTab_pep_sum[,3]) > 0)&(rowSums(mzTab_pep_sum[,4]) > 0)&(rowSums(mzTab_pep_sum[,5]) > 0)),]

names(mzTab_pep_sum)[names(mzTab_pep_sum) =="accession"] <-"Proteins"

ecols <- grep("sumIntensity_", names(mzTab_pep_sum))

pe <- readQFeatures(
  table = mzTab_pep_sum, fnames = 1, ecol = ecols,
  name = "peptideRaw", sep = "\t"
)


colData(pe)$condition <- as.factor(c("1", "4", "10"))

rowData(pe[["peptideRaw"]])$nNonZero <- rowSums(assay(pe[["peptideRaw"]]) > 0)
pe <- zeroIsNA(pe, "peptideRaw") # convert 0 to NA

MSnbase::plotNA(assay(pe[["peptideRaw"]])) +
  xlab("Peptide index (ordered by data completeness)")

pe <- logTransform(pe, base = 2, i = "peptideRaw", name = "peptideLog")
limma::plotDensities(assay(pe[["peptideLog"]]))

Protein_filter <- rowData(pe[["peptideLog"]])$Proteins %in% smallestUniqueGroups(rowData(pe[["peptideLog"]])$Proteins)
pe <- pe[Protein_filter,,]


pe <- filterFeatures(pe, ~ nNonZero >= 2)


pe <- normalize(pe,
                i = "peptideLog",
                name = "peptideNorm",
                method = "center.median"
)


limma::plotDensities(assay(pe[["peptideNorm"]]))
boxplot(assay(pe[["peptideNorm"]]),
        col = palette()[-1],
        main = "Peptide distribtutions after normalisation", ylab = "intensity"
)

limma::plotMDS(assay(pe[["peptideNorm"]]), col = as.numeric(colData(pe)$condition))


# Summarization to protein level
pe <- QFeatures::aggregateFeatures(pe, i = "peptideNorm", fcol = "Proteins", na.rm = TRUE, name = "protein")


plotMDS(assay(pe[["protein"]]), col = as.numeric(colData(pe)$condition))

pe <- msqrob2::msqrob(object = pe, i = "protein", formula = ~condition)

getCoef(rowData(pe[["protein"]])$msqrobModels[[1]])

L4 <- makeContrast("condition4=0", parameterNames = c("condition4"))
pe4 <- hypothesisTest(object = pe, i = "protein", contrast = L4)


volcano <- ggplot(
  rowData(pe4[["protein"]])$condition4,
  aes(x = logFC, y = -log10(pval), color = adjPval < 0.05)
) +
  geom_point(cex = 2.5) +
  scale_color_manual(values = alpha(c("black", "red"), 0.5)) +
  theme_minimal() +
  ggtitle("Default workflow")

volcano

write.csv(rowData(pe4[["protein"]])$condition4, file = "PXD007145-4FC-quantms-dep-CM.csv")


L10 <- makeContrast("condition10=0", parameterNames = c("condition10"))
pe10 <- hypothesisTest(object = pe, i = "protein", contrast = L10)

volcano <- ggplot(
  rowData(pe10[["protein"]])$condition10,
  aes(x = logFC, y = -log10(pval), color = adjPval < 0.05)
) +
  geom_point(cex = 2.5) +
  scale_color_manual(values = alpha(c("black", "red"), 0.5)) +
  theme_minimal() +
  ggtitle("Default workflow")

volcano

write.csv(rowData(pe10[["protein"]])$condition10, file = "PXD007145-10FC-quantms-dep-CM.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant