From 87ec17d7dbdc3fcdd733d8fee5c4153f25784d9c Mon Sep 17 00:00:00 2001 From: cpauvert Date: Wed, 11 Sep 2024 16:50:02 +0200 Subject: [PATCH] add maldipickr vignette for improved quickstart this fixes #48 --- DESCRIPTION | 1 + README.Rmd | 61 ------------------- dev/config_fusen.yaml | 13 +++++ dev/maldipickr.Rmd | 110 ++++++++++++++++++++++++++++++++++ vignettes/maldipickr.Rmd | 123 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 247 insertions(+), 61 deletions(-) create mode 100644 dev/maldipickr.Rmd create mode 100644 vignettes/maldipickr.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 6cf89f1..5ec11b6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,6 +41,7 @@ Imports: tools, utils Suggests: + coop, knitr, rmarkdown, spelling, diff --git a/README.Rmd b/README.Rmd index d436aa3..2a35538 100644 --- a/README.Rmd +++ b/README.Rmd @@ -36,67 +36,6 @@ knitr::opts_chunk$set( Illustration (click for a bigger version) of the data flow when using `{maldipickr}` to cherry-pick bacterial isolates with MALDI Biotyper. It depicts the two possible approaches using either taxonomic identification reports (left) or spectra data (right). -## Quickstart - -How to **cherry-pick bacterial isolates** with MALDI Biotyper: - -* [using taxonomic identification report](#using-taxonomic-identification-report) -* [using spectra data](#using-spectra-data) - - -### Using taxonomic identification report - -```{r quickstart_report, eval=TRUE} -library(maldipickr) -# Import Biotyper CSV report -# and glimpse at the table -report_tbl <- read_biotyper_report( - system.file("biotyper_unknown.csv", package = "maldipickr") -) -report_tbl %>% - dplyr::select(name, bruker_species, bruker_log) - - -# Delineate clusters from the identifications after filtering the reliable ones -# and cherry-pick one representative spectra. 
-# The chosen ones are indicated by `to_pick` column -report_tbl <- report_tbl %>% - dplyr::mutate( - bruker_species = dplyr::if_else(bruker_log >= 2, bruker_species, - "not reliable identification") - ) -report_tbl %>% - delineate_with_identification() %>% - pick_spectra(report_tbl, criteria_column = "bruker_log") %>% - dplyr::relocate(name, to_pick, bruker_species) -``` - -### Using spectra data - -```{r quickstart_spectra} -library(maldipickr) -# Set up the directory location of your spectra data -spectra_dir <- system.file("toy-species-spectra", package = "maldipickr") - -# Import and process the spectra -processed <- spectra_dir %>% - import_biotyper_spectra() %>% - process_spectra() - -# Delineate spectra clusters using Cosine similarity -# and cherry-pick one representative spectra. -# The chosen ones are indicated by `to_pick` column -processed %>% - list() %>% - merge_processed_spectra() %>% - coop::tcosine() %>% - delineate_with_similarity(threshold = 0.92) %>% - set_reference_spectra(processed$metadata) %>% - pick_spectra() %>% - dplyr::relocate(name, to_pick) -``` - - ## Installation diff --git a/dev/config_fusen.yaml b/dev/config_fusen.yaml index 53d3421..50664f9 100644 --- a/dev/config_fusen.yaml +++ b/dev/config_fusen.yaml @@ -79,3 +79,16 @@ keep: - tests/testthat/test-remove_spectra_logical.R - tests/testthat/test-remove_spectra.R vignettes: [] +maldipickr.Rmd: + path: dev/maldipickr.Rmd + state: active + R: [] + tests: [] + vignettes: vignettes/maldipickr.Rmd + inflate: + flat_file: dev/maldipickr.Rmd + vignette_name: maldipickr + open_vignette: true + check: false + document: true + overwrite: ask diff --git a/dev/maldipickr.Rmd b/dev/maldipickr.Rmd new file mode 100644 index 0000000..32573dd --- /dev/null +++ b/dev/maldipickr.Rmd @@ -0,0 +1,110 @@ +--- +title: "maldipickr" +output: html_document +editor_options: + chunk_output_type: console +--- + +```{r development-load} +# Load already included functions if relevant 
+pkgload::load_all(export_all = FALSE) +``` + +## Quickstart + +The `{maldipickr}` package helps microbiologists reduce duplicate/clonal bacteria from their cultures and eventually exclude previously selected bacteria. `{maldipickr}` achieves this feat by grouping together data from MALDI Biotyper and helps choose representative bacteria from each group using user-relevant metadata -- a process known as **cherry-picking**. + +`{maldipickr}` cherry-picks bacterial isolates with MALDI Biotyper: + +* [using taxonomic identification report](#using-taxonomic-identification-report) +* [using spectra data](#using-spectra-data) + + +### Using taxonomic identification report + +First make sure `{maldipickr}` is installed and loaded; alternatively, [follow the instructions to install the package](https://clavellab.github.io/maldipickr/index.html#installation). + +Cherry-picking four isolates based on their taxonomic identification by the MALDI Biotyper is done in a few steps with `{maldipickr}`. + +#### Get example data + +We import an example Biotyper CSV report and glimpse at the table. + +```{r quickstart_report_data, eval=TRUE} +report_tbl <- read_biotyper_report( + system.file("biotyper_unknown.csv", package = "maldipickr") +) +report_tbl %>% + dplyr::select(name, bruker_species, bruker_log) %>% knitr::kable() +``` + +#### Delineate clusters and cherry-pick + +Delineate clusters from the identifications after filtering the reliable ones and cherry-pick one representative spectrum. + +Unreliable identifications based on the log-score are replaced by "not reliable identification", but stay tuned as they do not represent the same isolates! + +```{r quickstart_report_filter, eval=TRUE} +report_tbl <- report_tbl %>% + dplyr::mutate( + bruker_species = dplyr::if_else(bruker_log >= 2, bruker_species, + "not reliable identification") + ) +knitr::kable(report_tbl) +``` + +The chosen ones are indicated by the `to_pick` column. 
+ +```{r quickstart_report_delineate, eval=TRUE} +report_tbl %>% + delineate_with_identification() %>% + pick_spectra(report_tbl, criteria_column = "bruker_log") %>% + dplyr::relocate(name, to_pick, bruker_species) %>% + knitr::kable() +``` + +### Using spectra data + +In parallel to taxonomic identification reports, `{maldipickr}` processes spectra data. +Make sure `{maldipickr}` is installed and loaded; alternatively, [follow the instructions to install the package](https://clavellab.github.io/maldipickr/index.html#installation). + +Cherry-picking six isolates from three species based on their spectra data obtained from the MALDI Biotyper is done in a few steps with `{maldipickr}`. + +#### Get example data + +We set up the directory location of our example spectra data; adjust it to your own requirements. We import and process the spectra, which gives us a named list of three objects: spectra, peaks and metadata (more details in the Value section of `process_spectra()`). + + +```{r quickstart_spectra_data, eval=TRUE} +spectra_dir <- system.file("toy-species-spectra", package = "maldipickr") + +processed <- spectra_dir %>% + import_biotyper_spectra() %>% + process_spectra() +``` + +#### Delineate clusters and cherry-pick + +Delineate spectra clusters using cosine similarity and cherry-pick one representative spectrum. +The chosen ones are indicated by the `to_pick` column. + +```{r quickstart_spectra_delineate, eval=TRUE} +processed %>% + list() %>% + merge_processed_spectra() %>% + coop::tcosine() %>% + delineate_with_similarity(threshold = 0.92) %>% + set_reference_spectra(processed$metadata) %>% + pick_spectra() %>% + dplyr::relocate(name, to_pick) %>% + knitr::kable() +``` + +This provides only a brief overview of the features of `{maldipickr}`; browse the other vignettes to learn more about additional features. 
+ +```{r development-inflate, eval=FALSE} +# Run but keep eval=FALSE to avoid infinite loop +# Execute in the console directly +fusen::inflate(flat_file = "dev/maldipickr.Rmd", vignette_name = "maldipickr") +``` + diff --git a/vignettes/maldipickr.Rmd b/vignettes/maldipickr.Rmd new file mode 100644 index 0000000..69f7aa7 --- /dev/null +++ b/vignettes/maldipickr.Rmd @@ -0,0 +1,123 @@ +--- +title: "maldipickr" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{maldipickr} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(maldipickr) +``` + + + +## Quickstart + +The `{maldipickr}` package helps microbiologists reduce duplicate/clonal bacteria from their cultures and eventually exclude previously selected bacteria. `{maldipickr}` achieves this feat by grouping together data from MALDI Biotyper and helps choose representative bacteria from each group using user-relevant metadata -- a process known as **cherry-picking**. + +`{maldipickr}` cherry-picks bacterial isolates with MALDI Biotyper: + +* [using taxonomic identification report](#using-taxonomic-identification-report) +* [using spectra data](#using-spectra-data) + + + +### Using taxonomic identification report + +First make sure `{maldipickr}` is installed and loaded; alternatively, [follow the instructions to install the package](https://clavellab.github.io/maldipickr/index.html#installation). + +Cherry-picking four isolates based on their taxonomic identification by the MALDI Biotyper is done in a few steps with `{maldipickr}`. + + +#### Get example data + +We import an example Biotyper CSV report and glimpse at the table. 
+ + +```{r quickstart_report_data, eval = TRUE} +report_tbl <- read_biotyper_report( + system.file("biotyper_unknown.csv", package = "maldipickr") +) +report_tbl %>% + dplyr::select(name, bruker_species, bruker_log) %>% knitr::kable() +``` + +#### Delineate clusters and cherry-pick + +Delineate clusters from the identifications after filtering the reliable ones and cherry-pick one representative spectrum. + +Unreliable identifications based on the log-score are replaced by "not reliable identification", but stay tuned as they do not represent the same isolates! + + +```{r quickstart_report_filter, eval = TRUE} +report_tbl <- report_tbl %>% + dplyr::mutate( + bruker_species = dplyr::if_else(bruker_log >= 2, bruker_species, + "not reliable identification") + ) +knitr::kable(report_tbl) +``` + +The chosen ones are indicated by the `to_pick` column. + + +```{r quickstart_report_delineate, eval = TRUE} +report_tbl %>% + delineate_with_identification() %>% + pick_spectra(report_tbl, criteria_column = "bruker_log") %>% + dplyr::relocate(name, to_pick, bruker_species) %>% + knitr::kable() +``` + +### Using spectra data + +In parallel to taxonomic identification reports, `{maldipickr}` processes spectra data. +Make sure `{maldipickr}` is installed and loaded; alternatively, [follow the instructions to install the package](https://clavellab.github.io/maldipickr/index.html#installation). + +Cherry-picking six isolates from three species based on their spectra data obtained from the MALDI Biotyper is done in a few steps with `{maldipickr}`. + + +#### Get example data + +We set up the directory location of our example spectra data; adjust it to your own requirements. We import and process the spectra, which gives us a named list of three objects: spectra, peaks and metadata (more details in the Value section of `process_spectra()`). 
+ + + +```{r quickstart_spectra_data, eval = TRUE} +spectra_dir <- system.file("toy-species-spectra", package = "maldipickr") + +processed <- spectra_dir %>% + import_biotyper_spectra() %>% + process_spectra() +``` + +#### Delineate clusters and cherry-pick + +Delineate spectra clusters using cosine similarity and cherry-pick one representative spectrum. +The chosen ones are indicated by the `to_pick` column. + + +```{r quickstart_spectra_delineate, eval = TRUE} +processed %>% + list() %>% + merge_processed_spectra() %>% + coop::tcosine() %>% + delineate_with_similarity(threshold = 0.92) %>% + set_reference_spectra(processed$metadata) %>% + pick_spectra() %>% + dplyr::relocate(name, to_pick) %>% + knitr::kable() +``` + +This provides only a brief overview of the features of `{maldipickr}`; browse the other vignettes to learn more about additional features. + +