diff --git a/NAMESPACE b/NAMESPACE index f24c3ae..de82419 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(get_rapidpro_key) export(get_rapidpro_site) export(get_rapidpro_uuid_names) export(get_user_data) +export(link_data_frames) export(set_rapidpro_key) export(set_rapidpro_site) export(set_rapidpro_uuid_names) diff --git a/R/get_flow_data.R b/R/get_flow_data.R index c4c5b3c..c389933 100644 --- a/R/get_flow_data.R +++ b/R/get_flow_data.R @@ -25,9 +25,6 @@ #' #' @return List separated by each flow_name provided. Each element in the list contains a data frame for each flow_name provided. #' @export -# TODO: `flow_type` is relevant to ParentText. We should have a general wrapper for this function in the rapidpror package -# That wrapper function calls this parenttext specific function from a separate package, and has flow_type = "none". - get_flow_data <- function (uuid_data = get_rapidpro_uuid_names(), flow_name = NULL, rapidpro_site = get_rapidpro_site(), token = get_rapidpro_key(), by = c("flow_name", "id_name"), id_names = NULL, flatten = FALSE, checks = FALSE, return_all = FALSE, flow_type = "none", flow_handle_type = NULL, @@ -151,7 +148,6 @@ get_flow_data <- function (uuid_data = get_rapidpro_uuid_names(), flow_name = NU call_type <- "runs.json?contact=" get_command <- paste(rapidpro_site, call_type, id_names[i], sep = "") result_flow <- rapidpror:::httr_get_call(get_command = get_command, token = token) - if (return_all) { flow_data[[i]] <- result_flow } else { @@ -163,11 +159,10 @@ get_flow_data <- function (uuid_data = get_rapidpro_uuid_names(), flow_name = NU flatten = flatten, flow_type = flow_type, flow_handle_type = flow_handle_type, flow_handle_type_sub = flow_handle_type_sub, date_from = date_from, date_to = date_to, format_date = format_date, - tzone_date = tzone_date, created_on = created_on) %>% - dplyr::mutate(flow_type = uuid_flow[1, 1]) + tzone_date = tzone_date, created_on = created_on) } } } - return(plyr::ldply(flow_data)) + 
return(dplyr::bind_rows(flow_data)) } } \ No newline at end of file diff --git a/R/link_data_frames.R b/R/link_data_frames.R new file mode 100644 index 0000000..f0eae18 --- /dev/null +++ b/R/link_data_frames.R @@ -0,0 +1,28 @@ +#' Link Data Frames within R-Instat +#' +#' This function links two data frames within the R-Instat environment using specified linking pairs. +#' It is specifically designed for use within R-Instat and will stop with an error message if the data book object is not found, +#' indicating that the data frames are not being managed through R-Instat. +#' +#' @param from_data_frame The name of the source data frame to link from, defaults to "flow_data". +#' @param to_data_frame The name of the target data frame to link to, defaults to "user_data". +#' @param link_pairs A named vector that specifies the column names used to establish the link, +#' with the name being the column in the `from_data_frame` and the value being the corresponding column in the `to_data_frame`. +#' Defaults to linking "uuid" in both data frames. +#' +#' @return None; the function modifies the `data_book` environment by adding a new link. +#' @examples +#' # Assuming 'data_book' and required data frames are already loaded in R-Instat: +#' # link_data_frames("survey_data", "participant_data", c(participant_id="id")) +#' +#' @export +link_data_frames <- function(from_data_frame="flow_data", to_data_frame="user_data", link_pairs=c(uuid="uuid")){ + if (!exists("data_book")){ + stop("No data book found. Likely issue: Are you importing data through R-Instat? 
This function is for use in R-Instat.") + } + data_book$add_link(from_data_frame = from_data_frame, + to_data_frame = to_data_frame, + link_pairs = link_pairs, + type = "keyed_link", + link_name = "link") +} diff --git a/man/link_data_frames.Rd b/man/link_data_frames.Rd new file mode 100644 index 0000000..1a4e145 --- /dev/null +++ b/man/link_data_frames.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/link_data_frames.R +\name{link_data_frames} +\alias{link_data_frames} +\title{Link Data Frames within R-Instat} +\usage{ +link_data_frames( + from_data_frame = "flow_data", + to_data_frame = "user_data", + link_pairs = c(uuid = "uuid") +) +} +\arguments{ +\item{from_data_frame}{The name of the source data frame to link from, defaults to "flow_data".} + +\item{to_data_frame}{The name of the target data frame to link to, defaults to "user_data".} + +\item{link_pairs}{A named vector that specifies the column names used to establish the link, +with the name being the column in the \code{from_data_frame} and the value being the corresponding column in the \code{to_data_frame}. +Defaults to linking "uuid" in both data frames.} +} +\value{ +None; the function modifies the \code{data_book} environment by adding a new link. +} +\description{ +This function links two data frames within the R-Instat environment using specified linking pairs. +It is specifically designed for use within R-Instat and will stop with an error message if the data book object is not found, +indicating that the data frames are not being managed through R-Instat. 
+} +\examples{ +# Assuming 'data_book' and required data frames are already loaded in R-Instat: +# link_data_frames("survey_data", "participant_data", c(participant_id="id")) + +} diff --git a/paper.Rmd b/paper.Rmd new file mode 100644 index 0000000..96a8408 --- /dev/null +++ b/paper.Rmd @@ -0,0 +1,70 @@ +--- +title: 'rapidpror: An R package for importing data from RapidPro into R' +tags: + - R + - rapidpro + - mobile communication + - chatbots + - importing data + - data analysis +authors: + - name: Lily Clements + orcid: 0000-0000-0000-0000 + equal-contrib: true + affiliation: "1" + - name: David Stern + orcid: 0000-0000-0000-0000 + equal-contrib: true + affiliation: "1" + - name: Chiara Facciola? +affiliations: + - name: IDEMS International + index: 1 +date: 29 April 2024 +bibliography: paper.bib + +--- + +# Summary + +This article introduces `rapidpror`, an R package developed to integrate with RapidPro, a platform developed by UNICEF for creating mobile-based messaging systems like chatbots [@rapidpro]. RapidPro, particularly beneficial in low-connectivity or crisis-affected areas, supports various activities including social and behavioural change communication, and data collection across humanitarian and developmental sectors [@about_rapidpro]. + +The `rapidpror` package simplifies the process of importing chatbot data from RapidPro into the R environment. It incorporates filtering methods and automatic data frame linking to help enhance the data analysis process. This functionality serves to broaden the accessibility of data analysis, catering to users at various levels of R proficiency to help promote efficient research. + +# Statement of need + +RapidPro, developed by UNICEF, is a free, open-source platform designed to create and manage mobile-based messaging systems, such as chatbots [@rapidpro]. 
This tool is instrumental in facilitating social and behavioural change communication (SBCC) and data collection across various humanitarian and developmental contexts [@unicef2020global]. By enabling operations in regions with limited internet connectivity, RapidPro is particularly valuable in underserved or crisis-affected areas, providing real-time information on activities such as health, education, and child protection [@about_rapidpro]. + +In this article, we present `rapidpror`, an R package that allows for chatbot data to be efficiently and easily imported from RapidPro into R. This is achieved by incorporating advanced filtering mechanisms and automatic linking of different data frames obtained from RapidPro. `rapidpror` caters to users with varying levels of experience in R. + +By simplifying the data import process, `rapidpror` lets users engage more deeply with the insights provided by chatbot data. This can empower researchers to make informed decisions and interventions. This capability is particularly relevant in settings where timely and efficient data analysis can drive impactful outcomes. + +`rapidpror` is positioned within the broader context of computational tools for data-driven research, offering a direct, simplified channel between the chatbot data received by RapidPro and statistical analysis tools in R. + +# Usage + +The use of `rapidpror` begins by establishing a connection to RapidPro using the user's API credentials. This involves setting the API key and the RapidPro site URL with the following functions: + +```r +rapidpror::set_rapidpro_key(key = "YOUR_API_TOKEN_HERE") +rapidpror::set_rapidpro_site(site = "YOUR_RAPIDPRO_SITE_URL_HERE") +``` + +This stores the key and site, ensuring that subsequent calls to RapidPro use these credentials. RapidPro processes data at various levels. The primary focus is on user data and flow data: + +- User Data: This includes information related to the users of the chatbot system. 
To retrieve this data, use `get_user_data()`. +- Flow Data: This refers to the data generated by interactions within the chatbot flows. To access this data, use `get_flow_data()`. You can specify whether to call all flows for a specific set of users, or whether to call specific flows. +The package also allows access to RapidPro’s full suite of data through a more general function: `get_data_from_rapidpro_api()`. In this function, you can specify the call type to make. For example, `get_user_data()` is effectively running + +`get_data_from_rapidpro_api(call_type = "contacts.json")` + +# Data Integration +With integration support for the open-source software R-Instat [@rinstat], `rapidpror` enables automatic linking of different data frames obtained from RapidPro. This feature supports comprehensive analysis across various levels of data, facilitating deeper insights. + +# Acknowledgements +Mention (if applicable) a representative set of past or ongoing research projects using the software and recent scholarly publications enabled by it. + +We acknowledge contributions from Brigitta Sipocz, Syrtis Major, and Semyeong +Oh, and support from Kathryn Johnston during the genesis of this project. + +# References \ No newline at end of file diff --git a/paper.md b/paper.md index 4aef88f..934d411 100644 --- a/paper.md +++ b/paper.md @@ -12,11 +12,14 @@ authors: orcid: 0000-0000-0000-0000 equal-contrib: true affiliation: "1" + - name: Chiara Facciola (check spelling/accents) + orcid: 0000-0000-0000-0000 + equal-contrib: true + affiliation: "1" - name: David Stern orcid: 0000-0000-0000-0000 equal-contrib: true affiliation: "1" - - name: Chiara Facciola? affiliations: - name: IDEMS International index: 1 @@ -61,20 +64,6 @@ The package also allows access to RapidPro’s full suite of data through a more # Data Integration With integration support for the open-source software R-Instat [@rinstat], rapidpror enables automatic linking of different data frames obtained from RapidPro. 
This feature supports comprehensive analysis across various levels of data, facilitating deeper insights. -# Citations - -Citations to entries in paper.bib should be in -[rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) -format. - -If you want to cite a software repository URL (e.g. something on GitHub without a preferred -citation) then you can do it with the example BibTeX entry below for @fidgit. - -For a quick reference, the following citation commands can be used: -- `@author:2001` -> "Author et al. (2001)" -- `[@author:2001]` -> "(Author et al., 2001)" -- `[@author1:2001; @author2:2001]` -> "(Author1 et al., 2001; Author2 et al., 2002)" - # Acknowledgements Mention (if applicable) a representative set of past or ongoing research projects using the software and recent scholarly publications enabled by it.