diff --git a/.directory b/.directory deleted file mode 100644 index ca2a222..0000000 --- a/.directory +++ /dev/null @@ -1,6 +0,0 @@ -[Dolphin] -Timestamp=2018,8,24,14,58,52 -Version=4 - -[Settings] -HiddenFilesShown=true diff --git a/.gitignore b/.gitignore index 0207861..3a59a03 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ Update package.R imgur.key .Renviron README_files +.directory diff --git a/R/rwhatsapp.R b/R/rwhatsapp.R index 14c5634..2eb2080 100755 --- a/R/rwhatsapp.R +++ b/R/rwhatsapp.R @@ -31,7 +31,8 @@ rwa_read <- function(x, ...) { if (verbose) { - start_time <- status("Reading chat history from", appendLF = FALSE, ppfix = "") + start_time <- status("Reading chat history from", + appendLF = FALSE, ppfix = "") } else { start_time <- NULL } @@ -39,21 +40,21 @@ rwa_read <- function(x, chat_raw <- rwa_read_lines(x, verbose, start_time, ...) chat_raw <- chat_raw[!chat_raw == ""] - time <- stringi::stri_extract_first_regex( + time <- stri_extract_first_regex( str = chat_raw, pattern = "^\\d+-\\d+-\\d+.*-|[^-]+ - " ) if (sum(is.na(time)) > (length(time) / 2)) { - time <- stringi::stri_extract_first_regex(str = chat_raw, - pattern = "[^]]+] ") + time <- stri_extract_first_regex(str = chat_raw, + pattern = "[^]]+] ") } if (sum(is.na(time)) == length(time)) { - time <- stringi::stri_extract_first_regex(str = chat_raw, - pattern = "^.*\\d+:\\d+") + time <- stri_extract_first_regex(str = chat_raw, + pattern = "^.*\\d+:\\d+") } for (l in which(is.na(time))) { - chat_raw[l - 1] <- stringi::stri_paste(chat_raw[l - 1], chat_raw[l], - sep = "\n") + chat_raw[l - 1] <- stri_paste(chat_raw[l - 1], chat_raw[l], + sep = "\n") } chat_raw <- chat_raw[!is.na(time)] @@ -61,11 +62,11 @@ rwa_read <- function(x, if (verbose) status("timestamps extracted") source <- names(chat_raw) - chat_raw <- stringi::stri_replace_first_fixed(str = chat_raw, - pattern = time, - replacement = "") + chat_raw <- stri_replace_first_fixed(str = chat_raw, + pattern = time, + replacement = "") - time <- stringi::stri_replace_all_regex( + time <- stri_replace_all_regex( str = time, pattern = c("\\[", "\\]", "-$", "- $"), replacement = c("", "", "", ""), @@ -81,22 +82,22 @@ rwa_read <- function(x, " or add an issue at www.github.com/JBGruber/rwhatsapp.") } - author <- stringi::stri_extract_first_regex(str = chat_raw, - pattern = "[^:]+: ") - chat_raw[!is.na(author)] <- stringi::stri_replace_first_fixed( + author <- stri_extract_first_regex(str = chat_raw, + pattern = "[^:]+: ") + chat_raw[!is.na(author)] <- stri_replace_first_fixed( str = chat_raw[!is.na(author)], pattern = author[!is.na(author)], replacement = "" ) - author <- stringi::stri_replace_last_fixed(str = author, - pattern = ": ", - replacement = "") + author <- stri_replace_last_fixed(str = author, + pattern = ": ", + replacement = "") if (verbose) status("author extracted") tbl <- tibble::tibble( time = time, - author = as.factor(stringi::stri_trim_both(author)), + author = as.factor(stri_trim_both(author)), text = chat_raw, source = source ) @@ -118,7 +119,10 @@ rwa_read <- function(x, #' Read in files from supported formats #' +#' @param start_time For verbose messages. #' @inherit rwa_read +#' @import stringi +#' @noRd rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { # get files zps <- grep(".zip$", x, ignore.case = TRUE) @@ -126,18 +130,18 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { src <- NULL if (length(zps) > 0) { src <- x[zps] - x[zps] <- vapply(x[zps], function(x) { + x[zps] <- vapply(x[zps], FUN.VALUE = character(1), FUN = function(x) { content <- unzip(x, list = TRUE) content <- content[grepl(".txt$", content$Name, ignore.case = TRUE), ] temp <- paste0(tempdir(), "/whatsapp") - unzip(x, files = content$Name, overwrite = TRUE, exdir = temp) + unzip(x, files = content$Name, overwrite = TRUE, exdir = temp) return(list.files(temp, pattern = content$Name, full.names = TRUE)) - }, FUN.VALUE = character(1)) + }) } if (f_exist_s(x)) { if (length(x) == 1) { - chat_raw <- stringi::stri_read_lines(x, ...) + chat_raw <- stri_read_lines(x, ...) names(chat_raw) <- rep(x, length(chat_raw)) if (verbose) { message(" one log file...") @@ -145,7 +149,7 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { } } else { chat_raw <- unlist(lapply(x, function(t) { - cr <- stringi::stri_read_lines(t)#, ...) + cr <- stri_read_lines(t)#, ...) names(cr) <- rep(t, length(cr)) return(cr) })) @@ -162,11 +166,11 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { status("object loaded ") } } else { - stop("Provide either a path to one or multiple txt or zip files of a WhatsApp ", - "history or the history itself as character object.") + stop("Provide either a path to one or multiple txt or zip files of a ", + "WhatsApp history or the history itself as character object.") } if (length(zps) > 0) { - names(chat_raw) <- stringi::stri_replace_last_fixed(names(chat_raw), x[zps], src) + names(chat_raw) <- stri_replace_last_fixed(names(chat_raw), x[zps], src) unlink(temp, recursive = TRUE) } return(chat_raw) @@ -175,7 +179,10 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { #' Parse time #' +#' @param time A character object with times to parse. #' @inherit rwa_read +#' @import stringi +#' @noRd rwa_parse_time <- function(time, format, tz) { if (is.null(format)) { formats <- c( @@ -188,38 +195,38 @@ rwa_parse_time <- function(time, format, tz) { "MM.dd.yyyy, HH:mm:ss", "MM.dd.yyyy, HH:mm" ) - if (any(stringi::stri_detect_fixed(time, "."))) { - if (sum(stringi::stri_detect_regex(time, "\\d+.\\d+.\\d{2}")) > + if (any(stri_detect_fixed(time, "."))) { + if (sum(stri_detect_regex(time, "\\d+.\\d+.\\d{2}")) > (length(time) * 0.9)) { - formats <- stringi::stri_replace_all_fixed( + formats <- stri_replace_all_fixed( formats, "yyyy", "yy" ) } - } else if (any(stringi::stri_detect_fixed(time, "/"))) { - formats <- stringi::stri_replace_all_fixed( + } else if (any(stri_detect_fixed(time, "/"))) { + formats <- stri_replace_all_fixed( formats, ".", "/" ) - if (sum(stringi::stri_detect_regex(time, "\\d+/\\d+/\\d{2}")) > + if (sum(stri_detect_regex(time, "\\d+/\\d+/\\d{2}")) > (length(time) * 0.9)) { - formats <- stringi::stri_replace_all_fixed( + formats <- stri_replace_all_fixed( formats, "yyyy", "yy" ) } - } else if (any(stringi::stri_detect_fixed(time, "-"))) { - formats <- stringi::stri_replace_all_fixed( + } else if (any(stri_detect_fixed(time, "-"))) { + formats <- stri_replace_all_fixed( formats, ".", "-" ) - if (sum(stringi::stri_detect_regex(time, "\\d+-\\d+-\\d{2}")) > + if (sum(stri_detect_regex(time, "\\d+-\\d+-\\d{2}")) > (length(time) * 0.9)) { - formats <- stringi::stri_replace_all_fixed( + formats <- stri_replace_all_fixed( formats, "yyyy", "yy" @@ -231,18 +238,18 @@ rwa_parse_time <- function(time, format, tz) { ) } test <- sapply(formats, function(f) { - test <- stringi::stri_datetime_parse(str = head(time, n = 1000), - format = f, - lenient = FALSE, - tz = tz) + test <- stri_datetime_parse(str = head(time, n = 1000), + format = f, + lenient = FALSE, + tz = tz) sum(is.na(test)) }) format <- names(which.min(test)) } - time <- stringi::stri_datetime_parse(str = time, - format = format, - tz = tz) + time <- stri_datetime_parse(str = time, + format = format, + tz = tz) return(time) } @@ -255,7 +262,7 @@ rwa_parse_time <- function(time, format, tz) { #' @importFrom rlang .data rwa_add_emoji <- function(x) { x$id <- seq_along(x$text) - x$text <- stringi::stri_replace_all_regex( + x$text <- stri_replace_all_regex( x$text, "[[:alnum:]]", "x" @@ -286,12 +293,17 @@ rwa_add_emoji <- function(x) { # creates status message and exports start_time if not in parent environment yet -status <- function(..., sep = "", appendLF = TRUE, ppfix = "...", indent = "\t") { +status <- function(..., + sep = "", + appendLF = TRUE, + ppfix = "...", + indent = "\t") { if (exists("start_time", envir = parent.frame())) { start_time <- mget("start_time", envir = parent.frame())[[1]] diff <- format((Sys.time() - start_time), digits = 2, nsmall = 2) - message(paste(indent, ppfix, ..., " [", diff, "]", sep = sep), appendLF = appendLF) + message(paste(indent, ppfix, ..., " [", diff, "]", sep = sep), + appendLF = appendLF) } else { export <- Sys.time() start_time <- export diff --git a/README.Rmd b/README.Rmd index c6018d6..975e9ca 100644 --- a/README.Rmd +++ b/README.Rmd @@ -26,7 +26,7 @@ WhatsApp seems to become increasingly important not just as a messaging service Furthermore, retrieving chat logs from the Android or iOS app is very straightforward: Simply choose `More` in the menu of a chat, then `Export chat` and export the history to a txt file. - + This package is intended make the first step of analysing WhatsApp text data as easy as possible---reading your chat history into `R`. This should work, no matter which device or locale you used to retrieve the `txt` or `zip` file containing your conversations. diff --git a/README.md b/README.md index 9150367..18ba9f3 100755 --- a/README.md +++ b/README.md @@ -25,9 +25,7 @@ capabilities. Furthermore, retrieving chat logs from the Android or iOS app is very straightforward: Simply choose `More` in the menu of a chat, then `Export chat` and export the history to a txt file. - - - + This package is intended make the first step of analysing WhatsApp text data as easy as possible—reading your chat history into `R`. This should @@ -76,7 +74,11 @@ chat ## 6 2017-07-13 09:16:48 Johanne… Haha it sure… /home/johann… ## 7 2018-09-28 13:27:48 Johanne… Did you know… /home/johann… ## 8 2018-09-28 13:28:48 Johanne… 😀😃😄😁😆😅😂🤣☺😊😇🙂… /home/johann… + ## 9 2018-09-28 13:30:48 Johanne… 🤷 +♀🤷🏻 +♂🙎 +♀🙎 +… /home/johann… Now, this isn’t very interesting so you will probably want to use your own data. For this demonstration, I use one of my own chat logs from a diff --git a/inst/WORDLIST b/inst/WORDLIST index 7fca143..f0fe504 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,6 +1,7 @@ Artur Artur's Artur’s +chr das eig eigentlich @@ -11,6 +12,7 @@ im ja jaa joh +johann Macbook ne oman diff --git a/man/rwa_parse_time.Rd b/man/rwa_parse_time.Rd deleted file mode 100644 index 23c9d86..0000000 --- a/man/rwa_parse_time.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rwhatsapp.R -\name{rwa_parse_time} -\alias{rwa_parse_time} -\title{Parse time} -\usage{ -rwa_parse_time(time, format, tz) -} -\arguments{ -\item{format}{Most formats are automatically detected. If you encounter -problems you can provide a custom format here. Refer to -\link[stringi]{stri_datetime_parse} for guidance.} - -\item{tz}{A time zone for date conversion. Set NULL or "" for the default -time zone or a single string with a timezone identifier, see -\link[stringi]{stri_timezone_list}.} -} -\value{ -a tibble -} -\description{ -The history can be obtained going to the menu in a chat on the WhatsApp app, -choosing "more", then "Export chat". -} -\examples{ -history <- system.file("extdata", "sample.txt", package = "rwhatsapp") -df <- rwa_read(history) -} diff --git a/man/rwa_read_lines.Rd b/man/rwa_read_lines.Rd deleted file mode 100644 index 50340d0..0000000 --- a/man/rwa_read_lines.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rwhatsapp.R -\name{rwa_read_lines} -\alias{rwa_read_lines} -\title{Read in files from supported formats} -\usage{ -rwa_read_lines(x, verbose, ...) -} -\arguments{ -\item{x}{Path to a txt or zip file of a WhatsApp history or the history -itself as character object.} - -\item{verbose}{A logical flag indicating whether information should be -printed to the screen.} - -\item{...}{Further arguments passed to \link[stringi]{stri_read_lines}.} -} -\value{ -a tibble -} -\description{ -The history can be obtained going to the menu in a chat on the WhatsApp app, -choosing "more", then "Export chat". -} -\examples{ -history <- system.file("extdata", "sample.txt", package = "rwhatsapp") -df <- rwa_read(history) -} diff --git a/tests/testthat/test-rwhatsapp.R b/tests/testthat/test-rwhatsapp.R index 63a7c18..80a9f79 100755 --- a/tests/testthat/test-rwhatsapp.R +++ b/tests/testthat/test-rwhatsapp.R @@ -577,7 +577,7 @@ test_that("reading from file", { dir.create(dir) file.copy(system.file("extdata", "sample.txt", package = "rwhatsapp"), dir) - zip(paste0(dir, "test.zip"), paste0(dir, "sample.txt"), flags = "-jr9X") + utils::zip(paste0(dir, "test.zip"), paste0(dir, "sample.txt"), flags = "-jr9X") out <- rwa_read(x = paste0(dir, "test.zip"), tz = "GMT", encoding = "UTF-8", diff --git a/vignettes/Text_Analysis_using_WhatsApp_data.Rmd b/vignettes/Text_Analysis_using_WhatsApp_data.Rmd index 03082dc..db32ac2 100644 --- a/vignettes/Text_Analysis_using_WhatsApp_data.Rmd +++ b/vignettes/Text_Analysis_using_WhatsApp_data.Rmd @@ -25,9 +25,7 @@ capabilities. Furthermore, retrieving chat logs from the Android or iOS app is very straightforward: Simply choose `More` in the menu of a chat, then `Export chat` and export the history to a txt file. - - - + This package is intended make the first step of analysing WhatsApp text data as easy as possible—reading your chat history into `R`. This should