diff --git a/.directory b/.directory
deleted file mode 100644
index ca2a222..0000000
--- a/.directory
+++ /dev/null
@@ -1,6 +0,0 @@
-[Dolphin]
-Timestamp=2018,8,24,14,58,52
-Version=4
-
-[Settings]
-HiddenFilesShown=true
diff --git a/.gitignore b/.gitignore
index 0207861..3a59a03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ Update package.R
imgur.key
.Renviron
README_files
+.directory
diff --git a/R/rwhatsapp.R b/R/rwhatsapp.R
index 14c5634..2eb2080 100755
--- a/R/rwhatsapp.R
+++ b/R/rwhatsapp.R
@@ -31,7 +31,8 @@ rwa_read <- function(x,
...) {
if (verbose) {
- start_time <- status("Reading chat history from", appendLF = FALSE, ppfix = "")
+ start_time <- status("Reading chat history from",
+ appendLF = FALSE, ppfix = "")
} else {
start_time <- NULL
}
@@ -39,21 +40,21 @@ rwa_read <- function(x,
chat_raw <- rwa_read_lines(x, verbose, start_time, ...)
chat_raw <- chat_raw[!chat_raw == ""]
- time <- stringi::stri_extract_first_regex(
+ time <- stri_extract_first_regex(
str = chat_raw,
pattern = "^\\d+-\\d+-\\d+.*-|[^-]+ - "
)
if (sum(is.na(time)) > (length(time) / 2)) {
- time <- stringi::stri_extract_first_regex(str = chat_raw,
- pattern = "[^]]+] ")
+ time <- stri_extract_first_regex(str = chat_raw,
+ pattern = "[^]]+] ")
}
if (sum(is.na(time)) == length(time)) {
- time <- stringi::stri_extract_first_regex(str = chat_raw,
- pattern = "^.*\\d+:\\d+")
+ time <- stri_extract_first_regex(str = chat_raw,
+ pattern = "^.*\\d+:\\d+")
}
for (l in which(is.na(time))) {
- chat_raw[l - 1] <- stringi::stri_paste(chat_raw[l - 1], chat_raw[l],
- sep = "\n")
+ chat_raw[l - 1] <- stri_paste(chat_raw[l - 1], chat_raw[l],
+ sep = "\n")
}
chat_raw <- chat_raw[!is.na(time)]
@@ -61,11 +62,11 @@ rwa_read <- function(x,
if (verbose) status("timestamps extracted")
source <- names(chat_raw)
- chat_raw <- stringi::stri_replace_first_fixed(str = chat_raw,
- pattern = time,
- replacement = "")
+ chat_raw <- stri_replace_first_fixed(str = chat_raw,
+ pattern = time,
+ replacement = "")
- time <- stringi::stri_replace_all_regex(
+ time <- stri_replace_all_regex(
str = time,
pattern = c("\\[", "\\]", "-$", "- $"),
replacement = c("", "", "", ""),
@@ -81,22 +82,22 @@ rwa_read <- function(x,
" or add an issue at www.github.com/JBGruber/rwhatsapp.")
}
- author <- stringi::stri_extract_first_regex(str = chat_raw,
- pattern = "[^:]+: ")
- chat_raw[!is.na(author)] <- stringi::stri_replace_first_fixed(
+ author <- stri_extract_first_regex(str = chat_raw,
+ pattern = "[^:]+: ")
+ chat_raw[!is.na(author)] <- stri_replace_first_fixed(
str = chat_raw[!is.na(author)],
pattern = author[!is.na(author)],
replacement = ""
)
- author <- stringi::stri_replace_last_fixed(str = author,
- pattern = ": ",
- replacement = "")
+ author <- stri_replace_last_fixed(str = author,
+ pattern = ": ",
+ replacement = "")
if (verbose) status("author extracted")
tbl <- tibble::tibble(
time = time,
- author = as.factor(stringi::stri_trim_both(author)),
+ author = as.factor(stri_trim_both(author)),
text = chat_raw,
source = source
)
@@ -118,7 +119,10 @@ rwa_read <- function(x,
#' Read in files from supported formats
#'
+#' @param start_time Start time used to report elapsed time in verbose messages.
#' @inherit rwa_read
+#' @import stringi
+#' @noRd
rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {
# get files
zps <- grep(".zip$", x, ignore.case = TRUE)
@@ -126,18 +130,18 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {
src <- NULL
if (length(zps) > 0) {
src <- x[zps]
- x[zps] <- vapply(x[zps], function(x) {
+ x[zps] <- vapply(x[zps], FUN.VALUE = character(1), FUN = function(x) {
content <- unzip(x, list = TRUE)
content <- content[grepl(".txt$", content$Name, ignore.case = TRUE), ]
temp <- paste0(tempdir(), "/whatsapp")
- unzip(x, files = content$Name, overwrite = TRUE, exdir = temp)
+ unzip(x, files = content$Name, overwrite = TRUE, exdir = temp)
return(list.files(temp, pattern = content$Name, full.names = TRUE))
- }, FUN.VALUE = character(1))
+ })
}
if (f_exist_s(x)) {
if (length(x) == 1) {
- chat_raw <- stringi::stri_read_lines(x, ...)
+ chat_raw <- stri_read_lines(x, ...)
names(chat_raw) <- rep(x, length(chat_raw))
if (verbose) {
message(" one log file...")
@@ -145,7 +149,7 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {
}
} else {
chat_raw <- unlist(lapply(x, function(t) {
- cr <- stringi::stri_read_lines(t)#, ...)
+ cr <- stri_read_lines(t)#, ...)
names(cr) <- rep(t, length(cr))
return(cr)
}))
@@ -162,11 +166,11 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {
status("object loaded ")
}
} else {
- stop("Provide either a path to one or multiple txt or zip files of a WhatsApp ",
- "history or the history itself as character object.")
+ stop("Provide either a path to one or multiple txt or zip files of a ",
+ "WhatsApp history or the history itself as character object.")
}
if (length(zps) > 0) {
- names(chat_raw) <- stringi::stri_replace_last_fixed(names(chat_raw), x[zps], src)
+ names(chat_raw) <- stri_replace_last_fixed(names(chat_raw), x[zps], src)
unlink(temp, recursive = TRUE)
}
return(chat_raw)
@@ -175,7 +179,10 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {
#' Parse time
#'
+#' @param time A character vector of timestamps to parse.
#' @inherit rwa_read
+#' @import stringi
+#' @noRd
rwa_parse_time <- function(time, format, tz) {
if (is.null(format)) {
formats <- c(
@@ -188,38 +195,38 @@ rwa_parse_time <- function(time, format, tz) {
"MM.dd.yyyy, HH:mm:ss",
"MM.dd.yyyy, HH:mm"
)
- if (any(stringi::stri_detect_fixed(time, "."))) {
- if (sum(stringi::stri_detect_regex(time, "\\d+.\\d+.\\d{2}")) >
+ if (any(stri_detect_fixed(time, "."))) {
+ if (sum(stri_detect_regex(time, "\\d+.\\d+.\\d{2}")) >
(length(time) * 0.9)) {
- formats <- stringi::stri_replace_all_fixed(
+ formats <- stri_replace_all_fixed(
formats,
"yyyy",
"yy"
)
}
- } else if (any(stringi::stri_detect_fixed(time, "/"))) {
- formats <- stringi::stri_replace_all_fixed(
+ } else if (any(stri_detect_fixed(time, "/"))) {
+ formats <- stri_replace_all_fixed(
formats,
".",
"/"
)
- if (sum(stringi::stri_detect_regex(time, "\\d+/\\d+/\\d{2}")) >
+ if (sum(stri_detect_regex(time, "\\d+/\\d+/\\d{2}")) >
(length(time) * 0.9)) {
- formats <- stringi::stri_replace_all_fixed(
+ formats <- stri_replace_all_fixed(
formats,
"yyyy",
"yy"
)
}
- } else if (any(stringi::stri_detect_fixed(time, "-"))) {
- formats <- stringi::stri_replace_all_fixed(
+ } else if (any(stri_detect_fixed(time, "-"))) {
+ formats <- stri_replace_all_fixed(
formats,
".",
"-"
)
- if (sum(stringi::stri_detect_regex(time, "\\d+-\\d+-\\d{2}")) >
+ if (sum(stri_detect_regex(time, "\\d+-\\d+-\\d{2}")) >
(length(time) * 0.9)) {
- formats <- stringi::stri_replace_all_fixed(
+ formats <- stri_replace_all_fixed(
formats,
"yyyy",
"yy"
@@ -231,18 +238,18 @@ rwa_parse_time <- function(time, format, tz) {
)
}
test <- sapply(formats, function(f) {
- test <- stringi::stri_datetime_parse(str = head(time, n = 1000),
- format = f,
- lenient = FALSE,
- tz = tz)
+ test <- stri_datetime_parse(str = head(time, n = 1000),
+ format = f,
+ lenient = FALSE,
+ tz = tz)
sum(is.na(test))
})
format <- names(which.min(test))
}
- time <- stringi::stri_datetime_parse(str = time,
- format = format,
- tz = tz)
+ time <- stri_datetime_parse(str = time,
+ format = format,
+ tz = tz)
return(time)
}
@@ -255,7 +262,7 @@ rwa_parse_time <- function(time, format, tz) {
#' @importFrom rlang .data
rwa_add_emoji <- function(x) {
x$id <- seq_along(x$text)
- x$text <- stringi::stri_replace_all_regex(
+ x$text <- stri_replace_all_regex(
x$text,
"[[:alnum:]]",
"x"
@@ -286,12 +293,17 @@ rwa_add_emoji <- function(x) {
# creates status message and exports start_time if not in parent environment yet
-status <- function(..., sep = "", appendLF = TRUE, ppfix = "...", indent = "\t") {
+status <- function(...,
+ sep = "",
+ appendLF = TRUE,
+ ppfix = "...",
+ indent = "\t") {
if (exists("start_time", envir = parent.frame())) {
start_time <- mget("start_time", envir = parent.frame())[[1]]
diff <- format((Sys.time() - start_time), digits = 2, nsmall = 2)
- message(paste(indent, ppfix, ..., " [", diff, "]", sep = sep), appendLF = appendLF)
+ message(paste(indent, ppfix, ..., " [", diff, "]", sep = sep),
+ appendLF = appendLF)
} else {
export <- Sys.time()
start_time <- export
diff --git a/README.Rmd b/README.Rmd
index c6018d6..975e9ca 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -26,7 +26,7 @@ WhatsApp seems to become increasingly important not just as a messaging service
Furthermore, retrieving chat logs from the Android or iOS app is very straightforward:
Simply choose `More` in the menu of a chat, then `Export chat` and export the history to a txt file.
-
+
This package is intended make the first step of analysing WhatsApp text data as easy as possible---reading your chat history into `R`.
This should work, no matter which device or locale you used to retrieve the `txt` or `zip` file containing your conversations.
diff --git a/README.md b/README.md
index 9150367..18ba9f3 100755
--- a/README.md
+++ b/README.md
@@ -25,9 +25,7 @@ capabilities. Furthermore, retrieving chat logs from the Android or iOS
app is very straightforward: Simply choose `More` in the menu of a chat,
then `Export chat` and export the history to a txt file.
-
-
-
+
This package is intended make the first step of analysing WhatsApp text
data as easy as possible—reading your chat history into `R`. This should
@@ -76,7 +74,11 @@ chat
## 6 2017-07-13 09:16:48 Johanne… Haha it sure… /home/johann…
## 7 2018-09-28 13:27:48 Johanne… Did you know… /home/johann…
## 8 2018-09-28 13:28:48 Johanne… 😀😃😄😁😆😅😂🤣☺😊😇🙂… /home/johann…
+ ## 9 2018-09-28 13:30:48 Johanne… 🤷
+♀🤷🏻
+♂🙎
+♀🙎
+… /home/johann…
Now, this isn’t very interesting so you will probably want to use your
own data. For this demonstration, I use one of my own chat logs from a
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 7fca143..f0fe504 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,6 +1,7 @@
Artur
Artur's
Artur’s
+chr
das
eig
eigentlich
@@ -11,6 +12,7 @@ im
ja
jaa
joh
+johann
Macbook
ne
oman
diff --git a/man/rwa_parse_time.Rd b/man/rwa_parse_time.Rd
deleted file mode 100644
index 23c9d86..0000000
--- a/man/rwa_parse_time.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rwhatsapp.R
-\name{rwa_parse_time}
-\alias{rwa_parse_time}
-\title{Parse time}
-\usage{
-rwa_parse_time(time, format, tz)
-}
-\arguments{
-\item{format}{Most formats are automatically detected. If you encounter
-problems you can provide a custom format here. Refer to
-\link[stringi]{stri_datetime_parse} for guidance.}
-
-\item{tz}{A time zone for date conversion. Set NULL or "" for the default
-time zone or a single string with a timezone identifier, see
-\link[stringi]{stri_timezone_list}.}
-}
-\value{
-a tibble
-}
-\description{
-The history can be obtained going to the menu in a chat on the WhatsApp app,
-choosing "more", then "Export chat".
-}
-\examples{
-history <- system.file("extdata", "sample.txt", package = "rwhatsapp")
-df <- rwa_read(history)
-}
diff --git a/man/rwa_read_lines.Rd b/man/rwa_read_lines.Rd
deleted file mode 100644
index 50340d0..0000000
--- a/man/rwa_read_lines.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rwhatsapp.R
-\name{rwa_read_lines}
-\alias{rwa_read_lines}
-\title{Read in files from supported formats}
-\usage{
-rwa_read_lines(x, verbose, ...)
-}
-\arguments{
-\item{x}{Path to a txt or zip file of a WhatsApp history or the history
-itself as character object.}
-
-\item{verbose}{A logical flag indicating whether information should be
-printed to the screen.}
-
-\item{...}{Further arguments passed to \link[stringi]{stri_read_lines}.}
-}
-\value{
-a tibble
-}
-\description{
-The history can be obtained going to the menu in a chat on the WhatsApp app,
-choosing "more", then "Export chat".
-}
-\examples{
-history <- system.file("extdata", "sample.txt", package = "rwhatsapp")
-df <- rwa_read(history)
-}
diff --git a/tests/testthat/test-rwhatsapp.R b/tests/testthat/test-rwhatsapp.R
index 63a7c18..80a9f79 100755
--- a/tests/testthat/test-rwhatsapp.R
+++ b/tests/testthat/test-rwhatsapp.R
@@ -577,7 +577,7 @@ test_that("reading from file", {
dir.create(dir)
file.copy(system.file("extdata", "sample.txt", package = "rwhatsapp"),
dir)
- zip(paste0(dir, "test.zip"), paste0(dir, "sample.txt"), flags = "-jr9X")
+ utils::zip(paste0(dir, "test.zip"), paste0(dir, "sample.txt"), flags = "-jr9X")
out <- rwa_read(x = paste0(dir, "test.zip"),
tz = "GMT",
encoding = "UTF-8",
diff --git a/vignettes/Text_Analysis_using_WhatsApp_data.Rmd b/vignettes/Text_Analysis_using_WhatsApp_data.Rmd
index 03082dc..db32ac2 100644
--- a/vignettes/Text_Analysis_using_WhatsApp_data.Rmd
+++ b/vignettes/Text_Analysis_using_WhatsApp_data.Rmd
@@ -25,9 +25,7 @@ capabilities. Furthermore, retrieving chat logs from the Android or iOS
app is very straightforward: Simply choose `More` in the menu of a chat,
then `Export chat` and export the history to a txt file.
-
-
-
+
This package is intended make the first step of analysing WhatsApp text
data as easy as possible—reading your chat history into `R`. This should