Skip to content

Commit

Permalink
Fixed some problems in documentation and code formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
JBGruber committed Sep 14, 2019
1 parent e91222a commit 311c4eb
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 119 deletions.
6 changes: 0 additions & 6 deletions .directory

This file was deleted.

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ Update package.R
imgur.key
.Renviron
README_files
.directory
108 changes: 60 additions & 48 deletions R/rwhatsapp.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,41 +31,42 @@ rwa_read <- function(x,
...) {

if (verbose) {
start_time <- status("Reading chat history from", appendLF = FALSE, ppfix = "")
start_time <- status("Reading chat history from",
appendLF = FALSE, ppfix = "")
} else {
start_time <- NULL
}

chat_raw <- rwa_read_lines(x, verbose, start_time, ...)

chat_raw <- chat_raw[!chat_raw == ""]
time <- stringi::stri_extract_first_regex(
time <- stri_extract_first_regex(
str = chat_raw,
pattern = "^\\d+-\\d+-\\d+.*-|[^-]+ - "
)
if (sum(is.na(time)) > (length(time) / 2)) {
time <- stringi::stri_extract_first_regex(str = chat_raw,
pattern = "[^]]+] ")
time <- stri_extract_first_regex(str = chat_raw,
pattern = "[^]]+] ")
}
if (sum(is.na(time)) == length(time)) {
time <- stringi::stri_extract_first_regex(str = chat_raw,
pattern = "^.*\\d+:\\d+")
time <- stri_extract_first_regex(str = chat_raw,
pattern = "^.*\\d+:\\d+")
}
for (l in which(is.na(time))) {
chat_raw[l - 1] <- stringi::stri_paste(chat_raw[l - 1], chat_raw[l],
sep = "\n")
chat_raw[l - 1] <- stri_paste(chat_raw[l - 1], chat_raw[l],
sep = "\n")
}

chat_raw <- chat_raw[!is.na(time)]
time <- time[!is.na(time)]
if (verbose) status("timestamps extracted")

source <- names(chat_raw)
chat_raw <- stringi::stri_replace_first_fixed(str = chat_raw,
pattern = time,
replacement = "")
chat_raw <- stri_replace_first_fixed(str = chat_raw,
pattern = time,
replacement = "")

time <- stringi::stri_replace_all_regex(
time <- stri_replace_all_regex(
str = time,
pattern = c("\\[", "\\]", "-$", "- $"),
replacement = c("", "", "", ""),
Expand All @@ -81,22 +82,22 @@ rwa_read <- function(x,
" or add an issue at www.github.com/JBGruber/rwhatsapp.")
}

author <- stringi::stri_extract_first_regex(str = chat_raw,
pattern = "[^:]+: ")
chat_raw[!is.na(author)] <- stringi::stri_replace_first_fixed(
author <- stri_extract_first_regex(str = chat_raw,
pattern = "[^:]+: ")
chat_raw[!is.na(author)] <- stri_replace_first_fixed(
str = chat_raw[!is.na(author)],
pattern = author[!is.na(author)],
replacement = ""
)
author <- stringi::stri_replace_last_fixed(str = author,
pattern = ": ",
replacement = "")
author <- stri_replace_last_fixed(str = author,
pattern = ": ",
replacement = "")

if (verbose) status("author extracted")

tbl <- tibble::tibble(
time = time,
author = as.factor(stringi::stri_trim_both(author)),
author = as.factor(stri_trim_both(author)),
text = chat_raw,
source = source
)
Expand All @@ -118,34 +119,37 @@ rwa_read <- function(x,

#' Read in files from supported formats
#'
#' @param start_time For verbose messages.
#' @inherit rwa_read
#' @import stringi
#' @noRd
rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {
# get files
zps <- grep(".zip$", x, ignore.case = TRUE)
temp <- NULL
src <- NULL
if (length(zps) > 0) {
src <- x[zps]
x[zps] <- vapply(x[zps], function(x) {
x[zps] <- vapply(x[zps], FUN.VALUE = character(1), FUN = function(x) {
content <- unzip(x, list = TRUE)
content <- content[grepl(".txt$", content$Name, ignore.case = TRUE), ]
temp <- paste0(tempdir(), "/whatsapp")
unzip(x, files = content$Name, overwrite = TRUE, exdir = temp)
unzip(x, files = content$Name, overwrite = TRUE, exdir = temp)
return(list.files(temp, pattern = content$Name, full.names = TRUE))
}, FUN.VALUE = character(1))
})
}

if (f_exist_s(x)) {
if (length(x) == 1) {
chat_raw <- stringi::stri_read_lines(x, ...)
chat_raw <- stri_read_lines(x, ...)
names(chat_raw) <- rep(x, length(chat_raw))
if (verbose) {
message(" one log file...")
status("one log file loaded")
}
} else {
chat_raw <- unlist(lapply(x, function(t) {
cr <- stringi::stri_read_lines(t)#, ...)
cr <- stri_read_lines(t)#, ...)
names(cr) <- rep(t, length(cr))
return(cr)
}))
Expand All @@ -162,11 +166,11 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {
status("object loaded ")
}
} else {
stop("Provide either a path to one or multiple txt or zip files of a WhatsApp ",
"history or the history itself as character object.")
stop("Provide either a path to one or multiple txt or zip files of a ",
"WhatsApp history or the history itself as character object.")
}
if (length(zps) > 0) {
names(chat_raw) <- stringi::stri_replace_last_fixed(names(chat_raw), x[zps], src)
names(chat_raw) <- stri_replace_last_fixed(names(chat_raw), x[zps], src)
unlink(temp, recursive = TRUE)
}
return(chat_raw)
Expand All @@ -175,7 +179,10 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) {

#' Parse time
#'
#' @param time A character object with times to parse.
#' @inherit rwa_read
#' @import stringi
#' @noRd
rwa_parse_time <- function(time, format, tz) {
if (is.null(format)) {
formats <- c(
Expand All @@ -188,38 +195,38 @@ rwa_parse_time <- function(time, format, tz) {
"MM.dd.yyyy, HH:mm:ss",
"MM.dd.yyyy, HH:mm"
)
if (any(stringi::stri_detect_fixed(time, "."))) {
if (sum(stringi::stri_detect_regex(time, "\\d+.\\d+.\\d{2}")) >
if (any(stri_detect_fixed(time, "."))) {
if (sum(stri_detect_regex(time, "\\d+.\\d+.\\d{2}")) >
(length(time) * 0.9)) {
formats <- stringi::stri_replace_all_fixed(
formats <- stri_replace_all_fixed(
formats,
"yyyy",
"yy"
)
}
} else if (any(stringi::stri_detect_fixed(time, "/"))) {
formats <- stringi::stri_replace_all_fixed(
} else if (any(stri_detect_fixed(time, "/"))) {
formats <- stri_replace_all_fixed(
formats,
".",
"/"
)
if (sum(stringi::stri_detect_regex(time, "\\d+/\\d+/\\d{2}")) >
if (sum(stri_detect_regex(time, "\\d+/\\d+/\\d{2}")) >
(length(time) * 0.9)) {
formats <- stringi::stri_replace_all_fixed(
formats <- stri_replace_all_fixed(
formats,
"yyyy",
"yy"
)
}
} else if (any(stringi::stri_detect_fixed(time, "-"))) {
formats <- stringi::stri_replace_all_fixed(
} else if (any(stri_detect_fixed(time, "-"))) {
formats <- stri_replace_all_fixed(
formats,
".",
"-"
)
if (sum(stringi::stri_detect_regex(time, "\\d+-\\d+-\\d{2}")) >
if (sum(stri_detect_regex(time, "\\d+-\\d+-\\d{2}")) >
(length(time) * 0.9)) {
formats <- stringi::stri_replace_all_fixed(
formats <- stri_replace_all_fixed(
formats,
"yyyy",
"yy"
Expand All @@ -231,18 +238,18 @@ rwa_parse_time <- function(time, format, tz) {
)
}
test <- sapply(formats, function(f) {
test <- stringi::stri_datetime_parse(str = head(time, n = 1000),
format = f,
lenient = FALSE,
tz = tz)
test <- stri_datetime_parse(str = head(time, n = 1000),
format = f,
lenient = FALSE,
tz = tz)
sum(is.na(test))
})
format <- names(which.min(test))
}

time <- stringi::stri_datetime_parse(str = time,
format = format,
tz = tz)
time <- stri_datetime_parse(str = time,
format = format,
tz = tz)

return(time)
}
Expand All @@ -255,7 +262,7 @@ rwa_parse_time <- function(time, format, tz) {
#' @importFrom rlang .data
rwa_add_emoji <- function(x) {
x$id <- seq_along(x$text)
x$text <- stringi::stri_replace_all_regex(
x$text <- stri_replace_all_regex(
x$text,
"[[:alnum:]]",
"x"
Expand Down Expand Up @@ -286,12 +293,17 @@ rwa_add_emoji <- function(x) {


# creates status message and exports start_time if not in parent environment yet
status <- function(..., sep = "", appendLF = TRUE, ppfix = "...", indent = "\t") {
status <- function(...,
sep = "",
appendLF = TRUE,
ppfix = "...",
indent = "\t") {

if (exists("start_time", envir = parent.frame())) {
start_time <- mget("start_time", envir = parent.frame())[[1]]
diff <- format((Sys.time() - start_time), digits = 2, nsmall = 2)
message(paste(indent, ppfix, ..., " [", diff, "]", sep = sep), appendLF = appendLF)
message(paste(indent, ppfix, ..., " [", diff, "]", sep = sep),
appendLF = appendLF)
} else {
export <- Sys.time()
start_time <- export
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ WhatsApp seems to become increasingly important not just as a messaging service
Furthermore, retrieving chat logs from the Android or iOS app is very straightforward:
Simply choose `More` in the menu of a chat, then `Export chat` and export the history to a txt file.

<img src="https://i.imgur.com/9pZjPFC.jpg" width="275" /> <img src="https://i.imgur.com/OwUE6aE.jpg" width="275" /> <img src="https://i.imgur.com/8lCJQfZ.jpg" width="275" />
<img src="https://i.imgur.com/9pZjPFC.jpg" width="200" /> <img src="https://i.imgur.com/OwUE6aE.jpg" width="200" /> <img src="https://i.imgur.com/8lCJQfZ.jpg" width="200" />

This package is intended to make the first step of analysing WhatsApp text data as easy as possible---reading your chat history into `R`.
This should work, no matter which device or locale you used to retrieve the `txt` or `zip` file containing your conversations.
Expand Down
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ capabilities. Furthermore, retrieving chat logs from the Android or iOS
app is very straightforward: Simply choose `More` in the menu of a chat,
then `Export chat` and export the history to a txt file.

<img src="https://i.imgur.com/9pZjPFC.jpg" width="275" />
<img src="https://i.imgur.com/OwUE6aE.jpg" width="275" />
<img src="https://i.imgur.com/8lCJQfZ.jpg" width="275" />
<img src="https://i.imgur.com/9pZjPFC.jpg" width="200" /> <img src="https://i.imgur.com/OwUE6aE.jpg" width="200" /> <img src="https://i.imgur.com/8lCJQfZ.jpg" width="200" />

This package is intended to make the first step of analysing WhatsApp text
data as easy as possible—reading your chat history into `R`. This should
Expand Down Expand Up @@ -76,7 +74,11 @@ chat
## 6 2017-07-13 09:16:48 Johanne… Haha it sure… /home/johann… <chr… <chr [1]>
## 7 2018-09-28 13:27:48 Johanne… Did you know… /home/johann… <chr… <chr [0]>
## 8 2018-09-28 13:28:48 Johanne… 😀😃😄😁😆😅😂🤣☺😊😇🙂… /home/johann… <chr… <chr [242…
## 9 2018-09-28 13:30:48 Johanne… 🤷‍♀🤷🏻‍♂🙎‍♀🙎‍… /home/johann… <chr… <chr [87]>
## 9 2018-09-28 13:30:48 Johanne… 🤷
♀🤷🏻
♂🙎
♀🙎
… /home/johann… <chr… <chr [87]>

Now, this isn’t very interesting so you will probably want to use your
own data. For this demonstration, I use one of my own chat logs from a
Expand Down
2 changes: 2 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Artur
Artur's
Artur’s
chr
das
eig
eigentlich
Expand All @@ -11,6 +12,7 @@ im
ja
jaa
joh
johann
Macbook
ne
oman
Expand Down
28 changes: 0 additions & 28 deletions man/rwa_parse_time.Rd

This file was deleted.

28 changes: 0 additions & 28 deletions man/rwa_read_lines.Rd

This file was deleted.

2 changes: 1 addition & 1 deletion tests/testthat/test-rwhatsapp.R
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ test_that("reading from file", {
dir.create(dir)
file.copy(system.file("extdata", "sample.txt", package = "rwhatsapp"),
dir)
zip(paste0(dir, "test.zip"), paste0(dir, "sample.txt"), flags = "-jr9X")
utils::zip(paste0(dir, "test.zip"), paste0(dir, "sample.txt"), flags = "-jr9X")
out <- rwa_read(x = paste0(dir, "test.zip"),
tz = "GMT",
encoding = "UTF-8",
Expand Down
4 changes: 1 addition & 3 deletions vignettes/Text_Analysis_using_WhatsApp_data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ capabilities. Furthermore, retrieving chat logs from the Android or iOS
app is very straightforward: Simply choose `More` in the menu of a chat,
then `Export chat` and export the history to a txt file.

<img src="https://i.imgur.com/9pZjPFC.jpg" width="275" />
<img src="https://i.imgur.com/OwUE6aE.jpg" width="275" />
<img src="https://i.imgur.com/8lCJQfZ.jpg" width="275" />
<img src="https://i.imgur.com/9pZjPFC.jpg" width="200" /> <img src="https://i.imgur.com/OwUE6aE.jpg" width="200" /> <img src="https://i.imgur.com/8lCJQfZ.jpg" width="200" />

This package is intended to make the first step of analysing WhatsApp text
data as easy as possible—reading your chat history into `R`. This should
Expand Down

0 comments on commit 311c4eb

Please sign in to comment.