2_codebook_en.Rmd

---
title: "Codebook GOCD2"
output:
  html_document:
    toc: true
    toc_depth: 4
    toc_float: true
    code_folding: 'hide'
    self_contained: false
---
  
```{r results='hide',message=F,warning=F}
# ---- Setup: helpers, data, knitr defaults ----
# NOTE(review): `%>%`, filter() and ungroup() are used below without a
# library() call in this file -- presumably 0_helpers.R attaches them; confirm.
source("0_helpers.R")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

load("data/cleaned.rdata")

knitr::opts_chunk$set(
  echo = FALSE,
  error = TRUE,
  dev = "CairoPNG",
  cache = FALSE
)
library(ggplot2)
theme_set(theme_bw())

library(codebook)

# ---- Prepare the diary data for the codebook ----
# Drop attributes that should not be carried into the codebook.
attributes(s3_daily$grooming_broad)$scale_item_names <- NULL

# Keep only rows that have both a session and a creation timestamp, ungrouped.
s3_daily <- s3_daily %>%
  filter(!is.na(session), !is.na(created)) %>%
  ungroup()
# s3_daily <- s3_daily %>% select(-!!names(lab))

# Coerce these columns to plain numerics.
for (numeric_col in c("sleep_fell_asleep_time", "sleep_awoke_time",
                      "DAL", "window_length")) {
  s3_daily[[numeric_col]] <- as.numeric(s3_daily[[numeric_col]])
}

attributes(s3_daily$menstruation_imputed)$names <- NULL
attributes(s3_daily$menstruation)$names <- NULL

# Keep the identifying columns plus every diary column that is not also a
# column of the lab data.
vars <- c(
  "session", "short", "created_date",
  setdiff(names(s3_daily), names(lab))
)
s3_daily <- s3_daily[, vars]

# ---- Reliabilities: compute once remotely, afterwards load the cached file ----
library(future)
if (!file.exists("codings/reliabilities_new.rdata")) {
  library(future.batchtools)
  # plan(tweak(multicore, workers = 3))
  # Your username goes before the @
  login <- tweak(remote, workers = "arslan@arc-lin-cpt10.mpib-berlin.mpg.de")
  # Outer level: run the future on the remote server;
  # inner level: multicore with 10 workers on that server.
  plan(list(login, tweak(multicore, workers = 10)))

  # login <- tweak(remote, workers = "arslan@tardis.mpib-berlin.mpg.de")
  # qsub <- tweak(batchtools_torque, template = 'torque-lido.tmpl', 
  #             # workers = "export LSF_ENVDIR=/opt/lsf/conf",
  #                 resources = list(job.name = 'multilevel_reliability',
  #                                 queue = 'default',
  #                                 walltime = "5:0:0",
  #                                 memory = 32, #gb
  #                                 ncpus = 1))
  # ## Specify future topology
  # ## login node -> { cluster node (compile brms model) } -> { run chains on multiple cores }
  # plan(list(
  #   login,
  #   qsub
  # ))

  reliabilities <- future::value(future({
    reliabilities <- list(
      diary = codebook::compute_reliabilities(s3_daily, "repeated_many"),
      s2_initial = codebook::compute_reliabilities(s2_initial),
      s4_followup = codebook::compute_reliabilities(s4_followup)
    )
    # Saved where the future is evaluated as well, before the result is
    # returned to this session.
    save(reliabilities, file = "codings/reliabilities_new.rdata")
    reliabilities
  }))
  # Saved again in this session so later runs take the load() branch.
  save(reliabilities, file = "codings/reliabilities_new.rdata")
} else {
  load("codings/reliabilities_new.rdata")
}

# Switch to the multicore plan for the remainder of the document.
plan(multicore)
```

```{r}
#' Translate variable and value labels of a survey data set to English
#'
#' For every row of `questions` whose `name` is a column of `dataset`, the
#' column's variable label is replaced by `label_en`, and the names of its
#' existing value labels are rewritten from the original choice label
#' (`label`) to the English one (`label_en`). A leading "<digits>: " prefix
#' on a value label is preserved. Labels without an English translation
#' (missing or empty `label_en`) are left untouched.
#'
#' @param dataset Data frame whose columns carry variable/value labels.
#' @param questions Item table with the columns `name`, `type` and `label_en`.
#'   Optional `choice1`, `choice1_en`, `choice2`, ... columns are treated as
#'   inline choice lists named after the item.
#' @param choices Choice table with the columns `list_name`, `name`, `label`
#'   and `label_en`; empty `list_name` cells inherit the value above them.
#' @return `dataset` with translated variable and value labels.
translate_metadata <- function(dataset, 
                               questions, choices) {
  # Turn missing strings in character columns into "" so that the `!= ""`
  # comparisons below do not yield NA.
  # NOTE(review): if_na() is not defined in this file -- presumably it comes
  # from 0_helpers.R or an attached package; confirm.
  questions <- questions %>% 
    mutate_if(is.character, ~ if_na(., ""))

  choices <- choices %>% 
    filter(!is.na(name), name != "") %>% 
    mutate(
      list_name = na_if(list_name, ""),
      # na.rm = FALSE keeps leading NAs; the default would drop them, change
      # the vector length and make mutate() fail.
      list_name = zoo::na.locf(list_name, na.rm = FALSE)
    )

  # Inline choices (choice1, choice1_en, ...) become extra choice lists that
  # are named after the item they belong to.
  if ("choice1" %in% names(questions)) {
    inline_choices <- questions %>% 
      select(list_name = name, starts_with("choice")) %>% 
      pivot_longer(-list_name, 
                   names_to = c("name", "lang"), 
                   names_pattern = "choice(\\d+)(_en)?") %>% 
      filter(!is.na(value), value != "") %>% 
      # Depending on the tidyr version an unmatched optional group is
      # returned as NA or as "", so accept both as "original language".
      mutate(lang = if_else(is.na(lang) | lang == "", "label", "label_en")) %>% 
      pivot_wider(names_from = "lang")
    choices <- bind_rows(choices, inline_choices)
  }

  # The choice list is the third capture group of `type`: the word that
  # follows the first word and an optional run of digits/commas/underscores.
  questions <- questions %>% 
    mutate(list_name = str_match(type, "^[a-z_]+( [0-9_,]+)? ([a-z_]+)")[, 3])

  # loop over all questions in the sheet
  for (i in seq_along(questions$name)) {
    var <- questions$name[i]
    if (is.na(var) || var == "" || !(var %in% names(dataset))) {
      next
    }

    # Read this row directly instead of filtering by name again: a duplicated
    # item name would otherwise yield several labels / list names at once.
    english_label <- questions$label_en[i]
    choice_list_name <- questions$list_name[i]

    # replace the variable label, but never with an empty translation
    if (!is.na(english_label) && english_label != "") {
      var_label(dataset[[var]]) <- english_label
    }

    # find out if there were labelled choices: either the list referenced in
    # `type`, or (if none is referenced) a list named like the item itself
    translated_choices <- choices %>% 
      filter((!is.na(choice_list_name) & list_name == choice_list_name) |
             (is.na(choice_list_name) & list_name == var)) %>% 
      select(label, label_en) %>% 
      # untranslated choices would otherwise be renamed to "NA" or ""
      filter(!is.na(label), !is.na(label_en), label_en != "") %>% 
      distinct() %>% 
      deframe()

    if (!is.null(val_labels(dataset[[var]])) && length(translated_choices) > 0) {
      # if so overwrite; "\\1" keeps an optional "<digits>: " prefix
      item_val_labels <- val_labels(dataset[[var]])
      replacements <- paste0("\\1", translated_choices)
      names(replacements) <- paste0(
        "^(\\d+: )?(", Hmisc::escapeRegex(names(translated_choices)), ")$"
      )
      names(item_val_labels) <- str_replace_all(names(item_val_labels), replacements)
      val_labels(dataset[[var]]) <- item_val_labels
    }
  }
  dataset
}
```


# Demographics

Participants filled out these surveys before being invited to the daily diary. After finishing them, they were invited to fill out the diary on the next day.

```{r}
# Fetch the item table and the choice table for this survey from its
# Google spreadsheet.
questions <- rio::import("https://docs.google.com/spreadsheets/d/1tLQDVyYUAXLBkblTT8BXow_rcg5G6xK9Vi3xTGieN20/edit#gid=0", format = "xlsx")
# NOTE(review): unlike the call above, this one passes `which = 2` and no
# `format` -- confirm that rio reads the intended sheet.
choices <- rio::import("https://docs.google.com/spreadsheets/d/1tLQDVyYUAXLBkblTT8BXow_rcg5G6xK9Vi3xTGieN20/edit#gid=1116762580", which = 2)

# Replace the variable and value labels of s1_demo with their English versions.
s1_demo <- translate_metadata(s1_demo, questions, choices)
```

```{r pre_survey, results='asis'}
# Render the codebook for s1_demo with the missingness report switched off.
codebook(s1_demo,  missingness_report = FALSE, indent = "#")
```

# Personality


```{r}
# Fetch the item table and the choice table for this survey from its
# Google spreadsheet.
questions <- rio::import("https://docs.google.com/spreadsheets/d/1apYY4LZO05plk8jr-EVQUQfLWW2fgBfMrX2ntmPEMdc/edit#gid=483254913", format = "xlsx")
# NOTE(review): unlike the call above, this one passes `which = 2` and no
# `format` -- confirm that rio reads the intended sheet.
choices <- rio::import("https://docs.google.com/spreadsheets/d/1apYY4LZO05plk8jr-EVQUQfLWW2fgBfMrX2ntmPEMdc/edit#gid=1927428581", which = 2)


# Replace the variable and value labels of s2_initial with their English versions.
s2_initial <- translate_metadata(s2_initial, questions, choices)
```

```{r initial}
# Render the codebook for s2_initial, passing in the `s2_initial` element of
# the `reliabilities` list; the missingness report is switched off.
codebook(s2_initial, reliabilities = reliabilities$s2_initial, missingness_report = FALSE, indent = "#")
```

# Diary


```{r}
# Fetch the item table and the choice table for the diary from its
# Google spreadsheet.
questions <- rio::import("https://docs.google.com/spreadsheets/d/1Xo4fRvIzPYbWibVgJ9nm7vES39DSAWQBztnB8j7PdIo/edit#gid=232713254", format = "xlsx")
# NOTE(review): unlike the call above, this one passes `which = 2` and no
# `format` -- confirm that rio reads the intended sheet.
choices <- rio::import("https://docs.google.com/spreadsheets/d/1Xo4fRvIzPYbWibVgJ9nm7vES39DSAWQBztnB8j7PdIo/edit#gid=1837266155", which = 2)

# Replace the variable and value labels of s3_daily with their English versions.
s3_daily <- translate_metadata(s3_daily, questions, choices)
```

Participants received invitations to fill out the diary daily over a period of 70 days.

```{r diary}
# Dataset-level metadata attached to the diary data before its codebook is
# rendered.
# Fixed spelling of the study name ("Goettingen", previously "Goettigen").
metadata(s3_daily)$name <- "Goettingen Ovulatory Cycle Diaries 2"
metadata(s3_daily)$description <- "A 70-day online diary study focusing on menstrual cycles, sexuality, mood, and behaviour"
# TODO: the identifier still contains the placeholder "XXXX".
metadata(s3_daily)$identifier <- "https://doi.org/10.17605/OSF.IO/XXXX"
metadata(s3_daily)$creator <- "Ruben C. Arslan"
# TODO: the citation still contains the placeholder "XXXX".
metadata(s3_daily)$citation <- "Arslan, R.C., Driebe, XXXX"
metadata(s3_daily)$url <- "https://rubenarslan.github.io/gocd2"
metadata(s3_daily)$datePublished <- "2020-06-02"
metadata(s3_daily)$temporalCoverage <- "2014-2015"
metadata(s3_daily)$spatialCoverage <- "German-speaking countries, online"

# Render the diary codebook, passing in the `diary` element of the
# `reliabilities` list; the missingness report is switched off.
codebook(s3_daily, reliabilities = reliabilities$diary, missingness_report = FALSE, indent = "#")
```

# Time spent
```{r}
# Fetch the item table and the choice table for this survey from its
# Google spreadsheet.
questions <- rio::import("https://docs.google.com/spreadsheets/d/1Ic99Fq4z4UmcJ-b8BKLSZAQmOVHOKJzRtRs-H80n8qA/edit#gid=813263650", format = "xlsx")
# NOTE(review): unlike the call above, this one passes `which = 2` and no
# `format` -- confirm that rio reads the intended sheet.
choices <- rio::import("https://docs.google.com/spreadsheets/d/1Ic99Fq4z4UmcJ-b8BKLSZAQmOVHOKJzRtRs-H80n8qA/edit#gid=273950804", which = 2)

# Replace the variable and value labels of s4_timespent with their English versions.
s4_timespent <- translate_metadata(s4_timespent, questions, choices)
```

Single participants answered questions about who they spent time with and/or thought about during the diary. We asked them questions about commonly mentioned names after the end of the diary.

```{r time_spent}
# Render the codebook for s4_timespent.
codebook(s4_timespent, indent = "#")
```

# Follow-up survey
```{r}
# Fetch the item table and the choice table for this survey from its
# Google spreadsheet.
questions <- rio::import("https://docs.google.com/spreadsheets/d/1-wR_uSIvzSs8GiSV2RhuZY2YhFLAt_K7vAygSj1SQPI/edit#gid=813263650", format = "xlsx")
# NOTE(review): unlike the call above, this one passes `which = 2` and no
# `format` -- confirm that rio reads the intended sheet.
choices <- rio::import("https://docs.google.com/spreadsheets/d/1-wR_uSIvzSs8GiSV2RhuZY2YhFLAt_K7vAygSj1SQPI/edit#gid=273950804", which = 2)

# Replace the variable and value labels of s4_followup with their English versions.
s4_followup <- translate_metadata(s4_followup, questions, choices)
```

At the end of the diary, participants were debriefed and answered these follow-up questions.

```{r follow_up}
# Render the codebook for s4_followup, passing in the `s4_followup` element
# of the `reliabilities` list.
codebook(s4_followup, reliabilities = reliabilities$s4_followup, indent = "#")
```

# Menstruation follow-up
```{r}
# Fetch the item table and the choice table for this survey from its
# Google spreadsheet.
questions <- rio::import("https://docs.google.com/spreadsheets/d/1rAGYC7JbgjsP5bymsWyRoznZZsNmHG-C2_uswxyyMbM/edit#gid=59825778", format = "xlsx")
# NOTE(review): unlike the call above, this one passes `which = 2` and no
# `format` -- confirm that rio reads the intended sheet.
choices <- rio::import("https://docs.google.com/spreadsheets/d/1rAGYC7JbgjsP5bymsWyRoznZZsNmHG-C2_uswxyyMbM/edit#gid=2096123197", which = 2)

# Replace the variable and value labels of s5_hadmenstruation with their English versions.
s5_hadmenstruation <- translate_metadata(s5_hadmenstruation, questions, choices)
```

To be able to count backward from the onset of the next menstruation, we continued to follow up with women who had not menstruated during the last 5 days of the diary until their next menstrual onset, for up to 40 days.

```{r menstruation_follow_up}
# Render the codebook for s5_hadmenstruation.
codebook(s5_hadmenstruation, indent = "#")
```


# Lab Study

A subset of the diary participants also participated in a lab study, in which hormones were collected. Participation dates were intended to overlap, but scheduling did not always permit this.

```{r lab}
# Render the codebook for the lab data.
codebook(lab, indent = "#")
```