---
title: "Screening"
author: "Devi Veytia"
date: "2023-01-14"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Import results

```{r load libraries}
library(revtools)
library(litsearchr)
library(metagear)
```

```{r read in references and de-duplicate}
# read in results
refs <- litsearchr::import_results(directory = "data", file = "WOS-results_2023-01-14.bib", verbose = TRUE)
nrow(refs)
# 151 results from WOS

# de-duplicate, first by exact doi matching (the find_duplicates default)
unique_id <- revtools::find_duplicates(refs, to_lower = TRUE)

# number of duplicates
nDuplicates <- length(which(duplicated(unique_id)))
print(paste("There are", nDuplicates, "duplicates from doi matching"))

# then also by fuzzy matching on titles -- takes a while, so only run for the final search
unique_id_fuzz <- revtools::find_duplicates(refs, to_lower = TRUE, match_variable = "title",
                                            match_function = "fuzzdist", method = "fuzz_token_sort_ratio")
length(which(duplicated(unique_id_fuzz)))

# no duplicates found by either method, so assign the unique ids
refs$unique_id <- unique_id
```
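
No duplicates were flagged by either pass here, but a larger final search may well flag some. In that case the matched records should be merged rather than just counted; a minimal sketch using revtools' `extract_unique_references()` (not run, and assuming its default merge behaviour):

```{r merge any duplicates, eval = FALSE}
# collapse records that share a unique_id into one merged reference each
refs_unique <- revtools::extract_unique_references(refs, matches = unique_id)
nrow(refs_unique) # should equal length(unique(unique_id))
```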

# Title + abstract screening

```{r set up screening}
unscreened_refs <- effort_distribute(refs[!duplicated(refs$unique_id), ],
                                     reviewers = c("Devi"), effort = c(100), initialize = TRUE,
                                     save_split = TRUE, directory = file.path(getwd(), "derived_data"))

buttonOptions <- c("No", "E", "C", "O", "Yes")
buttonKeys <- c("n", "e", "c", "o", "y")
```
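
With `initialize = TRUE`, `effort_distribute()` adds metagear's bookkeeping columns (`STUDY_ID`, `REVIEWERS`, `INCLUDE`), and `save_split = TRUE` writes the per-reviewer file `effort_Devi.csv` used below. A quick sanity check of the split (not run):

```{r check screening setup, eval = FALSE}
# unscreened articles should all start out tagged "not vetted"
head(unscreened_refs[, c("STUDY_ID", "REVIEWERS", "INCLUDE")])
table(unscreened_refs$INCLUDE)
```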

```{r screen articles, eval = FALSE}
# screen abstracts in metagear's gui
# progress can be saved from the gui if partially done, then resumed by re-running this chunk
metagear::abstract_screener(file.path(getwd(), "derived_data", "effort_Devi.csv"), aReviewer = "Devi",
                            abstractColumnName = "abstract", titleColumnName = "title",
                            theButtons = buttonOptions, keyBindingToButtons = buttonKeys)
```

```{r summarise screen results}
screened_refs <- read.csv(file.path(getwd(),"derived_data","effort_Devi.csv"))
# number of articles included
sum(screened_refs$INCLUDE == "Yes")
## 33 articles included
```
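
The sum above only tracks "Yes" decisions; tallying across all five buttons (plus anything left unscreened) gives a fuller picture:

```{r tally all screening decisions, eval = FALSE}
# breakdown over No / E / C / O / Yes, plus any "not vetted" leftovers
table(screened_refs$INCLUDE, useNA = "ifany")
```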

```{r create coding sheet}
full_text_code <- subset(screened_refs, INCLUDE == "Yes")
full_text_code <- full_text_code[, c("STUDY_ID", "title", "author", "year", "doi")]

# add columns for other variables
code_variables <- c("INCLUDE", "study_type", "population_type", "krill_life_stage",
                    "env_drivers", "outcome_description", "outcome_metrics",
                    "outcome_spatial_scale", "outcome_temporal_scale",
                    "empirical_model_used", "empirical_temp_coverage", "empirical_spat_coverage")
full_text_code[, code_variables] <- NA

# write to excel
# writexl::write_xlsx(full_text_code, file.path(getwd(), "derived_data", "full_text_codebook.xlsx"))
```

```{r write dois to extract full texts}
# write dois to extract full texts
# cat(paste0(full_text_code$doi, collapse=" OR "), file = file.path(getwd(), "derived_data", "dois_for_full_text_retreival.txt"))
```

```{r try to extract full text automatically, eval = FALSE}
# NOTE: automatic retrieval didn't work, so this chunk is not evaluated on knit

# set scihub url to renviron
#Sys.setenv(SCIHUB_URL = "https://sci-hub.se/")

# get the dois for the full texts to retrieve
full_text_code <- readxl::read_excel(file.path(getwd(), "derived_data", "full_text_codebook.xlsx"))

source(here::here("R", "utils.R"))
source(here::here("R", "get-pdf.R"))

# clean dois: drop rows without a doi first, so that dois and full_text_code
# stay aligned row-for-row inside the download loop
full_text_code <- full_text_code[!is.na(full_text_code$doi), ]
dois <- paste0("https://doi.org/", full_text_code$doi)

# set directory to place downloaded pdfs
path_pdf <- here::here("data", "full-texts")
if (!dir.exists(path_pdf)) {
  dir.create(path_pdf, showWarnings = FALSE, recursive = TRUE)
}

# download each pdf if possible, pausing briefly between requests
for (i in seq_along(dois)) {
  # # alternatively, build the filename from the title instead of the study id
  # fn <- gsub("\\s", "_", full_text_code[i, "title"])
  # fn <- gsub("- ", "-", fn)          # Deal with caesura
  # fn <- gsub("[[:punct:]]", "_", fn) # Remove punctuation
  # fn <- gsub("[0-9]", "", fn)        # Remove numbers
  # fn <- gsub("\\s+", "", fn)         # Remove whitespaces
  # fn <- gsub("^\\s|\\s$", "", fn)    # Remove leading/trailing whitespace
  get_pdf(dois[i], path = path_pdf,
          filename = paste("ID", as.character(full_text_code$STUDY_ID[i]), sep = "_"))
  Sys.sleep(sample(seq(0, 5, by = 0.01), 1))
}
```
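
After the download loop it is worth checking which studies actually came back. A sketch, assuming `get_pdf()` saves each file under `path_pdf` as the filename stem passed above plus a `.pdf` extension (i.e. `ID_<STUDY_ID>.pdf`):

```{r check downloaded pdfs, eval = FALSE}
# compare study ids that have a downloaded pdf against the full coding list
downloaded <- list.files(path_pdf, pattern = "\\.pdf$") # e.g. "ID_12.pdf"
got_ids <- gsub("^ID_|\\.pdf$", "", downloaded)
missing <- setdiff(as.character(full_text_code$STUDY_ID), got_ids)
print(paste(length(missing), "full texts still to retrieve manually"))
print(missing)
```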