---
title: "Screening"
author: "Devi Veytia"
date: "2023-01-14"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Import results

```{r load libraries}
library(revtools)
library(litsearchr)
library(metagear)
```

```{r read in references and de-duplicate}
# read in results
refs <- litsearchr::import_results(directory = "data", file = "WOS-results_2023-01-14.bib", verbose = TRUE)
nrow(refs)
# 151 results from WOS

# de-duplicate, first by exact doi matching (the find_duplicates default)
unique_id <- revtools::find_duplicates(refs, to_lower = TRUE)

# number of duplicates
nDuplicates <- length(which(duplicated(unique_id)))
print(paste("There are", nDuplicates, "duplicates from doi matching"))

# then also by fuzzy matching on titles -- takes a while, so only run for the final search
unique_id_fuzz <- revtools::find_duplicates(refs, to_lower = TRUE, match_variable = "title",
                                            match_function = "fuzzdist", method = "fuzz_token_sort_ratio")
length(which(duplicated(unique_id_fuzz)))

# no duplicates found by either method, so assign the unique ids
refs$unique_id <- unique_id
```
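
No duplicates were flagged by either pass here, but a larger final search may well flag some. In that case the matched records should be merged rather than just counted; a minimal sketch using revtools' `extract_unique_references()` (not run, and assuming its default merge behaviour):

```{r merge any duplicates, eval = FALSE}
# collapse records that share a unique_id into one merged reference each
refs_unique <- revtools::extract_unique_references(refs, matches = unique_id)
nrow(refs_unique) # should equal length(unique(unique_id))
```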

# Title + abstract screening

```{r set up screening}
unscreened_refs <- effort_distribute(refs[!duplicated(refs$unique_id), ],
                                     reviewers = c("Devi"), effort = c(100), initialize = TRUE,
                                     save_split = TRUE, directory = file.path(getwd(), "derived_data"))

buttonOptions <- c("No", "E", "C", "O", "Yes")
buttonKeys <- c("n", "e", "c", "o", "y")
```
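
With `initialize = TRUE`, `effort_distribute()` adds metagear's bookkeeping columns (`STUDY_ID`, `REVIEWERS`, `INCLUDE`), and `save_split = TRUE` writes the per-reviewer file `effort_Devi.csv` used below. A quick sanity check of the split (not run):

```{r check screening setup, eval = FALSE}
# unscreened articles should all start out tagged "not vetted"
head(unscreened_refs[, c("STUDY_ID", "REVIEWERS", "INCLUDE")])
table(unscreened_refs$INCLUDE)
```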

```{r screen articles, eval = FALSE}
# screen abstracts in metagear's gui
# progress can be saved from the gui if partially done, then resumed by re-running this chunk
metagear::abstract_screener(file.path(getwd(), "derived_data", "effort_Devi.csv"), aReviewer = "Devi",
                            abstractColumnName = "abstract", titleColumnName = "title",
                            theButtons = buttonOptions, keyBindingToButtons = buttonKeys)
```

```{r summarise screen results}
screened_refs <- read.csv(file.path(getwd(),"derived_data","effort_Devi.csv"))
# number of articles included
sum(screened_refs$INCLUDE == "Yes")
## 33 articles included
```
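
The sum above only tracks "Yes" decisions; tallying across all five buttons (plus anything left unscreened) gives a fuller picture:

```{r tally all screening decisions, eval = FALSE}
# breakdown over No / E / C / O / Yes, plus any "not vetted" leftovers
table(screened_refs$INCLUDE, useNA = "ifany")
```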

```{r create coding sheet}
full_text_code <- subset(screened_refs, INCLUDE == "Yes")
full_text_code <- full_text_code[, c("STUDY_ID", "title", "author", "year", "doi")]

# add columns for other variables
code_variables <- c("INCLUDE", "study_type", "population_type", "krill_life_stage",
                    "env_drivers", "outcome_description", "outcome_metrics",
                    "outcome_spatial_scale", "outcome_temporal_scale",
                    "empirical_model_used", "empirical_temp_coverage", "empirical_spat_coverage")
full_text_code[, code_variables] <- NA

# write to excel
# writexl::write_xlsx(full_text_code, file.path(getwd(), "derived_data", "full_text_codebook.xlsx"))
```

```{r write dois to extract full texts}
# write dois to extract full texts
# cat(paste0(full_text_code$doi, collapse=" OR "), file = file.path(getwd(), "derived_data", "dois_for_full_text_retreival.txt"))
```

```{r try to extract full text automatically, eval = FALSE}
# NOTE: automatic retrieval didn't work, so this chunk is not evaluated on knit

# set scihub url to renviron
#Sys.setenv(SCIHUB_URL = "https://sci-hub.se/")

# get the dois for the full texts to retrieve
full_text_code <- readxl::read_excel(file.path(getwd(), "derived_data", "full_text_codebook.xlsx"))

source(here::here("R", "utils.R"))
source(here::here("R", "get-pdf.R"))

# clean dois: drop rows without a doi first, so that dois and full_text_code
# stay aligned row-for-row inside the download loop
full_text_code <- full_text_code[!is.na(full_text_code$doi), ]
dois <- paste0("https://doi.org/", full_text_code$doi)

# set directory to place downloaded pdfs
path_pdf <- here::here("data", "full-texts")
if (!dir.exists(path_pdf)) {
  dir.create(path_pdf, showWarnings = FALSE, recursive = TRUE)
}

# download each pdf if possible, pausing briefly between requests
for (i in seq_along(dois)) {
  # # alternatively, build the filename from the title instead of the study id
  # fn <- gsub("\\s", "_", full_text_code[i, "title"])
  # fn <- gsub("- ", "-", fn)          # Deal with caesura
  # fn <- gsub("[[:punct:]]", "_", fn) # Remove punctuation
  # fn <- gsub("[0-9]", "", fn)        # Remove numbers
  # fn <- gsub("\\s+", "", fn)         # Remove whitespaces
  # fn <- gsub("^\\s|\\s$", "", fn)    # Remove leading/trailing whitespace
  get_pdf(dois[i], path = path_pdf,
          filename = paste("ID", as.character(full_text_code$STUDY_ID[i]), sep = "_"))
  Sys.sleep(sample(seq(0, 5, by = 0.01), 1))
}
```
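
After the download loop it is worth checking which studies actually came back. A sketch, assuming `get_pdf()` saves each file under `path_pdf` as the filename stem passed above plus a `.pdf` extension (i.e. `ID_<STUDY_ID>.pdf`):

```{r check downloaded pdfs, eval = FALSE}
# compare study ids that have a downloaded pdf against the full coding list
downloaded <- list.files(path_pdf, pattern = "\\.pdf$") # e.g. "ID_12.pdf"
got_ids <- gsub("^ID_|\\.pdf$", "", downloaded)
missing <- setdiff(as.character(full_text_code$STUDY_ID), got_ids)
print(paste(length(missing), "full texts still to retrieve manually"))
print(missing)
```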