-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHelpR.R
148 lines (118 loc) · 4.33 KB
/
HelpR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# ==============================================================================
# 0 Housekeeping
# ==============================================================================
# Load libraries
library(data.table)
library(dplyr)
library(purrr)
library(readr)
library(vroom)
# Clear the environment
# Removes every object that ls() lists in the current environment; packages
# attached with library() are not affected
rm(list = ls())
# Clear the console
# Writes a form feed character ("\014") to the output
# NOTE(review): presumably relies on the console (e.g., RStudio) treating a
# form feed as "clear screen" - confirm for other front ends
cat("\014")
# Start a script timer
# Place this at the start of your script
start_time <- Sys.time()
# Stop the script timer
# Place this at the end of your script
# Evaluates to the time elapsed since start_time was set
Sys.time() - start_time
# Remove the timer variable
rm(start_time)
# ==============================================================================
# 1 Download data
# ==============================================================================
# ==============================================================================
# 1.1 Download a compressed archive
# ==============================================================================
# Set variables
destfile <- "data/iris.zip"
# Use GitHub's "raw" path: the "blob" path serves the HTML viewer page rather
# than the file itself, which would leave an invalid archive on disk
url <- "https://github.com/TheAviationDoctor/HelpR/raw/main/data/iris.zip"
# Download data only if not already downloaded
if (!file.exists(destfile)) {
  # Create the destination folder first, as download.file() will not do it
  dir.create(dirname(destfile), showWarnings = FALSE, recursive = TRUE)
  # Request a binary transfer so the archive is written byte for byte
  download.file(url = url, destfile = destfile, mode = "wb")
}
# Remove variables
rm(destfile, url)
# ==============================================================================
# 1.2 Unzip a compressed archive
# ==============================================================================
# Source archive and destination sub folder
archive <- "data/iris.zip"
out_dir <- "data/iris"
# Extract the archive into the sub folder. With overwrite = FALSE, files that
# already exist in the destination are left untouched; any warning raised by
# unzip() is silenced
suppressWarnings(unzip(archive, exdir = out_dir, overwrite = FALSE))
# Remove variables
rm(archive, out_dir)
# ==============================================================================
# 1.3 Download an uncompressed file
# ==============================================================================
# Set variables
destfile <- "data/iris.csv"
# Use GitHub's "raw" path: the "blob" path serves the HTML viewer page rather
# than the CSV content itself
url <- "https://github.com/TheAviationDoctor/HelpR/raw/main/data/iris.csv"
# Download data only if not already downloaded
if (!file.exists(destfile)) {
  # Create the destination folder first, as download.file() will not do it
  dir.create(dirname(destfile), showWarnings = FALSE, recursive = TRUE)
  # Request a binary transfer so the file is saved exactly as served
  download.file(url = url, destfile = destfile, mode = "wb")
}
# Remove variables
rm(destfile, url)
# ==============================================================================
# 3 Read data
# ==============================================================================
# ==============================================================================
# 3.1 Read and concatenate all files inside a compressed archive
# ==============================================================================
# Set variables
zipfile <- "data/iris.zip"
pth <- "data/iris"
sep <- ","
# List the files inside the compressed archive and build their paths in the
# folder where the archive was extracted (the files are read from that folder,
# so the archive must have been unzipped to it beforehand). Folder entries,
# whose names end with a slash, are skipped as they cannot be read as data
entries <- unzip(zipfile = zipfile, list = TRUE)$Name
files <- file.path(pth, entries[!endsWith(entries, "/")])
# Each of the four approaches below is a standalone alternative that reads
# every file and stacks the rows into a single table; pick one
# Base R
df <- do.call(
  rbind,
  lapply(
    X = files,
    FUN = read.table,
    sep = sep,
    header = TRUE, # Change this if first row of data is not a header
    colClasses = c("NULL", rep("numeric", 4L), "factor") # Change as needed
  )
)
# vroom
# vroom() accepts a vector of paths and row-binds the files itself, so the
# result is directly a data frame rather than a list wrapping one
df <- vroom::vroom(
  file = files,
  delim = sep,
  col_select = 2:6, # Change as needed
  col_types = "innnnf" # Change as needed
)
# Tidyverse
df <- files |>
  purrr::map(
    \(f) readr::read_delim(
      file = f,
      delim = sep,
      col_select = 2:6, # Change as needed
      col_types = "innnnf" # Change as needed
    )
  ) |>
  dplyr::bind_rows()
# data.table
dt <- data.table::rbindlist(
  lapply(
    X = files,
    FUN = data.table::fread,
    sep = sep,
    header = TRUE, # Change this if first row of data is not a header
    select = 2:6, # Change as needed
    colClasses = c("NULL", rep("numeric", 4L), "factor") # Change as needed
  )
)
# Remove variables
rm(entries, files, zipfile, pth, sep, df, dt)
# ==============================================================================
# 3.2 Read and concatenate several specific files inside a compressed archive
# ==============================================================================
# ==============================================================================
# 3.3 Read a single file inside a compressed archive
# ==============================================================================