Skip to content

Commit

Permalink
Merge pull request #57 from bczernecki/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
bczernecki authored May 2, 2021
2 parents 1af6d0f + b853f60 commit 1c23d1f
Show file tree
Hide file tree
Showing 49 changed files with 651 additions and 592 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: climate
Title: Interface to Download Meteorological (and Hydrological) Datasets
Version: 1.0.0
Version: 1.0.1
Authors@R: c(person(given = "Bartosz",
family = "Czernecki",
role = c("aut", "cre"),
Expand Down Expand Up @@ -33,7 +33,8 @@ Depends:
Imports:
XML,
httr,
curl
curl,
data.table
Suggests:
testthat,
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import(httr)
importFrom(XML,readHTMLTable)
importFrom(curl,curl_download)
importFrom(curl,has_internet)
importFrom(data.table,fread)
importFrom(stats,na.omit)
importFrom(stats,runif)
importFrom(utils,data)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# climate 1.0.1

* Added the `data.table` package to read CP1250-encoded files on machines that do not support this encoding (ASCII//TRANSLIT conversion is used instead)

# climate 0.9.9

* Changing URL `danepubliczne.imgw.pl` to `dane.imgw.pl` where needed
Expand Down
19 changes: 15 additions & 4 deletions R/check_locale.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
#' Check locale
#'
#' This is an extra check for some systems that make use of "C.UTF-8" that cannot parse properly tags used inside the Polish metservice's repository
#' This is an extra check for systems that use "C.UTF-8" or any ISO-like encoding recognized as causing potential problems;
#' the listed encodings cannot properly parse characters used in the Polish met service's repository, so ASCII//TRANSLIT will be forced instead
#' @noRd

check_locale = function(){
if(any(strsplit(Sys.getlocale(), "/")[[1]] == "C.UTF-8")){
message(" Your system locale contains 'C.UTF-8' which may cause trouble.

if(Sys.getlocale("LC_CTYPE") %in% c("C.UTF-8", "en_US.iso885915")){
locale = Sys.getlocale("LC_CTYPE")
message(paste0(" Your system locale is: " , locale," which may cause trouble.
Please consider changing it manually while working with climate, e.g.:
Sys.setlocale(category = 'LC_ALL', locale = 'en_US.UTF-8') ")
Sys.setlocale(category = 'LC_ALL', locale = 'en_US.UTF-8') "))
Sys.sleep(4)
return(1)

} else {

return(0)
}

}

47 changes: 14 additions & 33 deletions R/clean_metadata_hydro.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,13 @@
#' @param interval temporal interval
#' @importFrom utils read.fwf
#' @keywords internal
clean_metadata_hydro <- function(address, interval){
#miesieczne
#address="https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/miesieczne/mies_info.txt"
#dobowe
#address="https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/dobowe/codz_info.txt"
#"https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/dobowe/zjaw_info.txt"
#polroczne_i_roczne
#address="https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/polroczne_i_roczne/polr_info.txt"
#a <- suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
# fileEncoding = "CP1250", stringsAsFactors = FALSE)))
clean_metadata_hydro = function(address, interval){

temp = tempfile()

test_url(link = address, output = temp)
a = readLines(temp, warn = FALSE)

# if (!httr::http_error(address)) {
# a = readLines(address, warn = FALSE)
# } else {
# stop(call. = FALSE,
# paste0("\nDownload failed. ",
# "Check your internet connection or validate this url in your browser: ",
# url, "\n"))
# }


a = iconv(a, from = "cp1250", to = "ASCII//TRANSLIT") # usuwamy polskie znaki, bo to robi spore "kuku"
a = gsub(a, pattern = "\\?", replacement = "") # usuwamy znaki zapytania powstale po konwersji

Expand All @@ -39,27 +20,27 @@ clean_metadata_hydro <- function(address, interval){
a = gsub(x = a, pattern="\\^", replacement = "")

if (interval == "monthly") {
b <- list(data.frame(parameters = a[3:12])) # skład danych jeszcze nie wiem jak ominąć problem kontroli
b = list(data.frame(parameters = a[3:12])) # sklad danych jeszcze nie wiem jak ominąć problem kontroli
# ale on może się zmienić nie wiem czy nie lepiej wykluczyć ostatni rok
}
if (interval == "daily") {
b <- data.frame(parameters = a[3:12])
b = data.frame(parameters = a[3:12])
}
if (interval == "semiannual_and_annual") {
godzina <- paste0(a[15], ":", a[16]) # nie jestem pewien czy tak bo w dokumentacji jest podzial na dwie kolumny,
godzina = paste0(a[15], ":", a[16]) # nie jestem pewien czy tak bo w dokumentacji jest podzial na dwie kolumny,
#ale w pliku jest jedna kolumna a pomiaru brak
data <- c(a[12:14], godzina)
data_od <- paste0("wystapienie_od_", data)
data_do <- paste0("wystapienie_od_", data)
SPT <- unlist(strsplit(a[10], "]/")) # stan/przeplyw/temperatura
SPT[1] <- paste0(SPT[1], "]")
SPT[2] <- paste0(SPT[2], "]")
b <- NULL
data = c(a[12:14], godzina)
data_od = paste0("wystapienie_od_", data)
data_do = paste0("wystapienie_od_", data)
SPT = unlist(strsplit(a[10], "]/")) # stan/przeplyw/temperatura
SPT[1] = paste0(SPT[1], "]")
SPT[2] = paste0(SPT[2], "]")
b = NULL
for (i in seq_along(SPT)) {
tmp <- c(a[3:9], SPT[i], data_od, data_do)
b <- cbind(b, tmp)
tmp = c(a[3:9], SPT[i], data_od, data_do)
b = cbind(b, tmp)
}
b <- list("H" = data.frame(parameters = b[, 1]),
b = list("H" = data.frame(parameters = b[, 1]),
"Q" = data.frame(parameters = b[, 2]),
"T" = data.frame(parameters = b[, 3]))
}
Expand Down
48 changes: 19 additions & 29 deletions R/clean_metadata_meteo.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,61 +16,51 @@
#' }
#'

clean_metadata_meteo <- function(address, rank = "synop", interval = "hourly"){
clean_metadata_meteo = function(address, rank = "synop", interval = "hourly"){

temp = tempfile()


test_url(link = address, output = temp)
a = readLines(temp, warn = FALSE)

# if (!httr::http_error(address)) {
# a = readLines(address, warn = FALSE)
# } else {
# a = stop(call. = FALSE,
# paste0("\nDownload failed. ",
# "Check your internet connection or validate this url in your browser: ",
# url, "\n"))
# }

a <- iconv(a, from = "cp1250", to = "ASCII//TRANSLIT") # usuwamy polskie znaki, bo to robi spore "kuku"
a <- gsub(a, pattern = "\\?", replacement = "") # usuwamy znaki zapytania powstale po konwersji
a = iconv(a, from = "cp1250", to = "ASCII//TRANSLIT") # usuwamy polskie znaki, bo to robi spore "kuku"
a = gsub(a, pattern = "\\?", replacement = "") # usuwamy znaki zapytania powstale po konwersji

# additional workarounds for mac os but not only...
a = gsub(x = a, pattern="'", replacement = "")
a = gsub(x = a, pattern="\\^0", replacement = "")


a <- data.frame(V1 = a[nchar(a) > 0], stringsAsFactors = FALSE)
a = data.frame(V1 = a[nchar(a) > 0], stringsAsFactors = FALSE)

# to nie dziala na windowsie:
# a <- suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
# a = suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
# fileEncoding = "CP1250", stringsAsFactors = FALSE)))


length_char <- max(nchar(a$V1), na.rm = TRUE)
length_char = max(nchar(a$V1), na.rm = TRUE)

if(rank == "precip" && interval == "hourly") length_char <- 40 # wyjatek dla precipow
if(rank == "precip" && interval == "daily") length_char <- 40 # wyjatek dla precipow dobowych
if(rank == "synop" && interval == "hourly") length_char <- 60 # wyjatek dla synopow terminowych
if(rank == "climate" && interval == "monthly") length_char <- 52 # wyjatek dla synopow terminowych
if(rank == "precip" && interval == "hourly") length_char = 40 # wyjatek dla precipow
if(rank == "precip" && interval == "daily") length_char = 40 # wyjatek dla precipow dobowych
if(rank == "synop" && interval == "hourly") length_char = 60 # wyjatek dla synopow terminowych
if(rank == "climate" && interval == "monthly") length_char = 52 # wyjatek dla synopow terminowych

field <- substr(a$V1, length_char - 3, length_char)
field = substr(a$V1, length_char - 3, length_char)

if(rank == "synop" && interval == "monthly") {
length_char <- as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2
field <- substr(a$V1, length_char - 3, length_char + 2)
length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2
field = substr(a$V1, length_char - 3, length_char + 2)
}

a$field1 <- suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1]))))
a$field2 <- suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2]))))
a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1]))))
a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2]))))

a$V1 <- trimws(substr(a$V1, 1, nchar(a$V1) - 3))
a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3))

strsplit(x = a$V1, split = "/")

#a <- a[nchar(a$V1)>2,] # usuwamy puste lub prawie puste wiersze dodatkowo...
a <- a[!(is.na(a$field1) & is.na(a$field2)), ] # usuwanie info o statusach
colnames(a)[1] <- "parameters"
#a = a[nchar(a$V1)>2,] # usuwamy puste lub prawie puste wiersze dodatkowo...
a = a[!(is.na(a$field1) & is.na(a$field2)), ] # usuwanie info o statusach
colnames(a)[1] = "parameters"
a
}
22 changes: 11 additions & 11 deletions R/get_coord_from_string.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,24 @@
#'
#' @examples
#' \donttest{
#' txt <- "12120: Leba (Poland)\nLatitude: 54-45N Longitude: 017-32E Altitude: 2 m."
#' txt = "12120: Leba (Poland)\nLatitude: 54-45N Longitude: 017-32E Altitude: 2 m."
#' climate:::get_coord_from_string(txt, pattern = "Latitude")
#' }
#'
get_coord_from_string <- function(txt, pattern = "Longitude") {
tt <- gregexpr(pattern, txt)
start <- tt[[1]][1] + attributes(tt[[1]])$match.length + 1
tmp <- trimws(substr(txt, start = start, stop = start + 8))
tmp <- strsplit(tmp, "-")[[1]]
hemisphere <- gsub("[0-9]", "", strsplit(tmp, "-")[2])
hemisphere <- gsub(".*?(\\b[A-Za-z0-9 ]+\\b).*","\\1", hemisphere)
get_coord_from_string = function(txt, pattern = "Longitude") {
tt = gregexpr(pattern, txt)
start = tt[[1]][1] + attributes(tt[[1]])$match.length + 1
tmp = trimws(substr(txt, start = start, stop = start + 8))
tmp = strsplit(tmp, "-")[[1]]
hemisphere = gsub("[0-9]", "", strsplit(tmp, "-")[2])
hemisphere = gsub(".*?(\\b[A-Za-z0-9 ]+\\b).*","\\1", hemisphere)

tmp <- suppressWarnings(as.numeric(gsub("([0-9]+).*$", "\\1", strsplit(tmp, "-"))))
tmp = suppressWarnings(as.numeric(gsub("([0-9]+).*$", "\\1", strsplit(tmp, "-"))))

wsp <- suppressWarnings(as.numeric(tmp)[1] + (as.numeric(tmp)[2] * 5 / 3) / 100)
wsp = suppressWarnings(as.numeric(tmp)[1] + (as.numeric(tmp)[2] * 5 / 3) / 100)

if( hemisphere %in% c("W","S") ) {
wsp <- wsp * -1
wsp = wsp * -1
}
return(wsp)
}
Expand Down
10 changes: 5 additions & 5 deletions R/hydro_imgw.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,20 @@
#'
#' @examples
#' \donttest{
#' x <- hydro_imgw("monthly", year = 1999)
#' x = hydro_imgw("monthly", year = 1999)
#' head(x)
#' }
hydro_imgw <- function(interval, year, coords = FALSE, value = "H", station = NULL, col_names = "short", ...){
hydro_imgw = function(interval, year, coords = FALSE, value = "H", station = NULL, col_names = "short", ...){

if (interval == "daily"){
# dobowe
calosc <- hydro_imgw_daily(year = year, coords = coords, station = station, col_names = col_names, ...)
calosc = hydro_imgw_daily(year = year, coords = coords, station = station, col_names = col_names, ...)
} else if (interval == "monthly"){
#miesieczne
calosc <- hydro_imgw_monthly(year = year, coords = coords, station = station, col_names = col_names, ...)
calosc = hydro_imgw_monthly(year = year, coords = coords, station = station, col_names = col_names, ...)
} else if (interval == "semiannual_and_annual"){
# polroczne_i_roczne
calosc <- hydro_imgw_annual(year = year, coords = coords, value = value, station = station, col_names = col_names, ...)
calosc = hydro_imgw_annual(year = year, coords = coords, value = value, station = station, col_names = col_names, ...)
} else{
stop("Wrong `interval` value. It should be either 'daily', 'monthly', or 'semiannual_and_annual'.", call. = FALSE)
}
Expand Down
15 changes: 11 additions & 4 deletions R/hydro_imgw_annual.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
#' @param ... other parameters that may be passed to the 'shortening' function that shortens column names
#' @importFrom XML readHTMLTable
#' @importFrom utils download.file unzip read.csv
#' @importFrom data.table fread
#' @export
#'
#'
#' @examples
#' \donttest{
#' yearly = hydro_imgw_annual(year = 2000, value = "H", station = "ANNOPOL")
Expand All @@ -23,7 +24,7 @@ hydro_imgw_annual = function(year, coords = FALSE, value = "H", station = NULL,

# options(RCurlOptions = list(ssl.verifypeer = FALSE)) # required on windows for RCurl

check_locale()
translit = check_locale()

base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/"
interval = "semiannual_and_annual"
Expand Down Expand Up @@ -70,7 +71,13 @@ hydro_imgw_annual = function(year, coords = FALSE, value = "H", station = NULL,
#download.file(address, temp)
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")

if(translit){
data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1)))
} else {
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data1) = meta[[value]]$parameters
all_data[[i]] = data1
}
Expand Down Expand Up @@ -101,7 +108,7 @@ hydro_imgw_annual = function(year, coords = FALSE, value = "H", station = NULL,
}

all_data = all_data[order(all_data$`Nazwa stacji`, all_data$`Rok hydrologiczny`), ]
# dodanie opcji dla skracania kolumn i usuwania duplikatow:
# adding option for shortening column names and removing duplicates
all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)

return(all_data)
Expand Down
19 changes: 16 additions & 3 deletions R/hydro_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' @param ... other parameters that may be passed to the 'shortening' function that shortens column names
#' @importFrom XML readHTMLTable
#' @importFrom utils download.file unzip read.csv
#' @importFrom data.table fread
#' @export
#'
#' @examples \donttest{
Expand All @@ -21,7 +22,7 @@
hydro_imgw_daily = function(year, coords = FALSE, station = NULL, col_names= "short", ...){
#options(RCurlOptions = list(ssl.verifypeer = FALSE)) # required on windows for RCurl

check_locale()
translit = check_locale()

base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/"
interval = "daily"
Expand Down Expand Up @@ -68,7 +69,13 @@ hydro_imgw_daily = function(year, coords = FALSE, station = NULL, col_names= "sh
#download.file(address, temp)
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")

if(translit){
data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1)))
} else {
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data1) = meta[[1]][,1]
data=rbind(data,data1)
}
Expand All @@ -80,7 +87,13 @@ hydro_imgw_daily = function(year, coords = FALSE, station = NULL, col_names= "sh
test_url(address, temp)
unzip(zipfile = temp, exdir = temp2)
file2 = paste(temp2, dir(temp2), sep = "/")[1]
data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")

if(translit){
data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2)))
} else {
data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data2) = meta[[2]][, 1]

all_data[[i]] = merge(data, data2,
Expand Down
Loading

0 comments on commit 1c23d1f

Please sign in to comment.