Skip to content

Commit

Permalink
Merge pull request #57 from bczernecki/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
bczernecki authored May 2, 2021
2 parents 1af6d0f + b853f60 commit 1c23d1f
Show file tree
Hide file tree
Showing 49 changed files with 651 additions and 592 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: climate
Title: Interface to Download Meteorological (and Hydrological) Datasets
Version: 1.0.0
Version: 1.0.1
Authors@R: c(person(given = "Bartosz",
family = "Czernecki",
role = c("aut", "cre"),
Expand Down Expand Up @@ -33,7 +33,8 @@ Depends:
Imports:
XML,
httr,
curl
curl,
data.table
Suggests:
testthat,
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import(httr)
importFrom(XML,readHTMLTable)
importFrom(curl,curl_download)
importFrom(curl,has_internet)
importFrom(data.table,fread)
importFrom(stats,na.omit)
importFrom(stats,runif)
importFrom(utils,data)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# climate 1.0.1

* Added the `data.table` package to read CP1250-encoded files on machines that do not support this encoding (ASCII//TRANSLIT conversion is used instead)

# climate 0.9.9

* Changing URL `danepubliczne.imgw.pl` to `dane.imgw.pl` where needed
Expand Down
19 changes: 15 additions & 4 deletions R/check_locale.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
#' Check locale
#'
#' This is an extra check for some systems that make use of "C.UTF-8" that cannot parse properly tags used inside the Polish metservice's repository
#' This is an extra check for systems that use "C.UTF-8" or any ISO-like encoding recognized as causing potential problems;
#' the listed encodings cannot properly parse characters used in the Polish met service's repository, so ASCII//TRANSLIT will be forced instead
#' @noRd

check_locale = function(){
if(any(strsplit(Sys.getlocale(), "/")[[1]] == "C.UTF-8")){
message(" Your system locale contains 'C.UTF-8' which may cause trouble.

if(Sys.getlocale("LC_CTYPE") %in% c("C.UTF-8", "en_US.iso885915")){
locale = Sys.getlocale("LC_CTYPE")
message(paste0(" Your system locale is: " , locale," which may cause trouble.
Please consider changing it manually while working with climate, e.g.:
Sys.setlocale(category = 'LC_ALL', locale = 'en_US.UTF-8') ")
Sys.setlocale(category = 'LC_ALL', locale = 'en_US.UTF-8') "))
Sys.sleep(4)
return(1)

} else {

return(0)
}

}

47 changes: 14 additions & 33 deletions R/clean_metadata_hydro.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,13 @@
#' @param interval temporal interval
#' @importFrom utils read.fwf
#' @keywords internal
clean_metadata_hydro <- function(address, interval){
#miesieczne
#address="https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/miesieczne/mies_info.txt"
#dobowe
#address="https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/dobowe/codz_info.txt"
#"https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/dobowe/zjaw_info.txt"
#polroczne_i_roczne
#address="https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/polroczne_i_roczne/polr_info.txt"
#a <- suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
# fileEncoding = "CP1250", stringsAsFactors = FALSE)))
clean_metadata_hydro = function(address, interval){

temp = tempfile()

test_url(link = address, output = temp)
a = readLines(temp, warn = FALSE)

# if (!httr::http_error(address)) {
# a = readLines(address, warn = FALSE)
# } else {
# stop(call. = FALSE,
# paste0("\nDownload failed. ",
# "Check your internet connection or validate this url in your browser: ",
# url, "\n"))
# }


a = iconv(a, from = "cp1250", to = "ASCII//TRANSLIT") # usuwamy polskie znaki, bo to robi spore "kuku"
a = gsub(a, pattern = "\\?", replacement = "") # usuwamy znaki zapytania powstale po konwersji

Expand All @@ -39,27 +20,27 @@ clean_metadata_hydro <- function(address, interval){
a = gsub(x = a, pattern="\\^", replacement = "")

if (interval == "monthly") {
b <- list(data.frame(parameters = a[3:12])) # skład danych jeszcze nie wiem jak ominąć problem kontroli
b = list(data.frame(parameters = a[3:12])) # sklad danych jeszcze nie wiem jak ominąć problem kontroli
# ale on może się zmienić nie wiem czy nie lepiej wykluczyć ostatni rok
}
if (interval == "daily") {
b <- data.frame(parameters = a[3:12])
b = data.frame(parameters = a[3:12])
}
if (interval == "semiannual_and_annual") {
godzina <- paste0(a[15], ":", a[16]) # nie jestem pewien czy tak bo w dokumentacji jest podzial na dwie kolumny,
godzina = paste0(a[15], ":", a[16]) # nie jestem pewien czy tak bo w dokumentacji jest podzial na dwie kolumny,
#ale w pliku jest jedna kolumna a pomiaru brak
data <- c(a[12:14], godzina)
data_od <- paste0("wystapienie_od_", data)
data_do <- paste0("wystapienie_od_", data)
SPT <- unlist(strsplit(a[10], "]/")) # stan/przeplyw/temperatura
SPT[1] <- paste0(SPT[1], "]")
SPT[2] <- paste0(SPT[2], "]")
b <- NULL
data = c(a[12:14], godzina)
data_od = paste0("wystapienie_od_", data)
data_do = paste0("wystapienie_od_", data)
SPT = unlist(strsplit(a[10], "]/")) # stan/przeplyw/temperatura
SPT[1] = paste0(SPT[1], "]")
SPT[2] = paste0(SPT[2], "]")
b = NULL
for (i in seq_along(SPT)) {
tmp <- c(a[3:9], SPT[i], data_od, data_do)
b <- cbind(b, tmp)
tmp = c(a[3:9], SPT[i], data_od, data_do)
b = cbind(b, tmp)
}
b <- list("H" = data.frame(parameters = b[, 1]),
b = list("H" = data.frame(parameters = b[, 1]),
"Q" = data.frame(parameters = b[, 2]),
"T" = data.frame(parameters = b[, 3]))
}
Expand Down
48 changes: 19 additions & 29 deletions R/clean_metadata_meteo.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,61 +16,51 @@
#' }
#'

clean_metadata_meteo <- function(address, rank = "synop", interval = "hourly"){
clean_metadata_meteo = function(address, rank = "synop", interval = "hourly"){

temp = tempfile()


test_url(link = address, output = temp)
a = readLines(temp, warn = FALSE)

# if (!httr::http_error(address)) {
# a = readLines(address, warn = FALSE)
# } else {
# a = stop(call. = FALSE,
# paste0("\nDownload failed. ",
# "Check your internet connection or validate this url in your browser: ",
# url, "\n"))
# }

a <- iconv(a, from = "cp1250", to = "ASCII//TRANSLIT") # usuwamy polskie znaki, bo to robi spore "kuku"
a <- gsub(a, pattern = "\\?", replacement = "") # usuwamy znaki zapytania powstale po konwersji
a = iconv(a, from = "cp1250", to = "ASCII//TRANSLIT") # usuwamy polskie znaki, bo to robi spore "kuku"
a = gsub(a, pattern = "\\?", replacement = "") # usuwamy znaki zapytania powstale po konwersji

# additional workarounds for mac os but not only...
a = gsub(x = a, pattern="'", replacement = "")
a = gsub(x = a, pattern="\\^0", replacement = "")


a <- data.frame(V1 = a[nchar(a) > 0], stringsAsFactors = FALSE)
a = data.frame(V1 = a[nchar(a) > 0], stringsAsFactors = FALSE)

# to nie dziala na windowsie:
# a <- suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
# a = suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
# fileEncoding = "CP1250", stringsAsFactors = FALSE)))


length_char <- max(nchar(a$V1), na.rm = TRUE)
length_char = max(nchar(a$V1), na.rm = TRUE)

if(rank == "precip" && interval == "hourly") length_char <- 40 # wyjatek dla precipow
if(rank == "precip" && interval == "daily") length_char <- 40 # wyjatek dla precipow dobowych
if(rank == "synop" && interval == "hourly") length_char <- 60 # wyjatek dla synopow terminowych
if(rank == "climate" && interval == "monthly") length_char <- 52 # wyjatek dla synopow terminowych
if(rank == "precip" && interval == "hourly") length_char = 40 # wyjatek dla precipow
if(rank == "precip" && interval == "daily") length_char = 40 # wyjatek dla precipow dobowych
if(rank == "synop" && interval == "hourly") length_char = 60 # wyjatek dla synopow terminowych
if(rank == "climate" && interval == "monthly") length_char = 52 # wyjatek dla synopow terminowych

field <- substr(a$V1, length_char - 3, length_char)
field = substr(a$V1, length_char - 3, length_char)

if(rank == "synop" && interval == "monthly") {
length_char <- as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2
field <- substr(a$V1, length_char - 3, length_char + 2)
length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2
field = substr(a$V1, length_char - 3, length_char + 2)
}

a$field1 <- suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1]))))
a$field2 <- suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2]))))
a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1]))))
a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2]))))

a$V1 <- trimws(substr(a$V1, 1, nchar(a$V1) - 3))
a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3))

strsplit(x = a$V1, split = "/")

#a <- a[nchar(a$V1)>2,] # usuwamy puste lub prawie puste wiersze dodatkowo...
a <- a[!(is.na(a$field1) & is.na(a$field2)), ] # usuwanie info o statusach
colnames(a)[1] <- "parameters"
#a = a[nchar(a$V1)>2,] # usuwamy puste lub prawie puste wiersze dodatkowo...
a = a[!(is.na(a$field1) & is.na(a$field2)), ] # usuwanie info o statusach
colnames(a)[1] = "parameters"
a
}
22 changes: 11 additions & 11 deletions R/get_coord_from_string.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,24 @@
#'
#' @examples
#' \donttest{
#' txt <- "12120: Leba (Poland)\nLatitude: 54-45N Longitude: 017-32E Altitude: 2 m."
#' txt = "12120: Leba (Poland)\nLatitude: 54-45N Longitude: 017-32E Altitude: 2 m."
#' climate:::get_coord_from_string(txt, pattern = "Latitude")
#' }
#'
get_coord_from_string <- function(txt, pattern = "Longitude") {
tt <- gregexpr(pattern, txt)
start <- tt[[1]][1] + attributes(tt[[1]])$match.length + 1
tmp <- trimws(substr(txt, start = start, stop = start + 8))
tmp <- strsplit(tmp, "-")[[1]]
hemisphere <- gsub("[0-9]", "", strsplit(tmp, "-")[2])
hemisphere <- gsub(".*?(\\b[A-Za-z0-9 ]+\\b).*","\\1", hemisphere)
get_coord_from_string = function(txt, pattern = "Longitude") {
tt = gregexpr(pattern, txt)
start = tt[[1]][1] + attributes(tt[[1]])$match.length + 1
tmp = trimws(substr(txt, start = start, stop = start + 8))
tmp = strsplit(tmp, "-")[[1]]
hemisphere = gsub("[0-9]", "", strsplit(tmp, "-")[2])
hemisphere = gsub(".*?(\\b[A-Za-z0-9 ]+\\b).*","\\1", hemisphere)

tmp <- suppressWarnings(as.numeric(gsub("([0-9]+).*$", "\\1", strsplit(tmp, "-"))))
tmp = suppressWarnings(as.numeric(gsub("([0-9]+).*$", "\\1", strsplit(tmp, "-"))))

wsp <- suppressWarnings(as.numeric(tmp)[1] + (as.numeric(tmp)[2] * 5 / 3) / 100)
wsp = suppressWarnings(as.numeric(tmp)[1] + (as.numeric(tmp)[2] * 5 / 3) / 100)

if( hemisphere %in% c("W","S") ) {
wsp <- wsp * -1
wsp = wsp * -1
}
return(wsp)
}
Expand Down
10 changes: 5 additions & 5 deletions R/hydro_imgw.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,20 @@
#'
#' @examples
#' \donttest{
#' x <- hydro_imgw("monthly", year = 1999)
#' x = hydro_imgw("monthly", year = 1999)
#' head(x)
#' }
hydro_imgw <- function(interval, year, coords = FALSE, value = "H", station = NULL, col_names = "short", ...){
hydro_imgw = function(interval, year, coords = FALSE, value = "H", station = NULL, col_names = "short", ...){

if (interval == "daily"){
# dobowe
calosc <- hydro_imgw_daily(year = year, coords = coords, station = station, col_names = col_names, ...)
calosc = hydro_imgw_daily(year = year, coords = coords, station = station, col_names = col_names, ...)
} else if (interval == "monthly"){
#miesieczne
calosc <- hydro_imgw_monthly(year = year, coords = coords, station = station, col_names = col_names, ...)
calosc = hydro_imgw_monthly(year = year, coords = coords, station = station, col_names = col_names, ...)
} else if (interval == "semiannual_and_annual"){
# polroczne_i_roczne
calosc <- hydro_imgw_annual(year = year, coords = coords, value = value, station = station, col_names = col_names, ...)
calosc = hydro_imgw_annual(year = year, coords = coords, value = value, station = station, col_names = col_names, ...)
} else{
stop("Wrong `interval` value. It should be either 'daily', 'monthly', or 'semiannual_and_annual'.", call. = FALSE)
}
Expand Down
15 changes: 11 additions & 4 deletions R/hydro_imgw_annual.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
#' @param ... other parameters that may be passed to the 'shortening' function that shortens column names
#' @importFrom XML readHTMLTable
#' @importFrom utils download.file unzip read.csv
#' @importFrom data.table fread
#' @export
#'
#'
#' @examples
#' \donttest{
#' yearly = hydro_imgw_annual(year = 2000, value = "H", station = "ANNOPOL")
Expand All @@ -23,7 +24,7 @@ hydro_imgw_annual = function(year, coords = FALSE, value = "H", station = NULL,

# options(RCurlOptions = list(ssl.verifypeer = FALSE)) # required on windows for RCurl

check_locale()
translit = check_locale()

base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/"
interval = "semiannual_and_annual"
Expand Down Expand Up @@ -70,7 +71,13 @@ hydro_imgw_annual = function(year, coords = FALSE, value = "H", station = NULL,
#download.file(address, temp)
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")

if(translit){
data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1)))
} else {
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data1) = meta[[value]]$parameters
all_data[[i]] = data1
}
Expand Down Expand Up @@ -101,7 +108,7 @@ hydro_imgw_annual = function(year, coords = FALSE, value = "H", station = NULL,
}

all_data = all_data[order(all_data$`Nazwa stacji`, all_data$`Rok hydrologiczny`), ]
# dodanie opcji dla skracania kolumn i usuwania duplikatow:
# adding option for shortening column names and removing duplicates
all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)

return(all_data)
Expand Down
19 changes: 16 additions & 3 deletions R/hydro_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' @param ... other parameters that may be passed to the 'shortening' function that shortens column names
#' @importFrom XML readHTMLTable
#' @importFrom utils download.file unzip read.csv
#' @importFrom data.table fread
#' @export
#'
#' @examples \donttest{
Expand All @@ -21,7 +22,7 @@
hydro_imgw_daily = function(year, coords = FALSE, station = NULL, col_names= "short", ...){
#options(RCurlOptions = list(ssl.verifypeer = FALSE)) # required on windows for RCurl

check_locale()
translit = check_locale()

base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/"
interval = "daily"
Expand Down Expand Up @@ -68,7 +69,13 @@ hydro_imgw_daily = function(year, coords = FALSE, station = NULL, col_names= "sh
#download.file(address, temp)
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")

if(translit){
data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1)))
} else {
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data1) = meta[[1]][,1]
data=rbind(data,data1)
}
Expand All @@ -80,7 +87,13 @@ hydro_imgw_daily = function(year, coords = FALSE, station = NULL, col_names= "sh
test_url(address, temp)
unzip(zipfile = temp, exdir = temp2)
file2 = paste(temp2, dir(temp2), sep = "/")[1]
data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")

if(translit){
data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2)))
} else {
data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data2) = meta[[2]][, 1]

all_data[[i]] = merge(data, data2,
Expand Down
Loading

0 comments on commit 1c23d1f

Please sign in to comment.