diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a3ac61827b2c1da67b69c948f88e5b25cd186f16..4fa82672a27c9928b74f3fd72f89ef2bff86faa2 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [arrow] pull_request: - branches: [main, master] + branches: [arrow] name: R-CMD-check diff --git a/DESCRIPTION b/DESCRIPTION index be7b5c4539f1e637aa28eed304b7baa8dbb64519..34071ea2a27c09e022273c5314e59c8f82284b27 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,13 +14,14 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Imports: checkmate, - DBI, - duckdb, + rlang, + dplyr, + arrow, ggplot2, - glue, knitr, lobstr, - rmarkdown + rmarkdown, + magrittr Suggests: testthat (>= 3.0.0), spelling diff --git a/NAMESPACE b/NAMESPACE index 00067754fb832869e832358f7b406dee8d7734f6..452ee49f573200d6a529a11f51aa6515c57a0755 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,10 @@ # Generated by roxygen2: do not edit by hand +export("%>%") export(create_report) export(fetch_data) export(product_info) import(ggplot2) import(knitr) +importFrom(magrittr,"%>%") +importFrom(rlang,.data) diff --git a/R/fetch_data.R b/R/fetch_data.R index 26a5441be60851ddbd256dd923dff29ebac6880d..ccb299b31cc1a988b51ea999927a2e10d1170a0c 100644 --- a/R/fetch_data.R +++ b/R/fetch_data.R @@ -15,6 +15,7 @@ #' #' @return A `data.frame` with date and value columns. #' @export +#' @importFrom rlang .data #' #' @examples #' \donttest{ @@ -35,6 +36,11 @@ fetch_data <- function(code_muni, product, indicator, statistics, date_start, da checkmate::assert_date(date_start) checkmate::assert_date(date_end, lower = date_start) + # Check Arrow capabilities + if(!arrow::arrow_with_gcs()){ + stop("Your {arrow} package installation do not have GCS capabilities. Refer to https://arrow.apache.org/docs/r/articles/install.html") + } + # Argument check and retrieve product info if(product == "brdwgd"){ # Check indicator @@ -45,6 +51,10 @@ fetch_data <- function(code_muni, product, indicator, statistics, date_start, da # Retrieve indicator info indi_info <- brclimr::brdwgd_data[[indicator]] + + # Product bucket + bucket <- arrow::gs_bucket("brclim-brdwgd", anonymous = TRUE) + } else if(product == "terraclimate"){ # Check indicator checkmate::assert_choice( @@ -54,10 +64,10 @@ fetch_data <- function(code_muni, product, indicator, statistics, date_start, da # Retrive indicator info indi_info <- brclimr::terraclimate_data[[indicator]] - } - # Retrive indicator link - indi_link <- indi_info[["link"]] + # Product bucket + bucket <- arrow::gs_bucket("brclim-terraclimate", anonymous = TRUE) + } # Check statistics checkmate::assert_choice( @@ -69,27 +79,19 @@ fetch_data <- function(code_muni, product, indicator, statistics, date_start, da indi_statname <- indi_info[["stats"]][[statistics]] - # Create duckdb connection - conn <- DBI::dbConnect(duckdb::duckdb()) - - # Install and load httpfs - DBI::dbExecute(conn, "INSTALL httpfs;") - DBI::dbExecute(conn, "LOAD httpfs;") - - # Fetch and return data - res <- DBI::dbGetQuery( - conn, - glue::glue("SELECT date, value - FROM '{indi_link}' - WHERE (code_muni = {code_muni} AND - date >= '{date_start}' AND - date <= '{date_end}' AND - name = '{indi_statname}')") - ) + # Open dataset + dataset <- arrow::open_dataset(sources = bucket, partitioning = indicator) - # Disconnect database - DBI::dbDisconnect(conn, shutdown = TRUE) + # Query dataset + cod <- code_muni + res <- dataset %>% + dplyr::filter(indicator == indicator) %>% + dplyr::filter(.data$name == indi_statname) %>% + dplyr::filter(.data$date >= date_start & .data$date <= date_end) %>% + dplyr::filter(.data$code_muni == cod) %>% + dplyr::select(.data$date, .data$value) %>% + dplyr::collect() - # Return falues + # Return values return(res) } diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 0000000000000000000000000000000000000000..fd0b1d13db4ff91b7f836f72b7d5d88d958f6e1f --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,14 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. +NULL diff --git a/README.Rmd b/README.Rmd index 75711a9bf851e01ff00dd72442e82a557cee2fe6..fd4cc1bab7699b91d8dace0f7d60511518706251 100644 --- a/README.Rmd +++ b/README.Rmd @@ -29,27 +29,19 @@ Details about the used methodology to calculate the zonal statistics are availab ## Installation -### Linux and MacOS - -#### Stable version +### Stable version ```{r eval=FALSE} install.packages("brclimr") ``` -#### Development version - -```{r eval=FALSE} -remotes::install_github(repo = "rfsaldanha/brclimr") -``` - -### Windows +### Development version ```{r eval=FALSE} -remotes::install_github(repo = "rfsaldanha/brclimr", ref = "arrow") +remotes::install_github(repo = "rfsaldanha/brclimr", ref = "duckdb") ``` -Note: due to an [issue](https://github.com/rfsaldanha/brclimr/issues/1) with a package dependency, we adopted a different approach to query parquet files on S3 buckets under Windows OS. This solution is slower, and we expect to archive this branch as soon as the issue is resolved. +Note: the stable version adopted a universal solution for querying Parquet files that works on all operating systems but it is slower. A development version is available for Linux and MacOS with a faster solution. More details on this [issue](https://github.com/rfsaldanha/brclimr/issues/1). ## Example diff --git a/README.md b/README.md index 33e5b5f1a13e7e2844b15314762b63aa5adcae15..cdfaa737ca72afe46979633d74168f5e7d7ef4bb 100644 --- a/README.md +++ b/README.md @@ -29,30 +29,23 @@ available at *Articles \> Methodology*. ## Installation -### Linux and MacOS - -#### Stable version +### Stable version ``` r install.packages("brclimr") ``` -#### Development version +### Development version ``` r -remotes::install_github(repo = "rfsaldanha/brclimr") +remotes::install_github(repo = "rfsaldanha/brclimr", ref = "duckdb") ``` -### Windows - -``` r -remotes::install_github(repo = "rfsaldanha/brclimr", ref = "arrow") -``` - -Note: due to an [issue](https://github.com/rfsaldanha/brclimr/issues/1) -with a package dependency, we adopted a different approach to query -parquet files on S3 buckets under Windows OS. This solution is slower, -and we expect to archive this branch as soon as the issue is resolved. +Note: the stable version adopted a universal solution for querying +Parquet files that works on all operating systems but it is slower. A +development version is available for Linux and MacOS with a faster +solution. More details on this +[issue](https://github.com/rfsaldanha/brclimr/issues/1). ## Example @@ -72,13 +65,15 @@ fetch_data( date_start = as.Date("2010-10-15"), date_end = as.Date("2010-10-20") ) -#> date value -#> 1 2010-10-15 74.48010 -#> 2 2010-10-16 73.53403 -#> 3 2010-10-17 77.84841 -#> 4 2010-10-18 90.10590 -#> 5 2010-10-19 74.33522 -#> 6 2010-10-20 71.50061 +#> # A tibble: 6 × 2 +#> date value +#> +#> 1 2010-10-15 74.5 +#> 2 2010-10-16 73.5 +#> 3 2010-10-17 77.8 +#> 4 2010-10-18 90.1 +#> 5 2010-10-19 74.3 +#> 6 2010-10-20 71.5 ``` If you need to query several municipalities, indicators and zonal @@ -145,7 +140,7 @@ ggplot(data = rbind(tmax, tmin), aes(x = date, y = value, color = name)) + theme(legend.position = "bottom", legend.direction = "horizontal") ``` - + ``` r ggplot(data = pr, aes(x = date, y = value)) + @@ -162,4 +157,4 @@ ggplot(data = pr, aes(x = date, y = value)) + theme(legend.position = "bottom", legend.direction = "horizontal") ``` - + diff --git a/data-raw/brdwgd_data.R b/data-raw/brdwgd_data.R index c8353424f3673740629047112fee9c61faa30d23..2697bb29be60648439230eb48a1a1541826c964c 100644 --- a/data-raw/brdwgd_data.R +++ b/data-raw/brdwgd_data.R @@ -2,7 +2,6 @@ brdwgd_data <- list( "tmax" = list( - "link" = "https://brdwgd.nyc3.cdn.digitaloceanspaces.com/parquet%2Ftmax.parquet", "name" = "Maximum temperature", "unit" = "\u00B0C", "date_range" = "Daily, 1961-01-01 to 2020-07-31", @@ -14,7 +13,6 @@ brdwgd_data <- list( ) ), "tmin" = list( - "link" = "https://brdwgd.nyc3.cdn.digitaloceanspaces.com/parquet%2Ftmin.parquet", "name" = "Minimum temperature", "unit" = "\u00B0C", "date_range" = "Daily, 1961-01-01 to 2020-07-31", @@ -26,7 +24,6 @@ brdwgd_data <- list( ) ), "pr" = list( - "link" = "https://brdwgd.nyc3.cdn.digitaloceanspaces.com/parquet%2Fpr.parquet", "name" = "Precipitation", "unit" = "mm", "date_range" = "Daily, 1961-01-01 to 2020-07-31", @@ -39,7 +36,6 @@ brdwgd_data <- list( ) ), "eto" = list( - "link" = "https://brdwgd.nyc3.cdn.digitaloceanspaces.com/parquet%2Feto.parquet", "name" = "Evapotranspiration", "unit" = "mm", "date_range" = "Daily, 1961-01-01 to 2020-07-31", @@ -52,7 +48,6 @@ brdwgd_data <- list( ) ), "rh" = list( - "link" = "https://brdwgd.nyc3.cdn.digitaloceanspaces.com/parquet%2Frh.parquet", "name" = "Relative humidity", "unit" = "%", "date_range" = "Daily, 1961-01-01 to 2020-07-31", @@ -64,7 +59,6 @@ brdwgd_data <- list( ) ), "rs" = list( - "link" = "https://brdwgd.nyc3.cdn.digitaloceanspaces.com/parquet%2Frs.parquet", "name" = "Solar radiation", "unit" = "MJ/m2", "date_range" = "Daily, 1961-01-01 to 2020-07-31", @@ -76,7 +70,6 @@ brdwgd_data <- list( ) ), "u2" = list( - "link" = "https://brdwgd.nyc3.cdn.digitaloceanspaces.com/parquet%2Fu2.parquet", "name" = "Wind speed", "unit" = "m/2", "date_range" = "Daily, 1961-01-01 to 2020-07-31", diff --git a/data-raw/terraclimate_data.R b/data-raw/terraclimate_data.R index 97fb360a586084c4136efdbd62c53f6e1dd27a2d..7d1a369d2acec0d66d9da39ff3f45038fa85e9f6 100644 --- a/data-raw/terraclimate_data.R +++ b/data-raw/terraclimate_data.R @@ -2,7 +2,6 @@ terraclimate_data <- list( "tmax" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Ftmax.parquet", "name" = "Maximum temperature", "detail" = "Average for month", "unit" = "\u00B0C", @@ -15,7 +14,6 @@ terraclimate_data <- list( ) ), "tmin" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Ftmin.parquet", "name" = "Minimum temperature", "detail" = "Average for month", "unit" = "\u00B0C", @@ -28,7 +26,6 @@ terraclimate_data <- list( ) ), "ppt" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fppt.parquet", "name" = "Precipitation", "detail" = "Monthly total", "unit" = "mm", @@ -42,7 +39,6 @@ terraclimate_data <- list( ) ), "aet" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Faet.parquet", "name" = "Actual Evapotranspiration", "detail" = "Monthly total", "unit" = "mm", @@ -56,7 +52,6 @@ terraclimate_data <- list( ) ), "def" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fdef.parquet", "name" = "Climate Water Deficit", "detail" = "Monthly total", "unit" = "mm", @@ -70,7 +65,6 @@ terraclimate_data <- list( ) ), "pdsi" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fpdsi.parquet", "name" = "Palmer Drought Severity Index", "detail" = "At end of month", "unit" = "unitless", @@ -83,7 +77,6 @@ terraclimate_data <- list( ) ), "pet" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fpet.parquet", "name" = "Potential evapotranspiration", "detail" = "Monthly total", "unit" = "mm", @@ -97,7 +90,6 @@ terraclimate_data <- list( ) ), "q" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fq.parquet", "name" = "Runoff", "detail" = "Monthly total", "unit" = "mm", @@ -111,7 +103,6 @@ terraclimate_data <- list( ) ), "soil" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fsoil.parquet", "name" = "Soil Moisture", "detail" = "Total column, at end of month", "unit" = "mm", @@ -125,7 +116,6 @@ terraclimate_data <- list( ) ), "srad" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fsrad.parquet", "name" = "Downward surface shortwave radiation", "unit" = "W/m2", "date_range" = "Monthly, 1958-01 to 2021-12", @@ -138,7 +128,6 @@ terraclimate_data <- list( ) ), "swe" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fswe.parquet", "name" = "Snow water equivalent", "detail" = "At end of month", "unit" = "mm", @@ -152,7 +141,6 @@ terraclimate_data <- list( ) ), "vap" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fvap.parquet", "name" = "Vapor pressure", "detail" = "Average for month", "unit" = "kPa", @@ -165,7 +153,6 @@ terraclimate_data <- list( ) ), "vpd" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fvpd.parquet", "name" = "Vapor Pressure Deficit", "detail" = "Average for month", "unit" = "kPa", @@ -178,7 +165,6 @@ terraclimate_data <- list( ) ), "ws" = list( - "link" = "https://terraclimate.nyc3.cdn.digitaloceanspaces.com/parquet%2Fws.parquet", "name" = "Wind speed", "detail" = "Average for month", "unit" = "m/s", diff --git a/data/brdwgd_data.rda b/data/brdwgd_data.rda index 1666fce360d388057cf02905b29dd8621df1ea50..7a751a23f06fa7942050c0e6399f3ac9a4099126 100644 Binary files a/data/brdwgd_data.rda and b/data/brdwgd_data.rda differ diff --git a/data/terraclimate_data.rda b/data/terraclimate_data.rda index ad5bb32ace0966caae6bf0de6d9f652f3d95abf8..8f6c260bcb66d532aabf4ec11ce663659b945f3a 100644 Binary files a/data/terraclimate_data.rda and b/data/terraclimate_data.rda differ diff --git a/man/figures/README-unnamed-chunk-8-1.png b/man/figures/README-unnamed-chunk-8-1.png index 77a54858d02a661f3d3e15976cc5af9c7d8b347f..b29408fe8490ec4223f7776f75a566b08152416b 100644 Binary files a/man/figures/README-unnamed-chunk-8-1.png and b/man/figures/README-unnamed-chunk-8-1.png differ diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000000000000000000000000000000000000..a648c2969b222841abe76fb2e13c62c351078b2e --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling \code{rhs(lhs)}. +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal} diff --git a/tests/testthat/test-fetch_data.R b/tests/testthat/test-fetch_data.R index 1beb84981eae4ba6bef138368b0a5266c20a058f..11c9c33fa03d90f7fc5a9f485b44bbd29d079fd7 100644 --- a/tests/testthat/test-fetch_data.R +++ b/tests/testthat/test-fetch_data.R @@ -10,7 +10,7 @@ test_that("fetch data from brdwgd works", { date_end = as.Date("2010-10-20") ) - expect_equal(class(res), "data.frame") + expect_true("data.frame" %in% class(res)) }) test_that("fetch data from terraclimate works", { @@ -25,5 +25,5 @@ test_that("fetch data from terraclimate works", { date_end = as.Date("2010-03-01") ) - expect_equal(class(res), "data.frame") + expect_true("data.frame" %in% class(res)) })