diff --git a/R/ByChunks.R b/R/ByChunks.R index dd101120d1b394ba8cae6eca7f8bf1cb43688f8f..8185763bf5c31d388bdb28d5ab3873d8636f46d9 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -354,7 +354,17 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # Check all input headers have matching dimensions cube_index <- 1 for (cube_header in cube_headers) { - if (!all(attr(cube_header, 'Dimensions') == all_dims_merged[names(attr(cube_header, 'Dimensions'))])) { + + # Check if all the margin dims are consistent among datasets + if (!all(chunked_dims %in% names(attr(cube_header, "Dimensions")))) { + trouble_dim_name <- chunked_dims[which(!chunked_dims %in% + names(attr(cube_header, "Dimensions")))] + stop(paste0("Found margin dimension, ", toString(trouble_dim_name), + ", is not in input data ", cube_index, ".")) + } + + # Only check margin dimensions (i.e., chunked_dims) + if (!all(attr(cube_header, 'Dimensions')[chunked_dims] == all_dims_merged[names(attr(cube_header, 'Dimensions'))][chunked_dims])) { stop("All provided 'cube_headers' must have matching dimension lengths ", "with each other.") } diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 06f2c044eb64673cedaf1141a00c0cc69bc2b5b5..2d1ab9d9a3e7c031e050d4bff1d647c385f7ce0d 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -72,8 +72,9 @@ You can find more explanation in FAQ [How-to-20](inst/doc/faq.md#20-use-metadata 10. [Apply an existing mask on data](inst/doc/usecase/ex2_10_existing_mask.R) This use case shows you how to apply the existing mask file on your data. If you need to create the mask file on your own, go to ex2_9_mask.R. - - - + 11. [Two datasets with different length of target dimensions](inst/doc/usecase/ex2_11_two_dat_inconsistent_target_dim.R) + This use case uses experimental and the corresponding observational data to calculate +the temporal mean and spatial weighted mean. Notice that the spatial resolutions of the two +datasets are different, but it still works because lat and lon are target dimensions. diff --git a/inst/doc/usecase/ex2_11_two_dat_inconsistent_target_dim.R b/inst/doc/usecase/ex2_11_two_dat_inconsistent_target_dim.R new file mode 100644 index 0000000000000000000000000000000000000000..69267b06c60749b88c4ae0f1514ef0255e162955 --- /dev/null +++ b/inst/doc/usecase/ex2_11_two_dat_inconsistent_target_dim.R @@ -0,0 +1,125 @@ +# Author: Chihchung Chou, An-Chi Ho +# Date: 1st July 2021 +# ------------------------------------------------------------------ +# This use case uses experimental and the corresponding observational data to calculate +# the temporal mean and spatial weighted mean. Notice that the spatial resolutions of the +# two datasets are different, but it still works because lat and lon are target dimensions. +# ------------------------------------------------------------------ +library(startR) + + +# exp +repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', + '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') + +exp <- Start(dat = repos_exp, + var = 'tas', + sdate = as.character(c(2005:2008)), + time = indices(1:3), + lat = 'all', + lon = 'all', + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(lon = NULL, + lat = NULL, + time = 'sdate'), + retrieve = FALSE) + +lons_exp <- as.vector(attr(exp, 'Variables')$common$lon) +lats_exp <- as.vector(attr(exp, 'Variables')$common$lat) +dates_exp <- attr(exp, 'Variables')$common$time + +attr(exp, 'Dimensions') +# dat var sdate time lat lon +# 1 1 4 3 256 512 +dates_exp +# [1] "2005-01-16 12:00:00 UTC" "2006-01-16 12:00:00 UTC" +# [3] "2007-01-16 12:00:00 UTC" "2008-01-16 12:00:00 UTC" +# [5] "2005-02-15 00:00:00 UTC" "2006-02-15 00:00:00 UTC" +# [7] "2007-02-15 00:00:00 UTC" "2008-02-15 12:00:00 UTC" +# [9] "2005-03-16 12:00:00 UTC" "2006-03-16 12:00:00 UTC" +#[11] "2007-03-16 12:00:00 UTC" "2008-03-16 12:00:00 UTC" + + +# obs + path.obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$date$.nc' + obs <- Start(dat = path.obs, + var = 'tas', + date = unique(format(dates_exp, '%Y%m')), + time = values(dates_exp), + time_across = 'date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + lat = 'all', + lon = 'all', + synonims = list(lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lon = NULL, + lat = NULL, + time = 'date'), + retrieve = FALSE) + +lons_obs <- as.vector(attr(obs, 'Variables')$common$lon) +lats_obs <- as.vector(attr(obs, 'Variables')$common$lat) +dates_obs <- attr(obs, 'Variables')$common$time + + +attr(obs, 'Dimensions') +# data var sdate time lat lon +# 1 1 4 3 721 1440 +dates_obs +# [1] "2005-01-16 11:30:00 UTC" "2006-01-16 11:30:00 UTC" +# [3] "2007-01-16 11:30:00 UTC" "2008-01-16 11:30:00 UTC" +# [5] "2005-02-14 23:30:00 UTC" "2006-02-14 23:30:00 UTC" +# [7] "2007-02-14 23:30:00 UTC" "2008-02-15 11:30:00 UTC" +# [9] "2005-03-16 11:30:00 UTC" "2006-03-16 11:30:00 UTC" +#[11] "2007-03-16 11:30:00 UTC" "2008-03-16 11:30:00 UTC" + + +fun <- function(exp, obs, + lons_exp = lons_exp, lats_exp = lats_exp, + lons_obs = lons_obs, lats_obs = lats_obs) { + # exp + e <- s2dv::MeanDims(drop(exp), 'time') + sst.e <- ClimProjDiags::WeightedMean(e, lons_exp, lats_exp, + londim = which(names(dim(e)) == 'lon'), + latdim = which(names(dim(e)) == 'lat')) + index.exp <- (sst.e - mean(sst.e))/sd(sst.e) + + # obs + o <- s2dv::MeanDims(drop(obs), 'time') + sst.o <- ClimProjDiags::WeightedMean(o, lons_obs, lats_obs, + londim = which(names(dim(o)) == 'lon'), + latdim = which(names(dim(o)) == 'lat')) + index.obs <- (sst.o - mean(sst.o))/sd(sst.o) + + # give dim name + dim(index.exp) <- c(sdate = length(index.exp)) + dim(index.obs) <- c(sdate = length(index.obs)) + + return(list(ind_exp = index.exp, ind_obs = index.obs)) + +} + +# If ClimProjDiags::WeightedMean accepts two-dim input, 'sdate' can be margin dimension. +step <- Step(fun, + target_dims = list(exp = c('sdate', 'time', 'lat', 'lon'), + obs = c('sdate', 'time', 'lat', 'lon')), + output_dims = list(ind_exp = 'sdate', ind_obs = 'sdate')) + +workflow <- AddStep(list(exp = exp, obs = obs), step, + lons_exp = lons_exp, lats_exp = lats_exp, + lons_obs = lons_obs, lats_obs = lats_obs) + +res <- Compute(workflow$ind_exp, + chunks = list(var = 1)) + +str(res) +#List of 2 +# $ ind_exp: num [1:4, 1, 1] 1.195 0.422 -0.6 -1.017 +# $ ind_obs: num [1:4, 1, 1] 0.4642 0.0206 0.9123 -1.3971 +# ... +# ... + + diff --git a/inst/doc/usecase/ex2_4_two_func.R b/inst/doc/usecase/ex2_4_two_func.R index 4f3da94e554a396c29aa6f7a84646787ac103934..d4d1b5f14a6f1eb2a162f2becd80f50c06b1a559 100644 --- a/inst/doc/usecase/ex2_4_two_func.R +++ b/inst/doc/usecase/ex2_4_two_func.R @@ -14,38 +14,35 @@ library(startR) retrieve = FALSE) fun_deb3 <- function(x) { - source("/esarchive/scratch/nperez/Season_v2.R") lons_data = as.vector(attr(x, 'Variables')$dat1$longitude) lats_data = as.vector(attr(x, 'Variables')$dat1$latitude) - resgrid = "r360x180" # prlr - y = Season_v2(x, posdim = 'time', monini = 1, moninf = 1, monsup = 3) - r <- s2dverification::CDORemap(y, lons_data, lats_data, resgrid, - 'bil', crop = FALSE, force_remap = TRUE)[[1]] + resgrid = "r360x180" +# y <- s2dv::Season(x, time_dim = 'time', monini = 1, moninf = 1, monsup = 3) + y <- apply(x, c(1, 2), s2dv:::.Season, monini = 1, moninf = 1, monsup = 3) + r <- s2dv::CDORemap(y, lons_data, lats_data, resgrid, + 'bil', crop = FALSE, force_remap = TRUE)[[1]] return(r) } step4 <- Step(fun = fun_deb3, target_dims = c('latitude','longitude', 'time'), - output_dims = c('latitude', 'longitude', 'time'), + output_dims = c('latitude', 'longitude'), use_attributes = list(data = "Variables")) - wf4 <- AddStep(data, step4) + wf4 <- AddStep(list(data = data), step4) ## locally res4 <- Compute(workflow = wf4, chunks = list(ensemble = 2, sdate = 2)) - dim(res4$output1) - head(res4$output1) - summary(res4$output1) # ------------------------------------------------------------------ # Output: -#> dim(res4$output1) -# latitude longitude time dat var sdate ensemble -# 180 360 1 1 1 2 2 -#> head(res4$output1) +dim(res4$output1) +# latitude longitude dat var sdate ensemble +# 180 360 1 1 2 2 +head(res4$output1) #[1] 237.1389 237.2601 238.0882 238.0312 237.7883 238.4835 -#> summary(res4$output1) +summary(res4$output1) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 227.3 259.6 280.8 277.1 296.2 306.7 # ------------------------------------------------------------------ diff --git a/inst/doc/usecase/ex2_6_ext_param_func.R b/inst/doc/usecase/ex2_6_ext_param_func.R index 302b52a8697d7aaf394b2554fe89819941cb9732..8bd128fa6e1465cbb247bb093ad89b82ca2e9363 100644 --- a/inst/doc/usecase/ex2_6_ext_param_func.R +++ b/inst/doc/usecase/ex2_6_ext_param_func.R @@ -88,15 +88,15 @@ # Notice that the function uses rnorm() inside. So the results will be different. # ----------------------------------------------------------- -#names(res) +names(res) #[1] "strat" "t_test" -#> dim(res$strat) +dim(res$strat) # phase longitude latitude # 8 30 20 -#> summary(res$strat) +summary(res$strat) # Min. 1st Qu. Median Mean 3rd Qu. Max. #-0.133300 -0.032530 -0.001822 -0.005715 0.031700 0.094220 -#> res$strat[1:5, 1:2, 1] +res$strat[1:5, 1:2, 1] # [,1] [,2] #[1,] -0.04661354 -0.04661539 #[2,] -0.01058483 -0.01053589 diff --git a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R index f24fa42a45b4f2a4fad8505bdd74b6019608557c..8b746071ecff79ef898695fffb2c499ac4fa948d 100644 --- a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R +++ b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R @@ -33,7 +33,7 @@ func <- function(x, y) { - crps <- mean(SpecsVerification::EnsCrps(x, y, R.new = Inf)) + crps <- mean(SpecsVerification::EnsCrps(x, y, R.new = Inf), na.rm = T) return(crps) } step <- Step(func, target_dims = list(c('sdate', 'ensemble'), c('sdate')), @@ -100,7 +100,7 @@ # 1 1 1 256 512 summary(res$output1) # Min. 1st Qu. Median Mean 3rd Qu. Max. -#0.09941 0.37760 0.71640 0.83570 1.20300 6.23400 +#0.09882 0.37815 0.71648 0.83638 1.20353 6.23452 # Plotting diff --git a/inst/doc/usecase/ex2_9_mask.R b/inst/doc/usecase/ex2_9_mask.R index 13be87888875d37d123b26d81af25da4b9dc13d5..aca6162a34e01c400fd5c40f3d6731f64fa56539 100644 --- a/inst/doc/usecase/ex2_9_mask.R +++ b/inst/doc/usecase/ex2_9_mask.R @@ -107,15 +107,20 @@ wf_mask <- AddStep(list(data, mask), stepMask) res <- Compute(workflow = wf_mask, chunks = list(latitude = 2, longitude = 2)) + ##################################################################### # -------------------------------------------------------------------------------- ##################################################################### # Extra lines for output verification: # Output verification: - -summary(res$output1) dim(res$output1) +# var latitude longitude +# 1 42 40 +summary(res$output1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's +# 295.4 300.0 300.2 300.0 300.5 301.0 840 head(res$output1) +#[1] 300.0644 NA NA NA NA NA mask_loaded <- Start(dat = path, var = 'mask', @@ -125,8 +130,13 @@ mask_loaded <- Start(dat = path, longitude = 'dat'), retrieve = TRUE) summary(res$output1[mask_loaded == 0]) # All are NA's: correct +# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's +# NA NA NA NaN NA NA 840 summary(res$output1[mask_loaded == 1]) # There is no NA's: correct +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 295.4 300.0 300.2 300.0 300.5 301.0 sum(mask_loaded == 0) # The number of NA's are 840: correct +#[1] 840 # compare values: data_loaded <- Start(dat = repos, @@ -141,9 +151,13 @@ data_loaded <- Start(dat = repos, time = 'sdate'), retrieve = TRUE) mean(data_loaded[1,1, , , ,1,1]) +#[1] 300.0644 res$output1[1,1,1] +#[1] 300.0644 mean(data_loaded[1,1, , , ,1,2]) +#[1] 300.3169 res$output1[1,1,2] +#[1] 300.3169 out <- mask_loaded for (i in 1:(dim(data_loaded)['latitude'])) { diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R new file mode 100644 index 0000000000000000000000000000000000000000..241ab6e70404ca670238398b9fd28bf05eb3b119 --- /dev/null +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -0,0 +1,138 @@ +context("Compute()/ByChunks(): dimension consistence check") +# If dataset are more than 1 (e.g., exp and obs), ByChunks() checks if +# they have consistent dimensions in favor of Apply() computation. However, +# only margin dimensions need to be identical. Target dimensions can have +# different lengths. + +path.exp <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path.obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$date$.nc' + +var <- 'tos' +y1 <- 1981 +y2 <- 1983 + +lons.min <- 220 +lons.max <- 240 +lats.min <- -5 +lats.max <- 5 + +sdate <- paste0(y1:y2, '1201') + +suppressWarnings( +exp <- Start(data = path.exp, + var = var, + member = indices(1:2), + sdate = sdate, + time = 1:3, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude'), + member = c('member', 'ensemble')), + return_vars = list(longitude = NULL, + latitude = NULL, + time = 'sdate'), + retrieve = FALSE) +) +lons.exp <- attr(exp, 'Variables')$common$longitude +lats.exp <- attr(exp, 'Variables')$common$latitude +dates.exp <- attr(exp, 'Variables')$common$time + +# Manually create date/ime +dates.obs <- c(paste0(y1:y2, '1201'), + paste0((y1 + 1):(y2 + 1), '01', '01'), + paste0((y1 + 1):(y2 + 1), '02', '01')) +time.obs <- as.POSIXct(dates.obs, "%Y%m%d", + origin = "1981-12", tz = 'UTC') +dim(time.obs) <- c(dim(dates.exp)['sdate'], dim(dates.exp)['time']) + +suppressWarnings( +obs <- Start(data = path.obs, + var = var, + date = unique(format(time.obs, '%Y%m')), + time = values(time.obs), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + time_across = 'date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(longitude = NULL, + latitude = NULL, + time = 'date'), + retrieve = FALSE) +) + +lons.obs <- attr(obs, 'Variables')$common$longitude +lats.obs <- attr(obs, 'Variables')$common$latitude +dates.obs <- attr(obs, 'Variables')$common$time + +lons_exp <- as.vector(lons.exp) +lats_exp <- as.vector(lats.exp) +lons_obs <- as.vector(lons.obs) +lats_obs <- as.vector(lats.obs) + + +fun <- function(exp, obs, path.output, + lons_exp = lons_exp, lats_exp = lats_exp, + lons_obs = lons_obs, lats_obs = lats_obs) { + + e <- s2dv::MeanDims(drop(exp), c('member', 'time')) + sst.e <- ClimProjDiags::WeightedMean(e, lons_exp, lats_exp, + londim = which(names(dim(e)) == 'longitude'), + latdim = which(names(dim(e)) == 'latitude')) + index.exp <- (sst.e - mean(sst.e))/sd(sst.e) + + o <- s2dv::MeanDims(drop(obs), 'time') + sst.o <- ClimProjDiags::WeightedMean(o, lons_obs, lats_obs, + londim = which(names(dim(o)) == 'longitude'), + latdim = which(names(dim(o)) == 'latitude')) + index.obs <- (sst.o - mean(sst.o))/sd(sst.o) + + # give dim name + dim(index.exp) <- c(sdate = length(index.exp)) + dim(index.obs) <- c(sdate = length(index.obs)) + + return(list(ind_exp = index.exp, ind_obs = index.obs)) + +} + +step <- Step(fun, + target_dims = list(exp = c('member', 'sdate', 'time', 'latitude', 'longitude'), + obs = c('sdate', 'time', 'latitude', 'longitude')), + output_dims = list(ind_exp = 'sdate', ind_obs = 'sdate')) + +workflow <- AddStep(list(exp = exp, obs = obs), step, + lons_exp = lons_exp, lats_exp = lats_exp, + lons_obs = lons_obs, lats_obs = lats_obs) + +res <- Compute(workflow$ind_exp, + chunks = list(var = 1)) + + +expect_equal( +attr(exp, 'Dimensions'), +c(data = 1, var = 1, member = 2, sdate = 3, time = 3, latitude = 11, longitude = 21) +) +expect_equal( +attr(obs, 'Dimensions'), +c(data = 1, var = 1, sdate = 3, time = 3, latitude = 41, longitude = 81) +) +expect_equal( +names(res), +c('ind_exp', 'ind_obs') +) +expect_equal( +mean(res$ind_exp)*10^18, +-9.251859, +tolerance = 0.00001 +) +expect_equal( +mean(res$ind_obs)*10^15, +-9.584944, +tolerance = 0.00001 +) +