diff --git a/NAMESPACE b/NAMESPACE index 6da8d0c7ee74b297aab584a94a5431777f714d19..a08603d53576373e793cc91205c1d10ef9daf330 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(Corr) export(Eno) export(GMST) export(GSAT) +export(Histo2Hindcast) export(InsertDim) export(LeapYear) export(Load) diff --git a/R/Histo2Hindcast.R b/R/Histo2Hindcast.R new file mode 100644 index 0000000000000000000000000000000000000000..860f56b96bae59a9b06e0eff937a24d5df79668d --- /dev/null +++ b/R/Histo2Hindcast.R @@ -0,0 +1,161 @@ +#'Chunk long simulations for comparison with hindcasts +#' +#'Reorganize a long run (historical typically) with only one start date into +#'chunks corresponding to a set of start dates. The time frequency of the data +#'should be monthly. +#' +#'@param data A numeric array of model or observational data with dimensions +#' at least sdate_dim and ftime_dim. +#'@param sdatesin A character string of the start date of 'data'. The format +#' should be 'YYYYMMDD' or 'YYYYMM'. +#'@param sdatesout A vector of character string indicating the expected start +#' dates of the output. The format should be 'YYYYMMDD' or 'YYYYMM'. +#'@param nleadtimesout A positive integer indicating the length of leadtimes of +#' the output. +#'@param sdate_dim A character string indicating the name of the sdate date +#' dimension of 'data'. The default value is 'sdate'. +#'@param ftime_dim A character string indicating the name of the lead time +#' dimension of 'data'. The default value is 'ftime'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numeric array with the same dimensions as data, except the length +#' of sdate_dim is 'sdatesout' and the length of ftime_dim is nleadtimesout. +#' +#'@examples +#' \dontshow{ +#'startDates <- c('19901101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 60, +#' output = 'areave', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#' } +#' +#'sdates_out <- c('19901101', '19911101', '19921101', '19931101', '19941101') +#'leadtimes_per_startdate <- 12 +#'exp_data <- Histo2Hindcast(sampleData$mod, startDates, +#' sdates_out, leadtimes_per_startdate) +#'obs_data <- Histo2Hindcast(sampleData$obs, startDates, +#' sdates_out, leadtimes_per_startdate) +#' \dontrun{ +#'exp_data <- Reorder(exp_data, c(3, 4, 1, 2)) +#'obs_data <- Reorder(obs_data, c(3, 4, 1, 2)) +#'PlotAno(exp_data, obs_data, sdates_out, +#' toptitle = paste('Anomalies reorganized into shorter chunks'), +#' ytitle = 'K', fileout = NULL) +#' } +#' +#'@import multiApply +#'@export +Histo2Hindcast <- function(data, sdatesin, sdatesout, nleadtimesout, + sdate_dim = 'sdate', ftime_dim = 'ftime', + ncores = NULL) { + + ## Input Checks + # data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + # sdatesin + if (is.null(sdatesin)) { + stop("Parameter 'sdatesin' cannot be NULL.") + } + if (!is.character(sdatesin) | length(sdatesin) > 1) { + stop(paste0("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.")) + } else if (!nchar(sdatesin) %in% c(6, 8) | is.na(as.numeric(sdatesin))) { + stop(paste0("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.")) + } + # sdatesout + if (is.null(sdatesout)) { + stop("Parameter 'sdatesout' cannot be NULL.") + } + if (!is.character(sdatesout) | !is.vector(sdatesout)) { + stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'.")) + } else if (!all(nchar(sdatesout) %in% c(6, 8)) | any(is.na(as.numeric(sdatesin)))) { + stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'.")) + } + # nleadtimesout + if (is.null(nleadtimesout)) { + stop("Parameter 'nleadtimesout' cannot be NULL.") + } + if (!is.numeric(nleadtimesout) | nleadtimesout %% 1 != 0 | + nleadtimesout < 0 | length(nleadtimesout) > 1) { + stop("Parameter 'nleadtimesout' must be a positive integer.") + } + # sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) > 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% names(dim(data))) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + if (dim(data)[sdate_dim] > 1) { + stop("The dimension length of sdate_dim of 'data' must be 1.") + } + # ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!ftime_dim %in% names(dim(data))) { + stop("Parameter 'ftime_dim' is not found in 'data' dimension.") + } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + yrin <- as.numeric(substr(sdatesin, 1, 4)) + yrout <- as.numeric(substr(sdatesout, 1, 4)) + mthin <- as.numeric(substr(sdatesin, 5, 6)) + if (mthin > 12) { + stop(paste0("Parameter 'sdatesin' must be in the format 'YYYYMMDD' or ", + "'YYYYMM'. Found the month is over 12.")) + } + mthout <- as.numeric(substr(sdatesout, 5, 6)) + if (any(mthout > 12)) { + stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'. Found certain month is over 12.")) + } + + res <- Apply(data, + target_dims = c(sdate_dim, ftime_dim), + output_dims = c(sdate_dim, ftime_dim), + fun = .Histo2Hindcast, + yrin = yrin, yrout = yrout, + mthin = mthin, mthout = mthout, + nleadtimesout = nleadtimesout, + ncores = ncores)$output1 + + return(res) + +} + +.Histo2Hindcast <- function(data, yrin = yrin, yrout = yrout, mthin = mthin, mthout = mthout, nleadtimesout) { + # data: [sdate = 1, ftime] + + res <- array(dim = c(sdate = length(yrout), ftime = nleadtimesout)) + + diff_mth <- (yrout - yrin) * 12 + (mthout - mthin) + for (i in 1:length(diff_mth)) { + if (diff_mth[i] < dim(data)[2]) { + ftime_ind <- max(1 + diff_mth[i], 1):min(nleadtimesout + diff_mth[i], dim(data)[2]) + res[i, 1:length(ftime_ind)] <- data[1, ftime_ind] + } + } + + return(res) +} diff --git a/man/Histo2Hindcast.Rd b/man/Histo2Hindcast.Rd new file mode 100644 index 0000000000000000000000000000000000000000..a2bc5cac3a7ff241d3912edd6b0123f2f890d3d5 --- /dev/null +++ b/man/Histo2Hindcast.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Histo2Hindcast.R +\name{Histo2Hindcast} +\alias{Histo2Hindcast} +\title{Chunk long simulations for comparison with hindcasts} +\usage{ +Histo2Hindcast( + data, + sdatesin, + sdatesout, + nleadtimesout, + sdate_dim = "sdate", + ftime_dim = "ftime", + ncores = NULL +) +} +\arguments{ +\item{data}{A numeric array of model or observational data with dimensions +at least sdate_dim and ftime_dim.} + +\item{sdatesin}{A character string of the start date of 'data'. The format +should be 'YYYYMMDD' or 'YYYYMM'.} + +\item{sdatesout}{A vector of character string indicating the expected start +dates of the output. The format should be 'YYYYMMDD' or 'YYYYMM'.} + +\item{nleadtimesout}{A positive integer indicating the length of leadtimes of +the output.} + +\item{sdate_dim}{A character string indicating the name of the sdate date +dimension of 'data'. The default value is 'sdate'.} + +\item{ftime_dim}{A character string indicating the name of the lead time +dimension of 'data'. The default value is 'ftime'.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numeric array with the same dimensions as data, except the length + of sdate_dim is 'sdatesout' and the length of ftime_dim is nleadtimesout. +} +\description{ +Reorganize a long run (historical typically) with only one start date into +chunks corresponding to a set of start dates. The time frequency of the data +should be monthly. +} +\examples{ + \dontshow{ +startDates <- c('19901101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 60, + output = 'areave', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } + +sdates_out <- c('19901101', '19911101', '19921101', '19931101', '19941101') +leadtimes_per_startdate <- 12 +exp_data <- Histo2Hindcast(sampleData$mod, startDates, + sdates_out, leadtimes_per_startdate) +obs_data <- Histo2Hindcast(sampleData$obs, startDates, + sdates_out, leadtimes_per_startdate) + \dontrun{ +exp_data <- Reorder(exp_data, c(3, 4, 1, 2)) +obs_data <- Reorder(obs_data, c(3, 4, 1, 2)) +PlotAno(exp_data, obs_data, sdates_out, + toptitle = paste('Anomalies reorganized into shorter chunks'), + ytitle = 'K', fileout = NULL) + } + +} diff --git a/tests/testthat/test-Histo2Hindcast.R b/tests/testthat/test-Histo2Hindcast.R new file mode 100644 index 0000000000000000000000000000000000000000..025f0035677e712d259954b11fe009353bb19626 --- /dev/null +++ b/tests/testthat/test-Histo2Hindcast.R @@ -0,0 +1,183 @@ +context("s2dv::Histo2Hindcast tests") + +############################################## +# dat1 +set.seed(1) +dat1 <- array(rnorm(24), dim = c(sdate = 1, ftime = 24)) +sdatesin1 <- '19901101' +sdatesout1 <- c('19901101', '19911101') +nleadtimesout1 <- 12 + +# dat2 +set.seed(1) +dat2 <- array(rnorm(288), dim = c(dat = 1, member = 2, sdate = 1, ftime = 24, lat = 2, lon = 3)) +sdatesin2 <- '19901101' +sdatesout2 <- c('19901101', '19911101') +nleadtimesout2 <- 12 + +############################################## +test_that("1. Input checks", { + + # dat + expect_error( + Histo2Hindcast(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Histo2Hindcast(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + # sdatesin + expect_error( + Histo2Hindcast(dat1, c()), + "Parameter 'sdatesin' cannot be NULL." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = '1999'), + paste0("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.") + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = c('19991101', '19991201')), + paste0("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.") + ) + # sdatesout + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, c()), + "Parameter 'sdatesout' cannot be NULL." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = 1999:2000), + paste0("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'.") + ) + # nleadtimesout + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, c()), + "Parameter 'nleadtimesout' cannot be NULL." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = c(10, 12)), + "Parameter 'nleadtimesout' must be a positive integer." + ) + # sdate_dim + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, sdate_dim = 1), + "Parameter 'sdate_dim' must be a character string." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, sdate_dim = 'time'), + "Parameter 'sdate_dim' is not found in 'data' dimension." + ) + expect_error( + Histo2Hindcast(array(1:10, dim = c(sdate = 2, ftime = 5)), + sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1), + "The dimension length of sdate_dim of 'data' must be 1." + ) + # ftime_dim + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, ftime_dim = 2), + "Parameter 'ftime_dim' must be a character string." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, ftime_dim = 'time'), + "Parameter 'ftime_dim' is not found in 'data' dimension." + ) + # ncores + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. dat1", { + + expect_equal( + dim(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + c(sdate = 2, ftime = 12) + ) + expect_equal( + mean(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + 0.1498669, + tolerance = 0.00001 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[1, 5:7], + c(0.3295078, -0.8204684, 0.4874291), + tolerance = 0.00001 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[2, 5:7], + c(-0.01619026, 0.94383621, 0.82122120), + tolerance = 0.00001 + ) + +sdatesout1 <- c('19901101', '19910101') +nleadtimesout1 <- 6 + + expect_equal( + dim(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + c(sdate = 2, ftime = 6) + ) + expect_equal( + mean(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + 0.1100272, + tolerance = 0.00001 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[1, 3:5], + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[2, 1:3], + tolerance = 0.00001 + ) + + +sdatesout1 <- c('19901101', '19911101') +nleadtimesout1 <- 15 + + expect_equal( + mean(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1), na.rm = T), + 0.06984426, + tolerance = 0.00001 + ) + expect_equal( + length(which(is.na(Histo2Hindcast(dat1, sdatesin = sdatesin1, + sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)))), + 3 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[1, 13:15], + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[2, 1:3] + ) + +}) +############################################## +test_that("3. dat2", { + + expect_equal( + dim(Histo2Hindcast(dat2, sdatesin = sdatesin2, sdatesout = sdatesout2, + nleadtimesout = nleadtimesout2)), + c(sdate = 2, ftime = 12, dat = 1, member = 2, lat = 2, lon = 3) + ) + +})