From c3650910b776058c41c078f67296659999db8a83 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 11 May 2019 16:12:54 +0200 Subject: [PATCH 01/26] First complete version of the MultiEOFs function --- R/CST_MultiEOF.R | 334 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100644 R/CST_MultiEOF.R diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R new file mode 100644 index 00000000..0c260042 --- /dev/null +++ b/R/CST_MultiEOF.R @@ -0,0 +1,334 @@ +#' @rdname CST_MultiEOF +#' @title EOF analysis of multiple variables +#' +#' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +#' @author Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +#' +#' @description This function performs EOF analysis over multiple variables, +#' accepting in input a list of CSTools objects. Based on Singular Value Decomposition. +#' +#' @param datalist A list of objects of the class 's2dv_cube', containing the variables to be analysed. +#' Each data object in the list is expected to have an element named \code{$data} with at least two +#' spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate". +#' @param neof_composed Number of composed eofs to return in output +#' @param neof_max Maximum number of single eofs considered in the first decomposition +#' @param xlim Vector with longitudinal range limits for the calculation (default: c(-180 180)) +#' @param ylim Vector with latitudinal range limits for the calculation (default: c(20 85)) +#' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), +#' \code{$variance} (a matrix of explained variances), +#' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +#' @import abind +#' @export +#' @examples +#' \donttest{ +#' exp <- 1 : (2 * 3 * 4 * 8 * 8) +#' } + +CST_MultiEOFs <- function(datalist, + neof_max = 40, neof_composed = 5, + xlim = c(-180, 180), ylim = c(20, 85)) { + print("Pasting data...") + exp <- abind(lapply(datalist, '[[', 'exp'), along = 0) + dim(exp) <- c(var=length(datalist), dim(datalist[[1]]$exp)) + print("...done") + result <- MultiEOFs(exp, datalist[[1]]$lon, datalist[[1]]$lat, + datalist[[1]]$Dates$start, + time_dim=c("ftime", "sdate"), + neof=neof, neof_composed = neof_composed, + xlim = xlim, ylim = ylim) + + return(result) +} + +#' @rdname MultiEOF +#' @title EOF analysis of multiple variables starting from an array (reduced version) +#' +#' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +#' @author Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +#' +#' @description This function performs EOF analysis over multiple variables, accepting in +#' input an arry with a dimension \code{"var"} for each variable to analyse. +#' Based on Singular Value Decomposition. +#' +#' @param datalist A multidimensional array with dimension \code{"var"}, +#' containing the variables to be analysed. The other diemnsions follow the same structure as the +#' \code{"exp"} element of a 's2dv_cube' object. +#' @param lon Vector of longitudes. +#' @param lat Vector of latitudes. +#' @param time Vector or matrix of dates in POSIXct format. +#' @param time_dim Character vector of dimension names to be considered as times +#' @param lon_dim String with dimension name of longitudinal coordinate (default: "lon") +#' @param lat_dim String with dimension name of latitudinal coordinate (default: "lat") +#' @param neof_composed Number of composed eofs to return in output +#' @param neof_max Maximum number of single eofs considered in the first decomposition +#' @param xlim Vector with longitudinal range limits for the calculation (default: c(-180 180)) +#' @param ylim Vector with latitudinal range limits for the calculation (default: c(20 85)) +#' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), +#' \code{$variance} (a matrix of explained variances), +#' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +#' @import multiApply +#' @export +#' @examples +#' # Example for the 'reduced' MultiEOFs function +#' +#' \donttest{ +#' } + +MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, + lon_dim = "lon", lat_dim = "lat", + neof_max = 40, neof_composed = 5, + xlim = c(-180, 180), ylim = c(20, 85)) { + # Check/detect time_dim + if (is.null(time_dim)) { + time_dim_names <- c("ftime", "sdate", "time") + time_dim_num <- which(time_dim_names %in% names(dim(data))) + if (length(time_dim_num) > 0) { + # Find time dimension with length > 1 + ilong <- which(dim(data)[time_dim_names[time_dim_num]] > 1) + if (length(ilong) > 0) { + time_dim <- time_dim_names[time_dim_num[ilong[1]]] + } else { + stop("No time dimension longer than one found.") + } + } else { + stop("Could not automatically detect a target time dimension ", + "in the provided data in 'data'.") + } + print(paste("Selected time dim: ", time_dim)) + } + +# reorder and group time_dim together at the end + cdim0 <- dim(data) + imask <- names(cdim0) %in% time_dim + data <- .aperm2(data, c(which(!imask), which(imask))) + cdim <- dim(data) + ind <- 1:length(which(!imask)) + # compact (multiply) time_dim dimensions + dim(data) <- c(cdim[ind], samples = prod(cdim[-ind])) + +#print("str data") +# print(str(data)) +# print(names(dim(data))) +#print("str time") +# print(str(time)) +# print(names(dim(time))) + + # Repeatedly apply .multi.eofs + result <- Apply(data, c("var", lon_dim, lat_dim, "samples"), + .multi.eofs, lon, lat, time, neof_max = neof_max, + neof_composed = neof_composed, + xlim = xlim, ylim = ylim) + + return(result) +} + + +#' Atomic MultiEOFs +#' @param field_arr_raw an array of dimension: (n_field, lon, lat, time). +#' where n_field is the number of variables over which to calculate +#' the multi_eofs. +#' @param neof_composed Number of composed eofs to return in output +#' @param neof_max Maximum number of single eofs considered in the first decomposition +#' @param xlim Vector with longitudinal range limits for the calculation (default: c(-180 180)) +#' @param ylim Vector with latitudinal range limits for the calculation (default: c(20 85)) +#' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), +#' \code{$variance} (a matrix of explained variances), +#' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +#' @noRd + +.multi.eofs <- function(field_arr_raw, lon, lat, time, + neof_max = 40, neof_composed = 5, + xlim = c(-180, 180), ylim = c(20, 85)) { + + if (exists(".lm.fit")) { + lin.fit <- .lm.fit + } else { + lin.fit <- lm.fit + } + + n_field <- dim(field_arr_raw)[1] + + etime <- power.date.new(time) + print("Calculating anomalies...") + field_arr <- array(dim = dim(field_arr_raw)) + for (k in seq(1, n_field, 1)) { + field_arr[k, , , ] <- daily.anom.mean( + lon, lat, field_arr_raw[k, , , ], etime + ) + } + + # area weighting, based on the root of cosine + print("Area Weighting...") + ww <- area.weight(lon, lat, root = T) + + for (k in seq(1, n_field, 1)) { + field <- field_arr[k, , , ] + # calculate the area weight + wwfield <- sweep(field, c(1, 2), ww, "*") + + # selection of the box + box <- wwfield[whicher(lon, xlim[1]):whicher(lon, xlim[2]), + whicher(lat, ylim[1]):whicher(lat, ylim[2]), ] + slon <- lon[whicher(lon, xlim[1]):whicher(lon, xlim[2])] + slat <- lat[whicher(lat, ylim[1]):whicher(lat, ylim[2])] + + # transform 3D field in a matrix + new_box <- array(box, dim = c(dim(box)[1] * dim(box)[2], dim(box)[3])) + + # calling SVD + print("Calling SVD...") + SVD <- svd(new_box, nu = neof_max, nv = neof_max) + + # extracting EOFs (loading pattern), expansions coefficient + # and variance explained + pattern <- array(SVD$u, dim = c(dim(box)[1], dim(box)[2], neof_max)) + coefficient <- SVD$v + variance <- (SVD$d[1:neof_max])^2 / sum((SVD$d)^2) + print("Accumulated variance:") + print(cumsum(variance)) + reqPC <- which(cumsum(variance) > minvar)[1] + print("Number of EOFs needed for var:") + print(reqPC) + variance <- variance[1:reqPC] + coefficient <- coefficient[, 1:reqPC] + if (reqPC == 1) { + coefficient <- replicate(1, coefficient) + } + coefficient <- apply(coefficient, c(2), standardize) + regression <- array(NA, dim = c(length(lon), length(lat), neof_max)) + for (i in 1:reqPC) { + regression[, , i] <- apply(get(paste0("field", k)), c(1, 2), + function(x) lin.fit(as.matrix(coefficient[, i], + ncol = 1), x)$coefficients + ) + } + + assign( + paste0("pc", k), list(coeff = coefficient, variance = variance, + wcoeff = sweep(coefficient, c(2), variance, "*"), + regression = regression) + ) + } + + newpc <- NULL + for (k in seq(1, n_field, 1)) { + newpc <- cbind(newpc, get(paste0("pc", k))$wcoeff) + } + newpc <- t(newpc) + + print("Calculating composed EOFs") + SVD <- svd(newpc, nu = neof_composed, nv = neof_composed) + # extracting EOFs, expansions coefficient and variance explained + coefficient <- SVD$v + variance <- (SVD$d[1:(neof_composed)]) ^ 2 / sum( (SVD$d) ^ 2) + coefficient <- apply(coefficient, c(2), standardize) + + # linear regressions on anomalies + regression <- array(dim = c(n_field, length(lon), + length(lat), neof_composed)) + for (k in seq(1, n_field, 1)) { + print("Linear Regressions (it can take a while)... ") + for (i in 1:neof_composed) { + regression[k, , , i] <- apply( + field_arr[k, , , ], c(1, 2), + function(x) lin.fit( + as.matrix(coefficient[, i], + ncol = 1), + x)$coefficients + ) + } + } + + # preparing output + # pattern=list(x=slon,y=slat,z=pattern) + # out=list(pattern=pattern,coeff=coefficient,variance=variance,regression=regression) + + print("Finalize...") + + out <- list(coeff = coefficient, variance = variance, eof_pattern = regression) + + return(out) +} + + +# new function to create simple list with date values - Oct-18 +# it needs a date or PCICt object, and returns also the season subdivision +power.date.new <- function(datas, verbose = FALSE) { + + # create a "season" for continuous time, used by persistance tracking + startpoints <- c(0, which(diff(datas) > 1)) + #print(startpoints) + deltapoints <- diff(c(startpoints, length(datas))) + seas <- inverse.rle(list(lengths = deltapoints, values = seq(1, length(startpoints)))) + + etime <- list( + day = as.numeric(format(datas, "%d")), month = as.numeric(format(datas, "%m")), + year = as.numeric(format(datas, "%Y")), data = datas, season = seas + ) + + printv("Time Array Built", verbose) + printv(paste("Length:", length(seas)), verbose) + printv(paste("From", datas[1], "to", datas[length(seas)]), verbose) + return(etime) +} + + +# function for daily anomalies, use array predeclaration and rowMeans (40 times faster!) +daily.anom.mean <- function(ics, ipsilon, field, etime) { + condition <- paste(etime$day, etime$month) + daily <- array(NA, dim = c(length(ics), length(ipsilon), length(unique(condition)))) + anom <- field * NA + + for (t in unique(condition)) { + if (sum(t == condition) == 1) { + print("Cannot compute a mean with a single value: using climatological mean") + #anom <- sweep(field, 1:2, apply(field, 1:2, mean), "-") + anom[, , which(t == condition)] <- rowMeans(field, dims = 2) + } else { + daily[, , which(t == unique(condition))] <- rowMeans(field[, , t == condition], dims = 2) + anom[, , which(t == condition)] <- sweep(field[, , which(t == condition)], c(1, 2), daily[, , which(t == unique(condition))], "-") + } + } + return(anom) +} + + +# produce a 2d matrix of area weights +area.weight <- function(ics, ipsilon, root = T) { + field <- array(NA, dim = c(length(ics), length(ipsilon))) + if (root == T) { + for (j in 1:length(ipsilon)) { + field[, j] <- sqrt(cos(pi / 180 * ipsilon[j])) + } + } + + if (root == F) { + for (j in 1:length(ipsilon)) { + field[, j] <- cos(pi / 180 * ipsilon[j]) + } + } + + return(field) +} + +# verbose-only printing function +printv <- function(value, verbosity = TRUE) { + if (verbosity) { + print(value) + } +} + + +# detect ics ipsilon lat-lon +whicher <- function(axis, number) { + out <- which.min(abs(axis - number)) + return(out) +} + + +# normalize a time series +standardize <- function(timeseries) { + out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) + return(out) +} + -- GitLab From 39e13fe41fcb7f8b544905b2f3aeb3d3b56b3dd3 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 11 May 2019 16:50:46 +0200 Subject: [PATCH 02/26] lint cleanup --- R/CST_MultiEOF.R | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 0c260042..b5567721 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -29,18 +29,18 @@ CST_MultiEOFs <- function(datalist, xlim = c(-180, 180), ylim = c(20, 85)) { print("Pasting data...") exp <- abind(lapply(datalist, '[[', 'exp'), along = 0) - dim(exp) <- c(var=length(datalist), dim(datalist[[1]]$exp)) + dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$exp)) print("...done") result <- MultiEOFs(exp, datalist[[1]]$lon, datalist[[1]]$lat, datalist[[1]]$Dates$start, - time_dim=c("ftime", "sdate"), - neof=neof, neof_composed = neof_composed, + time_dim = c("ftime", "sdate"), + neof = neof, neof_composed = neof_composed, xlim = xlim, ylim = ylim) return(result) } -#' @rdname MultiEOF +#' @rdname MultiEOFs #' @title EOF analysis of multiple variables starting from an array (reduced version) #' #' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} @@ -75,7 +75,7 @@ CST_MultiEOFs <- function(datalist, #' } MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, - lon_dim = "lon", lat_dim = "lat", + lon_dim = "lon", lat_dim = "lat", neof_max = 40, neof_composed = 5, xlim = c(-180, 180), ylim = c(20, 85)) { # Check/detect time_dim @@ -97,22 +97,15 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, print(paste("Selected time dim: ", time_dim)) } -# reorder and group time_dim together at the end + # reorder and group time_dim together at the end cdim0 <- dim(data) imask <- names(cdim0) %in% time_dim data <- .aperm2(data, c(which(!imask), which(imask))) cdim <- dim(data) ind <- 1:length(which(!imask)) - # compact (multiply) time_dim dimensions + # compact (multiply) time_dim dimensions dim(data) <- c(cdim[ind], samples = prod(cdim[-ind])) -#print("str data") -# print(str(data)) -# print(names(dim(data))) -#print("str time") -# print(str(time)) -# print(names(dim(time))) - # Repeatedly apply .multi.eofs result <- Apply(data, c("var", lon_dim, lat_dim, "samples"), .multi.eofs, lon, lat, time, neof_max = neof_max, @@ -183,7 +176,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, # and variance explained pattern <- array(SVD$u, dim = c(dim(box)[1], dim(box)[2], neof_max)) coefficient <- SVD$v - variance <- (SVD$d[1:neof_max])^2 / sum((SVD$d)^2) + variance <- (SVD$d[1:neof_max]) ^ 2 / sum((SVD$d) ^ 2) print("Accumulated variance:") print(cumsum(variance)) reqPC <- which(cumsum(variance) > minvar)[1] @@ -239,10 +232,6 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, } } - # preparing output - # pattern=list(x=slon,y=slat,z=pattern) - # out=list(pattern=pattern,coeff=coefficient,variance=variance,regression=regression) - print("Finalize...") out <- list(coeff = coefficient, variance = variance, eof_pattern = regression) @@ -257,12 +246,13 @@ power.date.new <- function(datas, verbose = FALSE) { # create a "season" for continuous time, used by persistance tracking startpoints <- c(0, which(diff(datas) > 1)) - #print(startpoints) deltapoints <- diff(c(startpoints, length(datas))) - seas <- inverse.rle(list(lengths = deltapoints, values = seq(1, length(startpoints)))) + seas <- inverse.rle(list(lengths = deltapoints, + values = seq(1, length(startpoints)))) etime <- list( - day = as.numeric(format(datas, "%d")), month = as.numeric(format(datas, "%m")), + day = as.numeric(format(datas, "%d")), + month = as.numeric(format(datas, "%m")), year = as.numeric(format(datas, "%Y")), data = datas, season = seas ) @@ -276,17 +266,20 @@ power.date.new <- function(datas, verbose = FALSE) { # function for daily anomalies, use array predeclaration and rowMeans (40 times faster!) daily.anom.mean <- function(ics, ipsilon, field, etime) { condition <- paste(etime$day, etime$month) - daily <- array(NA, dim = c(length(ics), length(ipsilon), length(unique(condition)))) + daily <- array(NA, dim = c(length(ics), length(ipsilon), + length(unique(condition)))) anom <- field * NA - + for (t in unique(condition)) { if (sum(t == condition) == 1) { - print("Cannot compute a mean with a single value: using climatological mean") - #anom <- sweep(field, 1:2, apply(field, 1:2, mean), "-") + print(paste0("Cannot compute a mean with only one value: ", + "using climatological mean")) anom[, , which(t == condition)] <- rowMeans(field, dims = 2) } else { daily[, , which(t == unique(condition))] <- rowMeans(field[, , t == condition], dims = 2) - anom[, , which(t == condition)] <- sweep(field[, , which(t == condition)], c(1, 2), daily[, , which(t == unique(condition))], "-") + anom[, , which(t == condition)] <- sweep(field[, , which(t == condition)], + c(1, 2), + daily[, , which(t == unique(condition))], "-") } } return(anom) @@ -331,4 +324,3 @@ standardize <- function(timeseries) { out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) return(out) } - -- GitLab From ce0d300260bee3e8375dfffe01f969701065b631 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sun, 12 May 2019 12:13:13 +0200 Subject: [PATCH 03/26] added variance explained --- R/CST_MultiEOF.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index b5567721..3447c42b 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -11,9 +11,10 @@ #' Each data object in the list is expected to have an element named \code{$data} with at least two #' spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate". #' @param neof_composed Number of composed eofs to return in output +#' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the calculation (default: c(-180 180)) -#' @param ylim Vector with latitudinal range limits for the calculation (default: c(20 85)) +#' @param xlim Vector with longitudinal range limits for the calculation +#' @param ylim Vector with latitudinal range limits for the calculation #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -25,7 +26,7 @@ #' } CST_MultiEOFs <- function(datalist, - neof_max = 40, neof_composed = 5, + neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = c(-180, 180), ylim = c(20, 85)) { print("Pasting data...") exp <- abind(lapply(datalist, '[[', 'exp'), along = 0) @@ -60,9 +61,10 @@ CST_MultiEOFs <- function(datalist, #' @param lon_dim String with dimension name of longitudinal coordinate (default: "lon") #' @param lat_dim String with dimension name of latitudinal coordinate (default: "lat") #' @param neof_composed Number of composed eofs to return in output +#' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the calculation (default: c(-180 180)) -#' @param ylim Vector with latitudinal range limits for the calculation (default: c(20 85)) +#' @param xlim Vector with longitudinal range limits for the calculation +#' @param ylim Vector with latitudinal range limits for the calculation #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -76,7 +78,7 @@ CST_MultiEOFs <- function(datalist, MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, lon_dim = "lon", lat_dim = "lat", - neof_max = 40, neof_composed = 5, + neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = c(-180, 180), ylim = c(20, 85)) { # Check/detect time_dim if (is.null(time_dim)) { @@ -109,7 +111,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, # Repeatedly apply .multi.eofs result <- Apply(data, c("var", lon_dim, lat_dim, "samples"), .multi.eofs, lon, lat, time, neof_max = neof_max, - neof_composed = neof_composed, + neof_composed = neof_composed, minvar = minvar, xlim = xlim, ylim = ylim) return(result) @@ -121,6 +123,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, #' where n_field is the number of variables over which to calculate #' the multi_eofs. #' @param neof_composed Number of composed eofs to return in output +#' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition #' @param xlim Vector with longitudinal range limits for the calculation (default: c(-180 180)) #' @param ylim Vector with latitudinal range limits for the calculation (default: c(20 85)) @@ -130,7 +133,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, #' @noRd .multi.eofs <- function(field_arr_raw, lon, lat, time, - neof_max = 40, neof_composed = 5, + neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = c(-180, 180), ylim = c(20, 85)) { if (exists(".lm.fit")) { -- GitLab From 369a0ba91b246143d7f5315412dd85d70bfc7472 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sun, 12 May 2019 12:24:58 +0200 Subject: [PATCH 04/26] fix naming of output dimensions --- R/CST_MultiEOF.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 3447c42b..668f315e 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -236,6 +236,10 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, } print("Finalize...") + names(dim(coefficient)) <- c("time", "eof") + variance <- array(variance) + names(dim(variance)) <- "eof" + names(dim(regression)) <- c("var", "lon", "lat", "eof") out <- list(coeff = coefficient, variance = variance, eof_pattern = regression) -- GitLab From b95555a5cec74a1fa6e2987a7279842b31e47a10 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Thu, 16 May 2019 15:55:35 +0200 Subject: [PATCH 05/26] dots in front of all helper functions --- R/CST_MultiEOF.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 668f315e..cfe1b7e5 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -144,18 +144,18 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, n_field <- dim(field_arr_raw)[1] - etime <- power.date.new(time) + etime <- .power.date(time) print("Calculating anomalies...") field_arr <- array(dim = dim(field_arr_raw)) for (k in seq(1, n_field, 1)) { - field_arr[k, , , ] <- daily.anom.mean( + field_arr[k, , , ] <- .daily.anom.mean( lon, lat, field_arr_raw[k, , , ], etime ) } # area weighting, based on the root of cosine print("Area Weighting...") - ww <- area.weight(lon, lat, root = T) + ww <- .area.weight(lon, lat, root = T) for (k in seq(1, n_field, 1)) { field <- field_arr[k, , , ] @@ -163,10 +163,10 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, wwfield <- sweep(field, c(1, 2), ww, "*") # selection of the box - box <- wwfield[whicher(lon, xlim[1]):whicher(lon, xlim[2]), - whicher(lat, ylim[1]):whicher(lat, ylim[2]), ] - slon <- lon[whicher(lon, xlim[1]):whicher(lon, xlim[2])] - slat <- lat[whicher(lat, ylim[1]):whicher(lat, ylim[2])] + box <- wwfield[.whicher(lon, xlim[1]):.whicher(lon, xlim[2]), + .whicher(lat, ylim[1]):.whicher(lat, ylim[2]), ] + slon <- lon[.whicher(lon, xlim[1]):.whicher(lon, xlim[2])] + slat <- lat[.whicher(lat, ylim[1]):.whicher(lat, ylim[2])] # transform 3D field in a matrix new_box <- array(box, dim = c(dim(box)[1] * dim(box)[2], dim(box)[3])) @@ -190,7 +190,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, if (reqPC == 1) { coefficient <- replicate(1, coefficient) } - coefficient <- apply(coefficient, c(2), standardize) + coefficient <- apply(coefficient, c(2), .standardize) regression <- array(NA, dim = c(length(lon), length(lat), neof_max)) for (i in 1:reqPC) { regression[, , i] <- apply(get(paste0("field", k)), c(1, 2), @@ -217,7 +217,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, # extracting EOFs, expansions coefficient and variance explained coefficient <- SVD$v variance <- (SVD$d[1:(neof_composed)]) ^ 2 / sum( (SVD$d) ^ 2) - coefficient <- apply(coefficient, c(2), standardize) + coefficient <- apply(coefficient, c(2), .standardize) # linear regressions on anomalies regression <- array(dim = c(n_field, length(lon), @@ -249,7 +249,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, # new function to create simple list with date values - Oct-18 # it needs a date or PCICt object, and returns also the season subdivision -power.date.new <- function(datas, verbose = FALSE) { +.power.date <- function(datas, verbose = FALSE) { # create a "season" for continuous time, used by persistance tracking startpoints <- c(0, which(diff(datas) > 1)) @@ -263,15 +263,15 @@ power.date.new <- function(datas, verbose = FALSE) { year = as.numeric(format(datas, "%Y")), data = datas, season = seas ) - printv("Time Array Built", verbose) - printv(paste("Length:", length(seas)), verbose) - printv(paste("From", datas[1], "to", datas[length(seas)]), verbose) + .printv("Time Array Built", verbose) + .printv(paste("Length:", length(seas)), verbose) + .printv(paste("From", datas[1], "to", datas[length(seas)]), verbose) return(etime) } # function for daily anomalies, use array predeclaration and rowMeans (40 times faster!) -daily.anom.mean <- function(ics, ipsilon, field, etime) { +.daily.anom.mean <- function(ics, ipsilon, field, etime) { condition <- paste(etime$day, etime$month) daily <- array(NA, dim = c(length(ics), length(ipsilon), length(unique(condition)))) @@ -294,7 +294,7 @@ daily.anom.mean <- function(ics, ipsilon, field, etime) { # produce a 2d matrix of area weights -area.weight <- function(ics, ipsilon, root = T) { +.area.weight <- function(ics, ipsilon, root = T) { field <- array(NA, dim = c(length(ics), length(ipsilon))) if (root == T) { for (j in 1:length(ipsilon)) { @@ -312,7 +312,7 @@ area.weight <- function(ics, ipsilon, root = T) { } # verbose-only printing function -printv <- function(value, verbosity = TRUE) { +.printv <- function(value, verbosity = TRUE) { if (verbosity) { print(value) } @@ -320,14 +320,14 @@ printv <- function(value, verbosity = TRUE) { # detect ics ipsilon lat-lon -whicher <- function(axis, number) { +.whicher <- function(axis, number) { out <- which.min(abs(axis - number)) return(out) } # normalize a time series -standardize <- function(timeseries) { +.standardize <- function(timeseries) { out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) return(out) } -- GitLab From b8ac50ca2f81daf896ede29a51fba3a8ac72d534 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Thu, 16 May 2019 15:57:34 +0200 Subject: [PATCH 06/26] cahnged name to CST_MultiEOF --- R/CST_MultiEOF.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index cfe1b7e5..33d5bdd0 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -25,14 +25,14 @@ #' exp <- 1 : (2 * 3 * 4 * 8 * 8) #' } -CST_MultiEOFs <- function(datalist, - neof_max = 40, neof_composed = 5, minvar = 0.6, - xlim = c(-180, 180), ylim = c(20, 85)) { +CST_MultiEOF <- function(datalist, + neof_max = 40, neof_composed = 5, minvar = 0.6, + xlim = c(-180, 180), ylim = c(20, 85)) { print("Pasting data...") exp <- abind(lapply(datalist, '[[', 'exp'), along = 0) dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$exp)) print("...done") - result <- MultiEOFs(exp, datalist[[1]]$lon, datalist[[1]]$lat, + result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, datalist[[1]]$Dates$start, time_dim = c("ftime", "sdate"), neof = neof, neof_composed = neof_composed, @@ -41,7 +41,7 @@ CST_MultiEOFs <- function(datalist, return(result) } -#' @rdname MultiEOFs +#' @rdname MultiEOF #' @title EOF analysis of multiple variables starting from an array (reduced version) #' #' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} @@ -71,12 +71,12 @@ CST_MultiEOFs <- function(datalist, #' @import multiApply #' @export #' @examples -#' # Example for the 'reduced' MultiEOFs function +#' # Example for the 'reduced' MultiEOF function #' #' \donttest{ #' } -MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, +MultiEOF <- function(data, lon, lat, time, time_dim = NULL, lon_dim = "lon", lat_dim = "lat", neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = c(-180, 180), ylim = c(20, 85)) { @@ -118,7 +118,7 @@ MultiEOFs <- function(data, lon, lat, time, time_dim = NULL, } -#' Atomic MultiEOFs +#' Atomic MultiEOF #' @param field_arr_raw an array of dimension: (n_field, lon, lat, time). #' where n_field is the number of variables over which to calculate #' the multi_eofs. -- GitLab From 6ba7c2383128f92b5a4669a6c7451e04a5692076 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Thu, 16 May 2019 16:19:04 +0200 Subject: [PATCH 07/26] optional xlim --- R/CST_MultiEOF.R | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 33d5bdd0..fbb016c4 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -27,7 +27,7 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, - xlim = c(-180, 180), ylim = c(20, 85)) { + xlim = NULL, ylim = NULL) { print("Pasting data...") exp <- abind(lapply(datalist, '[[', 'exp'), along = 0) dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$exp)) @@ -79,7 +79,7 @@ CST_MultiEOF <- function(datalist, MultiEOF <- function(data, lon, lat, time, time_dim = NULL, lon_dim = "lon", lat_dim = "lat", neof_max = 40, neof_composed = 5, minvar = 0.6, - xlim = c(-180, 180), ylim = c(20, 85)) { + xlim = NULL, ylim = NULL) { # Check/detect time_dim if (is.null(time_dim)) { time_dim_names <- c("ftime", "sdate", "time") @@ -125,8 +125,8 @@ MultiEOF <- function(data, lon, lat, time, time_dim = NULL, #' @param neof_composed Number of composed eofs to return in output #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the calculation (default: c(-180 180)) -#' @param ylim Vector with latitudinal range limits for the calculation (default: c(20 85)) +#' @param xlim Vector with longitudinal range limits for the calculation +#' @param ylim Vector with latitudinal range limits for the calculation #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -134,7 +134,7 @@ MultiEOF <- function(data, lon, lat, time, time_dim = NULL, .multi.eofs <- function(field_arr_raw, lon, lat, time, neof_max = 40, neof_composed = 5, minvar = 0.6, - xlim = c(-180, 180), ylim = c(20, 85)) { + xlim = NULL, ylim = NULL) { if (exists(".lm.fit")) { lin.fit <- .lm.fit @@ -160,24 +160,29 @@ MultiEOF <- function(data, lon, lat, time, time_dim = NULL, for (k in seq(1, n_field, 1)) { field <- field_arr[k, , , ] # calculate the area weight - wwfield <- sweep(field, c(1, 2), ww, "*") + field <- sweep(field, c(1, 2), ww, "*") + if(is.null(xlim) | is.null(xlim)) { + slon <- lon + slat <- lat + } else { # selection of the box - box <- wwfield[.whicher(lon, xlim[1]):.whicher(lon, xlim[2]), - .whicher(lat, ylim[1]):.whicher(lat, ylim[2]), ] - slon <- lon[.whicher(lon, xlim[1]):.whicher(lon, xlim[2])] - slat <- lat[.whicher(lat, ylim[1]):.whicher(lat, ylim[2])] + field <- field[.whicher(lon, xlim[1]):.whicher(lon, xlim[2]), + .whicher(lat, ylim[1]):.whicher(lat, ylim[2]), ] + slon <- lon[.whicher(lon, xlim[1]):.whicher(lon, xlim[2])] + slat <- lat[.whicher(lat, ylim[1]):.whicher(lat, ylim[2])] + } # transform 3D field in a matrix - new_box <- array(box, dim = c(dim(box)[1] * dim(box)[2], dim(box)[3])) + field <- array(field, dim = c(dim(field)[1] * dim(field)[2], dim(field)[3])) # calling SVD print("Calling SVD...") - SVD <- svd(new_box, nu = neof_max, nv = neof_max) + SVD <- svd(field, nu = neof_max, nv = neof_max) # extracting EOFs (loading pattern), expansions coefficient # and variance explained - pattern <- array(SVD$u, dim = c(dim(box)[1], dim(box)[2], neof_max)) + pattern <- array(SVD$u, dim = c(dim(field)[1], dim(field)[2], neof_max)) coefficient <- SVD$v variance <- (SVD$d[1:neof_max]) ^ 2 / sum((SVD$d) ^ 2) print("Accumulated variance:") -- GitLab From 1c653ef4e254454e74b99733b67162899fbceb27 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Fri, 8 Nov 2019 16:54:06 +0100 Subject: [PATCH 08/26] wrong field name in atomic function and data instad of exp in CST function --- R/CST_MultiEOF.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index fbb016c4..3f841c50 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -9,7 +9,7 @@ #' #' @param datalist A list of objects of the class 's2dv_cube', containing the variables to be analysed. #' Each data object in the list is expected to have an element named \code{$data} with at least two -#' spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate". +#' spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate". #' @param neof_composed Number of composed eofs to return in output #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition @@ -29,13 +29,13 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = NULL, ylim = NULL) { print("Pasting data...") - exp <- abind(lapply(datalist, '[[', 'exp'), along = 0) - dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$exp)) + exp <- abind(lapply(datalist, '[[', 'data'), along = 0) + dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$data)) print("...done") result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, datalist[[1]]$Dates$start, time_dim = c("ftime", "sdate"), - neof = neof, neof_composed = neof_composed, + neof_max = neof_max, neof_composed = neof_composed, xlim = xlim, ylim = ylim) return(result) @@ -47,7 +47,7 @@ CST_MultiEOF <- function(datalist, #' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} #' @author Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} #' -#' @description This function performs EOF analysis over multiple variables, accepting in +#' @description This function performs EOF analysis over multiple variables, accepting in #' input an arry with a dimension \code{"var"} for each variable to analyse. #' Based on Singular Value Decomposition. #' @@ -57,7 +57,7 @@ CST_MultiEOF <- function(datalist, #' @param lon Vector of longitudes. #' @param lat Vector of latitudes. #' @param time Vector or matrix of dates in POSIXct format. -#' @param time_dim Character vector of dimension names to be considered as times +#' @param time_dim Character vector of dimension names to be considered as times #' @param lon_dim String with dimension name of longitudinal coordinate (default: "lon") #' @param lat_dim String with dimension name of latitudinal coordinate (default: "lat") #' @param neof_composed Number of composed eofs to return in output @@ -158,9 +158,9 @@ MultiEOF <- function(data, lon, lat, time, time_dim = NULL, ww <- .area.weight(lon, lat, root = T) for (k in seq(1, n_field, 1)) { - field <- field_arr[k, , , ] + field_orig <- field_arr[k, , , ] # calculate the area weight - field <- sweep(field, c(1, 2), ww, "*") + field <- sweep(field_orig, c(1, 2), ww, "*") if(is.null(xlim) | is.null(xlim)) { slon <- lon @@ -198,7 +198,7 @@ MultiEOF <- function(data, lon, lat, time, time_dim = NULL, coefficient <- apply(coefficient, c(2), .standardize) regression <- array(NA, dim = c(length(lon), length(lat), neof_max)) for (i in 1:reqPC) { - regression[, , i] <- apply(get(paste0("field", k)), c(1, 2), + regression[, , i] <- apply(field_orig, c(1, 2), function(x) lin.fit(as.matrix(coefficient[, i], ncol = 1), x)$coefficients ) -- GitLab From 24f9780fa85cb64ce3d0de577a93a98d0143b088 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Fri, 8 Nov 2019 21:03:38 +0100 Subject: [PATCH 09/26] test for NaN and test for s2dv_cube --- R/CST_MultiEOF.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 3f841c50..88ba5afa 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -28,10 +28,21 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = NULL, ylim = NULL) { + + if (!(all(sapply(datalist, inherits, 's2dv_cube')))) { + stop("Elements of the list in parameter 'datalist' must be of the class ", + "'s2dv_cube', as output by CSTools::CST_Load.") + } + print("Pasting data...") exp <- abind(lapply(datalist, '[[', 'data'), along = 0) dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$data)) print("...done") + + if (any(is.na(exp))) { + stop("Input data contain NA values.") + } + result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, datalist[[1]]$Dates$start, time_dim = c("ftime", "sdate"), -- GitLab From 79103d405d68c66c870962044add81e5059de46c Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 9 Nov 2019 09:53:27 +0100 Subject: [PATCH 10/26] add test on dimensions of all input data being equal --- R/CST_MultiEOF.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 88ba5afa..7a6c37a0 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -34,6 +34,13 @@ CST_MultiEOF <- function(datalist, "'s2dv_cube', as output by CSTools::CST_Load.") } + # Check if all dims equal + adims=lapply(lapply(datalist, function(x) x$data), dim) + if( !all(apply(apply(abind(adims, along = 0), 2, duplicated), 2, sum) == + (length(adims)-1))) { + stop("Input data fields must all have the same dimensions.") + } + print("Pasting data...") exp <- abind(lapply(datalist, '[[', 'data'), along = 0) dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$data)) -- GitLab From 9bc5916ba6abe80e439afe30afebce87dbc142b1 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 9 Nov 2019 15:38:55 +0100 Subject: [PATCH 11/26] wrong ordering of sdate and ftime --- R/CST_MultiEOF.R | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 7a6c37a0..7332feb9 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -52,7 +52,6 @@ CST_MultiEOF <- function(datalist, result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, datalist[[1]]$Dates$start, - time_dim = c("ftime", "sdate"), neof_max = neof_max, neof_composed = neof_composed, xlim = xlim, ylim = ylim) @@ -75,7 +74,6 @@ CST_MultiEOF <- function(datalist, #' @param lon Vector of longitudes. #' @param lat Vector of latitudes. #' @param time Vector or matrix of dates in POSIXct format. -#' @param time_dim Character vector of dimension names to be considered as times #' @param lon_dim String with dimension name of longitudinal coordinate (default: "lon") #' @param lat_dim String with dimension name of latitudinal coordinate (default: "lat") #' @param neof_composed Number of composed eofs to return in output @@ -94,35 +92,20 @@ CST_MultiEOF <- function(datalist, #' \donttest{ #' } -MultiEOF <- function(data, lon, lat, time, time_dim = NULL, +MultiEOF <- function(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = NULL, ylim = NULL) { # Check/detect time_dim - if (is.null(time_dim)) { - time_dim_names <- c("ftime", "sdate", "time") - time_dim_num <- which(time_dim_names %in% names(dim(data))) - if (length(time_dim_num) > 0) { - # Find time dimension with length > 1 - ilong <- which(dim(data)[time_dim_names[time_dim_num]] > 1) - if (length(ilong) > 0) { - time_dim <- time_dim_names[time_dim_num[ilong[1]]] - } else { - stop("No time dimension longer than one found.") - } - } else { - stop("Could not automatically detect a target time dimension ", - "in the provided data in 'data'.") - } - print(paste("Selected time dim: ", time_dim)) - } - # reorder and group time_dim together at the end + # reorder and group ftime and sdate together at the end in that order cdim0 <- dim(data) - imask <- names(cdim0) %in% time_dim - data <- .aperm2(data, c(which(!imask), which(imask))) + imaskt <- names(cdim0) %in% "ftime" + imasks <- names(cdim0) %in% "sdate" + data <- .aperm2(data, c(which(!(imasks | imaskt)), + which(imaskt), which(imasks))) cdim <- dim(data) - ind <- 1:length(which(!imask)) + ind <- 1:length(which(!(imaskt | imasks))) # compact (multiply) time_dim dimensions dim(data) <- c(cdim[ind], samples = prod(cdim[-ind])) @@ -132,6 +115,9 @@ MultiEOF <- function(data, lon, lat, time, time_dim = NULL, neof_composed = neof_composed, minvar = minvar, xlim = xlim, ylim = ylim) + # Expand back samples to compacted dims + dim(result$coeff) <- c(cdim[-ind], dim(result$coeff)[-1]) + return(result) } -- GitLab From 6f049ce4c2cbe38e559d3bd5a5ed3e999ad1c4b5 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 9 Nov 2019 18:03:48 +0100 Subject: [PATCH 12/26] added testthat test --- tests/testthat/test-CST_MultiEOF.R | 56 ++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/testthat/test-CST_MultiEOF.R diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R new file mode 100644 index 00000000..d294c07c --- /dev/null +++ b/tests/testthat/test-CST_MultiEOF.R @@ -0,0 +1,56 @@ +context("Generic tests") +test_that("Sanity checks and simple use case", { + # Generate simple synthetic data + mod1 <- 1 : (2 * 3 * 4 * 5 * 6 * 8) + dim(mod1) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) + mod2 <- sin( 1 : (2 * 3 * 4 * 5 * 6 * 8)) + dim(mod2) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) + mod3 <- cos( 0.5 + ( 1 : (2 * 3 * 4 * 4 * 6 * 8))) + dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 4, lat = 6, lon = 8) + lon <- seq(0, 35, 5) + lat <- seq(0, 25, 5) + exp1 <- list(data = mod1, lat = lat, lon = lon) + exp2 <- list(data = mod2, lat = lat, lon = lon) + exp3 <- list(data = mod3, lat = lat, lon = lon) + attr(exp1, 'class') <- 's2dv_cube' + attr(exp2, 'class') <- 's2dv_cube' + attr(exp3, 'class') <- 's2dv_cube' + d=as.POSIXct(c("2017/01/01", "2017/01/02", "2017/01/03", "2017/01/04", "2017/01/05", + "2018/01/01", "2018/01/02", "2018/01/03", "2018/01/04", "2018/01/05", + "2019/01/01", "2019/01/02", "2019/01/03", "2019/01/04", "2019/01/05", + "2020/01/01", "2020/01/02", "2020/01/03", "2020/01/04", "2020/01/05")) + + exp1$Dates$start=d + exp2$Dates$start=d + exp3$Dates$start=d + + expect_error( + CST_MultiEOF(datalist = 1), + "Elements of the list in parameter 'datalist' must be of the class 's2dv_cube', as output by CSTools::CST_Load." + ) + expect_error( + CST_MultiEOF(list(exp1, exp3)), + "Input data fields must all have the same dimensions." + ) + mod3 <- cos( 0.5 + ( 1 : (2 * 3 * 4 * 5 * 6 * 8))) + dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) + exp3$data <- mod3 + + cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) + expect_equal(length(cal), 3) + dimexp=dim(exp1$data) + expect_equal(dim(cal$coeff), c(dimexp["ftime"], dimexp["sdate"], + eof=2, dimexp["dataset"], dimexp["member"])) + expect_equal(dim(cal$variance), c(eof=2, dimexp["dataset"], dimexp["member"])) + expect_equal(dim(cal$eof_pattern), c(var=3, dimexp["lon"], dimexp["lat"], + eof=2, dimexp["dataset"], + dimexp["member"])) + expect_equal(cal$variance[1, 1, 1], 0.5032451, tolerance = .00001) + expect_equal(cal$coeff[2, 1, 1, 1, 1], -1.336491, tolerance = .00001) + expect_equal(cal$eof_pattern[1, 2, 2, 2, 1, 1], -0.04351991, tolerance = .00001) + exp3$data[1,1,1,1,1,1]=NaN + expect_error( + CST_MultiEOF(list(exp1, exp3), neof_max=8, neof_composed=2), + "Input data contain NA values." + ) +}) -- GitLab From 93043bb86618bd3b063ee64a442ad739d020085e Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 9 Nov 2019 20:13:30 +0100 Subject: [PATCH 13/26] better tests --- tests/testthat/test-CST_MultiEOF.R | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R index d294c07c..72a7ff1c 100644 --- a/tests/testthat/test-CST_MultiEOF.R +++ b/tests/testthat/test-CST_MultiEOF.R @@ -1,11 +1,13 @@ context("Generic tests") test_that("Sanity checks and simple use case", { # Generate simple synthetic data - mod1 <- 1 : (2 * 3 * 4 * 5 * 6 * 8) + seq <- 1 : (2 * 3 * 4 * 5 * 6 * 8) + seq3 <- 1 : (2 * 3 * 4 * 4 * 6 * 8) + mod1 <- sin( 0.7 + seq )^2 + cos( seq ^ 2 * 1.22 ) dim(mod1) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) - mod2 <- sin( 1 : (2 * 3 * 4 * 5 * 6 * 8)) + mod2 <- sin( seq * 2 ) ^ 3 + cos( seq ^ 2 ) dim(mod2) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) - mod3 <- cos( 0.5 + ( 1 : (2 * 3 * 4 * 4 * 6 * 8))) + mod3 <- cos( 0.5 + seq3 ) + sin ( seq3 ^ 2 * 0.2 ) dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 4, lat = 6, lon = 8) lon <- seq(0, 35, 5) lat <- seq(0, 25, 5) @@ -32,11 +34,11 @@ test_that("Sanity checks and simple use case", { CST_MultiEOF(list(exp1, exp3)), "Input data fields must all have the same dimensions." ) - mod3 <- cos( 0.5 + ( 1 : (2 * 3 * 4 * 5 * 6 * 8))) + mod3 <- cos( 0.5 + seq ) + sin( seq ^ 2 * 0.2 ) dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) exp3$data <- mod3 - cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_composed=2) expect_equal(length(cal), 3) dimexp=dim(exp1$data) expect_equal(dim(cal$coeff), c(dimexp["ftime"], dimexp["sdate"], @@ -45,9 +47,16 @@ test_that("Sanity checks and simple use case", { expect_equal(dim(cal$eof_pattern), c(var=3, dimexp["lon"], dimexp["lat"], eof=2, dimexp["dataset"], dimexp["member"])) - expect_equal(cal$variance[1, 1, 1], 0.5032451, tolerance = .00001) - expect_equal(cal$coeff[2, 1, 1, 1, 1], -1.336491, tolerance = .00001) - expect_equal(cal$eof_pattern[1, 2, 2, 2, 1, 1], -0.04351991, tolerance = .00001) + expect_equal(cal$variance[1, 1, 1], 0.2909419, tolerance = .00001) + expect_equal(cal$coeff[2, 1, 1, 1, 1], 0.5414261, tolerance = .00001) + expect_equal(cal$eof_pattern[1, 2, 2, 2, 1, 1], 0.3932484, tolerance = .00001) + + cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2, minvar=0.2) + expect_equal(cal$coeff[2, 1, 1, 1, 1], -0.6117927, tolerance = .00001) + + cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(5, 30), ylim=c(10, 25)) + expect_equal(cal$coeff[2, 1, 1, 1, 1], 0.8539488, tolerance = .00001) + exp3$data[1,1,1,1,1,1]=NaN expect_error( CST_MultiEOF(list(exp1, exp3), neof_max=8, neof_composed=2), -- GitLab From 5a35063a40cc578e64612f5bb90f791e4a2f95fb Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 9 Nov 2019 20:13:50 +0100 Subject: [PATCH 14/26] added examples --- R/CST_MultiEOF.R | 58 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 7332feb9..4c8455b7 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -13,8 +13,8 @@ #' @param neof_composed Number of composed eofs to return in output #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the calculation -#' @param ylim Vector with latitudinal range limits for the calculation +#' @param xlim Vector with longitudinal range limits for the EOF calculation +#' @param ylim Vector with latitudinal range limits for the EOF calculation #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -22,7 +22,34 @@ #' @export #' @examples #' \donttest{ -#' exp <- 1 : (2 * 3 * 4 * 8 * 8) +#' library(zeallot) +#' c(exp, obs) %<-% lonlat_data +#' # Create three datasets (from the members) +#' exp1 <- exp +#' exp2 <- exp +#' exp3 <- exp +#' exp1$data <- exp$data[ , 1:5, , , , ] +#' exp2$data <- exp$data[ , 6:10, , , , ] +#' exp3$data <- exp$data[ , 11:15, , , , ] +#' +#' cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) +#' str(cal) +#' # List of 3 +#' $ coeff : num [1:3, 1:6, 1:2, 1:5] -0.312 -0.588 0.724 1.202 1.181 ... +#' $ variance : num [1:2, 1:5] 0.413 0.239 0.352 0.27 0.389 ... +#' $ eof_pattern: num [1:3, 1:53, 1:22, 1:2, 1:5] -1.47 -0.446 -0.656 -1.534 -0.464 ... +#' dim(cal$coeff) +#' # ftime sdate eof member +#' # 3 6 2 3 +#' +#' cal <- CST_MultiEOF(list(exp1, exp2, exp3) , minvar=0.9) +#' str(cal) +#' $ coeff : num [1:3, 1:6, 1:5, 1:5] 0.338 0.603 -0.736 -1.191 -1.198 ... +#' $ variance : num [1:5, 1:5] 0.3903 0.2264 0.1861 0.1032 0.0379 ... +#' $ eof_pattern: num [1:3, 1:53, 1:22, 1:5, 1:5] 1.477 0.454 0.651 1.541 0.47 ... +#' +#' cal <- CST_MultiEOF(list(exp1, exp2)) +#' cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(5, 30), ylim=c(35, 50), neof_composed=3) #' } CST_MultiEOF <- function(datalist, @@ -41,17 +68,17 @@ CST_MultiEOF <- function(datalist, stop("Input data fields must all have the same dimensions.") } - print("Pasting data...") + #print("Pasting data...") exp <- abind(lapply(datalist, '[[', 'data'), along = 0) dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$data)) - print("...done") + #print("...done") if (any(is.na(exp))) { stop("Input data contain NA values.") } result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, - datalist[[1]]$Dates$start, + datalist[[1]]$Dates$start, minvar = minvar, neof_max = neof_max, neof_composed = neof_composed, xlim = xlim, ylim = ylim) @@ -149,7 +176,7 @@ MultiEOF <- function(data, lon, lat, time, n_field <- dim(field_arr_raw)[1] etime <- .power.date(time) - print("Calculating anomalies...") + #print("Calculating anomalies...") field_arr <- array(dim = dim(field_arr_raw)) for (k in seq(1, n_field, 1)) { field_arr[k, , , ] <- .daily.anom.mean( @@ -158,7 +185,7 @@ MultiEOF <- function(data, lon, lat, time, } # area weighting, based on the root of cosine - print("Area Weighting...") + #print("Area Weighting...") ww <- .area.weight(lon, lat, root = T) for (k in seq(1, n_field, 1)) { @@ -181,7 +208,7 @@ MultiEOF <- function(data, lon, lat, time, field <- array(field, dim = c(dim(field)[1] * dim(field)[2], dim(field)[3])) # calling SVD - print("Calling SVD...") + #print("Calling SVD...") SVD <- svd(field, nu = neof_max, nv = neof_max) # extracting EOFs (loading pattern), expansions coefficient @@ -189,11 +216,10 @@ MultiEOF <- function(data, lon, lat, time, pattern <- array(SVD$u, dim = c(dim(field)[1], dim(field)[2], neof_max)) coefficient <- SVD$v variance <- (SVD$d[1:neof_max]) ^ 2 / sum((SVD$d) ^ 2) - print("Accumulated variance:") - print(cumsum(variance)) + #print("Accumulated variance:") + #print(cumsum(variance)) reqPC <- which(cumsum(variance) > minvar)[1] - print("Number of EOFs needed for var:") - print(reqPC) + #print("Number of EOFs needed for var:") variance <- variance[1:reqPC] coefficient <- coefficient[, 1:reqPC] if (reqPC == 1) { @@ -221,7 +247,7 @@ MultiEOF <- function(data, lon, lat, time, } newpc <- t(newpc) - print("Calculating composed EOFs") + #print("Calculating composed EOFs") SVD <- svd(newpc, nu = neof_composed, nv = neof_composed) # extracting EOFs, expansions coefficient and variance explained coefficient <- SVD$v @@ -232,7 +258,7 @@ MultiEOF <- function(data, lon, lat, time, regression <- array(dim = c(n_field, length(lon), length(lat), neof_composed)) for (k in seq(1, n_field, 1)) { - print("Linear Regressions (it can take a while)... ") + #print("Linear Regressions (it can take a while)... ") for (i in 1:neof_composed) { regression[k, , , i] <- apply( field_arr[k, , , ], c(1, 2), @@ -244,7 +270,7 @@ MultiEOF <- function(data, lon, lat, time, } } - print("Finalize...") + #print("Finalize...") names(dim(coefficient)) <- c("time", "eof") variance <- array(variance) names(dim(variance)) <- "eof" -- GitLab From feb9aab66fe645dedf45e534b05bda8799eb4edd Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sat, 9 Nov 2019 20:26:40 +0100 Subject: [PATCH 15/26] edited examples --- R/CST_MultiEOF.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 4c8455b7..9cd3ca02 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -19,7 +19,6 @@ #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). #' @import abind -#' @export #' @examples #' \donttest{ #' library(zeallot) @@ -51,6 +50,7 @@ #' cal <- CST_MultiEOF(list(exp1, exp2)) #' cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(5, 30), ylim=c(35, 50), neof_composed=3) #' } +#' @export CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, @@ -113,11 +113,6 @@ CST_MultiEOF <- function(datalist, #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). #' @import multiApply #' @export -#' @examples -#' # Example for the 'reduced' MultiEOF function -#' -#' \donttest{ -#' } MultiEOF <- function(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", -- GitLab From 2f94f3400605c832fbbcd315817178a5ea9b14e9 Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Sun, 10 Nov 2019 11:13:30 +0100 Subject: [PATCH 16/26] fixed examples to run during check --- R/CST_MultiEOF.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 9cd3ca02..70ce8460 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -34,18 +34,18 @@ #' cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) #' str(cal) #' # List of 3 -#' $ coeff : num [1:3, 1:6, 1:2, 1:5] -0.312 -0.588 0.724 1.202 1.181 ... -#' $ variance : num [1:2, 1:5] 0.413 0.239 0.352 0.27 0.389 ... -#' $ eof_pattern: num [1:3, 1:53, 1:22, 1:2, 1:5] -1.47 -0.446 -0.656 -1.534 -0.464 ... +#' # $ coeff : num [1:3, 1:6, 1:2, 1:5] -0.312 -0.588 0.724 1.202 1.181 ... +#' # $ variance : num [1:2, 1:5] 0.413 0.239 0.352 0.27 0.389 ... +#' # $ eof_pattern: num [1:3, 1:53, 1:22, 1:2, 1:5] -1.47 -0.446 -0.656 -1.534 -0.464 ... #' dim(cal$coeff) #' # ftime sdate eof member #' # 3 6 2 3 #' #' cal <- CST_MultiEOF(list(exp1, exp2, exp3) , minvar=0.9) #' str(cal) -#' $ coeff : num [1:3, 1:6, 1:5, 1:5] 0.338 0.603 -0.736 -1.191 -1.198 ... -#' $ variance : num [1:5, 1:5] 0.3903 0.2264 0.1861 0.1032 0.0379 ... -#' $ eof_pattern: num [1:3, 1:53, 1:22, 1:5, 1:5] 1.477 0.454 0.651 1.541 0.47 ... +#' # $ coeff : num [1:3, 1:6, 1:5, 1:5] 0.338 0.603 -0.736 -1.191 -1.198 ... +#' # $ variance : num [1:5, 1:5] 0.3903 0.2264 0.1861 0.1032 0.0379 ... +#' # $ eof_pattern: num [1:3, 1:53, 1:22, 1:5, 1:5] 1.477 0.454 0.651 1.541 0.47 ... #' #' cal <- CST_MultiEOF(list(exp1, exp2)) #' cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(5, 30), ylim=c(35, 50), neof_composed=3) @@ -95,7 +95,7 @@ CST_MultiEOF <- function(datalist, #' input an arry with a dimension \code{"var"} for each variable to analyse. #' Based on Singular Value Decomposition. #' -#' @param datalist A multidimensional array with dimension \code{"var"}, +#' @param data A multidimensional array with dimension \code{"var"}, #' containing the variables to be analysed. The other diemnsions follow the same structure as the #' \code{"exp"} element of a 's2dv_cube' object. #' @param lon Vector of longitudes. -- GitLab From 9b6843a44298c55b7fe7ddbb11e3e8891622291a Mon Sep 17 00:00:00 2001 From: Jost von Hardenberg Date: Mon, 11 Nov 2019 13:09:45 +0100 Subject: [PATCH 17/26] added possibly useless loading of library abind in test --- tests/testthat/test-CST_MultiEOF.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R index 72a7ff1c..d0855977 100644 --- a/tests/testthat/test-CST_MultiEOF.R +++ b/tests/testthat/test-CST_MultiEOF.R @@ -1,5 +1,6 @@ context("Generic tests") test_that("Sanity checks and simple use case", { + library(abind) # Generate simple synthetic data seq <- 1 : (2 * 3 * 4 * 5 * 6 * 8) seq3 <- 1 : (2 * 3 * 4 * 4 * 6 * 8) -- GitLab From 2981b639bc4aa684a7f9b3cebf66922e9b8cd66b Mon Sep 17 00:00:00 2001 From: Federico Fabiano Date: Mon, 18 Nov 2019 15:22:57 +0100 Subject: [PATCH 18/26] Updated description --- R/CST_MultiEOF.R | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 70ce8460..60a2de8e 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -5,7 +5,8 @@ #' @author Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} #' #' @description This function performs EOF analysis over multiple variables, -#' accepting in input a list of CSTools objects. Based on Singular Value Decomposition. +#' accepting in input a list of CSTools objects. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. + #' #' @param datalist A list of objects of the class 's2dv_cube', containing the variables to be analysed. #' Each data object in the list is expected to have an element named \code{$data} with at least two @@ -14,7 +15,7 @@ #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition #' @param xlim Vector with longitudinal range limits for the EOF calculation -#' @param ylim Vector with latitudinal range limits for the EOF calculation +#' @param ylim Vector with latitudinal range limits for the EOF calculation #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -23,7 +24,7 @@ #' \donttest{ #' library(zeallot) #' c(exp, obs) %<-% lonlat_data -#' # Create three datasets (from the members) +#' # Create three datasets (from the members) #' exp1 <- exp #' exp2 <- exp #' exp3 <- exp @@ -62,7 +63,7 @@ CST_MultiEOF <- function(datalist, } # Check if all dims equal - adims=lapply(lapply(datalist, function(x) x$data), dim) + adims=lapply(lapply(datalist, function(x) x$data), dim) if( !all(apply(apply(abind(adims, along = 0), 2, duplicated), 2, sum) == (length(adims)-1))) { stop("Input data fields must all have the same dimensions.") @@ -75,7 +76,7 @@ CST_MultiEOF <- function(datalist, if (any(is.na(exp))) { stop("Input data contain NA values.") - } + } result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, datalist[[1]]$Dates$start, minvar = minvar, @@ -91,9 +92,7 @@ CST_MultiEOF <- function(datalist, #' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} #' @author Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} #' -#' @description This function performs EOF analysis over multiple variables, accepting in -#' input an arry with a dimension \code{"var"} for each variable to analyse. -#' Based on Singular Value Decomposition. +#' @description This function performs EOF analysis over multiple variables, accepting in input an array with a dimension \code{"var"} for each variable to analyse. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. #' #' @param data A multidimensional array with dimension \code{"var"}, #' containing the variables to be analysed. The other diemnsions follow the same structure as the -- GitLab From ddfaa9fc277e7f2aa9cfb7e8471a375d6e8bd5e6 Mon Sep 17 00:00:00 2001 From: jhardenberg Date: Tue, 19 Nov 2019 12:52:14 +0100 Subject: [PATCH 19/26] rewritten box selection mechanism + lon, lat in output --- R/CST_MultiEOF.R | 58 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 60a2de8e..b42cc637 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -14,8 +14,8 @@ #' @param neof_composed Number of composed eofs to return in output #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the EOF calculation -#' @param ylim Vector with latitudinal range limits for the EOF calculation +#' @param xlim Vector with longitudinal range limits for the EOF calculation for all input variables +#' @param ylim Vector with latitudinal range limits for the EOF calculation for all input variables #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -100,13 +100,13 @@ CST_MultiEOF <- function(datalist, #' @param lon Vector of longitudes. #' @param lat Vector of latitudes. #' @param time Vector or matrix of dates in POSIXct format. -#' @param lon_dim String with dimension name of longitudinal coordinate (default: "lon") -#' @param lat_dim String with dimension name of latitudinal coordinate (default: "lat") +#' @param lon_dim String with dimension name of longitudinal coordinate +#' @param lat_dim String with dimension name of latitudinal coordinate #' @param neof_composed Number of composed eofs to return in output #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the calculation -#' @param ylim Vector with latitudinal range limits for the calculation +#' @param xlim Vector with longitudinal range limits for the calculation for all input variables +#' @param ylim Vector with latitudinal range limits for the calculation for all input variables #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -138,6 +138,11 @@ MultiEOF <- function(data, lon, lat, time, # Expand back samples to compacted dims dim(result$coeff) <- c(cdim[-ind], dim(result$coeff)[-1]) + # Recover first lon and first lat list + dd=dim(result$lon)[1]; m=matrix(1, nrow=dd, ncol=length(dim(result$lon))); + m[1:dd]=1:dd; result$lon = result$lon[m] + dd=dim(result$lat)[1]; m=matrix(1, nrow=dd, ncol=length(dim(result$lat))); + m[1:dd]=1:dd; result$lat = result$lat[m] return(result) } @@ -187,22 +192,41 @@ MultiEOF <- function(data, lon, lat, time, # calculate the area weight field <- sweep(field_orig, c(1, 2), ww, "*") - if(is.null(xlim) | is.null(xlim)) { - slon <- lon - slat <- lat + # selection of the xbox and ybox if defined + if (!is.null(xlim)) { + # This transforms c(-20, -10) to c(340, 350) but c(-20, 10) is unchanged + # Bring them all to the same units in the 0:360 range + xlim1 <- xlim[1] %% 360 + xlim2 <- xlim[2] %% 360 + if (xlim1 > xlim2) { + # If box crosses 0 first limit is negative + xlim1 <- xlim1 - 360 + } + lonm <- lon %% 360 + # If it crosses 0 leave unchanged + if (lonm[1] > tail(lonm, 1)) { + lonm <- lon + } + ilonsel <- (lonm >= xlim1) & (lonm <= xlim2) + if (!any(ilonsel)) { + stop("No intersection between longitude bounds and data domain.") + } + } else { + ilonsel <- 1:length(lon) + } + if (!is.null(ylim)) { + ilatsel <- (lat >= ylim[1]) & (lat <= ylim[2]) } else { - # selection of the box - field <- field[.whicher(lon, xlim[1]):.whicher(lon, xlim[2]), - .whicher(lat, ylim[1]):.whicher(lat, ylim[2]), ] - slon <- lon[.whicher(lon, xlim[1]):.whicher(lon, xlim[2])] - slat <- lat[.whicher(lat, ylim[1]):.whicher(lat, ylim[2])] + ilatsel <- 1:length(lat) } + slon <- lon[ilonsel] + slat <- lat[ilatsel] + field <- field[ilonsel, ilatsel, ] # transform 3D field in a matrix field <- array(field, dim = c(dim(field)[1] * dim(field)[2], dim(field)[3])) # calling SVD - #print("Calling SVD...") SVD <- svd(field, nu = neof_max, nv = neof_max) # extracting EOFs (loading pattern), expansions coefficient @@ -270,7 +294,7 @@ MultiEOF <- function(data, lon, lat, time, names(dim(variance)) <- "eof" names(dim(regression)) <- c("var", "lon", "lat", "eof") - out <- list(coeff = coefficient, variance = variance, eof_pattern = regression) + out <- list(coeff = coefficient, variance = variance, eof_pattern = regression, lon = slon, lat = slat) return(out) } @@ -350,7 +374,7 @@ MultiEOF <- function(data, lon, lat, time, # detect ics ipsilon lat-lon .whicher <- function(axis, number) { - out <- which.min(abs(axis - number)) + out <- which.min(abs((axis %% 360) - (number %% 360))) return(out) } -- GitLab From dc1b384b9dbb8ff27456978b1454d39393a54042 Mon Sep 17 00:00:00 2001 From: jhardenberg Date: Tue, 19 Nov 2019 12:58:11 +0100 Subject: [PATCH 20/26] forgot to update tests --- R/CST_MultiEOF.R | 8 -------- tests/testthat/test-CST_MultiEOF.R | 15 +++++++++++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index b42cc637..28322f52 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -371,14 +371,6 @@ MultiEOF <- function(data, lon, lat, time, } } - -# detect ics ipsilon lat-lon -.whicher <- function(axis, number) { - out <- which.min(abs((axis %% 360) - (number %% 360))) - return(out) -} - - # normalize a time series .standardize <- function(timeseries) { out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R index d0855977..a5ae45bb 100644 --- a/tests/testthat/test-CST_MultiEOF.R +++ b/tests/testthat/test-CST_MultiEOF.R @@ -39,8 +39,12 @@ test_that("Sanity checks and simple use case", { dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) exp3$data <- mod3 + expect_error( + CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(-250, -245), ylim=c(10, 25)), + "No intersection between longitude bounds and data domain.") + cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_composed=2) - expect_equal(length(cal), 3) + expect_equal(length(cal), 5) dimexp=dim(exp1$data) expect_equal(dim(cal$coeff), c(dimexp["ftime"], dimexp["sdate"], eof=2, dimexp["dataset"], dimexp["member"])) @@ -57,8 +61,15 @@ test_that("Sanity checks and simple use case", { cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(5, 30), ylim=c(10, 25)) expect_equal(cal$coeff[2, 1, 1, 1, 1], 0.8539488, tolerance = .00001) + expect_equivalent(cal$lon, seq(5, 30, 5)) + expect_equivalent(cal$lat, seq(10, 25, 5)) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(350, 15), ylim=c(10, 25)) + expect_equivalent(cal$lon, seq(0, 15, 5)) + expect_equivalent(cal$lat, seq(10, 25, 5)) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(-355, -345)) + expect_equivalent(cal$lon, seq(5, 15, 5)) - exp3$data[1,1,1,1,1,1]=NaN + exp3$data[1, 1, 1, 1, 1, 1]=NaN expect_error( CST_MultiEOF(list(exp1, exp3), neof_max=8, neof_composed=2), "Input data contain NA values." -- GitLab From a10ee6ec062d378193c2f44bac59b7cbc2ba140d Mon Sep 17 00:00:00 2001 From: jhardenberg Date: Tue, 19 Nov 2019 16:30:02 +0100 Subject: [PATCH 21/26] renamed xlim in lon_lim --- R/CST_MultiEOF.R | 18 +++++++++--------- tests/testthat/test-CST_MultiEOF.R | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 28322f52..84d5ebd9 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -14,8 +14,8 @@ #' @param neof_composed Number of composed eofs to return in output #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the EOF calculation for all input variables -#' @param ylim Vector with latitudinal range limits for the EOF calculation for all input variables +#' @param lon_lim Vector with longitudinal range limits for the EOF calculation for all input variables +#' @param lat_lim Vector with latitudinal range limits for the EOF calculation for all input variables #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -49,13 +49,13 @@ #' # $ eof_pattern: num [1:3, 1:53, 1:22, 1:5, 1:5] 1.477 0.454 0.651 1.541 0.47 ... #' #' cal <- CST_MultiEOF(list(exp1, exp2)) -#' cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(5, 30), ylim=c(35, 50), neof_composed=3) +#' cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(5, 30), lat_lim=c(35, 50), neof_composed=3) #' } #' @export CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, - xlim = NULL, ylim = NULL) { + lon_lim = NULL, lat_lim = NULL) { if (!(all(sapply(datalist, inherits, 's2dv_cube')))) { stop("Elements of the list in parameter 'datalist' must be of the class ", @@ -81,7 +81,7 @@ CST_MultiEOF <- function(datalist, result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, datalist[[1]]$Dates$start, minvar = minvar, neof_max = neof_max, neof_composed = neof_composed, - xlim = xlim, ylim = ylim) + lon_lim = lon_lim, lat_lim = lat_lim) return(result) } @@ -105,8 +105,8 @@ CST_MultiEOF <- function(datalist, #' @param neof_composed Number of composed eofs to return in output #' @param minvar Minimum variance fraction to be explained in first decomposition #' @param neof_max Maximum number of single eofs considered in the first decomposition -#' @param xlim Vector with longitudinal range limits for the calculation for all input variables -#' @param ylim Vector with latitudinal range limits for the calculation for all input variables +#' @param lon_lim Vector with longitudinal range limits for the calculation for all input variables +#' @param lat_lim Vector with latitudinal range limits for the calculation for all input variables #' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), #' \code{$variance} (a matrix of explained variances), #' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). @@ -116,7 +116,7 @@ CST_MultiEOF <- function(datalist, MultiEOF <- function(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", neof_max = 40, neof_composed = 5, minvar = 0.6, - xlim = NULL, ylim = NULL) { + lon_lim = NULL, lat_lim = NULL) { # Check/detect time_dim # reorder and group ftime and sdate together at the end in that order @@ -134,7 +134,7 @@ MultiEOF <- function(data, lon, lat, time, result <- Apply(data, c("var", lon_dim, lat_dim, "samples"), .multi.eofs, lon, lat, time, neof_max = neof_max, neof_composed = neof_composed, minvar = minvar, - xlim = xlim, ylim = ylim) + xlim = lon_lim, ylim = lat_lim) # Expand back samples to compacted dims dim(result$coeff) <- c(cdim[-ind], dim(result$coeff)[-1]) diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R index a5ae45bb..d9c46ab1 100644 --- a/tests/testthat/test-CST_MultiEOF.R +++ b/tests/testthat/test-CST_MultiEOF.R @@ -40,7 +40,7 @@ test_that("Sanity checks and simple use case", { exp3$data <- mod3 expect_error( - CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(-250, -245), ylim=c(10, 25)), + CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(-250, -245), lat_lim=c(10, 25)), "No intersection between longitude bounds and data domain.") cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_composed=2) @@ -59,14 +59,14 @@ test_that("Sanity checks and simple use case", { cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2, minvar=0.2) expect_equal(cal$coeff[2, 1, 1, 1, 1], -0.6117927, tolerance = .00001) - cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(5, 30), ylim=c(10, 25)) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(5, 30), lat_lim=c(10, 25)) expect_equal(cal$coeff[2, 1, 1, 1, 1], 0.8539488, tolerance = .00001) expect_equivalent(cal$lon, seq(5, 30, 5)) expect_equivalent(cal$lat, seq(10, 25, 5)) - cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(350, 15), ylim=c(10, 25)) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(350, 15), lat_lim=c(10, 25)) expect_equivalent(cal$lon, seq(0, 15, 5)) expect_equivalent(cal$lat, seq(10, 25, 5)) - cal <- CST_MultiEOF(list(exp1, exp2, exp3), xlim=c(-355, -345)) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(-355, -345)) expect_equivalent(cal$lon, seq(5, 15, 5)) exp3$data[1, 1, 1, 1, 1, 1]=NaN -- GitLab From 30e3acd01ac56d8d1e965de31e74664099b42d80 Mon Sep 17 00:00:00 2001 From: jhardenberg Date: Tue, 19 Nov 2019 18:11:22 +0100 Subject: [PATCH 22/26] the final box fix --- R/CST_MultiEOF.R | 64 +++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 84d5ebd9..d58548ce 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -192,36 +192,10 @@ MultiEOF <- function(data, lon, lat, time, # calculate the area weight field <- sweep(field_orig, c(1, 2), ww, "*") - # selection of the xbox and ybox if defined - if (!is.null(xlim)) { - # This transforms c(-20, -10) to c(340, 350) but c(-20, 10) is unchanged - # Bring them all to the same units in the 0:360 range - xlim1 <- xlim[1] %% 360 - xlim2 <- xlim[2] %% 360 - if (xlim1 > xlim2) { - # If box crosses 0 first limit is negative - xlim1 <- xlim1 - 360 - } - lonm <- lon %% 360 - # If it crosses 0 leave unchanged - if (lonm[1] > tail(lonm, 1)) { - lonm <- lon - } - ilonsel <- (lonm >= xlim1) & (lonm <= xlim2) - if (!any(ilonsel)) { - stop("No intersection between longitude bounds and data domain.") - } - } else { - ilonsel <- 1:length(lon) - } - if (!is.null(ylim)) { - ilatsel <- (lat >= ylim[1]) & (lat <= ylim[2]) - } else { - ilatsel <- 1:length(lat) - } - slon <- lon[ilonsel] - slat <- lat[ilatsel] - field <- field[ilonsel, ilatsel, ] + idx <- .selbox(lon, lat, xlim, ylim) + slon <- lon[idx$ilon] + slat <- lat[idx$ilat] + field <- field[idx$ilon, idx$ilat, ] # transform 3D field in a matrix field <- array(field, dim = c(dim(field)[1] * dim(field)[2], dim(field)[3])) @@ -376,3 +350,33 @@ MultiEOF <- function(data, lon, lat, time, out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) return(out) } + +.selbox <- function(lon, lat, xlim = NULL, ylim = NULL) { + if (!is.null(xlim)) { + # This transforms c(-20, -10) to c(340, 350) but c(-20, 10) is unchanged + # Bring them all to the same units in the 0:360 range + xlim1 <- xlim[1] %% 360 + xlim2 <- xlim[2] %% 360 + lonm <- lon %% 360 + if (lonm[1] > tail(lonm, 1)) { + lonm <- lon + } + if (xlim1 > xlim2) { + # If box crosses 0 + ilonsel <- (lonm >= xlim1) | (lonm <= xlim2) + } else { + ilonsel <- (lonm >= xlim1) & (lonm <= xlim2) + } + if (!any(ilonsel)) { + stop("No intersection between longitude bounds and data domain.") + } + } else { + ilonsel <- 1:length(lon) + } + if (!is.null(ylim)) { + ilatsel <- (lat >= ylim[1]) & (lat <= ylim[2]) + } else { + ilatsel <- 1:length(lat) + } + return(list(ilon = ilonsel, ilat = ilatsel)) +} -- GitLab From 75815126aff5f0eb76bcc978b59a7feddf4e2653 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 20 Nov 2019 15:10:12 +0100 Subject: [PATCH 23/26] Documentation added and functions moved to zzz --- NAMESPACE | 3 ++ R/CST_MultiEOF.R | 62 --------------------------------------- R/zzz.R | 63 ++++++++++++++++++++++++++++++++++++++++ man/CST_MultiEOF.Rd | 71 +++++++++++++++++++++++++++++++++++++++++++++ man/MultiEOF.Rd | 49 +++++++++++++++++++++++++++++++ 5 files changed, 186 insertions(+), 62 deletions(-) create mode 100644 man/CST_MultiEOF.Rd create mode 100644 man/MultiEOF.Rd diff --git a/NAMESPACE b/NAMESPACE index ef0bfd71..c79a0ee5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,16 +4,19 @@ export(CST_Anomaly) export(CST_BiasCorrection) export(CST_Calibration) export(CST_Load) +export(CST_MultiEOF) export(CST_MultiMetric) export(CST_MultivarRMSE) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) +export(MultiEOF) export(PlotCombinedMap) export(PlotForecastPDF) export(PlotMostLikelyQuantileMap) export(RFSlope) export(RainFARM) +import(abind) import(ggplot2) import(multiApply) import(ncdf4) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index d58548ce..f1e213d7 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -318,65 +318,3 @@ MultiEOF <- function(data, lon, lat, time, } return(anom) } - - -# produce a 2d matrix of area weights -.area.weight <- function(ics, ipsilon, root = T) { - field <- array(NA, dim = c(length(ics), length(ipsilon))) - if (root == T) { - for (j in 1:length(ipsilon)) { - field[, j] <- sqrt(cos(pi / 180 * ipsilon[j])) - } - } - - if (root == F) { - for (j in 1:length(ipsilon)) { - field[, j] <- cos(pi / 180 * ipsilon[j]) - } - } - - return(field) -} - -# verbose-only printing function -.printv <- function(value, verbosity = TRUE) { - if (verbosity) { - print(value) - } -} - -# normalize a time series -.standardize <- function(timeseries) { - out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) - return(out) -} - -.selbox <- function(lon, lat, xlim = NULL, ylim = NULL) { - if (!is.null(xlim)) { - # This transforms c(-20, -10) to c(340, 350) but c(-20, 10) is unchanged - # Bring them all to the same units in the 0:360 range - xlim1 <- xlim[1] %% 360 - xlim2 <- xlim[2] %% 360 - lonm <- lon %% 360 - if (lonm[1] > tail(lonm, 1)) { - lonm <- lon - } - if (xlim1 > xlim2) { - # If box crosses 0 - ilonsel <- (lonm >= xlim1) | (lonm <= xlim2) - } else { - ilonsel <- (lonm >= xlim1) & (lonm <= xlim2) - } - if (!any(ilonsel)) { - stop("No intersection between longitude bounds and data domain.") - } - } else { - ilonsel <- 1:length(lon) - } - if (!is.null(ylim)) { - ilatsel <- (lat >= ylim[1]) & (lat <= ylim[2]) - } else { - ilatsel <- 1:length(lat) - } - return(list(ilon = ilonsel, ilat = ilatsel)) -} diff --git a/R/zzz.R b/R/zzz.R index 662624d9..2d44347b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -16,3 +16,66 @@ attributes(x) <- c(attributes(x), attr_bk) x } + +# verbose-only printing function +.printv <- function(value, verbosity = TRUE) { + if (verbosity) { + print(value) + } +} + +# normalize a time series +.standardize <- function(timeseries) { + out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) + return(out) +} + + +.selbox <- function(lon, lat, xlim = NULL, ylim = NULL) { + if (!is.null(xlim)) { + # This transforms c(-20, -10) to c(340, 350) but c(-20, 10) is unchanged + # Bring them all to the same units in the 0:360 range + xlim1 <- xlim[1] %% 360 + xlim2 <- xlim[2] %% 360 + lonm <- lon %% 360 + if (lonm[1] > tail(lonm, 1)) { + lonm <- lon + } + if (xlim1 > xlim2) { + # If box crosses 0 + ilonsel <- (lonm >= xlim1) | (lonm <= xlim2) + } else { + ilonsel <- (lonm >= xlim1) & (lonm <= xlim2) + } + if (!any(ilonsel)) { + stop("No intersection between longitude bounds and data domain.") + } + } else { + ilonsel <- 1:length(lon) + } + if (!is.null(ylim)) { + ilatsel <- (lat >= ylim[1]) & (lat <= ylim[2]) + } else { + ilatsel <- 1:length(lat) + } + return(list(ilon = ilonsel, ilat = ilatsel)) +} + +# produce a 2d matrix of area weights +.area.weight <- function(ics, ipsilon, root = T) { + field <- array(NA, dim = c(length(ics), length(ipsilon))) + if (root == T) { + for (j in 1:length(ipsilon)) { + field[, j] <- sqrt(cos(pi / 180 * ipsilon[j])) + } + } + + if (root == F) { + for (j in 1:length(ipsilon)) { + field[, j] <- cos(pi / 180 * ipsilon[j]) + } + } + + return(field) +} + diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd new file mode 100644 index 00000000..657d2d36 --- /dev/null +++ b/man/CST_MultiEOF.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_MultiEOF.R +\name{CST_MultiEOF} +\alias{CST_MultiEOF} +\title{EOF analysis of multiple variables} +\usage{ +CST_MultiEOF(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, + lon_lim = NULL, lat_lim = NULL) +} +\arguments{ +\item{datalist}{A list of objects of the class 's2dv_cube', containing the variables to be analysed. +Each data object in the list is expected to have an element named \code{$data} with at least two +spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate".} + +\item{neof_max}{Maximum number of single eofs considered in the first decomposition} + +\item{neof_composed}{Number of composed eofs to return in output} + +\item{minvar}{Minimum variance fraction to be explained in first decomposition} + +\item{lon_lim}{Vector with longitudinal range limits for the EOF calculation for all input variables} + +\item{lat_lim}{Vector with latitudinal range limits for the EOF calculation for all input variables} +} +\value{ +A list with elements \code{$coeff} (an array of time-varying principal component coefficients), + \code{$variance} (a matrix of explained variances), + \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +} +\description{ +This function performs EOF analysis over multiple variables, +accepting in input a list of CSTools objects. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. +} +\examples{ +\donttest{ +library(zeallot) +c(exp, obs) \%<-\% lonlat_data +# Create three datasets (from the members) +exp1 <- exp +exp2 <- exp +exp3 <- exp +exp1$data <- exp$data[ , 1:5, , , , ] +exp2$data <- exp$data[ , 6:10, , , , ] +exp3$data <- exp$data[ , 11:15, , , , ] + +cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) +str(cal) +# List of 3 +# $ coeff : num [1:3, 1:6, 1:2, 1:5] -0.312 -0.588 0.724 1.202 1.181 ... +# $ variance : num [1:2, 1:5] 0.413 0.239 0.352 0.27 0.389 ... +# $ eof_pattern: num [1:3, 1:53, 1:22, 1:2, 1:5] -1.47 -0.446 -0.656 -1.534 -0.464 ... +dim(cal$coeff) +# ftime sdate eof member +# 3 6 2 3 + +cal <- CST_MultiEOF(list(exp1, exp2, exp3) , minvar=0.9) +str(cal) +# $ coeff : num [1:3, 1:6, 1:5, 1:5] 0.338 0.603 -0.736 -1.191 -1.198 ... +# $ variance : num [1:5, 1:5] 0.3903 0.2264 0.1861 0.1032 0.0379 ... +# $ eof_pattern: num [1:3, 1:53, 1:22, 1:5, 1:5] 1.477 0.454 0.651 1.541 0.47 ... + +cal <- CST_MultiEOF(list(exp1, exp2)) +cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(5, 30), lat_lim=c(35, 50), neof_composed=3) +} +} +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} + +Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +} + diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd new file mode 100644 index 00000000..1e822fc4 --- /dev/null +++ b/man/MultiEOF.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_MultiEOF.R +\name{MultiEOF} +\alias{MultiEOF} +\title{EOF analysis of multiple variables starting from an array (reduced version)} +\usage{ +MultiEOF(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", + neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, + lat_lim = NULL) +} +\arguments{ +\item{data}{A multidimensional array with dimension \code{"var"}, +containing the variables to be analysed. The other diemnsions follow the same structure as the +\code{"exp"} element of a 's2dv_cube' object.} + +\item{lon}{Vector of longitudes.} + +\item{lat}{Vector of latitudes.} + +\item{time}{Vector or matrix of dates in POSIXct format.} + +\item{lon_dim}{String with dimension name of longitudinal coordinate} + +\item{lat_dim}{String with dimension name of latitudinal coordinate} + +\item{neof_max}{Maximum number of single eofs considered in the first decomposition} + +\item{neof_composed}{Number of composed eofs to return in output} + +\item{minvar}{Minimum variance fraction to be explained in first decomposition} + +\item{lon_lim}{Vector with longitudinal range limits for the calculation for all input variables} + +\item{lat_lim}{Vector with latitudinal range limits for the calculation for all input variables} +} +\value{ +A list with elements \code{$coeff} (an array of time-varying principal component coefficients), + \code{$variance} (a matrix of explained variances), + \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +} +\description{ +This function performs EOF analysis over multiple variables, accepting in input an array with a dimension \code{"var"} for each variable to analyse. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. +} +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} + +Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +} + -- GitLab From 2eb3a26ca6bb2d5dc34dbdcd8b581ad58f75f30a Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 20 Nov 2019 15:28:19 +0100 Subject: [PATCH 24/26] example corrected --- NAMESPACE | 6 +++--- R/CST_MultiEOF.R | 3 +++ man/CST_MultiEOF.Rd | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9f31d641..33ffd248 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,9 @@ # Generated by roxygen2: do not edit by hand export(Analogs) -export(CST_Analogs) export(BEI_PDFBest) export(BEI_Weights) +export(CST_Analogs) export(CST_Anomaly) export(CST_BEI_Weighting) export(CST_BiasCorrection) @@ -15,16 +15,16 @@ export(CST_MultivarRMSE) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) -export(MultiEOF) export(CST_SaveExp) +export(MultiEOF) export(PlotCombinedMap) export(PlotForecastPDF) export(PlotMostLikelyQuantileMap) export(RFSlope) export(RainFARM) -import(ClimProjDiags) export(as.s2dv_cube) export(s2dv_cube) +import(ClimProjDiags) import(abind) import(ggplot2) import(multiApply) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index f1e213d7..d608c23e 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -31,6 +31,9 @@ #' exp1$data <- exp$data[ , 1:5, , , , ] #' exp2$data <- exp$data[ , 6:10, , , , ] #' exp3$data <- exp$data[ , 11:15, , , , ] +#' names(dim(exp1$data)) <- names(dim(exp$data)) +#' names(dim(exp2$data)) <- names(dim(exp$data)) +#' names(dim(exp3$data)) <- names(dim(exp$data)) #' #' cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) #' str(cal) diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index 657d2d36..6a51c5a4 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -42,6 +42,9 @@ exp3 <- exp exp1$data <- exp$data[ , 1:5, , , , ] exp2$data <- exp$data[ , 6:10, , , , ] exp3$data <- exp$data[ , 11:15, , , , ] +names(dim(exp1$data)) <- names(dim(exp$data)) +names(dim(exp2$data)) <- names(dim(exp$data)) +names(dim(exp3$data)) <- names(dim(exp$data)) cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) str(cal) -- GitLab From 2bf2ac02ea2150e686e579dfee88d22202563f8a Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 20 Nov 2019 15:46:45 +0100 Subject: [PATCH 25/26] fix example using ClimProjDiags --- R/CST_MultiEOF.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index d608c23e..45542341 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -23,17 +23,15 @@ #' @examples #' \donttest{ #' library(zeallot) +#' library(ClimProjDiags) #' c(exp, obs) %<-% lonlat_data #' # Create three datasets (from the members) #' exp1 <- exp #' exp2 <- exp #' exp3 <- exp -#' exp1$data <- exp$data[ , 1:5, , , , ] -#' exp2$data <- exp$data[ , 6:10, , , , ] -#' exp3$data <- exp$data[ , 11:15, , , , ] -#' names(dim(exp1$data)) <- names(dim(exp$data)) -#' names(dim(exp2$data)) <- names(dim(exp$data)) -#' names(dim(exp3$data)) <- names(dim(exp$data)) +#' exp1$data <- Subset(exp$data, along = 2, indices = 1 : 5) +#' exp2$data <- Subset(exp$data, along = 2, indices = 6 : 10) +#' exp3$data <- Subset(exp$data, along = 2, indices = 11 : 15) #' #' cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) #' str(cal) -- GitLab From 887e9b503239e57b70fd67aac4dcd890aaf56090 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 20 Nov 2019 15:54:03 +0100 Subject: [PATCH 26/26] Fixing the example in the documentation devtools --- man/CST_MultiEOF.Rd | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index 6a51c5a4..fb584751 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -34,17 +34,15 @@ accepting in input a list of CSTools objects. Based on Singular Value Decomposit \examples{ \donttest{ library(zeallot) +library(ClimProjDiags) c(exp, obs) \%<-\% lonlat_data # Create three datasets (from the members) exp1 <- exp exp2 <- exp exp3 <- exp -exp1$data <- exp$data[ , 1:5, , , , ] -exp2$data <- exp$data[ , 6:10, , , , ] -exp3$data <- exp$data[ , 11:15, , , , ] -names(dim(exp1$data)) <- names(dim(exp$data)) -names(dim(exp2$data)) <- names(dim(exp$data)) -names(dim(exp3$data)) <- names(dim(exp$data)) +exp1$data <- Subset(exp$data, along = 2, indices = 1 : 5) +exp2$data <- Subset(exp$data, along = 2, indices = 6 : 10) +exp3$data <- Subset(exp$data, along = 2, indices = 11 : 15) cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) str(cal) -- GitLab