#'Save CSTools objects of class 's2dv_cube' containing experiments or observed #'data in NetCDF format #' #'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} #' #'@description This function allows to divide and save a object of class #''s2dv_cube' into a NetCDF file, allowing to reload the saved data using #'\code{CST_Load} function. #' #'@param data An object of class \code{s2dv_cube}. #'@param destination A character string containing the directory name in which #' to save the data. NetCDF file for each starting date are saved into the #' folder tree: \cr #' destination/experiment/variable/. By default the function #' creates and saves the data into the folder "CST_Data" in the working #' directory. #'@param extra_string A character string to be include as part of the file name, #' for instance, to identify member or realization. It would be added to the #' file name between underscore characters. #'@param unique_file A logical value indicating if all object is saved in a #' unique file (TRUE) or in separated directories (FALSE). When it is FALSE, #' the array is separated for dataset, variable and start date. #' #'@seealso \code{\link{CST_Load}}, \code{\link{as.s2dv_cube}} and \code{\link{s2dv_cube}} #' #'@examples #'\dontrun{ #'library(CSTools) #'data <- lonlat_temp$exp #'destination <- "./path2/" #'CST_SaveExp(data = data, destination = destination) #'} #' #'@import ncdf4 #'@importFrom s2dv Reorder InsertDim #'@importFrom ClimProjDiags Subset #'@import multiApply #'@export CST_SaveExp <- function(data, destination = "./CST_Data", unique_file = TRUE, extra_string = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } # Check object structure if (!all(c('data', 'coords', 'attrs') %in% names(data))) { stop("Parameter 'data' must have 'data', 'coords' and 'attrs' elements ", "within the 's2dv_cube' structure.") } # sdate_name if (!any(names(data$coords) %in% .KnownStartDateNames())) { stop("Start date dimension name do not match with any of the ", "accepted names by the package.") } sdate_name <- names(data$coords)[which(names(data$coords) %in% .KnownStartDateNames())] if (length(sdate_name) > 1) { warning("Found more than one start date dimension. Only the first one ", "will be used.") sdate_name <- sdate_name[1] } sdates <- data$coords[[sdate_name]] # var_name if (!is.character(data$attrs$Variable$varName)) { stop("Element 'varName' mustbe a character string.") } var_name <- data$attrs$Variable$varName # metadata if (!inherits(data$attrs$Variable$metadata, 'list')) { stop("Element $attrs$Variable$metadata must be a list.") } if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) { warning("Metadata is not found for any coordinate.") } else if (!any(names(data$attrs$Variable$metadata) %in% data$attrs$Variable$varName)) { warning("Metadata is not found for any variable.") } # Coordinate attributes if (!any(names(data$coords) %in% .KnownLonNames()) | !any(names(data$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names do not match any of the names accepted by ", "the package.") } lon_name <- names(data$coords)[which(names(data$coords) %in% .KnownLonNames())] lat_name <- names(data$coords)[which(names(data$coords) %in% .KnownLatNames())] # Dates time_values <- data$attrs$Dates if (is.null(dim(time_values))) { stop("Dates element in '$data$attrs$Dates' must have time dimensios.") } SaveExp(data = data$data, destination = destination, coords = data$coords, startdates = data$coords[[sdate_name]], Dates = data$attrs$Dates, Dataset = data$attrs$Datasets, var_name = data$attrs$Variable$varName, metadata = data$attrs$Variable$metadata, extra_string = extra_string, unique_file = unique_file) } #'Save an experiment in a format compatible with CST_Load #'@description This function is created for compatibility with CST_Load/Load for #'saving post-processed datasets such as those calibrated of downscaled with #'CSTools functions #' #'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} #' #'@param data An multi-dimensional array with named dimensions (longitude, #' latitude, time, member, sdate). #'@param destination A character string indicating the path where to store the #' NetCDF files. #'@param coords A named list with elements of the coordinates corresponding to #' the dimensions of the data parameter. The names and length of each element #' must correspond to the names of the dimensions. If any coordinate is not #' provided, it is set as an index vector with the values from 1 to the length #' of the corresponding dimension. #'@param startdates A vector of dates indicating the initialization date of each #' simulations. #'@param Dates A matrix of dates with the corresponding sdate and forecast time #' dimension. #'@param Dataset A vector of character string indicating the names of the #' datasets. #'@param var_name A character string indicating the name of the variable to be #' saved. #'@param metadata A named list where each element is a variable containing the #' corresponding information. The information must be contained in a list of #' lists for each variable. #'@param unique_file A logical value indicating if all object is saved in a #' unique file (TRUE) or in separated directories (FALSE). When it is FALSE, #' the array is separated for dataset, variable and start date. #'@param extra_string A character string to be include as part of the file name, #' for instance, to identify member or realization. #' #'@return The function creates as many files as sdates per dataset. Each file #'could contain multiple members. It would be added to the file name between #'underscore characters. The path will be created with the name of the variable #'and each Datasets. #' #'@examples #'\dontrun{ #'data <- lonlat_temp$exp$data #'lon <- lonlat_temp$exp$lon #'lat <- lonlat_temp$exp$lat #'Dataset <- 'XXX' #'var_name <- 'tas' #'units <- 'k' #'startdates <- lapply(1:length(lonlat_temp$exp$Datasets), #' function(x) { #' lonlat_temp$exp$Datasets[[x]]$InitializationDates[[1]]})[[1]] #'Dates <- lonlat_temp$exp$Dates$start #'dim(Dates) <- c(time = length(Dates)/length(startdates), sdate = length(startdates)) #'destination = './path/' #'SaveExp(data, lon, lat, Dataset, var_name, units, startdates, Dates, #' cdo_grid_name, projection, destination) #'} #'@import ncdf4 #'@importFrom s2dv Reorder InsertDim #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export SaveExp <- function(data, destination = NULL, coords = NULL, startdates = NULL, Dates = NULL, Dataset = NULL, var_name = NULL, metadata = NULL, unique_file = FALSE, extra_string = NULL) { ## Initial input parameter checks: # data if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } dimnames <- names(dim(data)) if (is.null(dimnames)) { stop("Parameter 'data' must have named dimensions.") } # destination if (!is.character(destination) & length(destination) > 1) { stop("Parameter 'destination' must be a character string of one element ", "indicating the name of the file (including the folder if needed) ", "where the data will be saved.") } # startdates if (is.null(startdates)) { stop("Parameter 'startdates' cannot be NULL.") } # Dates if (is.null(Dates)) { stop("Parameter 'Dates' cannot be NULL.") } # var_name if (is.null(var_name)) { warning("Parameter 'var_name' is NULL. It will be assigned to 'X'.") var_name <- 'X' } else if (length(var_name) > 1) { multiple_vars <- TRUE } else { multiple_vars <- FALSE } if (!all(sapply(var_name, is.character))) { stop("Parameter 'var_name' must be a character string with the ", "variable names.") } ## Coordinates # longitude and latitude if (!is.null(coords)) { if (!all(names(coords) %in% dimnames)) { coords <- coords[-which(!names(coords) %in% dimnames)] } for (i_coord in dimnames) { if (i_coord %in% names(coords)) { if (length(coords[[i_coord]]) != dim(data)[i_coord]) { warning(paste0("Coordinate '", i_coord, "' has different lenght as ", "its dimension and it will not be used.")) coords[[i_coord]] <- 1:dim(data)[i_coord] } } else { warning(paste0("Coordinate '", i_coord, "' is not provided ", "and it will be set as index in element coords.")) coords[[i_coord]] <- 1:dim(data)[i_coord] } } } else { coords <- sapply(dimnames, function(x) 1:dims[x]) } ## Dimensions checks: # Spatial coordinates if (!any(dimnames %in% .KnownLonNames()) | !any(dimnames %in% .KnownLatNames())) { stop("Spatial coordinate names do not match any of the names accepted by ", "the package.") } lon_name <- dimnames[which(dimnames %in% .KnownLonNames())] lat_name <- dimnames[which(dimnames %in% .KnownLatNames())] if (length(lon_name) > 1) { warning("Found more than one longitudinal dimension. Only the first one ", "will be used.") lon_name <- lon_name[1] } if (length(lat_name) > 1) { warning("Found more than one latitudinal dimension. Only the first one ", "will be used.") lat_name <- lat_name[1] } # ftime_name if (!any(dimnames %in% .KnownForecastTimeNames())) { stop("Forecast time dimension name do not match with any of the ", "accepted names by the package.") } ftime_name <- dimnames[which(dimnames %in% .KnownForecastTimeNames())] if (length(ftime_name) > 1) { warning("Found more than one forecast time dimension. Only the first one ", "will be used.") ftime_name <- ftime_name[1] } # sdate_name if (!any(dimnames %in% .KnownStartDateNames())) { stop("Start date dimension name do not match with any of the ", "accepted names by the package.") } sdate_name <- dimnames[which(dimnames %in% .KnownStartDateNames())] multiple_sdates <- FALSE if (length(sdate_name) == 0) { stop("Start date dimension name do not match with any of the ", "accepted names by the package. The accepted start date names ", "are: 'sdate', 'sdates', 'syear', 'sweek' and 'sday'.") } else if (length(sdate_name) > 1) { multiple_sdates <- TRUE stop("Found more than one start date dimension. This functionality is not developed yet.") } sdate_pos <- which(dimnames == sdate_name) # memb_dim if (!any(dimnames %in% .KnownMemberNames())) { memb_name <- NULL } else { memb_name <- dimnames[which(dimnames %in% .KnownMemberNames())] if (length(memb_name) > 1) { stop("Found more than one member dimension. This functionality is not developed yet.") } } if (any(dimnames == 'level')) { stop("Ask for saving 3Dim fields to the mantainer.") } # extra_string if (!is.null(extra_string)) { if (!is.character(extra_string)) { stop("Parameter 'extra_string' must be a character string.") } } # dataset dataset_pos <- which(dimnames %in% c('dataset', 'dat')) dat_dim_name <- names(dim(data))[dataset_pos] dims <- dim(data) if (length(dataset_pos) == 0) { warning("Dataset dimension is not found in 'data'. ", "All data is stored in the same 'dataset' folder.") data <- InsertDim(data, posdim = 1, lendim = 1, name = "dataset") dimnames <- names(dim(data)) dataset_pos <- 1 } else if (length(dataset_pos) > 1) { stop("There is more than one 'dataset' dimension in data.") } n_datasets <- dim(data)[dataset_pos] # number of folder by dataset # dataset names if (is.null(Dataset)) { warning("Parameter 'Dataset' is NULL. Files will be saved with a ", "directory name of 'XXX'.") Dataset <- rep('XXX', n_datasets ) } datasets <- Dataset if (n_datasets > length(datasets)) { warning("Dimension 'dataset' in 'data' is greater than those listed in ", "element 'Datasets' and the first element will be reused.") datasets <- c(datasets, rep(datasets[1], n_datasets - length(datasets))) } else if (n_datasets < length(datasets)) { warning("Dimension 'dataset' in 'data' is smaller than those listed in ", "element 'Datasets' and only the firsts elements will be used.") datasets <- datasets[1:n_datasets] } # From Load if (inherits(datasets, 'list')) { datasets <- names(datasets) } # var var_dim <- NULL var_dim_name <- NULL if (any(c('var', 'vars', 'variable', 'variables') %in% dimnames)) { var_dim <- which(dimnames %in% c('var', 'vars', 'variable', 'variables')) var_dim_name <- names(dim(data))[var_dim] if (length(var_dim) > 1) { warning("There is more than one 'variable' dimension in data.") var_dim <- var_dim[1] } if (dims[var_dim] == 1) { num_vars <- 1 } else { num_vars <- dim(data)[var_dim] } } else { num_vars <- 1 var_dim_name <- NULL } known_dim_names <- c("var", "vars", "dat", "dataset", "nlevel", "levels", "level", .KnownLatNames(), .KnownLonNames(), .KnownForecastTimeNames(), .KnownStartDateNames()) dim_names <- names(dim(data)) if (any(dim_names != c(dat_dim_name, var_dim_name, sdate_name, lon_name, lat_name, memb_name, ftime_name))) { data <- Reorder(data, c(dat_dim_name, var_dim_name, sdate_name, lon_name, lat_name, memb_name, ftime_name)) } # Dimensions definition defined_dims <- NULL extra_info_dim <- NULL filedims <- names(dim(data))[which(!names(dim(data)) %in% c(dat_dim_name, var_dim_name, sdate_name, ftime_name))] for (i_coord in filedims) { dim_info <- list() # vals if (i_coord %in% names(coords)) { dim_info[['vals']] <- as.vector(coords[[i_coord]]) } else { dim_info[['vals']] <- 1:dim(data)[i_coord] } # name dim_info[['name']] <- i_coord # len dim_info[['len']] <- as.numeric(dim(data)[i_coord]) # unlim dim_info[['unlim']] <- FALSE # create_dimvar dim_info[['create_dimvar']] <- TRUE ## metadata if (i_coord %in% names(metadata)) { if ('variables' %in% names(attributes(metadata[[i_coord]]))) { # from Start: 'lon' or 'lat' attrs <- attributes(metadata[[i_coord]])[['variables']][[i_coord]] i_coord_info <- attrs[!sapply(attrs, inherits, 'list')] } else if (inherits(metadata[[i_coord]], 'list')) { # from Start and Load: main var i_coord_info <- metadata[[i_coord]] } else if (!is.null(attributes(metadata[[i_coord]]))) { # from Load i_coord_info <- attributes(metadata[[i_coord]]) } else { stop("Metadata is not correct.") } # len if ('size' %in% names(i_coord_info)) { if (i_coord_info[['size']] != dim(data)[i_coord]) { dim_info[['original_len']] <- i_coord_info[['size']] i_coord_info[['size']] <- NULL } } # units if (!('units' %in% names(i_coord_info))) { dim_info[['units']] <- '' } else { dim_info[['units']] <- i_coord_info[['units']] i_coord_info[['units']] <- NULL } # calendar if (!('calendar' %in% names(i_coord_info))) { dim_info[['calendar']] <- NA } else { dim_info[['calendar']] <- i_coord_info[['calendar']] i_coord_info[['calendar']] <- NULL } # longname if ('long_name' %in% names(i_coord_info)) { dim_info[['longname']] <- i_coord_info[['long_name']] i_coord_info[['long_name']] <- NULL } else if ('longname' %in% names(i_coord_info)) { dim_info[['longname']] <- i_coord_info[['longname']] i_coord_info[['longname']] <- NULL } else { dim_info[['longname']] <- i_coord } # extra information if (!is.null(names(i_coord_info))) { extra_info_dim[[i_coord]] <- i_coord_info } } else { # units dim_info[['units']] <- "adim" # longname dim_info[['longname']] <- NULL # calendar dim_info[['calendar']] <- NA } new_dim <- list(ncdim_def(name = dim_info[['name']], units = dim_info[['units']], vals = dim_info[['vals']], unlim = dim_info[['unlim']], create_dimvar = dim_info[['create_dimvar']], calendar = dim_info[['calendar']], longname = dim_info[['longname']])) names(new_dim) <- i_coord defined_dims <- c(defined_dims, new_dim) } defined_vars <- list() if (!unique_file) { for (i in 1:n_datasets) { path <- file.path(destination, datasets[i], var_name) for (j in 1:num_vars) { dir.create(path[j], recursive = TRUE) sdates <- gsub("-", "", startdates) dim(sdates) <- c(length(sdates)) names(dim(sdates)) <- sdate_name Apply(data = list(data, sdates, Dates), target_dims = list(c(memb_name, ftime_name, lat_name, lon_name), NULL, ftime_name), fun = .saveExp, ftime_name = ftime_name, defined_dims = defined_dims, var_name = var_name[j], metadata_var = metadata[[var_name[j]]], destination = path[j], extra_info_dim = extra_info_dim, extra_string = extra_string) } } } else { # dataset definition # From here new_dim <- list(ncdim_def(name = dat_dim_name, units = "adim", vals = 1 : dim(data)[dat_dim_name], longname = 'dataset', create_dimvar = TRUE)) names(new_dim) <- dat_dim_name defined_dims <- c(new_dim, defined_dims) extra_info_dim[[dat_dim_name]] <- list(datasets = paste(datasets, collapse = ', ')) # sdate definition sdates <- Subset(Dates, along = ftime_name, 1, drop = 'selected') differ <- as.numeric((sdates - sdates[1])/3600) new_dim <- list(ncdim_def(name = sdate_name, units = paste('hours since', sdates[1]), vals = differ, longname = sdate_name, create_dimvar = TRUE)) names(new_dim) <- sdate_name defined_dims <- c(defined_dims, new_dim) extra_info_dim[[sdate_name]] <- list(sdates = paste(sdates, collapse = ', ')) # ftime definition ftime_dates <- Subset(Dates, along = sdate_name, 1, drop = 'selected') differ <- as.numeric((ftime_dates - ftime_dates[1])/3600) # DOUBT HERE: which values we take? dim_time <- list(ncdim_def(name = ftime_name, units = paste('hours since', Dates[1]), vals = differ, calendar = 'proleptic_gregorian', longname = ftime_name, unlim = TRUE)) names(dim_time) <- ftime_name defined_dims <- c(defined_dims, dim_time) # var definition defined_vars <- list() extra_info_var <- NULL for (j in 1:num_vars) { var_info <- list() i_var_info <- metadata[[var_name[j]]][!sapply(metadata[[var_name[j]]], inherits, 'list')] ## Define metadata # name var_info[['name']] <- var_name[j] # units if ('units' %in% names(i_var_info)) { var_info[['units']] <- i_var_info[['units']] i_var_info[['units']] <- NULL } else { var_info[['units']] <- '' } # dim var_info[['dim']] <- defined_dims # missval if ('missval' %in% names(i_var_info)) { var_info[['missval']] <- i_var_info[['missval']] i_var_info[['missval']] <- NULL } else { var_info[['missval']] <- NULL } # longname if (any(c('longname', 'long_name') %in% names(i_var_info))) { longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] var_info[['longname']] <- i_var_info[[longname]] i_var_info[[longname]] <- NULL } else { var_info[['longname']] <- NULL } # prec if ('prec' %in% names(i_var_info)) { var_info[['prec']] <- i_var_info[['prec']] i_var_info[['prec']] <- NULL } else { prec <- typeof(data) if (prec == 'character') { var_info[['prec']] <- 'char' } if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { var_info[['prec']] <- prec } else { var_info[['prec']] <- 'double' } } # extra information if (!is.null(names(i_var_info))) { extra_info_var[[var_name[j]]] <- i_var_info } new_var <- list(ncvar_def(name = var_info[['name']], units = var_info[['units']], dim = var_info[['dim']], missval = var_info[['missval']], longname = var_info[['longname']], prec = var_info[['prec']])) names(new_var) <- var_name[j] defined_vars <- c(defined_vars, new_var) } if (is.null(extra_string)) { file_name <- paste0(var_name[j], ".nc") } else { file_name <- paste0(var_name[j], "_", extra_string, ".nc") } full_filename <- file.path(destination, file_name) file_nc <- nc_create(full_filename, defined_vars) if (is.null(var_dim_name)) { ncvar_put(file_nc, var_name, vals = data) } else { for (j in 1:num_vars) { ncvar_put(file_nc, defined_vars[[j]]$name, vals = Subset(data, var_dim_name, j, drop = 'selected')) } } # Additional dimension attributes for (dim in names(defined_dims)) { if (dim %in% names(extra_info_dim)) { for (info_dim in names(extra_info_dim[[dim]])) { ncatt_put(file_nc, dim, info_dim, as.character(extra_info_dim[[dim]][[info_dim]])) } } } # Additional dimension attributes for (var in names(defined_vars)) { if (var %in% names(extra_info_var)) { for (info_var in names(extra_info_var[[var]])) { ncatt_put(file_nc, var, info_var, as.character(extra_info_var[[var]][[info_var]])) } } } nc_close(file_nc) } } .saveExp <- function(data, sdates, dates, ftime_name, var_name, units, defined_dims, metadata_var, destination, extra_info_dim = NULL, extra_string = NULL) { differ <- as.numeric((dates - dates[1])/3600) dim_time <- list(ncdim_def(name = ftime_name, units = paste('hours since', Dates[1]), vals = differ, calendar = 'proleptic_gregorian', longname = ftime_name, unlim = TRUE)) names(dim_time) <- ftime_name defined_dims <- c(defined_dims, dim_time) ## Define var metadata var_info <- NULL extra_info_var <- NULL i_var_info <- metadata_var[!sapply(metadata_var, inherits, 'list')] # name var_info[['name']] <- var_name # units if ('units' %in% names(i_var_info)) { var_info[['units']] <- i_var_info[['units']] i_var_info[['units']] <- NULL } else { var_info[['units']] <- '' } # dim var_info[['dim']] <- defined_dims # missval if ('missval' %in% names(i_var_info)) { var_info[['missval']] <- i_var_info[['missval']] i_var_info[['missval']] <- NULL } else { var_info[['missval']] <- NULL } # longname if (any(c('longname', 'long_name') %in% names(i_var_info))) { longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] var_info[['longname']] <- i_var_info[[longname]] i_var_info[[longname]] <- NULL } else { var_info[['longname']] <- NULL } # prec if ('prec' %in% names(i_var_info)) { var_info[['prec']] <- i_var_info[['prec']] i_var_info[['prec']] <- NULL } else { prec <- typeof(data) if (prec == 'character') { var_info[['prec']] <- 'char' } if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { var_info[['prec']] <- prec } else { var_info[['prec']] <- 'double' } } # extra information if (!is.null(names(i_var_info))) { extra_info_var <- i_var_info } datanc <- ncvar_def(name = var_info[['name']], units = var_info[['units']], dim = var_info[['dim']], missval = var_info[['missval']], longname = var_info[['longname']], prec = var_info[['prec']]) if (is.null(extra_string)) { file_name <- paste0(var_name, "_", sdates, ".nc") } else { file_name <- paste0(var_name, "_", extra_string, "_", sdates, ".nc") } full_filename <- file.path(destination, file_name) file_nc <- nc_create(full_filename, datanc) ncvar_put(file_nc, datanc, data) # Additional attributes for (dim in names(defined_dims)) { if (dim %in% names(extra_info_dim)) { for (info_dim in names(extra_info_dim[[dim]])) { ncatt_put(file_nc, dim, info_dim, as.character(extra_info_dim[[dim]][[info_dim]])) } } } # Additional dimension attributes if (!is.null(extra_info_var)) { for (info_var in names(extra_info_var)) { ncatt_put(file_nc, var_name, info_var, as.character(extra_info_var[[info_var]])) } } nc_close(file_nc) }