# Save one or a list of numeric/logical arrays into a single NetCDF file.
#
# Each array may carry a 'variables' attribute (a named list, one entry per
# variable) describing the ncdf4 variable and dimension specifications
# ('name', 'units', 'dim', 'missval', 'longname', 'prec', 'scaleFact',
# 'addOffset', 'coordinates', ...). A dimension named 'var'/'variable' in an
# array is interpreted as indexing multiple variables stored in that array.
#
# Arguments:
#   arrays:    a numeric or logical array, or a list of them.
#   file_path: path of the NetCDF file to create.
#
# Returns invisible(NULL); called for its side effect of writing 'file_path'.
# Stops with an informative error on any malformed specification.
ArrayToNetCDF <- function(arrays, file_path) {
  # Check parameter arrays.
  if (is.array(arrays)) {
    arrays <- list(arrays)
  }
  if (any(!sapply(arrays, function(x) is.array(x) && (is.numeric(x) || is.logical(x))))) {
    stop("The parameter 'arrays' must be one or a list of numeric or logical arrays.")
  }
  # Check parameter file_path.
  if (!is.character(file_path)) {
    stop("Parameter 'file_path' must be a character string.")
  }

  defined_dims <- list()
  defined_vars <- list()
  # NOTE(review): global attributes are collected here but not yet written to
  # the file; kept for compatibility with the original behaviour.
  global_attrs <- list()
  var_dim <- NULL
  for (i in seq_along(arrays)) {
    array_attrs <- attributes(arrays[[i]])
    if ('variables' %in% names(array_attrs)) {
      vars_info <- array_attrs[['variables']]
      array_attrs <- array_attrs[-which(names(array_attrs) == 'variables')]
    } else {
      vars_info <- NULL
    }
    global_attrs[names(array_attrs)] <- array_attrs
    # A dimension called 'var' or 'variable' means the array packs several
    # variables along that dimension.
    var_dim <- which(names(dim(arrays[[i]])) %in% c('var', 'variable'))
    if (length(var_dim) > 0) {
      var_dim <- var_dim[1]
      num_vars <- dim(arrays[[i]])[var_dim]
    } else {
      var_dim <- NULL
      num_vars <- 1
    }
    # Defining ncdf4 variable objects.
    for (j in seq_len(num_vars)) {
      var_info <- vars_info[[j]]
      if (length(var_info) == 0) {
        var_info <- list()
      }
      dim_names <- names(dim(arrays[[i]]))
      if (!is.null(dim_names)) {
        # BUGFIX: the original used the scalar operator '||' on vectors, so
        # only the first dimension name was ever validated (and R >= 4.3
        # raises an error for length > 1 inputs). Vectorized '|' + 'nchar'
        # checks every dimension name.
        if (any(is.na(dim_names) | (nchar(dim_names) == 0))) {
          stop("The provided arrays must have all named dimensions or ",
               "all unnamed dimensions.")
        }
      }
      provided_dims <- sapply(var_info$dim, '[[', 'name')
      var_built_dims <- NULL
      for (k in seq_along(dim(arrays[[i]]))) {
        if (!identical(k, var_dim)) {
          # Position of this dimension once the 'var' dimension is removed.
          final_dim_position <- k - ifelse(!is.null(var_dim) && k > var_dim, 1, 0)
          dim_name <- dim_names[k]
          if (!is.null(dim_name) && (dim_name %in% provided_dims)) {
            dim_info <- var_info$dim[[which(provided_dims == dim_name)]]
          } else {
            dim_info <- list()
          }
          # Fill in or validate each dimension specification field.
          if (!('name' %in% names(dim_info))) {
            if (!is.null(dim_name)) {
              dim_info[['name']] <- dim_name
            } else {
              dim_info[['name']] <- paste0('dim', final_dim_position)
            }
          } else {
            if (!is.character(dim_info[['name']])) {
              stop("The provided 'name' for the ", k, "th dimension in the ", i, "th array must be a character string.")
            }
            dim_info[['name']] <- dim_info[['name']][1]
          }
          if (!('len' %in% names(dim_info))) {
            dim_info[['len']] <- unname(dim(arrays[[i]])[k])
          } else {
            if (!is.numeric(dim_info[['len']])) {
              stop("The provided 'len' for the ", k, "th dimension in the ", i, "th array must be a numeric value.")
            }
            dim_info[['len']] <- round(dim_info[['len']][1])
            if (dim_info[['len']] != dim(arrays[[i]])[k]) {
              stop("The provided 'len' for the ", k, "th dimension in the ", i, "th array does not match the actual length of the provided array.")
            }
          }
          if (!('unlim' %in% names(dim_info))) {
            # The 'time' dimension defaults to unlimited, as conventional.
            dim_info[['unlim']] <- dim_info[['name']] == 'time'
          } else {
            if (!is.logical(dim_info[['unlim']])) {
              stop("The provided 'unlim' for the ", k, "th dimension in the ", i, "th array must be a logical value.")
            }
            dim_info[['unlim']] <- dim_info[['unlim']][1]
          }
          if (!('units' %in% names(dim_info))) {
            dim_info[['units']] <- ''
          } else {
            if (!is.character(dim_info[['units']])) {
              stop("The provided 'units' for the ", k, "th dimension in the ", i, "th array must be a character string.")
            }
            dim_info[['units']] <- dim_info[['units']][1]
          }
          if (!('vals' %in% names(dim_info))) {
            dim_info[['vals']] <- 1:dim_info[['len']]
          } else {
            if (!is.numeric(dim_info[['vals']])) {
              stop("The provided 'vals' for the ", k, "th dimension in the ", i, "th array must be a numeric vector.")
            }
            if (dim_info[['units']] == '') {
              # Unitless dimensions are stored with integer index values.
              dim_info[['vals']] <- as.integer(dim_info[['vals']])
            }
            if (length(dim_info[['vals']]) != dim_info[['len']]) {
              stop("The length of the provided 'vals' for the ", k, "th dimension in the ", i, "th array does not match the length of the provided array.")
            }
          }
          if (!('create_dimvar' %in% names(dim_info))) {
            # Only create a dimension variable when units are meaningful.
            dim_info[['create_dimvar']] <- dim_info[['units']] != ''
          } else {
            if (!is.logical(dim_info[['create_dimvar']])) {
              stop("The provided 'create_dimvar' for the ", k, "th dimension in the ", i, "th array must be a logical value.")
            }
            dim_info[['create_dimvar']] <- dim_info[['create_dimvar']][1]
            if (dim_info[['units']] != '' && !dim_info[['create_dimvar']]) {
              stop("Provided 'units' for the ", k, "th dimension in the ", i, "th array but 'create_dimvar' set to FALSE.")
            }
          }
          if (!('calendar' %in% names(dim_info))) {
            dim_info[['calendar']] <- NA
          } else {
            if (!is.character(dim_info[['calendar']])) {
              stop("The provided 'calendar' for the ", k, "th dimension in the ", i, "th array must be a character string.")
            }
            dim_info[['calendar']] <- dim_info[['calendar']][1]
          }
          if (!('longname' %in% names(dim_info))) {
            dim_info[['longname']] <- dim_info[['name']]
          } else {
            if (!is.character(dim_info[['longname']])) {
              stop("The provided 'longname' for the ", k, "th dimension in the ", i, "th array must be a character string.")
            }
            dim_info[['longname']] <- dim_info[['longname']][1]
          }
          if (dim_info[['name']] %in% names(defined_dims)) {
            # A dimension reused across arrays/variables must match its
            # previous definition exactly ('calendar' compares NA vs NULL).
            items_to_check <- c('name', 'len', 'unlim', 'units', 'vals',
                                'create_dimvar', 'longname')
            if (!identical(dim_info[items_to_check],
                           defined_dims[[dim_info[['name']]]][items_to_check]) ||
                !(identical(dim_info[['calendar']], defined_dims[[dim_info[['name']]]][['calendar']]) ||
                  (is.na(dim_info[['calendar']]) && is.null(defined_dims[[dim_info[['name']]]][['calendar']])))) {
              stop("The dimension '", dim_info[['name']], "' is ",
                   "defined or used more than once in the provided ",
                   "data but the dimension specifications do not ",
                   "match.")
            }
          } else {
            new_dim <- list(ncdim_def(dim_info[['name']], dim_info[['units']],
                                      dim_info[['vals']], dim_info[['unlim']],
                                      dim_info[['create_dimvar']],
                                      dim_info[['calendar']],
                                      dim_info[['longname']]))
            names(new_dim) <- dim_info[['name']]
            defined_dims <- c(defined_dims, new_dim)
          }
          var_built_dims <- c(var_built_dims, dim_info[['name']])
        }
      }
      # Resolve the variable name: explicit spec > 'variables' metadata name
      # > name of the list entry in 'arrays' > generated 'var<i>_<j>'.
      if (!('name' %in% names(var_info))) {
        var_name_from_md <- names(vars_info)[j]
        var_name_from_ar <- names(arrays)[i]
        if (is.character(var_name_from_md) &&
            !is.na(var_name_from_md) &&
            (nchar(var_name_from_md) > 0)) {
          var_name <- var_name_from_md
        } else if (is.character(var_name_from_ar) &&
                   !is.na(var_name_from_ar) &&
                   (nchar(var_name_from_ar) > 0)) {
          var_name <- var_name_from_ar
        } else {
          var_name <- paste0('var', i, '_', j)
        }
        var_info[['name']] <- var_name
      } else {
        if (!is.character(var_info[['name']])) {
          stop("The provided 'name' for the ", j, "th variable in the ", i, "th array must be a character string.")
        }
        var_info[['name']] <- var_info[['name']][1]
      }
      if (!('units' %in% names(var_info))) {
        var_info[['units']] <- ''
      } else {
        if (!is.character(var_info[['units']])) {
          stop("The provided 'units' for the ", j, "th variable in the ", i, "th array must be a character string.")
        }
        var_info[['units']] <- var_info[['units']][1]
      }
      if (!('missval' %in% names(var_info))) {
        # Assigning NULL removes the entry, so ncvar_def receives NULL below.
        var_info[['missval']] <- NULL
      } else {
        if (!is.numeric(var_info[['missval']])) {
          stop("The provided 'missval' for the ", j, "th variable in the ", i, "th array must be a numeric value.")
        }
        var_info[['missval']] <- var_info[['missval']][1]
      }
      if (!('longname' %in% names(var_info))) {
        var_info[['longname']] <- var_info[['name']]
      } else {
        if (!is.character(var_info[['longname']])) {
          stop("The provided 'longname' for the ", j, "th variable in the ", i, "th array must be a character string.")
        }
        var_info[['longname']] <- var_info[['longname']][1]
      }
      if (!('prec' %in% names(var_info))) {
        var_info[['prec']] <- 'float'
      } else {
        if (!is.character(var_info[['prec']])) {
          stop("The provided 'prec' for the ", j, "th variable in the ", i, "th array must be a character string.")
        }
        var_info[['prec']] <- var_info[['prec']][1]
      }
      new_var <- list(ncvar_def(var_info[['name']], var_info[['units']],
                                defined_dims[var_built_dims],
                                var_info[['missval']],
                                var_info[['longname']], var_info[['prec']]))
      names(new_var) <- var_info[['name']]
      defined_vars <- c(defined_vars, new_var)
    }
  }
  ncdf_object <- nc_create(file_path, defined_vars)
  # ROBUSTNESS: close the file handle on any exit path (the original leaked
  # the handle if a ncvar_put/ncatt_put below stopped with an error).
  on.exit(nc_close(ncdf_object), add = TRUE)
  var_counter <- 1
  # Putting the data and extra attributes.
  for (i in seq_along(arrays)) {
    array_attrs <- attributes(arrays[[i]])
    if ('variables' %in% names(array_attrs)) {
      vars_info <- array_attrs[['variables']]
    } else {
      vars_info <- NULL
    }
    var_dim <- which(names(dim(arrays[[i]])) %in% c('var', 'variable'))
    if (length(var_dim) > 0) {
      var_dim <- var_dim[1]
      num_vars <- dim(arrays[[i]])[var_dim]
    } else {
      var_dim <- NULL
      num_vars <- 1
    }
    for (j in seq_len(num_vars)) {
      var_info <- vars_info[[j]]
      if (length(var_info) == 0) {
        var_info <- list()
      }
      if (!('scaleFact' %in% names(var_info))) {
        scale_factor <- 1
      } else {
        if (!is.numeric(var_info[['scaleFact']])) {
          stop("The provided 'scaleFact' for the ", j, "th variable in the ", i, "th array must be a numeric value.")
        }
        scale_factor <- var_info[['scaleFact']][1]
      }
      if (!('addOffset' %in% names(var_info))) {
        add_offset <- 0
      } else {
        if (!is.numeric(var_info[['addOffset']])) {
          stop("The provided 'addOffset' for the ", j, "th variable in the ", i, "th array must be a numeric value.")
        }
        add_offset <- var_info[['addOffset']][1]
      }
      # Values are stored packed: file value = (raw - addOffset) / scaleFact.
      if (is.null(var_dim)) {
        if (scale_factor != 1 || add_offset != 0) {
          ncvar_put(ncdf_object, defined_vars[[var_counter]]$name,
                    (arrays[[i]] - add_offset) / scale_factor,
                    count = dim(arrays[[i]]))
        } else {
          ncvar_put(ncdf_object, defined_vars[[var_counter]]$name,
                    arrays[[i]],
                    count = dim(arrays[[i]]))
        }
      } else {
        # Slice the j-th variable out of the packed 'var' dimension.
        if (scale_factor != 1 || add_offset != 0) {
          ncvar_put(ncdf_object, defined_vars[[var_counter]]$name,
                    (Subset(arrays[[i]], var_dim, j, drop = 'selected') - add_offset) / scale_factor,
                    count = dim(arrays[[i]])[-var_dim])
        } else {
          ncvar_put(ncdf_object, defined_vars[[var_counter]]$name,
                    Subset(arrays[[i]], var_dim, j, drop = 'selected'),
                    count = dim(arrays[[i]])[-var_dim])
        }
      }
      if (scale_factor != 1 || add_offset != 0) {
        ncatt_put(ncdf_object, defined_vars[[var_counter]]$name, 'scale_factor', scale_factor)
        ncatt_put(ncdf_object, defined_vars[[var_counter]]$name, 'add_offset', add_offset)
      }
      if ('coordinates' %in% names(var_info)) {
        if (!is.character(var_info[['coordinates']])) {
          stop("The attribute 'coordinates' must be a character string.")
        }
        if (!(all(strsplit(var_info[['coordinates']], ' ')[[1]] %in% sapply(defined_vars, '[[', 'name')))) {
          stop("All the dimensions appearing in 'coordinates' must point to defined variables.")
        }
        ncatt_put(ncdf_object, defined_vars[[var_counter]]$name, 'coordinates', var_info[['coordinates']])
      }
      var_counter <- var_counter + 1
    }
  }
  invisible(NULL)
}
diff --git a/R/CDORemap.R b/R/CDORemap.R new file mode 100644 index 0000000000000000000000000000000000000000..24809c3f94f4ae37190068360af75d288f92188f --- /dev/null +++ b/R/CDORemap.R @@ -0,0 +1,746 @@ +CDORemap <- function(data_array = NULL, lons, lats, grid, method, + avoid_writes = TRUE, crop = TRUE, + force_remap = FALSE, write_dir = tempdir()) { #, mask = NULL) { + .isRegularVector <- function(x, tol = 0.1) { + if (length(x) < 2) { + #stop("The provided vector must be of length 2 or greater.") + TRUE + } else { + spaces <- x[2:length(x)] - x[1:(length(x) - 1)] + (sum(abs(spaces - mean(spaces)) > mean(spaces) / (1 / tol)) < 2) + } + } + # Check parameters data_array, lons and lats. 
+ known_lon_names <- .KnownLonNames() + known_lat_names <- .KnownLatNames() + if (!is.numeric(lons) || !is.numeric(lats)) { + stop("Expected numeric 'lons' and 'lats'.") + } + if (any(is.na(lons > 0))) { + stop("Found invalid values in 'lons'.") + } + if (any(is.na(lats > 0))) { + stop("Found invalid values in 'lats'.") + } + if (is.null(dim(lons))) { + dim(lons) <- length(lons) + } + if (is.null(dim(lats))) { + dim(lats) <- length(lats) + } + if (length(dim(lons)) > 2 || length(dim(lats)) > 2) { + stop("'lons' and 'lats' can only have up to 2 dimensions.") + } + if (length(dim(lons)) != length(dim(lats))) { + stop("'lons' and 'lats' must have the same number of dimensions.") + } + if (length(dim(lons)) == 2 && !all(dim(lons) == dim(lats))) { + stop("'lons' and 'lats' must have the same dimension sizes.") + } + return_array <- TRUE + if (is.null(data_array)) { + return_array <- FALSE + if (length(dim(lons)) == 1) { + array_dims <- c(length(lats), length(lons)) + names(array_dims) <- c('lat', 'lon') + } else { + array_dims <- dim(lons) + names(array_dims) <- c('j', 'i') + } + data_array <- array(NA, array_dims) + } + if (!(is.logical(data_array) || is.numeric(data_array)) || !is.array(data_array)) { + stop("Parameter 'data_array' must be a numeric array.") + } + if (is.null(names(dim(data_array)))) { + stop("Parameter 'data_array' must have named dimensions.") + } + lon_dim <- which(known_lon_names %in% names(dim(data_array))) + if (length(lon_dim) < 1) { + stop("Could not find a known longitude dimension name in the provided 'data_array'.") + } + if (length(lon_dim) > 1) { + stop("Found more than one known longitude dimension names in the provided 'data_array'.") + } + lon_dim <- known_lon_names[lon_dim] + lat_dim <- which(known_lat_names %in% names(dim(data_array))) + if (length(lat_dim) < 1) { + stop("Could not find a known latitude dimension name in the provided 'data_array'.") + } + if (length(lat_dim) > 1) { + stop("Found more than one known latitude dimension 
name in the provided 'data_array'.") + } + lat_dim <- known_lat_names[lat_dim] + if (is.null(names(dim(lons)))) { + if (length(dim(lons)) == 1) { + names(dim(lons)) <- lon_dim + } else { + stop("Parameter 'lons' must be provided with dimension names.") + } + } else { + if (!(lon_dim %in% names(dim(lons)))) { + stop("Parameter 'lon' must have the same longitude dimension name as the 'data_array'.") + } + if (length(dim(lons)) > 1 && !(lat_dim %in% names(dim(lons)))) { + stop("Parameter 'lon' must have the same latitude dimension name as the 'data_array'.") + } + } + if (is.null(names(dim(lats)))) { + if (length(dim(lats)) > 1) { + stop("Parameter 'lats' must be provided with dimension names.") + } + } else { + if (!(lat_dim %in% names(dim(lats)))) { + stop("Parameter 'lat' must have the same latitude dimension name as the 'data_array'.") + } + if (length(dim(lats)) > 1 && !(lon_dim %in% names(dim(lats)))) { + stop("Parameter 'lat' must have the same longitude dimension name as the 'data_array'.") + } + } + lons_attr_bk <- attributes(lons) + if (is.null(lons_attr_bk)) { + lons_attr_bk <- list() + } + lats_attr_bk <- attributes(lats) + if (is.null(lats_attr_bk)) { + lats_attr_bk <- list() + } + if (length(attr(lons, 'variables')) == 0) { + new_metadata <- list(list()) + if (length(dim(lons)) == 1) { + names(new_metadata) <- lon_dim + } else { + names(new_metadata) <- paste0(lon_dim, '_var') + } + attr(lons, 'variables') <- new_metadata + } + if (!('units' %in% names(attr(lons, 'variables')[[1]]))) { + new_metadata <- attr(lons, 'variables') + #names(new_metadata)[1] <- lon_dim + new_metadata[[1]][['units']] <- 'degrees_east' + attr(lons, 'variables') <- new_metadata + } + if (length(attr(lats, 'variables')) == 0) { + new_metadata <- list(list()) + if (length(dim(lats)) == 1) { + names(new_metadata) <- lat_dim + } else { + names(new_metadata) <- paste0(lat_dim, '_var') + } + attr(lats, 'variables') <- new_metadata + } + if (!('units' %in% names(attr(lats, 
'variables')[[1]]))) { + new_metadata <- attr(lats, 'variables') + #names(new_metadata)[1] <- lat_dim + new_metadata[[1]][['units']] <- 'degrees_north' + attr(lats, 'variables') <- new_metadata + } + # Check grid. + if (!is.character(grid)) { + stop("Parameter 'grid' must be a character string specifying a ", + "target CDO grid, 'rXxY' or 'tRESgrid', or a path to another ", + "NetCDF file.") + } + if (grepl('^r[0-9]{1,}x[0-9]{1,}$', grid)) { + grid_type <- 'regular' + grid_lons <- as.numeric(strsplit(strsplit(grid, 'x')[[1]][1], 'r')[[1]][2]) + grid_lats <- as.numeric(strsplit(grid, 'x')[[1]][2]) + } else if (grepl('^t[0-9]{1,}grid$', grid)) { + grid_type <- 'gaussian' + grid_t <- as.numeric(strsplit(strsplit(grid, 'grid')[[1]][1], 't')[[1]][2]) + grid_size <- .t2nlatlon(grid_t) + grid_lons <- grid_size[2] + grid_lats <- grid_size[1] + } else { + grid_type <- 'custom' + } + # Check method. + if (method %in% c('bil', 'bilinear')) { + method <- 'bil' + } else if (method %in% c('bic', 'bicubic')) { + method <- 'bic' + } else if (method %in% c('con', 'conservative')) { + method <- 'con' + } else if (method %in% c('dis', 'distance-weighted')) { + method <- 'dis' + } else { + stop("Unsupported CDO remap method. 'bilinear', 'bicubic', 'conservative' or 'distance-weighted' supported only.") + } + # Check avoid_writes + if (!is.logical(avoid_writes)) { + stop("Parameter 'avoid_writes' must be a logical value.") + } + # Check crop + crop_tight <- FALSE + if (is.character(crop)) { + if (crop == 'tight') { + crop_tight <- TRUE + } else if (crop != 'preserve') { + stop("Parameter 'crop' can only take the values 'tight' or 'preserve' if specified as a character string.") + } + crop <- TRUE + } + if (is.logical(crop)) { + if (crop) { + if (length(lons) == 1 || length(lats) == 1) { + stop("CDORemap cannot remap if crop = TRUE and values for only one ", + "longitude or one latitude are provided. 
Either a) provide ", + "values for more than one longitude/latitude, b) explicitly ", + "specify the crop limits in the parameter crop, or c) set ", + "crop = FALSE.") + } + if (crop_tight) { + lon_extremes <- c(min(lons), max(lons)) + lat_extremes <- c(min(lats), max(lats)) + } else { + # Here we are trying to look for the extreme lons and lats in the data. + # Not the centers of the extreme cells, but the borders of the extreme cells. +###--- + if (length(dim(lons)) == 1) { + tmp_lon <- lons + } else { + min_pos <- which(lons == min(lons), arr.ind = TRUE)[1, ] + tmp_lon <- Subset(lons, lat_dim, min_pos[which(names(dim(lons)) == lat_dim)], drop = 'selected') + } + i <- 1:length(tmp_lon) + lon_model <- lm(tmp_lon ~ poly(i, 3)) + lon_extremes <- c(NA, NA) + left_is_min <- FALSE + right_is_max <- FALSE + if (which.min(tmp_lon) == 1) { + left_is_min <- TRUE + prev_lon <- predict(lon_model, data.frame(i = 0)) + first_lon_cell_width <- (tmp_lon[1] - prev_lon) + lon_extremes[1] <- tmp_lon[1] - first_lon_cell_width / 2 + } else { + lon_extremes[1] <- min(tmp_lon) + } + if (which.max(tmp_lon) == length(tmp_lon)) { + right_is_max <- TRUE + next_lon <- predict(lon_model, data.frame(i = length(tmp_lon) + 1)) + last_lon_cell_width <- (next_lon - tmp_lon[length(tmp_lon)]) + lon_extremes[2] <- tmp_lon[length(tmp_lon)] + last_lon_cell_width / 2 + } else { + lon_extremes[2] <- max(tmp_lon) + } + # Adjust the crop window if possible in order to keep lons from 0 to 360 + # or from -180 to 180 when the extremes of the cropped window are contiguous. 
+ if (right_is_max) { + if (lon_extremes[1] < -180) { + if (!((lon_extremes[2] < 180) && !((180 - lon_extremes[2]) <= last_lon_cell_width / 2))) { + lon_extremes[1] <- -180 + lon_extremes[2] <- 180 + } + } else if (lon_extremes[1] < 0) { + if (!((lon_extremes[2] < 360) && !((360 - lon_extremes[2]) <= last_lon_cell_width / 2))) { + lon_extremes[1] <- 0 + lon_extremes[2] <- 360 + } + } + } + if (left_is_min) { + if (lon_extremes[2] > 360) { + if (!((lon_extremes[1] > 0) && !(lon_extremes[1] <= first_lon_cell_width / 2))) { + lon_extremes[1] <- 0 + lon_extremes[2] <- 360 + } + } else if (lon_extremes[2] > 180) { + if (!((lon_extremes[1] > -180) && !((180 + lon_extremes[1]) <= first_lon_cell_width / 2))) { + lon_extremes[1] <- -180 + lon_extremes[2] <- 180 + } + } + } +## lon_extremes <- signif(lon_extremes, 5) +## lon_extremes <- lon_extremes + 0.00001 +###--- + if (length(dim(lats)) == 1) { + tmp_lat <- lats + } else { + min_pos <- which(lats == min(lats), arr.ind = TRUE)[1, ] + tmp_lat <- Subset(lats, lon_dim, min_pos[which(names(dim(lats)) == lon_dim)], drop = 'selected') + } + i <- 1:length(tmp_lat) + lat_model <- lm(tmp_lat ~ poly(i, 3)) + lat_extremes <- c(NA, NA) + if (which.min(tmp_lat) == 1) { + prev_lat <- predict(lat_model, data.frame(i = 0)) + lat_extremes[1] <- tmp_lat[1] - (tmp_lat[1] - prev_lat) / 2 + } else { + lat_extremes[1] <- min(tmp_lat) + } + if (which.max(tmp_lat) == length(tmp_lat)) { + next_lat <- predict(lat_model, data.frame(i = length(tmp_lat) + 1)) + lat_extremes[2] <- tmp_lat[length(tmp_lat)] + (next_lat - tmp_lat[length(tmp_lat)]) / 2 + } else { + lat_extremes[2] <- max(tmp_lat) + } +## lat_extremes <- signif(lat_extremes, 5) + # Adjust crop window + if (lat_extremes[1] < -90) { + lat_extremes[1] <- -90 + } else if (lat_extremes[1] > 90) { + lat_extremes[1] <- 90 + } + if (lat_extremes[2] < -90) { + lat_extremes[2] <- -90 + } else if (lat_extremes[2] > 90) { + lat_extremes[2] <- 90 + } +###--- + } + } + } else if (is.numeric(crop)) { + 
if (length(crop) != 4) { + stop("Paramrter 'crop' must be a logical value or a numeric vector of length 4: c(western border, eastern border, southern border, northern border.") + } else { + lon_extremes <- crop[1:2] + lat_extremes <- crop[3:4] + crop <- TRUE + } + } else { + stop("Parameter 'crop' must be a logical value or a numeric vector.") + } + # Check force_remap + if (!is.logical(force_remap)) { + stop("Parameter 'force_remap' must be a logical value.") + } + # Check write_dir + if (!is.character(write_dir)) { + stop("Parameter 'write_dir' must be a character string.") + } + if (!dir.exists(write_dir)) { + stop("Parameter 'write_dir' must point to an existing directory.") + } +# if (!is.null(mask)) { +# if (!is.numeric(mask) || !is.array(mask)) { +# stop("Parameter 'mask' must be a numeric array.") +# } +# if (length(dim(mask)) != 2) { +# stop("Parameter 'mask' must have two dimensions.") +# } +# if (is.null(names(dim(mask)))) { +# if (dim(data_array)[lat_dim] == dim(data_array)[lon_dim]) { +# stop("Cannot disambiguate which is the longitude dimension of ", +# "the provided 'mask'. 
Provide it with dimension names.") +# } +# names(dim(mask)) <- c('', '') +# found_lon_dim <- which(dim(mask) == dim(data_array)[lon_dim]) +# if (length(found_lon_dim) < 0) { +# stop("The dimension sizes of the provided 'mask' do not match ", +# "the spatial dimension sizes of the array to interpolate.") +# } else { +# names(dim(mask)[found_lon_dim]) <- lon_dim +# } +# found_lat_dim <- which(dim(mask) == dim(data_array)[lat_dim]) +# if (length(found_lat_dim) < 0) { +# stop("The dimension sizes of the provided 'mask' do not match ", +# "the spatial dimension sizes of the array to interpolate.") +# } else { +# names(dim(mask)[found_lat_dim]) <- lat_dim +# } +# } +# lon_position <- which(names(dim(data_array)) == lon_dim) +# lat_position <- which(names(dim(data_array)) == lat_dim) +# if (lon_position > lat_position) { +# if (names(dim(mask))[1] == lon_dim) { +# mask <- t(mask) +# } +# } else { +# if (names(dim(mask))[1] == lat_dim) { +# mask <- t(mask) +# } +# } +# ## TODO: Apply mask!!! Preserve attributes +# } + # Check if interpolation can be skipped. + interpolation_needed <- TRUE + if (!force_remap) { + if (!(grid_type == 'custom')) { + if (length(lons) == grid_lons && length(lats) == grid_lats) { + if (grid_type == 'regular') { + if (.isRegularVector(lons) && .isRegularVector(lats)) { + interpolation_needed <- FALSE + } + } else if (grid_type == 'gaussian') { + # TODO: improve this check. Gaussian quadrature should be used. + if (.isRegularVector(lons) && !.isRegularVector(lats)) { + interpolation_needed <- FALSE + } + } + } + } + } + found_lons <- lons + found_lats <- lats + if (interpolation_needed) { + if (nchar(Sys.which('cdo')[1]) < 1) { + stop("CDO must be installed in order to use the .CDORemap.") + } + # CDO takes arrays of 3 dimensions or 4 if one of them is unlimited. + # The unlimited dimension can only be the left-most (right-most in R). + # There are no restrictions for the dimension names or variable names. 
+ # The longitude and latitude are detected by their units. + # There are no restrictions for the order of the limited dimensions. + # The longitude/latitude variables and dimensions must have the same name. + # The procedure consists in: + # - take out the array metadata + # - be aware of var dimension (replacing the dimension names would do). + # - take arrays of 4 dimensions always if possible + # - make the last dimension unlimited when saving to netcdf + # - if the last dimension is lon or lat, either reorder the array and + # then reorder back or iterate over the dimensions at the right + # side of lon AND lat. + # If the input array has more than 4 dimensions, it is needed to + # run CDO on each sub-array of 4 dimensions because it can handle + # only up to 4 dimensions. The shortest dimensions are chosen to + # iterate over. + is_irregular <- FALSE + if (length(dim(lats)) > 1 && length(dim(lons)) > 1) { + is_irregular <- TRUE + } + attribute_backup <- attributes(data_array) + other_dims <- which(!(names(dim(data_array)) %in% c(lon_dim, lat_dim))) + permutation <- NULL + unlimited_dim <- NULL + dims_to_iterate <- NULL + total_slices <- 1 + other_dims_per_chunk <- ifelse(is_irregular, 1, 2) # 4 (the maximum accepted by CDO) - 2 (lon, lat) = 2. 
+ if (length(other_dims) > 1 || (length(other_dims) > 0 && (is_irregular))) { + if (!(length(dim(data_array)) %in% other_dims)) { + if (avoid_writes || is_irregular) { + dim_to_move <- max(other_dims) + permutation <- (1:length(dim(data_array)))[-dim_to_move] + permutation <- c(permutation, dim_to_move) + permutation_back <- 1:length(dim(data_array)) + permutation_back[dim_to_move] <- length(dim(data_array)) + permutation_back[length(dim(data_array))] <- dim_to_move + dim_backup <- dim(data_array) + data_array <- aperm(data_array, permutation) + dim(data_array) <- dim_backup[permutation] + other_dims <- which(!(names(dim(data_array)) %in% c(lon_dim, lat_dim))) + } else { + # We allow only lon, lat and 1 more dimension per chunk, so + # CDO has no restrictions in the order. + other_dims_per_chunk <- 1 + } + } + other_dims_ordered_by_size <- other_dims[sort(dim(data_array)[other_dims], index.return = TRUE)$ix] + dims_to_iterate <- sort(head(other_dims_ordered_by_size, length(other_dims) - other_dims_per_chunk)) + if (length(dims_to_iterate) == 0) { + dims_to_iterate <- NULL + } else { + slices_to_iterate <- array(1:prod(dim(data_array)[dims_to_iterate]), + dim(data_array)[dims_to_iterate]) + total_slices <- prod(dim(slices_to_iterate)) + } + if ((other_dims_per_chunk > 1) || (other_dims_per_chunk > 0 && is_irregular)) { + unlimited_dim <- tail(sort(tail(other_dims_ordered_by_size, other_dims_per_chunk)), 1) + } + } + + result_array <- NULL + lon_pos <- which(names(dim(data_array)) == lon_dim) + lat_pos <- which(names(dim(data_array)) == lat_dim) + dim_backup <- dim(data_array) + attributes(data_array) <- NULL + dim(data_array) <- dim_backup + names(dim(data_array)) <- paste0('dim', 1:length(dim(data_array))) + names(dim(data_array))[c(lon_pos, lat_pos)] <- c(lon_dim, lat_dim) + if (!is.null(unlimited_dim)) { + # This will make ArrayToNetCDF create this dim as unlimited. 
+ names(dim(data_array))[unlimited_dim] <- 'time' + } + if (length(dim(lons)) == 1) { + names(dim(lons)) <- lon_dim + } + if (length(dim(lats)) == 1) { + names(dim(lats)) <- lat_dim + } + if (length(dim(lons)) > 1) { + lon_var_name <- paste0(lon_dim, '_var') + } else { + lon_var_name <- lon_dim + } + if (length(dim(lats)) > 1) { + lat_var_name <- paste0(lat_dim, '_var') + } else { + lat_var_name <- lat_dim + } + if (is_irregular) { + metadata <- list(list(coordinates = paste(lon_var_name, lat_var_name))) + names(metadata) <- 'var' + attr(data_array, 'variables') <- metadata + } + names(attr(lons, 'variables')) <- lon_var_name + names(attr(lats, 'variables')) <- lat_var_name + for (i in 1:total_slices) { + tmp_file <- tempfile('R_CDORemap_', write_dir, fileext = '.nc') + tmp_file2 <- tempfile('R_CDORemap_', write_dir, fileext = '.nc') + if (!is.null(dims_to_iterate)) { + slice_indices <- which(slices_to_iterate == i, arr.ind = TRUE) + subset <- Subset(data_array, dims_to_iterate, as.list(slice_indices), drop = 'selected') + # Make sure subset goes along with metadata + ArrayToNetCDF(setNames(list(subset, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + } else { + ArrayToNetCDF(setNames(list(data_array, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + } + sellonlatbox <- '' + if (crop) { + sellonlatbox <- paste0('sellonlatbox,', lon_extremes[1], ',', lon_extremes[2], + ',', lat_extremes[1], ',', lat_extremes[2], ' -') + } + err <- try({ +## TODO: Here add sellonlatbox. Also check constantin's issue, may contain hint. 
Also search if possible to crop without + system(paste0("cdo -s ", sellonlatbox, "remap", method, ",", grid, " ", tmp_file, " ", tmp_file2)) + }) + file.remove(tmp_file) + if (('try-error' %in% class(err)) || err > 0) { + stop("CDO remap failed.") + } + ncdf_remapped <- nc_open(tmp_file2) + found_dim_names <- sapply(ncdf_remapped$var$var$dim, '[[', 'name') + found_lon_dim <- found_dim_names[which(found_dim_names %in% .KnownLonNames())[1]] + found_lat_dim <- found_dim_names[which(found_dim_names %in% .KnownLatNames())[1]] + found_lon_dim_size <- length(ncdf_remapped$dim[[found_lon_dim]]$vals) + found_lat_dim_size <- length(ncdf_remapped$dim[[found_lat_dim]]$vals) + found_lons <- ncvar_get(ncdf_remapped, 'lon', collapse_degen = FALSE) + found_lats <- ncvar_get(ncdf_remapped, 'lat', collapse_degen = FALSE) + if (length(dim(found_lons)) > 1) { + if (found_lon_dim < found_lat_dim) { + names(dim(found_lons)) <- c(found_lon_dim, found_lat_dim) + } else { + names(dim(found_lons)) <- c(found_lat_dim, found_lon_dim) + } + } else { + names(dim(found_lons)) <- found_lon_dim + } + if (length(dim(found_lats)) > 1) { + if (found_lon_dim < found_lat_dim) { + names(dim(found_lats)) <- c(found_lon_dim, found_lat_dim) + } else { + names(dim(found_lats)) <- c(found_lat_dim, found_lon_dim) + } + } else { + names(dim(found_lats)) <- found_lat_dim + } + if (!is.null(dims_to_iterate)) { + if (is.null(result_array)) { + if (return_array) { + new_dims <- dim(data_array) + new_dims[c(lon_dim, lat_dim)] <- c(found_lon_dim_size, found_lat_dim_size) + result_array <- array(dim = new_dims) + store_indices <- as.list(rep(TRUE, length(dim(result_array)))) + } + } + if (return_array) { + store_indices[dims_to_iterate] <- as.list(slice_indices) + result_array <- do.call('[<-', c(list(x = result_array), store_indices, + list(value = ncvar_get(ncdf_remapped, 'var', collapse_degen = FALSE)))) + } + } else { + new_dims <- dim(data_array) + new_dims[c(lon_dim, lat_dim)] <- c(found_lon_dim_size, 
found_lat_dim_size) + result_array <- ncvar_get(ncdf_remapped, 'var', collapse_degen = FALSE) + names(dim(result_array)) <- names(new_dims) + } + nc_close(ncdf_remapped) + file.remove(tmp_file2) + } + if (!is.null(permutation)) { + dim_backup <- dim(result_array) + result_array <- aperm(result_array, permutation_back) + dim(result_array) <- dim_backup[permutation_back] + } + # Now restore the metadata + result_is_irregular <- FALSE + if (length(dim(found_lats)) > 1 && length(dim(found_lons)) > 1) { + result_is_irregular <- TRUE + } + attribute_backup[['dim']][which(names(dim(result_array)) == lon_dim)] <- dim(result_array)[lon_dim] + attribute_backup[['dim']][which(names(dim(result_array)) == lat_dim)] <- dim(result_array)[lat_dim] + new_lon_name <- names(dim(found_lons))[which(names(dim(found_lons)) %in% .KnownLonNames())] + new_lat_name <- names(dim(found_lats))[which(names(dim(found_lats)) %in% .KnownLatNames())] + names(attribute_backup[['dim']])[which(names(dim(result_array)) == lon_dim)] <- new_lon_name + names(attribute_backup[['dim']])[which(names(dim(result_array)) == lat_dim)] <- new_lat_name + if (!is.null(attribute_backup[['variables']]) && (length(attribute_backup[['variables']]) > 0)) { + for (var in 1:length(attribute_backup[['variables']])) { + if (length(attribute_backup[['variables']][[var]][['dim']]) > 0) { + for (dim in 1:length(attribute_backup[['variables']][[var]][['dim']])) { + dim_name <- NULL + if ('name' %in% names(attribute_backup[['variables']][[var]][['dim']][[dim]])) { + dim_name <- attribute_backup[['variables']][[var]][['dim']][[dim]][['name']] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + attribute_backup[['variables']][[var]][['dim']][[dim]][['name']] <- new_lon_name + } else { + attribute_backup[['variables']][[var]][['dim']][[dim]][['name']] <- new_lat_name + } + } + } else if (!is.null(names(attribute_backup[['variables']][[var]][['dim']]))) { + dim_name <- 
names(attribute_backup[['variables']][[var]][['dim']])[dim] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + names(attribute_backup[['variables']][[var]][['dim']])[which(names(attribute_backup[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + } else { + names(attribute_backup[['variables']][[var]][['dim']])[which(names(attribute_backup[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + } + } + } + if (!is.null(dim_name)) { + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + new_vals <- found_lons[TRUE] + } else if (dim_name == lat_dim) { + new_vals <- found_lats[TRUE] + } + if (!is.null(attribute_backup[['variables']][[var]][['dim']][[dim]][['len']])) { + attribute_backup[['variables']][[var]][['dim']][[dim]][['len']] <- length(new_vals) + } + if (!is.null(attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']])) { + if (!result_is_irregular) { + attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals + } else { + attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + } + } + } + } + } + } + if (!is_irregular && result_is_irregular) { + attribute_backup[['coordinates']] <- paste(lon_var_name, lat_var_name) + } else if (is_irregular && !result_is_irregular) { + attribute_backup[['coordinates']] <- NULL + } + } + } + attributes(result_array) <- attribute_backup + lons_attr_bk[['dim']] <- dim(found_lons) + if (!is.null(lons_attr_bk[['variables']]) && (length(lons_attr_bk[['variables']]) > 0)) { + for (var in 1:length(lons_attr_bk[['variables']])) { + if (length(lons_attr_bk[['variables']][[var]][['dim']]) > 0) { + dims_to_remove <- NULL + for (dim in 1:length(lons_attr_bk[['variables']][[var]][['dim']])) { + dim_name <- NULL + if ('name' %in% names(lons_attr_bk[['variables']][[var]][['dim']][[dim]])) { + dim_name <- lons_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name 
== lon_dim) { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lon_name + } else { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lat_name + } + } + } else if (!is.null(names(lons_attr_bk[['variables']][[var]][['dim']]))) { + dim_name <- names(lons_attr_bk[['variables']][[var]][['dim']])[dim] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + names(lons_attr_bk[['variables']][[var]][['dim']])[which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + } else { + names(lons_attr_bk[['variables']][[var]][['dim']])[which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + } + } + } + if (!is.null(dim_name)) { + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + new_vals <- found_lons[TRUE] + } else if (dim_name == lat_dim) { + new_vals <- found_lats[TRUE] + if (!result_is_irregular) { + dims_to_remove <- c(dims_to_remove, dim) + } + } + if (!is.null(lons_attr_bk[['variables']][[var]][['dim']][[dim]][['len']])) { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['len']] <- length(new_vals) + } + if (!is.null(lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']])) { + if (!result_is_irregular) { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals + } else { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + } + } + } + } + } + if (length(dims_to_remove) > 1) { + lons_attr_bk[['variables']][[var]][['dim']] <- lons_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]] + } + } + } + names(lons_attr_bk[['variables']])[1] <- lon_var_name + lons_attr_bk[['variables']][[1]][['units']] <- 'degrees_east' + } + attributes(found_lons) <- lons_attr_bk + lats_attr_bk[['dim']] <- dim(found_lats) + if (!is.null(lats_attr_bk[['variables']]) && (length(lats_attr_bk[['variables']]) > 0)) { + for (var in 1:length(lats_attr_bk[['variables']])) { + if 
(length(lats_attr_bk[['variables']][[var]][['dim']]) > 0) { + dims_to_remove <- NULL + for (dim in 1:length(lats_attr_bk[['variables']][[var]][['dim']])) { + dim_name <- NULL + if ('name' %in% names(lats_attr_bk[['variables']][[var]][['dim']][[dim]])) { + dim_name <- lats_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + lats_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lon_name + } else { + lats_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lat_name + } + } + } else if (!is.null(names(lats_attr_bk[['variables']][[var]][['dim']]))) { + dim_name <- names(lats_attr_bk[['variables']][[var]][['dim']])[dim] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + names(lats_attr_bk[['variables']][[var]][['dim']])[which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + } else { + names(lats_attr_bk[['variables']][[var]][['dim']])[which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + } + } + } + if (!is.null(dim_name)) { + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + new_vals <- found_lons[TRUE] + if (!result_is_irregular) { + dims_to_remove <- c(dims_to_remove, dim) + } + } else if (dim_name == lat_dim) { + new_vals <- found_lats[TRUE] + } + if (!is.null(lats_attr_bk[['variables']][[var]][['dim']][[dim]][['len']])) { + lats_attr_bk[['variables']][[var]][['dim']][[dim]][['len']] <- length(new_vals) + } + if (!is.null(lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']])) { + if (!result_is_irregular) { + lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals + } else { + lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + } + } + } + } + } + if (length(dims_to_remove) > 1) { + lats_attr_bk[['variables']][[var]][['dim']] <- lats_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]] + } + }
+ } + names(lats_attr_bk[['variables']])[1] <- lat_var_name + lats_attr_bk[['variables']][[1]][['units']] <- 'degrees_north' + } + attributes(found_lats) <- lats_attr_bk + } + list(data_array = if (return_array) { + if (interpolation_needed) { + result_array + } else { + data_array + } + } else { + NULL + }, + lons = found_lons, lats = found_lats) +} diff --git a/R/Histo2Hindcast.R b/R/Histo2Hindcast.R index 7993d74e0fb9de2c15b36f3bbc0cc0f6a0c1b733..b8a312c7a8784b9e7085db3abdf6badb02d3e313 100644 --- a/R/Histo2Hindcast.R +++ b/R/Histo2Hindcast.R @@ -7,10 +7,10 @@ Histo2Hindcast <- function(varin, sdatesin, sdatesout, nleadtimesout) { varin <- Enlarge(varin, 7) outvar <- array(dim = c(dim(varin)[1:2], length(sdatesout), nleadtimesout, dim(varin)[5:7])) - yearsin <- as.integer(substr(sdatesin, 1, 4)) - yearsout <- as.integer(substr(sdatesout, 1, 4)) - monthin <- as.integer(substr(sdatesin, 5, 6)) - monthout <- as.integer(substr(sdatesout, 5, 6)) + yearsin <- as.numeric(substr(sdatesin, 1, 4)) + yearsout <- as.numeric(substr(sdatesout, 1, 4)) + monthin <- as.numeric(substr(sdatesin, 5, 6)) + monthout <- as.numeric(substr(sdatesout, 5, 6)) # # Re-organization # ~~~~~~~~~~~~~~~~~ diff --git a/R/Subset.R b/R/Subset.R index 7a6c005f4e4cd58edfa924441c3c71183c69c17d..a084a1c2ff3526c460bdcc87e8741cfa355d0386 100644 --- a/R/Subset.R +++ b/R/Subset.R @@ -71,16 +71,19 @@ Subset <- function(x, along, indices, drop = FALSE) { } } - # Amend the final dimensions and put dimnames + # Amend the final dimensions and put dimnames and attributes + metadata <- attributes(x) + metadata[['dim']] <- dim(subset) if (length(dims_to_drop) > 0) { - dim(subset) <- dim(subset)[-dims_to_drop] + metadata[['dim']] <- metadata[['dim']][-dims_to_drop] if (is.character(dim_names)) { - names(dim(subset)) <- dim_names[-dims_to_drop] - attr(subset, 'dimensions') <- dim_names[-dims_to_drop] + names(metadata[['dim']]) <- dim_names[-dims_to_drop] + metadata[['dimensions']] <- dim_names[-dims_to_drop] } } 
else if (is.character(dim_names)) { - names(dim(subset)) <- dim_names - attr(subset, 'dimensions') <- dim_names + names(metadata[['dim']]) <- dim_names + metadata[['dimensions']] <- dim_names } + attributes(subset) <- metadata subset } diff --git a/R/Utils.R b/R/Utils.R index 664d858e067739c131eee16b8ed766ab90f249e7..f958ecd10a854d4676a4b145ba4b040fa57f7d23 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -30,6 +30,52 @@ } } +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. 
It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + .LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { # The purpose, working modes, inputs and outputs of this function are # explained in ?LoadDataFile diff --git a/man/ArrayToNetCDF.Rd b/man/ArrayToNetCDF.Rd new file mode 100644 index 0000000000000000000000000000000000000000..9e0188b46c67ad080faa8baad633b61d12788bb0 --- /dev/null +++ b/man/ArrayToNetCDF.Rd @@ -0,0 +1,205 @@ +\name{ArrayToNetCDF} +\alias{ArrayToNetCDF} +\title{Save multidimensional R arrays into NetCDF files} +\description{This function takes as input one or a list of multidimensional R arrays and stores them in a NetCDF file, using the \code{ncdf4} package. The full path and name of the resulting file must be specified. Metadata can be attached to the arrays and propagated into the NetCDF file in 3 possible ways:\cr + \itemize{ + \item{Via the list names if a list of arrays is provided:}{Each name in the input list, corresponding to one multidimensional array, will be interpreted as the name of the variable it contains.\cr +E.g: \code{ArrayToNetCDF(arrays = list(temperature = array(1:9, c(3, 3))), file_path = 'example.nc')}} + \item{Via the dimension names of each provided array:}{The dimension names of each of the provided arrays will be interpreted as names for the dimensions of the NetCDF files. 
Read further for special dimension names that will trigger special behaviours, such as 'time' and 'var'.\cr +E.g: + \code{ +temperature <- array(rnorm(10 * 50 * 100), dim = c(10, 50, 100)) +names(dim(temperature)) <- c('time', 'latitude', 'longitude') +ArrayToNetCDF(list(temperature = temperature), file_path = 'example.nc') + } + } + \item{Via the attribute 'variables' of each provided array:}{The arrays can be provided with metadata in an attribute named 'variables', which is expected to be a named list of named lists, where the names of the container list are the names of the variables present in the provided array, and where each sub-list contains metadata for each of the variables. The attribute names and values supported in the sub-lists must follow the same format the package \code{ncdf4} uses to represent the NetCDF file headers.\cr +E.g: + \code{ +a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(addOffset = 100, + scaleFact = 10, + dim = list(list(name = 'time', + unlim = FALSE))) + ) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(a, 'tmp.nc') + } + } + } +The special dimension names are 'var'/'variable' and 'time'.\cr +If a dimension is named 'var' or 'variable', \code{ArrayToNetCDF} will interpret each array entry along such dimension corresponds to a separate new variable, hence will create a new variable inside the NetCDF file and will use it to store all the data in the provided array for the corresponding entry along the 'var'/'variable' dimension.\cr +If a dimension is named 'time', by default it will be interpreted and built as an unlimited dimension. The 'time' dimension must be the last dimension of the array (the right-most). If a 'var'/'variable' dimension is present, the 'time' dimension can be also placed on its left (i.e. the one before the last dimension). 
The default behaviour of creating the 'time' as unlimited dimension can be disabled by setting manually the attribute \code{unlim = FALSE}, as shown in the previous example. +} +\usage{ +ArrayToNetCDF(arrays, file_path) +} +\arguments{ + \item{arrays}{One or a list of multidimensional data arrays. The list can be provided with names, which will be interpreted as variable names. The arrays can be provided with dimension names. The arrays can be provided with metadata in the attribute 'variables' (read section Description for details).} + \item{file_path}{Path and name of the NetCDF file to be created.} +} +\value{This function returns NULL} +\examples{ + \dontrun{ +# Minimal use case +ArrayToNetCDF(array(1:9, c(3, 3)), 'tmp.nc') + +# Works with arrays of any number of dimensions +ArrayToNetCDF(array(1:27, c(3, 3, 3)), 'tmp.nc') + +# Arrays can also be provided in [named] lists +ArrayToNetCDF(list(tos = array(1:27, c(3, 3, 3))), 'tmp.nc') + +# Or with dimension names +# 'var' dimension name will generate multiple variables in the +# resulting NetCDF file +a <- array(1:27, dim = c(3, 3, 3)) +names(dim(a)) <- c('lon', 'lat', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# 'variable' as dimension name will do the same +a <- array(1:27, dim = c(3, 3, 3)) +names(dim(a)) <- c('lon', 'lat', 'variable') +ArrayToNetCDF(a, 'tmp.nc') + +# The 'time' dimension will be built as unlimited dimension, by default +a <- array(1:1600, dim = c(10, 20, 4, 2)) +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# Putting the 'time' dimension in a position which is not the last, or the one +# right before 'var'/'variable' will crash. 
Unlimited dimension must be in the +# last position +a <- array(1:1600, dim = c(10, 20, 4, 2)) +names(dim(a)) <- c('time', 'lat', 'lon', 'var') +ArrayToNetCDF(a, 'tmp.nc') +a <- array(1:1600, dim = c(10, 20, 4, 2)) +names(dim(a)) <- c('lat', 'time', 'lon', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# The dimension 'var'/'variable' can be in any position and can have any length +a <- array(1:1600, dim = c(10, 20, 4, 2)) +names(dim(a)) <- c('lat', 'var', 'lon', 'time') +ArrayToNetCDF(a, 'tmp.nc') + +# Multiple arrays can be provided in a list +a <- array(1:400, dim = c(5, 10, 4, 2)) +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(list(a, a), 'tmp.nc') + +# If no dimension names are given to an array, new names will be automatically +# generated +a <- array(1:400, dim = c(5, 10, 4, 2)) +b <- array(1:400, dim = c(5, 11, 4, 2)) +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(list(a, b), 'tmp.nc') + +# If two arrays use a same dimension but their lengths differ, the function +# will crash +a <- array(1:400, dim = c(5, 10, 4, 2)) +b <- array(1:400, dim = c(5, 11, 4, 2)) +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +names(dim(b)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(list(a, b), 'tmp.nc') + +# Metadata can be provided for each variable in each array, via the +# attribute 'variables'. In this example the metadata is empty. 
+a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(), + tas = list() + ) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# Variable names can be manually specified +a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(name = 'name1'), + tas = list(name = 'name2') + ) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# Units can be specified +a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(units = 'K'), + tas = list(units = 'K') + ) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# addOffset and scaleFactor can be specified +a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(addOffset = 100, + scaleFact = 10), + tas = list(addOffset = 100, + scaleFact = 10) + ) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# Unlimited dimensions can be manually created +a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(addOffset = 100, + scaleFact = 10, + dim = list(list(name = 'unlimited', + unlim = TRUE))), + tas = list(addOffset = 100, + scaleFact = 10, + dim = list(list(name = 'unlimited', + unlim = TRUE))) + ) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'unlimited', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# A 'time' dimension can be built without it necessarily being unlimited +a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(addOffset = 100, + scaleFact = 10, + dim = list(list(name = 'time', + unlim = FALSE))), + tas = list(addOffset = 100, + scaleFact = 10, + dim = list(list(name = 'time', + unlim = FALSE))) + ) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNetCDF(a, 'tmp.nc') + +# Multiple arrays with data for multiple 
variables can be saved into a +# NetCDF file at once. +tos <- array(1:400, dim = c(5, 10, 4)) +metadata <- list(tos = list(units = 'K')) +attr(tos, 'variables') <- metadata +names(dim(tos)) <- c('lat', 'lon', 'time') +lon <- seq(0, 360 - 360 / 10, length.out = 10) +dim(lon) <- length(lon) +metadata <- list(lon = list(units = 'degrees_east')) +attr(lon, 'variables') <- metadata +names(dim(lon)) <- 'lon' +lat <- seq(-90, 90, length.out = 5) +dim(lat) <- length(lat) +metadata <- list(lat = list(units = 'degrees_north')) +attr(lat, 'variables') <- metadata +names(dim(lat)) <- 'lat' +ArrayToNetCDF(list(tos, lon, lat), 'tmp.nc') + } +} +\author{ +History:\cr +0.0 - 2017-01 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code. +} +\keyword{datagen} diff --git a/man/CDORemap.Rd b/man/CDORemap.Rd new file mode 100644 index 0000000000000000000000000000000000000000..ef6e812ab0a9738739bebd52641b22b339c87a6c --- /dev/null +++ b/man/CDORemap.Rd @@ -0,0 +1,159 @@ +\name{CDORemap} +\alias{CDORemap} +\title{Interpolates arrays with longitude and latitude dimensions using CDO} +\description{This function takes as inputs a multidimensional array (optional), a vector or matrix of longitudes, a vector or matrix of latitudes, a destination grid specification, and the name of a method to be used to interpolate (one of those available in the 'remap' utility in CDO). The interpolated array is returned (if provided) together with the new longitudes and latitudes.\cr\cr +\code{CDORemap()} permutes by default the dimensions of the input array (if needed), splits it in chunks (CDO can work with data arrays of up to 4 dimensions), generates a file with the data of each chunk, interpolates it with CDO, reads it back into R and merges it into a result array. If no input array is provided, the longitude and latitude vectors will be transformed only. 
If the array is already on the desired destination grid, no transformation is performed (this behaviour works only for lonlat and gaussian grids).\cr\cr +Any metadata attached to the input data array, longitudes or latitudes will be preserved or accordingly modified. +} +\usage{ +CDORemap(data_array = NULL, lons, lats, grid, method, + avoid_writes = TRUE, crop = TRUE, + force_remap = FALSE, write_dir = tempdir()) +} +\arguments{ + \item{data_array}{Multidimensional numeric array to be interpolated. If provided, it must have at least longitude and latitude dimensions, identified by the array dimension names. The names for these dimensions must be one of those recognized by s2dverification (can be checked with \code{s2dverification:::.KnownLonNames()} and \code{s2dverification:::.KnownLatNames()}).} + \item{lons}{Numeric vector or array of longitudes of the centers of the grid cells. Its size must match the size of the longitude/latitude dimensions of the input array.} + \item{lats}{Numeric vector or array of latitudes of the centers of the grid cells. Its size must match the size of the longitude/latitude dimensions of the input array.} + \item{grid}{Character string specifying either a name of a grid (recognized by CDO; e.g.: 'r256x128', 't106grid') or a path to another NetCDF file which to read the grid from (a single grid must be defined in such file).} + \item{method}{Character string specifying an interpolation method (recognized by CDO; e.g.: 'con', 'bil', 'bic', 'dis'). The following long names are also supported: 'conservative', 'bilinear', 'bicubic' and 'distance-weighted'.} + \item{avoid_writes}{The step of permutation is needed when the input array has more than 3 dimensions and none of the longitude or latitude dimensions is in the right-most position (CDO would not accept it without permuting previously).
This step, executed by default when needed, can be avoided for the price of writing more intermediate files (which is usually inconvenient) by setting the parameter \code{avoid_writes = FALSE}.} + \item{crop}{Whether to crop the data after interpolation with 'cdo sellonlatbox' (TRUE) or to extend interpolated data to the whole world as CDO does by default (FALSE). If \code{crop = TRUE} then the longitude and latitude borders which to crop at are taken as the limits of the cells at the borders ('lons' and 'lats' are perceived as cell centers), i.e. the resulting array will contain data that covers the same area as the input array. This is equivalent to specifying \code{crop = 'preserve'}, i.e. preserving area. If \code{crop = 'tight'} then the borders which to crop at are taken as the minimum and maximum cell centers in 'lons' and 'lats', i.e. the area covered by the resulting array may be smaller if interpolating from a coarse grid to a fine grid. The parameter 'crop' also accepts a numeric vector of custom borders which to crop at: c(western border, eastern border, southern border, northern border). }
By default, the R session temporary directory is used (\code{tempdir()}).} +} +\value{A list with the following components:\cr + \item{'data_array'}{The interpolated data array (if an input array is provided at all, NULL otherwise).} + \item{'lons'}{The longitudes of the data on the destination grid.} + \item{'lats'}{The latitudes of the data on the destination grid.} +} +\examples{ + \dontrun{ +# Interpolating only vectors of longitudes and latitudes +lon <- seq(0, 360 - 360/50, length.out = 50) +lat <- seq(-90, 90, length.out = 25) +tas2 <- CDORemap(NULL, lon, lat, 't170grid', 'bil', TRUE) + +# Minimal array interpolation +tas <- array(1:50, dim = c(25, 50)) +names(dim(tas)) <- c('lat', 'lon') +lon <- seq(0, 360 - 360/50, length.out = 50) +lat <- seq(-90, 90, length.out = 25) +tas2 <- CDORemap(tas, lon, lat, 't170grid', 'bil', TRUE) + +# Metadata can be attached to the inputs. It will be preserved and +# accordingly modified. +tas <- array(1:50, dim = c(25, 50)) +names(dim(tas)) <- c('lat', 'lon') +lon <- seq(0, 360 - 360/50, length.out = 50) +metadata <- list(lon = list(units = 'degrees_east')) +attr(lon, 'variables') <- metadata +lat <- seq(-90, 90, length.out = 25) +metadata <- list(lat = list(units = 'degrees_north')) +attr(lat, 'variables') <- metadata +metadata <- list(tas = list(dim = list(lat = list(len = 25, + vals = lat), + lon = list(len = 50, + vals = lon) + ))) +attr(tas, 'variables') <- metadata +tas2 <- CDORemap(tas, lon, lat, 't170grid', 'bil', TRUE) + +# Arrays of any number of dimensions in any order can be provided.
+num_lats <- 25 +num_lons <- 50 +tas <- array(1:(10*num_lats*10*num_lons*10), dim = c(10, num_lats, 10, num_lons, 10)) +names(dim(tas)) <- c('a', 'lat', 'b', 'lon', 'c') +lon <- seq(0, 360 - 360/num_lons, length.out = num_lons) +metadata <- list(lon = list(units = 'degrees_east')) +attr(lon, 'variables') <- metadata +lat <- seq(-90, 90, length.out = num_lats) +metadata <- list(lat = list(units = 'degrees_north')) +attr(lat, 'variables') <- metadata +metadata <- list(tas = list(dim = list(a = list(), + lat = list(len = num_lats, + vals = lat), + b = list(), + lon = list(len = num_lons, + vals = lon), + c = list() + ))) +attr(tas, 'variables') <- metadata +tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', TRUE) +# The step of permutation can be avoided but more intermediate file writes +# will be performed. +tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE) + +# If the provided array has the longitude or latitude dimension in the +# right-most position, the same number of file writes will be performed, +# even if avoid_writes = FALSE.
+num_lats <- 25 +num_lons <- 50 +tas <- array(1:(10*num_lats*10*num_lons*10), dim = c(10, num_lats, 10, num_lons)) +names(dim(tas)) <- c('a', 'lat', 'b', 'lon') +lon <- seq(0, 360 - 360/num_lons, length.out = num_lons) +metadata <- list(lon = list(units = 'degrees_east')) +attr(lon, 'variables') <- metadata +lat <- seq(-90, 90, length.out = num_lats) +metadata <- list(lat = list(units = 'degrees_north')) +attr(lat, 'variables') <- metadata +metadata <- list(tas = list(dim = list(a = list(), + lat = list(len = num_lats, + vals = lat), + b = list(), + lon = list(len = num_lons, + vals = lon) + ))) +attr(tas, 'variables') <- metadata +tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', TRUE) +tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE) + +# An example of an interpolation from and onto a rectangular regular grid +num_lats <- 25 +num_lons <- 50 +tas <- array(1:(1*num_lats*num_lons), dim = c(num_lats, num_lons)) +names(dim(tas)) <- c('y', 'x') +lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons), dim = c(num_lons, num_lats)) +metadata <- list(lon = list(units = 'degrees_east')) +names(dim(lon)) <- c('x', 'y') +attr(lon, 'variables') <- metadata +lat <- t(array(seq(-90, 90, length.out = num_lats), dim = c(num_lats, num_lons))) +metadata <- list(lat = list(units = 'degrees_north')) +names(dim(lat)) <- c('x', 'y') +attr(lat, 'variables') <- metadata +tas2 <- CDORemap(tas, lon, lat, 'r100x50', 'bil') + +# An example of an interpolation from an irregular grid onto a gaussian grid +num_lats <- 25 +num_lons <- 50 +tas <- array(1:(10*num_lats*10*num_lons*10), dim = c(10, num_lats, 10, num_lons)) +names(dim(tas)) <- c('a', 'j', 'b', 'i') +lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons), dim = c(num_lons, num_lats)) +metadata <- list(lon = list(units = 'degrees_east')) +names(dim(lon)) <- c('i', 'j') +attr(lon, 'variables') <- metadata +lat <- t(array(seq(-90, 90, length.out = num_lats), dim = c(num_lats, num_lons))) +metadata <- list(lat = 
list(units = 'degrees_north')) +names(dim(lat)) <- c('i', 'j') +attr(lat, 'variables') <- metadata +tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil') + +# Again, the dimensions can be in any order +num_lats <- 25 +num_lons <- 50 +tas <- array(1:(10*num_lats*10*num_lons), dim = c(10, num_lats, 10, num_lons)) +names(dim(tas)) <- c('a', 'j', 'b', 'i') +lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons), dim = c(num_lons, num_lats)) +names(dim(lon)) <- c('i', 'j') +lat <- t(array(seq(-90, 90, length.out = num_lats), dim = c(num_lats, num_lons))) +names(dim(lat)) <- c('i', 'j') +tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil') +tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE) +# It is possible to specify an external NetCDF file as target grid reference +tas2 <- CDORemap(tas, lon, lat, 'external_file.nc', 'bil') + } +} +\author{ +History:\cr +0.0 - 2017-01 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code. +} +\keyword{datagen}