Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
data_array_final_dims <- multiApply::Apply(data_array_final_dims,
target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date')
output_dims = c(across_inner_dim, across_file_dim),
fun = func_remove_blank,
logi_array = logi_array)$output1
## reorder back to the correct dim
tmp <- match(names(final_dims), names(dim(data_array_final_dims)))
data_array_final_dims <- .aperm2(data_array_final_dims, tmp)
data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector
return(data_array_tmp)
}
# When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks
# (i.e., work_piece) is necessary if one file contains values for discrete dimensions
rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_dims,
final_dims_fake, across_inner_dim, length_inner_across_dim) {
# generate the correct order list from indices_chunk
final_order_list <- list()
i <- 1
j <- 1
a <- indices_chunk[i]
while (i <= length(indices_chunk)) {
while (indices_chunk[i+1] == indices_chunk[i] & i < length(indices_chunk)) {
a <- c(a, indices_chunk[i+1])
i <- i + 1
}
final_order_list[[j]] <- a
a <- indices_chunk[i+1]
i <- i + 1
j <- j + 1
}
names(final_order_list) <- sapply(final_order_list, '[[', 1)
final_order_list <- lapply(final_order_list, length)
if (!all(diff(as.numeric(names(final_order_list))) > 0)) {
# shape the vector into the array without split_dims
split_dims_pos <- match(all_split_dims[[1]], final_dims_fake)
new_dims <- c()
if (split_dims_pos[1] > 1) {
new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
}
new_dims <- c(new_dims, prod(all_split_dims[[1]]))
names(new_dims)[split_dims_pos[1]] <- across_inner_dim
if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
}
data_array_no_split <- array(data_array_tmp, dim = new_dims)
# seperate 'time' dim into each work_piece length
data_array_seperate <- list()
tmp <- cumsum(unlist(length_inner_across_dim))
tmp <- c(0, tmp)
for (i in 1:length(length_inner_across_dim)) {
data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim,
(tmp[i] + 1):tmp[i + 1])
}
# re-build the array: chunk
which_chunk <- as.numeric(names(final_order_list))
how_many_indices <- unlist(final_order_list)
array_piece <- list()
ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate)))
for (i in 1:length(final_order_list)) {
array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]],
across_inner_dim,
ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1))
ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i]
}
# re-build the array: paste
data_array_tmp <- array_piece[[1]]
along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim)
if (length(array_piece) > 1) {
for (i in 2:length(array_piece)) {
data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]],
along = along_pos)
}
}
}
return(data_array_tmp)
}
# Create a list of metadata of the variable (e.g., tas)
create_metadata_list <- function(array_of_metadata_flags, metadata_dims, pattern_dims,
loaded_metadata_files, loaded_metadata, dat_names,
dataset_has_files) {
#NOTE: Here, metadata can be saved in one of two ways: one for $common and the other for $dat
# for $common, it is a list of metadata length. For $dat, it is a list of dat length,
# and each sublist has the metadata for each dat.
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims]
if (!any(names(dim_of_metadata) == pattern_dims) |
(any(names(dim_of_metadata) == pattern_dims) &
dim_of_metadata[pattern_dims] == 1)) { # put under $common; old code
return_metadata <- vector('list',
length = prod(dim_of_metadata))
return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata
dim(return_metadata) <- dim_of_metadata
} else { # put under $dat. metadata_dims has 'dat' and dat length > 1
return_metadata <- vector('list',
length = dim_of_metadata[pattern_dims])
names(return_metadata) <- dat_names
for (kk in 1:length(return_metadata)) {
return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat
}
loaded_metadata_count <- 1
for (kk in 1:length(return_metadata)) {
for (jj in 1:length(return_metadata[[kk]])) {
if (dataset_has_files[kk]) {
if (loaded_metadata_count %in% loaded_metadata_files) {
return_metadata[[kk]][jj] <- loaded_metadata[[which(loaded_metadata_files == loaded_metadata_count)]]
names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[which(loaded_metadata_files == loaded_metadata_count)]])
} else {
return_metadata[[kk]][jj] <- NULL
loaded_metadata_count <- loaded_metadata_count + 1
} else {
return_metadata[[kk]][jj] <- NULL
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
return(return_metadata)
}
# This function adds the metadata of the variable (e.g., tas) into the list of picked_vars or
# picked_common_vars. The metadata is only retrieved when 'retrieve = TRUE'.
combine_metadata_picked_vars <- function(return_metadata, picked_vars, picked_common_vars,
metadata_dims, pattern_dims, length_dat) {
#NOTE: The metadata of variables can be saved in one of the two different structures.
# (1) metadata_dims != 'dat', or (metadata_dims == 'dat' & length(dat) == 1):
# put under $common
# (2) (metadata_dims == 'dat' & length(dat) > 1):
# put under $dat1, $dat2, .... Put it in picked_vars list
#TODO: The current (2) uses the inefficient method. Should define the list structure first
# then fill the list, rather than expand it in the for loop.
if (any(metadata_dims == pattern_dims) & length_dat > 1) { # (2)
for (kk in 1:length(return_metadata)) {
sublist_names <- lapply(return_metadata, names)[[kk]]
if (!is.null(sublist_names)) {
for (jj in 1:length(sublist_names)) {
picked_vars[[kk]][[sublist_names[jj]]] <- return_metadata[[kk]][[jj]]
}
}
}
Variables_list <- c(list(common = picked_common_vars), picked_vars)
} else { #(1)
len <- unlist(lapply(return_metadata, length))
len <- sum(len) + length(which(len == 0)) #0 means NULL
name_list <- lapply(return_metadata, names)
new_list <- vector('list', length = len)
count <- 1
for (kk in 1:length(return_metadata)) {
if (length(return_metadata[[kk]]) == 0) { #NULL
count <- count + 1
} else {
for (jj in 1:length(return_metadata[[kk]])) {
new_list[[count]] <- return_metadata[[kk]][[jj]]
names(new_list)[count] <- name_list[[kk]][jj]
count <- count + 1
}
}
}
Variables_list <- c(list(common = c(picked_common_vars, new_list)), picked_vars)
}
return(Variables_list)
}