zzz.R 53.8 KB
Newer Older
# Take *_var parameters apart
take_var_params <- function(dim_params) {
 # Take *_var parameters apart
  var_params_ind <- grep('_var$', names(dim_params))
  var_params <- dim_params[var_params_ind]
  # Check all *_var are NULL or vectors of character strings, and 
  # that they all have a matching dimension param.
  i <- 1
  for (var_param in var_params) {
    if (!is.character(var_param)) {
      stop("All '*_var' parameters must be character strings.")
    } else if (!any(grepl(paste0('^', strsplit(names(var_params)[i],
                                               '_var$')[[1]][1], '$'),
                          names(dim_params)))) {
      stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '",
                  names(var_params)[i], "' but no parameter '",
                  strsplit(names(var_params)[i], '_var$')[[1]][1], "'."))
    }
    i <- i + 1
  }
  # Make the keys of 'var_params' to be the name of 
  # the corresponding dimension.
  if (length(var_params) < 1) {
    var_params <- NULL
  } else {
    names(var_params) <- gsub('_var$', '', names(var_params))
  }
  return(var_params)
}

# Take *_reorder parameters apart
take_var_reorder <- function(dim_params) {
  # Take *_reorder parameters apart
  dim_reorder_params_ind <- grep('_reorder$', names(dim_params))
  dim_reorder_params <- dim_params[dim_reorder_params_ind]
  # Make the keys of 'dim_reorder_params' to be the name of 
  # the corresponding dimension.
  if (length(dim_reorder_params) < 1) {
    dim_reorder_params <- NULL
  } else {
    names(dim_reorder_params) <- gsub('_reorder$', '', names(dim_reorder_params))
  }
  return(dim_reorder_params)
}

# Take *_depends parameters apart
take_var_depends <- function(dim_params) {
  depends_params_ind <- grep('_depends$', names(dim_params))
  depends_params <- dim_params[depends_params_ind]
  # Check all *_depends are NULL or vectors of character strings, and 
  # that they all have a matching dimension param.
  i <- 1
  for (depends_param in depends_params) {
    if (!is.character(depends_param) || (length(depends_param) > 1)) {
      stop("All '*_depends' parameters must be single character strings.")
    } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i],
                                               '_depends$')[[1]][1], '$'),
                          names(dim_params)))) {
      stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '",
                  names(depends_params)[i], "' but no parameter '",
                  strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'."))
    }
    i <- i + 1
  }
  # Make the keys of 'depends_params' to be the name of 
  # the corresponding dimension.
  if (length(depends_params) < 1) {
    depends_params <- NULL
  } else {
    names(depends_params) <- gsub('_depends$', '', names(depends_params))
  }
  return(depends_params)
}

# Take *_across parameters apart
take_var_across <- function(dim_params) {
  across_params_ind <- grep('_across$', names(dim_params))
  across_params <- dim_params[across_params_ind]
  # Check all *_across are NULL or vectors of character strings, and 
  # that they all have a matching dimension param.
  i <- 1
  for (across_param in across_params) {
    if (!is.character(across_param) || (length(across_param) > 1)) {
      stop("All '*_across' parameters must be single character strings.")
    } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i],
                                               '_across$')[[1]][1], '$'),
                          names(dim_params)))) {
      stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '",
                  names(across_params)[i], "' but no parameter '",
                  strsplit(names(across_params)[i], '_across$')[[1]][1], "'."))
    }
    i <- i + 1
  }
  # Make the keys of 'across_params' to be the name of 
  # the corresponding dimension.
  if (length(across_params) < 1) {
    across_params <- NULL
  } else {
    names(across_params) <- gsub('_across$', '', names(across_params))
  }
  return(across_params)
}

# Leave alone the dimension parameters in the variable dim_params
rebuild_dim_params <- function(dim_params, merge_across_dims,
                               inner_dims_across_files) {
  var_params_ind <- grep('_var$', names(dim_params))
  dim_reorder_params_ind <- grep('_reorder$', names(dim_params))
  tolerance_params_ind <- grep('_tolerance$', names(dim_params))
  depends_params_ind <- grep('_depends$', names(dim_params))
  across_params_ind <- grep('_across$', names(dim_params))
  # Leave alone the dimension parameters in the variable dim_params
  if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind,
               depends_params_ind, across_params_ind)) > 0) {
    dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind,
                                tolerance_params_ind, depends_params_ind,
                                across_params_ind)]
    # Reallocating pairs of across file and inner dimensions if they have
    # to be merged. They are put one next to the other to ease merge later.
    if (merge_across_dims) {
      for (inner_dim_across in names(inner_dims_across_files)) {
        inner_dim_pos <- which(names(dim_params) == inner_dim_across)
        file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]])
        new_pos <- inner_dim_pos
        if (file_dim_pos < inner_dim_pos) {
          new_pos <- new_pos - 1
        }
        dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)]
        dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)]
        new_dim_params <- list()
        if (new_pos > 1) {
          new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)])
        }
        new_dim_params <- c(new_dim_params, dim_params_to_move)
        if (length(dim_params) >= new_pos) {
          new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)])
        }
        dim_params <- new_dim_params
      }
    }
  }
  dim_names <- names(dim_params)
  if (is.null(dim_names)) {
    stop("At least one pattern dim must be specified.")
  }
  return(dim_params)
}

# Look for chunked dims
look_for_chunks <- function(dim_params, dim_names) {
    chunks <- vector('list', length(dim_names))
  names(chunks) <- dim_names
  for (dim_name in dim_names) {
    if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) {
      chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk')
      attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')]
    } else {
      chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1)
    }
  }
  return(chunks)
}

# This is a helper function to compute the chunk indices to take once the total
# number of indices for a dimension has been discovered.
  chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) {
    if (n_chunks > n_indices) {
      stop("Requested to divide dimension '", dim_name, "' of length ",
           n_indices, " in ", n_chunks, " chunks, which is not possible.")
    }
    chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks)
    chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks
    if (chunks_to_extend > 0) {
      chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1
    }
    chunk_size <- chunk_sizes[chunk]
    offset <- 0
    if (chunk > 1) {
      offset <- sum(chunk_sizes[1:(chunk - 1)])
    }
    indices <- 1:chunk_sizes[chunk] + offset
    array(indices, dim = setNames(length(indices), dim_name))
  }

# Check pattern_dims
# Function found_pattern_dims may change pattern_dims in the parent.env
found_pattern_dims <- function(pattern_dims, dim_names, var_params,
                               dim_params, dim_reorder_params) {
  if (is.null(pattern_dims)) {
    .warning(paste0("Parameter 'pattern_dims' not specified. Taking the first dimension, '",
                    dim_names[1], "' as 'pattern_dims'."))
    assign('pattern_dims', dim_names[1], envir = parent.frame())
    pattern_dims <- dim_names[1]
  } else if (is.character(pattern_dims) && (length(pattern_dims) > 0)) {
    assign('pattern_dims', unique(pattern_dims), envir = parent.frame())
    pattern_dims <- unique(pattern_dims)
  } else {
    stop("Parameter 'pattern_dims' must be a vector of character strings.")
  }
  if (any(names(var_params) %in% pattern_dims)) {
    stop("'*_var' parameters specified for pattern dimensions. Remove or fix them.")
  }
  # Find the pattern dimension with the pattern specifications
  found_pattern_dim <- NULL
  for (pattern_dim in pattern_dims) {
    # Check all specifications in pattern_dim are valid
aho's avatar
aho committed
#    dat <- datasets <- dim_params[[pattern_dim]]
    dat <- dim_params[[pattern_dim]]
    if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > 0)) && !is.list(dat)) {
      stop(paste0("Parameter '", pattern_dim,
                  "' must be a list of lists with pattern specifications or a vector of character strings."))
    }
    if (!is.null(dim_reorder_params[[pattern_dim]])) {
      .warning(paste0("A reorder for the selectors of '", pattern_dim,
                      "' has been specified, but it is a pattern dimension and the reorder will be ignored."))
    }
    if (is.list(dat) || any(sapply(dat, is.list))) {
      if (is.null(found_pattern_dim)) {
        found_pattern_dim <- pattern_dim
      } else {
        stop("Found more than one pattern dim with pattern specifications (list of lists). One and only one pattern dim must contain pattern specifications.")
      }
    }
  }
  if (is.null(found_pattern_dim)) {
   .warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications."))
    found_pattern_dim <- pattern_dims[1]
  }
  return(found_pattern_dim)
}

aho's avatar
aho committed

# The variable 'dat' is mounted with the information (name, path) of each dataset.
# NOTE: This function creates the object 'dat_names' in the parent env.
mount_dat <- function(dat, pattern_dim, found_pattern_dim) {

#  dat_info_names <- c('name', 'path')#, 'nc_var_name', 'suffix', 'var_min', 'var_max', 'dimnames')
  dat_to_fetch <- c()
  dat_names <- c()
  if (!is.list(dat)) {
    dat <- as.list(dat)
  } else {
    if (!any(sapply(dat, is.list))) {
      dat <- list(dat)
    }
  }
  for (i in 1:length(dat)) {
    if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && nchar(dat[[i]]) > 0) {
      if (grepl('^(\\./|\\.\\./|/.*/|~/)', dat[[i]])) {
        dat[[i]] <- list(path = dat[[i]])
      } else {
        dat[[i]] <- list(name = dat[[i]])
      }
    } else if (!is.list(dat[[i]])) {
      stop(paste0("Parameter '", pattern_dim, 
                  "' is incorrect. It must be a list of lists or character strings."))
    }
    #if (!(all(names(dat[[i]]) %in% dat_info_names))) {
    #  stop("Error: parameter 'dat' is incorrect. There are unrecognized components in the information of some of the datasets. Check 'dat' in ?Load for details.")
    #}
    if (!('name' %in% names(dat[[i]]))) {
      dat[[i]][['name']] <- paste0('dat', i)
      if (!('path' %in% names(dat[[i]]))) {
        stop(paste0("Parameter '", found_pattern_dim, 
                    "' is incorrect. A 'path' should be provided for each dataset if no 'name' is provided."))
      }
    } else if (!('path' %in% names(dat[[i]]))) {
      dat_to_fetch <- c(dat_to_fetch, i)
    }
    #if ('path' %in% names(dat[[i]])) {
    #  if (!('nc_var_name' %in% names(dat[[i]]))) {
    #    dat[[i]][['nc_var_name']] <- '$var_name$'
    #  }
    #  if (!('suffix' %in% names(dat[[i]]))) {
    #    dat[[i]][['suffix']] <- ''
    #  }
    #  if (!('var_min' %in% names(dat[[i]]))) {
    #    dat[[i]][['var_min']] <- ''
    #  }
    #  if (!('var_max' %in% names(dat[[i]]))) {
    #    dat[[i]][['var_max']] <- ''
    #  }
    #}
    dat_names <- c(dat_names, dat[[i]][['name']])
  }
  if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < length(dat))) {
    .warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.")
  }
  if (length(dat_to_fetch) > 0) {
    stop("Specified only the name for some data sets, but not the path ",
         "pattern. This option has not been yet implemented.")
  }

  assign('dat_names', dat_names, envir = parent.frame())
  return(dat)
}

# Add attributes indicating whether this dimension selector is value or indice
add_value_indices_flag <- function(x) {
  if (is.null(attr(x, 'values')) || is.null(attr(x, 'indices'))) {
    flag <- (any(x %in% c('all', 'first', 'last')) || is.numeric(unlist(x)))
    attr(x, 'values') <- !flag
    attr(x, 'indices') <- flag
  }
  return(x)
}


# Find the value for the undefined selector (i.e., indices()). Use the value from the first 
# found file.
# Note that "dat[[i]][['path']]" in parent env. is changed in this function.
find_ufd_value <- function(undefined_file_dims, dat, i, replace_values,
                           first_file, file_dims, path_glob_permissive, 
                           depending_file_dims, dat_selectors, selector_checker, chunks) {
  first_values <- vector('list', length = length(undefined_file_dims))
  names(first_values) <- undefined_file_dims
  found_values <- 0
  stop <- FALSE
  try_dim <- 1
  last_success <- 1
  while ((found_values < length(undefined_file_dims)) && !stop) {
    u_file_dim <- undefined_file_dims[try_dim]
    if (is.null(first_values[[u_file_dim]])) {
      path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], 
                                                           replace_values[-which(file_dims == u_file_dim)],
                                                           allow_undefined_key_vars = TRUE)
      found_value <- .FindTagValue(path_with_globs_and_tag, 
                                   first_file, u_file_dim)
      if (!is.null(found_value)) {
        found_values <- found_values + 1
        last_success <- try_dim
        first_values[[u_file_dim]] <- found_value
        replace_values[[u_file_dim]] <- found_value
      }
    }
    try_dim <- (try_dim %% length(undefined_file_dims)) + 1
    if (try_dim == last_success) {
      stop <- TRUE
    }
  }
  if (found_values < length(undefined_file_dims)) {
    stop(paste0("Path pattern of dataset '", dat[[i]][['name']], 
                "' is too complex. Could not automatically ",
                "detect values for all non-explicitly defined ",
                "indices. Check its pattern: ", dat[[i]][['path']]))
  }
  ## TODO: Replace ReplaceGlobExpressions by looped call to FindTagValue? As done above
  ##       Maybe it can solve more cases actually. I got warnings in ReplGlobExp with a typical
  ##       cmor case, requesting all members and chunks for fixed var and sdate. Not fixing
  ##       sdate raised 'too complex' error.
  # Replace shell globs in path pattern and keep the file_dims as tags
  dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, 
                                                file_dims, dat[[i]][['name']], path_glob_permissive)

  # Now time to look for the available values for the non 
  # explicitly defined selectors for the file dimensions.
  #print("H")
  # Check first the ones that do not depend on others.
  ufd <- c(undefined_file_dims[which(!(undefined_file_dims %in% names(depending_file_dims)))],
           undefined_file_dims[which(undefined_file_dims %in% names(depending_file_dims))])
  
  for (u_file_dim in ufd) {
    replace_values[undefined_file_dims] <- first_values
    replace_values[[u_file_dim]] <- '*'
    depended_dim <- NULL
    depended_dim_values <- NA

    #NOTE: Here 'selectors' is always 1. Is it supposed to be like this?
    selectors <- dat_selectors[[u_file_dim]][[1]]
    if (u_file_dim %in% names(depending_file_dims)) {
      depended_dim <- depending_file_dims[[u_file_dim]]
      depended_dim_values <- dat_selectors[[depended_dim]][[1]]
      dat_selectors[[u_file_dim]] <- vector('list', length = length(depended_dim_values))
      names(dat_selectors[[u_file_dim]]) <- depended_dim_values
    } else {
      dat_selectors[[u_file_dim]] <- list()
    }
    if (u_file_dim %in% unlist(depending_file_dims)) {
      depending_dims <- names(depending_file_dims)[which(sapply(depending_file_dims, function(x) u_file_dim %in% x))]
      replace_values[depending_dims] <- rep('*', length(depending_dims))
    }
    for (j in 1:length(depended_dim_values)) {
      parsed_values <- c()
      if (!is.null(depended_dim)) {
        replace_values[[depended_dim]] <- depended_dim_values[j]
      }
      path_with_globs <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values)
      found_files <- Sys.glob(path_with_globs)
      ## TODO: Enhance this error message, or change by warning.
      ##       Raises if a wrong sdate is specified, for example.
      if (length(found_files) == 0) {
        .warning(paste0("Could not find files for any '", u_file_dim, 
                        "' for '", depended_dim, "' = '", 
                        depended_dim_values[j], "'."))
        dat_selectors[[u_file_dim]][[j]] <- NA
      } else {
        for (found_file in found_files) {
          path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']],
                                                               replace_values[-which(file_dims == u_file_dim)],
                                                               allow_undefined_key_vars = TRUE)
          parsed_values <- c(parsed_values, 
                             .FindTagValue(path_with_globs_and_tag, found_file, 
                                           u_file_dim))
        }
        #TODO: selector_checker() doesn't allow selectors to be characters. For selectors
        #      like "member = 'r7i1p1f1", it cannot be defined with values.
        dat_selectors[[u_file_dim]][[j]] <- selector_checker(selectors = selectors,
                                                             var = unique(parsed_values),
                                                             return_indices = FALSE)
        # Take chunk if needed
        dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]),
                                                                                           chunks[[u_file_dim]]['chunk'],
                                                                                           chunks[[u_file_dim]]['n_chunks'], 
                                                                                           u_file_dim)]
      }
    }
  }
  #NOTE: change 'dat' in parent env. because "dat[[i]][['path']]" is changed.
  assign('dat', dat, envir = parent.frame())
  return(dat_selectors)
}

423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000

# Adjust the argument 'return_vars' if users don't assign them properly.
# Force return_vars = (time = NULL) to (time = 'sdate') if one of the situations:
# (1) selector = [sdate = 2, time = 4], or
# (2) time_across = 'sdate'.
correct_return_vars <- function(inner_dim, inner_dims_across_files, found_pattern_dim,
                                file_dim_as_selector_array_dim) {
    # inner_dim is not in return_vars or is NULL
      if (is.character(file_dim_as_selector_array_dim)) { #(1)
        if (file_dim_as_selector_array_dim %in% found_pattern_dim) {
          stop(paste0("Found '", inner_dim, "' selector has dimension of the pattern dim '",
                      found_pattern_dim, 
                      "', which is not allowed. To assign the dependency on the pattern dim, ",
                      "use 'return_vars = list(", inner_dim, " = 'dat')' instead."))
        } else {
          corrected_value <- file_dim_as_selector_array_dim
        }
      } else if (inner_dim %in% inner_dims_across_files) { #(2)
        file_dim_name <- names(which(inner_dim == inner_dims_across_files))
        if (file_dim_name %in% found_pattern_dim) {
          stop(paste0("Found '", inner_dim, "' has across dependency on the pattern dim '",
                      found_pattern_dim, "', which is not allowed."))
        } else {
          corrected_value <- file_dim_name
        }
      }
      .warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", corrected_value,
                      "', but '", inner_dim, "' is not in return_vars list or is NULL. ",
                      "To provide the correct metadata, the value of ", inner_dim,
                      " in 'return_vars' is specified as '", corrected_value, "'."))
  return(corrected_value)
}

# The time classes that are needed to adjust time zone back to UTC.
time_special_types <- function() {
  list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, 'Date' = as.Date)
}

# Replace the dim names read from netCDF file with the user-specified synonims.
replace_with_synonmins <- function(read_dims, synonims) {
  corrected_dim_name <- sapply(names(read_dims), 
                            function(x) {
                              which_entry <- which(sapply(synonims, function(y) x %in% y))
                              if (length(which_entry) > 0) {
                                names(synonims)[which_entry]
                              } else {
                                x
                              }
                            })
  return(corrected_dim_name)
}


# Prepare vars_to_read for this dataset (i loop) and this file (j loop)
generate_vars_to_read <- function(return_vars, changed_dims, first_found_file, common_return_vars,
                                  common_first_found_file, i) {
  vars_to_read <- NULL
  if (length(return_vars) > 0) {
    #NOTE: because return_vars has changed 'dat' to character(0) above (line 1775),
    #      'dat' won't be included in vars_to_read here.
    vars_to_read <- names(return_vars)[sapply(return_vars, function(x) any(names(changed_dims) %in% x))]
  }
  if (!is.null(first_found_file)) {
    if (any(!first_found_file)) {
      vars_to_read <- c(vars_to_read, names(first_found_file[which(!first_found_file)]))
    }
  }
  if ((i == 1) && (length(common_return_vars) > 0)) {
    vars_to_read <- c(vars_to_read, names(common_return_vars)[sapply(common_return_vars, function(x) any(names(changed_dims) %in% x))])
  }
  if (!is.null(common_first_found_file)) {
    if (any(!common_first_found_file)) {
      vars_to_read <- c(vars_to_read, names(common_first_found_file[which(!common_first_found_file)]))
    }
  }
  return(vars_to_read)
}

# Find the largest dims length within one dataset.
find_largest_dims_length <- function(selectors_total_list, array_of_files_to_load,
                                     selector_indices_save, dat, expected_inner_dims,
                                     synonims, file_dim_reader) {
  # Open and get all the dims from all the files
  data_dims_all_files <- vector('list', length = length(selectors_total_list))

  for (selectors_kk in 1:length(data_dims_all_files)) {
    file_to_open <- do.call("[", c(list(array_of_files_to_load), 
                         as.list(selector_indices_save[[selectors_kk]])))
    data_dims_all_files[[selectors_kk]] <- try(
      file_dim_reader(file_to_open, NULL, selectors_total_list[[selectors_kk]],
                      lapply(dat[['selectors']][expected_inner_dims], '[[', 1),
                      synonims), silent = TRUE)

  }

  # Remove the missing files (i.e., fail try above)
  if (!identical(which(substr(data_dims_all_files, 1, 5) == 'Error'), integer(0))) {
      tmp <- which(substr(data_dims_all_files, 1, 5) == 'Error')
      data_dims_all_files <- data_dims_all_files[-tmp]
  }

  # Find the longest dimensions from all the files
  largest_data_dims <- rep(0, length(data_dims_all_files[[1]]))

  # The inner dim order may differ among files. Need to align them before
  # find out the largest dim length.
  dim_names_first_file <- names(data_dims_all_files[[1]])
  same_dim_order <-lapply(lapply(data_dims_all_files, names),      
                          identical, dim_names_first_file)
  for (to_fix in which(!unlist(same_dim_order))) {
    data_dims_all_files[[to_fix]] <- data_dims_all_files[[to_fix]][match(dim_names_first_file,
                                       names(data_dims_all_files[[to_fix]]))]
  }

  for (kk in 1:length(data_dims_all_files[[1]])) {
    largest_data_dims[kk] <- max(sapply(data_dims_all_files, '[', kk))
  }
  names(largest_data_dims) <- names(data_dims_all_files[[1]])
  return(largest_data_dims)
}

# Gererate vars_to_transform from picked_vars[[i]] and picked_common_vars
generate_vars_to_transform <- function(vars_to_transform, picked_vars, transform_vars, 
                                       picked_vars_ordered) {
  # In Start(), picked_vars can be picked_vars[[i]] or picked_common_vars
  picked_vars_to_transform <- which(names(picked_vars) %in% transform_vars)
  if (length(picked_vars_to_transform) > 0) {
    picked_vars_to_transform <- names(picked_vars)[picked_vars_to_transform]
    new_vars_to_transform <- picked_vars[picked_vars_to_transform]
    which_are_ordered <- which(!sapply(picked_vars_ordered[picked_vars_to_transform], is.null))
            
    if (length(which_are_ordered) > 0) {    
      tmp <- which(!is.na(match(names(picked_vars_ordered), names(which_are_ordered))))
      new_vars_to_transform[which_are_ordered] <- picked_vars_ordered[tmp]
    }
    vars_to_transform <- c(vars_to_transform, new_vars_to_transform)
  }
  return(vars_to_transform)
}

# Out-of-range warning
show_out_of_range_warning <- function(inner_dim, range, bound) {
  # bound: 'lower' or 'upper'
  .warning(paste0("The ", bound, " boundary of selector of ", inner_dim,
                  " is out of range [", min(range), ", ", max(range), "]. ",
                  "Check if the desired range is all included."))
}


# Generate sub_array_of_fri 
generate_sub_array_of_fri <- function(with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
                                      is_circular_dim) {
  print_warning <- FALSE
  if (goes_across_prime_meridian) {
    #NOTE: The potential problem here is, if it is global longitude,
    #      and the indices overlap (e.g., lon = [0, 359.723] and 
    #      CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). 
    #      Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll get two 649.
    #      The fix below may not be the best solution, but it works for the example above.

    if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
      # global longitude
      sub_array_of_fri <- 1:n  # n = prod(dim(var_with_selectors))

      if (with_transform & beta != 0) {
        # Warning if transform_extra_cell != 0
        print_warning <- TRUE
      }

    } else {
      # normal case, i.e., not global
      first_index <- min(unlist(sub_array_of_indices))
      last_index <- max(unlist(sub_array_of_indices))
      if (with_transform) {
        gap_width <- last_index - first_index - 1
        actual_beta <- min(gap_width, beta)
        sub_array_of_fri <- c(1:(first_index + actual_beta),                                      
                              (last_index - actual_beta):n)
        if (actual_beta != beta) {
          print_warning <- TRUE
        }
      } else {
        sub_array_of_fri <- c(1:first_index, last_index:n)
      }
    }

  } else {
    #NOTE: This if seems redundant.
#    if (is.list(sub_array_of_indices)) {
#      sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
#    }
    #NOTE: sub_array_of_indices may be vector or list
    if (with_transform) {
      first_index <- min(unlist(sub_array_of_indices))
      last_index <- max(unlist(sub_array_of_indices))
      start_padding <- min(beta, first_index - 1)
      end_padding <- min(beta, n - last_index)
  
      if (!is_circular_dim) {  #latitude or when <var>_reorder is not used
        sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
        if (start_padding != beta | end_padding != beta) {
          print_warning <- TRUE
        }
      } else {  #longitude
  #      if (((last_index + beta) - (first_index - beta) + 1) >= n) {
        if (start_padding <= beta & end_padding <= beta) {
          sub_array_of_fri <- 1:n
        } else if (start_padding < beta) {  # left side too close to border, need to go to right side
          sub_array_of_fri <- c((first_index - start_padding):(last_index + end_padding), (n - (beta - start_padding - 1)):n)
        } else if (end_padding < beta) { # right side too close to border, need to go to left side
          sub_array_of_fri <- c(1: (beta - end_padding), (first_index - start_padding):(last_index + end_padding))
        } else {  #normal
          sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
        }
      }

    } else {
      if (is.list(sub_array_of_indices)) {
        sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
      } else {
        sub_array_of_fri <- sub_array_of_indices
      }
    }
  }
  if (print_warning) {
    .warning(paste0("Adding parameter transform_extra_cells =  ", beta,
                    " to the transformed index excesses ",
                    "the border. The border index is used for transformation."))
  }

  return(sub_array_of_fri)
}

# This function merges two dimensions (e.g., time and sdate if "time_across = 'sdate'") into one. 
# The two dimensions have to be next to each other. In Start(), it is used to reshape 
# final_dims_fake if merge_across_dims = TRUE
dims_merge <- function(inner_dims_across_files, final_dims_fake) {
  # inner_dims_across_files would be like: $sdate: "time"
  for (file_dim_across in names(inner_dims_across_files)) {
    inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]])
    new_dims <- c()
    # part 1: Put the dims before 'time' in new_dims
    if (inner_dim_pos > 1) {
      new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)])
    }
    # part 2: Merge time and sdate together, and name this dim as 'time'
    # The cross and being crossed dims are next to each other, e.g., [time, sdate]
    new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), 
                                     inner_dims_across_files[[file_dim_across]]))
    # part 3: Put the dimes after 'sdate' in new_dims
    if (inner_dim_pos + 1 < length(final_dims_fake)) {
      new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)])
    }
    final_dims_fake <- new_dims
  }
  return(final_dims_fake)
}

# This function splits one dimension into two. In Start(), it is used to reshape final_dims_fake
# if split_multiselected_dims = TRUE. 
dims_split <- function(dim_params, final_dims_fake) {
  all_split_dims <- NULL
  for (dim_param in 1:length(dim_params)) {
    split_dims <- dim(dim_params[[dim_param]])
    if (!is.null(split_dims)) {
      if (length(split_dims) > 1) {
        all_split_dims <- c(all_split_dims, setNames(list(split_dims), 
                                                     names(dim_params)[dim_param]))
        if (is.null(names(split_dims))) {
          names(split_dims) <- paste0(names(dim_params)[dim_param], 
                                      1:length(split_dims))
        }
        old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param])

        # If merge_across_dims and split_multiselected_dims are both used,
        # on one file dim, and this file dim is multi-dim, it doesn't work.
        if (identical(old_dim_pos, integer(0))) {
          stop(paste0("The dimension '", names(dim_params)[dim_param], 
                      "' to be split cannot be found after 'merge_across_dims' ",
                      "is used. Check if the reshape parameters are used appropriately."))
        }
 
        # NOTE: Three steps to create new dims.
        # 1st: Put in the dims before split_dim.
        # 2nd: Replace the old_dim with split_dims.
        # 3rd: Put in the dims after split_dim.
        new_dims <- c()
        if (old_dim_pos > 1) {
          new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)])
        }
        new_dims <- c(new_dims, split_dims)
        if (old_dim_pos < length(final_dims_fake)) {
          new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)])
        }
        final_dims_fake <- new_dims
      }
    }
  }
  return(list(final_dims_fake, all_split_dims))
}


# This function sums up the length of all the inner across dim (e.g., time: list(31, 29, 31, 30))
# and use it to replace the value of that inner dim. That is, it returns the actual length of 
# time rather than using the one including NAs. In Start(), it is used to reshape final_dims_fake
# if merge_across_dims = TRUE, merge_across_dims_narm = TRUE, and split_multiselected_dims = FALSE. 
merge_narm_dims <- function(final_dims_fake, across_inner_dim, length_inner_across_dim) {
  final_dims_fake_name <- names(final_dims_fake)
  pos_across_inner_dim <- which(final_dims_fake_name == across_inner_dim)
  new_length_inner_dim <- sum(unlist(length_inner_across_dim))
  if (pos_across_inner_dim != length(final_dims_fake)) {
    final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                         new_length_inner_dim,
                         final_dims_fake[(pos_across_inner_dim + 1):length(final_dims_fake)])
  } else {
    final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                         new_length_inner_dim)
  }
  names(final_dims_fake) <- final_dims_fake_name
  return(final_dims_fake)
}



# Adjust the dim order. If split_multiselected_dims + merge_across_dims, the dim order may 
# need to be changed. The inner_dim needs to be the first dim among split dims.
reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, final_dims_fake) {
  all_split_dims <- c(all_split_dims[inner_dim_pos_in_split_dims],
                      all_split_dims[1:length(all_split_dims)][-inner_dim_pos_in_split_dims])
  split_dims_pos <- which(!is.na(match(names(final_dims_fake), names(all_split_dims))))
  new_dims <- c()
  if (split_dims_pos[1] != 1) {
    new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
  }
  new_dims <- c(new_dims, all_split_dims)
  if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
    new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
  }
  final_dims_fake <- new_dims

  return(list(final_dims_fake, all_split_dims))
}


# Build the work pieces.
build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, final_dims, 
                              found_pattern_dim, inner_dims_across_files, array_of_files_to_load,
                              array_of_not_found_files, array_of_metadata_flags,
                              metadata_file_counter, depending_file_dims, transform, 
                              transform_vars, picked_vars, picked_vars_ordered, picked_common_vars,
                              metadata_folder, debug = debug) {
  sub_array_dims <- final_dims[file_dims]
  sub_array_dims[found_pattern_dim] <- 1
  sub_array_of_files_to_load <- array(1:prod(sub_array_dims), 
                                      dim = sub_array_dims)
  names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims)
  # Detect which of the dimensions of the dataset go across files.
  file_dim_across_files <- lapply(inner_dims, 
                                  function(x) {
                                    dim_across <- sapply(inner_dims_across_files, function(y) x %in% y)
                                    if (any(dim_across)) {
                                      names(inner_dims_across_files)[which(dim_across)[1]]
                                    } else {
                                      NULL
                                    }
                                  })
  names(file_dim_across_files) <- inner_dims
  j <- 1
  while (j <= prod(sub_array_dims)) {
    # Work out file path.
    file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ]
    names(file_to_load_sub_indices) <- names(sub_array_dims)
    file_to_load_sub_indices[found_pattern_dim] <- i
    big_dims <- rep(1, length(dim(array_of_files_to_load)))
    names(big_dims) <- names(dim(array_of_files_to_load))
    file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]]
    file_to_load <- do.call('[[', c(list(array_of_files_to_load), 
                            as.list(file_to_load_indices)))
    not_found_file <- do.call('[[', c(list(array_of_not_found_files),
                              as.list(file_to_load_indices)))
    load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), 
                                  as.list(file_to_load_indices)))
    if (load_file_metadata) {
       metadata_file_counter <- metadata_file_counter + 1
       assign('metadata_file_counter', metadata_file_counter, envir = parent.frame())
    }
    if (!is.na(file_to_load) && !not_found_file) {
      # Work out indices to take
      first_round_indices <- lapply(inner_dims, 
                                    function (x) {
                                      if (is.null(file_dim_across_files[[x]])) {
                                        x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names")
                                        if (!is.null(x_dim_name)) {
                                          which_chunk <- file_to_load_sub_indices[x_dim_name]
                                          selectors[[x]][['fri']][[which_chunk]]
                                        } else {
                                          selectors[[x]][['fri']][[1]]
                                        }
                                      } else {
                                        which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] 
                                        selectors[[x]][['fri']][[which_chunk]]
                                      }
                                    })
      names(first_round_indices) <- inner_dims
      second_round_indices <- lapply(inner_dims, 
                                     function (x) {
                                       if (is.null(file_dim_across_files[[x]])) {
                                         x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names")
                                         if (!is.null(x_dim_name)) {
                                           which_chunk <- file_to_load_sub_indices[x_dim_name]
                                           selectors[[x]][['sri']][[which_chunk]]
                                         } else {
                                           selectors[[x]][['sri']][[1]]
                                         }
                                       } else {
                                         which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]]
                                         selectors[[x]][['sri']][[which_chunk]]
                                       }
                                     })
      if (debug) {
        print("-> BUILDING A WORK PIECE")
        #print(str(selectors))
      }
      names(second_round_indices) <- inner_dims
      if (!any(sapply(first_round_indices, length) == 0)) {
        work_piece <- list()
        work_piece[['first_round_indices']] <- first_round_indices
        work_piece[['second_round_indices']] <- second_round_indices
        work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices
        work_piece[['file_path']] <- file_to_load
        work_piece[['store_dims']] <- final_dims
        # Work out store position
        store_position <- final_dims
        store_position[names(file_to_load_indices)] <- file_to_load_indices
        store_position[inner_dims] <- rep(1, length(inner_dims))
        work_piece[['store_position']] <- store_position
        # Work out file selectors
        file_selectors <- sapply(file_dims, 
                                 function (x) {
                                   vector_to_pick <- 1
                                   if (x %in% names(depending_file_dims)) {
                                     vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]]
                                   }
                                   selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]]
                                 })
        names(file_selectors) <- file_dims
        work_piece[['file_selectors']] <- file_selectors
        # Send variables for transformation
        if (!is.null(transform) && (length(transform_vars) > 0)) {
          vars_to_transform <- NULL
          picked_vars_to_transform <- which(names(picked_vars) %in% transform_vars)
          if (length(picked_vars_to_transform) > 0) {
            picked_vars_to_transform <- names(picked_vars)[picked_vars_to_transform]
            vars_to_transform <- c(vars_to_transform, picked_vars[picked_vars_to_transform])
            if (any(picked_vars_to_transform %in% names(picked_vars_ordered))) {
              picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered))]
              vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[picked_vars_ordered_to_transform]
            }
          }
          picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars)
          if (length(picked_common_vars_to_transform) > 0) {
            picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform]
            vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform])
            if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) {
              picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))]
              vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform]
            }
          }
          work_piece[['vars_to_transform']] <- vars_to_transform
        }
        # Send flag to load metadata
        if (load_file_metadata) {
          work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter)
        }
        work_pieces <- c(work_pieces, list(work_piece))
      }
    }
    j <- j + 1
  }

  return(work_pieces)
}

# Calculate the progress %s that will be displayed and assign them to the appropriate work pieces.
retrieve_progress_message <- function(work_pieces, num_procs, silent) {
  if (length(work_pieces) / num_procs >= 2 && !silent) {
    if (length(work_pieces) / num_procs < 10) {
      amount <- 100 / ceiling(length(work_pieces) / num_procs)
      reps <- ceiling(length(work_pieces) / num_procs)
    } else {
      amount <- 10
      reps <- 10
    }
    progress_steps <- rep(amount, reps)
    if (length(work_pieces) < (reps + 1)) {
      selected_pieces <- length(work_pieces)
      progress_steps <- c(sum(head(progress_steps, reps)),
                          tail(progress_steps, reps))
    } else {
      selected_pieces <- round(seq(1, length(work_pieces), 
                                   length.out = reps + 1))[-1]
    }
    progress_steps <- paste0(' + ', round(progress_steps, 2), '%')
    progress_message <- 'Progress: 0%'
  } else {
    progress_message <- ''
    selected_pieces <- NULL
  }
  piece_counter <- 1
  step_counter <- 1
  work_pieces <- lapply(work_pieces, 
                        function (x) {
                          if (piece_counter %in% selected_pieces) {
                            wp <- c(x, list(progress_amount = progress_steps[step_counter]))
                            step_counter <<- step_counter + 1
                          } else {
                            wp <- x
                          }
                          piece_counter <<- piece_counter + 1
                          wp
                        })
  if (!silent) {
    .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.")
    .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.")
    .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:"))
    if (length(work_pieces) < 30) {
      lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2))
    } else {
      .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5)
    }
  }
    
  # Build the cluster of processes that will do the work and dispatch work pieces.
  # The function .LoadDataFile is applied to each work piece. This function will
  # open the data file, regrid if needed, subset, apply the mask, 
  # compute and apply the weights if needed,
  # disable extreme values and store in the shared memory matrix.
  #print("O")
  if (!silent) {
    .message("Loading... This may take several minutes...")
    if (progress_message != '') {
      .message(progress_message, appendLF = FALSE)
    }
  }
  return(work_pieces)
}

# If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs 
# due to unequal inner_dim ('time') length across file_dim ('sdate').
remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, across_inner_dim,
                                            length_inner_across_dim, data_array) {
  across_file_dim <- names(inner_dims_across_files)  #TODO: more than one?
  # Get the length of these two dimensions in final_dims
  length_inner_across_store_dims <- final_dims[across_inner_dim]
  length_file_across_store_dims <- final_dims[across_file_dim]
       
  # Create a logical array for merge_across_dims
  logi_array <- array(rep(FALSE,
                          length_file_across_store_dims * length_inner_across_store_dims),
                      dim = c(length_inner_across_store_dims, length_file_across_store_dims))
  for (i in 1:length_file_across_store_dims) {  #1:4
    logi_array[1:length_inner_across_dim[[i]], i] <- TRUE
  }
  
  # First, get the data array with final_dims dimension
  data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims)
       
  # Change the NA derived from additional spaces to -9999, then remove these -9999
  func_remove_blank <- function(data_array, logi_array) {
    # dim(data_array) = [time, file_date]
    # dim(logi_array) = [time, file_date]
    # Change the blank spaces from NA to -9999
    data_array[which(!logi_array)] <- -9999
    return(data_array)
  }
  data_array_final_dims <- multiApply::Apply(data_array_final_dims,
                                             target_dims = c(across_inner_dim, across_file_dim),  #c('time', 'file_date')
                                             output_dims = c(across_inner_dim, across_file_dim),