diff --git a/R/AreaWeights.R b/R/AreaWeights.R
new file mode 100644
index 0000000000000000000000000000000000000000..4178cb474509ef952eb515e61df3117dcb62231c
--- /dev/null
+++ b/R/AreaWeights.R
@@ -0,0 +1,54 @@
+AreaWeights <- function(listregs=NULL, datagrid='/cfu/autosubmit/con_files/mesh_mask_nemo.N3.2_O1L42.nc', datamask='/cfu/autosubmit/con_files/mask.regions.N3.2_O1L42.nc') {
+  #
+  # This function computes the area weights of the regions selected in
+  # listregs. It requires the ncdf package (open.ncdf, get.var.ncdf) and
+  # s2dverification's InsertDim.
+  #
+  # Args:
+  #   listregs : the list of names of the selected regions/zones/points.
+  #   datagrid : full path and file name of the NEMO or EC-Earth grid being
+  #              used (containing all grid parameters); for a regular
+  #              1x1-degree grid specify '1deg' (used by, e.g., HadISST sic).
+  #              If NULL, the output is a unit vector of length(listregs).
+  #   datamask : full path and file name of the masks, on datagrid, for all
+  #              regions/zones/points specified in listregs (1 inside the
+  #              region, 0 everywhere else).
+  #
+  # Returns:
+  #   areaweights : a vector with the areas (in 10^9 m^2) of the selected regions.
+  #
+  # History:
+  #   1.0  #  2014-10  (N.S. Fuckar, neven.fuckar@ic3.cat)  #  Original code
+  #
+###############################################################################
+
+if ( is.null(listregs) ) { stop('list of sea ice regions/zones/points is not specified') }
+
+if ( is.null(datagrid) ) {
+  areaweights <- rep(1, length(listregs))
+} else {
+  if ( datagrid=='1deg' ) {
+    # regular 1x1-degree grid: cell area is proportional to cos(latitude)
+    lat <- seq(-89.5, 89.5, 1)
+    tmp1 <- InsertDim(cos(lat*pi/180), 1, 360)
+    earthsurface <- 510072000000000
+    var1 <- tmp1*earthsurface/sum(tmp1)
+  } else {
+    # model grid: cell area is the product of the zonal and meridional
+    # scale factors e1t and e2t
+    ncid1 <- open.ncdf(datagrid)
+    e1t <- get.var.ncdf(ncid1, 'e1t')
+    e2t <- get.var.ncdf(ncid1, 'e2t')
+    var1 <- e1t*e2t
+    close.ncdf(ncid1)
+  }
+
+  ncid2 <- open.ncdf(datamask)
+
+  areaweights <- array(dim=length(listregs))
+  for ( ii in 1:length(listregs) ) {
+    var2 <- get.var.ncdf(ncid2, listregs[ii])
+    areaweights[ii] <- sum(var1*var2[,,1], na.rm=TRUE)/(10^9)
+  }
+
+  close.ncdf(ncid2)
+}
+
+invisible(areaweights)
+
+}
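+
+# Minimal usage sketch (illustrative only: the region names and the mask file
+# below are hypothetical; '1deg' selects the regular-grid branch above):
+#   w <- AreaWeights(listregs = c('Barents', 'Labrador'),
+#                    datagrid = '1deg',
+#                    datamask = '/path/to/mask.regions.1deg.nc')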
diff --git a/R/Clusters.R b/R/Clusters.R
new file mode 100644
index 0000000000000000000000000000000000000000..278d15608f50c8afa0b5dd8e8f1003bb999d9010
--- /dev/null
+++ b/R/Clusters.R
@@ -0,0 +1,101 @@
+Clusters <- function(var, weights, nclusters=NULL, listindex=NULL) {
+  #
+  # This function computes clusters and their time series of occurrences with
+  # the K-means clustering method on a matrix of input var (time, space).
+  # It requires the NbClust package.
+  #
+  # Args:
+  #   var : a matrix (time, space); the 2nd (spatial) dimension can correspond
+  #         either to area averages over a series of domains or to the grid
+  #         points of any grid (x,y), (x,y,z), (y,z), .., organised in an
+  #         array for each time step.
+  #   weights : a vector of weights corresponding to the areas covering each
+  #             domain/region or grid cell of var; its length should equal the
+  #             2nd (spatial) dimension of var.
+  #   nclusters : K, the number of clusters to be computed, or K initial
+  #               cluster centers (default is NULL, in which case the function
+  #               determines the optimal number of clusters and their centers,
+  #               which can be a CPU-intensive process).
+  #   listindex : list of validity indices (from the NbClust package, plus
+  #               'first') used to determine the optimal K when nclusters does
+  #               not provide a positive integer bigger than 1 or initial/seed
+  #               cluster centers (the default NULL leads to the application
+  #               of: 'kl', 'ch', 'duda', 'ratkowsky', 'ball', 'ptbiserial',
+  #               'gap', 'mcclain', 'tau', and 'sdindex').
+  #
+  # Returns: (output of the kmeans function)
+  #   $cluster : a vector (time series) of integers indicating the occurrence
+  #              of a cluster, i.e., when a data member in time is allocated
+  #              to a specific cluster (e.g., 2 1 3 1 1 1 ..)
+  #   $centers : a matrix of cluster centres or centroids
+  #              (e.g. [1:K, 1:spatial degrees of freedom])
+  #   $totss : the total sum of squares
+  #   $withinss : vector of within-cluster sums of squares, one component per cluster
+  #   $tot.withinss : total within-cluster sum of squares, i.e. sum(withinss)
+  #   $betweenss : the between-cluster sum of squares, i.e. totss - tot.withinss
+  #   $size : the number of points in each cluster
+  #
+  # History:
+  #   1.0  #  2014-10  (N.S. Fuckar, neven.fuckar@ic3.cat)  #  Original code
+  #
+##############################################################################
+
+if ( dim(var)[2] != length(weights) ) stop('spatial dimension of var is not equal to length of weights')
+
+var1 <- array(1, dim=dim(var))
+for ( i in 1:dim(var)[1] ) { var1[i,] <- var[i,]/weights }
+
+maxtryclstr <- 20   # maximum number of clusters to try
+maxnoiterat <- 300  # maximum number of kmeans iterations
+noatnbclust <- 10   # number of votes cast per validity index
+
+if ( !is.null(nclusters) ) {
+  kmeans.results <- kmeans(var1, centers=nclusters, iter.max=maxnoiterat, nstart=30)
+} else {
+  if ( is.null(listindex) ) { listindex <- c('kl', 'ch', 'duda', 'ratkowsky', 'ball', 'ptbiserial', 'gap', 'mcclain', 'tau', 'sdindex') }
+
+  Karray <- rep(0, length(listindex)*noatnbclust)
+
+  ii <- 0
+  for ( index1 in listindex ) {
+    for ( i in 1:noatnbclust ) {
+      ii <- ii + 1
+
+      if ( index1 != 'first' ) {
+        nbclust.results <- NbClust(var1, distance='euclidean', min.nc=2, max.nc=20, method='kmeans', index=index1)
+        Karray[ii] <- nbclust.results$Best.nc[1]
+      } else {
+        # 'first' casts 2*noatnbclust votes (an elbow criterion on both the
+        # within- and the between-cluster sums of squares), so extend Karray
+        Karray <- c(Karray, rep(0, noatnbclust))
+
+        tmp1a <- array(dim = maxtryclstr)
+        tmp1b <- array(dim = maxtryclstr)
+        tmp2a <- array(dim = maxtryclstr)
+        tmp2b <- array(dim = maxtryclstr)
+
+        for ( k in 1:maxtryclstr ) {
+          tmp1a[k] <- kmeans(var1, centers = k, iter.max = maxnoiterat, nstart = 30)$tot.withinss
+          tmp1b[k] <- kmeans(var1, centers = k, iter.max = maxnoiterat, nstart = 30)$betweenss
+        }
+        for ( k in 2:(maxtryclstr-1) ) {
+          tmp2a[k] <- (tmp1a[k] - tmp1a[k+1])/(tmp1a[k-1] - tmp1a[k])
+          tmp2b[k] <- (tmp1b[k] - tmp1b[k+1])/(tmp1b[k-1] - tmp1b[k])
+        }
+
+        ia <- 2
+        while ( tmp2a[ia] > tmp2a[ia+1] ) { ia <- ia + 1 }
+        Karray[ii] <- ia
+        for (j in 1:(noatnbclust-1)) { Karray[ii+j] <- ia }
+        ii <- ii + noatnbclust
+
+        ib <- 2
+        while ( tmp2b[ib] > tmp2b[ib+1] ) { ib <- ib + 1 }
+        Karray[ii] <- ib
+        for (j in 1:(noatnbclust-1)) { Karray[ii+j] <- ib }
+        ii <- ii + noatnbclust - 1
+        break
+      }
+    }
+  }
+
+  # vote count per candidate K; drop the zero padding left by unused slots
+  qw <- hist(Karray[Karray > 0], breaks = seq(0, maxtryclstr), plot=F)$counts
+
+  # which.max() (rather than which()) guarantees a single K in case of ties
+  kmeans.results <- kmeans(var1, centers=which.max(qw), iter.max=maxnoiterat, nstart=30)
+}
+
+invisible(kmeans.results)
+
+}
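+
+# Minimal usage sketch (hypothetical inputs: 'x' is a (time, region) matrix
+# and 'w' a vector of region areas, e.g. as returned by AreaWeights()):
+#   cl <- Clusters(x, weights = w, nclusters = 3)
+#   cl$cluster  # time series of cluster occurrences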
diff --git a/R/MultiVarRegime.R b/R/MultiVarRegime.R
new file mode 100644
index 0000000000000000000000000000000000000000..10e566eb47af0560952079f51c51e672a2bf36fb
--- /dev/null
+++ b/R/MultiVarRegime.R
@@ -0,0 +1,130 @@
+# Requires reshape2 (melt, acast), s2dverification (EOF, Composite) and,
+# for method = "medoids", the cluster package (pam).
+MultiVarRegime <- function(data, EOFS = TRUE, neofs = 30, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans",
+                           nstart = 30, multivariate = FALSE, listindex = NULL) {
+  if (!EOFS) {
+    stop("Currently clusters must be calculated from fields")
+  }
+  if (!multivariate) {
+    sdate <- which(names(dim(data)) == "sdate")
+    ftime <- which(names(dim(data)) == "ftime")
+    nftimes <- dim(data)[ftime]
+    nsdates <- dim(data)[sdate]
+    lon2 <- which(names(dim(data)) == "lon")
+    lat2 <- which(names(dim(data)) == "lat")
+    nlon <- dim(data)[lon2]   # taken before permuting, while lon2/lat2 are valid
+    nlat <- dim(data)[lat2]
+    dims <- c(1 : length(dim(data)))[-c(sdate, ftime, lon2, lat2)]
+    data <- aperm(data, c(sdate, ftime, lat2, lon2, dims))
+    data <- melt(data, varnames = c("Year", "Day", "Lat", "Lon"))
+    data <- acast(data, Year + Day ~ Lat ~ Lon)
+    if (is.null(ncenters)) {
+      stop("ncenters must be specified")
+    }
+  } else {
+    sdate <- which(names(dim(data)) == "sdate")
+    ftime <- which(names(dim(data)) == "ftime")
+    nftimes <- dim(data)[ftime]
+    nsdates <- dim(data)[sdate]
+    lon2 <- which(names(dim(data)) == "lon")
+    lat2 <- which(names(dim(data)) == "lat")
+    dataset <- which(names(dim(data)) == "var")
+    nvars <- dim(data)[dataset]
+    nlon <- dim(data)[lon2]
+    nlat <- dim(data)[lat2] * nvars  # the variables are stacked along latitude
+    lat <- rep(lat, nvars)
+    data <- aperm(data, c(sdate, ftime, lat2, dataset, lon2))
+    # standardise each variable so that no single one dominates the clustering
+    var_mean <- var_sd <- c()
+    for (i in 1 : nvars) {
+      var_mean[i] <- mean(data[, , , i, ], na.rm = TRUE)
+      var_sd[i] <- sd(data[, , , i, ], na.rm = TRUE)
+      data[, , , i, ] <- (data[, , , i, ] - var_mean[i]) / var_sd[i]
+    }
+    data <- melt(data, varnames = c("Year", "Day", "Lat", "Dataset", "Lon"))
+    data <- acast(data, Year + Day ~ Dataset + Lat ~ Lon)
+  }
+  if (EOFS == TRUE) {
+    #data <- princomp(data[,subs], cor = FALSE)$scores
+    dataPC <- EOF(data, lat = lat, lon = lon, neofs = neofs)
+    # keep the leading PCs that together explain more than 90% of the variance
+    threshold <- 90
+    minPC <- head(as.numeric(which(cumsum(dataPC$var) > threshold)), 1)
+  }
+  if (method == "kmeans") {
+    result <- kmeans(dataPC$PC[, 1 : minPC], centers = ncenters,
+                     iter.max = 100, nstart = nstart, trace = FALSE)
+    # reconstruct each regime in grid-point space as centroid x EOFs
+    reconstructed <- array(0, c(ncenters, nlat, nlon))
+    for (i in 1 : ncenters) {
+      field <- apply(dataPC$EOF[1 : minPC, , ], MARGIN = c(2, 3), function(x) x * result$centers[i, ])
+      reconstructed[i, , ] <- apply(field, MARGIN = c(2, 3), sum)
+    }
+    if (!multivariate) {
+      reconstructed <- aperm(reconstructed, c(1, 3, 2))
+      names(dim(reconstructed)) <- c("cluster", "lon", "lat")
+    } else {
+      # unstack the variables and undo the per-variable standardisation
+      nlats <- nlat / nvars
+      reconstructed2 <- array(0, c(nvars, ncenters, nlats, nlon))
+      for (i in 1 : nvars) {
+        for (j in 1 : ncenters) {
+          reconstructed2[i, j, , ] <- reconstructed[j, (((i - 1) * nlats) + 1) : (i * nlats), ]
+        }
+        reconstructed2[i, , , ] <- (reconstructed2[i, , , ] * var_sd[i]) + var_mean[i]
+      }
+      reconstructed <- aperm(reconstructed2, c(1, 2, 4, 3))
+      rm(reconstructed2)
+      names(dim(reconstructed)) <- c("var", "cluster", "lon", "lat")
+    }
+
+    cluster_timeseries <- list(lengths = c(), values = c())
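+    # rle() compresses each start date's cluster sequence into runs: the run
+    # lengths feed the persistence estimate and their totals the frequencies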
+    for (i in 1 : nsdates) {
+      occurrences <- rle(result$cluster[((i * nftimes) + 1 - nftimes) : (i * nftimes)])
+      cluster_timeseries <- list(lengths = c(cluster_timeseries$lengths, occurrences$lengths),
+                                 values = c(cluster_timeseries$values, occurrences$values))
+    }
+    frequency <- persistence <- list()
+    for (i in 1 : ncenters) {
+      frequency[[i]] <- (sum(cluster_timeseries$lengths[cluster_timeseries$values == i]) / length(result$cluster)) * 100
+      persistence[[i]] <- mean(cluster_timeseries$lengths[cluster_timeseries$values == i])
+    }
+
+  } else if (method == "medoids") {
+    result <- pam(dataPC$PC[, 1 : minPC], k = ncenters)
+    reconstructed <- array(0, c(ncenters, nlat, nlon))
+    for (i in 1 : ncenters) {
+      field <- apply(dataPC$EOF[1 : minPC, , ], MARGIN = c(2, 3), function(x) x * result$medoids[i, ])
+      reconstructed[i, , ] <- apply(field, MARGIN = c(2, 3), sum)
+    }
+    reconstructed <- aperm(reconstructed, c(1, 3, 2))
+    names(dim(reconstructed)) <- c("cluster", "lon", "lat")
+    cluster_timeseries <- list(lengths = c(), values = c())
+    for (i in 1 : nsdates) {
+      occurrences <- rle(result$clustering[((i * nftimes) + 1 - nftimes) : (i * nftimes)])
+      cluster_timeseries <- list(lengths = c(cluster_timeseries$lengths, occurrences$lengths),
+                                 values = c(cluster_timeseries$values, occurrences$values))
+    }
+    frequency <- persistence <- list()
+    for (i in 1 : ncenters) {
+      frequency[[i]] <- (sum(cluster_timeseries$lengths[cluster_timeseries$values == i]) / length(result$clustering)) * 100
+      persistence[[i]] <- mean(cluster_timeseries$lengths[cluster_timeseries$values == i])
+    }
+  } else {
+    # any other 'method' is passed to hclust as its agglomeration method
+    result <- hclust(dist(dataPC$PC[, 1 : minPC]), method = method)
+    clusterCut <- cutree(result, ncenters)
+    data <- aperm(data, c(3, 2, 1))
+    result <- Composite(data, clusterCut)
+  }
+  if (method == "kmeans") {
+    return(list(field = reconstructed, EOF = dataPC$EOF, center = result$centers, cluster = result$cluster,
+                cluster_lengths = cluster_timeseries$lengths, cluster_values = cluster_timeseries$values,
+                persistence = persistence, frequency = frequency))
+  } else if (method == "medoids") {
+    return(list(field = reconstructed, EOF = dataPC$EOF, center = result$medoids, cluster = result$clustering,
+                cluster_lengths = cluster_timeseries$lengths, cluster_values = cluster_timeseries$values,
+                persistence = persistence, frequency = frequency))
+  } else {
+    return(list(field = result, cluster = clusterCut))
+  }
+}
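+
+# Minimal usage sketch (hypothetical: 'ano' is an anomaly array with named
+# dimensions sdate/ftime/lat/lon, as prepared in the Regimes vignette):
+#   WR <- MultiVarRegime(ano, lon = lon, lat = lat, ncenters = 4)
+#   WR$frequency  # percentage of days assigned to each regime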
diff --git a/R/SeaIceModes.R b/R/SeaIceModes.R
new file mode 100644
index 0000000000000000000000000000000000000000..069898d3555716382d1d7551be62632e7cba3290
--- /dev/null
+++ b/R/SeaIceModes.R
@@ -0,0 +1,70 @@
+SeaIceModes <- function(var, exp, obs, sdates, seasoninf, seasonsup, nclusters=NULL, nleadtime=NULL, nmember=NULL, leadtimemin=1, leadtimemax=NULL, storefreq="monthly", sampleperiod=1, pldgr=0, listregs=NULL, weights=NULL, listindx=NULL) {
+
+  if ( is.null(listregs) ) { stop('list of sea ice regions/zones/points is not specified') }
+
+  if ( is.null(weights) ) { weights <- rep(1, length(listregs)) }
+
+  maxnoiterat <- 300  # maximum number of kmeans iterations
+
+  for ( i in 1:length(listregs) ) {
+    # load all ensemble members; member selection via 'nmember' happens below
+    tmp1 <- Load(paste(var, listregs[i], sep='_'), exp, obs, sdates, nleadtime=nleadtime, nmember=NULL, leadtimemin=leadtimemin, leadtimemax=leadtimemax, storefreq=storefreq, sampleperiod=sampleperiod)
+
+    if ( is.null(obs) ) {
+      tmp2 <- Season(tmp1$mod, posdim = 4, monini = as.integer(substr(sdates, 5, 6)), moninf = seasoninf, monsup = seasonsup)
+    } else {
+      tmp2 <- Season(tmp1$obs, posdim = 4, monini = as.integer(substr(sdates, 5, 6)), moninf = seasoninf, monsup = seasonsup)
+    }
+
+    # build the indices of the selected season within each 12-month cycle of
+    # the lead-time dimension, wrapping any index beyond the last lead time
+    ti2 <- ti1 <- seq(seasoninf, seasonsup)
+    while ( max(ti2) < dim(tmp2)[4] ) { ti1 <- ti1 + 12; ti2 <- append(ti2, ti1) }
+    tif <- dim(tmp2)[4]
+    ti2[ti2 > tif] <- ti2[ti2 > tif] - tif
+    ti2 <- unique(ti2)
+
+    # keep only the seasonal means at those indices
+    tmp2a <- drop(tmp2)
+    tii <- min(ti2)
+    tmp2b <- matrix(0, nrow = dim(tmp2a)[1], ncol = 1)
+    tmp2b[, 1] <- tmp2a[, tii]
+    for (i1 in (tii+1):tif) {
+      if (i1 %in% ti2) { tmp2b <- cbind(tmp2b, tmp2a[,i1]) }
+    }
+
+    tmp2 <- InsertDim(var=InsertDim(var=tmp2b, posdim=2, lendim=1), posdim=1, lendim=1)
+
+    if ( is.null(nmember) ) {
+      tmp3 <- Mean1Dim(tmp2, posdim=2)
+    } else {
+      if ( length(nmember) == 1 ) {
+        if ( nmember > length(tmp2[1,,1,1]) ) stop('selected ensemble member is not available')
+        tmp3 <- InsertDim(var=InsertDim(var=tmp2[1,nmember,1,], posdim=1, lendim=1), posdim=1, lendim=1)
+      } else {
+        if ( length(nmember) == 2 ) {
+          if ( max(nmember) > length(tmp2[1,,1,1]) ) stop('selected ensemble member is not available')
+          tmp3 <- Mean1Dim(tmp2, posdim=2, limits=nmember)
+        } else {
+          stop('incorrect selection of ensemble members')
+        }
+      }
+    }
+
+    if ( i==1 ) { tmp4 <- array(dim=c(length(tmp3[1,1,]), length(listregs))) }
+    tmp4[,i] <- tmp3[1,1,]
+  }
+
+  # pldgr < 0 keeps the full values; pldgr >= 0 removes a polynomial fit of
+  # that degree (0 removes the long-term mean)
+  if (pldgr >= 0) {
+    var <- Trend(var = tmp4, posTR = 1, pldegree = pldgr)$detrended
+  } else {
+    var <- tmp4
+  }
+
+  # weight each region/grid cell by its area before clustering
+  if (dim(var)[2] != length(weights)) {
+    stop('spatial dimension of var is not equal to length of weights')
+  }
+  var1 <- array(1, dim=dim(var))
+  for (i in 1 : dim(var)[1]) {
+    var1[i,] <- var[i,] / weights
+  }
+
+  if ( is.null(nclusters) ) {
+    # as documented, fall back to Clusters() to determine the optimal K
+    output <- Clusters(var, weights, listindex = listindx)
+  } else {
+    output <- kmeans(var1, centers = nclusters, iter.max = maxnoiterat, nstart = 30)
+  }
+
+  invisible(output)
+
+}
diff --git a/R/WeatherRegime.R b/R/WeatherRegime.R
new file mode 100644
index 0000000000000000000000000000000000000000..177454dfdd63627c322bfbb328a74bbcd107ad2b
--- /dev/null
+++ b/R/WeatherRegime.R
@@ -0,0 +1,90 @@
+# Requires multiApply (Apply) and s2dverification (EOF, Composite, InsertDim).
+WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = 90, lat_weights = TRUE, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans",
+                          nstart = 30, iter.max = 100, listindex = NULL, ncores = NULL) {
+  if (length(dim(data)) > 4) {
+    sdate <- which(names(dim(data)) == "sdate")
+    ftime <- which(names(dim(data)) == "ftime")
+    lon_dim <- which(names(dim(data)) == "lon")
+    lat_dim <- which(names(dim(data)) == "lat")
+    dims <- c(1 : length(dim(data)))[-c(sdate, ftime, lon_dim, lat_dim)]
+    data <- aperm(data, c(sdate, ftime, lat_dim, lon_dim, dims))
+    margins <- 5 : length(dim(data))
+    result <- Apply(data = list(data), margins = list(margins), AtomicFun = ".WeatherRegime", EOFS = EOFS, neofs = neofs,
+                    threshold = threshold, lat_weights = lat_weights, lon = lon, lat = lat, ncenters = ncenters, method = method,
+                    nstart = nstart, iter.max = iter.max, ncores = ncores)
+  } else {
+    result <- .WeatherRegime(data, EOFS = EOFS, neofs = neofs, threshold = threshold, lat_weights = lat_weights,
+                             lon = lon, lat = lat, ncenters = ncenters, method = method,
+                             nstart = nstart, iter.max = iter.max)
+  }
+  return(result)
+}
+
+.WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = 90, lat_weights = TRUE, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans",
+                           nstart = 30, iter.max = 100, listindex = NULL) {
+
+  names(dim(data)) <- c("sdate", "ftime", "lat", "lon")
+  sdate <- which(names(dim(data)) == "sdate")
+  ftime <- which(names(dim(data)) == "ftime")
+  nftimes <- dim(data)[ftime]
+  nsdates <- dim(data)[sdate]
+  lon2 <- which(names(dim(data)) == "lon")
+  lat2 <- which(names(dim(data)) == "lat")
+  nlon <- dim(data)[lon2]
+  nlat <- dim(data)[lat2]
+  data <- aperm(data, c(ftime, sdate, lat2, lon2))
+  dim(data) <- c(nftimes * nsdates, nlat, nlon)
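+  # time is now a single dimension of length nftimes * nsdates, with ftime
+  # varying fastest within each start date; the per-sdate rle() loop below
+  # relies on this ordering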
+
+  if (is.null(ncenters)) {
+    stop("ncenters must be specified")
+  }
+  if (EOFS == TRUE) {
+    if (lat_weights == FALSE) {
+      stop("If 'EOFS = TRUE', latitude weighting is applied automatically within EOF()")
+    }
+    #data <- princomp(data[,subs], cor = FALSE)$scores
+    dataPC <- EOF(data, lat = lat, lon = lon, neofs = neofs)
+    # keep the leading PCs that together explain more than threshold% of the variance
+    minPC <- head(as.numeric(which(cumsum(dataPC$var) > threshold)), 1)
+    cluster_input <- dataPC$PC[, 1 : minPC]
+  } else {
+    cluster_input <- data
+    if (lat_weights == TRUE) {
+      latWeights <- InsertDim(InsertDim(cos(lat*pi/180), 1, nftimes*nsdates), 3, nlon)
+      cluster_input <- cluster_input * latWeights
+    }
+    dim(cluster_input) <- c(nftimes * nsdates, nlat * nlon)
+  }
+  if (method == "kmeans") {
+    result <- kmeans(cluster_input, centers = ncenters,
+                     iter.max = iter.max, nstart = nstart, trace = FALSE)
+    # composite (time mean) of the field within each cluster
+    data <- aperm(data, c(3, 2, 1))  # (lon, lat, time), matching the names below
+    reconstructed <- Composite(data, result$cluster)
+    names(dim(reconstructed$composite)) <- c("lon", "lat", "cluster")
+    cluster_timeseries <- list(lengths = c(), values = c())
+    for (i in 1 : nsdates) {
+      occurrences <- rle(result$cluster[((i * nftimes) + 1 - nftimes) : (i * nftimes)])
+      cluster_timeseries <- list(lengths = c(cluster_timeseries$lengths, occurrences$lengths),
+                                 values = c(cluster_timeseries$values, occurrences$values))
+    }
+    frequency <- persistence <- c()
+    for (i in 1 : ncenters) {
+      frequency[i] <- (sum(cluster_timeseries$lengths[cluster_timeseries$values == i]) / length(result$cluster)) * 100
+      persistence[i] <- mean(cluster_timeseries$lengths[cluster_timeseries$values == i])
+    }
+
+  } else {
+    # any other 'method' is passed to hclust as its agglomeration method
+    result <- hclust(dist(cluster_input), method = method)
+    clusterCut <- cutree(result, ncenters)
+    data <- aperm(data, c(3, 2, 1))
+    result <- Composite(data, clusterCut)
+  }
+  if (method == "kmeans") {
+    return(list(composite = reconstructed$composite, pvalue = reconstructed$pvalue, cluster = as.array(result$cluster), center = as.array(result$centers),
+                cluster_lengths = as.array(cluster_timeseries$lengths), cluster_values = as.array(cluster_timeseries$values),
+                persistence = as.array(persistence), frequency = as.array(frequency)))
+  } else {
+    return(list(composite = result$composite, pvalue = result$pvalue, cluster = as.array(clusterCut)))
+  }
+}
diff --git a/Vignette/Regimes.html b/Vignette/Regimes.html
new file mode 100644
index 0000000000000000000000000000000000000000..b1b522837e20e638c1cc7b569ea31e4e5b882dcc
--- /dev/null
+++ b/Vignette/Regimes.html
@@ -0,0 +1,216 @@
[rendered HTML copy of Vignette/Regimes.md; 216 lines of markup and duplicated text omitted]
diff --git a/Vignette/Regimes.md b/Vignette/Regimes.md
new file mode 100644
index 0000000000000000000000000000000000000000..b89617d6895d4fea359d4ca4ae21be776bbe63cd
--- /dev/null
+++ b/Vignette/Regimes.md
@@ -0,0 +1,80 @@
+Weather Regimes
+==========
+Weather regimes can be calculated using the k-means clustering algorithm,
+typically applied to the PCs of anomalies of selected variables, after
+smoothing and detrending.
+
+The workflow will be something like:
+
+  - [**Load the data**](#loading): `Load()`.
+  - [**Calculate the anomalies**](#anomalies): `Ano()`.
+  - [**Smooth the anomalies**](#smoothing): `Smoothing()`.
+  - [**Detrend the smoothed anomalies**](#detrending): `Trend()`.
+  - [**Calculate the PCs and the k-means clusters, and reconstruct the field as cluster centers x EOFs**](#EOF): `WeatherRegime()`.
+  - [**Visualize the results**](#Visualize): `PlotEquiMap()`.
+
+The loading, detrending and calculation of anomalies need to be done before applying the WeatherRegime() function. The calculation of the EOFs/PCs (including the latitudinal area weighting) and the reconstruction of the field (cluster centers * EOFs) are done within the WeatherRegime() function.
+
+North Atlantic Weather Regimes
+------------------
+
+Below is a simple example of calculating the weather regimes for the North Atlantic from JRA55 SLP for January (with detrending disabled to reduce the computation time).
+
+```r
+library(s2dverification)
+library(reshape2)
+JRA55 <- '/esnas/recon/jma/jra55/$STORE_FREQ$_mean/$VAR_NAME$_f6h/$VAR_NAME$_$YEAR$$MONTH$.nc'
+psl <- "psl"
+year.start <- 1981
+year.end <- 2016
+# Euro-Atlantic region
+lat.max <- 81
+lat.min <- 27
+lon.max <- 45
+lon.min <- 274.5
+firstmonth <- 1
+leadmonth <- 0
+psl <- Load(var = psl, exp = NULL, obs = list(list(path = JRA55)), paste0(year.start:year.end, '0101'),
+            storefreq = 'daily', leadtimemax = 366, output = 'lonlat', latmin = lat.min,
+            latmax = lat.max, lonmin = lon.min, lonmax = lon.max, nprocs = 8)
+lon <- psl$lon
+lat <- psl$lat
+psl <- psl$obs
+#psl <- Trend(psl, posTR = 3)$detrended
+psl <- psl[1, 1, , , , ]        # drop the dataset and member dimensions
+psl <- Subset(psl, 2, 1 : 31)   # select the month of January
+psl <- psl / 100                # convert to hPa
+psl_ano <- psl - InsertDim(Mean1Dim(psl, 1), 1, 36)
+psl_ano_smoothed <- Smoothing(psl_ano, 7, 2)
+psl_ano_smoothed <- psl_ano_smoothed[, 4 : 28, , ]  # trim the running-mean edges
+names(dim(psl_ano_smoothed)) <- c("sdate", "ftime", "lat", "lon")
+WR <- WeatherRegime(psl_ano_smoothed, lon = lon, lat = lat, ncenters = 4)
+PlotLayout(fun = c('PlotEquiMap', 'PlotEquiMap', 'PlotEquiMap', 'PlotEquiMap'),
+           plot_dims = c('lat', 'lon'),
+           var = list(WR$composite[, , 1], WR$composite[, , 2],
+                      WR$composite[, , 3], WR$composite[, , 4]),
+           lon = lon, lat = lat,
+           titles = paste0("Frequency of occurrence ", round(unlist(WR$frequency)[1 : 4], digits = 1), "%"),
+           drawleg = 'E', units = 'hPa', units_scale = 2,
+           width = 12, height = 10, res = 200, fileout = 'weather_regimes.png')
+```
+
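+The returned list also carries the occurrence statistics of each regime. A
+quick way to tabulate them (a sketch; it assumes the `WR` object computed
+above):
+
+```r
+# regime frequency (% of days) and mean persistence (consecutive days)
+data.frame(regime = 1 : 4,
+           frequency = round(as.vector(WR$frequency), 1),
+           persistence = round(as.vector(WR$persistence), 1))
+```
+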
+Hierarchical clustering
+------------------
+
+Hierarchical clustering methods can also be used through the `method` option.
+For example, hierarchical clustering with average (mean) linkage can be
+applied via `method = "average"` to the same dataset used above:
+
+```r
+WR <- WeatherRegime(psl_ano, lon = lon, lat = lat, ncenters = 4, method = "average")
+# plot the 1st regime, dotting grid points where the composite is significant
+PlotEquiMap(WR$composite[, , 1], lon = lon, lat = lat, dots = WR$pvalue[, , 1] < 0.05)
+```
\ No newline at end of file
diff --git a/Vignette/WeatherRegime1.png b/Vignette/WeatherRegime1.png
new file mode 100644
index 0000000000000000000000000000000000000000..9146dd4dd7d6ef2a8c2fcfcabcf52168bc723584
Binary files /dev/null and b/Vignette/WeatherRegime1.png differ
diff --git a/Vignette/WeatherRegime2.png b/Vignette/WeatherRegime2.png
new file mode 100644
index 0000000000000000000000000000000000000000..eddaaed7c5a71ea3cf128863405aeb41e77b8c84
Binary files /dev/null and b/Vignette/WeatherRegime2.png differ
diff --git a/Vignette/weather_regimes.png b/Vignette/weather_regimes.png
new file mode 100644
index 0000000000000000000000000000000000000000..9dbebb37ed1c99fee0a96c58b1ad660484de98cc
Binary files /dev/null and b/Vignette/weather_regimes.png differ
diff --git a/man/SeaIceModes.Rd b/man/SeaIceModes.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..a2c13cf3ef47989cdfe17b62d2f24e8cb0adbe11
--- /dev/null
+++ b/man/SeaIceModes.Rd
@@ -0,0 +1,88 @@
+\name{SeaIceModes}
+\alias{SeaIceModes}
+\title{
+  Computes sea ice variability modes or clusters.
+}
+\description{
+  This function computes sea ice variability modes based on the K-means
+  clustering method applied to a sea ice variable, such as area, extent,
+  volume or thickness, averaged over the selected regions with sea ice in
+  the Northern or Southern Hemisphere.
+}
+\usage{
+  SeaIceModes(var, exp, obs, sdates, seasoninf, seasonsup, nclusters=NULL, nleadtime=NULL, nmember=NULL, leadtimemin=1, leadtimemax=NULL, storefreq="monthly", sampleperiod=1, pldgr=0, listregs=NULL, weights=NULL, listindx=NULL)
+}
+\arguments{
+  \item{var}{
+    For example: sia, sie, sit, .. (sea ice area, extent, thickness, ..).
+    exp, obs, sdates, nleadtime, leadtimemin, leadtimemax, storefreq and
+    sampleperiod are the same as in Load: only var, exp, obs and sdates are
+    mandatory, and only 1 experiment or only 1 observational dataset can be
+    loaded.
+  }
+  \item{nmember}{
+    Selected ensemble member, or mean between selected ensemble members, e.g.
+    c(1,3) (the default NULL produces the ensemble mean of all available
+    ensemble members).
+  }
+  \item{seasoninf}{
+    Month of the year when to start computing the extended seasonal means: 1 to 12.
+  }
+  \item{seasonsup}{
+    Month of the year when to stop computing the extended seasonal means: 1 to 12.
+    The extended seasonal means can span the 31st of December, e.g., the DJF
+    average. For monthly means, use the same value for seasoninf and seasonsup.
+  }
+  \item{nclusters}{
+    K, the number of clusters to be computed, or K initial cluster centers
+    (the default NULL lets Clusters.R determine the optimal number of clusters
+    and their centers).
+  }
+  \item{pldgr}{
+    pldgr<0 for using the full values, 0 for removing the long-term mean, and 0