diff --git a/.gitignore b/.gitignore
index 2e06392a861e3ff3debc5bc70ec12c1b98a1dd36..f705c88f97240242c7f32e18427971c28418a2ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,5 @@ test/report/*
 htmlcov
 .pytest_cache
 prof
-/.vscode
+.vscode
 .eggs
-
diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 5ea03ca97edc04fe0b123215df2f7844ef34b91a..0000000000000000000000000000000000000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "python.pythonPath": "/home/Earth/sloosvel/.conda/envs/diagonals/bin/python"
-}
\ No newline at end of file
diff --git a/diagonals/__init__.py b/diagonals/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..15bd90ffbf34d31b1d9a92f246c1dc92cc2f31a7 100644
--- a/diagonals/__init__.py
+++ b/diagonals/__init__.py
@@ -0,0 +1,71 @@
+import threading
+import contextlib
+import logging
+import six
+
+import numba.cuda
+
+logger = logging.getLogger(__name__)
+
+
+class Config(threading.local):
+    """Run-time configuration controller."""
+
+    def __init__(self, use_gpu=None):
+        """
+        A container for run-time options controls.
+        To adjust the values simply update the relevant attribute from
+        within your code. For example::
+            diagonals.CONFIG.use_gpu = False
+        If Iris code is executed with multiple threads, note the values of
+        these options are thread-specific.
+
+        """
+        if use_gpu is None:
+            use_gpu = numba.cuda.is_available()
+        self.__dict__['use_gpu'] = use_gpu
+        self.use_gpu = use_gpu
+
+    def __repr__(self):
+        return 'Config(use_gpu={0})'.format(self.use_gpu)
+
+    def __setattr__(self, name, value):
+        if name not in self.__dict__:
+            msg = "'Config' object has no attribute {!r}".format(name)
+            raise AttributeError(msg)
+        if name == 'use_gpu':
+            if value and not numba.cuda.is_available():
+                logger.warning(
+                    'CUDA is not available. Usage of GPU is disabled'
+                )
+                self.__dict__[name] = False
+                return
+        self.__dict__[name] = value
+
+    @contextlib.contextmanager
+    def context(self, **kwargs):
+        """
+        Return a context manager which allows temporary modification of
+        the option values for the active thread.
+        On entry to the `with` statement, all keyword arguments are
+        applied to the Config object. On exit from the `with`
+        statement, the previous state is restored.
+        For example::
+            with diagonals.CONFIG.context(cell_datetime_objects=False):
+                # ... code that expects numbers and not datetimes
+        """
+        # Save the current context
+        current_state = self.__dict__.copy()
+        # Update the state
+        for name, value in six.iteritems(kwargs):
+            setattr(self, name, value)
+        try:
+            yield
+        finally:
+            # Return the state
+            self.__dict__.clear()
+            self.__dict__.update(current_state)
+
+
+#: Object containing all the Iris run-time options.
+CONFIG = Config()
diff --git a/diagonals/__pycache__/__init__.cpython-37.pyc b/diagonals/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 3ac23b1d693a09d0e9c06847643641ed5ee32a9b..0000000000000000000000000000000000000000
Binary files a/diagonals/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/diagonals/diagnostics/__init__.py b/diagonals/diagnostics/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/diagonals/diagnostics/regmean.py b/diagonals/diagnostics/regmean.py
deleted file mode 100644
index af7d667362988d11197904042c85015b5575e8f8..0000000000000000000000000000000000000000
--- a/diagonals/diagnostics/regmean.py
+++ /dev/null
@@ -1,534 +0,0 @@
-import os
-import numpy as np
-import iris
-import iris.cube
-import iris.analysis
-import warnings
-import numba
-import datetime
-from numba import jit, njit
-from numba import vectorize
-from numba import guvectorize
-from numba import cuda
-from numba import int32, int64, float32, float64
-from numba.cuda.cudadrv import driver
-
-
-def check_device_and_compute_regmean_2D(var, basins, e1t, e2t):
-    """Function that checks device and calls computing functions.
-
-    Checks if the computations are going performed in the CPU or the GPU:
-
-    Parameters
-    ----------
-    var : float32
-        Masked array containing variable data.
-    basins : float32
-        List containing basin names and masks.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-
-    Returns
-    -------
-    regmean: float32
-        List containing regional mean for variable var
-    """
-    check_gpu = numba.cuda.is_available()
-    if check_gpu is False:
-        device = 'CPU'
-        regmean = compute_regmean_2D_cpu(var, basins, e1t, e2t)
-    else:
-        use_gpu = True
-        if use_gpu is True:
-            device = 'GPU'
-            regmean = compute_regmean_2D_gpu(var, basins, e1t, e2t)
-        else:
-            device = 'CPU'
-            regmean = compute_regmean_2D_cpu(var, basins, e1t, e2t)
-    print('Running on {}'.format(device))
-    return regmean
-
-
-def compute_regmean_2D_cpu(var, basins, e1t, e2t):
-    """Function that computes the regional mean for 2D vars in the cpu.
-
-       Computes the weights for each region and performs a weighted average.
-
-    Parameters
-    ----------
-    var : float32
-        Masked array containing variable data.
-    basins : float32
-        List containing basin names and masks.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-
-    Returns
-    -------
-    regmean_total: float32
-        List containing regional mean for variable var.
-    """
-    times = var.shape[0]
-    regmean_total = {}
-    for basin, mask in basins.items():
-        weights = compute_weights_2D(mask, e1t, e2t)
-        regmean = np.empty(times)
-        for t in range(times):
-            regmean[t] = np.average(var[t, :, :], axis=(0, 1),
-                                    weights=np.squeeze(weights))
-        regmean_total[basin] = regmean
-    return regmean_total
-
-
-def compute_regmean_3D_levels(var, basins, e1t, e2t, e3t):
-    """Function that computes the regional mean at every depth level
-    for 3D vars in the cpu.
-
-    Computes the weights for each region and performs a weighted average.
-
-    Parameters
-    ----------
-    var : float32
-        Masked array containing variable data.
-    basins : float32
-        List containing basin names and masks.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    e3t: float32
-        Masked array containing variable e3t.
-
-    Returns
-    -------
-    regmean_total: float32
-        List containing regional mean at every depth level for variable var.
-    """
-    times = var.shape[0]
-    levs = var.shape[1]
-    regmean_total = {}
-    for basin, mask in basins.items():
-        regmean = np.empty((times, levs))
-        w = compute_weights_3D(mask, e1t, e2t, e3t)
-        for t in range(times):
-            for l in range(levs):
-                regmean[t, l] = np.average(var[t, l, :, :], axis=(0, 1),
-                                           weights=np.squeeze(w)[l, :, :])
-        regmean_total[basin] = regmean
-    return regmean_total
-
-
-def compute_regmean_3D_cpu(var, basins, e1t, e2t, e3t):
-    """Function that computes the regional mean for 3D vars in the cpu.
-
-    Computes the weights for each region and performs a weighted average.
-
-    Parameters
-    ----------
-    var : float32
-        Masked array containing variable data.
-    basins : float32
-        List containing basin names and masks.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    e3t: float32
-        Masked array containing variable e3t.
-
-    Returns
-    -------
-    regmean_total: float32
-        List containing the regional mean for variable var.
-    """
-    times = var.shape[0]
-    regmean_total = {}
-    for basin, mask in basins.items():
-        weights = compute_weights_3D(mask, e1t, e2t, e3t)
-        regmean = np.empty(times)
-        for t in range(times):
-            regmean[t] = np.average(var[t, :, :, :], axis=(0, 1, 2),
-                                    weights=np.squeeze(weights))
-        regmean_total[basin] = regmean
-    return regmean_total
-
-
-def compute_regmean_2D_gpu(var, basins, e1t, e2t):
-    """Function that computes the regional mean for 2D vars in the gpu.
-
-    Computes the weights for each region and performs a weighted average.
-
-    Parameters
-    ----------
-    var : float32
-        Masked array containing variable data.
-    basins : float32
-        List containing basin names and masks.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-
-    Returns
-    -------
-    regmean_total: float32
-        List containing regional mean for variable var.
-    """
-    times = var.shape[0]
-    lats = var.shape[1]
-    lons = var.shape[2]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-    return masks
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-    gpu_weighted_var = cuda.device_array((mask, times, lats*lons),
-                                         dtype=np.float32)
-    gpu_vector_weight = cuda.device_array((mask, lats*lons), dtype=np.float32)
-
-    variable_weight_cuda[grid, block](gpu_var, gpu_e1t, gpu_e2t, gpu_masks,
-                                      gpu_weighted_var, gpu_vector_weight)
-
-    regmean = {}
-    for m in range(mask):
-        reduced_weight = sum_red_cuda(gpu_vector_weight[m, :])
-        regmean[m] = {}
-        for t in range(times):
-            reduced_var = sum_red_cuda(gpu_weighted_var[m, t, :])
-            regmean[m][t] = reduced_var / reduced_weight
-
-    return regmean
-
-
-def compute_regmean_gpu_3D(var, basins, e1t, e2t, e3t):
-    """Function that computes the regional mean for 3D vars in the gpu.
-
-    Computes the weights for each region and performs a weighted average.
-
-    Parameters
-    ----------
-    var : float32
-        Masked array containing variable data.
-    basins : float32
-        List containing basin names and masks.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    e3t: float32
-        Masked array containing variable e3t.
-
-    Returns
-    -------
-    regmean: float32
-        List containing the regional mean for variable var.
-    """
-    times = var.shape[0]
-    levs = var.shape[1]
-    lats = var.shape[2]
-    lons = var.shape[3]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_e3t = cuda.to_device(e3t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-
-    gpu_weighted_var = cuda.device_array((mask, times, levs*lats*lons),
-                                         dtype=np.float32)
-    gpu_vector_weight = cuda.device_array((mask, levs*lats*lons),
-                                          dtype=np.float32)
-
-    variable_weight_cuda_3D[grid, block](gpu_var, gpu_e1t, gpu_e2t, gpu_e3t,
-                                         gpu_masks, gpu_weighted_var,
-                                         gpu_vector_weight)
-
-    regmean = {}
-    for m in range(mask):
-        reduced_weight = sum_red_cuda(gpu_vector_weight[m, :])
-        regmean[m] = {}
-        for t in range(times):
-            reduced_var = sum_red_cuda(gpu_weighted_var[m, t, :])
-            regmean[m][t] = reduced_var / reduced_weight
-
-    return regmean
-
-
-def compute_regmean_lev_gpu_3D(var, basins, e1t, e2t, e3t):
-    """Function that computes the regional mean at every depth level
-    for 3D vars in the cpu.
-
-    Computes the weights for each region and performs a weighted average.
-
-    Parameters
-    ----------
-    var : float32
-        Masked array containing variable data.
-    basins : float32
-        List containing basin names and masks.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    e3t: float32
-        Masked array containing variable e3t.
-
-    Returns
-    -------
-    regmean: float32
-        List containing regional mean at every depth level for variable var.
-    """
-    times = var.shape[0]
-    levs = var.shape[1]
-    lats = var.shape[2]
-    lons = var.shape[3]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_e3t = cuda.to_device(e3t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-
-    gpu_weighted_var = cuda.device_array((mask, times, levs, lats*lons),
-                                         dtype=np.float32)
-    gpu_vector_weight = cuda.device_array((mask, levs, lats*lons),
-                                          dtype=np.float32)
-
-    variable_weight_cuda_level_3D[grid, block](gpu_var, gpu_weights,
-                                               gpu_weighted_var,
-                                               gpu_vector_weight)
-    del gpu_var, gpu_weights
-
-    regmean = {}
-    for m in range(mask):
-        regmean[m] = {}
-        for t in range(times):
-            regmean[m][t] = {}
-            for l in range(levs):
-                reduced_weight = sum_red_cuda(gpu_vector_weight[mask, l, :])
-                reduced_weighted_var = sum_red_cuda(gpu_weighted_var[mask, t,
-                                                                     l, :])
-                regmean[m][t][l] = reduced_weighted_var / reduced_weight
-    del gpu_weighted_var, gpu_vector_weight
-
-    return regmean
-
-
-@vectorize(['float32(float32, float32, float32)'], target='cpu')
-def compute_weights_2D(mask, e1t, e2t):
-    """Function that computes the regional weights for 2D vars in the cpu.
-
-    Parameters
-    ----------
-    mask : float32
-        Mask array containing a region mask.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-
-    Returns
-    -------
-    weights: float32
-        Masked array containing the weights for a given region.
-    """
-    weights = mask*e1t*e2t
-    return weights
-
-
-@vectorize(['float32(float32, float32, float32, float32)'], target='cpu')
-def compute_weights_3D(mask, e1t, e2t, e3t):
-    """Function that computes the regional weights for 3D vars in the cpu.
-
-    Parameters
-    ----------
-    mask : float32
-        Mask array containing a region mask.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    e3t: float32
-        Masked array containing variable e2t
-
-    Returns
-    -------
-    weights: float32
-        Masked array containing the weights for a given region.
-    """
-    weights = mask*e1t*e2t*e3t
-    return weights
-
-
-@cuda.jit
-def variable_weight_cuda(var, e1t, e2t, masks, weighted_var, vector_weight):
-    """Numba kernel executed on the gpu that computes the weights and
-    weights a 2D variable for each region
-
-    Parameters
-    ----------
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    masks : float32
-        Masked array containing region masks.
-    weighted_var: float32
-        Empty array to store the results.
-    vector_var: float32
-        Empty array to store the results.
-
-    Returns
-    -------
-    weighted_var: float32
-        Array containing the weighted variable for a given region.
-    vector_weight: float32
-        Array containing the weights for a given region.
-    """
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    x = i+j*var.shape[2]
-    if(i >= var.shape[2]):
-        return
-    for bas in range(basins):
-        vector_weight[bas, x] = 0.0
-        weighted_var[bas, t, x] = 0.0
-        vector_weight[bas, x] = e1t[j, i]*e2t[j, i]*masks[bas, j, i]
-        weighted_var[bas, t, x] = var[t, j, i] * vector_weight[bas, x]
-
-
-@cuda.jit
-def variable_weight_cuda_3D(var, e1t, e2t, e3t, masks, weighted_var,
-                            vector_weight):
-    """Numba kernel executed on the gpu that computes the weights and
-    weights a 3D variable for each region.
-
-    Parameters
-    ----------
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    e3t: float32
-        Masked array containing variable e3t.
-    masks : float32
-        Masked array containing region masks.
-    weighted_var: float32
-        Empty array to store the results.
-    vector_weight: float32
-        Empty array to store the results.
-
-    Returns
-    -------
-    weighted_var: float32
-        Array containing the weighted variable for a given region.
-    vector_weight: float32
-        Array containing the weights for a given region.
-    """
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    levels = var.shape[1]
-    if(i >= var.shape[3]):
-        return
-    for bas in range(basins):
-        for lev in range(levels):
-            x = i+var.shape[3]*(j+var.shape[2]*lev)
-            vector_weight[bas, x] = 0.0
-            vector_weight[bas, x] = (e1t[j, i] * e2t[j, i] *
-                                     e3t[lev, j, i] * masks[bas, j, i])
-            weighted_var[bas, t, x] = 0.0
-            weighted_var[bas, t, x] = var[t, lev, j, i] * vector_weight[bas, x]
-
-
-@cuda.jit
-def variable_weight_cuda_level_3D(var, e1t, e2t, e3t, masks, weighted_var,
-                                  vector_weight):
-    """Numba kernel executed on the gpu that computes the weights and
-    weights a 3D variable for each region.
-
-    Parameters
-    ----------
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    e3t: float32
-        Masked array containing variable e3t.
-    masks : float32
-        Masked array containing region masks.
-    weighted_var: float32
-        Empty array to store the results.
-    vector_weight: float32
-        Empty array to store the results.
-
-    Returns
-    -------
-    weighted_var: float32
-        Array containing the weighted variable for a given region.
-    vector_weight: float32
-        Array containing the weights for a given region.
-    """
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    levels = var.shape[1]
-    x = i+j*var.shape[3]
-    if(i >= var.shape[3]):
-        return
-    for bas in range(basins):
-        for lev in levels:
-            vector_weight[bas, lev, x] = 0.0
-            vector_weight[bas, lev, x] = (e1t[j, i] * e2t[j, i] *
-                                          e3t[lev, j, i] * masks[bas, j, i])
-            weighted_var[bas, t, lev, x] = (var[t, lev, j, i] *
-                                            vector_weight[bas, lev, x])
-
-
-@cuda.reduce
-def sum_red_cuda(x, y):
-    """Numba kernel executed on the gpu that performs a sum reduction:
-
-    Parameters
-    ----------
-    x : float32
-        Numpy array.
-    y : float32
-        Numpy array.
-
-    Returns
-    -------
-    x+y : float64
-        Reduction operation.
-    """
-    return x+y
diff --git a/diagonals/diagnostics/regsum.py b/diagonals/diagnostics/regsum.py
deleted file mode 100644
index 39a72f1883360414681a2daa63b976dc9cc45184..0000000000000000000000000000000000000000
--- a/diagonals/diagnostics/regsum.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-import numpy as np
-import iris
-import iris.cube
-import iris.analysis
-import warnings
-import numba
-import datetime
-from numba import jit, njit
-from numba import vectorize
-from numba import guvectorize
-from numba import cuda
-from numba import int32, int64, float32, float64
-from numba.cuda.cudadrv import driver
-
-
-def compute_regsum_2D_cpu(var, basins, e1t, e2t):
-    regsum = {}
-    for basin, mask in basins.items():
-        weighted_var = compute_weighted_var_2D(var, mask, e1t, e2t)
-        regsum[basin] = np.sum(weighted_var, axis=(1, 2))
-    return regsum
-
-
-def compute_regsum_3D_cpu(var, basins, e1t, e2t, e3t, save3d):
-    regsum = {}
-    regsum_3D = {}
-    for basin, mask in basins.items():
-        weighted_var = compute_weighted_var_3D(var, mask, e1t, e2t, e3t)
-        regsum[basin] = np.sum(weighted_var, axis=(2, 3))
-        if save3d:
-            regsum_3D[basin] = np.sum(regsum[basin], axis=(1,))
-        else:
-            regsum_3D = None
-    return regsum, regsum_3D
-
-
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
-           target='cpu')
-def compute_weighted_var_3D(var, mask, e1t, e2t, e3t):
-    weighted_var = var*mask*e1t*e2t*e3t
-    return weighted_var
-
-
-@vectorize(['float32(float32, float32, float32, float32)'], target='cpu')
-def compute_weighted_var_2D(var, mask, e1t, e2t):
-    weighted_var = var*mask*e1t*e2t
-    return weighted_var
diff --git a/__init__.py b/diagonals/examples/__init__.py
similarity index 100%
rename from __init__.py
rename to diagonals/examples/__init__.py
diff --git a/diagonals/examples/examples-moc.py b/diagonals/examples/examples-moc.py
index 00573a06b0037bb6db91e2239cca3c512d24c10d..70587182e96b71176398fb0dc75982db2098a157 100644
--- a/diagonals/examples/examples-moc.py
+++ b/diagonals/examples/examples-moc.py
@@ -1,80 +1,45 @@
-import os
+import logging
+import datetime
 import numpy as np
-import math
-import six
+
 import iris
 import iris.cube
-from iris.cube import CubeList
 import iris.analysis
 import iris.util
 import iris.coords
-import warnings
-import numba
-import datetime
-from numba import vectorize, guvectorize
-from numba import cuda
-from numba import int32, int64, float32, float64
-from numba.cuda.cudadrv import driver
-
-MESH_FILE = "/esarchive/autosubmit/con_files/mesh_mask_nemo.Ec3.2_O25L75.nc"
-REGIONS_FILE = "/esarchive/autosubmit/con_files/mask.regions.Ec3.2_O25L75.nc"
-VO_FILE = "/esarchive/exp/ecearth/a0qp/cmorfiles/CMIP/EC-Earth-Consortium/" \
-          "EC-Earth3-HR/historical/r1i1p1f1/Omon/vo/gn/v20190411/" \
-          "vo_Omon_EC-Earth3-HR_historical_r1i1p1f1_gn_201301-201312.nc"
-RES = '/esarchive/exp/ecearth/a16l/cmorfiles/CMIP/EC-Earth-Consortium' \
-      '/EC-Earth3-LR/historical/r1i1p1f1/Omon/vsftmyz/gn/v20180711' \
-      '/vsftmyz_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_196101-196112.nc'
-
-
-def main():
-    e3v, e1v = load_mesh()
-    basin_name, basins = load_masks()
-    area = {}
-    for basin in basins:
-        area[basin] = compute_area_basin(e1v, e3v, basins[basin])
-    del e3v, e1v
-    vo = load_vo()
-    for i in range(1):
-        start = datetime.datetime.now()
-        vsftmyz = compute_vsftmyz_cpu(vo, area)
-        print('np {0}'.format(datetime.datetime.now()-start))
-        start = datetime.datetime.now()
-        moc = compute_vsftmyz_gpu(vo, area)
-        print('gp {0}'.format(datetime.datetime.now()-start))
-    save_data(moc)
-    save_data_cpu(vsftmyz)
-
-
-def compute_vsftmyz_cpu(vo, area):
-    vsftmyz = []
-    for basin in area:
-        vsftmyz_basin = np.sum(multiply_vo_basin(vo, area[basin]), axis=3)
-        compute_moc(vsftmyz_basin, vsftmyz_basin)
-        vsftmyz.append(vsftmyz_basin)
-    return vsftmyz
 
+import diagonals
+import diagonals.moc as moc
+from diagonals.mesh_helpers.nemo import Nemo
 
-def compute_vsftmyz_gpu(vo, area):
-    times = vo.shape[0]
-    levels = vo.shape[1]
-    lats = vo.shape[2]
-    lons = vo.shape[3]
+MESH_FILE = "/esarchive/autosubmit/con_files/mesh_mask_nemo.Ec3.2_O1L75.nc"
+REGIONS_FILE = "/esarchive/autosubmit/con_files/mask.regions.Ec3.2_O1L75.nc"
+VO_FILE = "/esarchive/exp/ecearth/a16l/cmorfiles/CMIP/EC-Earth-Consortium/" \
+          "EC-Earth3-LR/historical/r1i1p1f1/Omon/vo/gn/v20180711/" \
+          "vo_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_196101-196112.nc"
 
-    block = (128, 1, 1)
-    grid_size = (lats // block[0]) + 1
-    grid3D = (grid_size, levels, times)
-    gpu_vo = cuda.to_device(vo.astype(np.float32))
-    gpu_moc = cuda.device_array((times, levels, lats), dtype=np.float32)
-    grid2D = (grid_size, times)
+logger = logging.getLogger(__name__)
 
-    vsftmyz = []
-    for basin in area:
-        gpu_area = cuda.to_device(area[basin].astype(np.float32))
-        horizontal_integral[grid3D, block](gpu_vo, gpu_area, gpu_moc)
-        compute_moc_gpu[grid2D, block](gpu_moc)
-        vsftmyz.append(gpu_moc.copy_to_host())
 
-    return vsftmyz
+def main():
+    logging.basicConfig(level=logging.INFO)
+    start = datetime.datetime.now()
+    logger.info('Starting at %s', start)
+    mesh = Nemo(MESH_FILE, REGIONS_FILE)
+    with diagonals.CONFIG.context(use_gpu=True):
+        e1v = mesh.get_i_length(cell_point='V')
+        e3v = mesh.get_k_length(cell_point='V')
+        basins = load_masks()
+        vo = load_vo()
+        moc_index = moc.compute(basins, e1v, e3v, vo)
+        del e1v, e3v, vo
+        if diagonals.CONFIG.use_gpu:
+            device = 'GPU'
+        else:
+            device = 'CPU'
+        save_cubes(moc_index, basins, device)
+    ellapsed = datetime.datetime.now() - start
+    logger.info('Total ellapsed time on the %s: %s', device, ellapsed)
 
 
 def load_vo():
@@ -83,12 +48,6 @@ def load_vo():
     return vo_data
 
 
-def load_mesh():
-    e3v = iris.load_cube(MESH_FILE, 'e3v_0').data.astype(np.float32)
-    e1v = iris.load_cube(MESH_FILE, 'e1v').data.astype(np.float32)
-    return e3v, e1v
-
-
 def load_masks():
     basins = {}
     global_mask = iris.load_cube(MESH_FILE, 'vmask')
@@ -98,67 +57,23 @@ def load_masks():
     global_mask.data[..., -1] = 0.0
     basins['Global_Ocean'] = global_mask.data.astype(np.float32)
     cubes = iris.load(REGIONS_FILE)
-    for cube in cubes:
+    for cube in cubes[0:2]:
         name = cube.name()
         if name in ('nav_lat', 'nav_lon', 'Caspian_Sea', 'Global_Ocean'):
             continue
         cube = cube.extract(iris.Constraint(z=1))
         basins[name] = cube.data.astype(np.float32)
-    return basin_name, basins
-
-
-def save_data(vsftmyz):
-    moc_cube = []
-    for basin in range(len(vsftmyz)):
-        moc_cube.append(iris.cube.Cube(vsftmyz[basin]))
-    iris.save(moc_cube, '/esarchive/scratch/sloosvel/numba_outputs/moc.nc',
-              zlib=True)
-
-
-def save_data_cpu(vsftmyz):
-    moc_cube = []
-    for basin in range(len(vsftmyz)):
-        moc_cube.append(iris.cube.Cube(vsftmyz[basin]))
-    iris.save(moc_cube, '/esarchive/scratch/sloosvel/numba_outputs/vsftmyz.nc',
-              zlib=True)
-
-
-@vectorize(['float32(float32, float32, float32)'], target='cpu')
-def compute_area_basin(e1v, e3v, basin):
-    return - e1v * e3v * basin / 1e6
-
-
-@vectorize(['float32(float32, float32)'], target='cpu')
-def multiply_vo_basin(vo, basin):
-    return vo*basin
-
-
-@guvectorize([(float32[:, :, :], float32[:, :, :])], '(t, l, j)->(t, l, j)',
-             target='cpu')
-def compute_moc(moc, out):
-    for lev in reversed(range(moc.shape[1]-1)):
-        moc[:, lev, :] = moc[:, lev, :] + moc[:, lev+1, :]
-
-
-@cuda.jit()
-def compute_moc_gpu(moc):
-    j, t = cuda.grid(2)
-    if(j >= moc.shape[2]):
-        return
-    for lev in range(moc.shape[1]-2, -1, -1):
-        moc[t, lev, j] = moc[t, lev, j] + moc[t, lev+1, j]
+    return basins
 
 
-@cuda.jit()
-def horizontal_integral(vo, area, moc):
-    j, lev, t = cuda.grid(3)
-    moc[t, lev, j] = 0.0
-    temp = 0.0
-    if(j >= vo.shape[2]):
-        return
-    for i in range(vo.shape[3]):
-        temp += vo[t, lev, j, i] * area[0, lev, j, i]
-    moc[t, lev, j] = temp
+def save_cubes(moc, basins, device):
+    cubes_moc = iris.cube.CubeList()
+    for basin in basins.keys():
+        cube_moc = iris.cube.Cube(moc[basin])
+        cube_moc.add_aux_coord(iris.coords.AuxCoord(basin, 'region'))
+        cubes_moc.append(cube_moc)
+    iris.save(cubes_moc.merge_cube(), '/esarchive/scratch/sloosvel/'
+              'numba_outputs/moc_{0}_merged.nc'.format(device), zlib=True)
 
 
 if __name__ == '__main__':
diff --git a/diagonals/examples/examples-oceanheatcontent.py b/diagonals/examples/examples-oceanheatcontent.py
index f0f2deb836b56355d1a8b6556b848894eba2baa5..867d818cde17168e2d94f575d281bbe8e97d9156 100644
--- a/diagonals/examples/examples-oceanheatcontent.py
+++ b/diagonals/examples/examples-oceanheatcontent.py
@@ -1,17 +1,14 @@
-import os
+import logging
+import datetime
 import numpy as np
+
 import iris
 import iris.cube
 import iris.analysis
-import warnings
-import numba
-import datetime
-from numba import jit
-from numba import vectorize
-from numba import guvectorize
-from numba import cuda
-from numba import int32, int64, float32, float64
-from numba.cuda.cudadrv import driver
+
+import diagonals
+import diagonals.ohc as ohc
+from diagonals.mesh_helpers.nemo import Nemo
 
 MESH_FILE = "/esarchive/autosubmit/con_files/mesh_mask_nemo.Ec3.2_O1L75.nc"
 DATA_FILE = "/esarchive/exp/ecearth/t05l/cmorfiles/CMIP/EC-Earth-Consortium" \
@@ -19,87 +16,41 @@ DATA_FILE = "/esarchive/exp/ecearth/t05l/cmorfiles/CMIP/EC-Earth-Consortium" \
             "/thetao_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_199001-199001.nc"
 REGIONS_FILE = "/esarchive/autosubmit/con_files/mask.regions.Ec3.2_O1L75.nc"
 
+logger = logging.getLogger(__name__)
+
 
 def main():
+    logging.basicConfig(level=logging.INFO)
     start = datetime.datetime.now()
+    logger.info('Starting at %s', start)
     one_layer = ((0, 300),)
+    mesh = Nemo(MESH_FILE, REGIONS_FILE)
     # multiple_layers = ((0, 200), (200, 700), (700, 2000),
     #                    (0, 100), (100, 200),(200, 300), (300, 400),
     #                    (400, 500), (500, 1000),)
-
-    mask, e3t, depth = load_mesh()
-    weights = check_device_and_compute_weights(one_layer, mask, e3t, depth)
-    del mask, depth, e3t
-    basin_name, basins = load_basins()
-    e1t, e2t = load_areas()
-    area = {}
-    for basin in basins:
-        area[basin] = compute_area_basin(e1t, e2t, basins[basin])
-    del e1t, e2t, basins
-    thetao = load_thetao()
-    thetao += 273.15
-    ohc, ohc_1D = compute_OHC(one_layer, weights, thetao, area)
-    del weights, thetao, area
-    if numba.cuda.is_available() is False:
-        device = 'CPU'
-    else:
-        device = 'GPU'
-    save_data(one_layer, basin_name, ohc, ohc_1D, device)
-    seconds_total = datetime.datetime.now() - start
-    print('Total ellapsed time on the {1}: {0}'.format(seconds_total, device))
-
-
-def check_device_and_compute_weights(layers, mask, e3t, depth):
-    check_gpu = numba.cuda.is_available()
-    if check_gpu is False:
-        device = 'CPU'
-        weights = compute_weights_cpu(layers, mask, e3t, depth)
-    else:
-        use_gpu = True
-        if use_gpu is True:
+    with diagonals.CONFIG.context(use_gpu=True):
+        e3t = mesh.get_k_length()
+        mask = mesh.get_landsea_mask()
+        depth = mesh.get_depth(cell_point='W')
+        weights = ohc.get_weights(one_layer, mask, e3t, depth)
+        del mask, depth, e3t
+
+        basin_name, basins = load_basins()
+        areacello = mesh.get_areacello()
+        area = ohc.get_basin_area(areacello, basins)
+        del areacello, basins
+
+        thetao = load_thetao()
+        thetao += 273.15
+        ohc_2D, ohc_1D = ohc.compute(one_layer, weights, thetao, area)
+        del weights, thetao, area
+        if diagonals.CONFIG.use_gpu:
             device = 'GPU'
-            weights = compute_weights_gpu(layers, mask, e3t, depth)
         else:
             device = 'CPU'
-            weights = compute_weights_cpu(layers, mask, e3t, depth)
-    print('Running on {}'.format(device))
-    return weights
-
-
-def compute_OHC(layers, weights, thetao, area):
-    check_gpu = numba.cuda.is_available()
-    if check_gpu is False:
-        ohc, ohc_1D = compute_ohc_cpu(layers, thetao, weights, area)
-    else:
-        ohc, ohc_1D = compute_ohc_gpu(layers, thetao, weights, area)
-    return ohc, ohc_1D
-
-
-def load_mesh():
-    mask = iris.load_cube(MESH_FILE, 'tmask').data.astype(np.float32)
-    e3t = iris.load_cube(MESH_FILE, 'e3t_0').data.astype(np.float32)
-    depth = iris.load_cube(MESH_FILE, 'gdepw_0').data.astype(np.float32)
-    return mask, e3t, depth
-
-
-def compute_weights_cpu(layers, mask, e3t, depth):
-    weights = []
-    for min_depth, max_depth in layers:
-        weights.append(calculate_weight_numba(min_depth, max_depth, e3t, depth,
-                       mask))
-    return weights
-
-
-def compute_weights_gpu(layers, mask, e3t, depth):
-    gpu_mask = cuda.to_device(mask.data.astype(np.float32))
-    gpu_e3t = cuda.to_device(e3t.data.astype(np.float32))
-    gpu_depth = cuda.to_device(depth.data.astype(np.float32))
-    weights = []
-    for min_depth, max_depth in layers:
-        weights.append(calculate_weight_numba_cuda(min_depth, max_depth,
-                       gpu_e3t, gpu_depth, gpu_mask))
-    del gpu_depth, gpu_mask, gpu_e3t
-    return weights
+        save_data(one_layer, basin_name, ohc_2D, ohc_1D, device)
+    ellapsed = datetime.datetime.now() - start
+    logger.info('Total ellapsed time on the %s: %s', device, ellapsed)
 
 
 def load_basins():
@@ -128,60 +79,11 @@ def load_thetao():
     return thetao_data
 
 
-def compute_ohc_cpu(layers, thetao, weights, area):
-    ohc = []
-    for layer in range(len(layers)):
-        ohc_layer = sum_red.reduce(multiply_thetao_weight(thetao,
-                                   weights[layer]), axis=1)
-        ohc.append(ohc_layer)
-        ohc1D_total = []
-        for basin in area:
-            ohc_basin = multiply_array(ohc_layer, area[basin])
-            temp = sum_red.reduce(ohc_basin, axis=1)
-            ohc1D = sum_red.reduce(temp, axis=1)
-            ohc1D_total.append(ohc1D)
-    return ohc, ohc1D_total
-
-
-def compute_ohc_gpu(layers, thetao, weights, area):
-    levels = thetao.shape[1]
-    times = thetao.shape[0]
-    lats = thetao.shape[2]
-    lons = thetao.shape[3]
-    basins = len(area)
-
-    area_basin = np.empty((basins, lats, lons))
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-    ohc = []
-    gpu_ohc = cuda.device_array((times, lats, lons), dtype=np.float32)
-    gpu_temp = cuda.device_array((basins, times, lats*lons), dtype=np.float32)
-    for i, basin in enumerate(area):
-        area_basin[i, :, :] = area[basin]
-    gpu_basins_area = cuda.to_device(area_basin.astype(np.float32))
-    gpu_thetao = cuda.to_device(thetao.astype(np.float32))
-
-    for layer in range(len(layers)):
-        compute_ohc[grid, block](gpu_thetao, weights[layer], gpu_ohc, levels)
-        ohc.append(gpu_ohc.copy_to_host())  # moure al final
-        multiply_ohc_basin[grid, block](gpu_ohc, gpu_basins_area, gpu_temp)
-        ohc1D_basin = []
-        for basin in range(basins):
-            ohc_1D = np.empty(times, dtype=np.float32)
-            for time in range(times):
-                ohc_1D[time] = sum_red_cuda(gpu_temp[basin, time, :])
-            ohc1D_basin.append(ohc_1D)
-    del gpu_ohc, gpu_temp, gpu_basins_area
-
-    return ohc, ohc1D_basin
-
-
-def save_data(layers, basins, ohc, ohc1D, device):
+def save_data(layers, basins, ohc_2D, ohc1D, device):
     ohc_cube = []
-    print(len(ohc1D))
+    logger.info('ohc1d length is %s', len(ohc1D))
     for i, layer in enumerate(layers):
-        ohc_cube.append(iris.cube.Cube(ohc[i],
+        ohc_cube.append(iris.cube.Cube(ohc_2D[i],
                                        long_name='Ocean heat content'
                                                  ' {0[0]} to {0[1]} meters'
                                        .format(layer)))
@@ -191,107 +93,12 @@ def save_data(layers, basins, ohc, ohc1D, device):
                           long_name='{0}'.format(basin)))
         iris.save(ohc_1D,
                   '/esarchive/scratch/sloosvel/numba_outputs'
-                  '/ohc_1D_{1}_{0[0]}_{0[1]}_numba_vec.nc'
+                  '/ohc_1D_{1}_{0[0]}_{0[1]}.nc'
                   .format(layer, device), zlib=True)
     iris.save(ohc_cube,
               '/esarchive/scratch/sloosvel/numba_outputs/ohc_{0}.nc'
               .format(device), zlib=True)
 
 
-@vectorize(['float32(int32, int32, float32, float32, float32)'], target='cuda')
-def calculate_weight_numba_cuda(min_depth, max_depth, e3t, depth, mask):
-    """
-    Calculates the weight for each cell
-    """
-    if not mask:
-        return 0
-    top = depth
-    bottom = top + e3t
-    if bottom < min_depth or top > max_depth:
-        return 0
-    else:
-        if top < min_depth:
-            top = min_depth
-        if bottom > max_depth:
-            bottom = max_depth
-
-        return (bottom - top) * 1020 * 4000
-
-
-@cuda.jit()
-def compute_ohc(thetao, weight, ohc, levels):
-    i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
-    j = cuda.blockIdx.y
-    t = cuda.blockIdx.z
-    # i, j, t = grid(3)
-    ohc[t, j, i] = 0.0
-    temp = 0.0
-    if(i >= thetao.shape[3]):
-        return
-    for lev in range(levels):
-        thetao_value = thetao[t, lev, j, i]
-        temp += thetao_value * weight[0, lev, j, i]
-    ohc[t, j, i] = temp
-
-
-@cuda.jit()
-def multiply_ohc_basin(ohc, area, temp):
-    i, j, t = cuda.grid(3)
-    basins = area.shape[0]
-    x = i+j*ohc.shape[2]
-    if(i >= ohc.shape[2]):
-        return
-    for basin in range(basins):
-        temp[basin, t, x] = 0.0
-        temp[basin, t, x] = ohc[t, j, i] * area[basin, j, i]
-
-
-@vectorize(['float32(int32, int32, float32, float32, float32)'], target='cpu')
-def calculate_weight_numba(min_depth, max_depth, e3t, depth, mask):
-    """
-    Calculates the weight for each cell
-    """
-    if not mask:
-        return 0
-    top = depth
-    bottom = top + e3t
-    if bottom < min_depth or top > max_depth:
-        return 0
-    else:
-        if top < min_depth:
-            top = min_depth
-        if bottom > max_depth:
-            bottom = max_depth
-
-        return (bottom - top) * 1020 * 4000
-
-
-@vectorize(['float32(float32, float32)'], target='cpu')
-def multiply_thetao_weight(thetao, weight):
-    return thetao*weight
-
-
-@vectorize(['float64(float32, float32)', 'float64(float64, float64)'],
-           target='cpu')
-def sum_red(x, y):
-    return x+y
-
-
-@vectorize(['float32(float32, float32)', 'float64(float64, float64)'],
-           target='cpu')
-def multiply_array(a, b):
-    return a*b
-
-
-@vectorize(['float32(float32, float32, float32)'], target='cpu')
-def compute_area_basin(e1t, e2t, basin):
-    return e1t*e2t*basin
-
-
-@cuda.reduce
-def sum_red_cuda(x, y):
-    return x+y
-
-
 if __name__ == '__main__':
     main()
diff --git a/diagonals/examples/examples-regionmean.py b/diagonals/examples/examples-regionmean.py
index 014514300aa17bbe02bb89426efa2da0d9963b23..4f18cb18616b787d84dcee60a907bac6b0644eb3 100644
--- a/diagonals/examples/examples-regionmean.py
+++ b/diagonals/examples/examples-regionmean.py
@@ -1,77 +1,58 @@
-import os
+import logging
+import datetime
 import numpy as np
+
 import iris
 import iris.cube
 import iris.analysis
-import warnings
-import numba
-import datetime
-from numba import jit, njit
-from numba import vectorize
-from numba import guvectorize
-from numba import cuda
-from numba import int32, int64, float32, float64
-from numba.cuda.cudadrv import driver
+
+import diagonals
+import diagonals.regmean as regmean
+from diagonals.mesh_helpers.nemo import Nemo
 
 MESH_FILE = "/esarchive/autosubmit/con_files/mesh_mask_nemo.Ec3.2_O1L75.nc"
-TOS_FILE = "/esarchive/exp/ecearth/a13c/cmorfiles/CMIP/EC-Earth-Consortium"\
-            "/EC-Earth3-LR/historical/r1i1p1f1/Omon/tos/gn/v20180501"\
-            "/tos_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_195001-195012.nc"
-THETAO_FILE = "/esarchive/exp/ecearth/a13c/cmorfiles/CMIP/EC-Earth-Consortium"\
-              "/EC-Earth3-LR/historical/r1i1p1f1/Omon/thetao/gn/v20180501/"\
+TOS_FILE = "/esarchive/exp/ecearth/a16l/cmorfiles/CMIP/EC-Earth-Consortium"\
+            "/EC-Earth3-LR/historical/r1i1p1f1/Omon/tos/gn/v20180711"\
+            "/tos_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_196101-196112.nc"
+THETAO_FILE = "/esarchive/exp/ecearth/a16l/cmorfiles/CMIP/EC-Earth-Consortium"\
+              "/EC-Earth3-LR/historical/r1i1p1f1/Omon/thetao/gn/v20180711/"\
               "thetao_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_"\
-              "195001-195012.nc"
+              "196101-196112.nc"
 REGIONS_FILE = "/esarchive/autosubmit/con_files/mask.regions.Ec3.2_O1L75.nc"
 
-
-def main():
-    e1t, e2t, e3t_0 = load_mesh()
-    basins = load_masks()
-    tos = load_tos()
-    for i in range(1):
-        start_gpu = datetime.datetime.now()
-        regmean_tos_gpu = compute_regmean_2D_gpu(tos, basins, e1t, e2t)
-        end_gpu = datetime.datetime.now() - start_gpu
-        start_numpy = datetime.datetime.now()
-        regmean_tos_numpy = compute_regmean_2D_numpy(tos, basins, e1t, e2t)
-        end_numpy = datetime.datetime.now() - start_numpy
-        print('Computing time 2D var GPU: {0} s'.format(end_gpu))
-        print('Computing time 2D var numpy: {0} s'.format(end_numpy))
-
-    del tos
-    thetao = load_thetao()
-    for i in range(1):
-        start_gpu_3D = datetime.datetime.now()
-        regmean_thetao_gpu = compute_regmean_gpu_3D(thetao, basins,  e1t, e2t,
-                                                    e3t_0)
-        regmean_thetao_lev_gpu = compute_regmean_lev_gpu_3D(thetao, basins,
-                                                            e1t, e2t, e3t_0)
-        end_gpu_3D = datetime.datetime.now() - start_gpu_3D
-        start_numpy_3D = datetime.datetime.now()
-        regmean_3D = compute_regmean_3D_numpy(thetao, basins, e1t, e2t, e3t_0)
-        reg_lev = compute_regmean_3D_levels_numpy(thetao, e1t, e2t, e3t_0)
-        end_numpy_3D = datetime.datetime.now() - start_numpy_3D
-        print('Computing time 3D var GPU: {0} s'.format(end_gpu_3D))
-        print('Computing time 3D var numpy: {0} s'.format(end_numpy_3D))
+logger = logging.getLogger(__name__)
 
 
-def load_mesh():
-    e1t = iris.load_cube(MESH_FILE, 'e1t').data.astype(np.float32)
-    e1t = np.squeeze(e1t)
-    e2t = iris.load_cube(MESH_FILE, 'e2t').data.astype(np.float32)
-    e2t = np.squeeze(e2t)
-    e3t = iris.load_cube(MESH_FILE, 'e3t_0').data.astype(np.float32)
-    e3t = np.squeeze(e3t)
-    return e1t, e2t, e3t
+def main():
+    logging.basicConfig(level=logging.INFO)
+    start = datetime.datetime.now()
+    logger.info('Starting at %s', start)
+    mesh = Nemo(MESH_FILE, REGIONS_FILE)
+
+    with diagonals.CONFIG.context(use_gpu=True):
+        areacello = mesh.get_areacello()
+        tos = load_tos()
+        basins = load_masks()
+        tosmean = regmean.compute_regmean_2D(tos, basins, areacello)
+        volcello = mesh.get_volcello()
+        thetao = load_thetao()
+        thetaomean = regmean.compute_regmean_levels(thetao, basins, volcello)
+        thetaomean3D = regmean.compute_regmean_3D(thetao, basins, volcello)
+        device = 'CPU'
+        save_cubes(tosmean, thetaomean, thetaomean3D, basins, device)
+    ellapsed = datetime.datetime.now() - start
+    logger.info('Total ellapsed time on the %s: %s', device, ellapsed)
 
 
 def load_masks():
     basins = {}
+    basin_name = []
     cubes = iris.load(REGIONS_FILE)
     for cube in cubes:
         name = cube.name()
         if name in ('nav_lat', 'nav_lon', 'Caspian_Sea'):
             continue
+        basin_name.append(name)
         cube = cube.extract(iris.Constraint(z=1))
         basins[name] = cube.data.astype(np.float32)
     return basins
@@ -84,252 +65,34 @@ def load_tos():
 
 
 def load_thetao():
-    thetao = iris.load_cube(THETAO_FILE, 'sea_water_potential_temperature')
+    thetao = iris.load_cube(THETAO_FILE, 'sea_water_conservative_temperature')
     thetao_data = thetao.data.astype(np.float32)
     return thetao_data
 
 
-def compute_regmean_2D_numpy(var, basins, e1t, e2t):
-    times = var.shape[0]
-    regmean = {}
-    for basin, mask in basins.items():
-        weights = compute_weights_2D(mask, e1t, e2t)
-        regmean[basin] = {}
-        for t in range(times):
-            regmean[basin][t] = np.average(var[t, :, :], axis=(0, 1),
-                                           weights=np.squeeze(weights))
-    return regmean
-
-
-def compute_regmean_3D_levels(var, basins, e1t, e2t, e3t):
-    times = var.shape[0]
-    levs = var.shape[1]
-    regmean_total = {}
-    for basin, mask in basins.items():
-        regmean = np.empty((times, levs))
-        w = compute_weights_3D(mask, e1t, e2t, e3t)
-        for t in range(times):
-            for l in range(levs):
-                regmean[t, l] = np.average(var[t, l, :, :], axis=(0, 1),
-                                           weights=np.squeeze(w)[l, :, :])
-        regmean_total[basin] = regmean
-    return regmean_total
-
-
-def compute_regmean_3D_cpu(var, basins, e1t, e2t, e3t):
-    times = var.shape[0]
-    regmean_total = {}
-    for basin, mask in basins.items():
-        weights = compute_weights_3D(mask, e1t, e2t, e3t)
-        regmean = np.empty(times)
-        for t in range(times):
-            regmean[t] = np.average(var[t, :, :, :], axis=(0, 1, 2),
-                                    weights=np.squeeze(weights))
-        regmean_total[basin] = regmean
-    return regmean_total
-
-
-def compute_regmean_2D_gpu(var, basins, e1t, e2t):
-    times = var.shape[0]
-    lats = var.shape[1]
-    lons = var.shape[2]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-    gpu_weighted_var = cuda.device_array((mask, times, lats*lons),
-                                         dtype=np.float32)
-    gpu_vector_weight = cuda.device_array((mask, lats*lons), dtype=np.float32)
-
-    variable_weight_cuda[grid, block](gpu_var, gpu_e1t, gpu_e2t, gpu_masks,
-                                      gpu_weighted_var, gpu_vector_weight)
-
-    regmean = {}
-    for m in range(mask):
-        reduced_weight = sum_red_cuda(gpu_vector_weight[m, :])
-        regmean[m] = {}
-        for t in range(times):
-            reduced_var = sum_red_cuda(gpu_weighted_var[m, t, :])
-            regmean[m][t] = reduced_var / reduced_weight
-
-    return regmean
-
-
-def compute_regmean_gpu_3D(var, basins, e1t, e2t, e3t):
-    times = var.shape[0]
-    levs = var.shape[1]
-    lats = var.shape[2]
-    lons = var.shape[3]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_e3t = cuda.to_device(e3t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-
-    gpu_weighted_var = cuda.device_array((mask, times, levs*lats*lons),
-                                         dtype=np.float32)
-    gpu_vector_weight = cuda.device_array((mask, levs*lats*lons),
-                                          dtype=np.float32)
-
-    variable_weight_cuda_3D[grid, block](gpu_var, gpu_e1t, gpu_e2t, gpu_e3t,
-                                         gpu_masks, gpu_weighted_var,
-                                         gpu_vector_weight)
-
-    regmean = {}
-    for m in range(mask):
-        reduced_weight = sum_red_cuda(gpu_vector_weight[m, :])
-        regmean[m] = {}
-        for t in range(times):
-            reduced_var = sum_red_cuda(gpu_weighted_var[m, t, :])
-            regmean[m][t] = reduced_var / reduced_weight
-
-    return regmean
-
-
-def compute_regmean_lev_gpu_3D(var, basins, e1t, e2t, e3t):
-    times = var.shape[0]
-    levs = var.shape[1]
-    lats = var.shape[2]
-    lons = var.shape[3]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_e3t = cuda.to_device(e3t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-
-    gpu_weighted_var = cuda.device_array((mask, times, levs, lats*lons),
-                                         dtype=np.float32)
-    gpu_vector_weight = cuda.device_array((mask, levs, lats*lons),
-                                          dtype=np.float32)
-
-    variable_weight_cuda_level_3D[grid, block](gpu_var, gpu_weights,
-                                               gpu_weighted_var,
-                                               gpu_vector_weight)
-    del gpu_var, gpu_weights
-
-    regmean = {}
-    for m in range(mask):
-        regmean[m] = {}
-        for t in range(times):
-            regmean[m][t] = {}
-            for l in range(levs):
-                reduced_weight = sum_red_cuda(gpu_vector_weight[mask, l, :])
-                reduced_weighted_var = sum_red_cuda(gpu_weighted_var[mask, t,
-                                                                     l, :])
-                regmean[m][t][l] = reduced_weighted_var / reduced_weight
-    del gpu_weighted_var, gpu_vector_weight
-
-    return regmean
-
-
-def basin_weights_2D(basins, e1t, e2t):
-    weights = {}
-    for basin, mask in basins.items():
-        weights[basin] = compute_weights_2D(mask, e1t, e2t)
-    return weights
-
-
-@vectorize(['float32(float32, float32, float32)'], target='cpu')
-def compute_weights_2D(mask, e1t, e2t):
-    weights = mask*e1t*e2t
-    return weights
-
-
-@vectorize(['float32(float32, float32, float32, float32)'], target='cpu')
-def compute_weights_3D(mask, e1t, e2t, e3t):
-    weights = mask*e1t*e2t*e3t
-    return weights
-
-
-@cuda.jit
-def variable_weight_cuda(var, e1t, e2t, masks, weighted_var, vector_weight):
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    x = i+j*var.shape[2]
-    if(i >= var.shape[2]):
-        return
-    for bas in range(basins):
-        vector_weight[bas, x] = 0.0
-        weighted_var[bas, t, x] = 0.0
-        vector_weight[bas, x] = e1t[j, i]*e2t[j, i]*masks[bas, j, i]
-        weighted_var[bas, t, x] = var[t, j, i] * vector_weight[bas, x]
-
-
-@cuda.jit
-def variable_weight_cuda_3D(var, e1t, e2t, e3t, masks, weighted_var,
-                            vector_weight):
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    levels = var.shape[1]
-    if(i >= var.shape[3]):
-        return
-    for bas in range(basins):
-        for lev in range(levels):
-            x = i+var.shape[3]*(j+var.shape[2]*lev)
-            vector_weight[bas, x] = 0.0
-            vector_weight[bas, x] = (e1t[j, i] * e2t[j, i] *
-                                     e3t[lev, j, i] * masks[bas, j, i])
-            weighted_var[bas, t, x] = 0.0
-            weighted_var[bas, t, x] = var[t, lev, j, i] * vector_weight[bas, x]
-
-
-@cuda.jit
-def variable_weight_cuda_level_3D(var, e1t, e2t, e3t, masks, weighted_var,
-                                  vector_weight):
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    levels = var.shape[1]
-    x = i+j*var.shape[3]
-    if(i >= var.shape[3]):
-        return
-    for bas in range(basins):
-        for lev in levels:
-            vector_weight[bas, lev, x] = 0.0
-            vector_weight[bas, lev, x] = (e1t[j, i] * e2t[j, i] *
-                                          e3t[lev, j, i] * masks[bas, j, i])
-            weighted_var[bas, t, lev, x] = (var[t, lev, j, i] *
-                                            vector_weight[bas, lev, x])
-
-
-@vectorize(['float32(float64, float64)'], target='cuda')
-def regmean_cuda(weighted_var, weights):
-    regmean = weighted_var/weights
-    return regmean
-
-
-@cuda.reduce
-def sum_red_cuda(x, y):
-    return x+y
+def save_cubes(tosmean, thetaomean, thetaomean3D, basins, device):
+    cubes_tosmean = iris.cube.CubeList()
+    cubes_thetaomean = iris.cube.CubeList()
+    cubes_thetaomean3D = iris.cube.CubeList()
+    for basin in basins.keys():
+        cube_tosmean = iris.cube.Cube(tosmean[basin])
+        cube_thetaomean = iris.cube.Cube(thetaomean[basin])
+        cube_thetaomean3D = iris.cube.Cube(thetaomean3D[basin])
+
+        cube_tosmean.add_aux_coord(iris.coords.AuxCoord(basin, 'region'))
+        cube_thetaomean.add_aux_coord(iris.coords.AuxCoord(basin, 'region'))
+        cube_thetaomean3D.add_aux_coord(iris.coords.AuxCoord(basin, 'region'))
+
+        cubes_tosmean.append(cube_tosmean)
+        cubes_thetaomean.append(cube_thetaomean)
+        cubes_thetaomean3D.append(cube_thetaomean3D)
+
+    iris.save(cubes_tosmean.merge_cube(), '/esarchive/scratch/sloosvel/'
+              'numba_outputs/tosmean_{0}.nc'.format(device), zlib=True)
+    iris.save(cubes_thetaomean.merge_cube(), '/esarchive/scratch/sloosvel/'
+              'numba_outputs/thetaomean_{0}.nc'.format(device), zlib=True)
+    iris.save(cubes_thetaomean3D.merge_cube(), '/esarchive/scratch/sloosvel/'
+              'numba_outputs/thetaomean3D_{0}.nc'.format(device), zlib=True)
 
 
 if __name__ == '__main__':
diff --git a/diagonals/examples/examples-regsum.py b/diagonals/examples/examples-regsum.py
index 9d9da18f158b63915e1ec83c10358503fe2c5c8c..6a49975ac4ae14be0cd3446339368a7d5b33da29 100644
--- a/diagonals/examples/examples-regsum.py
+++ b/diagonals/examples/examples-regsum.py
@@ -1,59 +1,48 @@
-import os
+import logging
+import datetime
 import numpy as np
+
 import iris
 import iris.cube
 import iris.analysis
-import warnings
-import numba
-import datetime
-from numba import jit, njit
-from numba import vectorize
-from numba import guvectorize
-from numba import cuda
-from numba import int32, int64, float32, float64
-from numba.cuda.cudadrv import driver
+
+import diagonals
+import diagonals.regsum as regsum
+from diagonals.mesh_helpers.nemo import Nemo
 
 MESH_FILE = "/esarchive/autosubmit/con_files/mesh_mask_nemo.Ec3.2_O1L75.nc"
-TOS_FILE = "/esarchive/exp/ecearth/a13c/cmorfiles/CMIP/EC-Earth-Consortium"\
-            "/EC-Earth3-LR/historical/r1i1p1f1/Omon/tos/gn/v20180501"\
-            "/tos_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_195001-195012.nc"
-THETAO_FILE = "/esarchive/exp/ecearth/a13c/cmorfiles/CMIP/EC-Earth-Consortium"\
-              "/EC-Earth3-LR/historical/r1i1p1f1/Omon/thetao/gn/v20180501/"\
+TOS_FILE = "/esarchive/exp/ecearth/a16l/cmorfiles/CMIP/EC-Earth-Consortium"\
+            "/EC-Earth3-LR/historical/r1i1p1f1/Omon/tos/gn/v20180711"\
+            "/tos_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_196101-196112.nc"
+THETAO_FILE = "/esarchive/exp/ecearth/a16l/cmorfiles/CMIP/EC-Earth-Consortium"\
+              "/EC-Earth3-LR/historical/r1i1p1f1/Omon/thetao/gn/v20180711/"\
               "thetao_Omon_EC-Earth3-LR_historical_r1i1p1f1_gn_"\
-              "195001-195012.nc"
+              "196101-196112.nc"
 REGIONS_FILE = "/esarchive/autosubmit/con_files/mask.regions.Ec3.2_O1L75.nc"
 
+logger = logging.getLogger(__name__)
+
 
 def main():
-    e1t, e2t, e3t_0, tmask = load_mesh()
-    basins = load_masks()
-    tos = load_tos()
-    thetao = load_thetao()
-    for i in range(1):
-        start = datetime.datetime.now()
-        # sum_cpu = compute_regsum_2D_cpu(tos, basins, e1t, e2t)
-        print('cpu {0}'.format(datetime.datetime.now()-start))
-        start = datetime.datetime.now()
-        # sum_gpu = compute_regsum_2D_gpu(tos, basins, e1t, e2t)
-        print('gpu {0}'.format(datetime.datetime.now()-start))
-        sum3D_cpu, sum_cpu = compute_regsum_3D_cpu(thetao, basins, e1t, e2t,
-                                                   e3t_0, tmask, True)
-        a = 0
-        # sum3D_gpu = compute_regsum_3D_gpu(thetao, basins, e1t, e2t, e3t_0)
-        # print(sum3D_cpu['Chukchi_Sea'][:])
-        # print(sum3D_gpu[0])
-
-
-def load_mesh():
-    e1t = iris.load_cube(MESH_FILE, 'e1t').data.astype(np.float32)
-    e1t = np.squeeze(e1t)
-    e2t = iris.load_cube(MESH_FILE, 'e2t').data.astype(np.float32)
-    e2t = np.squeeze(e2t)
-    e3t = iris.load_cube(MESH_FILE, 'e3t_0').data.astype(np.float32)
-    e3t = np.squeeze(e3t)
-    tmask = iris.load_cube(MESH_FILE, 'tmask').data.astype(np.int8)
-    tmask = np.squeeze(tmask)
-    return e1t, e2t, e3t, tmask
+    logging.basicConfig(level=logging.INFO)
+    start = datetime.datetime.now()
+    logger.info('Starting at %s', start)
+    mesh = Nemo(MESH_FILE, REGIONS_FILE)
+    with diagonals.CONFIG.context(use_gpu=True):
+        areacello = mesh.get_areacello()
+        tos = load_tos()
+        basins = load_masks()
+        tossum = regsum.compute_regsum_2D(tos, basins, areacello)
+        volcello = mesh.get_volcello()
+        tmask = mesh.get_landsea_mask()
+        thetao = load_thetao()
+        thetaosum = regsum.compute_regsum_levels(thetao, basins,
+                                                 volcello, tmask)
+        thetaosum3D = regsum.compute_regsum_3D(thetao, basins, volcello, tmask)
+        device = 'CPU'
+        save_cubes(tossum, thetaosum, thetaosum3D, basins, device)
+    ellapsed = datetime.datetime.now() - start
+    logger.info('Total ellapsed time on the %s: %s', device, ellapsed)
 
 
 def load_masks():
@@ -63,8 +52,6 @@ def load_masks():
         name = cube.name()
         if name in ('nav_lat', 'nav_lon', 'Caspian_Sea'):
             continue
-        if name not in ('Global_Ocean'):
-            continue
         cube = cube.extract(iris.Constraint(z=1))
         basins[name] = cube.data.astype(np.float32)
     return basins
@@ -78,142 +65,33 @@ def load_tos():
 
 def load_thetao():
     thetao = iris.load_cube(THETAO_FILE, 'sea_water_potential_temperature')
-    thetao_data = np.ma.filled(thetao.data, 0.0).astype(np.float32)
+    thetao_data = thetao.data.astype(np.float32)
     return thetao_data
 
 
-def compute_regsum_2D_cpu(var, basins, e1t, e2t):
-    regsum = {}
-    for basin, mask in basins.items():
-        weighted_var = compute_weighted_var_2D(var, mask, e1t, e2t)
-        regsum[basin] = np.sum(weighted_var, axis=(1, 2))
-    return regsum
-
-
-def compute_regsum_3D_cpu(var, basins, e1t, e2t, e3t, tmask, save3d):
-    regsum = {}
-    regsum_3D = {}
-    for basin, mask in basins.items():
-        weighted_var = compute_weighted_var_3D(var, mask, e1t, e2t, e3t, tmask)
-        regsum[basin] = np.sum(weighted_var, axis=(2, 3))
-        if save3d:
-            regsum_3D[basin] = np.sum(regsum[basin], axis=(1,))
-        else:
-            regsum_3D = None
-    return regsum, regsum_3D
-
-
-def compute_regsum_2D_gpu(var, basins, e1t, e2t):
-    times = var.shape[0]
-    lats = var.shape[1]
-    lons = var.shape[2]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-    gpu_weighted_var = cuda.device_array((mask, times, lats*lons),
-                                         dtype=np.float32)
-
-    variable_weight_cuda[grid, block](gpu_var, gpu_e1t, gpu_e2t, gpu_masks,
-                                      gpu_weighted_var)
-
-    regsum = {}
-    for m in range(mask):
-        regsum[m] = {}
-        for t in range(times):
-            regsum[m][t] = sum_red_cuda(gpu_weighted_var[m, t, :])
-
-    return regsum
-
-
-def compute_regsum_3D_gpu(var, basins, e1t, e2t, e3t):
-    times = var.shape[0]
-    levs = var.shape[1]
-    lats = var.shape[2]
-    lons = var.shape[3]
-    mask = len(basins)
-
-    masks = np.empty((mask, lats, lons))
-    for i, basin in enumerate(basins):
-        masks[i, :, :] = basins[basin]
-
-    block = (128, 1, 1)
-    grid_size = (lons // block[0]) + 1
-    grid = (grid_size, lats, times)
-
-    gpu_var = cuda.to_device(var.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_e3t = cuda.to_device(e3t.astype(np.float32))
-    gpu_masks = cuda.to_device(masks)
-    gpu_weighted_var = cuda.device_array((mask, times, levs*lats*lons),
-                                         dtype=np.float32)
-
-    variable_weight_cuda_3D[grid, block](gpu_var, gpu_e1t, gpu_e2t, gpu_e3t,
-                                         gpu_masks, gpu_weighted_var)
-
-    regsum = {}
-    for m in range(mask):
-        regsum[m] = {}
-        for t in range(times):
-            regsum[m][t] = sum_red_cuda(gpu_weighted_var[m, t, :])
-
-    return regsum
-
-@vectorize(['float64(float32, float32, float32, float32, float32, int8)'],
-           target='cpu')
-def compute_weighted_var_3D(var, mask, e1t, e2t, e3t, tmask):
-    weighted_var = var*mask*e1t*e2t*e3t*tmask
-    return weighted_var
-
-
-@vectorize(['float32(float32, float32, float32, float32)'], target='cpu')
-def compute_weighted_var_2D(var, mask, e1t, e2t):
-    weighted_var = var*mask*e1t*e2t
-    return weighted_var
-
-
-@cuda.jit
-def variable_weight_cuda(var, e1t, e2t, masks, weighted_var):
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    x = i+j*var.shape[2]
-    if(i >= var.shape[2]):
-        return
-    for bas in range(basins):
-        weighted_var[bas, t, x] = 0.0
-        weighted_var[bas, t, x] = (var[t, j, i] * e1t[j, i] * e2t[j, i] *
-                                   masks[bas, j, i])
-
-
-@cuda.jit
-def variable_weight_cuda_3D(var, e1t, e2t, e3t, masks, weighted_var):
-    i, j, t = cuda.grid(3)
-    basins = masks.shape[0]
-    levels = var.shape[1]
-    if(i >= var.shape[3]):
-        return
-    for bas in range(basins):
-        for lev in range(levels):
-            x = i+var.shape[3]*(j+var.shape[2]*lev)
-            weighted_var[bas, t, x] = 0.0
-            weighted_var[bas, t, x] = (var[t, j, i] * e1t[j, i] * e2t[j, i] *
-                                       e3t[j, i] * masks[bas, j, i])
-
-
-@cuda.reduce
-def sum_red_cuda(x, y):
-    return x+y
+def save_cubes(tossum, thetaosum, thetaosum3D, basins, device):
+    cubes_tossum = iris.cube.CubeList()
+    cubes_thetaosum = iris.cube.CubeList()
+    cubes_thetaosum3D = iris.cube.CubeList()
+    for basin in basins.items():
+        cube_tossum = iris.cube.Cube(tossum[basin])
+        cube_thetaosum = iris.cube.Cube(thetaosum[basin])
+        cube_thetaosum3D = iris.cube.Cube(thetaosum3D[basin])
+
+        cube_tossum.add_aux_coord(iris.coords.AuxCoord(basin, 'region'))
+        cube_thetaosum.add_aux_coord(iris.coords.AuxCoord(basin, 'region'))
+        cube_thetaosum3D.add_aux_coord(iris.coords.AuxCoord(basin, 'region'))
+
+        cubes_tossum.append(cube_tossum)
+        cubes_thetaosum.append(cube_thetaosum)
+        cubes_thetaosum3D.append(cube_thetaosum)
+
+    iris.save(cubes_tossum.merge_cube(), '/esarchive/scratch/sloosvel/'
+              'numba_outputs/tossum_{0}.nc'.format(device), zlib=True)
+    iris.save(cubes_thetaosum.merge_cube(), '/esarchive/scratch/sloosvel/'
+              'numba_outputs/thetaosum_{0}.nc'.format(device), zlib=True)
+    iris.save(cubes_thetaosum3D.merge_cube(), '/esarchive/scratch/sloosvel/'
+              'numba_outputs/thetaosum3D_{0}.nc'.format(device), zlib=True)
 
 
 if __name__ == '__main__':
diff --git a/diagonals/examples/examples-siarea.py b/diagonals/examples/examples-siarea.py
index f0bbd05e18c5908ff94e7677f334541a153fc729..389c68c97e034a3e8fa33439ce4e4a6e1113a2dd 100644
--- a/diagonals/examples/examples-siarea.py
+++ b/diagonals/examples/examples-siarea.py
@@ -1,9 +1,6 @@
-import os
-import numpy as np
-import glob
-import time
-import operator
+import logging
 import datetime
+import numpy as np
 
 import iris
 import iris.cube
@@ -11,17 +8,11 @@ import iris.analysis
 import iris.coords
 import iris.coord_categorisation
 import iris.analysis
-from iris.experimental.equalise_cubes import equalise_attributes
-from iris.time import PartialDateTime
 
-import warnings
-import numba
-from numba import jit
-from numba import vectorize
-from numba import guvectorize
-from numba import cuda
-from numba import int32, int64, float32, float64
-from numba.cuda.cudadrv import driver
+import diagonals
+import diagonals.siasie as siasie
+from diagonals.mesh_helpers.nemo import Nemo
+
 
 MESH_FILE = "/esarchive/autosubmit/con_files/mesh_mask_nemo.Ec3.2_O25L75.nc"
 REGIONS_FILE = "/esarchive/autosubmit/con_files/mask.regions.Ec3.2_O25L75.nc"
@@ -29,74 +20,31 @@ SIC_FILE = "/esarchive/exp/ecearth/a0qp/cmorfiles/CMIP/EC-Earth-Consortium"\
            "/EC-Earth3-HR/historical/r1i1p1f1/SImon/siconc/gn/v20180406"\
            "/siconc_SImon_EC-Earth3-HR_historical_r1i1p1f1_gn_195001-195012.nc"
 
+logger = logging.getLogger(__name__)
+
 
 def main():
+    logging.basicConfig(level=logging.INFO)
     start = datetime.datetime.now()
-    cube_gphit = load_cube(MESH_FILE, 'gphit')
-    cube_e1t = load_cube(MESH_FILE, 'e1t')
-    cube_e2t = load_cube(MESH_FILE, 'e2t')
-    cube_sic = load_cube(SIC_FILE, 'sea_ice_area_fraction')
-    gphit, e1t, e2t, sic_slices = load_data(cube_gphit, cube_e1t, cube_e2t,
-                                            cube_sic)
-    masks = load_masks(REGIONS_FILE)
-    extn, exts, arean, areas, device = check_device_and_compute(gphit, e1t,
-                                                                e2t,
-                                                                sic_slices,
-                                                                masks)
-    save_cubes(extn, exts, arean, areas, masks, device)
-    seconds_total = datetime.datetime.now() - start
-    print('Total ellapsed time on the {1}: {0}'.format(seconds_total, device))
-
-
-def check_device_and_compute(gphit, e1t, e2t, sic_slices, basins):
-    check_gpu = numba.cuda.is_available()
-    if check_gpu is False:
-        device = 'CPU'
-        extn, exts, arean, areas = compute_sic_cpu(gphit, e1t, e2t, sic_slices,
-                                                   basins)
-    else:
-        use_gpu = True
-        if use_gpu is True:
+    logger.info('Starting at %s', start)
+    mesh = Nemo(MESH_FILE, REGIONS_FILE)
+    with diagonals.CONFIG.context(use_gpu=True):
+        basins = load_masks(REGIONS_FILE)
+        areacello = mesh.get_areacello()
+        gphit = mesh.get_grid_latitude()
+        cube_sic = load_cube(SIC_FILE, 'sea_ice_area_fraction')
+        sic_slices = load_data(cube_sic)
+        masks = load_masks(REGIONS_FILE)
+        extn, exts, arean, areas = siasie.compute(gphit, areacello,
+                                                  sic_slices, masks)
+        del areacello, gphit, cube_sic, sic_slices
+        if diagonals.CONFIG.use_gpu:
             device = 'GPU'
-            extn, exts, arean, areas = compute_sic_gpu(gphit, e1t, e2t,
-                                                       sic_slices, basins)
         else:
             device = 'CPU'
-            extn, exts, arean, areas = compute_sic_cpu(gphit, e1t, e2t,
-                                                       sic_slices, basins)
-
-    return extn, exts, arean, areas, device
-
-
-def compute_sic_cpu(gphit, e1t, e2t, sic_slices, basins):
-    extn = {}
-    exts = {}
-    arean = {}
-    areas = {}
-
-    for sic_data in sic_slices:
-        time = sic_data.coord('time').points[0]
-        extn[time] = {}
-        exts[time] = {}
-        arean[time] = {}
-        areas[time] = {}
-        sic = sic_data.data
-        for basin, tmask in basins.items():
-            temp = extn_numba(gphit, e1t, e2t, tmask, sic)
-            extn[time][basin] = np.sum(temp, axis=(1, 2))
-
-            temp = exts_numba(gphit, e1t, e2t, tmask, sic)
-            exts[time][basin] = np.sum(temp, axis=(1, 2))
-
-            temp = arean_numba(gphit, e1t, e2t, tmask, sic)
-            arean[time][basin] = np.sum(temp, axis=(1, 2))
-
-            temp = areas_numba(gphit, e1t, e2t, tmask, sic)
-            areas[time][basin] = np.sum(temp, axis=(1, 2))
-
-    del gphit, e1t, e2t, sic_slices, temp
-
-    return extn, exts, arean, areas
+        save_cubes(extn, exts, arean, areas, masks, device)
+    ellapsed = datetime.datetime.now() - start
+    logger.info('Total ellapsed time on the %s: %s', device, ellapsed)
 
 
 def save_cubes(extn, exts, arean, areas, basins, device):
@@ -105,7 +53,7 @@ def save_cubes(extn, exts, arean, areas, basins, device):
     cubes_arean = iris.cube.CubeList()
     cubes_areas = iris.cube.CubeList()
     for time in extn.keys():
-        for basin in basins.items():
+        for basin in basins.keys():
             cube_extn = iris.cube.Cube(extn[time][basin] / 1e12)
             cube_exts = iris.cube.Cube(exts[time][basin] / 1e12)
             cube_arean = iris.cube.Cube(arean[time][basin] / 1e12)
@@ -136,64 +84,12 @@ def save_cubes(extn, exts, arean, areas, basins, device):
               'numba_outputs/areas_{0}.nc'.format(device), zlib=True)
 
 
-def compute_sic_gpu(gphit, e1t, e2t, sic_slices, basins):
-    extn = {}
-    exts = {}
-    arean = {}
-    areas = {}
-
-    gpu_gphit = cuda.to_device(gphit.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_temp = cuda.device_array(e1t.shape[1]*e1t.shape[2], dtype=np.float32)
-
-    block = (128, 1)
-    grid_size = ((e1t.shape[2]) // block[0]) + 1
-    grid = (grid_size, e1t.shape[1])
-
-    for sic_data in sic_slices:
-        time = sic_data.coord('time').points[0]
-        extn[time] = {}
-        exts[time] = {}
-        arean[time] = {}
-        areas[time] = {}
-        sic = sic_data.data.astype(np.float32)
-        gpu_sic = cuda.to_device(sic.astype(np.float32))
-        for basin, tmask in basins.items():
-            gpu_tmask = cuda.to_device(tmask.astype(np.float32))
-
-            extn_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                         gpu_tmask, gpu_sic, gpu_temp)
-            extn[time][basin] = sum_red_cuda(gpu_temp)
-
-            exts_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                         gpu_tmask, gpu_sic, gpu_temp)
-            exts[time][basin] = sum_red_cuda(gpu_temp)
-
-            arean_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                          gpu_tmask, gpu_sic, gpu_temp)
-            arean[time][basin] = sum_red_cuda(gpu_temp)
-
-            areas_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                          gpu_tmask, gpu_sic, gpu_temp)
-            areas[time][basin] = sum_red_cuda(gpu_temp)
-
-    del gphit, e1t, e2t, sic_slices
-    del gpu_gphit, gpu_e1t, gpu_e2t, gpu_tmask, gpu_sic, gpu_temp
-
-    return extn, exts, arean, areas
-
-
-def load_data(cube_gphit, cube_e1t, cube_e2t, cube_sic):
-    gphit = cube_gphit.data.astype(np.float32)
-    e1t = cube_e1t.data.astype(np.float32)
-    e2t = cube_e2t.data.astype(np.float32)
+def load_data(cube_sic):
     sic_slices = []
     for sic_data in cube_sic.slices_over('time'):
         sic_data.data = np.ma.filled(sic_data.data, 0.0).astype(np.float32)
         sic_slices.append(sic_data)
-
-    return gphit, e1t, e2t, sic_slices
+    return sic_slices
 
 
 def load_cube(filepath, variable):
@@ -219,94 +115,5 @@ def load_masks(filepath):
     return basins
 
 
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
-           target='cpu')
-def arean_numba(gphit, e1t, e2t, tmask, sic):
-    if gphit > 0:
-        temp = e1t * e2t * tmask * sic / 100
-    return temp
-
-
-@cuda.jit()
-def arean_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
-    i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
-        return
-    temp[x] = 0.0
-    if(gphit[0, j, i] > 0):
-        temp[x] = (e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i] * sic[j, i]
-                   / 100)
-
-
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
-           target='cpu')
-def extn_numba(gphit, e1t, e2t, tmask, sic):
-    if(gphit > 0 and sic > 15):
-        temp = e1t * e2t * tmask
-    return temp
-
-
-@cuda.jit()
-def extn_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
-    i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
-        return
-    temp[x] = 0.0
-    if(gphit[0, j, i] > 0 and sic[j, i] > 15):
-        temp[x] = e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i]
-
-
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
-           target='cpu')
-def areas_numba(gphit, e1t, e2t, tmask, sic):
-    if gphit < 0:
-        temp = e1t * e2t * tmask * sic / 100
-    return temp
-
-
-@cuda.jit()
-def areas_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
-    i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
-        return
-    temp[x] = 0.0
-    if(gphit[0, j, i] < 0):
-        temp[x] = (e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i] * sic[j, i]
-                   / 100)
-
-
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
-           target='cpu')
-def exts_numba(gphit, e1t, e2t, tmask, sic):
-    if(gphit < 0 and sic > 15):
-        temp = e1t * e2t * tmask
-    return temp
-
-
-@cuda.jit()
-def exts_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
-    i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
-        return
-    temp[x] = 0.0
-    if(gphit[0, j, i] < 0 and sic[j, i] > 15):
-        temp[x] = e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i]
-
-
-@vectorize(['float64(float32, float32)', 'float64(float64, float64)'],
-           target='cpu')
-def sum_red(x, y):
-    return x+y
-
-
-@cuda.reduce
-def sum_red_cuda(x, y):
-    return x+y
-
-
 if __name__ == '__main__':
     main()
diff --git a/build/lib/diagonals/__init__.py b/diagonals/mesh_helpers/__init__.py
similarity index 100%
rename from build/lib/diagonals/__init__.py
rename to diagonals/mesh_helpers/__init__.py
diff --git a/diagonals/mesh_helpers/nemo.py b/diagonals/mesh_helpers/nemo.py
new file mode 100644
index 0000000000000000000000000000000000000000..63fd4b766bb9597d523bf45d2da8beaaf7c41795
--- /dev/null
+++ b/diagonals/mesh_helpers/nemo.py
@@ -0,0 +1,98 @@
+import numpy as np
+import iris
+
+from numba import vectorize
+
+
+class Nemo():
+
+    def __init__(self, mesh_file, regions_file, default_cell_point='T'):
+        self.mesh_file = mesh_file
+        self.regions_file = regions_file
+        self.default_cell_point = default_cell_point
+
+    def get_areacello(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        e1 = self.get_i_length(cell_point, dtype)
+        e2 = self.get_j_length(cell_point, dtype)
+        return _get_area(e1, e2)
+
+    def get_volcello(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        e1 = self.get_i_length(cell_point, dtype)
+        e2 = self.get_j_length(cell_point, dtype)
+        e3 = self.get_k_length(cell_point, dtype)
+        return _get_volume(e1, e2, e3)
+
+    def get_i_length(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        return self.get_mesh_var('e1' + cell_point.lower(), dtype)
+
+    def get_j_length(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        return self.get_mesh_var('e2' + cell_point.lower(), dtype)
+
+    def get_k_length(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        return self.get_mesh_var('e3' + cell_point.lower() + '_0', dtype)
+
+    def get_depth(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        return self.get_mesh_var('gdep' + cell_point.lower() + '_0', dtype)
+
+    def get_landsea_mask(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        return self.get_mesh_var(cell_point.lower() + 'mask', dtype)
+
+    def get_grid_latitude(self, cell_point=None, dtype=np.float32):
+        if cell_point is None:
+            cell_point = self.default_cell_point
+        return self.get_mesh_var('gphi' + cell_point.lower(), dtype)
+
+    def get_mesh_var(self, var, dtype):
+        return iris.load_cube(self.mesh_file, var).data.astype(dtype)
+
+
+@vectorize(['float32(float32, float32)'], target='cpu')
+def _get_area(e1, e2):
+    """Vectorized numba function executed on the cpu that computes the area
+    for each basin:
+
+    Parameters
+    ----------
+    e1 : float32
+        Masked array containing variable e1.
+    e2 : float32
+        Masked array containing variable e2.
+    Returns
+    -------
+    e1*e2 : float32
+        Masked array containing the area for the whole grid.
+    """
+    return e1*e2
+
+
+@vectorize(['float32(float32, float32, float32)'], target='cpu')
+def _get_volume(e1, e2, e3):
+    """Vectorized numba function executed on the cpu that computes the area
+    for each basin:
+
+    Parameters
+    ----------
+    e1t : float32
+        Masked array containing variable e1t.
+    e2t : float32
+        Masked array containing variable e2t.
+    Returns
+    -------
+    e1*e2*e3 : float32
+        Masked array containing the volume for the whole grid.
+    """
+    return e1 * e2 * e3
diff --git a/diagonals/moc.py b/diagonals/moc.py
new file mode 100644
index 0000000000000000000000000000000000000000..926ca98f638446dd536214173740c0947125f26c
--- /dev/null
+++ b/diagonals/moc.py
@@ -0,0 +1,246 @@
+import os
+
+import numpy as np
+
+import iris
+import iris.cube
+import iris.analysis
+
+import warnings
+import datetime
+
+import numba
+from numba import vectorize
+from numba import guvectorize
+from numba import cuda
+from numba import int32, int64, float32, float64
+from numba.cuda.cudadrv import driver
+
+import diagonals
+
+__all__ = ['compute']
+
+
+def compute(basins, e1v, e3v, vo):
+    """Function that checks device and calls computing functions.
+
+    Checks if the computations are going performed in the CPU or the GPU:
+
+    Parameters
+    ----------
+    basins : list
+        List of masked arrays containing the mask for each basin.
+    e1v: float32
+        Masked array containing variable e1v
+    e3v: float32
+        Masked array containing variable e3v
+    vo : float32
+        Masked array containing Sea Water Y Velocity data.
+
+    Returns
+    -------
+    moc: list
+        List of masked arrays containing the moc index for each basin.
+    """
+    area = {}
+    for basin in basins:
+        area[basin] = _compute_area(e1v, e3v, basins[basin])
+
+    del e1v, e3v
+
+    if diagonals.CONFIG.use_gpu:
+        moc = _compute_moc_gpu(
+            vo, area
+            )
+    else:
+        moc = _compute_moc_cpu(
+            vo, area
+        )
+    return moc
+
+
+def _compute_moc_cpu(vo, area):
+    """Function that calls computing functions in the CPU.
+
+    Computes moc index for each basin and appends them to a list:
+
+    Parameters
+    ----------
+    area: list
+        List containing the areas for every basin.
+
+    vo : float32
+        Masked array containing Sea Water Y Velocity data.
+
+    Returns
+    -------
+    moc: list
+        List of masked arrays containing the moc index for each basin.
+    """
+    moc = {}
+    for basin, mask in area.items():
+        moc_basin = np.sum(_multiply_vo_basin(vo, mask), axis=3)
+        _vertical_cumsum(moc_basin, moc_basin)
+        moc[basin] = moc_basin
+    return moc
+
+
+def _compute_moc_gpu(vo, area):
+    """Function that calls computing functions in the GPU.
+
+    Computes moc index for each basin and appends them to a list:
+
+    Parameters
+    ----------
+    area: list
+        List containing the areas for every basin.
+
+    vo : float32
+        Masked array containing Sea Water Y Velocity data.
+
+    Returns
+    -------
+    moc: list
+        List of masked arrays containing the moc index for each basin.
+    """
+    times = vo.shape[0]
+    levels = vo.shape[1]
+    lats = vo.shape[2]
+
+    block = (128, 1, 1)
+    grid_size = (lats // block[0]) + 1
+    grid3D = (grid_size, levels, times)
+    gpu_vo = cuda.to_device(vo.astype(np.float32))
+    gpu_moc = cuda.device_array((times, levels, lats), dtype=np.float32)
+    grid2D = (grid_size, times)
+
+    moc = {}
+    for basin, mask in area.items():
+        gpu_area = cuda.to_device(mask.astype(np.float32))
+        _horizontal_integral[grid3D, block](gpu_vo, gpu_area, gpu_moc)
+        _vertical_cumsum_gpu[grid2D, block](gpu_moc)
+        moc[basin] = gpu_moc.copy_to_host()
+
+    del gpu_area, gpu_moc, gpu_vo
+
+    return moc
+
+
+@vectorize(['float32(float32, float32, float32)'], target='cpu')
+def _compute_area(e1v, e3v, basin):
+    """Vectorized numba function executed in the CPU.
+
+    Calculates cell area for each basin:
+
+    Parameters
+    ----------
+    e1v: float32
+        Masked array containing variable e1v.
+    e3v: float32
+        Masked array containing variable e3v.
+    basin : float32
+        Masked array containing a basin mask.
+
+    Returns
+    -------
+    area: float32
+        Masked array containing the cell area for a given basin.
+    """
+    area = - e1v * e3v * basin / 1e6
+    return area
+
+
+@vectorize(['float32(float32, float32)'], target='cpu')
+def _multiply_vo_basin(vo, basin):
+    """Vectorized numba function executed in the CPU.
+
+    Weights vo with a given basin area:
+
+    Parameters
+    ----------
+    e1v: float32
+        Masked array containing variable e1v.
+    e3v: float32
+        Masked array containing variable e3v.
+    basin : float32
+        Masked array containing a basin mask.
+
+    Returns
+    -------
+    area: float32
+        Masked array containing the cell area for a given basin.
+    """
+    return vo*basin
+
+
+@guvectorize([(float32[:, :, :], float32[:, :, :])], '(t, l, j)->(t, l, j)',
+             target='cpu')
+def _vertical_cumsum(moc, out):
+    """Numba gu-function executed in the CPU.
+
+    Performs vertical cummulative sum in order to compute the moc index:
+
+    Parameters
+    ----------
+    moc: float32
+        Masked array containing horizontal integration of vo.
+
+    Returns
+    -------
+    moc: float32
+        Masked array containing the moc index for a given basin.
+    """
+    for lev in reversed(range(moc.shape[1]-1)):
+        moc[:, lev, :] = moc[:, lev, :] + moc[:, lev+1, :]
+
+
+@cuda.jit()
+def _vertical_cumsum_gpu(moc):
+    """Numba kernel executed in the GPU.
+
+    Performs vertical cummulative sum in order to compute the moc index:
+
+    Parameters
+    ----------
+    moc: float32
+        Masked array containing horizontal integration of vo.
+
+    Returns
+    -------
+    moc: float32
+        Masked array containing the moc indes for a given basin.
+    """
+    j, t = cuda.grid(2)
+    if(j >= moc.shape[2]):
+        return
+    for lev in range(moc.shape[1]-2, -1, -1):
+        moc[t, lev, j] = moc[t, lev, j] + moc[t, lev+1, j]
+
+
+@cuda.jit()
+def _horizontal_integral(vo, area, moc):
+    """Numba kernel executed in the GPU.
+
+    Integrates along the longitudes in order to compute the moc index:
+
+    Parameters
+    ----------
+    vo : float32
+        Masked array containing Sea Water Y Velocity data.
+
+    area: float32
+        List containing the areas for every basin.
+
+    Returns
+    -------
+    moc: float32
+        Masked array containing vo horizontally integrated for a given basin.
+    """
+    j, lev, t = cuda.grid(3)
+    moc[t, lev, j] = 0.0
+    temp = 0.0
+    if(j >= vo.shape[2]):
+        return
+    for i in range(vo.shape[3]):
+        temp += vo[t, lev, j, i] * area[0, lev, j, i]
+    moc[t, lev, j] = temp
diff --git a/diagonals/diagnostics/oceanheatcontent.py b/diagonals/ohc.py
similarity index 83%
rename from diagonals/diagnostics/oceanheatcontent.py
rename to diagonals/ohc.py
index c8ff761be1fbcaca2618ba35351837269afd9984..ef70caa2859c50d1260fb608a137bffd1211baf6 100644
--- a/diagonals/diagnostics/oceanheatcontent.py
+++ b/diagonals/ohc.py
@@ -7,19 +7,20 @@ import iris.cube
 import iris.analysis
 
 import warnings
-
 import datetime
 
 import numba
-from numba import jit
 from numba import vectorize
-from numba import guvectorize
 from numba import cuda
 from numba import int32, int64, float32, float64
 from numba.cuda.cudadrv import driver
 
+import diagonals
+
+__all__ = ['get_weights', 'compute', 'get_basin_area']
 
-def check_device_and_compute_weights(layers, mask, e3t, depth):
+
+def get_weights(layers, mask, cell_height, cell_top_depth):
     """Function that checks device and calls computing functions.
 
     Checks if the computations are going performed in the CPU or the GPU:
@@ -30,9 +31,9 @@ def check_device_and_compute_weights(layers, mask, e3t, depth):
         List containing the minimum and maximum depth values for the layer.
     mask : float32
         Masked array containing the Global basin mask.
-    e3t: float32
+    cell_height: float32
         Masked array containing variable e3t.
-    depth: float32
+    cell_top_depth: float32
         Masked array containing variable gwdep.
 
     Returns
@@ -40,23 +41,18 @@ def check_device_and_compute_weights(layers, mask, e3t, depth):
     weights: float32
         List of masked arrays containing the weights for each layer.
     """
-    check_gpu = numba.cuda.is_available()
-    if check_gpu is False:
-        device = 'CPU'
-        weights = compute_weights_cpu(layers, mask, e3t, depth)
+    if diagonals.CONFIG.use_gpu:
+        weights = _compute_weights_gpu(
+            layers, mask, cell_height, cell_top_depth
+        )
     else:
-        use_gpu = True
-        if use_gpu is True:
-            device = 'GPU'
-            weights = compute_weights_gpu(layers, mask, e3t, depth)
-        else:
-            device = 'CPU'
-            weights = compute_weights_cpu(layers, mask, e3t, depth)
-    print('Running on {}'.format(device))
+        weights = _compute_weights_cpu(
+            layers, mask, cell_height, cell_top_depth
+        )
     return weights
 
 
-def compute_OHC(layers, weights, thetao, area):
+def compute(layers, weights, thetao, area):
     """Function that checks device and calls computing functions.
 
     Checks if the computations are going performed in the CPU or the GPU:
@@ -81,15 +77,14 @@ def compute_OHC(layers, weights, thetao, area):
         List of masked arrays containing the 1D ocean heat content for each
         layer, basin and timestep.
     """
-    check_gpu = numba.cuda.is_available()
-    if check_gpu is False:
-        ohc, ohc_1D = compute_ohc_cpu(layers, thetao, weights, area)
+    if diagonals.CONFIG.use_gpu:
+        ohc, ohc_1D = _compute_ohc_gpu(layers, thetao, weights, area)
     else:
-        ohc, ohc_1D = compute_ohc_gpu(layers, thetao, weights, area)
+        ohc, ohc_1D = _compute_ohc_cpu(layers, thetao, weights, area)
     return ohc, ohc_1D
 
 
-def compute_weights_cpu(layers, mask, e3t, depth):
+def _compute_weights_cpu(layers, mask, e3t, depth):
     """Function that calls computing functions in the CPU.
 
     Computes weights for each layer and appends them to a list:
@@ -112,12 +107,13 @@ def compute_weights_cpu(layers, mask, e3t, depth):
     """
     weights = []
     for min_depth, max_depth in layers:
-        weights.append(calculate_weight_numba(min_depth, max_depth, e3t, depth,
-                       mask))
+        weights.append(
+            _calculate_weight_numba(min_depth, max_depth, e3t, depth, mask)
+        )
     return weights
 
 
-def compute_weights_gpu(layers, mask, e3t, depth):
+def _compute_weights_gpu(layers, mask, e3t, depth):
     """Function that calls computing functions in the GPU.
 
     Computes weights for each layer and appends them to a list:
@@ -143,13 +139,13 @@ def compute_weights_gpu(layers, mask, e3t, depth):
     gpu_depth = cuda.to_device(depth.data.astype(np.float32))
     weights = []
     for min_depth, max_depth in layers:
-        weights.append(calculate_weight_numba_cuda(min_depth, max_depth,
+        weights.append(_calculate_weight_numba_cuda(min_depth, max_depth,
                        gpu_e3t, gpu_depth, gpu_mask))
     del gpu_depth, gpu_mask, gpu_e3t
     return weights
 
 
-def compute_ohc_cpu(layers, thetao, weights, area):
+def _compute_ohc_cpu(layers, thetao, weights, area):
     """Function that calls computing functions in the CPU.
 
     Loops over layers to compute the global ocean heat content in 2D. This
@@ -178,18 +174,20 @@ def compute_ohc_cpu(layers, thetao, weights, area):
     """
     ohc = []
     for layer in range(len(layers)):
-        ohc_layer = sum_red.reduce(multiply_array(thetao,
-                                   weights[layer]), axis=1)
+        ohc_layer = _sum_red.reduce(
+            _multiply_array(thetao, weights[layer]),
+            axis=1
+        )
         ohc.append(ohc_layer)
         ohc1D_total = []
         for i, basin in enumerate(area):
-            ohc_basin = multiply_array(ohc_layer, area[basin])
+            ohc_basin = _multiply_array(ohc_layer, area[basin])
             ohc1D = np.sum(ohc_basin, axis=(1, 2))
             ohc1D_total.append(ohc1D)
     return ohc, ohc1D_total
 
 
-def compute_ohc_gpu(layers, thetao, weights, area):
+def _compute_ohc_gpu(layers, thetao, weights, area):
     """Function that calls computing functions in the GPU.
 
     Loops over layers to compute the global ocean heat content in 2D. This
@@ -235,14 +233,14 @@ def compute_ohc_gpu(layers, thetao, weights, area):
     gpu_thetao = cuda.to_device(thetao.astype(np.float32))
 
     for layer in range(len(layers)):
-        compute_ohc[grid, block](gpu_thetao, weights[layer], gpu_ohc, levels)
+        _compute_ohc[grid, block](gpu_thetao, weights[layer], gpu_ohc, levels)
         ohc.append(gpu_ohc.copy_to_host())  # moure al final
-        multiply_ohc_basin[grid, block](gpu_ohc, gpu_basins_area, gpu_temp)
+        _multiply_ohc_basin[grid, block](gpu_ohc, gpu_basins_area, gpu_temp)
         ohc1D_basin = []
         for basin in range(basins):
             ohc_1D = np.empty(times, dtype=np.float32)
             for time in range(times):
-                ohc_1D[time] = sum_red_cuda(gpu_temp[basin, time, :])
+                ohc_1D[time] = _sum_red_cuda(gpu_temp[basin, time, :])
             ohc1D_basin.append(ohc_1D)
     del gpu_ohc, gpu_temp, gpu_basins_area
 
@@ -250,7 +248,7 @@ def compute_ohc_gpu(layers, thetao, weights, area):
 
 
 @vectorize(['float32(int32, int32, float32, float32, float32)'], target='cuda')
-def calculate_weight_numba_cuda(min_depth, max_depth, e3t, depth, mask):
+def _calculate_weight_numba_cuda(min_depth, max_depth, e3t, depth, mask):
     """Vectorized numba function executed on the gpu.
 
     Calculates the weight for each cell:
@@ -289,7 +287,7 @@ def calculate_weight_numba_cuda(min_depth, max_depth, e3t, depth, mask):
 
 
 @cuda.jit()
-def compute_ohc(thetao, weight, ohc, levels):
+def _compute_ohc(thetao, weight, ohc, levels):
     """Numba kernel executed on the gpu that computes the global ocean heat
     content in 2D for a given layer and timestep:
 
@@ -321,7 +319,7 @@ def compute_ohc(thetao, weight, ohc, levels):
 
 
 @cuda.jit()
-def multiply_ohc_basin(ohc, area, temp):
+def _multiply_ohc_basin(ohc, area, temp):
     """Numba kernel executed on the gpu that weights the global ocean heat
     content over the area for each basin for and timestep:
 
@@ -343,8 +341,8 @@ def multiply_ohc_basin(ohc, area, temp):
     """
     i, j, t = cuda.grid(3)
     basins = area.shape[0]
-    x = i+j*ohc.shape[2]
-    if(i >= ohc.shape[2]):
+    x = i + j * ohc.shape[2]
+    if i >= ohc.shape[2]:
         return
     for basin in range(basins):
         temp[basin, t, x] = 0.0
@@ -352,7 +350,7 @@ def multiply_ohc_basin(ohc, area, temp):
 
 
 @vectorize(['float32(int32, int32, float32, float32, float32)'], target='cpu')
-def calculate_weight_numba(min_depth, max_depth, e3t, depth, mask):
+def _calculate_weight_numba(min_depth, max_depth, e3t, depth, mask):
     """Vectorized numba function executed on the cpu.
 
     Calculates the weight for each cell:
@@ -392,7 +390,7 @@ def calculate_weight_numba(min_depth, max_depth, e3t, depth, mask):
 
 @vectorize(['float64(float32, float32)', 'float64(float64, float64)'],
            target='cpu')
-def sum_red(x, y):
+def _sum_red(x, y):
     """Vectorized numba function executed on the cpu that performs a sum
     reduction:
 
@@ -413,7 +411,7 @@ def sum_red(x, y):
 
 @vectorize(['float32(float32, float32)', 'float64(float64, float64)'],
            target='cpu')
-def multiply_array(a, b):
+def _multiply_array(a, b):
     """Vectorized numba function executed on the cpu that performs an
     element-wise multiplication of two arrays:
 
@@ -433,30 +431,35 @@ def multiply_array(a, b):
     return a*b
 
 
-@vectorize(['float32(float32, float32, float32)'], target='cpu')
-def compute_area_basin(e1t, e2t, basin):
+def get_basin_area(areacello, basins):
+    basin_areas = {}
+    for basin in basins:
+        basin_areas[basin] = _compute_basin_area(areacello, basins[basin])
+    return basin_areas
+
+
+@vectorize(['float32(float32, float32)'], target='cpu')
+def _compute_basin_area(areacello, basin):
     """Vectorized numba function executed on the cpu that computes the area
        for each basin:
 
     Parameters
     ----------
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t : float32
-        Masked array containing variable e2t.
+    areacello : float32
+        Masked array containing areacello.
     basin: float32
         Masked array containing the mask for a given basin.
 
     Returns
     -------
-    e1t*e2t*basin : float32
+    areacello*basin : float32
         Masked array containing the area for a given basin.
     """
-    return e1t*e2t*basin
+    return areacello*basin
 
 
 @cuda.reduce
-def sum_red_cuda(x, y):
+def _sum_red_cuda(x, y):
     """Numba kernel executed on the gpu that performs a sum reduction:
 
     Parameters
diff --git a/diagonals/regmean.py b/diagonals/regmean.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e6eb9a84a85e4cc498115e5bc245c503a257123
--- /dev/null
+++ b/diagonals/regmean.py
@@ -0,0 +1,238 @@
+import os
+
+import numpy as np
+
+import iris
+import iris.cube
+import iris.analysis
+
+import warnings
+import datetime
+
+import numba
+from numba import vectorize
+from numba import cuda
+from numba import int32, int64, float32, float64
+from numba.cuda.cudadrv import driver
+
+import diagonals
+
+__all__ = ['compute_regmean_2D', 'compute_regmean_3D',
+           'compute_regmean_levels']
+
+
+def compute_regmean_2D(var, basins, area):
+    """Function that checks device and calls computing functions.
+
+    Checks if the computations are going performed in the CPU or the GPU:
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    area : float32
+        Masked array containing cell area.
+
+    Returns
+    -------
+    regmean: float32
+        List containing regional mean for variable var
+    """
+    if diagonals.CONFIG.use_gpu:
+        logger.warning('GPU routines not implemented for regmean diagnostic.'
+                       'Using CPU instead')
+    regmean = _compute_regmean_2D_cpu(var, basins, area)
+    return regmean
+
+
+def compute_regmean_3D(var, basins, volume):
+    """Function that checks device and calls computing functions.
+
+    Checks if the computations are going performed in the CPU or the GPU:
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    volume : float32
+        Masked array containing cell volume.
+
+    Returns
+    -------
+    regmean: float32
+        List containing regional mean for variable var
+    """
+    if diagonals.CONFIG.use_gpu:
+        logger.warning('GPU routines not implemented for regmean diagnostic.'
+                       'Using CPU instead')
+    regmean = _compute_regmean_3D_cpu(var, basins, volume)
+    return regmean
+
+
+def compute_regmean_levels(var, basins, volume):
+    """Function that checks device and calls computing functions.
+
+    Checks if the computations are going performed in the CPU or the GPU:
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    volume : float32
+        Masked array containing cell volume.
+
+    Returns
+    -------
+    regmean: float32
+        List containing regional mean for variable var at each level.
+    """
+    if diagonals.CONFIG.use_gpu:
+        logger.warning('GPU routines not implemented for regmean diagnostic.'
+                       'Using CPU instead')
+    else:
+        regmean = _compute_regmean_levels_cpu(
+            var, basins, volume
+        )
+    return regmean
+
+
+def _compute_regmean_2D_cpu(var, basins, area):
+    """Function that computes the regional mean for 2D vars in the cpu.
+
+       Computes the weights for each region and performs a weighted average.
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    area : float32
+        Masked array containing cell area.
+
+    Returns
+    -------
+    regmean_total: float32
+        List containing regional mean for variable var.
+    """
+    times = var.shape[0]
+    regmean_total = {}
+    for basin, mask in basins.items():
+        weights = _compute_weights_2D(mask, area)
+        regmean = np.empty(times)
+        for t in range(times):
+            regmean[t] = np.average(var[t, :, :], axis=(0, 1),
+                                    weights=np.squeeze(weights))
+        regmean_total[basin] = regmean
+    return regmean_total
+
+
+def _compute_regmean_3D_cpu(var, basins, volume):
+    """Function that computes the regional mean for 3D vars in the cpu.
+
+    Computes the weights for each region and performs a weighted average.
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    volume : float32
+        Masked array containing cell volume.
+
+    Returns
+    -------
+    regmean_total: float32
+        List containing the regional mean for variable var.
+    """
+    times = var.shape[0]
+    regmean_total = {}
+    for basin, mask in basins.items():
+        weights = _compute_weights_3D(mask, volume)
+        regmean = np.empty(times)
+        for t in range(times):
+            regmean[t] = np.average(var[t, :, :, :], axis=(0, 1, 2),
+                                    weights=np.squeeze(weights))
+        regmean_total[basin] = regmean
+    return regmean_total
+
+
+def _compute_regmean_levels_cpu(var, basins, volume):
+    """Function that computes the regional mean at every depth level
+    for 3D vars in the cpu.
+
+    Computes the weights for each region and performs a weighted average.
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    volume : float32
+        Masked array containing cell volume.
+
+    Returns
+    -------
+    regmean_total: float32
+        List containing regional mean at every depth level for variable var.
+    """
+    times = var.shape[0]
+    levs = var.shape[1]
+    regmean_total = {}
+    for basin, mask in basins.items():
+        regmean = np.empty((times, levs))
+        w = _compute_weights_3D(mask, volume)
+        for t in range(times):
+            for l in range(levs):
+                regmean[t, l] = np.average(var[t, l, :, :], axis=(0, 1),
+                                           weights=np.squeeze(w)[l, :, :])
+        regmean_total[basin] = regmean
+    return regmean_total
+
+
+@vectorize(['float32(float32, float32)'], target='cpu')
+def _compute_weights_2D(mask, area):
+    """Function that computes the regional weights for 2D vars in the cpu.
+
+    Parameters
+    ----------
+    mask : float32
+        Mask array containing a region mask.
+    area : float32
+        Masked array containing cell area.
+
+    Returns
+    -------
+    weights: float32
+        Masked array containing the weights for a given region.
+    """
+    weights = mask*area
+    return weights
+
+
+@vectorize(['float32(float32, float32)'], target='cpu')
+def _compute_weights_3D(mask, volume):
+    """Function that computes the regional weights for 3D vars in the cpu.
+
+    Parameters
+    ----------
+    mask : float32
+        Mask array containing a region mask.
+    volume: float32
+        Masked array containing cell volume.
+
+    Returns
+    -------
+    weights: float32
+        Masked array containing the weights for a given region.
+    """
+    weights = mask*volume
+    return weights
diff --git a/diagonals/regsum.py b/diagonals/regsum.py
new file mode 100644
index 0000000000000000000000000000000000000000..4051c5f2425d3e50a6f597fbe3d9a4416852dd41
--- /dev/null
+++ b/diagonals/regsum.py
@@ -0,0 +1,196 @@
+import os
+
+import numpy as np
+
+import iris
+import iris.cube
+import iris.analysis
+
+import warnings
+import datetime
+
+import numba
+from numba import vectorize
+from numba import cuda
+from numba import int32, int64, float32, float64
+from numba.cuda.cudadrv import driver
+
+import diagonals
+
+__all__ = ['compute_regsum_2D', 'compute_regsum_3D', 'compute_regsum_levels']
+
+
+def compute_regsum_2D(var, basins, area):
+    """Function that checks device and calls computing functions.
+
+    Checks if the computations are going performed in the CPU or the GPU:
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    area : float32
+        Masked array containing cell area.
+
+    Returns
+    -------
+    regmean: float32
+        List containing regional mean for variable var
+    """
+    if diagonals.CONFIG.use_gpu:
+        logger.warning('GPU routines not implemented for regmean diagnostic.'
+                       'Using CPU instead')
+    regsum = _compute_regsum_2D_cpu(var, basins, area)
+    return regsum
+
+
+def compute_regsum_3D(var, basins, volume, tmask):
+    """Function that checks device and calls computing functions.
+
+    Checks if the computations are going performed in the CPU or the GPU:
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    area : float32
+        Masked array containing cell area.
+
+    Returns
+    -------
+    regmean: float32
+        List containing regional mean for variable var
+    """
+    if diagonals.CONFIG.use_gpu:
+        logger.warning('GPU routines not implemented for regmean diagnostic.'
+                       'Using CPU instead')
+    regsum = _compute_regsum_3D_cpu(var, basins, volume, tmask)
+    return regsum
+
+
+def _compute_regsum_2D_cpu(var, basins, area):
+    """Function that computes the regional sum for 2D vars in the cpu.
+
+       Computes the weights for each region and performs a weighted sum.
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    area : float32
+        Masked array containing cell area.
+
+    Returns
+    -------
+    regmean_total: float32
+        List containing regional mean for variable var.
+    """
+    regsum = {}
+    for basin, mask in basins.items():
+        weighted_var = _weigh_var_2D(var, mask, area)
+        regsum[basin] = np.sum(weighted_var, axis=(1, 2))
+    return regsum
+
+
+def _compute_regsum_3D_cpu(var, basins, volume, tmask):
+    """Function that computes the regional sum for 3D vars in the cpu.
+
+    Computes the weights for each region and performs a weighted sum.
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    volume : float32
+        Masked array containing cell volume
+
+    Returns
+    -------
+    regmean_total: float32
+        List containing regional mean for variable var.
+    """
+    regsum = {}
+    for basin, mask in basins.items():
+        weighted_var = _weigh_var_3D(var, mask, volume, tmask)
+        regsum[basin] = np.sum(weighted_var, axis=(1, 2, 3))
+    return regsum
+
+
+def compute_regsum_levels(var, basins, volume, tmask):
+    """Function that computes the regional sum for 3D vars in the cpu.
+
+    Computes the weights for each region and performs a weighted sum.
+
+    Parameters
+    ----------
+    var : float32
+        Masked array containing variable data.
+    basins : float32
+        List containing basin names and masks.
+    volume : float32
+        Masked array containing cell volume
+    Returns
+    -------
+    regmean_total: float32
+        List containing regional mean for variable var.
+    """
+    regsum = {}
+    for basin, mask in basins.items():
+        weighted_var = _weigh_var_3D(var, mask, volume, tmask)
+        regsum[basin] = np.sum(weighted_var, axis=(2, 3))
+    return regsum
+
+
+@vectorize(['float32(float32, float32, float32)'], target='cpu')
+def _weigh_var_2D(var, mask, area):
+    """Function that weights a 2D variable for each region.
+
+    Parameters
+    ----------
+    var: float32
+        Masked array containing variable to sum.
+    mask : float32
+        Mask array containing a region mask.
+    area: float32
+        Masked array containing cell area.
+
+    Returns
+    -------
+    weights: float32
+        Masked array containing the weights for a given region.
+    """
+    weighted_var = var * mask * area
+    return weighted_var
+
+
+@vectorize(['float32(float32, float32, float32, float32)'],
+           target='cpu')
+def _weigh_var_3D(var, mask, volume, tmask):
+    """Function that weights a 3D variable for each region.
+
+    Parameters
+    ----------
+    var: float32
+        Masked array containing variable to sum.
+    mask : float32
+        Mask array containing a region mask.
+    volume: float32
+        Masked array containing cell volume.
+    tmask: float32
+        Masked array containing land-sea mask for any grid-point type
+
+    Returns
+    -------
+    weights: float32
+        Masked array containing the weights for a given region.
+    """
+    weighted_var = var * mask * volume * tmask
+    return weighted_var
diff --git a/diagonals/diagnostics/siarea.py b/diagonals/siasie.py
similarity index 55%
rename from diagonals/diagnostics/siarea.py
rename to diagonals/siasie.py
index b7dae330156d97ac9c042b70ca4597655ea071ba..2a5c49bd1b733788fee02c7fe646543c6e6476d1 100644
--- a/diagonals/diagnostics/siarea.py
+++ b/diagonals/siasie.py
@@ -1,9 +1,5 @@
 import os
 import numpy as np
-import glob
-import time
-import operator
-import datetime
 
 import iris
 import iris.cube
@@ -15,66 +11,19 @@ from iris.experimental.equalise_cubes import equalise_attributes
 from iris.time import PartialDateTime
 
 import warnings
+
 import numba
-from numba import jit
 from numba import vectorize
-from numba import guvectorize
 from numba import cuda
 from numba import int32, int64, float32, float64
 from numba.cuda.cudadrv import driver
 
+import diagonals
 
-def compute_siarea(cube_gphit, cube_e1t, cube_e2t, cube_sic, masks):
-    """Function that calls loading and computing functions for the siarea
-       diagnostic.
-
-    Loads the data from Iris cubes and computes the sea ice area and extent for
-    the northern and southern hemisphere:
-
-    Parameters
-    ----------
-    cube_gphit : Cube
-        Iris cube containing variable gphit.
-    cube_e1t : Cube
-        Iris cube containing variable e1t.
-    cube_e2t: Cube
-        Iris cube containing variable e2t.
-    cube_sic: Cube
-        Iris cube containing variable sic (sea_ice_area_fraction).
-    masks: float32
-        List containing basin names and masks.
+__all__ = ['compute']
 
-    Returns
-    -------
-    extn: float64
-        List containing sea ice extent in the northern hemisphere at each
-        timestep and basin.
-    exts: float64
-        List containing sea ice extent in the southern hemisphere at each
-        timestep and basin.
-    arean: float64
-        List containing sea ice area in the northern hemisphere at each
-        timestep and basin.
-    areas: float64
-        List containing sea ice area in the southern hemisphere at each
-        timestep and basin.
-    """
-    start = datetime.datetime.now()
-    gphit, e1t, e2t, sic_slices = load_data(cube_gphit,
-                                            cube_e1t,
-                                            cube_e2t,
-                                            cube_sic)
-    extn, exts, arean, areas, device = check_device_and_compute(gphit,
-                                                                e1t,
-                                                                e2t,
-                                                                sic_slices,
-                                                                masks)
-    seconds_total = datetime.datetime.now() - start
-    print('Total ellapsed time on the {1}: {0}'.format(seconds_total, device))
-    return extn, exts, arean, areas
 
-
-def check_device_and_compute(gphit, e1t, e2t, sic_slices, basins):
+def compute(gphit, area, sic_slices, basins):
     """Function that checks device and calls computing functions.
 
     Checks if the computations are going performed in the CPU or the GPU:
@@ -83,10 +32,8 @@ def check_device_and_compute(gphit, e1t, e2t, sic_slices, basins):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Iris cubelist containing variable sic sliced over time.
     masks: float32
@@ -109,26 +56,19 @@ def check_device_and_compute(gphit, e1t, e2t, sic_slices, basins):
     device: string
         Device where the computations are being performed (CPU or GPU)
     """
-    check_gpu = numba.cuda.is_available()
-    if check_gpu is False:
-        device = 'CPU'
-        extn, exts, arean, areas = compute_sic_cpu(gphit, e1t, e2t, sic_slices,
-                                                   basins)
+    if diagonals.CONFIG.use_gpu:
+        extn, exts, arean, areas = _compute_sic_gpu(
+            gphit, area, sic_slices, basins
+        )
     else:
-        use_gpu = True
-        if use_gpu is True:
-            device = 'GPU'
-            extn, exts, arean, areas = compute_sic_gpu(gphit, e1t, e2t,
-                                                       sic_slices, basins)
-        else:
-            device = 'CPU'
-            extn, exts, arean, areas = compute_sic_cpu(gphit, e1t, e2t,
-                                                       sic_slices, basins)
+        extn, exts, arean, areas = _compute_sic_cpu(
+            gphit, area, sic_slices, basins
+        )
 
-    return extn, exts, arean, areas, device
+    return extn, exts, arean, areas
 
 
-def compute_sic_cpu(gphit, e1t, e2t, sic_slices, basins):
+def _compute_sic_cpu(gphit, area, sic_slices, basins):
     """Function that computes sea ice are and extension on the CPU.
 
     Loops over time and basins and reduces over lat and lon:
@@ -137,10 +77,8 @@ def compute_sic_cpu(gphit, e1t, e2t, sic_slices, basins):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic_slices: float32
         Iris cubelist containing variable sic sliced over time.
     basins: float32
@@ -174,24 +112,24 @@ def compute_sic_cpu(gphit, e1t, e2t, sic_slices, basins):
         areas[time] = {}
         sic = sic_data.data
         for basin, tmask in basins.items():
-            temp = extn_numba(gphit, e1t, e2t, tmask, sic)
+            temp = _extn_cpu(gphit, area, tmask, sic)
             extn[time][basin] = np.sum(temp, axis=(1, 2))
 
-            temp = exts_numba(gphit, e1t, e2t, tmask, sic)
+            temp = _exts_cpu(gphit, area, tmask, sic)
             exts[time][basin] = np.sum(temp, axis=(1, 2))
 
-            temp = arean_numba(gphit, e1t, e2t, tmask, sic)
+            temp = _arean_cpu(gphit, area, tmask, sic)
             arean[time][basin] = np.sum(temp, axis=(1, 2))
 
-            temp = areas_numba(gphit, e1t, e2t, tmask, sic)
+            temp = _areas_cpu(gphit, area, tmask, sic)
             areas[time][basin] = np.sum(temp, axis=(1, 2))
 
-    del gphit, e1t, e2t, sic_slices, temp
+    del gphit, area, sic_slices, temp
 
     return extn, exts, arean, areas
 
 
-def compute_sic_gpu(gphit, e1t, e2t, sic_slices, basins):
+def _compute_sic_gpu(gphit, area, sic_slices, basins):
     """Function that computes sea ice are and extension on the GPU.
 
     Loops over time and basins and reduces over lat and lon:
@@ -200,10 +138,8 @@ def compute_sic_gpu(gphit, e1t, e2t, sic_slices, basins):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic_slices: float32
         Iris cubelist containing variable sic sliced over time.
     basins: float32
@@ -230,13 +166,13 @@ def compute_sic_gpu(gphit, e1t, e2t, sic_slices, basins):
     areas = {}
 
     gpu_gphit = cuda.to_device(gphit.astype(np.float32))
-    gpu_e1t = cuda.to_device(e1t.astype(np.float32))
-    gpu_e2t = cuda.to_device(e2t.astype(np.float32))
-    gpu_temp = cuda.device_array(e1t.shape[1]*e1t.shape[2], dtype=np.float32)
+    gpu_area = cuda.to_device(area.astype(np.float32))
+    gpu_temp = cuda.device_array(area.shape[1]*area.shape[2], dtype=np.float32)
+    del gphit, area
 
     block = (128, 1)
-    grid_size = ((e1t.shape[2]) // block[0]) + 1
-    grid = (grid_size, e1t.shape[1])
+    grid_size = ((area.shape[2]) // block[0]) + 1
+    grid = (grid_size, area.shape[1])
 
     for sic_data in sic_slices:
         time = sic_data.coord('time').points[0]
@@ -249,73 +185,29 @@ def compute_sic_gpu(gphit, e1t, e2t, sic_slices, basins):
         for basin, tmask in basins.items():
             gpu_tmask = cuda.to_device(tmask.astype(np.float32))
 
-            extn_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                         gpu_tmask, gpu_sic, gpu_temp)
-            extn[time][basin] = sum_red_cuda(gpu_temp)
-
-            exts_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                         gpu_tmask, gpu_sic, gpu_temp)
-            exts[time][basin] = sum_red_cuda(gpu_temp)
+            _extn_gpu[grid, block](gpu_gphit, gpu_area, gpu_tmask,
+                                   gpu_sic, gpu_temp)
+            extn[time][basin] = _sum_red_cuda(gpu_temp)
 
-            arean_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                          gpu_tmask, gpu_sic, gpu_temp)
-            arean[time][basin] = sum_red_cuda(gpu_temp)
+            _exts_gpu[grid, block](gpu_gphit, gpu_area, gpu_tmask,
+                                   gpu_sic, gpu_temp)
+            exts[time][basin] = _sum_red_cuda(gpu_temp)
 
-            areas_numba_cuda[grid, block](gpu_gphit, gpu_e1t, gpu_e2t,
-                                          gpu_tmask, gpu_sic, gpu_temp)
-            areas[time][basin] = sum_red_cuda(gpu_temp)
+            _arean_gpu[grid, block](gpu_gphit, gpu_area, gpu_tmask,
+                                    gpu_sic, gpu_temp)
+            arean[time][basin] = _sum_red_cuda(gpu_temp)
 
-    del gphit, e1t, e2t, sic_slices
-    del gpu_gphit, gpu_e1t, gpu_e2t, gpu_tmask, gpu_sic, gpu_temp
+            _areas_gpu[grid, block](gpu_gphit, gpu_area, gpu_tmask,
+                                    gpu_sic, gpu_temp)
+            areas[time][basin] = _sum_red_cuda(gpu_temp)
 
+    del gpu_gphit, gpu_area, gpu_tmask, gpu_sic, gpu_temp
     return extn, exts, arean, areas
 
 
-def load_data(cube_gphit, cube_e1t, cube_e2t, cube_sic):
-    """Function that loads data from Iris cubes.
-
-    Loads the data from Iris cubes as float32 for cube_gphit, cube_e1t and
-    cube_e2t. For cube_sic, the cube gets sliced over time:
-
-    Parameters
-    ----------
-    cube_gphit : Cube
-        Iris cube containing variable gphit.
-    cube_e1t : Cube
-        Iris cube containing variable e1t.
-    cube_e2t: Cube
-        Iris cube containing variable e2t.
-    cube_sic: Cube
-        Iris cube containing variable sic (sea_ice_area_fraction).
-
-    Returns
-    -------
-    gphit : float32
-        Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
-    sic_slices: float32
-        Iris cubelist containing variable sic sliced over time.
-    basins: float32
-        List containing basin names and masks.
-
-    """
-    gphit = cube_gphit.data.astype(np.float32)
-    e1t = cube_e1t.data.astype(np.float32)
-    e2t = cube_e2t.data.astype(np.float32)
-    sic_slices = []
-    for sic_data in cube_sic.slices_over('time'):
-        sic_data.data = np.ma.filled(sic_data.data, 0.0).astype(np.float32)
-        sic_slices.append(sic_data)
-
-    return gphit, e1t, e2t, sic_slices
-
-
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
+@vectorize(['float32(float32, float32, float32, float32)'],
            target='cpu')
-def arean_numba(gphit, e1t, e2t, tmask, sic):
+def _arean_cpu(gphit, area, tmask, sic):
     """Vectorized numba function executed on the cpu that computes the
        percentage of sea ice in the northern hemisphere for a given timestep
        and basin:
@@ -324,10 +216,8 @@ def arean_numba(gphit, e1t, e2t, tmask, sic):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Masked array containing variable sic for a given timestep.
     tmask: float32
@@ -339,12 +229,12 @@ def arean_numba(gphit, e1t, e2t, tmask, sic):
         Masked array to be summed over lat and lon to obtain sea ice area
     """
     if gphit > 0:
-        temp = e1t * e2t * tmask * sic / 100
+        temp = area * tmask * sic / 100
     return temp
 
 
 @cuda.jit()
-def arean_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
+def _arean_gpu(gphit, area, tmask, sic, temp):
     """Numba kernel executed on the gpu that computes the percentage of sea ice
     in the northern hemisphere for a given timestep and basin:
 
@@ -352,10 +242,8 @@ def arean_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
      ----------
      gphit : float32
          Masked array containing variable gphit.
-     e1t : float32
-         Masked array containing variable e1t.
-     e2t: float32
-         Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
      sic: float32
          Masked array containing variable sic for a given timestep.
      tmask: float32
@@ -369,18 +257,18 @@ def arean_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
          for the reduction kernel.
      """
     i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
+    x = i + j * sic.shape[1]
+    if(i >= sic.shape[1]):
         return
     temp[x] = 0.0
     if(gphit[0, j, i] > 0):
-        temp[x] = (e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i] * sic[j, i]
+        temp[x] = (area[0, j, i] * tmask[0, j, i] * sic[j, i]
                    / 100)
 
 
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
+@vectorize(['float32(float32, float32, float32, float32)'],
            target='cpu')
-def extn_numba(gphit, e1t, e2t, tmask, sic):
+def _extn_cpu(gphit, area, tmask, sic):
     """Vectorized numba function executed on the cpu that computes the sea ice
     extent per unit of area in the northern hemisphere for a given timestep and
     basin:
@@ -389,10 +277,8 @@ def extn_numba(gphit, e1t, e2t, tmask, sic):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Masked array containing variable sic for a given timestep.
     tmask: float32
@@ -405,12 +291,12 @@ def extn_numba(gphit, e1t, e2t, tmask, sic):
         value
     """
     if(gphit > 0 and sic > 15):
-        temp = e1t * e2t * tmask
+        temp = area * tmask
     return temp
 
 
 @cuda.jit()
-def extn_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
+def _extn_gpu(gphit, area, tmask, sic, temp):
     """Numba kernel executed on the gpu that computes the percentage of sea ice
     extent value per unit of area in the northern hemisphere for a given
     timestep and basin:
@@ -419,10 +305,8 @@ def extn_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Masked array containing variable sic for a given timestep.
     tmask: float32
@@ -436,17 +320,17 @@ def extn_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
         for the reduction kernel.
     """
     i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
+    x = i + j * sic.shape[1]
+    if(i >= sic.shape[1]):
         return
     temp[x] = 0.0
     if(gphit[0, j, i] > 0 and sic[j, i] > 15):
-        temp[x] = e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i]
+        temp[x] = area[0, j, i] * tmask[0, j, i]
 
 
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
+@vectorize(['float32(float32, float32, float32, float32)'],
            target='cpu')
-def areas_numba(gphit, e1t, e2t, tmask, sic):
+def _areas_cpu(gphit, area, tmask, sic):
     """Vectorized numba function executed on the cpu that computes the
        percentage of sea ice in the southern hemisphere for a given timestep
        and basin:
@@ -455,10 +339,8 @@ def areas_numba(gphit, e1t, e2t, tmask, sic):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Masked array containing variable sic for a given timestep.
     tmask: float32
@@ -470,12 +352,12 @@ def areas_numba(gphit, e1t, e2t, tmask, sic):
         Masked array to be summed over lat and lon to obtain sea ice area
     """
     if gphit < 0:
-        temp = e1t * e2t * tmask * sic / 100
+        temp = area * tmask * sic / 100
     return temp
 
 
 @cuda.jit()
-def areas_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
+def _areas_gpu(gphit, area, tmask, sic, temp):
     """Numba kernel executed on the gpu that computes the percentage of sea ice
     in the southern hemisphere for a given timestep and basin:
 
@@ -483,10 +365,8 @@ def areas_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Masked array containing variable sic for a given timestep.
     tmask: float32
@@ -500,18 +380,17 @@ def areas_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
         for the reduction kernel.
     """
     i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
+    x = i + j * sic.shape[1]
+    if(i >= sic.shape[1]):
         return
     temp[x] = 0.0
     if(gphit[0, j, i] < 0):
-        temp[x] = (e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i] * sic[j, i]
-                   / 100)
+        temp[x] = area[0, j, i] * tmask[0, j, i] * sic[j, i] / 100
 
 
-@vectorize(['float32(float32, float32, float32, float32, float32)'],
+@vectorize(['float32(float32, float32, float32, float32)'],
            target='cpu')
-def exts_numba(gphit, e1t, e2t, tmask, sic):
+def _exts_cpu(gphit, area, tmask, sic):
     """Vectorized numba function executed on the cpu that computes the sea ice
     extent per unit of area in the southern hemisphere for a given timestep and
     basin:
@@ -520,10 +399,8 @@ def exts_numba(gphit, e1t, e2t, tmask, sic):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Masked array containing variable sic for a given timestep.
     tmask: float32
@@ -536,12 +413,12 @@ def exts_numba(gphit, e1t, e2t, tmask, sic):
         value
     """
     if(gphit < 0 and sic > 15):
-        temp = e1t * e2t * tmask
+        temp = area * tmask
     return temp
 
 
 @cuda.jit()
-def exts_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
+def _exts_gpu(gphit, area, tmask, sic, temp):
     """Numba kernel executed on the gpu that computes the percentage of sea ice
     extent value per unit of area in the southern hemisphere for a given
     timestep and basin:
@@ -550,10 +427,8 @@ def exts_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
     ----------
     gphit : float32
         Masked array containing variable gphit.
-    e1t : float32
-        Masked array containing variable e1t.
-    e2t: float32
-        Masked array containing variable e2t.
+    area : float32
+        Masked array containing cell area.
     sic: float32
         Masked array containing variable sic for a given timestep.
     tmask: float32
@@ -567,17 +442,17 @@ def exts_numba_cuda(gphit, e1t, e2t, tmask, sic, temp):
         for the reduction kernel.
     """
     i, j = cuda.grid(2)
-    x = i + j * e1t.shape[2]
-    if(i >= e1t.shape[2]):
+    x = i + j * sic.shape[1]
+    if(i >= sic.shape[1]):
         return
     temp[x] = 0.0
     if(gphit[0, j, i] < 0 and sic[j, i] > 15):
-        temp[x] = e1t[0, j, i] * e2t[0, j, i] * tmask[0, j, i]
+        temp[x] = area[0, j, i] * tmask[0, j, i]
 
 
 @vectorize(['float64(float32, float32)', 'float64(float64, float64)'],
            target='cpu')
-def sum_red(x, y):
+def _sum_red(x, y):
     """Vectorized numba function executed on the cpu that performs a sum
     reduction:
 
@@ -597,7 +472,7 @@ def sum_red(x, y):
 
 
 @cuda.reduce
-def sum_red_cuda(x, y):
+def _sum_red_cuda(x, y):
     """Numba kernel executed on the gpu that performs a sum reduction:
 
     Parameters
diff --git a/setup.py b/setup.py
index 6a478dae540e844e2787447b0dd2a007d7b103ea..758e1af40c4182b0e604d938aad51ad554fb4678 100644
--- a/setup.py
+++ b/setup.py
@@ -81,4 +81,7 @@ setup(name='diagonals',
           'gpu': REQUIREMENTS['gpu'],
           'test': REQUIREMENTS['test'],
       },
+      cmdclass={
+            'test': RunTests,
+      },
       zip_safe=False,)
diff --git a/test/__pycache__/__init__.cpython-37.pyc b/test/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index f9dc2c9fee10bb904ca5e86f7f30655c7c164fd3..0000000000000000000000000000000000000000
Binary files a/test/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/test/unit/__pycache__/__init__.cpython-37.pyc b/test/unit/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index c5fca930321cdf105aa08f8c5572894b24be475a..0000000000000000000000000000000000000000
Binary files a/test/unit/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/test/unit/__pycache__/test_lint.cpython-37.pyc b/test/unit/__pycache__/test_lint.cpython-37.pyc
deleted file mode 100644
index 9fb66b38e57bb2b3b9632e11b9bc43eeb233feed..0000000000000000000000000000000000000000
Binary files a/test/unit/__pycache__/test_lint.cpython-37.pyc and /dev/null differ
diff --git a/test/unit/test_ohc.py b/test/unit/test_ohc.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a67a18c8e0935137f2c217e6c1ad79c8c8fb8c2
--- /dev/null
+++ b/test/unit/test_ohc.py
@@ -0,0 +1,43 @@
+import os
+import unittest
+
+import pycodestyle  # formerly known as pep8
+
+import numpy as np
+import diagonals.ohc as ohc
+
+
+class TestOhc(unittest.TestCase):
+
+    def test_weights(self):
+        weights = ohc.get_weights(
+            layers=((0, 2),),
+            mask=np.ones((2, 2), dtype=np.float32),
+            cell_height=np.array(((1., 2.), (4., 3.)), dtype=np.float32),
+            cell_top_depth=np.array(((0., 0.), (1., 2.)), dtype=np.float32)
+        )
+        expected_result = 1020 * 4000 * np.array(((1., 2.), (1., 0.)))
+        self.assertTrue(np.all(weights == expected_result))
+
+    def test_weights_all_land(self):
+        weights = ohc.get_weights(
+            layers=((0, 2),),
+            mask=np.zeros((2, 2), dtype=np.float32),
+            cell_height=np.array(((1., 2.), (4., 3.)), dtype=np.float32),
+            cell_top_depth=np.array(((0., 0.), (1., 2.)), dtype=np.float32)
+        )
+        self.assertTrue(np.all(weights == np.zeros((2, 2), dtype=np.float32)))
+
+    def test_weights_mul_layers(self):
+        weights = ohc.get_weights(
+            layers=((0, 2), (2, 4)),
+            mask=np.ones((2, 2), dtype=np.float32),
+            cell_height=np.array(((1., 2.), (4., 3.)), dtype=np.float32),
+            cell_top_depth=np.array(((0., 0.), (1., 2.)), dtype=np.float32)
+        )
+        print(weights)
+        expected_result = 1020 * 4000 * np.array((
+            ((1., 2.), (1., 0.)),
+            ((0., 0.), (2., 2.))
+        ))
+        self.assertTrue(np.all(weights == expected_result))