diff --git a/VERSION b/VERSION index 600e081493847643ace20b59399a6279bda1b409..be00304020f74e0a67c1537305c38e788a93dfe6 100644 --- a/VERSION +++ b/VERSION @@ -1,2 +1,2 @@ -3.0.0b36 +3.0.0b37 diff --git a/doc/source/conf.py b/doc/source/conf.py index dc6936446aaa417bd93ea8272ce7854ade2ca6a9..cc4c099c886456b2cb831fa7ce586b02f7d978ee 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -64,7 +64,7 @@ copyright = u'2016, BSC-CNS Earth Sciences Department' # The short X.Y version. version = '3.0b' # The full version, including alpha/beta/rc tags. -release = '3.0.0b36' +release = '3.0.0b37' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -187,7 +187,7 @@ html_static_path = ['_static'] #html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'EarthDiagnosticsdoc' +htmlhelp_basename = 'EarthDiagnosticsd' # -- Options for LaTeX output --------------------------------------------- diff --git a/doc/source/config_file.rst b/doc/source/config_file.rst new file mode 100644 index 0000000000000000000000000000000000000000..b4bd369813afe34a9155bd45422413ac5b959272 --- /dev/null +++ b/doc/source/config_file.rst @@ -0,0 +1,233 @@ +Configuration file options +========================== + +This section contains the list and explanation of all the options that are available in the configuration file. Use +it as a reference while preparing your configuration file. Each subsection will refer to the matching section from the +config file. The explanation of those subsections may itself be divided for the sake of clarity, but these further divisions +have nothing to do with the config file syntax itself. + +DIAGNOSTICS +----------- + +This section contains the general configuration for the diagnostics. 
The explanation has been divided in two subsections: the first +one will cover all the mandatory options that you must specify in every configuration, while the second will cover all +the optional configurations. + +Mandatory configurations +~~~~~~~~~~~~~~~~~~~~~~~~ + +* SCRATCH_DIR: + Temporary folder for the calculations. Final results will never be stored here. + +* DATA_DIR: + ':' separated list of folders to look for data in. It will look for files in the paths $DATA_FOLDER/$EXPID and + $DATA_FOLDER/$DATA_TYPE/$MODEL/$EXPID + +* CON_FILES: + Folder containing mask and mesh files for the dataset. + +* FREQUENCY: + Default data frequency to be used by the diagnostics. Some diagnostics can override this configuration or even + ignore it completely. + + +Optional configurations +~~~~~~~~~~~~~~~~~~~~~~~ + +* DATA_ADAPTOR + This is used to choose the mechanism for storing and retrieving data. Options are CMOR (for our own experiments) or + THREDDS (for anything else). Default value is CMOR + +* DATA_TYPE + Type of the dataset to use. It can be exp, obs or recon. Default is exp. + +* DATA_CONVENTION + Convention to use for file paths and names and variable naming among other things. Can be SPECS, PRIMAVERA or CMIP6. + Default is SPECS. + +* CDFTOOLS_PATH + Path to the folder containing CDFTOOLS executables. By default it is empty, so CDFTOOLS binaries must be added to the + system path. + +* MAX_CORES + Maximum number of cores to use. By default the diagnostics will use all cores available to them. It is not + necessary when launching through a scheduler, as Earthdiagnostics can detect how many cores the scheduler has + allocated to it. + +EXPERIMENT +---------- + +This section contains options related to the experiment's definition or configuration. + +* MODEL + Name of the model used for the experiment. + +* MODEL_VERSION + Model version. Used to get the correct mask and mesh files + +* ATMOS_TIMESTEP + Time between outputs from the atmosphere. 
This is not the model simulation timestep! + +* OCEAN_TIMESTEP + Time between outputs from the ocean. This is not the model simulation timestep! + +* ATMOS_GRID + Atmospheric grid definition. Will be used as a default target for interpolation diagnostics. + +* INSTITUTE + Institute that made the experiment, observation or reconstruction + +* EXPID + Unique identifier for the experiment + +* NAME + Experiment's name. By default it is the EXPID. + +* STARTDATES + Startdates to run as a space separated list + +* MEMBER + Members to run as a space separated integer list + +* MEMBER_DIGITS + Minimum number of digits to compose the member name. By default it is 1. For example, for member 1 member name + will be fc1 if MEMBER_DIGITS is 1 or fc01 if MEMBER_DIGITS is 2 + +* CHUNK_SIZE + Length of the chunks in months + +* CHUNKS + Number of chunks to run + +* CALENDAR + Calendar to use for date calculation. All calendars supported by Autosubmit are available. Default is 'standard' + +CMOR +---- + +In this section, you can control how the cmorization process will work. All options belonging to this section are optional. + +Cmorization options +~~~~~~~~~~~~~~~~~~~ + +These options control when and which variables will be cmorized. + +* FORCE + If True, launches the cmorization, regardless of existence of the extracted files or the package containing the + online-cmorized ones. If False, only the non-present chunks will be cmorized. Default value is False + +* FORCE_UNTAR + Unpacks the online-cmorized files regardless of existence of extracted files. If FORCE is True, this parameter has + no effect. If False, only the non-present chunks will be unpacked. Default value is False. + +* FILTER_FILES + Only cmorize original files containing any of the given strings. This is a space separated list. Default is the + empty string. + +* OCEAN_FILES + Boolean flag to activate or not NEMO files cmorization. Default is True. 
+ +* ATMOSPHERE_FILES + Boolean flag to activate or not IFS files cmorization. Default is True. + +* USE_GRIB + Boolean flag to activate or not GRIB files cmorization for the atmosphere. If activated and no GRIB files are present, + it will cmorize using the MMA files instead (as if it was set to False). Default is True. + +* CHUNKS + Space separated list of chunks to be cmorized. If not provided, all chunks are cmorized + +* VARIABLE_LIST + Space separated list of variables to cmorize. Variables must be specified as domain:var_name. If none is specified, + all the variables will be cmorized + +Grib variables extraction +************************* + +These three options are used to configure the variables to be CMORized from the grib atmospheric files. +They must be specified using the IFS code in a list separated by commas. + +You can also specify the levels to extract using one of the following syntaxes: + +* VARIABLE_CODE +* VARIABLE_CODE:LEVEL +* VARIABLE_CODE:LEVEL_1-LEVEL_2-...-LEVEL_N +* VARIABLE_CODE:MIN_LEVEL:MAX_LEVEL:STEP + +Some examples to clarify it further: +* Variable with code 129 at level 30000: 129:30000 +* Variable with code 129 at levels 30000, 40000 and 60000: 129:30000-40000-60000 +* Variable with code 129 at levels between 30000 and 60000 with 10000 intervals: + 129:30000:60000:10000 equivalent to 129:30000-40000-50000-60000 + +* ATMOS_HOURLY_VARS + Configuration of variables to be extracted on an hourly basis + +* ATMOS_DAILY_VARS + Configuration of variables to be extracted on a daily basis + +* ATMOS_MONTHLY_VARS + Configuration of variables to be extracted on a monthly basis + +Metadata options +~~~~~~~~~~~~~~~~ +All the options in this subsection will serve just to add the given values to the homonymous attributes in the +cmorized files. 
+ +* ASSOCIATED_EXPERIMENT + Default value is 'to be filled' + +* ASSOCIATED_MODEL + Default value is 'to be filled' + +* INITIALIZATION_DESCRIPTION + Default value is 'to be filled' + +* INITIALIZATION_METHOD + Default value is '1' + +* PHYSICS_DESCRIPTION + Default value is 'to be filled' + +* PHYSICS_VERSION + Default value is '1' + +* SOURCE + Default value is 'to be filled' + + +THREDDS +------- + +For now, there is only one option for the THREDDS server configuration. + +* SERVER_URL + THREDDS server URL + + +ALIAS +----- + +This config file section is different from all the others because it does not contain a set of configurations. Instead, +in this section the user can define a set of aliases to be able to launch their most used configurations with ease. To do +this, the user must add an option named after the desired alias and assign to it the configuration or configurations to launch +when this ALIAS is invoked. See the next example: + +.. code-block:: ini + + ALIAS_NAME = diag,opt1,opt2 diag,opt1new,opt2 + +In this case, the user has defined a new alias 'ALIAS_NAME' that can be used to launch two times the diagnostic 'diag', +the first with the options 'opt1' and 'opt2' and the second replacing 'opt1' with 'opt1new'. + +In this example, configuring the DIAGS as + +.. code-block:: ini + + DIAGS = ALIAS_NAME + +will be identical to + +.. 
code-block:: ini + + DIAGS = diag,opt1,opt2 diag,opt1new,opt2 diff --git a/doc/source/diagnostic_list.rst b/doc/source/diagnostic_list.rst index f8392b48d49bc1ee0072fc1481e0f8abccda9f16..08f7016554322b4dd7d8b4035808c853f9aa60f5 100644 --- a/doc/source/diagnostic_list.rst +++ b/doc/source/diagnostic_list.rst @@ -1,3 +1,5 @@ +# coding=utf-8 + Diagnostic list =============== @@ -10,7 +12,6 @@ Remember that diagnostics are specified separated by spaces while options are gi DIAGS = diag1 diag2,option1,option2 diag3 - General ------- diff --git a/doc/source/index.rst b/doc/source/index.rst index c7858dc9f19a612427cff53bb5a6be5e26fc9152..af80a81d839e7576465d416b928020bcaea78874 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -10,6 +10,7 @@ Welcome to Earth Diagnostics's documentation! :maxdepth: 3 tutorial + config_file diagnostic_list tips errors diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index 136c7bc307c5f4a0a2d513c89896392922ef4627..c2b99900d72bea39a2fe0f65ac5a2ccf4811c0f6 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -11,18 +11,27 @@ From now on this tutorial will guide you through all the process from installati Installation ------------ -For now, you only have one option: download the diagnostics directly from BSC-ES's Gitlab: +If you have access to the BSC-ES machines, you don't need to install it. Just use the available module: + +.. code-block:: + + module load EarthDiagnostics + + +In case that you need a custom installation for development or can not use the BSC-ES machines, +install it from BSC-ES GitLab repository: .. 
code-block:: sh - git clone https://earth.bsc.es/gitlab/es/ocean_diagnostics.git + pip install git+https://earth.bsc.es/gitlab/es/ocean_diagnostics.git You will also need -* CDO version 1.6.9 (other versions could work, but this is the one we use) +* CDO version 1.7.2 (other versions could work, but this is the one we use) * NCO version 4.5.4 or newer -* Python 2.7 or newer (but no 3.x) with Autosubmit, CDO and NCO packages, among others. A virtual environment with all requisites fullfilled is available at /shared/earth/ClimatePrediction/EarthDiagnostics -* Access to CDFTOOLS_3.0 executables for BSC-ES. At this point, those are located at /shared/earth/ClimatePrediction/CDFTOOLS_CMOR/bin. +* Python 2.7 or newer (but no 3.x) with bscearth.utils, CDO and NCO packages, among others. +* Access to CDFTOOLS_3.0 executables for BSC-ES. The source code is available on Github (https://github.com/jvegasbsc/CDFTOOLS) and it can be compiled with CMake + Creating a config file ---------------------- diff --git a/earthdiagnostics/EarthDiagnostics.pdf b/earthdiagnostics/EarthDiagnostics.pdf index f0f1f04f2798326fe17f4e3df41e81e02d64530c..e28893111aa256d7606e968c8851c20450c95f5d 100644 Binary files a/earthdiagnostics/EarthDiagnostics.pdf and b/earthdiagnostics/EarthDiagnostics.pdf differ diff --git a/earthdiagnostics/cmor_tables/default.csv b/earthdiagnostics/cmor_tables/default.csv index 26c06435bce63a944bc4d508d529143314868c2a..811422d90c4e40f3db21315432063028d98ec89c 100644 --- a/earthdiagnostics/cmor_tables/default.csv +++ b/earthdiagnostics/cmor_tables/default.csv @@ -315,7 +315,7 @@ tintpp,tintpp,global_total_integrated_primary_production,global total integrated tnfix,tnfix,global_total_nitrogen_fixation,global total nitrogen fixation,ocnBgchem,,,,,, tdenit,tdenit,total_denitrification,Total denitrification,ocnBgchem,,,,,, inttpp,inttpp,total_primary_production_of_phyto,Total Primary production of phyto,ocnBgchem,,,,,, -inttppnew,inttppnew,new_primary_production_of_phyto,New 
Primary production of phyto,ocnBgchem,,,,,, +intppnew,intppnew,new_primary_production_of_phyto,New Primary production of phyto,ocnBgchem,,,,,, intppphy,intppphy,vertically_integrated_primary_production_by_nanophy,Vertically integrated primary production by nanophy,ocnBgchem,,,,,, ppphy,ppphy,primary_production_of_nanooplakton,Primary production of nanooplakton,ocnBgchem,,,,,, intpbcal,intpbcal,vertically_integrated_of_calcite_productdic_fluxion,Vertically integrated of calcite productDIC fluxion,ocnBgchem,,,,,, diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 71efc421bf2856b4bc69d80664581dcd7c4d0e96..7ed10db32f6fa885533b3062aa4fda1d0c338363 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -393,6 +393,7 @@ class Cmorizer(object): self.data_manager.send_file(temp, var_cmor.domain, var_cmor.short_name, self.startdate, self.member, frequency=frequency, rename_var=variable, date_str=date_str, region=region, move_old=True, grid=var_cmor.grid, cmorized=True) + Log.info('Variable {0.domain}:{0.short_name} processed', var_cmor) def get_date_str(self, file_path): file_parts = os.path.basename(file_path).split('_') diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index fc0a684938678df831c05c191c17fe5cd776623d..ac26952e87d5dcd8f5e3586ffed092a8f53a1e77 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -431,8 +431,8 @@ class CMORManager(DataManager): if self.config.cmor.chunk_cmorization_requested(chunk): Log.info('Unpacking cmorized data for {0} {1} {2}...', startdate, member, chunk) Utils.untar(filepaths, self.cmor_path) - self._correct_paths(startdate) - self.create_links(startdate) + self._correct_paths(startdate, member) + self.create_links(startdate, member) return True return False @@ -450,11 +450,11 @@ class CMORManager(DataManager): filepaths += glob.glob(os.path.join(tar_original_files, 'outputs', file_name)) return filepaths - def 
_correct_paths(self, startdate): + def _correct_paths(self, startdate, member): self._remove_extra_output_folder() - self._fix_model_as_experiment_error(startdate) + self._fix_model_as_experiment_error(startdate, member) - def _fix_model_as_experiment_error(self, startdate): + def _fix_model_as_experiment_error(self, startdate, member): if self.experiment.experiment_name != self.experiment.model: bad_path = os.path.join(self.cmor_path, self.experiment.institute, self.experiment.model, self.experiment.model) @@ -462,6 +462,10 @@ class CMORManager(DataManager): for (dirpath, dirnames, filenames) in os.walk(bad_path, False): for filename in filenames: + if '_S{0}_'.format(startdate) in filename: + continue + if '_r{0}i1p1_'.format(member+1) in filename: + continue filepath = os.path.join(dirpath, filename) good = filepath.replace('_{0}_output_'.format(self.experiment.model), '_{0}_{1}_S{2}_'.format(self.experiment.model, @@ -480,25 +484,14 @@ class CMORManager(DataManager): bad_path = os.path.join(self.cmor_path, 'output') if os.path.exists(bad_path): Log.debug('Moving CMOR files out of the output folder') - CMORManager.copytree(bad_path, self.cmor_path) - shutil.rmtree(bad_path) + Utils.move_tree(bad_path, self.cmor_path) Log.debug('Done') - @staticmethod - def copytree(source, destiny): - if not os.path.exists(destiny): - os.makedirs(destiny) - shutil.copystat(source, destiny) - lst = os.listdir(source) - for item in lst: - item_source = os.path.join(source, item) - item_destiny = os.path.join(destiny, item) - if os.path.isdir(item_source): - CMORManager.copytree(item_source, item_destiny) - else: - shutil.copy2(item_source, item_destiny) - - def create_links(self, startdate): + def create_links(self, startdate, member=None): + if member: + member_str = 'r{0}i1p1'.format(member+1) + else: + member_str = None Log.info('Creating links for CMOR files ({0})', startdate) path = self._get_startdate_path(startdate) for freq in os.listdir(path): @@ -506,6 +499,8 @@ class 
CMORManager(DataManager): for domain in os.listdir(os.path.join(path, freq)): for var in os.listdir(os.path.join(path, freq, domain)): for member in os.listdir(os.path.join(path, freq, domain, var)): + if member_str != member: + continue for name in os.listdir(os.path.join(path, freq, domain, var, member)): filepath = os.path.join(path, freq, domain, var, member, name) if os.path.isfile(filepath): diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index f5a84ec6b030e8d0a3536f6e8c4879843c9e76c2..d6ea1383b12b0b80328b86869660faa20b213bce 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -246,6 +246,25 @@ class Utils(object): if os.path.isfile(path): os.remove(path) + @staticmethod + def copy_tree(source, destiny): + if not os.path.exists(destiny): + os.makedirs(destiny) + shutil.copystat(source, destiny) + lst = os.listdir(source) + for item in lst: + item_source = os.path.join(source, item) + item_destiny = os.path.join(destiny, item) + if os.path.isdir(item_source): + Utils.copy_tree(item_source, item_destiny) + else: + shutil.copy2(item_source, item_destiny) + + @staticmethod + def move_tree(source, destiny): + Utils.copy_tree(source, destiny) + shutil.rmtree(source) + @staticmethod def get_file_hash(filepath): """ diff --git a/earthdiagnostics/variable_alias/cmip6.csv b/earthdiagnostics/variable_alias/cmip6.csv index e996633ec04d5e21a7fdd0f7001c210caaecce47..d5860676f88a82f02a7051f261a52e4e2c8de506 100644 --- a/earthdiagnostics/variable_alias/cmip6.csv +++ b/earthdiagnostics/variable_alias/cmip6.csv @@ -34,8 +34,7 @@ c-export,c-export,, tintpp,tintpp,, tnfix,tnfix,, tdenit,tdenit,, -inttpp,inttpp,, -inttppnew,inttppnew,, +intppnew:inttppnew,intppnew,, inttpbfe,pbfe,, intdic,intdic,, o2min,o2min,, @@ -45,7 +44,7 @@ intppphy,intppphy,, intppphy2,intppdiat,, ppphy ,ppphy ,, ppphy2 ,pdi,, -intpp,intpp,, +intpp:inttpp,intpp,, intpbfe,intpbfe,, intpbsi,intpbsi,, intpbcal,intpbcal,, diff --git a/launch_diags.sh b/launch_diags.sh 
index cdeaff799e92e8363c03ca386781417ffc1b43e1..bbce66f32d8831fcd94074842312462e1c02089e 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -15,10 +15,10 @@ PATH_TO_VIRTUALENV=~jvegas/virtualenvs/diags/bin module purge module load NCO/4.5.4-foss-2015a module load CDO/1.7.2-foss-2015a -module load CDFTOOLS/3.0a2-foss-2015a +module load CDFTOOLS/3.0a5-foss-2015a source ${PATH_TO_VIRTUALENV}/activate export PYTHONPATH=${PATH_TO_DIAGNOSTICS}:${PYTHONPATH} cd ${PATH_TO_DIAGNOSTICS}/earthdiagnostics/ -./earthdiags.py -lc DEBUG -f ${PATH_TO_CONF_FILE} +./earthdiags.py -f ${PATH_TO_CONF_FILE} diff --git a/test/unit/test_areamoc.py b/test/unit/test_areamoc.py index fb23ff38dca894d3822aa7db651c88418c8c2a2b..15d4bcd750f752c2989c7f259fb45b201f1d3d58 100644 --- a/test/unit/test_areamoc.py +++ b/test/unit/test_areamoc.py @@ -39,4 +39,5 @@ class TestAreaMoc(TestCase): AreaMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.psi), 'Area MOC Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0') + self.assertEquals(str(self.psi), 'Area MOC Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0 ' + 'Basin: Antarctic_Ocean')