diff --git a/.codacy.yml b/.codacy.yml new file mode 100644 index 0000000000000000000000000000000000000000..445a627b511319070406c4ab67202651e780aea8 --- /dev/null +++ b/.codacy.yml @@ -0,0 +1,25 @@ +# codacy configuration file + +--- + +engines: + coverage: + enabled: true + exclude_paths: [ + 'tests', + ] + metrics: + enabled: true + duplication: + enabled: true + prospector: + enabled: true + pylint: + enabled: true + python_version: 3 + +exclude_paths: [ + 'doc/**', + 'test/**', + 'earthdiagnostics/cmor_tables/**', +] diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000000000000000000000000000000000..6c77e847caf3215fbe9907faf6eda073d963b626 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,7 @@ +[run] +branch = True +source = earthdiagnostics + +[html] +title = Coverage report for EarthDiagnostics + diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..2a7a00f3dc536f4c4c3827bea45539afd498b435 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,26 @@ +# EditorConfig is awesome: http://EditorConfig.org + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +# Set default charset +charset = utf-8 + +# Matches multiple files with brace expansion notation + +# 4 space indentation +[*.{py,java,r,R}] +indent_size = 4 + +# 2 space indentation +[*.{js,json,yml,html,xml,ncl}] +indent_size = 2 + +[*.{md,Rmd}] +trim_trailing_whitespace = false + diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..fdc3755229b9f14f9814b69c9093ca6ade6c2064 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,19 @@ +before_script: + - export GIT_SSL_NO_VERIFY=1 + - git submodule sync --recursive + - git submodule update --init --recursive + - export PATH="$HOME/miniconda2/bin:$PATH" + +test_python2: + script: + - conda env update -f 
environment.yml -n earthdiagnostics2 python=2 + - source activate earthdiagnostics + - coverage run -m unittest discover + +test_python3: + script: + - conda env update -f environment.yml -n earthdiagnostics3 python=3 + - source activate earthdiagnostics + - coverage run -m unittest discover + - coverage xml + - python-codacy-coverage -r coverage.xml diff --git a/.prospector.yml b/.prospector.yml new file mode 100644 index 0000000000000000000000000000000000000000..b9c6fa952133de951c9005edb1b2f01d9bcb851b --- /dev/null +++ b/.prospector.yml @@ -0,0 +1,36 @@ +# prospector configuration file + +--- + +output-format: grouped + +strictness: veryhigh +doc-warnings: true +test-warnings: true +member-warnings: false + +pyroma: + run: true + +pydocroma: + run: true + +pep8: + disable: [ + E501, # Line-length, already controlled by pylint + ] + +pep257: + run: true + # see http://pep257.readthedocs.io/en/latest/error_codes.html + disable: [ + # For short descriptions it makes sense not to end with a period: + D400, # First line should end with a period + # Disable because not part of PEP257 official convention: + D203, # 1 blank line required before class docstring + D212, # Multi-line docstring summary should start at the first line + D213, # Multi-line docstring summary should start at the second line + D404, # First word of the docstring should not be This + D107, # We are using numpy style and constructor should be documented in class docstring + D105, # Docstring in magic methods should not be required: we all now what they are for + ] diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..db7741b994ec5e5720e803d9e73b639a55435ff8 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,407 @@ +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. 
+ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. This option is deprecated +# and it will be removed in Pylint 2.0. +optimize-ast=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). 
You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". This option is deprecated +# and it will be removed in Pylint 2.0. 
+files-output=no + +# Tells whether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=120 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format=LF + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. 
+notes=FIXME,FIX-ME,XXX,TODO + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,future.builtins + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. 
Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_,logger + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=yes + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names 
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Argument names that match this expression will be ignored. 
Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec,optparse + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. 
+defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception diff --git a/README b/README deleted file mode 100644 index f247d009952abfbc0a536addd6730abcd6f5f8a6..0000000000000000000000000000000000000000 --- a/README +++ /dev/null @@ -1,12 +0,0 @@ -This tool is a set of diagnostics used at BSC-ES department for NEMO and EC-EARTH models postprocessing. -They are based on CDO, NCO and CDFTOOLS 3.0. For CDFTOOLS, a custom build is required. - -FULL DOCUMENTATION AND HOW TOs -============================== - -Check the Earth Diagnostics documentation in PDF format in EarthDiagnostics.pdf available also in this folder. 
- -CONTACT -======= - -For any doubts or suggestions, contact javier.vegas@bsc.es \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..db2b71e88c735ffbd429b0e2ca9833d2e0355fe3 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +[![Codacy Badge](https://api.codacy.com/project/badge/Grade/206d0f75e8c64742a4fb4a0bd3015565)](https://www.codacy.com/app/BSC-Earth/earthdiagnostics?utm_source=earth.bsc.es&utm_medium=referral&utm_content=gitlab/es/earthdiagnostics&utm_campaign=Badge_Grade) +[![Codacy Badge](https://api.codacy.com/project/badge/Coverage/206d0f75e8c64742a4fb4a0bd3015565)](https://www.codacy.com/app/BSC-Earth/earthdiagnostics?utm_source=earth.bsc.es&utm_medium=referral&utm_content=gitlab/es/earthdiagnostics&utm_campaign=Badge_Coverage) + +This tool is a set of diagnostics used at BSC-ES department for NEMO and EC-EARTH models postprocessing. +They are based on CDO, NCO and CDFTOOLS 3.0. For CDFTOOLS, a custom build is required. 
+ +FULL DOCUMENTATION AND HOW TOs +============================== + +Latest version: + +[![Readthedocs](https://readthedocs.org/projects/pip/badge/)](http://earthdiagnostics.readthedocs.io/en/latest/) + +Latest release: + +[![Readthedocs](https://readthedocs.org/projects/pip/badge/?version=stable)](http://earthdiagnostics.readthedocs.io/en/stable/) + +CONTACT +======= + +For any doubts or suggestions, contact javier.vegas[at]bsc.es diff --git a/VERSION b/VERSION index d1a884396e844471a12b1b30a7f60150aaac1a5b..2638df168bc2aebcad08a8bc38589754868cc234 100644 --- a/VERSION +++ b/VERSION @@ -1,2 +1,2 @@ -3.0.0rc2 +3.0.0rc3 diff --git a/bin/earthdiags b/bin/earthdiags index 6ac2607276ec2558b7179ce10e942048f989218c..174528e3210ded69220e9aae3d5e05a614f09d90 100644 --- a/bin/earthdiags +++ b/bin/earthdiags @@ -22,5 +22,6 @@ def main(): os._exit(1) os._exit(0) + if __name__ == "__main__": main() diff --git a/diags.conf b/diags.conf index d6504e3d592003ced64cc7594e42b4e3831369c5..6ea8d8c74fe2dca09fb218993b74041ee3c3292a 100644 --- a/diags.conf +++ b/diags.conf @@ -1,44 +1,44 @@ [DIAGNOSTICS] # Data adaptor type: CMOR (for our experiments), THREDDS (for other experiments) -DATA_ADAPTOR = OBSRECON +DATA_ADAPTOR = CMOR # Path to the folder where you want to create the temporary files SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive # Specify if your data is from an experiment (exp), observation (obs) or reconstructions (recon) -DATA_TYPE = recon +DATA_TYPE = exp # CMORization type to use. Important also for THREDDS as it affects variable name conventions. # Options: SPECS (default), PRIMAVERA, CMIP6 -DATA_CONVENTION = SPECS +DATA_CONVENTION = PRIMAVERA # Path to NEMO's mask and grid files needed for CDFTools CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). 
If you are using the diagnostics just to CMORize, leave it # empty -#DIAGS = discretize,atmos,sfcWind,,0,40 -DIAGS = interpcdo,ocean,tas,r240x121,bilinear,False,ecmwf,False -# DIAGS = monmean,ocean,uovmean0.0-30.0m,day monmean,ocean,vovmean0.0-30.0m,day +DIAGS = regmean,ocean,thetao +# DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. -FREQUENCY = weekly +FREQUENCY = mon # Path to CDFTOOLS binaries CDFTOOLS_PATH = ~jvegas/CDFTOOLS/bin # If true, copies the mesh files regardless of presence in scratch dir RESTORE_MESHES = False -# Limits the maximum amount of threads used. Default: 0 (no limitation, one per virtual core available)z -MAX_CORES = 2 +# Limits the maximum amount of threads used. Default: 0 (no limitation, one per virtual core available) +MAX_CORES = 1 [CMOR] # If true, recreates CMOR files regardless of presence. Default = False FORCE = False # If true, CMORizes ocean files. Default = True -OCEAN_FILES = True +OCEAN_FILES = False FILTER_FILES = # If true, CMORizes atmosphere files. Default = True ATMOSPHERE_FILES = False # You can specify the variable to cmorize, in the way domain:var domain:var2 domain2:var VARIABLE_LIST = +CHUNK_LIST = 1 # Variables to be CMORized from the grib atmospheric files, separated by comma. 
# You can also specify the levels to extract using the following syntax @@ -63,21 +63,25 @@ ATMOS_MONTHLY_VARS = 167, 201, 202, 165, 166, 151, 144, 228, 205, 182, 164, 146, # PHYSICS_VERSION = 1 # PHYSICS_DESCRIPTION = # ASSOCIATED_MODEL = -# SOURCE = 'EC-Earthv2.3.0, ocean: Nemo3.1, ifs31r1, lim2 +# SOURCE = EC-Earthv2.3.0, ocean: Nemo3.1, ifs31r1, lim2 +VERSION = v20170705 +ACTIVITY = CMIP +#ACTIVITY = CMIP_ece2cmor [THREDDS] SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard -INSTITUTE = ecmwf -MODEL = erainterim +INSTITUTE = EC-Earth-Consortium +MODEL = EC-Earth3-HR +NAME = historical # Model version: Available versions -MODEL_VERSION = +MODEL_VERSION =Ec3.2_O25L75 # Atmospheric output timestep in hours -ATMOS_TIMESTEP = 6 +ATMOS_TIMESTEP = 3 # Ocean output timestep in hours -OCEAN_TIMESTEP = 6 +OCEAN_TIMESTEP = 3 # For those who use Autosubmit, this will be easy # EXPID is the unique identifier of the experiment. @@ -87,16 +91,18 @@ OCEAN_TIMESTEP = 6 # if 2, fc00 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment -EXPID = testing_erainterim -#STARTDATES = 199[3-9]0[1-9]01 199[3-9]1[0-2]01 200[0-9]0[1-9]01 200[0-9]1[0-2]01 201[0-5]0[1-9]01 201[0-5]1[0-2]01 -#STARTDATES = 19840101 19850101 -STARTDATES = 19960104 -MEMBERS = 0 +EXPID = a0n8 +STARTDATES = 19900101 +MEMBERS = fc0 MEMBER_DIGITS = 1 CHUNK_SIZE = 1 CHUNKS = 1 # CHUNKS = 1 +[REPORT] +MAXIMUM_PRIORITY = 7 +PATH = $HOME/reports/a0n8 + # This ALIAS section is a bit different # Inside this, you can provide alias for frequent diagnostics calls. 
@@ -110,17 +116,17 @@ STC = mocarea,0,25,0,200,Pac mocarea,-25,0,0,200,Pac mocarea,0,25,0,200,Atl moca HEAT_SAL_MXL = mlotstsc mlotsthc LMSALC = vertmeanmeters,so,300,5400 USALC = vertmeanmeters,so,0,300 -OHC = ohc,glob,0,1,10 +OHC = ohc,glob,0,0,2000 XOHC = ohc,glob,1,0,0 -LOHC = ohc,glob,0,23,46 -MOHC = ohc,glob,0,18,22 -UOHC = ohc,glob,0,1,17 +LOHC = ohc,glob,0,700,2000 +MOHC = ohc,glob,0,300,700 +UOHC = ohc,glob,0,0,300 OHC_SPECIFIED_LAYER = ohclayer,0,300 ohclayer,300,800 3DTEMP = interp,thetao 3DSAL = interp,so -TSEC_AVE190-220E =avgsection,ocean,thetao,190,220,-90,90,regular -SSEC_AVE190-220E =avgsection,ocean,so,190,220,-90,90,regular -VERT_SSECTIONS = cutsection,so,Z,0 cutsection,so,Z,45 cutsection,so,Z,-45 cutsection,so,M,-30 cutsection,so,M,80 +TSEC_AVE190-220E =avgsection,thetao,190,220,-90,90 +SSEC_AVE190-220E =avgsection,so,190,220,-90,90 +VERT_SSECTIONS = cutsection,so,Z,0 cutsection,so,Z,45 cutsection,so,Z,-45 cutsection,so,M,-30 cutsection,so,M,180 cutsection,so,M,80 VERT_TSECTIONS = cutsection,thetao,Z,0 cutsection,thetao,Z,45 cutsection,thetao,Z,-45 cutsection,thetao,M,-30 cutsection,thetao,M,180 cutsection,thetao,M,80 SIASIESIV = siasiesiv,glob diff --git a/doc/source/codedoc/earthdiagnostics.rst b/doc/source/codedoc/earthdiagnostics.rst index 0bd7d0319695a2f2fb0c2e18fcdcd72d70a1258a..77aa310ed68e695e7295ebf97d51d25348027059 100644 --- a/doc/source/codedoc/earthdiagnostics.rst +++ b/doc/source/codedoc/earthdiagnostics.rst @@ -43,6 +43,13 @@ earthdiagnostics.constants :inherited-members: :members: +earthdiagnostics.datafile +------------------------- +.. automodule:: earthdiagnostics.datafile + :show-inheritance: + :inherited-members: + :members: + earthdiagnostics.datamanager ---------------------------- .. automodule:: earthdiagnostics.datamanager @@ -50,7 +57,6 @@ earthdiagnostics.datamanager :inherited-members: :members: - earthdiagnostics.diagnostic --------------------------- .. 
automodule:: earthdiagnostics.diagnostic @@ -65,9 +71,44 @@ earthdiagnostics.earthdiags :inherited-members: :members: -earthdiagnostics.parser +earthdiagnostics.frequency +-------------------------- +.. automodule:: earthdiagnostics.frequency + :show-inheritance: + :inherited-members: + :members: + +earthdiagnostics.modellingrealm +------------------------------- +.. automodule:: earthdiagnostics.modellingrealm + :show-inheritance: + :inherited-members: + :members: + +earthdiagnostics.obsreconmanager +-------------------------------- +.. automodule:: earthdiagnostics.obsreconmanager + :show-inheritance: + :inherited-members: + :members: + +earthdiagnostics.publisher +-------------------------- +.. automodule:: earthdiagnostics.publisher + :show-inheritance: + :inherited-members: + :members: + +earthdiagnostics.singleton +-------------------------- +.. automodule:: earthdiagnostics.singleton + :show-inheritance: + :inherited-members: + :members: + +earthdiagnostics.threddsmanager ----------------------- -.. automodule:: earthdiagnostics.parser +.. automodule:: earthdiagnostics.threddsmanager :show-inheritance: :inherited-members: :members: @@ -85,3 +126,18 @@ earthdiagnostics.variable :show-inheritance: :inherited-members: :members: + +earthdiagnostics.variable_type +------------------------------ +.. automodule:: earthdiagnostics.variable_type + :show-inheritance: + :inherited-members: + :members: + + +earthdiagnostics.workmanager +---------------------------- +.. automodule:: earthdiagnostics.work_manager + :show-inheritance: + :inherited-members: + :members: diff --git a/doc/source/conf.py b/doc/source/conf.py index 81114e45817e1bdd9b631143c94ce4be611eefab..436019f12521accc66aa512561525c49c7df0434 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -20,12 +20,12 @@ import sys # documentation root, use os.path.abspath to make it absolute, like shown here. 
# sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../..')) -print os.path.abspath('../..') +print(os.path.abspath('../..')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -39,6 +39,7 @@ extensions = [ 'sphinx.ext.pngmath', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', + 'sphinx.ext.napoleon', ] # Add any paths that contain templates here, relative to this directory. @@ -62,9 +63,9 @@ copyright = u'2016, BSC-CNS Earth Sciences Department' # built documents.source ~/vi # # The short X.Y version. -version = '3.0b' +version = '3.0rc' # The full version, including alpha/beta/rc tags. -release = '3.0.0b57' +release = '3.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/earthdiagnostics/CDFTOOLS_meteofrance.namlist b/earthdiagnostics/CDFTOOLS_meteofrance.namlist new file mode 100644 index 0000000000000000000000000000000000000000..8f747650cd51ec119ffe48e072dad4ab4ef9bc38 --- /dev/null +++ b/earthdiagnostics/CDFTOOLS_meteofrance.namlist @@ -0,0 +1,211 @@ + ! Thu Jun 30 16:19:27 2016 + ! Namelist automatically generated by PrintCdfNames + ! Do not edit without changing its name ... + ! 
------------------------------------------ + &NAMDIM + CN_X = "i" + , + CN_Y = "j" + , + CN_Z = "lev" + , + CN_T = "time" + + / + &NAMDIMVAR + CN_VLON2D = "lon" + , + CN_VLAT2D = "lat" + , + CN_VDEPTHT = "lev" + , + CN_VDEPTHU = "lev" + , + CN_VDEPTHV = "lev" + , + CN_VDEPTHW = "lev" + , + CN_VTIMEC = "time" + , + CN_MISSING_VALUE = "_FillValue" + + / + &NAMMETRICS + CN_VE1T = "e1t" + , + CN_VE1U = "e1u" + , + CN_VE1V = "e1v" + , + CN_VE1F = "e1f" + , + CN_VE2T = "e2t" + , + CN_VE2U = "e2u" + , + CN_VE2V = "e2v" + , + CN_VE2F = "e2f" + , + CN_VE3T = "e3t" + , + CN_VE3W = "e3w" + , + CN_VFF = "ff" + , + CN_GLAMT = "glamt" + , + CN_GLAMU = "glamu" + , + CN_GLAMV = "glamv" + , + CN_GLAMF = "glamf" + , + CN_GPHIT = "gphit" + , + CN_GPHIU = "gphiu" + , + CN_GPHIV = "gphiv" + , + CN_GPHIF = "gphif" + , + CN_GDEPT = "gdept" + , + CN_GDEPW = "gdepw" + , + CN_HDEPT = "hdept" + , + CN_HDEPW = "hdepw" + + / + &NAMVARS + CN_VOTEMPER = "thetao" + , + CN_VOSALINE = "so" + , + CN_VOZOCRTX = "uo" + , + CN_VOMECRTY = "vo" + , + CN_VOMEEIVV = "vomeeivv" + , + CN_VOVECRTZ = "vovecrtz" + , + CN_SOSSHEIG = "sossheig" + , + CN_SOMXL010 = "mlotst" + , + CN_SOMXLT02 = "somxlt02" + , + CN_SOHEFLDO = "sohefldo" + , + CN_SOLHFLUP = "solhflup" + , + CN_SOSBHFUP = "sosbhfup" + , + CN_SOLWFLDO = "solwfldo" + , + CN_SOSHFLDO = "soshfldo" + , + CN_SOWAFLUP = "sowaflup" + , + CN_SOWAFLCD = "sowaflcd" + , + CN_SOWAFLDP = "sowafldp" + , + CN_IOWAFLUP = "iowaflup" + , + CN_ZOMSFATL = "zomsfatl" + , + CN_ZOMSFGLO = "zomsfglo" + , + CN_ZOMSFPAC = "zomsfpac" + , + CN_ZOMSFINP = "zomsfinp" + , + CN_ZOMSFIND = "zomsfind" + , + CN_ZOISOATL = "zoisoatl" + , + CN_ZOISOGLO = "zoisoglo" + , + CN_ZOISOPAC = "zoisopac" + , + CN_ZOISOINP = "zoisoinp" + , + CN_ZOISOIND = "zoisoind" + , + CN_VOZOUT = "vozout" + , + CN_VOMEVT = "vomevt" + , + CN_VOZOUS = "vozous" + , + CN_VOMEVS = "vomevs" + , + CN_SOZOUT = "sozout" + , + CN_SOMEVT = "somevt" + , + CN_SOZOUS = "sozous" + , + CN_SOMEVS = "somevs" + , + CN_SOZOUTRP = 
"sozoutrp" + , + CN_SOMEVTRP = "somevtrp" + , + CN_SOICECOV = "soicecov" + , + CN_VOSIGMA0 = "vosigma0" + , + CN_VOSIGMAI = "vosigmai" + , + CN_VOSIGNTR = "vosigntr" + , + CN_VODEPISO = "vodepiso" + , + CN_ISOTHICK = "isothick" + , + CN_IICETHIC = "iicethic" + , + CN_ILEADFRA = "ileadfra" + , + CN_INVCFC = "INVCFC" + , + CN_CFC11 = "CFC11" + , + CN_PENDEP = "pendep" + + / + &NAMBATHY + CN_FBATHYMET = "bathy_meter.nc" + , + CN_FBATHYLEV = "bathy_level.nc" + , + CN_BATHYMET = "Bathymetry" + , + CN_BATHYLEV = "bathy_level" + , + CN_MBATHY = "mbathy" + + / + ! Namelist entry namsqdvar needs manual formating before + ! it can be used as input : put variables names in between ' + ! and separate variables by , + &NAMSQDVAR + NN_SQDVAR = 4, + CN_SQDVAR = "vozocrtx vomecrty vovecrtz sossheig" , + / + &NAMMESHMASK + CN_FZGR = "mesh_zgr.nc" + , + CN_FHGR = "mesh_hgr.nc" + , + CN_FMSK = "mask.nc" + , + CN_FCOO = "coordinates.nc" + , + CN_FBASINS = "new_maskglo.nc", + + / diff --git a/earthdiagnostics/__init__.py b/earthdiagnostics/__init__.py index e442aee3ee9987d5c764bebba056f9cb5ab5c68c..b6f6a1bd4731d745dbafa0c9bf8877cea4f27458 100644 --- a/earthdiagnostics/__init__.py +++ b/earthdiagnostics/__init__.py @@ -1,11 +1,8 @@ # coding=utf-8 -""" -Module containing the Earth Diagnostics. -""" +"""Module containing the Earth Diagnostics.""" import os from earthdiagnostics.cdftools import CDFTools cdftools = CDFTools() DEVNULL = open(os.devnull, 'wb') - diff --git a/earthdiagnostics/box.py b/earthdiagnostics/box.py index ea7ccc15feee5661b4cdb0e54ab965fbaf10f33a..4d68bb1018cf17810814f0a1215945ddf0a5a347 100644 --- a/earthdiagnostics/box.py +++ b/earthdiagnostics/box.py @@ -1,9 +1,20 @@ # coding=utf-8 +"""Module to manage 3D space restrictions""" + + class Box(object): """ - Represents a box in the 3D space. Also allows easy conversion from the coordinate values to significant string + Represents a box in the 3D space. 
+ + Also allows easy conversion from the coordinate values to significant string representations + + Parameters + ---------- + depth_in_meters: bool, optional + If True, depth is given in meters. If False, it correspond to levels """ + def __init__(self, depth_in_meters=False): self.depth_in_meters = depth_in_meters """ @@ -37,6 +48,7 @@ class Box(object): def max_lat(self): """ Maximum latitude + :rtype: float """ return self._max_lat @@ -50,8 +62,9 @@ class Box(object): @property def min_lat(self): """ - Minimum latitude - :rtype: float + Minimum latitude + + :rtype: float """ return self._min_lat @@ -64,8 +77,9 @@ class Box(object): @property def max_lon(self): """ - Maximum longitude - :rtype: float + Maximum longitude + + :rtype: float """ return self._max_lon @@ -78,8 +92,9 @@ class Box(object): @property def min_lon(self): """ - Minimum longitude - :rtype: float + Minimum longitude + + :rtype: float """ return self._min_lon @@ -88,11 +103,13 @@ class Box(object): if value >= 360 or value <= -360: raise ValueError('{0} is not a valid longitude. Must be between -360 and 360'.format(value)) self._min_lon = value - + def get_lat_str(self): """ - Gets a string representation of the latitude in the format XX{N/S}. + Get a string representation of the latitude in the format XX{N/S}. + If min_lat is different from max_lat, it concatenates the two values + :return: string representation for latitude :rtype: str """ @@ -102,22 +119,24 @@ class Box(object): direction = 'S' else: direction = 'N' - + string = str(abs(self.min_lat)) + direction - + if self.max_lat != self.min_lat: if self.max_lat < 0: direction = 'S' else: direction = 'N' string += str(abs(self.max_lat)) + direction - + return string def get_lon_str(self): """ - Gets a string representation of the longitude in the format XX{E/W}. + Get a string representation of the longitude in the format XX{E/W}. 
+ If min_lon is different from max_lon, it concatenates the two values + :return: string representation for longitude :rtype: str """ @@ -140,8 +159,11 @@ class Box(object): def get_depth_str(self): """ - Gets a string representation of depth. For depth expressed in meters, it adds th character 'm' to the end + Get a string representation of depth. + + For depth expressed in meters, it adds the character 'm' to the end If min_depth is different from max_depth, it concatenates the two values + :return: string representation for depth :rtype: str """ @@ -158,5 +180,3 @@ class Box(object): else: string = '{0:d}{1}'.format(int(abs(self.max_depth)), suffix) return string - - diff --git a/earthdiagnostics/cdftools.py b/earthdiagnostics/cdftools.py index ad0355c38e811b3051e939d51e7b86629d0ddded..f720696e20bea554d24e2ad9cb1dce149648d3f0 100644 --- a/earthdiagnostics/cdftools.py +++ b/earthdiagnostics/cdftools.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""CDFTOOLS interface""" import os import six @@ -20,45 +21,44 @@ class CDFTools(object): self.data_convention = '' # noinspection PyShadowingBuiltins - def run(self, command, input, output=None, options=None, log_level=Log.INFO, input_option=None): + def run(self, command, input_file, output_file=None, options=None, log_level=Log.INFO, input_option=None): """ - Runs one of the CDFTools + Run one of the CDFTools :param command: executable to run :type command: str | iterable - :param input: input file - :type input: str - :param output: output file. Not all tools support this parameter + :param input_file: input file + :type input_file: str + :param output_file: output file. Not all tools support this parameter :type options: str :param options: options for the tool. 
- :type options: str | [str] | Tuple[str] | NoneType + :type options: str | [str] | Tuple[str] | None :param log_level: log level at which the output of the cdftool command will be added :type log_level: int :param input_option: option to add before input file :type input_option: str """ - line = [os.path.join(self.path, command)] self._check_command_existence(line[0]) if input_option: line.append(input_option) - self._check_input(command, input, line) + self._check_input(command, input_file, line) if options: if isinstance(options, six.string_types): options = options.split() for option in options: line.append(str(option)) - if output: - if input == output: + if output_file: + if input_file == output_file: raise ValueError('Input and output file can not be the same on CDFTools') line.append('-o') - line.append(output) + line.append(output_file) Log.debug('Executing {0}', ' '.join(line)) shell_output = Utils.execute_shell_command(line, log_level) - self._check_output_was_created(line, output) + self._check_output_was_created(line, output_file) return shell_output @staticmethod @@ -69,30 +69,30 @@ class CDFTools(object): # noinspection PyShadowingBuiltins @staticmethod - def _check_input(command, input, line): - if input: - if isinstance(input, six.string_types): - line.append(input) - if not os.path.isfile(input): - raise ValueError('Error executing {0}\n Input file {1} file does not exist', command, input) + def _check_input(command, input_file, line): + if input_file: + if isinstance(input_file, six.string_types): + line.append(input_file) + if not os.path.isfile(input_file): + raise ValueError('Error executing {0}\n Input file {1} file does not exist', command, input_file) else: - for element in input: + for element in input_file: line.append(element) if not os.path.isfile(element): raise ValueError('Error executing {0}\n Input file {1} file does not exist', command, element) - # noinspection PyMethodMayBeStatic - def is_exe(self, fpath): + @staticmethod + def 
_is_exe(fpath): return os.path.isfile(fpath) and os.access(fpath, os.X_OK) def _check_command_existence(self, command): if self.path: - if self.is_exe(os.path.join(self.path, command)): + if self._is_exe(os.path.join(self.path, command)): return else: for path in os.environ["PATH"].split(os.pathsep): path = path.strip('"') exe_file = os.path.join(path, command) - if self.is_exe(exe_file): + if self._is_exe(exe_file): return raise ValueError('Error executing {0}\n Command does not exist in {1}'.format(command, self.path)) diff --git a/earthdiagnostics/cmor_tables/meteofrance.csv b/earthdiagnostics/cmor_tables/meteofrance.csv new file mode 100644 index 0000000000000000000000000000000000000000..086e2ec0d7f084954d539a8e6bb63a3439f6c58e --- /dev/null +++ b/earthdiagnostics/cmor_tables/meteofrance.csv @@ -0,0 +1,3 @@ +Variable,Shortname,Name,Long name,Domain,Basin,Units,Valid min,Valid max,Grid,Tables +iiceconc:siconc:soicecov:ileadfra:ci,soicecov,sea_ice_area_fraction,Sea Ice Area Fraction,seaIce,,%,,,, +iicethic:sithic,sogsit__,sea_ice_thickness,Sea Ice Thickness,seaIce,,m,,,, diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 37013ae5004dd859c6d0fe30cbb02cb820428030..1e2d718bddf49812b2a010c1701eca705c08b3ec 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -1,7 +1,7 @@ # coding=utf-8 +"""Cmorization classes""" import glob import os -import pygrib import shutil import uuid from datetime import datetime @@ -20,12 +20,11 @@ class Cmorizer(object): """ Class to manage CMORization - :param data_manager: experiment's data manager - :type data_manager: CMORManager - :param startdate: startdate to cmorize - :type startdate: str - :param member: member to cmorize - :type member: int + Parameters + ---------- + data_manager: DataManager + startdate: str + member: int """ @@ -65,10 +64,7 @@ class Cmorizer(object): 'tbnds': 'bnds', 'nav_lat': self.lat_name, 'nav_lon': self.lon_name, 'x': 'i', 'y': 'j'} def 
cmorize_ocean(self): - """ - CMORizes ocean files from MMO files - :return: - """ + """Cmorize ocean files from MMO files""" if not self.cmor.ocean: Log.info('Skipping ocean cmorization due to configuration') return @@ -88,9 +84,9 @@ class Cmorizer(object): count = 1 for tarfile in tar_files: - if not self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), (ModelingRealms.ocean, - ModelingRealms.seaIce, - ModelingRealms.ocnBgchem)): + if not self._cmorization_required(self._get_chunk(os.path.basename(tarfile)), (ModelingRealms.ocean, + ModelingRealms.seaIce, + ModelingRealms.ocnBgchem)): Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) count += 1 continue @@ -134,7 +130,7 @@ class Cmorizer(object): for cmor_var in ("hfss", 'hfls')] total_seconds = (self.experiment.atmos_timestep * 3600) for filename in glob.glob(os.path.join(self.cmor_scratch, '*.nc')): - handler = Utils.openCdf(filename) + handler = Utils.open_cdf(filename) for varname in handler.variables.keys(): cmor_var = self.data_manager.variable_list.get_variable(varname, True) @@ -190,16 +186,13 @@ class Cmorizer(object): shutil.move(merged_sh, os.path.join(self.cmor_scratch, 'MMASH_1m_{0[0]}_{0[1]}.nc'.format(tar_startdate))) def cmorize_atmos(self): - """ - CMORizes atmospheric data, from grib or MMA files - :return: - """ + """Cmorize atmospheric data, from grib or MMA files""" if not self.cmor.atmosphere: Log.info('Skipping atmosphere cmorization due to configuration') return Log.info('\nCMORizing atmosphere\n') - if self.cmor.use_grib and self.gribfiles_available(): + if self.cmor.use_grib and self._gribfiles_available(): self._cmorize_grib_files() else: self._cmorize_mma_files() @@ -211,7 +204,7 @@ class Cmorizer(object): if len(tar_files) == 0: Log.error('MMA files not found in {0}'.format(self.original_files_path)) for tarfile in tar_files: - if not self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), (ModelingRealms.atmos,)): + if not 
self._cmorization_required(self._get_chunk(os.path.basename(tarfile)), (ModelingRealms.atmos,)): Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) count += 1 continue @@ -231,10 +224,10 @@ class Cmorizer(object): chunk = 1 chunk_start = parse_date(self.startdate) - while os.path.exists(self.get_original_grib_path(chunk_start, 'GG')) or \ - os.path.exists(self.get_original_grib_path(chunk_start, 'SH')): + while os.path.exists(self._get_original_grib_path(chunk_start, 'GG')) or \ + os.path.exists(self._get_original_grib_path(chunk_start, 'SH')): - if self.cmorization_required(chunk, (ModelingRealms.atmos,)): + if self._cmorization_required(chunk, (ModelingRealms.atmos,)): chunk_end = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', self.experiment.calendar) chunk_end = previous_day(chunk_end, self.experiment.calendar) Log.info('CMORizing chunk {0}-{1}', date2str(chunk_start), date2str(chunk_end)) @@ -242,21 +235,21 @@ class Cmorizer(object): for grid in ('SH', 'GG'): Log.info('Processing {0} variables', grid) - if not os.path.exists(self.get_original_grib_path(chunk_start, grid)): + if not os.path.exists(self._get_original_grib_path(chunk_start, grid)): continue - self.cmorize_grib_file(chunk_end, chunk_start, grid) + self._cmorize_grib_file(chunk_end, chunk_start, grid) except Exception as ex: Log.error('Can not cmorize GRIB file for chunk {0}-{1}: {2}', date2str(chunk_start), date2str(chunk_end), ex) chunk_start = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', self.experiment.calendar) chunk += 1 - def cmorize_grib_file(self, chunk_end, chunk_start, grid): + def _cmorize_grib_file(self, chunk_end, chunk_start, grid): for month in range(0, self.experiment.chunk_size): current_date = add_months(chunk_start, month, self.experiment.calendar) - original_gribfile = self.get_original_grib_path(current_date, grid) + original_gribfile = self._get_original_grib_path(current_date, grid) Log.info('Processing month {1}', 
grid, date2str(current_date)) - gribfile = self.get_scratch_grib_path(current_date, grid) + gribfile = self._get_scratch_grib_path(current_date, grid) if not os.path.isfile(gribfile): Log.info('Copying file...', grid, date2str(current_date)) Utils.copy_file(original_gribfile, gribfile) @@ -267,7 +260,7 @@ class Cmorizer(object): os.remove(gribfile) return - next_gribfile = self.get_original_grib_path(add_months(current_date, 1, self.experiment.calendar), grid) + next_gribfile = self._get_original_grib_path(add_months(current_date, 1, self.experiment.calendar), grid) if not os.path.exists(next_gribfile): os.remove(gribfile) @@ -318,7 +311,7 @@ class Cmorizer(object): Utils.remove_file('ICM') def _get_monthly_grib(self, current_date, gribfile, grid): - prev_gribfile = self.get_scratch_grib_path(add_months(current_date, -1, self.experiment.calendar), grid) + prev_gribfile = self._get_scratch_grib_path(add_months(current_date, -1, self.experiment.calendar), grid) if os.path.exists(prev_gribfile): self._merge_grib_files(current_date, prev_gribfile, gribfile) full_file = 'ICM' @@ -326,14 +319,14 @@ class Cmorizer(object): full_file = gribfile return full_file - def get_scratch_grib_path(self, current_date, grid): + def _get_scratch_grib_path(self, current_date, grid): return os.path.join(self.config.scratch_dir, self._get_grib_filename(grid, current_date)) def _obtain_atmos_timestep(self, gribfile): if self.atmos_timestep is None: self.atmos_timestep = self._get_atmos_timestep(gribfile) - def get_original_grib_path(self, current_date, grid): + def _get_original_grib_path(self, current_date, grid): return os.path.join(self.original_files_path, self._get_grib_filename(grid, current_date)) @@ -342,6 +335,7 @@ class Cmorizer(object): def _get_atmos_timestep(self, gribfile): Log.info('Getting timestep...') + import pygrib grib_handler = pygrib.open(gribfile) dates = set() try: @@ -368,11 +362,12 @@ class Cmorizer(object): Utils.convert2netcdf4(filename) frequency = 
self._get_nc_file_frequency(filename) Utils.rename_variables(filename, self.alt_coord_names, False, True) - self._remove_valid_limits(filename) - self._add_common_attributes(filename, frequency) - self._update_time_variables(filename) + handler = Utils.open_cdf(filename) + Cmorizer._remove_valid_limits(handler) + self._add_common_attributes(handler, frequency) + self._update_time_variables(handler) + handler.sync() - handler = Utils.openCdf(filename) Log.info('Splitting file {0}', filename) for variable in handler.variables.keys(): if variable in Cmorizer.NON_DATA_VARIABLES: @@ -385,9 +380,9 @@ class Cmorizer(object): handler.close() os.remove(filename) - # noinspection PyMethodMayBeStatic - def _remove_valid_limits(self, filename): - handler = Utils.openCdf(filename) + @staticmethod + def _remove_valid_limits(filename): + handler = Utils.open_cdf(filename) for variable in handler.variables.keys(): var = handler.variables[variable] if 'valid_min' in var.ncattrs(): @@ -416,16 +411,20 @@ class Cmorizer(object): def extract_variable(self, file_path, handler, frequency, variable): """ - Extracts a variable from a file and creates the CMOR file - - :param file_path: path to the file - :type file_path: str - :param handler: netCDF4 handler for the file - :type handler: netCDF4.Dataset - :param frequency: variable's frequency - :type frequency: Frequency - :param variable: variable's name - :type variable: str + Extract a variable from a file and creates the CMOR file + + Parameters + ---------- + file_path:str + handler: netCDF4.Dataset + frequency: Frequency + variable: str + + Raises + ------ + CMORException + If the filename does not match any of the recognized patterns + """ alias, var_cmor = self.config.var_manager.get_variable_and_alias(variable) if var_cmor is None: @@ -445,7 +444,7 @@ class Cmorizer(object): else: region = alias.basin.name - date_str = self.get_date_str(file_path) + date_str = self._get_date_str(file_path) if date_str is None: Log.error('Variable 
{0} can not be cmorized. Original filename does not match a recognized pattern', var_cmor.short_name) @@ -477,9 +476,10 @@ class Cmorizer(object): region_str = '' Log.info('Variable {0.domain}:{0.short_name} processed{1}', var_cmor, region_str) - def get_date_str(self, file_path): + def _get_date_str(self, file_path): file_parts = os.path.basename(file_path).split('_') - if file_parts[0] in (self.experiment.expid, 't00o', 'MMA', 'MMASH', 'MMAGG', 'MMO') or file_parts[0].startswith('ORCA'): + valid_starts = (self.experiment.expid, 'MMA', 'MMASH', 'MMAGG', 'MMO') + if file_parts[0] in valid_starts or file_parts[0].startswith('ORCA'): # Model output if file_parts[-1].endswith('.tar'): file_parts = file_parts[-1][0:-4].split('-') @@ -492,8 +492,8 @@ class Cmorizer(object): else: return None - def get_chunk(self, file_path): - chunk_start = parse_date(self.get_date_str(file_path).split('-')[0]) + def _get_chunk(self, file_path): + chunk_start = parse_date(self._get_date_str(file_path).split('-')[0]) current_date = parse_date(self.startdate) chunk = 1 while current_date < chunk_start: @@ -505,7 +505,7 @@ class Cmorizer(object): return chunk def _add_coordinate_variables(self, handler, temp): - handler_cmor = Utils.openCdf(temp) + handler_cmor = Utils.open_cdf(temp) Utils.copy_variable(handler, handler_cmor, self.lon_name, False) Utils.copy_variable(handler, handler_cmor, self.lat_name, False) if 'time' in handler_cmor.dimensions.keys(): @@ -545,44 +545,15 @@ class Cmorizer(object): var_codes = self.config.cmor.get_variables(frequency) for var_code in var_codes: if not os.path.exists('{0}_{1}.128.nc'.format(gribfile, var_code)): - continue + continue new_units = None cdo_operator = '-selmon,{0}'.format(month) - if frequency == Frequencies.monthly: - if var_code == 201: - cdo_operator = "-monmean -daymax {0}".format(cdo_operator) - elif var_code == 202: - cdo_operator = "-monmean -daymax {0}".format(cdo_operator) - else: - cdo_operator = "-monmean {0} 
".format(cdo_operator) - if frequency == Frequencies.daily: - if var_code == 201: - cdo_operator = "-daymax {0} ".format(cdo_operator) - elif var_code == 202: - cdo_operator = "-daymin {0} ".format(cdo_operator) - else: - cdo_operator = "-daymean {0} ".format(cdo_operator) - - if var_code in (144, 146, 147, 169, 175, 176, 177, 179, 180, 181, 182, 201, 202, 205, 212, 228): - cdo_operator = '{0} -shifttime,-{1}hours'.format(cdo_operator, self.experiment.atmos_timestep) - - if var_code == 129: - # geopotential - new_units = "m" - cdo_operator = "-divc,9.81 {0}".format(cdo_operator) - elif var_code in (146, 147, 169, 175, 176, 177, 179, 212): - # radiation - new_units = "W m-2" - cdo_operator = "-divc,{0} {1}".format(self.experiment.atmos_timestep * 3600, cdo_operator) - elif var_code in (180, 181): - # momentum flux - new_units = "N m-2" - cdo_operator = "-divc,{0} {1}".format(self.experiment.atmos_timestep * 3600, cdo_operator) - elif var_code in (144, 182, 205, 228): - # precipitation/evaporation/runoff - new_units = "kg m-2 s-1" - cdo_operator = "-mulc,1000 -divc,{0}".format(self.experiment.atmos_timestep * 3600) + cdo_operator = self._get_time_average(cdo_operator, frequency, var_code) + + cdo_operator = self._fix_time_shift(cdo_operator, var_code) + + cdo_operator, new_units = self._change_units(cdo_operator, new_units, var_code) levels = self.config.cmor.get_levels(frequency, var_code) if levels: @@ -594,7 +565,7 @@ class Cmorizer(object): gribfile, var_code, frequency)) h_var_file = '{0}_{1}_{2}.nc'.format(gribfile, var_code, frequency) - handler = Utils.openCdf(h_var_file) + handler = Utils.open_cdf(h_var_file) if new_units: for var in handler.variables.values(): if 'code' in var.ncattrs() and var.code == var_code: @@ -612,6 +583,48 @@ class Cmorizer(object): output='{0}_{1}_1m.nc'.format(gribfile, var_code), options=('-O -v {0}'.format(var_name))) + def _fix_time_shift(self, cdo_operator, var_code): + if var_code in (144, 146, 147, 169, 175, 176, 177, 179, 
180, 181, 182, 201, 202, 205, 212, 228): + cdo_operator = '{0} -shifttime,-{1}hours'.format(cdo_operator, self.experiment.atmos_timestep) + return cdo_operator + + @staticmethod + def _get_time_average(cdo_operator, frequency, var_code): + if frequency == Frequencies.monthly: + if var_code == 201: + cdo_operator = "-monmean -daymax {0}".format(cdo_operator) + elif var_code == 202: + cdo_operator = "-monmean -daymax {0}".format(cdo_operator) + else: + cdo_operator = "-monmean {0} ".format(cdo_operator) + if frequency == Frequencies.daily: + if var_code == 201: + cdo_operator = "-daymax {0} ".format(cdo_operator) + elif var_code == 202: + cdo_operator = "-daymin {0} ".format(cdo_operator) + else: + cdo_operator = "-daymean {0} ".format(cdo_operator) + return cdo_operator + + def _change_units(self, cdo_operator, new_units, var_code): + if var_code == 129: + # geopotential + new_units = "m" + cdo_operator = "-divc,9.81 {0}".format(cdo_operator) + elif var_code in (146, 147, 169, 175, 176, 177, 179, 212): + # radiation + new_units = "W m-2" + cdo_operator = "-divc,{0} {1}".format(self.experiment.atmos_timestep * 3600, cdo_operator) + elif var_code in (180, 181): + # momentum flux + new_units = "N m-2" + cdo_operator = "-divc,{0} {1}".format(self.experiment.atmos_timestep * 3600, cdo_operator) + elif var_code in (144, 182, 205, 228): + # precipitation/evaporation/runoff + new_units = "kg m-2 s-1" + cdo_operator = "-mulc,1000 -divc,{0}".format(self.experiment.atmos_timestep * 3600) + return cdo_operator, new_units + def _merge_and_cmorize_atmos(self, chunk_start, chunk_end, grid, frequency): merged_file = 'MMA_{0}_{1}_{2}_{3}.nc'.format(frequency, date2str(chunk_start), date2str(chunk_end), grid) files = glob.glob(os.path.join(self.config.scratch_dir, @@ -627,8 +640,7 @@ class Cmorizer(object): self._cmorize_nc_file(merged_file) - def _update_time_variables(self, filename): - handler = Utils.openCdf(filename) + def _update_time_variables(self, handler): time_var = 
handler.variables['time'] if hasattr(time_var, 'calendar'): calendar = time_var.calendar @@ -639,11 +651,9 @@ class Cmorizer(object): handler.variables['time_bnds'].units = time_var.units Utils.convert_units(handler.variables['time_bnds'], 'days since 1850-01-01 00:00:00', calendar, calendar) Utils.convert_units(time_var, 'days since 1850-1-1 00:00:00', calendar) - handler.close() - self._set_leadtime_var(filename) + self._set_leadtime_var(handler) - def _set_leadtime_var(self, filename): - handler = Utils.openCdf(filename) + def _set_leadtime_var(self, handler): if 'leadtime' in handler.variables: var = handler.variables['leadtime'] else: @@ -657,12 +667,10 @@ class Cmorizer(object): for time in leadtime] for lt in range(0, len(leadtime)): var[lt] = leadtime[lt].days - handler.close() - def _add_common_attributes(self, filename, frequency): + def _add_common_attributes(self, handler, frequency): cmor = self.config.cmor experiment = self.config.experiment - handler = Utils.openCdf(filename) handler.associated_experiment = cmor.associated_experiment handler.batch = '{0}{1}'.format(experiment.institute, datetime.now().strftime('%Y-%m-%d(T%H:%M:%SZ)')) handler.contact = 'Pierre-Antoine Bretonniere, pierre-antoine.bretonniere@bsc.es , ' \ @@ -687,14 +695,13 @@ class Cmorizer(object): handler.tracking_id = str(uuid.uuid1()) handler.title = "{0} model output prepared for {2} {1}".format(experiment.model, experiment.experiment_name, self.config.data_convention.upper()) - handler.close() - def gribfiles_available(self): + def _gribfiles_available(self): grb_path = os.path.join(self.original_files_path, '*.grb') gribfiles = glob.glob(grb_path) return len(gribfiles) > 0 - def cmorization_required(self, chunk, domains): + def _cmorization_required(self, chunk, domains): if not self.config.cmor.chunk_cmorization_requested(chunk): return False if self.config.cmor.force: @@ -706,4 +713,6 @@ class Cmorizer(object): class CMORException(Exception): + """Exception to be launched 
when an error is encountered during cmorization""" + pass diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index cfa67caefe57fce5578f933a1057dcd8b0dddcd3..931f4ec73cc6dd87b85b89264f126b98056d392f 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -1,33 +1,73 @@ # coding=utf-8 +"""Classes to manage cmorized datasets""" import glob import os +import re +import shutil from datetime import datetime from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, previous_day from bscearth.utils.log import Log -from datafile import StorageStatus -from diagnostic import Diagnostic +from earthdiagnostics.datafile import StorageStatus +from earthdiagnostics.diagnostic import Diagnostic from earthdiagnostics.cmorizer import Cmorizer from earthdiagnostics.datamanager import DataManager from earthdiagnostics.frequency import Frequencies, Frequency from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import TempFile, Utils -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class CMORManager(DataManager): """ Data manager class for CMORized experiments + + Parameters + ---------- + config: earthdiagnostics.config.Config + """ + def __init__(self, config): super(CMORManager, self).__init__(config) self._dic_cmorized = dict() + self.find_model_data() + self.cmor_path = os.path.join(self.config.data_dir, self.experiment.expid) + + def experiment_name(self, startdate): + """ + Get experiment name, appending startdate if needed + + Parameters + ---------- + startdate: str + + Returns + ------- + str + + """ + if self.config.cmor.append_startdate: + return '{}S{}'.format(self.config.experiment.experiment_name, startdate) + else: + return self.config.experiment.experiment_name + + def find_model_data(self): + """ + Seek the configured data folders for the experiment data + + For each folder, it looks at: + -/ + -// + 
-/// + + Model has any '-' character removed and is passed to lower + """ data_folders = self.config.data_dir.split(':') experiment_folder = self.experiment.model.lower() if experiment_folder.startswith('ec-earth'): experiment_folder = 'ecearth' - self.config.data_dir = None for data_folder in data_folders: if os.path.isdir(os.path.join(data_folder, self.experiment.expid)): @@ -38,62 +78,71 @@ class CMORManager(DataManager): self.config.data_dir = test_folder break - test_folder = os.path.join(data_folder, self.config.data_type, experiment_folder) + test_folder = os.path.join(data_folder, self.config.data_type, experiment_folder) if os.path.isdir(os.path.join(test_folder, self.experiment.expid)): self.config.data_dir = test_folder break - if not self.config.data_dir: raise Exception('Can not find model data') - self.cmor_path = os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles') # noinspection PyUnusedLocal def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN, possible_versions=None): + """ + Check if a file exists in the storage + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None + box: Box or None + frequency: Frequency or None + vartype: VariableType + possible_versions: iterable od str or None + + Returns + ------- + bool + + """ cmor_var = self.variable_list.get_variable(var) filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, grid, None, None) - # noinspection PyBroadException if possible_versions is None: - # noinspection PyBroadException - try: - return os.path.isfile(filepath) - except Exception: - return False + return os.path.isfile(filepath) else: for version in possible_versions: - # noinspection PyBroadException - try: - if os.path.isfile(filepath.replace(self.config.cmor.version, version)): - return True - except Exception: - pass + if 
os.path.isfile(filepath.replace(self.config.cmor.version, version)): + return True return False def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=None): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param vartype: - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType - :return: path to the copy created on the scratch folder - :rtype: str + Request a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None + box: Box or None + frequency: Frequency or None + vartype: VariableType or None + + Returns + ------- + DataFile + """ + if frequency is None: + frequency = self.config.frequency cmor_var = self.variable_list.get_variable(var) var = self._get_final_var_name(box, var) filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, grid, None, None) @@ -102,28 +151,25 @@ class CMORManager(DataManager): def request_year(self, diagnostic, domain, var, startdate, member, year, grid=None, box=None, frequency=None): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param year: - :param diagnostic: - :param domain: CMOR domain - :type domain: Domain - :param var: variable name 
- :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType - :return: path to the copy created on the scratch folder - :rtype: str - """ + Request a given year for a variavle from a CMOR repository + + Parameters + ---------- + diagnostic: Diagnostic + domain: ModelingRealm + var: str + startdate: str + member: int + year: int + grid: str or None + box: Box or None + frequency: Frequency or None + + Returns + ------- + DataFile + """ job = MergeYear(self, domain, var, startdate, member, year, grid, box, frequency) job.request_data() job.declare_data_generated() @@ -135,32 +181,28 @@ class CMORManager(DataManager): def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN, diagnostic=None): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param diagnostic: - :param region: - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the 
scratch folder - :rtype: str + Declare a variable chunk to be generated by a diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None + region: Basin or None + box: Box or None + frequency: Frequency or None + vartype: VariableType + diagnostic: Diagnostic + + Returns + ------- + DataFile + """ - if not frequency: + if frequency is None: frequency = self.config.frequency original_name = var cmor_var = self.variable_list.get_variable(var) @@ -177,26 +219,24 @@ class CMORManager(DataManager): def declare_year(self, domain, var, startdate, member, year, grid=None, box=None, vartype=VariableType.MEAN, diagnostic=None): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param diagnostic: - :param year: - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str + Declare a variable year to be generated by a diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + year: int + grid: str or None + box: Box or None + vartype: VariableType + diagnostic: Diagnostic + + Returns + ------- + DataFile + """ original_name = var cmor_var = self.variable_list.get_variable(var) @@ -214,33 +254,43 @@ class CMORManager(DataManager): def get_file_path(self, startdate, member, domain, var, cmor_var, chunk, frequency, grid=None, year=None, date_str=None): """ - Returns the path to a concrete file - :param cmor_var: - 
:param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param domain: file's domain - :type domain: Domain - :param var: file's var - :type var: var - :param chunk: file's chunk - :type chunk: int|NoneType - :param frequency: file's frequency - :type frequency: Frequency - :param grid: file's grid - :type grid: str|NoneType - :param year: file's year - :type year: int|str|NoneType - :param date_str: date string to add directly. Overrides year or chunk configurations - :type date_str: str|NoneType - :return: path to the file - :rtype: str|NoneType - :param cmor_var: variable instance describing the selected variable - :type cmor_var: Variable + Return the path to a concrete file + + Parameters + ---------- + startdate: str + member: int + domain: ModelingRealm + var: str + cmor_var: Variable + chunk: int or None + frequency: Frequency or str + grid: str or None + year: int or None + date_str: str or None + + Returns + ------- + str + + Raises + ------ + ValueError + If you provide two or more parameters from chunk, year or date_str or none at all + """ - if not frequency: + if frequency is None: frequency = self.config.frequency + frequency = Frequency.parse(frequency) + options = sum(x is not None for x in (chunk, year, date_str)) + if options == 0: + raise ValueError('You must provide chunk, year or date_str') + elif options > 1: + raise ValueError('You must provide only one parameter in chunk, year or date_str') + if frequency is None: + frequency = self.config.frequency + else: + frequency = Frequency.parse(frequency) folder_path = self._get_full_cmor_folder_path(startdate, member, domain, var, frequency, grid, cmor_var) file_name = self._get_cmor_file_name(startdate, member, domain, var, cmor_var, frequency, @@ -255,26 +305,13 @@ class CMORManager(DataManager): else: cmor_table = cmor_var.get_table(frequency, self.config.data_convention) - if chunk is not None: - time_bound = 
self._get_chunk_time_bounds(startdate, chunk) - elif year: - if frequency != Frequencies.yearly: - raise ValueError('Year may be provided instead of chunk only if frequency is "yr"') - time_bound = str(year) - elif date_str: - time_bound = date_str - else: - raise ValueError('Chunk, year and date_str can not be None at the same time') - - if time_bound: - time_bound = '_{0}.nc'.format(time_bound) - else: - time_bound = '.nc' + time_bound = self._get_time_component(chunk, date_str, frequency, startdate, year) + time_bound = '_{0}.nc'.format(time_bound) if self.config.data_convention in ('specs', 'preface'): file_name = '{0}_{1}_{2}_{3}_S{4}_{5}{6}'.format(var, cmor_table.name, self.experiment.model, - self.experiment.experiment_name, startdate, + self.experiment_name(startdate), startdate, self._get_member_str(member), time_bound) elif self.config.data_convention in ('primavera', 'cmip6'): if not grid: @@ -283,13 +320,26 @@ class CMORManager(DataManager): else: grid = self.config.cmor.default_atmos_grid file_name = '{0}_{1}_{2}_{3}_{4}_{5}{6}'.format(var, cmor_table.name, self.experiment.model, - self.experiment.experiment_name, + self.experiment_name(startdate), self._get_member_str(member), grid, time_bound) + # Meteofrance else: - raise Exception('Data convention {0} not supported'.format(self.config.data_convention)) + time_bound = self._get_chunk_time_bounds(startdate, chunk) + file_name = '{0}_{1}_{2}_{3}.nc'.format(var, frequency, time_bound, self._get_member_str(member)) return file_name + def _get_time_component(self, chunk, date_str, frequency, startdate, year): + if chunk is not None: + time_bound = self._get_chunk_time_bounds(startdate, chunk) + elif year: + if frequency != Frequencies.yearly: + raise ValueError('Year may be provided instead of chunk only if frequency is "yr"') + time_bound = str(year) + else: + time_bound = date_str + return time_bound + def _get_full_cmor_folder_path(self, startdate, member, domain, var, frequency, grid, cmor_var): if 
self.config.data_convention in ('specs', 'preface'): folder_path = os.path.join(self._get_startdate_path(startdate), str(frequency), domain.name, var) @@ -298,7 +348,10 @@ class CMORManager(DataManager): folder_path = os.path.join(folder_path, self._get_member_str(member)) if self.config.cmor.version: folder_path = os.path.join(folder_path, self.config.cmor.version) - else: + + elif self.config.data_convention in ('primavera', 'cmip6'): + if not self.config.cmor.version: + raise ValueError('CMOR version is mandatory for PRIMAVERA and CMIP6') if not grid: if domain in [ModelingRealms.ocnBgchem, ModelingRealms.seaIce, ModelingRealms.ocean]: grid = self.config.cmor.default_ocean_grid @@ -311,6 +364,12 @@ class CMORManager(DataManager): folder_path = os.path.join(self._get_startdate_path(startdate), self._get_member_str(member), table_name, var, grid, self.config.cmor.version) + elif self.config.data_convention == 'meteofrance': + folder_path = os.path.join(self.config.data_dir, self.experiment_name(startdate), + 'H{0}'.format(chr(64 + int(startdate[4:6]))), + startdate[0:4]) + else: + raise ValueError('Data convention {0} not supported'.format(self.config.data_convention)) return folder_path def _get_chunk_time_bounds(self, startdate, chunk): @@ -322,68 +381,142 @@ class CMORManager(DataManager): separator = '_' else: separator = '-' - time_bound = "{0:04}{1:02}{4}{2:04}{3:02}".format(chunk_start.year, chunk_start.month, chunk_end.year, - chunk_end.month, separator) + if self.config.data_convention == 'meteofrance': + time_bound = "{0:04}{1:02}".format(chunk_start.year, chunk_start.month) + else: + time_bound = "{0:04}{1:02}{4}{2:04}{3:02}".format(chunk_start.year, chunk_start.month, chunk_end.year, + chunk_end.month, separator) return time_bound def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None, frequency=None, year=None, date_str=None, move_old=False, vartype=VariableType.MEAN): - """ - Creates the link of a given file from the CMOR 
repository. - - :param cmor_var: - :param move_old: - :param date_str: - :param year: if frequency is yearly, this parameter is used to give the corresponding year - :type year: int - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - :param cmor_var: variable instance describing the selected variable - :type cmor_var: Variable + Create the link of a given file from the CMOR repository. + + Parameters + ---------- + domain: ModelingRealm + var: str + cmor_var: + startdate: str + member: int + chunk: int or None, optional + grid: str or None, optional + frequency: Frequency or None, optional + year: int or None, optional + date_str: str or None, optional + move_old: bool, optional + vartype: VariableType, optional """ - - if not frequency: + if frequency is None: frequency = self.config.frequency filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, - grid=grid, year=str(year), date_str=date_str) + grid=grid, year=year, date_str=date_str) self.create_link(domain, filepath, frequency, var, grid, move_old, vartype) - # noinspection PyPep8Naming + def create_link(self, domain, filepath, frequency, var, grid, move_old, vartype): + """ + Create file link + + Parameters + ---------- + domain: ModelingRealm + filepath: str + frequency: Frequency + var: str + grid: str + move_old: bool + vartype: VariableType + + """ + if self.config.data_convention == 'meteofrance': + return + freq_str = 
frequency.folder_name(vartype) + + if not grid: + grid = 'original' + + variable_folder = self.get_varfolder(domain, var) + vargrid_folder = self.get_varfolder(domain, var, grid) + + self.lock.acquire() + try: + if grid == 'original': + link_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, variable_folder) + Utils.create_folder_tree(link_path) + else: + link_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, vargrid_folder) + Utils.create_folder_tree(link_path) + default_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, variable_folder) + original_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, + vargrid_folder.replace('-{0}_f'.format(grid), '-original_f')) + + if os.path.islink(default_path): + os.remove(default_path) + elif os.path.isdir(default_path): + shutil.move(default_path, original_path) + os.symlink(link_path, default_path) + + if move_old and link_path not in self._checked_vars: + self._checked_vars.append(link_path) + old_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, + 'old_{0}'.format(os.path.basename(link_path))) + regex = re.compile(var + '_[0-9]{6,8}\.nc') + for filename in os.listdir(link_path): + if regex.match(filename): + Utils.create_folder_tree(old_path) + Utils.move_file(os.path.join(link_path, filename), + os.path.join(old_path, filename)) + + link_path = os.path.join(link_path, os.path.basename(filepath)) + if os.path.lexists(link_path): + os.remove(link_path) + if not os.path.exists(filepath): + raise ValueError('Original file {0} does not exists'.format(filepath)) + relative_path = os.path.relpath(filepath, os.path.dirname(link_path)) + os.symlink(relative_path, link_path) + except Exception: + raise + finally: + self.lock.release() + def prepare(self): """ - Prepares the data to be used by the diagnostic. + Prepare the data to be used by the diagnostic. 
If CMOR data is not created, it show a warning and closes. In the future, an automatic cmorization procedure will be launched If CMOR data is available but packed, the procedure will unpack it. - :return: """ # Check if cmorized and convert if not + if self.config.data_convention == 'meteofrance': + return + for startdate, member in self.experiment.get_member_list(): if not self._unpack_cmor_files(startdate, member): self._cmorize_member(startdate, member) def is_cmorized(self, startdate, member, chunk, domain): + """ + Check if a chunk domain is cmorized + + A cache is maintained so only the first check is costly + + Parameters + ---------- + startdate: str + member: int + chunk: int + domain: ModelingRealm + + Returns + ------- + bool + + """ identifier = (startdate, member, chunk) if identifier not in self._dic_cmorized: self._dic_cmorized[identifier] = {} @@ -399,19 +532,11 @@ class CMORManager(DataManager): count = 0 if self.config.data_convention == 'specs': for freq in os.listdir(startdate_path): - domain_path = os.path.join(startdate_path, freq, - domain.name) + domain_path = os.path.join(startdate_path, freq, domain.name) if os.path.isdir(domain_path): - for var in os.listdir(domain_path): - cmor_var = self.variable_list.get_variable(var, True) - var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, - Frequency(freq)) - if os.path.isfile(var_path): - count += 1 - if count >= self.config.cmor.min_cmorized_vars: - return True - else: - continue + count = self._check_var_presence(domain_path, count, startdate, member, domain, chunk, freq) + if count >= self.config.cmor.min_cmorized_vars: + return True else: member_path = os.path.join(startdate_path, self._get_member_str(member)) if not os.path.isdir(member_path): @@ -421,17 +546,21 @@ class CMORManager(DataManager): table_dir = os.path.join(member_path, table.name) if not os.path.isdir(table_dir): return False - for var in os.listdir(table_dir): - cmor_var = 
self.variable_list.get_variable(var, True) - var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency=freq) - if os.path.isfile(var_path): - count += 1 - if count >= self.config.cmor.min_cmorized_vars: - return True - else: - continue + count = self._check_var_presence(table_dir, count, startdate, member, domain, chunk, freq) + if count >= self.config.cmor.min_cmorized_vars: + return True return False + def _check_var_presence(self, folder, current_count, startdate, member, domain, chunk, freq): + for var in os.listdir(folder): + cmor_var = self.variable_list.get_variable(var, True) + var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency=freq) + if os.path.isfile(var_path): + current_count += 1 + if current_count >= self.config.cmor.min_cmorized_vars: + break + return current_count + def _cmorize_member(self, startdate, member): start_time = datetime.now() member_str = self.experiment.get_member_str(member) @@ -468,9 +597,6 @@ class CMORManager(DataManager): Log.info('Unzipping cmorized data for {0} {1} {2}...', startdate, member, chunk) Utils.unzip(filepaths, True) - if not os.path.exists(self.cmor_path): - os.mkdir(self.cmor_path) - filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, chunk, 'tar') if len(filepaths) > 0: Log.info('Unpacking cmorized data for {0} {1} {2}...', startdate, member, chunk) @@ -501,11 +627,11 @@ class CMORManager(DataManager): self._fix_model_as_experiment_error(startdate) def _fix_model_as_experiment_error(self, startdate): - if self.experiment.experiment_name != self.experiment.model: + if self.experiment_name(startdate) != self.experiment.model: bad_path = os.path.join(self.cmor_path, self.experiment.institute, self.experiment.model, self.experiment.model) Log.debug('Correcting double model appearance') - for (dirpath, dirnames, filenames) in os.walk(bad_path, False): + for (dirpath, _, filenames) in os.walk(bad_path, False): for filename in 
filenames: if '_S{0}_'.format(startdate) in filename: continue @@ -513,15 +639,15 @@ class CMORManager(DataManager): good = filepath good = good.replace('_{0}_output_'.format(self.experiment.model), '_{0}_{1}_S{2}_'.format(self.experiment.model, - self.experiment.experiment_name, + self.experiment_name(startdate), startdate)) good = good.replace('/{0}/{0}'.format(self.experiment.model), '/{0}/{1}'.format(self.experiment.model, - self.experiment.experiment_name)) + self.experiment_name(startdate))) Utils.move_file(filepath, good) - if self.experiment.model != self.experiment.experiment_name: + if self.experiment.model != self.experiment_name(startdate): os.rmdir(dirpath) Log.debug('Done') @@ -533,6 +659,20 @@ class CMORManager(DataManager): Log.debug('Done') def create_links(self, startdate, member=None): + """ + Create links for a gicen startdate or member + + Parameters + ---------- + startdate: str + member: int or None + + Returns + ------- + ValueError: + If the data convention is not supported + + """ if member is not None: member_str = self._get_member_str(member) else: @@ -540,21 +680,21 @@ class CMORManager(DataManager): Log.info('Creating links for CMOR files ({0})', startdate) path = self._get_startdate_path(startdate) if self.config.data_convention.upper() in ('SPECS', 'APPLICATE'): - self._create_links_CMIP5(member_str, path) + self._create_links_cmip5(member_str, path) elif self.config.data_convention.upper() in ('CMIP6', 'PRIMAVERA'): - self._create_links_CMIP6(member_str, path) + self._create_links_cmip6(member_str, path) else: raise ValueError('Dataset convention {0} not supported for massive ' 'link creation'.format(self.config.data_convention)) Log.debug('Links ready') - def _create_links_CMIP5(self, member_str, path): + def _create_links_cmip5(self, member_str, path): for freq in os.listdir(path): frequency = Frequency.parse(freq) for domain in os.listdir(os.path.join(path, freq)): for var in os.listdir(os.path.join(path, freq, domain)): for 
member in os.listdir(os.path.join(path, freq, domain, var)): - if member_str != member: + if member_str is not None and member_str != member: continue for name in os.listdir(os.path.join(path, freq, domain, var, member)): filepath = os.path.join(path, freq, domain, var, member, name) @@ -566,98 +706,82 @@ class CMORManager(DataManager): self.create_link(domain, os.path.join(filepath, filename), frequency, var, "", False, vartype=VariableType.MEAN) - def _create_links_CMIP6(self, member_str, path): + def _create_links_cmip6(self, member_str, path): for member in os.listdir(path): for table in os.listdir(os.path.join(path, member)): frequency = self.variable_list.tables[table].frequency domain = None for var in os.listdir(os.path.join(path, member, table)): for grid in os.listdir(os.path.join(path, member, table, var)): - if member_str != member: + if member_str is not None and member_str != member: continue for name in os.listdir(os.path.join(path, member, table, var, grid)): filepath = os.path.join(path, member, table, var, grid, name) if os.path.isfile(filepath): - original_handler = Utils.openCdf(filepath) - if original_handler.dimensions['i'].size < original_handler.dimensions['j'].size: - original_handler.close() - Utils.rename_variables(filepath, {'i': 'j', 'j': 'i'}, False, True) - else: - original_handler.close() self.create_link(domain, filepath, frequency, var, "", False, vartype=VariableType.MEAN) else: for filename in os.listdir(filepath): - if not filename.endswith('.nc') or filename.startswith('.'): - return cmorfile = os.path.join(filepath, filename) - self._fix_ij_swap(cmorfile) self.create_link(domain, cmorfile, frequency, var, "", False, vartype=VariableType.MEAN) - def _fix_ij_swap(self, cmorfile): - return - original_handler = Utils.openCdf(cmorfile) - if original_handler.dimensions['i'].size < original_handler.dimensions['j'].size: - temp = TempFile.get() - new_handler = Utils.openCdf(temp, 'w') - for attribute in original_handler.ncattrs(): - 
original = getattr(original_handler, attribute) - setattr(new_handler, attribute, - Utils.convert_to_ASCII_if_possible(original)) - for dimension in original_handler.dimensions.keys(): - if dimension == 'i': - new_name = 'j' - elif dimension == 'j': - new_name = 'i' - else: - new_name = dimension - new_handler.createDimension(new_name, original_handler.dimensions[dimension].size) - for variable in original_handler.variables.keys(): - original_var = original_handler.variables[variable] - translated_dimensions = Utils._translate(original_var.dimensions, - {'i': 'j', 'j': 'i'}) - new_var = new_handler.createVariable(variable, original_var.datatype, - translated_dimensions) - Utils.copy_attributes(new_var, original_var) - new_var[:] = original_var[:] - original_handler.close() - new_handler.close() - Utils.move_file(temp, cmorfile, save_hash=True) - Log.debug('File {0} translated', cmorfile) - def _get_startdate_path(self, startdate): """ - Returns the path to the startdate's CMOR folder - :param startdate: target startdate - :type startdate: str - :return: path to the startdate's CMOR º - :rtype: str + Return the path to the startdate's CMOR folder + + Parameters + ---------- + startdate: str + + Returns + ------- + str """ if self.config.data_convention == 'specs': return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.experiment.institute, - self.experiment.model, self.experiment.experiment_name, 'S' + startdate) + self.experiment.model, self.experiment_name(startdate), 'S' + startdate) elif self.config.data_convention == 'preface': return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.experiment.institute, - self.experiment.experiment_name, 'S' + startdate) + self.experiment_name(startdate), 'S' + startdate) else: return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.config.cmor.activity, - self.experiment.institute, self.experiment.model, self.experiment.experiment_name) + 
self.experiment.institute, self.experiment.model, self.experiment_name(startdate)) def _get_member_str(self, member): if self.config.data_convention in ('specs', 'preface'): template = 'r{0}i{1}p1' elif self.config.data_convention in ('primavera', 'cmip6'): template = 'r{0}i{1}p1f1' - else: - raise Exception('Data convention {0} not supported'.format(self.config.data_convention)) - + elif self.config.data_convention == 'meteofrance': + return '{0:02d}'.format(member) return template.format(member + 1 - self.experiment.member_count_start, self.config.cmor.initialization_number) class MergeYear(Diagnostic): + """ + Diagnostic to get all the data for a given year and merge it in a file + + Parameters + ---------- + data_manager: DataManager + domain: ModelingRealm + var: str + startdate: str + member: int + year: int + grid: str or None, optional + box: Box or None, optional + frequency: Frequency or None, optional + """ + @classmethod def generate_jobs(cls, diags, options): + """ + Method to generate the required diagnostics from a section of the configuration file + + Required by the interface, does nothing as this diagnostic is not meant to be configured in the usal way + """ pass def __init__(self, data_manager, domain, var, startdate, member, year, grid=None, box=None, frequency=None): @@ -673,24 +797,39 @@ class MergeYear(Diagnostic): self.box = box self.frequency = frequency + def __str__(self): + return 'Merge year data Variable: {0.domain}:{0.var} Startdate: {0.startdate} Member: {0.member} ' \ + 'Year: {0.year} Grid: {0.grid} Box: {0.box} Frequency: {0.frequency}'.format(self) + + def __eq__(self, other): + return self.domain == other.domain and self.var == other.var and self.startdate == other.startdate and \ + self.member == other.member and self.year == other.year and self.grid == other.grid and \ + self.box == other.box and self.frequency == other.frequency + + def __hash__(self): + return hash(str(self)) + def request_data(self): + """Request all the 
data required by the diagnostic""" for chunk in self.experiment.get_year_chunks(self.startdate, self.year): self.chunk_files.append(self.request_chunk(self.domain, self.var, self.startdate, self.member, chunk, grid=self.grid, box=self.box, frequency=self.frequency)) def declare_data_generated(self): + """Declare all the data generated by the diagnostic""" self.year_file = self.declare_year(self.domain, self.var, self.startdate, self.member, self.year, grid=self.grid, box=self.box) self.year_file.storage_status = StorageStatus.NO_STORE def compute(self): + """Create the yearly file for the data""" temp = self._merge_chunk_files() temp2 = self._select_data_of_given_year(temp) self.year_file.set_local_file(temp2) def _select_data_of_given_year(self, data_file): temp2 = TempFile.get() - handler = Utils.openCdf(data_file) + handler = Utils.open_cdf(data_file) times = Utils.get_datetime_from_netcdf(handler) x = 0 first_index = None @@ -723,12 +862,3 @@ class MergeYear(Diagnostic): for chunk_file in self.chunk_files: os.remove(chunk_file) return temp - - def __str__(self): - return 'Create year CMOR file Startdate: {0.startdate} Member: {0.member} Year: {0.year} ' \ - 'Variable: {0.domain}:{0.var} Grid: {0.grid} Box: {0.box}'.format(self) - - def __eq__(self, other): - return self.startdate == other.startdate and self.member == other.member and self.year == other.year and\ - self.domain == other.domain and self.var == other.var and self.grid == other.grid and \ - self.box == other.box diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index d30d0465f982badf8e5822e35b04f93cdce61f67..7ef152465b3aff73c1be1d6cade4b18a70f4a06e 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -1,31 +1,114 @@ # coding=utf-8 +"""Classes to manage Earth Diagnostics configuration""" import os import six from bscearth.utils.config_parser import ConfigParser -from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, date2str +from 
bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, date2str, add_years, add_months, add_days from bscearth.utils.log import Log from earthdiagnostics import cdftools from earthdiagnostics.frequency import Frequency, Frequencies +from earthdiagnostics.modelingrealm import ModelingRealm from earthdiagnostics.variable import VariableManager -from modelingrealm import ModelingRealm class ConfigException(Exception): + """Exception raised when there is a problem with the configuration""" + pass class Config(object): - """ - Class to read and manage the configuration + """Class to read and manage the configuration""" - :param path: path to the conf file - :type path: str - """ + def __init__(self): + # Read diags config + self.data_adaptor = None + "Scratch folder path" + self.scratch_dir = None + "Scratch folder path" + self.use_ramdisk = None + "If True, the scratch dir is created as a ram disk" + self.auto_clean = None + "If True, the scratch dir is removed after finishing" + self.scratch_masks = None + "Common scratch folder for masks" + self.data_dir = None + "Root data folder path" + self.data_type = None + "Data type (experiment, observation or reconstruction)" + self.con_files = None + "Mask and meshes folder path" + self.mesh_mask = None + "Custom mesh mask file to use" + self.new_mask_glo = None + "Custom new mask glo file to use" + self.mask_regions = None + "Custom mask regions file to use" + self.mask_regions_3d = None + "Custom mask regions 3D file to use" + self.data_convention = None + "Data convention to use" + self.var_manager = None + self.skip_diags_done = None + "Flag to control if already done diags must be recalculated" + self.frequency = None + "Default data frequency to be used by the diagnostics" - def __init__(self, path): + self.cdftools_path = None + "Path to CDFTOOLS executables" + self.max_cores = None + "Maximum number of cores to use" + self.parallel_downloads = None + "Maximum number of simultaneous downloads" + 
self.parallel_uploads = None + "Maximum number of simultaneous uploads" + self.restore_meshes = None + "If True, forces the tool to copy all the mesh and mask files for the model, regardless of existence" + # Read experiment config + self.experiment = ExperimentConfig() + """ + Configuration related to the experiment + + Returns + ------- + ExperimentConfig + """ + self.cmor = None + """ + CMOR related configuration + + Returns + ------- + CMORConfig + """ + self.thredds = None + """ + THREDDS server configuration + + Returns + ------- + THREDDSConfig + """ + self.report = None + """ + Reporting configuration + + Returns + ------- + ReportConfig + """ + + def parse(self, path): + """ + Read configuration from INI file + + Parameters + ---------- + path: str + """ parser = ConfigParser() parser.optionxform = str parser.read(path) @@ -33,36 +116,25 @@ class Config(object): # Read diags config self.data_adaptor = parser.get_choice_option('DIAGNOSTICS', 'DATA_ADAPTOR', ('CMOR', 'THREDDS', 'OBSRECON'), 'CMOR') - "Scratch folder path" self.scratch_dir = parser.get_path_option('DIAGNOSTICS', 'SCRATCH_DIR') - "Scratch folder path" self.use_ramdisk = parser.get_bool_option('DIAGNOSTICS', 'USE_RAMDISK', False) - "If True, the scratch dir is created as a ram disk" self.auto_clean = parser.get_bool_option('DIAGNOSTICS', 'AUTO_CLEAN', True) - "If True, the scratch dir is removed after finishing" if not self.auto_clean and self.use_ramdisk: Log.warning('RAM disk scratch dir is always automatically cleaned.') self.auto_clean = True self.scratch_masks = parser.get_path_option('DIAGNOSTICS', 'SCRATCH_MASKS', '/scratch/Earth/ocean_masks') - "Common scratch folder for masks" self.data_dir = parser.get_path_option('DIAGNOSTICS', 'DATA_DIR') - "Root data folder path" self.data_type = parser.get_choice_option('DIAGNOSTICS', 'DATA_TYPE', ('exp', 'obs', 'recon'), 'exp') - "Data type (experiment, observation or reconstruction)" self.con_files = parser.get_path_option('DIAGNOSTICS', 
'CON_FILES') - "Mask and meshes folder path" self.mesh_mask = parser.get_path_option('DIAGNOSTICS', 'MESH_MASK', '') - "Custom mesh mask file to use" self.new_mask_glo = parser.get_path_option('DIAGNOSTICS', 'NEW_MASK_GLO', '') - "Custom new mask glo file to use" self.mask_regions = parser.get_path_option('DIAGNOSTICS', 'MASK_REGIONS', '') - "Custom mask regions file to use" self.mask_regions_3d = parser.get_path_option('DIAGNOSTICS', 'MASK_REGIONS_3D', '') - "Custom mask regions 3D file to use" self.data_convention = parser.get_choice_option('DIAGNOSTICS', 'DATA_CONVENTION', - ('specs', 'primavera', 'cmip6', 'preface'), 'specs', + ('specs', 'primavera', 'cmip6', 'preface', 'meteofrance'), + 'specs', ignore_case=True) if self.data_convention in ('primavera', 'cmip6'): @@ -79,26 +151,16 @@ class Config(object): self._diags = parser.get_option('DIAGNOSTICS', 'DIAGS') self.skip_diags_done = parser.get_bool_option('DIAGNOSTICS', 'SKIP_DIAGS_DONE', True) self.frequency = Frequency(parser.get_option('DIAGNOSTICS', 'FREQUENCY')) - "Default data frequency to be used by the diagnostics" self.cdftools_path = parser.get_path_option('DIAGNOSTICS', 'CDFTOOLS_PATH', '') - "Path to CDFTOOLS executables" self.max_cores = parser.get_int_option('DIAGNOSTICS', 'MAX_CORES', 0) - "Maximum number of cores to use" self.parallel_downloads = parser.get_int_option('DIAGNOSTICS', 'PARALLEL_DOWNLOADS', 1) - "Maximum number of simultaneous downloads" self.parallel_uploads = parser.get_int_option('DIAGNOSTICS', 'PARALLEL_UPLOADS', 1) - "Maximum number of simultaneous uploads" self.restore_meshes = parser.get_bool_option('DIAGNOSTICS', 'RESTORE_MESHES', False) - "If True, forces the tool to copy all the mesh and mask files for the model, regardless of existence" # Read experiment config - self.experiment = ExperimentConfig(parser) - """ - Configuration related to the experiment - - :rtype: ExperimentConfig - """ + self.experiment = ExperimentConfig() + self.experiment.parse_ini(parser) # Read 
aliases self._aliases = dict() if parser.has_section('ALIAS'): @@ -108,6 +170,9 @@ class Config(object): commands = self._diags.split() self._real_commands = list() for command in commands: + command = command.strip() + if command.startswith('#'): + break if command.lower() in self._aliases: added_commands = self._aliases[command.lower()] Log.info('Changing alias {0} for {1}', command, ' '.join(added_commands)) @@ -125,14 +190,24 @@ class Config(object): def get_commands(self): """ - Returns the list of commands after replacing the alias - :return: full list of commands - :rtype: list(str) + Return the list of commands after replacing the alias + + Returns + ------- + iterable of str """ return self._real_commands class CMORConfig(object): + """ + Configuration for the cmorization processes + + Parameters + ---------- + parser: ConfigParser + var_manager: VariableManager + """ def __init__(self, parser, var_manager): self.force = parser.get_bool_option('CMOR', 'FORCE', False) @@ -155,6 +230,7 @@ class CMORConfig(object): self.default_atmos_grid = parser.get_option('CMOR', 'DEFAULT_ATMOS_GRID', 'gr') self.activity = parser.get_option('CMOR', 'ACTIVITY', 'CMIP') self.min_cmorized_vars = parser.get_int_option('CMOR', 'MIN_CMORIZED_VARS', 10) + self.append_startdate = parser.get_bool_option('CMOR', 'APPEND_STARTDATE', False) vars_string = parser.get_option('CMOR', 'VARIABLE_LIST', '') self.var_manager = var_manager @@ -184,11 +260,12 @@ class CMORConfig(object): def cmorize(self, var_cmor): """ - Checks if var_cmor is on variable list + Check if var_cmor is on variable list + + Parameters + ---------- + var_cmor: Variable - :param var_cmor: CMOR variable object - :rtype var_cmor: Variablle - :return: """ if self._variable_list is None: return True @@ -197,6 +274,18 @@ class CMORConfig(object): return '{0}:{1}'.format(var_cmor.domain, var_cmor.short_name) in self._variable_list def any_required(self, variables): + """ + Check if any of the given variables is needed for 
cmorization + + Parameters + ---------- + variables: iterable of str + + Returns + ------- + bool + + """ if self._variable_list is None: return True for var in variables: @@ -206,6 +295,18 @@ class CMORConfig(object): return False def chunk_cmorization_requested(self, chunk): + """ + Check if the cmorization of a given chunk is required + + Parameters + ---------- + chunk: int + + Returns + ------- + bool + + """ if len(self._chunks) == 0: return True return chunk in self._chunks @@ -237,6 +338,23 @@ class CMORConfig(object): return range(start, end, step) def get_variables(self, frequency): + """ + Get the variables to get from the grib file for a given frequency + + Parameters + ---------- + frequency: Frequency + + Returns + ------- + str + + Raises + ------ + ValueError + If the frequency passed is not supported + + """ if frequency in (Frequencies.three_hourly, Frequencies.six_hourly): return self._var_hourly elif frequency == Frequencies.daily: @@ -246,26 +364,62 @@ class CMORConfig(object): raise ValueError('Frequency not recognized: {0}'.format(frequency)) def get_requested_codes(self): + """ + Get all the codes to be extracted from the grib files + + Returns + ------- + set of int + + """ return set(list(self._var_hourly.keys()) + list(self._var_daily.keys()) + list(self._var_monthly.keys())) def get_levels(self, frequency, variable): + """ + Get the levels to extract for a given variable + + Parameters + ---------- + frequency: Frequency + variable: str + + Returns + ------- + iterable of int + + """ return self.get_variables(frequency)[variable] class THREDDSConfig(object): + """ + Configuration related to the THREDDS server + + Parameters + ---------- + parser: ConfigParser + + """ + def __init__(self, parser): self.server_url = parser.get_option('THREDDS', 'SERVER_URL', '') class ExperimentConfig(object): - """ - Encapsulates all chunk related tasks + """Configuration related to the experiment""" - :param parser: parser for the config file - :type 
parser: Parser - """ + def __init__(self): + self.chunk_list = None - def __init__(self, parser): + def parse_ini(self, parser): + """ + Parse experiment section from INI-like file + + Parameters + ---------- + parser: ConfigParser + + """ self.institute = parser.get_option('EXPERIMENT', 'INSTITUTE') self.expid = parser.get_option('EXPERIMENT', 'EXPID') self.experiment_name = parser.get_option('EXPERIMENT', 'NAME', self.expid) @@ -274,6 +428,36 @@ class ExperimentConfig(object): self.member_prefix = parser.get_option('EXPERIMENT', 'MEMBER_PREFIX', 'fc') self.member_count_start = parser.get_int_option('EXPERIMENT', 'MEMBER_COUNT_START', 0) + self._parse_members() + self.calendar = parser.get_option('EXPERIMENT', 'CALENDAR', 'standard') + self._parse_startdates(parser) + + self.chunk_size = parser.get_int_option('EXPERIMENT', 'CHUNK_SIZE') + self.num_chunks = parser.get_int_option('EXPERIMENT', 'CHUNKS') + self.chunk_list = parser.get_int_list_option('EXPERIMENT', 'CHUNK_LIST', []) + + self.model = parser.get_option('EXPERIMENT', 'MODEL') + self.model_version = parser.get_option('EXPERIMENT', 'MODEL_VERSION', '') + self.atmos_grid = parser.get_option('EXPERIMENT', 'ATMOS_GRID', '') + self.atmos_timestep = parser.get_int_option('EXPERIMENT', 'ATMOS_TIMESTEP', 6) + self.ocean_timestep = parser.get_int_option('EXPERIMENT', 'OCEAN_TIMESTEP', 6) + + def _parse_startdates(self, parser): + startdates = parser.get_list_option('EXPERIMENT', 'STARTDATES') + import exrex + self.startdates = [] + for startdate_pattern in startdates: + startdate_pattern = startdate_pattern.strip() + if not startdate_pattern: + continue + if startdate_pattern[0] == '{' and startdate_pattern[-1] == '}': + self._read_startdates(startdate_pattern[1:-1]) + else: + for startdate in exrex.generate(startdate_pattern): + startdate = startdate.strip() + self.startdates.append(startdate) + + def _parse_members(self): members = [] for mem in self.members: if '-' in mem: @@ -290,29 +474,33 @@ class 
ExperimentConfig(object): members.append(int(mem)) self.members = members - startdates = parser.get_list_option('EXPERIMENT', 'STARTDATES') - - import exrex - self.startdates = [] - for startdate_pattern in startdates: - for startdate in exrex.generate(startdate_pattern): - startdate = startdate.strip() - if startdate: - self.startdates.append(startdate) - - self.chunk_size = parser.get_int_option('EXPERIMENT', 'CHUNK_SIZE') - self.num_chunks = parser.get_int_option('EXPERIMENT', 'CHUNKS') - self.chunk_list = parser.get_int_list_option('EXPERIMENT', 'CHUNK_LIST', []) - self.calendar = parser.get_option('EXPERIMENT', 'CALENDAR', 'standard') - self.model = parser.get_option('EXPERIMENT', 'MODEL') - self.model_version = parser.get_option('EXPERIMENT', 'MODEL_VERSION', '') - self.atmos_grid = parser.get_option('EXPERIMENT', 'ATMOS_GRID', '') - self.atmos_timestep = parser.get_int_option('EXPERIMENT', 'ATMOS_TIMESTEP', 6) - self.ocean_timestep = parser.get_int_option('EXPERIMENT', 'OCEAN_TIMESTEP', 6) + def _read_startdates(self, pattern): + pattern = pattern.split(',') + start = parse_date(pattern[0].strip()) + end = parse_date(pattern[1].strip()) + interval = pattern[2].strip() + if len(interval) == 1: + factor = 1 + else: + factor = int(interval[0:-1]) + interval = interval[-1].upper() + while start <= end: + self.startdates.append(date2str(start)) + if interval == 'Y': + start = add_years(start, factor) + elif interval == 'M': + start = add_months(start, factor, cal=self.calendar) + elif interval == 'W': + start = add_days(start, factor * 7, cal=self.calendar) + elif interval == 'D': + start = add_days(start, factor, cal=self.calendar) + else: + raise ConfigException('Interval {0} not supported in STARTDATES definition: {1}', interval, pattern) def get_chunk_list(self): """ Return a list with all the chunks + :return: List containing tuples of startdate, member and chunk :rtype: tuple[str, int, int] """ @@ -330,6 +518,7 @@ class ExperimentConfig(object): def 
get_member_list(self): """ Return a list with all the members + :return: List containing tuples of startdate and member :rtype: tuple[str, int, int] """ @@ -342,6 +531,7 @@ class ExperimentConfig(object): def get_year_chunks(self, startdate, year): """ Get the list of chunks containing timesteps from the given year + :param startdate: startdate to use :type startdate: str :param year: reference year @@ -362,29 +552,99 @@ class ExperimentConfig(object): return chunks def get_chunk_start(self, startdate, chunk): + """ + Get chunk's first day + + Parameters + ---------- + startdate: str or datetime.datetime + chunk: int + + Returns + ------- + datetime.datetime + + See Also + -------- + get_chunk_start_str + + """ # noinspection PyTypeChecker if isinstance(startdate, six.string_types): startdate = parse_date(startdate) return chunk_start_date(startdate, chunk, self.chunk_size, 'month', self.calendar) def get_chunk_start_str(self, startdate, chunk): + """ + Get chunk's first day string representation + + Parameters + ---------- + startdate: str or datetime.datetime + chunk: int + + Returns + ------- + str + + See Also + -------- + get_chunk_start + + """ return date2str(self.get_chunk_start(startdate, chunk)) def get_chunk_end(self, startdate, chunk): + """ + Get chunk's last day + + Parameters + ---------- + startdate: str or datetime.datetime + chunk: int + + Returns + ------- + datetime.datetime + + See Also + -------- + get_chunk_end_str + + """ return chunk_end_date(self.get_chunk_start(startdate, chunk), self.chunk_size, 'month', self.calendar) def get_chunk_end_str(self, startdate, chunk): + """ + Get chunk's last day as a string + + Parameters + ---------- + startdate: str or datetime.datetime + chunk: int + + Returns + ------- + datetime.datetime + + See Also + -------- + get_chunk_end + + """ return date2str(self.get_chunk_end(startdate, chunk)) def get_full_years(self, startdate): """ - Returns the list of full years that are in the given startdate + Return 
the list of full years that are in the given startdate + :param startdate: startdate to use :type startdate: str :return: list of full years :rtype: list[int] + """ - chunks_per_year = 12 / self.chunk_size + chunks_per_year = 12 // self.chunk_size date = parse_date(startdate) first_january = 0 first_year = date.year @@ -396,23 +656,34 @@ class ExperimentConfig(object): first_january += 1 years = list() - for chunk in range(first_january, chunks_per_year, self.num_chunks): + for _ in range(first_january, self.num_chunks, chunks_per_year,): years.append(first_year) first_year += 1 return years def get_member_str(self, member): """ - Returns the member name for a given member number. + Return the member name for a given member number. + :param member: member's number :type member: int :return: member's name :rtype: str + """ return '{0}{1}'.format(self.member_prefix, str(member).zfill(self.member_digits)) class ReportConfig(object): + """ + Configuration for the reporting feature + + Parameters + ---------- + parser: ConfigParser + + """ + def __init__(self, parser): self.maximum_priority = parser.get_int_option('REPORT', 'MAXIMUM_PRIORITY', 10) self.path = parser.get_path_option('REPORT', 'PATH', '') diff --git a/earthdiagnostics/constants.py b/earthdiagnostics/constants.py index 229a61b0f0bbc0f92cca9895c34646f51e92b145..a244ca5c542cced734bafc4fc032410ac5fccf88 100644 --- a/earthdiagnostics/constants.py +++ b/earthdiagnostics/constants.py @@ -1,10 +1,5 @@ # coding=utf-8 -""" -Contains the enumeration-like classes used by the diagnostics -""" -import netCDF4 - -from singleton import SingletonType +"""Contains the enumeration-like classes used by the diagnostics""" class Basin(object): @@ -19,6 +14,8 @@ class Basin(object): self._name = name def __eq__(self, other): + if other is None: + return False if self.name != other.name: return False return True @@ -32,20 +29,25 @@ class Basin(object): @property def name(self): """ - Basin's full name + Basin full name + :rtype: 
str """ return self._name class Basins(object): - """ - Predefined basins - """ + """Singleton class to manage available basins""" + + __instance = None - __metaclass__ = SingletonType + def __new__(cls, *args, **kwargs): + if cls.__instance is None: + cls.__instance = object.__new__(cls, *args, **kwargs) + cls.__instance.__initialize() + return cls.__instance - def __init__(self): + def __initialize(self): self.aliases = { 'Global': ('Global', 'glob'), @@ -131,9 +133,12 @@ class Basins(object): for alias in self.aliases[basin.name]: self._add_alias(alias, basin) + self.__initiated = False + def get_available_basins(self, handler): """ - + Read available basins from file + :param handler: :type handler: netCDF4.Dataset """ @@ -158,9 +163,11 @@ class Basins(object): def parse(self, basin): """ - Return the basin matching the given name. If the parameter basin is a Basin instance, directly returns the same - instance. This bahaviour is intended to facilitate the development of methods that can either accept a name - or a Basin instance to characterize the basin. + Return the basin matching the given name. + + If the parameter basin is a Basin instance, directly returns the same + instance. This behaviour is intended to facilitate the development of + methods that can either accept a name or a Basin instance to characterize the basin. 
:param basin: basin name or basin instance :type basin: str | Basin @@ -177,9 +184,7 @@ class Basins(object): class Models(object): - """ - Predefined models - """ + """Predefined models""" ECEARTH_2_3_O1L42 = 'Ec2.3_O1L42' """ EC-Earth 2.3 ORCA1 L42""" diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 3b38e0771a390de9a8f7cecf65debbe672f5ea2b..8bd25b4decfe645ab8e076a3a9ec18909d5c5698 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -1,4 +1,5 @@ # coding: utf-8 +"""Module for classes to manage storage manipulation""" import csv import os import shutil @@ -10,11 +11,13 @@ from bscearth.utils.log import Log from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import Utils, TempFile -from publisher import Publisher -from variable_type import VariableType +from earthdiagnostics.publisher import Publisher +from earthdiagnostics.variable import VariableType class LocalStatus(object): + """Local file status enumeration""" + PENDING = 0 DOWNLOADING = 1 READY = 2 @@ -24,6 +27,8 @@ class LocalStatus(object): class StorageStatus(object): + """Remote file status enumeration""" + PENDING = 0 UPLOADING = 1 READY = 2 @@ -32,6 +37,11 @@ class StorageStatus(object): class DataFile(Publisher): + """ + Represent a data file + + Must be derived for each concrete data file format + """ def __init__(self): super(DataFile, self).__init__() @@ -53,15 +63,15 @@ class DataFile(Publisher): self.job_added = False self._modifiers = [] self._size = None + self.lon_name = None + self.lat_name = None def __str__(self): return 'Data file for {0}'.format(self.remote_file) - def unsubscribe(self, who): - super(DataFile, self).unsubscribe(who) - @property def size(self): + """File size""" if self._size is None: self._get_size() return self._size @@ -70,12 +80,13 @@ class DataFile(Publisher): try: if self.local_status == LocalStatus.READY: self._size = os.path.getsize(self.local_file) - except Exception: + 
except OSError: self._size = None def clean_local(self): - if self.local_status != LocalStatus.READY or len(self.suscribers) > 0 or self.upload_required() or \ - self.storage_status == StorageStatus.UPLOADING: + """Check if a local file is still needed and remove it if not""" + if self.local_status != LocalStatus.READY or self.suscribers or self.upload_required() or \ + self.storage_status == StorageStatus.UPLOADING: return Log.debug('File {0} no longer needed. Deleting from scratch...'.format(self.remote_file)) os.remove(self.local_file) @@ -83,15 +94,26 @@ class DataFile(Publisher): self.local_file = None self.local_status = LocalStatus.PENDING - def only_suscriber(self, who): - if len(self._subscribers) != 1: - return - return who in self._subscribers - def upload_required(self): + """ + Get if an upload is needed for this file + + Returns + ------- + bool + + """ return self.local_status == LocalStatus.READY and self.storage_status == StorageStatus.PENDING def download_required(self): + """ + Get if a download is required for this file + + Returns + ------- + bool + + """ if not self.local_status == LocalStatus.PENDING: return False @@ -102,20 +124,54 @@ class DataFile(Publisher): return True def add_modifier(self, diagnostic): + """ + Register a diagnostic as a modifier of this data + + A modifier diagnostic is a diagnostic that read this data and changes it in any way. + The diagnostic must be a modifier even if it only affects the metadata + + Parameters + ---------- + diagnostic: Diagnostic + + """ self._modifiers.append(diagnostic) def has_modifiers(self): - return len(self._modifiers) > 0 + """ + Check if it has registered modifiers + + Returns + ------- + bool + + """ + return bool(self._modifiers) def ready_to_run(self, diagnostic): + """ + Check if the data is ready to run for a given diagnostics + + To be ready to run, the datafile should be in the local storage and no modifiers can be pending. 
+ + Parameters + ---------- + diagnostic: Diagnostic + + Returns + ------- + bool + + """ if not self.local_status == LocalStatus.READY: return False - if len(self._modifiers) == 0: + if not self._modifiers: return True return self._modifiers[0] is diagnostic @property def local_status(self): + """Get local storage status""" return self._local_status @local_status.setter @@ -128,6 +184,7 @@ class DataFile(Publisher): @property def storage_status(self): + """Get remote storage status""" return self._storage_status @storage_status.setter @@ -139,23 +196,44 @@ class DataFile(Publisher): self.dispatch(self) @classmethod - def from_storage(cls, filepath): + def from_storage(cls, filepath, data_convention): + """Create a new datafile to be downloaded from the storage""" file_object = cls() file_object.remote_file = filepath file_object.local_status = LocalStatus.PENDING + file_object.data_convention = data_convention return file_object @classmethod - def to_storage(cls, remote_file): + def to_storage(cls, remote_file, data_convention): + """Create a new datafile object for a file that is going to be generated and stored""" new_object = cls() new_object.remote_file = remote_file new_object.storage_status = StorageStatus.PENDING + new_object.data_convention = data_convention return new_object def download(self): + """ + Get data from remote storage to the local one + + Must be overriden by the derived classes + + Raises + ------ + NotImplementedError + If the derived classes do not override this + + """ raise NotImplementedError('Class must implement the download method') def prepare_to_upload(self, rename_var): + """ + Prepare a local file to be uploaded + + This includes renaming the variable if necessary, updating the metadata and adding the history and + managing the possibility of multiple regions + """ if self.data_convention in ('primavera', 'cmip6'): self.lon_name = 'longitude' self.lat_name = 'latitude' @@ -178,22 +256,37 @@ class DataFile(Publisher): 
self.upload() def upload(self): + """Send a loal file to the storage""" self.storage_status = StorageStatus.UPLOADING try: Utils.copy_file(self.local_file, self.remote_file, save_hash=True) - except Exception as ex: + except (OSError, Exception) as ex: Log.error('File {0} can not be uploaded: {1}', self.remote_file, ex) self.storage_status = StorageStatus.FAILED return Log.info('File {0} uploaded!', self.remote_file) - try: - self.create_link() - except Exception as ex: - Log.warning('Link for file {0} can not be created: {1}', self.remote_file, ex) + self.create_link() self.storage_status = StorageStatus.READY def set_local_file(self, local_file, diagnostic=None, rename_var='', region=None): + """ + Set the local file generated by EarthDiagnostics + + This also prepares it for the upload + + Parameters + ---------- + local_file: str + diagnostic: Diagnostic or None + rename_var: str + region: Basin or None + + Returns + ------- + None + + """ if diagnostic in self._modifiers: self._modifiers.remove(diagnostic) if region is not None: @@ -205,10 +298,11 @@ class DataFile(Publisher): self.local_status = LocalStatus.READY def create_link(self): + """Create a link from the original data in the _ folder""" pass def _correct_metadata(self): - handler = Utils.openCdf(self.local_file) + handler = Utils.open_cdf(self.local_file) var_handler = handler.variables[self.final_name] coords = set.intersection({'time', 'lev', self.lat_name, self.lon_name}, set(handler.variables.keys())) var_handler.coordinates = ' '.join(coords) @@ -240,10 +334,12 @@ class DataFile(Publisher): if self.cmor_var is not None: if self.cmor_var.valid_min: - valid_min = '-a valid_min,{0},o,{1},"{2}" '.format(self.final_name, var_type.char, self.cmor_var.valid_min) + valid_min = '-a valid_min,{0},o,{1},"{2}" '.format(self.final_name, var_type.char, + self.cmor_var.valid_min) if self.cmor_var.valid_max: - valid_max = '-a valid_max,{0},o,{1},"{2}" '.format(self.final_name, var_type.char, 
self.cmor_var.valid_max) + valid_max = '-a valid_max,{0},o,{1},"{2}" '.format(self.final_name, var_type.char, + self.cmor_var.valid_max) Utils.nco.ncatted(input=file_path, output=file_path, options=('-O -a _FillValue,{0},o,{1},"1.e20" ' @@ -299,7 +395,7 @@ class DataFile(Publisher): self._update_var_with_region_data() self._correct_metadata() Utils.nco.ncks(input=self.local_file, output=self.local_file, options=['--fix_rec_dmn region']) - handler = Utils.openCdf(self.local_file) + handler = Utils.open_cdf(self.local_file) regions = handler.variables['region'][...].tolist() if len(regions) > 1: ordered_regions = sorted(regions) @@ -320,14 +416,14 @@ class DataFile(Publisher): def _update_var_with_region_data(self): temp = TempFile.get() shutil.copyfile(self.remote_file, temp) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) var_handler = handler.variables[self.final_name] var_type = var_handler.dtype handler.close() self._fix_values_metadata(var_type, temp) Utils.nco.ncks(input=temp, output=temp, options=['--mk_rec_dmn region']) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) var_handler = handler.variables[self.final_name] if hasattr(var_handler, 'valid_min'): del var_handler.valid_min @@ -354,7 +450,7 @@ class DataFile(Publisher): Utils.move_file(temp, self.local_file) def _add_region_dimension_to_var(self): - handler = Utils.openCdf(self.local_file) + handler = Utils.open_cdf(self.local_file) handler.createDimension('region') var_region = handler.createVariable('region', str, 'region') var_region[0] = self.region @@ -381,6 +477,7 @@ class DataFile(Publisher): Utils.rename_variables(self.local_file, variables, False, True) def add_diagnostic_history(self): + """Add the history line corresponding to the diagnostic to the local file""" if not self.diagnostic: return from earthdiagnostics.earthdiags import EarthDiags @@ -389,6 +486,7 @@ class DataFile(Publisher): self._add_history_line(history_line) def add_cmorization_history(self): 
+ """Add the history line corresponding to the cmorization to the local file""" from earthdiagnostics.earthdiags import EarthDiags history_line = 'CMORized with Earthdiagnostics version {0}'.format(EarthDiags.version) self._add_history_line(history_line) @@ -397,28 +495,41 @@ class DataFile(Publisher): utc_datetime = 'UTC ' + datetime.utcnow().isoformat() history_line = '{0}: {1};'.format(utc_datetime, history_line) - handler = Utils.openCdf(self.local_file) + handler = Utils.open_cdf(self.local_file) try: history_line = history_line + handler.history except AttributeError: history_line = history_line - handler.history = Utils.convert_to_ASCII_if_possible(history_line) + handler.history = Utils.convert_to_ascii_if_possible(history_line) handler.close() class UnitConversion(object): """ Class to manage unit conversions + + Parameters + ---------- + source: str + destiny: str + factor: float + offset: float + """ + _dict_conversions = None + def __init__(self, source, destiny, factor, offset): + self.source = source + self.destiny = destiny + self.factor = float(factor) + self.offset = float(offset) + @classmethod def load_conversions(cls): - """ - Load conversions from the configuration file - """ + """Load conversions from the configuration file""" cls._dict_conversions = dict() - with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'conversions.csv'), 'rb') as csvfile: + with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'conversions.csv'), 'r') as csvfile: reader = csv.reader(csvfile, dialect='excel') for line in reader: if line[0] == 'original': @@ -428,23 +539,19 @@ class UnitConversion(object): @classmethod def add_conversion(cls, conversion): """ - Adds a conversion to the dictionary + Add a conversion to the dictionary :param conversion: conversion to add :type conversion: UnitConversion """ cls._dict_conversions[(conversion.source, conversion.destiny)] = conversion - def __init__(self, source, destiny, factor, offset): - 
self.source = source - self.destiny = destiny - self.factor = float(factor) - self.offset = float(offset) - @classmethod def get_conversion_factor_offset(cls, input_units, output_units): """ - Gets the conversion factor and offset for two units . The conversion has to be done in the following way: + Get the conversion factor and offset for two units. + + The conversion has to be done in the following way: converted = original * factor + offset :param input_units: original units @@ -502,8 +609,10 @@ class UnitConversion(object): class NetCDFFile(DataFile): + """Implementation of DataFile for netCDF files""" def download(self): + """Get data from remote storage to the local one""" try: self.local_status = LocalStatus.DOWNLOADING Log.debug('Downloading file {0}...', self.remote_file) @@ -511,6 +620,12 @@ class NetCDFFile(DataFile): self.local_file = TempFile.get() Utils.get_file_hash(self.remote_file, use_stored=True, save=True) Utils.copy_file(self.remote_file, self.local_file) + if self.data_convention == 'meteofrance': + Log.debug('Converting variable names from meteofrance convention') + alt_coord_names = {'time_counter': 'time', 'time_counter_bounds': 'time_bnds', + 'tbnds': 'bnds', 'nav_lat': 'lat', 'nav_lon': 'lon', 'x': 'i', + 'y': 'j'} + Utils.rename_variables(self.local_file, alt_coord_names, must_exist=False, rename_dimension=True) Log.info('File {0} ready!', self.remote_file) self.local_status = LocalStatus.READY @@ -521,10 +636,11 @@ class NetCDFFile(DataFile): self.local_status = LocalStatus.FAILED def create_link(self): + """Create a link from the original data in the _ folder""" try: self.data_manager.create_link(self.domain, self.remote_file, self.frequency, self.final_name, self.grid, True, self.var_type) - except Exception as ex: + except (ValueError, Exception) as ex: Log.error('Can not create link to {1}: {0}'.format(ex, self.remote_file)) def _get_size(self): @@ -533,7 +649,5 @@ class NetCDFFile(DataFile): self._size = 
os.path.getsize(self.local_file) if self.storage_status == StorageStatus.READY: self._size = os.path.getsize(self.remote_file) - except Exception: + except OSError: self._size = None - - diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index b4ab012486095e8058845a99b50abada456fdb11..ebf6e8a813b251a493fb54ec509d779fc68ec7ec 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -1,23 +1,21 @@ # coding: utf-8 -import csv -import os -import re -import shutil +"""Base data manager for Earth diagnostics""" import threading -from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus, LocalStatus +from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus, LocalStatus, UnitConversion from earthdiagnostics.modelingrealm import ModelingRealms -from earthdiagnostics.utils import Utils -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class DataManager(object): """ Class to manage the data repositories - :param config: - :type config: Config + Parameters + ---------- + config: Config """ + def __init__(self, config): self.config = config self.experiment = config.experiment @@ -29,7 +27,7 @@ class DataManager(object): def _get_file_from_storage(self, filepath): if filepath not in self.requested_files: - self.requested_files[filepath] = NCfile.from_storage(filepath) + self.requested_files[filepath] = NCfile.from_storage(filepath, self.config.data_convention) file_object = self.requested_files[filepath] file_object.local_satatus = LocalStatus.PENDING return self.requested_files[filepath] @@ -37,7 +35,7 @@ class DataManager(object): def _declare_generated_file(self, remote_file, domain, final_var, cmor_var, data_convention, region, diagnostic, grid, var_type, original_var): if remote_file not in self.requested_files: - self.requested_files[remote_file] = NCfile.to_storage(remote_file) + self.requested_files[remote_file] = 
NCfile.to_storage(remote_file, data_convention) file_object = self.requested_files[remote_file] file_object.diagnostic = diagnostic file_object.var_type = var_type @@ -48,7 +46,6 @@ class DataManager(object): file_object.final_name = final_var file_object.cmor_var = cmor_var file_object.region = region - file_object.data_convention = data_convention file_object.storage_status = StorageStatus.PENDING return file_object @@ -59,80 +56,45 @@ class DataManager(object): return var def get_varfolder(self, domain, var, grid=None, frequency=None): + """Get variable folder name for _ folder""" if grid: var = '{0}-{1}'.format(var, grid) if domain in [ModelingRealms.ocean, ModelingRealms.seaIce, ModelingRealms.ocnBgchem]: - return self._apply_fxh(var, self.experiment.ocean_timestep, frequency) + return DataManager._apply_fxh(var, self.experiment.ocean_timestep, frequency) else: - return self._apply_fxh(var, self.experiment.atmos_timestep, frequency) + return DataManager._apply_fxh(var, self.experiment.atmos_timestep, frequency) - def _apply_fxh(self, folder_name, timestep, frequency=None): + @staticmethod + def _apply_fxh(folder_name, timestep, frequency=None): is_base_frequency = frequency is not None and frequency.frequency.endswith('hr') if not is_base_frequency and timestep > 0: return '{0}_f{1}h'.format(folder_name, timestep) return folder_name def create_link(self, domain, filepath, frequency, var, grid, move_old, vartype): - freq_str = frequency.folder_name(vartype) - - if not grid: - grid = 'original' - - variable_folder = self.get_varfolder(domain, var) - vargrid_folder = self.get_varfolder(domain, var, grid) - - self.lock.acquire() - try: - if grid == 'original': - link_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, variable_folder) - if os.path.islink(link_path): - link_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, vargrid_folder) - - Utils.create_folder_tree(link_path) - else: - link_path = 
os.path.join(self.config.data_dir, self.experiment.expid, freq_str, vargrid_folder) - Utils.create_folder_tree(link_path) - default_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, variable_folder) - original_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, - vargrid_folder.replace('-{0}_f'.format(grid), '-original_f')) - - if os.path.islink(default_path): - os.remove(default_path) - elif os.path.isdir(default_path): - shutil.move(default_path, original_path) - os.symlink(link_path, default_path) - - if move_old and link_path not in self._checked_vars: - self._checked_vars.append(link_path) - old_path = os.path.join(self.config.data_dir, self.experiment.expid, freq_str, - 'old_{0}_f{1}h'.format(var, self.experiment.atmos_timestep)) - regex = re.compile(var + '_[0-9]{6,8}\.nc') - for filename in os.listdir(link_path): - if regex.match(filename): - Utils.create_folder_tree(old_path) - Utils.move_file(os.path.join(link_path, filename), - os.path.join(old_path, filename)) - - link_path = os.path.join(link_path, os.path.basename(filepath)) - if os.path.lexists(link_path): - os.remove(link_path) - if not os.path.exists(filepath): - raise ValueError('Original file {0} does not exists'.format(filepath)) - if not os.path.isdir(os.path.dirname(link_path)): - Utils.create_folder_tree(os.path.dirname(link_path)) - relative_path = os.path.relpath(filepath, os.path.dirname(link_path)) - os.symlink(relative_path, link_path) - except Exception: - raise - finally: - self.lock.release() - - # Overridable methods (not mandatory) + """ + Create file link + + Must be implementd by the derived classes. 
If not, this method will have no effect + + Parameters + ---------- + domain: ModelingRealm + filepath: str + frequency: Frequency + var: str + grid: str + move_old: bool + vartype: VariableType + + """ + pass + def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None, frequency=None, year=None, date_str=None, move_old=False, vartype=VariableType.MEAN): """ - Creates the link of a given file from the CMOR repository. + Create the link of a given file from the CMOR repository. :param cmor_var: :param move_old: @@ -161,131 +123,151 @@ class DataManager(object): pass def prepare(self): - """ - Prepares the data to be used by the diagnostic. - :return: - """ + """Prepare the data to be used by Earth Diagnostics""" pass def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=None): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + Request a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None, optional + box: Box or None, optional + frequency: Frequency or None, optional + vartype: VariableType or None, optional + + Returns + ------- + DataFile + + Raises + ------ + NotImplementedError + If not implemented by derived classes - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type 
frequency: Frequency|NoneType - :return: path to the copy created on the scratch folder - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :rtype: str """ raise NotImplementedError('Class must override request_chunk method') - -class UnitConversion(object): - """ - Class to manage unit conversions - """ - _dict_conversions = None - - @classmethod - def load_conversions(cls): + def request_year(self, diagnostic, domain, var, startdate, member, year, grid=None, box=None, frequency=None): """ - Load conversions from the configuration file + Request a given year for a variable from a CMOR repository + + Parameters + ---------- + diagnostic: Diagnostic + domain: ModelingRealm + var: str + startdate: str + member: int + year: int + grid: str or None, optional + box: Box or None, optional + frequency: Frequency or None, optional + + Returns + ------- + DataFile + + Raises + ------ + NotImplementedError + If not implemented by derived classes + """ - cls._dict_conversions = dict() - with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'conversions.csv'), 'rb') as csvfile: - reader = csv.reader(csvfile, dialect='excel') - for line in reader: - if line[0] == 'original': - continue - cls.add_conversion(UnitConversion(line[0], line[1], line[2], line[3])) - - @classmethod - def add_conversion(cls, conversion): + raise NotImplementedError('Class must override request_year method') + + def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, + vartype=VariableType.MEAN, diagnostic=None): """ - Adds a conversion to the dictionary + Declare a variable chunk to be generated by a diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None, optional + region: Basin or None, optional + box: Box or None, optional + frequency: Frequency or None, optional + vartype: VariableType, optional + diagnostic: 
Diagnostic, optional + + Returns + ------- + DataFile + + Raises + ------ + NotImplementedError + If not implemented by derived classes - :param conversion: conversion to add - :type conversion: UnitConversion """ - cls._dict_conversions[(conversion.source, conversion.destiny)] = conversion + raise NotImplementedError('Class must override declare_chunk method') - def __init__(self, source, destiny, factor, offset): - self.source = source - self.destiny = destiny - self.factor = float(factor) - self.offset = float(offset) + def declare_year(self, domain, var, startdate, member, year, grid=None, box=None, + vartype=VariableType.MEAN, diagnostic=None): + """ + Declare a variable year to be generated by a diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + year: int + grid: str or None, optional + box: Box or None, optional + vartype: VariableType, optional + diagnostic: Diagnostic, optional + + Returns + ------- + DataFile + + Raises + ------ + NotImplementedError + If not implemented by derived classes - @classmethod - def get_conversion_factor_offset(cls, input_units, output_units): """ - Gets the conversion factor and offset for two units . 
The conversion has to be done in the following way: - converted = original * factor + offset - - :param input_units: original units - :type input_units: str - :param output_units: destiny units - :type output_units: str - :return: factor and offset - :rtype: [float, float] + raise NotImplementedError('Class must override declare_year method') + + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN, possible_versions=None): """ - units = input_units.split() - if len(units) == 1: - scale_unit = 1 - unit = units[0] - else: - if '^' in units[0]: - values = units[0].split('^') - scale_unit = pow(int(values[0]), int(values[1])) - else: - scale_unit = float(units[0]) - unit = units[1] - - units = output_units.split() - if len(units) == 1: - scale_new_unit = 1 - new_unit = units[0] - else: - if '^' in units[0]: - values = units[0].split('^') - scale_new_unit = pow(int(values[0]), int(values[1])) - else: - scale_new_unit = float(units[0]) - new_unit = units[1] - - factor, offset = UnitConversion._get_factor(new_unit, unit) - if factor is None: - return None, None - factor = factor * scale_unit / float(scale_new_unit) - offset /= float(scale_new_unit) - - return factor, offset - - @classmethod - def _get_factor(cls, new_unit, unit): - # Add only the conversions with a factor greater than 1 - if unit == new_unit: - return 1, 0 - elif (unit, new_unit) in cls._dict_conversions: - conversion = cls._dict_conversions[(unit, new_unit)] - return conversion.factor, conversion.offset - elif (new_unit, unit) in cls._dict_conversions: - conversion = cls._dict_conversions[(new_unit, unit)] - return 1 / conversion.factor, -conversion.offset - else: - return None, None + Check if a file exists in the storage + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None, optional + box: Box or None, optional + frequency: Frequency or None, optional + 
vartype: VariableType, optional + possible_versions: iterable od str or None, optional + + Raises + ------ + NotImplementedError + If not implemented by derived classes + + Returns + ------- + bool + """ + raise NotImplementedError('Class must override file_exists method') diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 0b1dbf57a346ca408883b904281ed6deff6c8502..4090dcd606b0054cf5af16694a3a1b1b38241060 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""This module contains the Diagnostic base class and all the classes for parsing the options passed to them""" import datetime from bscearth.utils.log import Log @@ -8,10 +9,12 @@ from earthdiagnostics.datafile import StorageStatus, LocalStatus from earthdiagnostics.frequency import Frequency from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.publisher import Publisher -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class DiagnosticStatus(object): + """Enumeration of diagnostic status""" + WAITING = 0 READY = 1 RUNNING = 2 @@ -21,20 +24,27 @@ class DiagnosticStatus(object): class Diagnostic(Publisher): """ - Base class for the diagnostics. Provides a common interface for them and also - has a mechanism that allows diagnostic retrieval by name. + Base class for the diagnostics. + + Provides a common interface for them and also has a mechanism that allows diagnostic retrieval by name. :param data_manager: data manager that will be used to store and retrieve the necessary data :type data_manager: DataManager """ alias = None - """ - Alias to call the diagnostic. Must be overridden at the derived clases - """ + """ Alias to call the diagnostic. 
Must be overridden at the derived clases""" _diag_list = dict() def __init__(self, data_manager): + """ + Diagnostic constructor + + Parameters + ---------- + data_manager: DataManager + + """ super(Diagnostic, self).__init__() self._generated_files = [] self.data_manager = data_manager @@ -42,21 +52,55 @@ class Diagnostic(Publisher): self._requests = [] self.consumed_time = datetime.timedelta() self.subjobs = [] + self.message = None + + def __ne__(self, other): + """ + Check if a diagnostic is different than other + + Implementation is just the negation of the equal, that should be implemented by the derived classes + + Parameters + ---------- + other: Diagnostic or None + Diagnostic to be compared + + Returns + ------- + bool + + """ + return not self == other + + def __hash__(self): + return hash(str(self)) def can_skip_run(self): + """ + Check if a diagnostic calculation can be skipped + + Looks if the data to be generated is already there and is not going to be modified + + Returns + ------- + bool + + """ for file_generated in self._generated_files: if file_generated.storage_status != StorageStatus.READY: return False if file_generated.has_modifiers(): Log.warning('Can not skip diagnostics run when data is going to be modified: {0}'.format(self)) return False - + return False def __repr__(self): + """Full string representation. Defaults to str""" return str(self) @property def status(self): + """Execution status""" return self._status @status.setter @@ -72,17 +116,22 @@ class Diagnostic(Publisher): self.dispatch(self) @staticmethod - def register(cls): + def register(diagnostic_class): """ - Register a new diagnostic using the given alias. It must be call using the derived class. - :param cls: diagnostic class to register - :type cls: Type[Diagnostic] + Register a new diagnostic using the given alias. + + It must be called using the derived class. 
+ + Parameters + ---------- + diagnostic_class: Type[Diagnostic] + """ - if not issubclass(cls, Diagnostic): - raise ValueError('Class {0} must be derived from Diagnostic'.format(cls)) - if cls.alias is None: - raise ValueError('Diagnostic class {0} must have defined an alias'.format(cls)) - Diagnostic._diag_list[cls.alias] = cls + if not issubclass(diagnostic_class, Diagnostic): + raise ValueError('Class {0} must be derived from Diagnostic'.format(diagnostic_class)) + if diagnostic_class.alias is None: + raise ValueError('Diagnostic class {0} must have defined an alias'.format(diagnostic_class)) + Diagnostic._diag_list[diagnostic_class.alias] = diagnostic_class # noinspection PyProtectedMember @staticmethod @@ -90,10 +139,14 @@ class Diagnostic(Publisher): """ Return the class for a diagnostic given its name - :param name: diagnostic alias - :type name: str - :return: the selected Diagnostic class, None if name can not be found - :rtype: Diagnostic + Parameters + ---------- + name: str + + Returns + ------- + Type[Diagnostic] or None + """ if name in Diagnostic._diag_list.keys(): return Diagnostic._diag_list[name] @@ -101,7 +154,7 @@ class Diagnostic(Publisher): def compute(self): """ - Calculates the diagnostic and stores the output + Calculate the diagnostic and stores the output Must be implemented by derived classes """ @@ -109,7 +162,7 @@ class Diagnostic(Publisher): def request_data(self): """ - Calculates the diagnostic and stores the output + Request the data required by the diagnostic Must be implemented by derived classes """ @@ -117,7 +170,7 @@ class Diagnostic(Publisher): def declare_data_generated(self): """ - Calculates the diagnostic and stores the output + Declare the data to be generated by the diagnostic Must be implemented by derived classes """ @@ -126,22 +179,25 @@ class Diagnostic(Publisher): def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN): """ + 
Declare a chunk that is going to be generated by the diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int or None + chunk: int or None + grid: str or None + region: Basin or None + box: Box or None + frequency: Frequency or None + vartype: VariableType + + Returns + ------- + DataFile - :param domain: - :type domain: ModelingRealm - :param var: - :param startdate: - :param member: - :param chunk: - :param grid: - :param region: - :param box: - :param frequency: - :type frequency: Frequency - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: datafile object - :rtype: earthdiagnostics.datafile.DataFile """ if isinstance(region, Basin): region = region.name @@ -155,19 +211,23 @@ class Diagnostic(Publisher): def declare_year(self, domain, var, startdate, member, year, grid=None, box=None, vartype=VariableType.MEAN): """ + Declare a year that is going to be generated by the diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + year: int + grid: str or None + box: Box or None + vartype: VariableType + + Returns + ------- + DataFile - :param domain: - :type domain: ModelingRealm - :param var: - :param startdate: - :param member: - :param grid: - :param box: - :param year: - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: datafile object - :rtype: DataFile """ generated_year = self.data_manager.declare_year(domain, var, startdate, member, year, grid, box, diagnostic=self, vartype=vartype) @@ -181,23 +241,45 @@ class Diagnostic(Publisher): Must be implemented by derived classes. 
- :param diags: diagnostics manager - :type diags: Diags - :param options: list of strings containing the options passed to the diagnostic - :type options: list[str] - :return: + Parameters + ---------- + diags: Diags + options: list of str + + Returns + ------- + list of Diagnostic + """ raise NotImplementedError("Class must override generate_jobs class method") @classmethod def process_options(cls, options, options_available): + """ + Process the configuration of a diagnostic + + Parameters + ---------- + options: iterable of str + options_available: iterable of DiagnosticOptiion + + Returns + ------- + dict of str: str + Dictionary of names and values for the options + + Raises + ------ + DiagnosticOptionError: + If there are more options that admitted for the diagnostic + + """ processed = dict() options = options[1:] if len(options) > len(options_available): raise DiagnosticOptionError('You have specified more options than available for diagnostic ' '{0}'.format(cls.alias)) - for x in range(len(options_available)): - option_definition = options_available[x] + for x, option_definition in enumerate(options_available): if len(options) <= x: option_value = '' else: @@ -207,17 +289,22 @@ class Diagnostic(Publisher): def __str__(self): """ - Must be implemented by derived classes - :return: + Represenation of the diagnostic as a string + + Must be implemented by derived classesgit """ return 'Developer must override base class __str__ method' def add_subjob(self, subjob): """ - Adds a subjob - :param subjob: - :type subjob: Diagnostic - :return: + Add a subjob + + Add a diagnostic that must be run before the current one + + Parameters + ---------- + subjob: Diagnostic + """ self.subjobs.append(subjob) subjob.subscribe(self, self._subjob_status_changed) @@ -227,6 +314,35 @@ class Diagnostic(Publisher): def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, to_modify=False, vartype=VariableType.MEAN): + """ + Request one 
chunk of data required by the diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str or None + member: int or None + chunk: int or None + grid: str or None + box: Box or None + frequency: Frequency or None + to_modify: bool + Flag that must be active if the diagnostic is going to generate a modified version of this data. In this + case this data must not be declared as an output of the diagnostic + vartype: VariableType + + Returns + ------- + DataFile + + See Also + -------- + request_year + declare_chunk + declare_year + + """ request = self.data_manager.request_chunk(domain, var, startdate, member, chunk, grid, box, frequency, vartype) if to_modify: request.add_modifier(self) @@ -235,6 +351,32 @@ class Diagnostic(Publisher): return request def request_year(self, domain, var, startdate, member, year, grid=None, box=None, frequency=None, to_modify=False): + """ + Request one year of data that is required for the diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + year: int + grid: str + box: Box + frequency: Frequency + to_modify: str + + Returns + ------- + DataFile + + See Also + -------- + request_chunk + declare_chunk + declare_year + + """ request = self.data_manager.request_year(self, domain, var, startdate, member, year, grid, box, frequency) if to_modify: request.add_modifier(self) @@ -245,7 +387,6 @@ class Diagnostic(Publisher): def _updated_request(self, request): if self.status != DiagnosticStatus.WAITING: return - from datafile import LocalStatus if request.local_status == LocalStatus.FAILED: self.message = 'Required file {0} is not available'.format(request.remote_file) self.status = DiagnosticStatus.FAILED @@ -255,6 +396,7 @@ class Diagnostic(Publisher): self.check_is_ready() def check_is_ready(self): + """Check if a diagnostic is ready to run and change its status accordingly""" if all([request.ready_to_run(self) for request in self._requests]) and\ 
all([subjob.status == DiagnosticStatus.COMPLETED for subjob in self.subjobs]): self.status = DiagnosticStatus.READY @@ -264,23 +406,70 @@ class Diagnostic(Publisher): request.unsubscribe(self) def all_requests_in_storage(self): + """ + Check if all the data requested is in the local scratch + + Returns + ------- + bool + + """ return self.pending_requests() == 0 def pending_requests(self): + """ + Get the number of data request pending to be fulfilled + + Returns + ------- + int + + """ return len([request.storage_status != StorageStatus.READY or request.local_status != LocalStatus.READY for request in self._requests]) + class DiagnosticOption(object): + """Class to manage string options for the diagnostic""" def __init__(self, name, default_value=None): + """ + Option constructor + + Parameters + ---------- + name: str + default_value: object, optional + If None, the option is required and an exception will be thrown at parse time if the value is empty + + """ self.name = name self.default_value = default_value def parse(self, option_value): - option_value = self.check_default(option_value) + """ + Get the final value for the option + + If option_value is empty, return default_value + + Parameters + ---------- + option_value: str + + Returns + ------- + str + + Raises + ------ + DiagnosticOptionError: + If the option is empty and default_value is False + + """ + option_value = self._check_default(option_value) return option_value - def check_default(self, option_value): + def _check_default(self, option_value): if option_value == '': if self.default_value is None: raise DiagnosticOptionError('Option {0} is not optional'.format(self.name)) @@ -290,11 +479,37 @@ class DiagnosticOption(object): class DiagnosticFloatOption(DiagnosticOption): + """Class for parsing float options""" + def parse(self, option_value): - return float(self.check_default(option_value)) + """ + Parse option value + + Parameters + ---------- + option_value:str + + Returns + ------- + float + 
""" + return float(self._check_default(option_value)) class DiagnosticIntOption(DiagnosticOption): + """ + Class for parsing integer options + + Parameters + ---------- + name: str + default_value: int, optional + min_limit: int, optional + If setted, any value below this will not be accepted + max_limit: int, optional + If setted, any value over this will not be accepted + + """ def __init__(self, name, default_value=None, min_limit=None, max_limit=None): super(DiagnosticIntOption, self).__init__(name, default_value) @@ -302,24 +517,46 @@ class DiagnosticIntOption(DiagnosticOption): self.max_limit = max_limit def parse(self, option_value): - value = int(self.check_default(option_value)) + """ + Parse option value + + Parameters + ---------- + option_value:str + + Returns + ------- + int + + Raises + ------ + DiagnosticOptionError + If parsed values is outside limits + """ + value = int(self._check_default(option_value)) + self._check_limits(value) + return value + + def _check_limits(self, value): if self.min_limit is not None and value < self.min_limit: raise DiagnosticOptionError('Value {0} is lower than minimum ({1})'.format(value, self.min_limit)) if self.max_limit is not None and value > self.max_limit: raise DiagnosticOptionError('Value {0} is higher than maximum ({1})'.format(value, self.max_limit)) - return value -class DiagnosticListIntOption(DiagnosticOption): +class DiagnosticListIntOption(DiagnosticIntOption): """ - :param name: - :type name: str - :param default_value: - :type default_value: int|NoneType - :param min_limit: - :type min_limit: int|NoneType - :param max_limit: - :type max_limit: int|NoneType + Class for parsing integer list options + + Parameters + ---------- + name: str + default_value: list, optional + min_limit: int, optional + If setted, any value below this will not be accepted + max_limit: int, optional + If setted, any value over this will not be accepted + """ def __init__(self, name, default_value=None, min_limit=None, 
max_limit=None): @@ -330,41 +567,80 @@ class DiagnosticListIntOption(DiagnosticOption): """ Upper limit """ def parse(self, option_value): - option_value = self.check_default(option_value) + """ + Parse option value + + Parameters + ---------- + option_value:str + + Returns + ------- + list(int) + + Raises + ------ + DiagnosticOptionError + If parsed values is outside limits + """ + option_value = self._check_default(option_value) if isinstance(option_value, tuple) or isinstance(option_value, list): return option_value values = [int(i) for i in option_value.split('-')] for value in values: - # noinspection PyTypeChecker - if self.min_limit is not None and value < self.min_limit: - raise DiagnosticOptionError('Value {0} is lower than minimum ({1})'.format(value, self.min_limit)) - # noinspection PyTypeChecker - if self.max_limit is not None and value > self.max_limit: - raise DiagnosticOptionError('Value {0} is higher than maximum ({1})'.format(value, self.max_limit)) + self._check_limits(value) return values class DiagnosticListFrequenciesOption(DiagnosticOption): + """ + Class for parsing an option which is a list of frequencies + + Parameters + ---------- + name: str + default_value: list, optional + + """ def __init__(self, name, default_value=None): super(DiagnosticListFrequenciesOption, self).__init__(name, default_value) def parse(self, option_value): - option_value = self.check_default(option_value) - if isinstance(option_value, tuple) or isinstance(option_value, list): + option_value = self._check_default(option_value) + if isinstance(option_value, (tuple, list)): return option_value values = [Frequency(i) for i in option_value.split('-')] return values class DiagnosticVariableOption(DiagnosticOption): + """ + Class to parse variable options + + Parameters + ---------- + var_manager: VariableManager + name: str, optional + default_value: str, optional + + """ + def __init__(self, var_manager, name='variable', default_value=None): 
super(DiagnosticVariableOption, self).__init__(name, default_value) self.var_manager = var_manager def parse(self, option_value): - option_value = self.check_default(option_value) + """ + Parse option value + + Returns + ------- + Variable + + """ + option_value = self._check_default(option_value) real_name = self.var_manager.get_variable(option_value, False) if real_name is None: return option_value @@ -372,13 +648,31 @@ class DiagnosticVariableOption(DiagnosticOption): class DiagnosticVariableListOption(DiagnosticOption): + """ + Class to parse variable list options + + Parameters + ---------- + var_manager: VariableManager + name: str, optional + default_value: str, optional + + """ def __init__(self, var_manager, name, default_value=None): super(DiagnosticVariableListOption, self).__init__(name, default_value) self.var_manager = var_manager def parse(self, option_value): - option_value = self.check_default(option_value) + """ + Parse option value + + Returns + ------- + List[Variable] + + """ + option_value = self._check_default(option_value) var_names = [] for value in option_value.split(':'): real_name = self.var_manager.get_variable(value, False) @@ -390,24 +684,77 @@ class DiagnosticVariableListOption(DiagnosticOption): class DiagnosticDomainOption(DiagnosticOption): + """ + Class to parse domain options + + Parameters + ---------- + name: str, optional + default_value: str, optional + + """ + def __init__(self, name='domain', default_value=None): super(DiagnosticDomainOption, self).__init__(name, default_value) def parse(self, option_value): - return ModelingRealms.parse(self.check_default(option_value)) + """ + Parse option value + + Returns + ------- + ModelingRealm + """ + return ModelingRealms.parse(self._check_default(option_value)) class DiagnosticFrequencyOption(DiagnosticOption): + """ + Class to parse frequency options + + Parameters + ---------- + name: str, optional + default_value: Frequency,optional + + """ + def __init__(self, 
name='frequency', default_value=None): super(DiagnosticFrequencyOption, self).__init__(name, default_value) def parse(self, option_value): - return Frequency.parse(self.check_default(option_value)) + """ + Parse option value + + Parameters + ---------- + option_value: str + + Returns + ------- + Frequency + + """ + return Frequency.parse(self._check_default(option_value)) class DiagnosticBasinOption(DiagnosticOption): + """Class to parse basin options""" + def parse(self, option_value): - value = self.check_default(option_value) + """ + Parse option value + + Parameters + ---------- + option_value: str + + Returns + ------- + Basin + + """ + value = self._check_default(option_value) basin = Basins().parse(value) if basin is None: raise DiagnosticOptionError('Basin {0} not recognized'.format(value)) @@ -415,20 +762,65 @@ class DiagnosticBasinOption(DiagnosticOption): class DiagnosticComplexStrOption(DiagnosticOption): + """ + Class to parse complex string options + + It replaces '&;' with ',' and '&.' 
with ' ' + + """ + def parse(self, option_value): - return self.check_default(option_value).replace('&;', ',').replace('&.', ' ') + """ + Parse option value + + Parameters + ---------- + option_value:str + + Returns + ------- + str + """ + return self._check_default(option_value).replace('&;', ',').replace('&.', ' ') class DiagnosticBoolOption(DiagnosticOption): + """Class to parse boolean options""" + def parse(self, option_value): - option_value = self.check_default(option_value) + """ + Parse option value + + Parameters + ---------- + option_value:str + + Returns + ------- + Bool + """ + option_value = self._check_default(option_value) if isinstance(option_value, bool): return option_value - else: - return option_value.lower() in ('true', 't', 'yes') + return option_value.lower() in ('true', 't', 'yes') class DiagnosticChoiceOption(DiagnosticOption): + """ + Class to parse choice option + + Parameters + ---------- + name: str + choices: list of str + Valid options for the option + default_value: str, optional + If not None, it should ve a valid choice + ignore_case: bool, optional + If false, value must match case of the valid choice + + """ + def __init__(self, name, choices, default_value=None, ignore_case=True): super(DiagnosticChoiceOption, self).__init__(name, default_value) self.choices = choices @@ -438,19 +830,32 @@ class DiagnosticChoiceOption(DiagnosticOption): if default_value is not None: self.parse(default_value) - def parse(self, value): - value = self.check_default(value) + def parse(self, option_value): + """ + Parse option value + + Parameters + ---------- + option_value:str + + Returns + ------- + str + """ + option_value = self._check_default(option_value) if self.ignore_case: - value = value.lower() + option_value = option_value.lower() for choice in self.choices: - if value == choice.lower(): + if option_value == choice.lower(): return choice else: - if value in self.choices: - return value + if option_value in self.choices: + return 
option_value raise DiagnosticOptionError('Value {1} in option {0} is not a valid choice. ' - 'Options are {2}'.format(self.name, value, self.choices)) + 'Options are {2}'.format(self.name, option_value, self.choices)) class DiagnosticOptionError(Exception): + """Exception class for errors related to bad options for the diagnostics""" + pass diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 44c7fba56a2bf986fb916bc7d7928f966e9bbca4..23107feb04d545a7bb2dd9ffcbf0d5a67fcf6934 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # coding=utf-8 +"""Entry point for EarthDiagnostics""" import argparse import os import shutil @@ -9,7 +10,7 @@ from distutils.spawn import find_executable import bscearth.utils.path import netCDF4 import pkg_resources -from bscearth.utils.date import * +from bscearth.utils.log import Log from earthdiagnostics import cdftools from earthdiagnostics.cmormanager import CMORManager @@ -25,9 +26,11 @@ class EarthDiags(object): """ Launcher class for the diagnostics - :param config_file: path to the configuration file - :type config_file: str + Parameters + ---------- + config_file: str """ + # Get the version number from the relevant file. 
If not, from autosubmit package scriptdir = os.path.abspath(os.path.dirname(__file__)) if not os.path.exists(os.path.join(scriptdir, 'VERSION')): @@ -36,7 +39,6 @@ class EarthDiags(object): version_path = os.path.join(scriptdir, 'VERSION') readme_path = os.path.join(scriptdir, 'README') changes_path = os.path.join(scriptdir, 'CHANGELOG') - documentation_path = os.path.join(scriptdir, 'EarthDiagnostics.pdf') if os.path.isfile(version_path): with open(version_path) as f: version = f.read().strip() @@ -45,7 +47,9 @@ class EarthDiags(object): def __init__(self, config_file): Log.info('Initialising Earth Diagnostics Version {0}', EarthDiags.version) - self.config = Config(config_file) + self.config = Config() + self.config.parse(config_file) + os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE' TempFile.scratch_folder = self.config.scratch_dir cdftools.path = self.config.cdftools_path @@ -61,7 +65,9 @@ class EarthDiags(object): @staticmethod def parse_args(): """ - Entry point for the Earth Diagnostics. For more detailed documentation, use -h option + Entry point for the Earth Diagnostics. 
+ + For more detailed documentation, use -h option """ # try: parser = argparse.ArgumentParser(description='Main executable for Earth Diagnostics.') @@ -89,7 +95,7 @@ class EarthDiags(object): args = parser.parse_args() if args.doc: Log.info('Opening documentation...') - doc_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'EarthDiagnostics.pdf') + doc_path = os.path.join('http://earthdiagnostics.readthedocs.io/en/latest') Utils.execute_shell_command(('xdg-open', doc_path)) Log.result('Documentation opened!') return True @@ -142,6 +148,11 @@ class EarthDiags(object): def run(self): """ Run the diagnostics + + Returns + ------- + bool + """ self.had_errors = False Log.debug('Using netCDF version {0}', netCDF4.getlibversion()) @@ -173,7 +184,7 @@ class EarthDiags(object): def _read_basins_from_file(filename): if not os.path.isfile(filename): return - handler = Utils.openCdf(filename) + handler = Utils.open_cdf(filename) Basins().get_available_basins(handler) handler.close() @@ -197,6 +208,14 @@ class EarthDiags(object): self.data_manager.prepare() def clean(self): + """ + Clean scratch folder + + Returns + ------- + bool + + """ Log.info('Removing scratch folder...') self._remove_scratch_dir() Log.result('Scratch folder removed') @@ -210,6 +229,13 @@ class EarthDiags(object): shutil.rmtree(self.config.scratch_dir) def report(self): + """ + Create a report of missing variables for a given experiment + + Returns + ------- + bool + """ Log.info('Looking for existing vars...') self._prepare_data_manager() base_folder = self.config.report.path @@ -224,7 +250,7 @@ class EarthDiags(object): '{0}_{1}.report'.format(startdate, self.config.experiment.get_member_str(member))) - self.create_report(report_path, results) + self._create_report(report_path, results) Log.result('Report finished') return True @@ -248,7 +274,7 @@ class EarthDiags(object): return results @staticmethod - def create_report(report_path, results): + def _create_report(report_path, results): 
tables = set([result[1].name for result in results]) for table in tables: file_handler = open('{0}.{1}'.format(report_path, table), 'w') @@ -306,35 +332,9 @@ class EarthDiags(object): mask_regions_3d_path = os.path.join(con_files, mask_regions_3d) if self.config.scratch_masks: - Utils.create_folder_tree(self.config.scratch_masks) - Utils.give_group_write_permissions(self.config.scratch_masks) - - mesh_mask_scratch_path = os.path.join(self.config.scratch_masks, mesh_mask) - - if self._copy_file(mesh_mask_path, mesh_mask_scratch_path, - restore_meshes): - Utils.give_group_write_permissions(mesh_mask_scratch_path) - self._link_file(mesh_mask_scratch_path, 'mesh_hgr.nc') - self._link_file(mesh_mask_scratch_path, 'mesh_zgr.nc') - self._link_file(mesh_mask_scratch_path, 'mask.nc') - - new_maskglo_scratch_path = os.path.join(self.config.scratch_masks, new_mask_glo) - if self._copy_file(new_mask_glo_path, - new_maskglo_scratch_path, restore_meshes): - Utils.give_group_write_permissions(new_maskglo_scratch_path) - self._link_file(new_maskglo_scratch_path, 'new_maskglo.nc') - - mask_regions_scratch_path = os.path.join(self.config.scratch_masks, mask_regions) - if self._copy_file(mask_regions_path, - mask_regions_scratch_path, restore_meshes): - Utils.give_group_write_permissions(mask_regions_scratch_path) - self._link_file(mask_regions_scratch_path, 'mask_regions.nc') - - mask_regions3d_scratch_path = os.path.join(self.config.scratch_masks, mask_regions_3d) - if self._copy_file(mask_regions_3d_path, - mask_regions3d_scratch_path, restore_meshes): - Utils.give_group_write_permissions(mask_regions3d_scratch_path) - self._link_file(mask_regions3d_scratch_path, 'mask_regions.3d.nc') + self._prepare_mesh_using_scratch(mask_regions, mask_regions_3d, mask_regions_3d_path, mask_regions_path, + mesh_mask, mesh_mask_path, new_mask_glo, new_mask_glo_path, + restore_meshes) else: self._copy_file(mesh_mask_path, 'mesh_hgr.nc', restore_meshes) self._link_file('mesh_hgr.nc', 'mesh_zgr.nc') 
@@ -348,6 +348,34 @@ class EarthDiags(object): Log.result('Mesh files ready!') + def _prepare_mesh_using_scratch(self, mask_regions, mask_regions_3d, mask_regions_3d_path, mask_regions_path, + mesh_mask, mesh_mask_path, new_mask_glo, new_mask_glo_path, + restore_meshes): + Utils.create_folder_tree(self.config.scratch_masks) + Utils.give_group_write_permissions(self.config.scratch_masks) + mesh_mask_scratch_path = os.path.join(self.config.scratch_masks, mesh_mask) + if self._copy_file(mesh_mask_path, mesh_mask_scratch_path, + restore_meshes): + Utils.give_group_write_permissions(mesh_mask_scratch_path) + self._link_file(mesh_mask_scratch_path, 'mesh_hgr.nc') + self._link_file(mesh_mask_scratch_path, 'mesh_zgr.nc') + self._link_file(mesh_mask_scratch_path, 'mask.nc') + new_maskglo_scratch_path = os.path.join(self.config.scratch_masks, new_mask_glo) + if self._copy_file(new_mask_glo_path, + new_maskglo_scratch_path, restore_meshes): + Utils.give_group_write_permissions(new_maskglo_scratch_path) + self._link_file(new_maskglo_scratch_path, 'new_maskglo.nc') + mask_regions_scratch_path = os.path.join(self.config.scratch_masks, mask_regions) + if self._copy_file(mask_regions_path, + mask_regions_scratch_path, restore_meshes): + Utils.give_group_write_permissions(mask_regions_scratch_path) + self._link_file(mask_regions_scratch_path, 'mask_regions.nc') + mask_regions3d_scratch_path = os.path.join(self.config.scratch_masks, mask_regions_3d) + if self._copy_file(mask_regions_3d_path, + mask_regions3d_scratch_path, restore_meshes): + Utils.give_group_write_permissions(mask_regions3d_scratch_path) + self._link_file(mask_regions3d_scratch_path, 'mask_regions.3d.nc') + def _copy_file(self, source, destiny, force): if not os.path.exists(source): Log.user_warning('File {0} is not available for {1}', destiny, self.config.experiment.model_version) @@ -355,8 +383,9 @@ class EarthDiags(object): if not force and os.path.exists(destiny): # Small size differences can be due to the 
renaming of variables - delta_size = abs(os.stat(source).st_size - os.stat(destiny).st_size) - if delta_size < 512: + reference_size = os.stat(source).st_size + delta_size = abs(reference_size - os.stat(destiny).st_size) + if delta_size < 2048 or delta_size / reference_size < 0.001: Log.info('File {0} already exists', destiny) return True @@ -375,16 +404,16 @@ try: os.remove(destiny) except OSError as ex: - if ex.errno == 13: #Permission denied + if ex.errno == 13: # Permission denied Log.info('Link already created') return - pass os.symlink(source, destiny) Log.info('File {0} ready', destiny) def main(): + """Main for earthdiagnostics""" if not EarthDiags.parse_args(): exit(1) diff --git a/earthdiagnostics/frequency.py b/earthdiagnostics/frequency.py index 9a95f26e00fabd86d278eb43df8687a27209f4b8..5cd15f48f509e6242a0cd11050cc9a7eef5e2cd4 100644 --- a/earthdiagnostics/frequency.py +++ b/earthdiagnostics/frequency.py @@ -1,8 +1,9 @@ # coding=utf-8 -from earthdiagnostics.variable_type import VariableType +"""Data frequency management tools""" class Frequency(object): + """Time frequency""" _recognized = {'f': 'fx', 'fx': 'fx', 'fixed': 'fx', 'c': 'clim', 'clim': 'clim', 'climatology': 'clim', 'monclim': 'clim', '1hrclimmon': 'clim', @@ -36,6 +37,18 @@ class Frequency(object): return self.frequency def folder_name(self, vartype): + """ + Get folder name associated to this frequency + + Parameters + ---------- + vartype: VariableType + + Returns + ------- + str + """ + from earthdiagnostics.variable import VariableType if self == Frequencies.daily: freq_str = 'daily_{0}'.format(VariableType.to_str(vartype)) elif self == Frequencies.weekly: freq_str = 'weekly_{0}'.format(VariableType.to_str(vartype)) @@ -52,12 +65,27 @@ class Frequency(object): @staticmethod def parse(freq): + """ + Get frequency instance from str + + If a Frequency object is passed, it is returned as usual + + Parameters + ---------- + freq: str or Frequency + + Returns + ------- + Frequency + """ if isinstance(freq, Frequency): return
freq return Frequency(freq) class Frequencies(object): + """Enumeration of supported frequencies""" + fixed = Frequency('fx') climatology = Frequency('clim') yearly = Frequency('year') diff --git a/earthdiagnostics/general/__init__.py b/earthdiagnostics/general/__init__.py index 34820058a09b80bbd78b343158abef14094df0fb..1b9cbfa80bee2363aa1572d50af6f30fc1cde38a 100644 --- a/earthdiagnostics/general/__init__.py +++ b/earthdiagnostics/general/__init__.py @@ -1,13 +1,2 @@ # coding=utf-8 -from earthdiagnostics.general.monthlymean import MonthlyMean -from earthdiagnostics.general.dailymean import DailyMean -from earthdiagnostics.general.yearlymean import YearlyMean -from earthdiagnostics.general.rewrite import Rewrite -from earthdiagnostics.general.relink import Relink -from earthdiagnostics.general.scale import Scale -from earthdiagnostics.general.attribute import Attribute -from earthdiagnostics.general.relinkall import RelinkAll -from earthdiagnostics.general.simplify_dimensions import SimplifyDimensions -from earthdiagnostics.general.select_levels import SelectLevels -from earthdiagnostics.general.module import Module -from earthdiagnostics.general.verticalmeanmetersiris import VerticalMeanMetersIris +"""Package containing general purpose diagnostics""" diff --git a/earthdiagnostics/general/attribute.py b/earthdiagnostics/general/attribute.py index 1c6185787d95a7ed3fa3525f477f49ff02d930c8..f80e971892844be202b224e1b9a507c56dac0b18 100644 --- a/earthdiagnostics/general/attribute.py +++ b/earthdiagnostics/general/attribute.py @@ -1,14 +1,15 @@ # coding=utf-8 +"""Set attributes in netCDF files""" from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticComplexStrOption, \ DiagnosticDomainOption, DiagnosticVariableOption from earthdiagnostics.utils import Utils -from earthdiagnostics.modelingrealm import ModelingRealm class Attribute(Diagnostic): """ - Rewrites files without doing any calculations.
- Can be useful to convert units or to correct wrong metadata + Set the value of an attribute + + Can be useful to correct wrong metadata :original author: Javier Vegas-Regidor @@ -56,15 +57,14 @@ class Attribute(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, grid - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, domain, grid + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticOption('name'), @@ -79,19 +79,19 @@ class Attribute(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid, to_modify=True) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.corrected = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" variable_file = self.variable_file.local_file - handler = Utils.openCdf(variable_file) + handler = Utils.open_cdf(variable_file) handler.setncattr(self.attributte_name, self.attributte_value) handler.close() if not Utils.check_netcdf_file(variable_file): @@ -99,4 +99,3 @@ class Attribute(Diagnostic): self.attributte_value)) self.corrected.set_local_file(variable_file, self) - diff --git a/earthdiagnostics/general/dailymean.py b/earthdiagnostics/general/dailymean.py deleted file mode 100644 index 7fb4736eba58980d40dce6f247b85f22b91850f3..0000000000000000000000000000000000000000 --- 
a/earthdiagnostics/general/dailymean.py +++ /dev/null @@ -1,109 +0,0 @@ -# coding=utf-8 - -import os -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \ - DiagnosticFrequencyOption, DiagnosticVariableOption -from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.modelingrealm import ModelingRealm - - -class DailyMean(Diagnostic): - """ - Calculates daily mean for a given variable - - :original author: Javier Vegas-Regidor - - :created: July 2016 - - :param data_manager: data management object - :type data_manager: DataManager - :param startdate: startdate - :type startdate: str - :param member: member number - :type member: int - :param chunk: chunk's number - :type chunk: int - :param variable: variable's name - :type variable: str - :param domain: variable's domain - :type domain: ModelingRealm - :param frequency: original frequency - :type frequency: str - :param grid: original data grid - :type grid: str - """ - - alias = 'daymean' - "Diagnostic alias for the configuration file" - - def __init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid): - Diagnostic.__init__(self, data_manager) - self.startdate = startdate - self.member = member - self.chunk = chunk - self.variable = variable - self.domain = domain - self.frequency = frequency - self.grid = grid - - def __str__(self): - return 'Calculate daily mean Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Variable: {3}:{4} Original frequency: {5} Grid: {6}'.format(self.startdate, self.member, self.chunk, - self.domain, self.variable, - self.frequency, self.grid) - - def __eq__(self, other): - return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variable == other.variable and self.frequency == other.frequency and \ - self.grid == other.grid - - @classmethod - def 
generate_jobs(cls, diags, options): - """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, frequency=day, grid='' - :type options: list[str] - :return: - """ - - options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(diags.data_manager.config.var_manager), - DiagnosticFrequencyOption(), - DiagnosticOption('grid', '')) - options = cls.process_options(options, options_available) - job_list = list() - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(DailyMean(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], options['frequency'], options['grid'])) - return job_list - - def request_data(self): - self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=self.frequency, grid=self.grid) - - def declare_data_generated(self): - self.daymean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.daily, grid=self.grid) - - def compute(self): - """ - Runs the diagnostic - """ - temp = TempFile.get() - handler = Utils.openCdf(self.variable_file.local_file) - if 'region' in handler.variables: - noregion = TempFile.get() - Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options=('-O -C -x -v region',)) - Utils.cdo.daymean(input=noregion, output=temp) - os.remove(noregion) - monmean_handler = Utils.openCdf(temp) - Utils.copy_variable(handler, monmean_handler, 'region') - monmean_handler.close() - else: - Utils.cdo.daymean(input=self.variable_file.local_file, output=temp) - self.daymean.set_local_file(temp) - diff --git a/earthdiagnostics/general/module.py b/earthdiagnostics/general/module.py index f72aa5f568d43f6e70fe277c528ea065d74db443..b8b311466f93c46a37ddb03169b186f8f83aed4a 100644 --- a/earthdiagnostics/general/module.py +++ 
b/earthdiagnostics/general/module.py @@ -1,15 +1,14 @@ # coding=utf-8 -from earthdiagnostics.diagnostic import * -from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.modelingrealm import ModelingRealm +"""Compute module of two variables""" import numpy as np +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, DiagnosticOption +from earthdiagnostics.utils import Utils, TempFile + class Module(Diagnostic): """ - Scales a variable by the given value also adding at offset - Can be useful to correct units or other known errors - (think of a tas file declaring K as units but with the data stored as Celsius) + Compute the module of the vector given by two scalar variables :original author: Javier Vegas-Regidor @@ -57,14 +56,14 @@ class Module(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, grid - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, domain, grid + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(diags.data_manager.config.var_manager, 'componentu'), DiagnosticVariableOption(diags.data_manager.config.var_manager, 'componentv'), @@ -79,23 +78,23 @@ class Module(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.component_u_file = self.request_chunk(self.domain, self.componentu, self.startdate, self.member, self.chunk, grid=self.grid) self.component_v_file = self.request_chunk(self.domain, self.componentv, self.startdate, self.member, self.chunk, grid=self.grid) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" 
self.module_file = self.declare_chunk(self.domain, self.module, self.startdate, self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() Utils.copy_file(self.component_u_file.local_file, temp) - component_u = Utils.openCdf(temp) - component_v = Utils.openCdf(self.component_v_file.local_file) + component_u = Utils.open_cdf(temp) + component_v = Utils.open_cdf(self.component_v_file.local_file) variable_u = component_u.variables[self.componentu] variable_v = component_v.variables[self.componentv] diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py deleted file mode 100644 index dca5e730af29859750df5055dc331976dfcf9df8..0000000000000000000000000000000000000000 --- a/earthdiagnostics/general/monthlymean.py +++ /dev/null @@ -1,111 +0,0 @@ -# coding=utf-8 - -import os -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \ - DiagnosticFrequencyOption, DiagnosticVariableOption -from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.modelingrealm import ModelingRealm - - -class MonthlyMean(Diagnostic): - """ - Calculates monthly mean for a given variable - - :original author: Javier Vegas-Regidor - - :created: July 2016 - - :param data_manager: data management object - :type data_manager: DataManager - :param startdate: startdate - :type startdate: str - :param member: member number - :type member: int - :param chunk: chunk's number - :type chunk: int - :param variable: variable's name - :type variable: str - :param domain: variable's domain - :type domain: ModelingRealm - :param frequency: original frequency - :type frequency: Frequency - :param grid: original data grid - :type grid: str - """ - - alias = 'monmean' - "Diagnostic alias for the configuration file" - - def __init__(self, data_manager, startdate, member, chunk, domain, 
variable, frequency, grid): - Diagnostic.__init__(self, data_manager) - self.startdate = startdate - self.member = member - self.chunk = chunk - self.variable = variable - self.domain = domain - self.frequency = frequency - self.grid = grid - - def __str__(self): - return 'Calculate monthly mean Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Variable: {3}:{4}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable) - - def __eq__(self, other): - return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variable == other.variable and self.frequency == other.frequency and \ - self.grid == other.grid - - @classmethod - def generate_jobs(cls, diags, options): - """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, frequency=day, grid='' - :type options: list[str] - :return: - """ - - options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(diags.data_manager.config.var_manager), - DiagnosticFrequencyOption('frequency', Frequencies.daily), - DiagnosticOption('grid', '')) - options = cls.process_options(options, options_available) - job_list = list() - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(MonthlyMean(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], options['frequency'], options['grid'])) - - return job_list - - def request_data(self): - self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=self.frequency, grid=self.grid) - - def declare_data_generated(self): - self.monmean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.monthly, grid=self.grid) - - def compute(self): - """ - Runs the diagnostic - """ - handler = 
Utils.openCdf(self.variable_file.local_file) - temp = TempFile.get() - if 'region' in handler.variables: - noregion = TempFile.get() - Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options=('-O -C -x -v region',)) - Utils.cdo.monmean(input=noregion, output=temp) - os.remove(noregion) - monmean_handler = Utils.openCdf(temp) - Utils.copy_variable(handler, monmean_handler, 'region') - monmean_handler.close() - else: - Utils.cdo.monmean(input=self.variable_file.local_file, output=temp) - handler.close() - self.monmean.set_local_file(temp) - - - diff --git a/earthdiagnostics/general/relink.py b/earthdiagnostics/general/relink.py index 60c69f4cf0894b38ec2df16ee2a379f37603587c..1a9162d948e63221f1217be94294b8b0f1fdf338 100644 --- a/earthdiagnostics/general/relink.py +++ b/earthdiagnostics/general/relink.py @@ -1,8 +1,7 @@ # coding=utf-8 +"""Create links for a variable""" from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, DiagnosticBoolOption, \ DiagnosticVariableOption -from earthdiagnostics.modelingrealm import ModelingRealm -from earthdiagnostics.variable import VariableManager class Relink(Diagnostic): @@ -55,14 +54,14 @@ class Relink(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, move_old=False - :type options: list[str] - :return: - """ + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, domain, move_old=False + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticBoolOption('move_old', True), @@ -75,16 +74,15 @@ class Relink(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" 
pass def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" pass def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" self.data_manager.link_file(self.domain, self.variable, self.var_manager.get_variable(self.variable), self.startdate, self.member, self.chunk, move_old=self.move_old, grid=self.grid) - diff --git a/earthdiagnostics/general/relinkall.py b/earthdiagnostics/general/relinkall.py index d5fffc4ae4ba02b9a23f526c3fd031139050f871..17d05df88969749e450cb7a096f23b29b00714dc 100644 --- a/earthdiagnostics/general/relinkall.py +++ b/earthdiagnostics/general/relinkall.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Create links for all variables in a startdate""" from earthdiagnostics.diagnostic import Diagnostic @@ -32,14 +33,14 @@ class RelinkAll(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, move_old=False - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, domain, move_old=False + :type options: list[str] + :return: + """ if len(options) > 1: raise Exception('The Relink All diagnostic has no options') job_list = list() @@ -48,14 +49,13 @@ class RelinkAll(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" pass def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" pass def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" self.data_manager.create_links(self.startdate) - diff --git a/earthdiagnostics/general/rewrite.py b/earthdiagnostics/general/rewrite.py index 2aa937baa7dd988fd872047331f54a39461489f1..db19ee029a3c2cb150670cf4f25d81813bf8a321 100644 --- 
a/earthdiagnostics/general/rewrite.py +++ b/earthdiagnostics/general/rewrite.py @@ -1,11 +1,12 @@ # coding=utf-8 +"""Rewrite netCDF file""" from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, DiagnosticVariableOption -from earthdiagnostics.modelingrealm import ModelingRealm class Rewrite(Diagnostic): """ Rewrites files without doing any calculations. + Can be useful to convert units or to correct wrong metadata :original author: Javier Vegas-Regidor @@ -50,14 +51,14 @@ class Rewrite(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, grid - :type options: list[str] - :return: - """ + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, domain, grid + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticOption('grid', '')) @@ -69,16 +70,15 @@ class Rewrite(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid, to_modify=True) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.corrected = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" self.corrected.set_local_file(self.variable_file.local_file, self) - diff --git a/earthdiagnostics/general/scale.py b/earthdiagnostics/general/scale.py index 116a978ddc632c5d7e64745f4425ef46c85da68f..b7d27de28fe8d5ff1f0ffbc92f87b41146217f1c 100644 --- 
a/earthdiagnostics/general/scale.py +++ b/earthdiagnostics/general/scale.py @@ -1,13 +1,19 @@ # coding=utf-8 -from earthdiagnostics.diagnostic import * -from earthdiagnostics.utils import Utils -from earthdiagnostics.modelingrealm import ModelingRealm +"""Scales a variable with a given value and offset""" import math +import numpy as np + +from earthdiagnostics.constants import Basins +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticDomainOption, DiagnosticVariableOption, \ + DiagnosticFloatOption, DiagnosticBoolOption, DiagnosticListFrequenciesOption, DiagnosticOption +from earthdiagnostics.utils import Utils + class Scale(Diagnostic): """ Scales a variable by the given value also adding at offset + Can be useful to correct units or other known errors (think of a tas file declaring K as units but with the data stored as Celsius) @@ -33,7 +39,7 @@ class Scale(Diagnostic): "Diagnostic alias for the configuration file" def __init__(self, data_manager, startdate, member, chunk, value, offset, domain, variable, grid, - min_limit, max_limit, frequency): + min_limit, max_limit, frequency, apply_mask): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -46,30 +52,31 @@ class Scale(Diagnostic): self.min_limit = min_limit self.max_limit = max_limit self.frequency = frequency + self.apply_mask = apply_mask self.original_values = None def __str__(self): - return 'Scale output Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Scale value: {5} Offset: {6} Variable: {3}:{4} ' \ - 'Frequency: {7}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable, - self.value, self.offset, self.frequency) + return 'Scale output Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} ' \ + 'Scale value: {0.value} Offset: {0.offset} Variable: {0.domain}:{0.variable} ' \ + 'Frequency: {0.frequency} Apply mask: {0.apply_mask}'.format(self) def __eq__(self, other): return self.startdate == other.startdate and self.member ==
other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variable == other.variable and self.frequency == other.frequency + self.domain == other.domain and self.variable == other.variable and self.frequency == other.frequency and \ + self.apply_mask == other.apply_mask and self.value == other.value and self.offset == other.offset @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, grid - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, domain, grid + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticFloatOption('value'), @@ -77,35 +84,44 @@ class Scale(Diagnostic): DiagnosticOption('grid', ''), DiagnosticFloatOption('min_limit', float('nan')), DiagnosticFloatOption('max_limit', float('nan')), - DiagnosticListFrequenciesOption('frequencies', [diags.config.frequency])) + DiagnosticListFrequenciesOption('frequencies', [diags.config.frequency]), + DiagnosticBoolOption('apply_mask', False)) options = cls.process_options(options, options_available) job_list = list() for frequency in options['frequencies']: for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(Scale(diags.data_manager, startdate, member, chunk, options['value'], options['offset'], options['domain'], options['variable'], - options['grid'], options['min_limit'], options['max_limit'], frequency)) + options['grid'], options['min_limit'], options['max_limit'], frequency, + options['apply_mask'])) return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = 
self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid, frequency=self.frequency, to_modify=True) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.corrected = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid, frequency=self.frequency) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" variable_file = self.variable_file.local_file - handler = Utils.openCdf(variable_file) - var_handler = handler.variables[self.variable] - self.original_values = var_handler[:] + handler = Utils.open_cdf(variable_file) + var = handler.variables[self.variable] + self.original_values = var[:] + if self.apply_mask: + mask = Utils.get_mask(Basins().Global).astype(float) + mask[mask == 0] = np.nan + var[:] = mask * var[:] if self._check_limits(): - var_handler[:] = self.original_values * self.value + self.offset + values = self.original_values * self.value + self.offset + if self.apply_mask: + values[np.isnan(values)] = 0 + var[:] = values handler.close() self.corrected.set_local_file(self.variable_file.local_file, self) @@ -115,5 +131,3 @@ class Scale(Diagnostic): if not math.isnan(self.max_limit) and (self.original_values.max() > self.max_limit): return False return True - - diff --git a/earthdiagnostics/general/select_levels.py b/earthdiagnostics/general/select_levels.py index 1d2fb9cac33b0ebed3d5c3daf8fa3ab5b287b7bf..239c2df1807530f92769b7f7b19e991ea1560b5e 100644 --- a/earthdiagnostics/general/select_levels.py +++ b/earthdiagnostics/general/select_levels.py @@ -1,32 +1,26 @@ # coding=utf-8 +"""Extract levels from variable""" +from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \ DiagnosticVariableListOption, DiagnosticIntOption -from earthdiagnostics.modelingrealm import ModelingRealm from earthdiagnostics.utils import Utils, TempFile 
-from earthdiagnostics.box import Box class SelectLevels(Diagnostic): """ - Convert i j files to lon lat when there is no interpolation required, - i.e. lon is constant over i and lat is constat over j - - :original author: Javier Vegas-Regidor - - :created: April 2017 - - :param data_manager: data management object - :type data_manager: DataManager - :param startdate: startdate - :type startdate: str - :param member: member number - :type member: int - :param chunk: chunk's number - :type chunk: int - :param variable: variable's name - :type variable: str - :param domain: variable's domain - :type domain: ModelingRealm + Extract levels from file + + Parameters + ---------- + data_manager: DataManager + startdate: str + member: int + chunk: int + domain: ModelingRealm + variable: str + grid: str or None + first_level: int + last_level: int """ alias = 'selev' @@ -57,14 +51,14 @@ class SelectLevels(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: domain,variables,grid - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: domain,variables,grid + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(), DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variables'), DiagnosticIntOption('first_level'), @@ -82,17 +76,17 @@ class SelectLevels(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid, to_modify=True) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.result = self.declare_chunk(self.domain, self.variable, self.startdate,
self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() Utils.nco.ncks(input=self.variable_file, output=temp, @@ -118,4 +112,3 @@ class SelectLevels(Diagnostic): dimensions=(var_name, 'vertices')) new_lat_vertices[:] = vertices_values Utils.copy_attributes(new_lat_vertices, var_vertices) - diff --git a/earthdiagnostics/general/simplify_dimensions.py b/earthdiagnostics/general/simplify_dimensions.py index c903af0a7258259efdadf8ef6f415f9400eb8f34..6f73d379e37f94e8bb1236045ce3bd93e366762e 100644 --- a/earthdiagnostics/general/simplify_dimensions.py +++ b/earthdiagnostics/general/simplify_dimensions.py @@ -1,33 +1,28 @@ # coding=utf-8 +"""Convert i j files to lon lat when there is no interpolation required""" import numpy as np from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \ DiagnosticVariableListOption -from earthdiagnostics.modelingrealm import ModelingRealm from earthdiagnostics.utils import Utils, TempFile class SimplifyDimensions(Diagnostic): """ - Convert i j files to lon lat when there is no interpolation required, + Convert i j files to lon lat when there is no interpolation required + i.e. 
lon is constant over i and lat is constat over j - :original author: Javier Vegas-Regidor - - :created: April 2017 - - :param data_manager: data management object - :type data_manager: DataManager - :param startdate: startdate - :type startdate: str - :param member: member number - :type member: int - :param chunk: chunk's number - :type chunk: int - :param variable: variable's name - :type variable: str - :param domain: variable's domain - :type domain: ModelingRealm + Parameters + ---------- + data_manager: DataManager + startdate: str + member: int + chunk: init + domain: ModellingRealm + variable: str + grid: str or None + data_convention: str """ alias = 'simdim' @@ -60,14 +55,14 @@ class SimplifyDimensions(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: domain,variables,grid - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: domain,variables,grid + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(), DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variables'), DiagnosticOption('grid', '')) @@ -83,18 +78,18 @@ class SimplifyDimensions(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid, to_modify=True) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.simplified = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ - handler = Utils.openCdf(self.variable_file.local_file) + """Run the diagnostic""" + 
handler = Utils.open_cdf(self.variable_file.local_file) if 'i' not in handler.dimensions: raise Exception('Variable {0.domain}:{0.variable} does not have i,j dimensions'.format(self)) lat = handler.variables[self.lat_name] @@ -112,7 +107,7 @@ class SimplifyDimensions(Diagnostic): '{0.domain}:{0.variable}'.format(self)) temp = TempFile.get() - new_file = Utils.openCdf(temp, 'w') + new_file = Utils.open_cdf(temp, 'w') for dim in handler.dimensions.keys(): if dim in (self.lon_name, self.lat_name, 'i', 'j', 'vertices'): continue @@ -127,7 +122,7 @@ class SimplifyDimensions(Diagnostic): '{0}_vertices'.format(self.lon_name), '{0}_vertices'.format(self.lat_name)): continue Utils.copy_variable(handler, new_file, var, new_names={'i': self.lon_name, 'j': self.lat_name}) - + self._create_var(self.lon_name, lon_values, handler, new_file) self._create_var(self.lat_name, lat_values, handler, new_file) handler.close() @@ -154,4 +149,3 @@ class SimplifyDimensions(Diagnostic): dimensions=(var_name, 'vertices')) new_lat_vertices[:] = vertices_values Utils.copy_attributes(new_lat_vertices, var_vertices) - diff --git a/earthdiagnostics/general/timemean.py b/earthdiagnostics/general/timemean.py new file mode 100644 index 0000000000000000000000000000000000000000..0f0b53229fffd0084c334d6ebbe676cf906b17ec --- /dev/null +++ b/earthdiagnostics/general/timemean.py @@ -0,0 +1,239 @@ +# coding=utf-8 +"""Time mean diagnostics""" +import os + +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \ + DiagnosticFrequencyOption, DiagnosticVariableOption +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.utils import TempFile, Utils + + +class TimeMean(Diagnostic): + """ + Base class for all time mean diagnostics + + :param data_manager: data management object + :type data_manager: DataManager + :param startdate: startdate + :type startdate: str + :param member: member number + :type member: int + :param chunk: chunk's number + :type 
chunk: int + :param variable: variable's name + :type variable: str + :param domain: variable's domain + :type domain: ModelingRealm + :param frequency: original frequency + :type frequency: str + :param grid: original data grid + :type grid: str + """ + + def __init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid): + Diagnostic.__init__(self, data_manager) + self.startdate = startdate + self.member = member + self.chunk = chunk + self.variable = variable + self.domain = domain + self.frequency = frequency + self.grid = grid + self._target_frequency = None + + def __str__(self): + return 'Calculate {0._target_frequency} mean Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} ' \ + 'Variable: {0.domain}:{0.variable} Original frequency: {0.frequency} Grid: {0.grid}'.format(self) + + def __eq__(self, other): + return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ + self.domain == other.domain and self.variable == other.variable and self.frequency == other.frequency and \ + self.grid == other.grid and self._target_frequency == other._target_frequency + + @classmethod + def _process_options(cls, diags, options): + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), + DiagnosticFrequencyOption(), + DiagnosticOption('grid', '')) + options = cls.process_options(options, options_available) + return options + + @classmethod + def generate_jobs(cls, diags, options): + """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, domain, frequency=day, grid='' + :type options: list[str] + :return: + """ + options = cls._process_options(diags, options) + job_list = list() + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): + job_list.append(cls(diags.data_manager, startdate, member, chunk, + 
options['domain'], options['variable'], options['frequency'], options['grid'])) + return job_list + + def request_data(self): + """Request data required by the diagnostic""" + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=self.frequency, grid=self.grid) + + def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" + self.daymean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=Frequencies.daily, grid=self.grid) + + def compute_mean(self, input_file, output_file): + """ + Compute the time mean + + Parameters + ---------- + input_file: str + output_file: str + + """ + raise NotImplementedError() + + def compute(self): + """Run the diagnostic""" + temp = TempFile.get() + handler = Utils.open_cdf(self.variable_file.local_file) + if 'region' in handler.variables: + noregion = TempFile.get() + Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options=('-O -C -x -v region',)) + self.compute_mean(noregion, temp) + os.remove(noregion) + monmean_handler = Utils.open_cdf(temp) + Utils.copy_variable(handler, monmean_handler, 'region') + monmean_handler.close() + else: + self.compute_mean(self.variable_file.local_file, temp) + self.daymean.set_local_file(temp) + + +class DailyMean(TimeMean): + """ + Calculates daily mean for a given variable + + :param data_manager: data management object + :type data_manager: DataManager + :param startdate: startdate + :type startdate: str + :param member: member number + :type member: int + :param chunk: chunk's number + :type chunk: int + :param variable: variable's name + :type variable: str + :param domain: variable's domain + :type domain: ModelingRealm + :param frequency: original frequency + :type frequency: str + :param grid: original data grid + :type grid: str + """ + + alias = 'daymean' + "Diagnostic alias for the configuration file" + def __init__(self, 
data_manager, startdate, member, chunk, domain, variable, frequency, grid): + TimeMean.__init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid) + self._target_frequency = 'daily' + + def compute_mean(self, input_file, output_file): + """ + Compute the time mean + + Parameters + ---------- + input_file: str + output_file: str + + """ + Utils.cdo.daymean(input=input_file, output=output_file) + + +class MonthlyMean(TimeMean): + """ + Calculates monthly mean for a given variable + + :param data_manager: data management object + :type data_manager: DataManager + :param startdate: startdate + :type startdate: str + :param member: member number + :type member: int + :param chunk: chunk's number + :type chunk: int + :param variable: variable's name + :type variable: str + :param domain: variable's domain + :type domain: ModelingRealm + :param frequency: original frequency + :type frequency: str + :param grid: original data grid + :type grid: str + """ + + alias = 'monmean' + "Diagnostic alias for the configuration file" + def __init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid): + TimeMean.__init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid) + self._target_frequency = 'monthly' + + def compute_mean(self, input_file, output_file): + """ + Compute the time mean + + Parameters + ---------- + input_file: str + output_file: str + + """ + Utils.cdo.monmean(input=input_file, output=output_file) + + +class YearlyMean(TimeMean): + """ + Calculates monthly mean for a given variable + + :param data_manager: data management object + :type data_manager: DataManager + :param startdate: startdate + :type startdate: str + :param member: member number + :type member: int + :param chunk: chunk's number + :type chunk: int + :param variable: variable's name + :type variable: str + :param domain: variable's domain + :type domain: ModelingRealm + :param frequency: original frequency + 
:type frequency: str + :param grid: original data grid + :type grid: str + """ + + alias = 'yearmean' + "Diagnostic alias for the configuration file" + def __init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid): + TimeMean.__init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid) + self._target_frequency = 'yearly' + + def compute_mean(self, input_file, output_file): + """ + Compute the time mean + + Parameters + ---------- + input_file: str + output_file: str + + """ + Utils.cdo.monmean(input=input_file, output=output_file) diff --git a/earthdiagnostics/general/verticalmeanmetersiris.py b/earthdiagnostics/general/verticalmeanmetersiris.py index 23dce4d8c686047e3f67385d5367ac2789725eaf..4cc5f3945d4b305f4975e8da72f9fbe1614cfdc2 100644 --- a/earthdiagnostics/general/verticalmeanmetersiris.py +++ b/earthdiagnostics/general/verticalmeanmetersiris.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Average vertically any variable""" import iris import iris.analysis import iris.exceptions @@ -12,7 +13,7 @@ from earthdiagnostics.utils import TempFile class VerticalMeanMetersIris(Diagnostic): """ - Averages vertically any given variable + Average vertically any given variable :original author: Virginie Guemas :contributor: Javier Vegas-Regidor @@ -58,7 +59,7 @@ class VerticalMeanMetersIris(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -86,17 +87,17 @@ class VerticalMeanMetersIris(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.results = 
self.declare_chunk(self.domain, self.variable + 'vmean', self.startdate, self.member, self.chunk, box=self.box) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" iris.FUTURE.netcdf_no_unlimited = True iris.FUTURE.netcdf_promote = True @@ -119,8 +120,8 @@ class VerticalMeanMetersIris(Diagnostic): lev_max = coord.points[-1] else: lev_max = self.box.max_depth - var_cube = var_cube.extract(iris.Constraint(coord_values= - {coord.var_name: lambda cell: lev_min <= cell <= lev_max})) + lev_constraint = iris.Constraint(coord_values={coord.var_name: lambda cell: lev_min <= cell <= lev_max}) + var_cube = var_cube.extract(lev_constraint) var_cube = var_cube.collapsed(coord, iris.analysis.MEAN) temp = TempFile.get() iris.save(var_cube, temp, zlib=True) diff --git a/earthdiagnostics/general/yearlymean.py b/earthdiagnostics/general/yearlymean.py deleted file mode 100644 index 148f0ca210a07a2661bf68ded705a5edef2c46bd..0000000000000000000000000000000000000000 --- a/earthdiagnostics/general/yearlymean.py +++ /dev/null @@ -1,111 +0,0 @@ -# coding=utf-8 - -import os -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \ - DiagnosticFrequencyOption, DiagnosticVariableOption -from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.modelingrealm import ModelingRealm - - -class YearlyMean(Diagnostic): - """ - Calculates yearly mean for a given variable - - :original author: Javier Vegas-Regidor - - :created: July 2016 - - :param data_manager: data management object - :type data_manager: DataManager - :param startdate: startdate - :type startdate: str - :param member: member number - :type member: int - :param chunk: chunk's number - :type chunk: int - :param variable: variable's name - :type variable: str - :param domain: variable's domain - :type domain: ModelingRealm - :param frequency: original frequency - :type frequency: str - :param grid: 
original data grid - :type grid: str - """ - - alias = 'yearmean' - "Diagnostic alias for the configuration file" - - def __init__(self, data_manager, startdate, member, chunk, domain, variable, frequency, grid): - Diagnostic.__init__(self, data_manager) - self.startdate = startdate - self.member = member - self.chunk = chunk - self.variable = variable - self.domain = domain - self.frequency = frequency - self.grid = grid - - def __str__(self): - return 'Calculate yearly mean Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Variable: {3}:{4} Original frequency: {5} Grid: {6}'.format(self.startdate, self.member, self.chunk, - self.domain, self.variable, - self.frequency, self.grid) - - def __eq__(self, other): - return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variable == other.variable and self.frequency == other.frequency and \ - self.grid == other.grid - - @classmethod - def generate_jobs(cls, diags, options): - """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: variable, domain, frequency=day, grid='' - :type options: list[str] - :return: - """ - - options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(diags.data_manager.config.var_manager), - DiagnosticFrequencyOption(default_value=diags.config.frequency), - DiagnosticOption('grid', '')) - options = cls.process_options(options, options_available) - job_list = list() - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(YearlyMean(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], options['frequency'], options['grid'])) - return job_list - - def request_data(self): - self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=self.frequency, grid=self.grid) - - def 
declare_data_generated(self): - self.yearmean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.yearly, grid=self.grid) - - def compute(self): - """ - Runs the diagnostic - """ - temp = TempFile.get() - - handler = Utils.openCdf(self.variable_file.local_file) - if 'region' in handler.variables: - noregion = TempFile.get() - Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options=('-O -C -x -v region',)) - Utils.cdo.yearmean(input=noregion, output=temp) - monmean_handler = Utils.openCdf(temp) - Utils.copy_variable(handler, monmean_handler, 'region') - monmean_handler.close() - else: - Utils.cdo.yearmean(input=self.variable_file.local_file, output=temp) - os.remove(self.variable_file.local_file) - - self.yearmean.set_local_file(temp) - diff --git a/earthdiagnostics/modelingrealm.py b/earthdiagnostics/modelingrealm.py index 703d5b47d8c28f8b6aa257eeed1fcf4cc29d143a..af0856f44e7838765595b7c18ce7af2e7029934b 100644 --- a/earthdiagnostics/modelingrealm.py +++ b/earthdiagnostics/modelingrealm.py @@ -1,8 +1,21 @@ # coding=utf-8 +"""Class to manage variable domains""" from earthdiagnostics.frequency import Frequencies class ModelingRealm(object): + """ + Class to represent Modeling Relms for variables + + Parameters + ---------- + domain_name: str + + Raises + ------ + ValueError + If the domain is not supported + """ def __init__(self, domain_name): lower_name = domain_name.lower() @@ -22,6 +35,9 @@ class ModelingRealm(object): def __eq__(self, other): return other.__class__ == ModelingRealm and self.name == other.name + def __hash__(self): + return hash(self.name) + def __ne__(self, other): return not (self == other) @@ -33,13 +49,17 @@ class ModelingRealm(object): def get_table_name(self, frequency, data_convention): """ - Returns the table name for a domain-frequency pair - :param data_convention: Data convention in use - :type data_convention: str - :param frequency: variable's 
frequency - :type frequency: Frequency - :return: variable's table name - :rtype: str + Get table name for a domain-frequency pair + + Parameters + ---------- + data_convention: str + frequency: Frequency + + Returns + ------- + str + """ if self.name == 'seaIce': if data_convention in ('specs', 'preface'): @@ -62,12 +82,36 @@ class ModelingRealm(object): return table_name def get_table(self, frequency, data_convention): + """ + Get table name for a domain-frequency pair + + Parameters + ---------- + data_convention: str + frequency: Frequency + + Returns + ------- + str + + Parameters + ---------- + frequency + data_convention + + Returns + ------- + CMORTable + + """ table_name = self.get_table_name(frequency, data_convention) from earthdiagnostics.variable import CMORTable return CMORTable(table_name, frequency, 'December 2013') class ModelingRealms(object): + """Enumeration of supported modelling realms""" + seaIce = ModelingRealm('seaice') ocean = ModelingRealm('ocean') landIce = ModelingRealm('landIce') @@ -80,17 +124,31 @@ class ModelingRealms(object): @classmethod def parse(cls, modelling_realm): """ - Return the basin matching the given name. If the parameter basin is a Basin instance, directly returns the same - instance. This bahaviour is intended to facilitate the development of methods that can either accept a name - or a Basin instance to characterize the basin. - - :param modelling_realm: basin name or basin instance - :type modelling_realm: str | Basin - :return: basin instance corresponding to the basin name - :rtype: Basin + Return the basin matching the given name. + + If the parameter modelling_realm is a ModelingRealm instance, directly returns the same + instance. This behaviour is intended to facilitate the development of + methods that can either accept a name or a ModelingRealm instance to + characterize the modelling realm. 
+ + Parameters + ---------- + modelling_realm: ModelingRealm or str + + Returns + ------- + ModelingRealm + + Raises + ------ + ValueError + If the modelling realm is not supported + """ if isinstance(modelling_realm, ModelingRealm): return modelling_realm + if modelling_realm == '': + return None for name in cls.__dict__.keys(): if name.startswith('_'): continue @@ -100,4 +158,3 @@ class ModelingRealms(object): if modelling_realm.lower() in [value.name.lower()]: return value raise ValueError('Modelling realm {0} not recognized!'.format(modelling_realm)) - diff --git a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py index 356eac7732ec956711e95fc30b43839ffd1331a6..40c9869fffb843246d03e3ce577fcda177e217fa 100644 --- a/earthdiagnostics/obsreconmanager.py +++ b/earthdiagnostics/obsreconmanager.py @@ -1,17 +1,28 @@ # coding=utf-8 +""" +Data management for BSC-Earth conventions + +Focused on working with observations and reconstructions as well as with downloaded +but no cmorized models (like ECMWF System 4) +""" import os from bscearth.utils.log import Log from earthdiagnostics.datamanager import DataManager from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class ObsReconManager(DataManager): """ Data manager class for CMORized experiments + + Parameters + ---------- + config: Config """ + def __init__(self, config): super(ObsReconManager, self).__init__(config) data_folders = self.config.data_dir.split(':') @@ -28,48 +39,11 @@ class ObsReconManager(DataManager): if self.config.data_type in ('obs', 'recon') and self.experiment.chunk_size != 1: raise Exception('For obs and recon data chunk_size must be always 1') - # noinspection PyUnusedLocal - def request_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, - vartype=VariableType.MEAN): - filepath = self.get_file_path(startdate, domain, variable, frequency, 
vartype) - return self._get_file_from_storage(filepath) - - def create_link(self, domain, filepath, frequency, var, grid, move_old, vartype): - pass - - # noinspection PyUnusedLocal - def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - vartype=VariableType.MEAN): - """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param domain: CMOR domain - :type domain: str - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - """ - return NotImplementedError - def get_file_path(self, startdate, domain, var, frequency, vartype, box=None, grid=None): """ - Returns the path to a concrete file + Return the path to a concrete file + :param startdate: file's startdate :type startdate: str :param domain: file's domain @@ -87,7 +61,7 @@ class ObsReconManager(DataManager): :param vartype: Variable type (mean, statistic) :type vartype: VariableType """ - if not frequency: + if frequency is None: frequency = self.config.frequency folder_path = self._get_folder_path(frequency, domain, var, grid, vartype) @@ -105,52 +79,6 @@ class ObsReconManager(DataManager): var_folder) return folder_path - def get_year(self, domain, var, startdate, member, year, grid=None, box=None, vartype=VariableType.MEAN): - """ - Ge a file containing all the data for one year for one variable - :param domain: 
variable's domain - :type domain: str - :param var: variable's name - :type var: str - :param startdate: startdate to retrieve - :type startdate: str - :param member: member to retrieve - :type member: int - :param year: year to retrieve - :type year: int - :param grid: variable's grid - :type grid: str - :param box: variable's box - :type box: Box - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: - """ - raise NotImplementedError() - - def get_var_url(self, var, startdate, frequency, box, vartype): - """ - Get url for dataset - :param var: variable to retrieve - :type var: str - :param startdate: startdate to retrieve - :type startdate: str - :param frequency: frequency to get: - :type frequency: Frequency | None - :param box: box to get - :type box: Box - :param vartype: type of variable - :type vartype: VariableType - :return: - """ - if not frequency: - frequency = self.config.frequency - var = self._get_final_var_name(box, var) - full_path = os.path.join(self.config.data_dir, self.config.data_type, self.experiment.institute, - self.experiment.model, frequency.folder_name(vartype)) - full_path = os.path.join(full_path, var, self._get_file_name(var, startdate, frequency)) - return full_path - def _get_file_name(self, var, startdate, frequency): if startdate: if self.config.data_type != 'exp' and frequency != Frequencies.weekly: @@ -159,62 +87,27 @@ class ObsReconManager(DataManager): else: return '{0}.nc'.format(var) - def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None, - frequency=None, year=None, date_str=None, move_old=False, vartype=VariableType.MEAN): - """ - Creates the link of a given file from the CMOR repository. 
- - :param cmor_var: - :param move_old: - :param date_str: - :param year: if frequency is yearly, this parameter is used to give the corresponding year - :type year: int - :param domain: CMOR domain - :type domain: str - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: str - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - """ - # THREDDSManager does not require links - pass - def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + Request a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None + box: Box or None + frequency: Frequency or None + vartype: VariableType or None + + Returns + ------- + DataFile - :param vartype: - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: 
Frequency|NoneType - :return: path to the copy created on the scratch folder - :rtype: str """ var = self._get_final_var_name(box, var) filepath = self.get_file_path(startdate, domain, var, frequency, vartype, box, grid) @@ -225,32 +118,27 @@ class ObsReconManager(DataManager): def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN, diagnostic=None): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param diagnostic: - :param region: - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str + Declare a variable chunk to be generated by a diagnostic + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None, optional + region: Basin or None, optional + box: Box or None, optional + frequency: Frequency or None, optional + vartype: VariableType, optional + diagnostic: Diagnostic, optional + + Returns + ------- + DataFile """ - if not frequency: + if frequency is None: frequency = self.config.frequency original_name = var cmor_var = self.variable_list.get_variable(var) @@ -264,4 +152,3 @@ class ObsReconManager(DataManager): netcdf_file.frequency = frequency Log.debug('{0} will be generated', filepath) 
return netcdf_file - diff --git a/earthdiagnostics/ocean/__init__.py b/earthdiagnostics/ocean/__init__.py index 854d90d94bd5a3906a916bfe64798296cd774b55..4196e87035f530d4170b5985a9689b16280e7315 100644 --- a/earthdiagnostics/ocean/__init__.py +++ b/earthdiagnostics/ocean/__init__.py @@ -1,26 +1,2 @@ # coding=utf-8 -""" -Module containing the diagnostics related to the ocean output -""" -from earthdiagnostics.ocean.heatcontent import HeatContent -from earthdiagnostics.ocean.moc import Moc -from earthdiagnostics.ocean.areamoc import AreaMoc -from earthdiagnostics.ocean.maxmoc import MaxMoc -from earthdiagnostics.ocean.psi import Psi -from earthdiagnostics.ocean.gyres import Gyres -from earthdiagnostics.ocean.convectionsites import ConvectionSites -from earthdiagnostics.ocean.cutsection import CutSection -from earthdiagnostics.ocean.averagesection import AverageSection -from earthdiagnostics.ocean.interpolate import Interpolate -from earthdiagnostics.ocean.interpolatecdo import InterpolateCDO -from earthdiagnostics.ocean.verticalmeanmeters import VerticalMeanMeters -from earthdiagnostics.ocean.verticalmean import VerticalMean -from earthdiagnostics.ocean.mixedlayersaltcontent import MixedLayerSaltContent -from earthdiagnostics.ocean.siasiesiv import Siasiesiv -from earthdiagnostics.ocean.heatcontentlayer import HeatContentLayer -from earthdiagnostics.ocean.mixedlayerheatcontent import MixedLayerHeatContent -from earthdiagnostics.ocean.regionmean import RegionMean -from earthdiagnostics.ocean.rotation import Rotation -from earthdiagnostics.ocean.mxl import Mxl -from earthdiagnostics.ocean.verticalgradient import VerticalGradient -from earthdiagnostics.ocean.mask_land import MaskLand +"""Package containing diagnostics related to the ocean model""" diff --git a/earthdiagnostics/ocean/areamoc.py b/earthdiagnostics/ocean/areamoc.py index c73895fd434d03ee6d726cdfbd51434fc5dadb87..0f20d28dac39bcd0c6e2cc50d0bd7fc3f6cca047 100644 --- a/earthdiagnostics/ocean/areamoc.py +++ 
b/earthdiagnostics/ocean/areamoc.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Compute an Atlantic MOC index from the average""" import os import numpy as np @@ -12,7 +13,9 @@ from earthdiagnostics.utils import Utils, TempFile class AreaMoc(Diagnostic): """ - Compute an Atlantic MOC index by averaging the meridional overturning + Compute an Atlantic MOC index + + Averages the meridional overturning in a latitude band between 1km and 2km or any other index averaging the meridional overturning in a given basin and a given domain @@ -64,7 +67,7 @@ class AreaMoc(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -72,7 +75,6 @@ class AreaMoc(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticIntOption('min_lat'), DiagnosticIntOption('max_lat'), DiagnosticIntOption('min_depth'), @@ -90,18 +92,18 @@ class AreaMoc(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(ModelingRealms.ocean, AreaMoc.vsftmyz, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.results = self.declare_chunk(ModelingRealms.ocean, AreaMoc.vsftmyz, self.startdate, self.member, self.chunk, box=self.box) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" nco = Utils.nco cdo = Utils.cdo @@ -110,11 +112,11 @@ class AreaMoc(Diagnostic): Utils.copy_file(self.variable_file.local_file, temp) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) if 'i' in handler.dimensions: handler.close() nco.ncwa(input=temp, output=temp, options=('-O -a i',)) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) basin_index = np.where(handler.variables['basin'][:] == self.basin.name) if 
'lat' in handler.variables: @@ -135,8 +137,8 @@ class AreaMoc(Diagnostic): # To select basin and remove dimension nco.ncwa(input=temp, output=temp, options=('-O -d basin,{0} -a basin'.format(basin_index),)) - source = Utils.openCdf(temp) - destiny = Utils.openCdf(temp2, 'w') + source = Utils.open_cdf(temp) + destiny = Utils.open_cdf(temp2, 'w') Utils.copy_dimension(source, destiny, 'time') Utils.copy_dimension(source, destiny, 'lev') diff --git a/earthdiagnostics/ocean/averagesection.py b/earthdiagnostics/ocean/averagesection.py index 8ca8abb0a03da07a2f5ea49b92e19f7ae838e2ba..27eac1aa5bc981084b7ef348f84722e0a0118b86 100644 --- a/earthdiagnostics/ocean/averagesection.py +++ b/earthdiagnostics/ocean/averagesection.py @@ -1,14 +1,18 @@ # coding=utf-8 +"""Compute an average of a given zone""" import os + from earthdiagnostics.box import Box -from earthdiagnostics.diagnostic import * +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticDomainOption, DiagnosticVariableOption, \ + DiagnosticIntOption, DiagnosticOption from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.modelingrealm import ModelingRealm class AverageSection(Diagnostic): """ - Compute an average of a given zone. The variable MUST be in a regular grid + Compute an average of a given zone. 
+ + The variable MUST be in a regular grid :original author: Virginie Guemas :contributor: Javier Vegas-Regidor @@ -30,7 +34,6 @@ class AverageSection(Diagnostic): :type domain: ModelingRealm :param box: box to use for the average :type box: Box - """ alias = 'avgsection' @@ -57,7 +60,7 @@ class AverageSection(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -85,17 +88,17 @@ class AverageSection(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.mean = self.declare_chunk(self.domain, self.variable + 'mean', self.startdate, self.member, self.chunk, box=self.box, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() variable_file = self.variable_file.local_file Utils.cdo.zonmean(input='-sellonlatbox,{0},{1},{2},{3} {4}'.format(self.box.min_lon, self.box.max_lon, diff --git a/earthdiagnostics/ocean/convectionsites.py b/earthdiagnostics/ocean/convectionsites.py index 74271682754c1072f48ff2eb7e8d2f1735e76d68..08523f34c1b44884c6b45f71bad3dd5a2a0d30b0 100644 --- a/earthdiagnostics/ocean/convectionsites.py +++ b/earthdiagnostics/ocean/convectionsites.py @@ -1,10 +1,12 @@ # coding=utf-8 +"""Compute the intensity of convection""" import numpy as np from bscearth.utils.log import Log -from earthdiagnostics.diagnostic import Diagnostic -from earthdiagnostics.utils import Utils, TempFile + from earthdiagnostics.constants import Models +from earthdiagnostics.diagnostic import Diagnostic from earthdiagnostics.modelingrealm import ModelingRealms +from 
earthdiagnostics.utils import Utils, TempFile class ConvectionSites(Diagnostic): @@ -50,7 +52,7 @@ class ConvectionSites(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -66,15 +68,15 @@ class ConvectionSites(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.mixed_layer = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.sites = self.declare_chunk(ModelingRealms.ocean, 'site', self.startdate, self.member, self.chunk) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" if self.model_version in [Models.ECEARTH_2_3_O1L42, Models.ECEARTH_3_0_O1L46, Models.NEMO_3_2_O1L42, Models.NEMO_3_3_O1L46, Models.NEMOVAR_O1L42]: @@ -93,8 +95,8 @@ class ConvectionSites(Diagnostic): mlotst_file = self.mixed_layer.local_file output = TempFile.get() - self.mlotst_handler = Utils.openCdf(mlotst_file) - handler = Utils.openCdf(output, 'w') + self.mlotst_handler = Utils.open_cdf(mlotst_file) + handler = Utils.open_cdf(output, 'w') handler.createDimension('time', self.mlotst_handler.variables['time'].shape[0]) handler.createDimension('region', 4) Utils.copy_variable(self.mlotst_handler, handler, 'time') diff --git a/earthdiagnostics/ocean/cutsection.py b/earthdiagnostics/ocean/cutsection.py index d4e5f89c9e8c6fc43669b73a522937166d57e02c..2690e6b5196e252cd77a3ae63594c16d037724d4 100644 --- a/earthdiagnostics/ocean/cutsection.py +++ b/earthdiagnostics/ocean/cutsection.py @@ -1,12 +1,13 @@ # coding=utf-8 +"""Cut meridional or zonal sections""" import numpy as np from bscearth.utils.log import Log +from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, 
DiagnosticBoolOption, DiagnosticIntOption, \ DiagnosticDomainOption, DiagnosticVariableOption -from earthdiagnostics.box import Box -from earthdiagnostics.utils import Utils from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils class CutSection(Diagnostic): @@ -71,7 +72,7 @@ class CutSection(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -92,19 +93,19 @@ class CutSection(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.section = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, box=self.box) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" nco = Utils.nco - handler = Utils.openCdf('mesh_hgr.nc') + handler = Utils.open_cdf('mesh_hgr.nc') dimi = handler.dimensions['i'].size dimj = handler.dimensions['j'].size dimlev = handler.dimensions['lev'].size @@ -113,7 +114,7 @@ class CutSection(Diagnostic): lat = handler.variables['lat'][:] handler.close() - handler = Utils.openCdf('mask.nc') + handler = Utils.open_cdf('mask.nc') mask_lev = handler.variables['tmask'][:] mask_lev = mask_lev.astype(float) # noinspection PyTypeChecker @@ -152,7 +153,7 @@ class CutSection(Diagnostic): temp = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) dimtime = handler.dimensions['time'].size var_array = handler.variables[self.variable][:] handler.close() @@ -171,7 +172,7 @@ class CutSection(Diagnostic): nco.ncks(input=temp, 
output=temp, options=('-O -v lev,time',)) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) if not self.zonal: handler.createDimension('lat', size) coord_var = handler.createVariable('lat', float, 'lat') diff --git a/earthdiagnostics/ocean/gyres.py b/earthdiagnostics/ocean/gyres.py index b01971488d8e19b447c03ff114bcad32302955d0..942a24676d1467113f16270c974fb315f6d83763 100644 --- a/earthdiagnostics/ocean/gyres.py +++ b/earthdiagnostics/ocean/gyres.py @@ -1,11 +1,12 @@ # coding=utf-8 +"""Compute the intensity of the subtropical and subpolar gyres""" import numpy as np from bscearth.utils.log import Log from earthdiagnostics.constants import Models from earthdiagnostics.diagnostic import Diagnostic -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class Gyres(Diagnostic): @@ -52,7 +53,7 @@ class Gyres(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -69,28 +70,28 @@ class Gyres(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.vsftbarot = self.request_chunk(ModelingRealms.ocean, 'vsftbarot', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.gyre = self.declare_chunk(ModelingRealms.ocean, 'gyre', self.startdate, self.member, self.chunk) # noinspection PyPep8Naming def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" if self.model_version in [Models.ECEARTH_2_3_O1L42, Models.ECEARTH_3_0_O1L46, Models.NEMO_3_2_O1L42, Models.NEMO_3_3_O1L46, Models.NEMOVAR_O1L42]: - subpolNAtl = [230, 275, 215, 245] - subpolNPac = [70, 145, 195, 235] - subtropNPac = [45, 175, 165, 220] - subtropNAtl = [195, 
275, 175, 225] - subtropSPac = [70, 205, 120, 145] - subtropSAtl = [235, 300, 120, 145] - subtropInd = [320, 30, 110, 180] - ACC = [1, 361, 1, 65] + subpol_n_atl = [230, 275, 215, 245] + subpol_n_pac = [70, 145, 195, 235] + subtrop_n_pac = [45, 175, 165, 220] + subtrop_n_atl = [195, 275, 175, 225] + subtrop_s_pac = [70, 205, 120, 145] + subtrop_s_atl = [235, 300, 120, 145] + subtrop_ind = [320, 30, 110, 180] + acc = [1, 361, 1, 65] elif self.model_version in [Models.ECEARTH_3_0_O25L46, Models.ECEARTH_3_0_O25L75, Models.GLORYS2_V1_O25L75, Models.ECEARTH_3_2_O1L75, Models.ECEARTH_3_2_O25L75]: @@ -101,9 +102,9 @@ class Gyres(Diagnostic): output = TempFile.get() vsftbarot_file = self.vsftbarot.local_file - handler_original = Utils.openCdf(vsftbarot_file) + handler_original = Utils.open_cdf(vsftbarot_file) self.var_vsftbarot = handler_original.variables['vsftbarot'] - handler = Utils.openCdf(output, 'w') + handler = Utils.open_cdf(output, 'w') handler.createDimension('time', handler_original.variables['time'].shape[0]) handler.createDimension('region', 8) Utils.copy_variable(handler_original, handler, 'time') @@ -117,35 +118,35 @@ class Gyres(Diagnostic): var_gyre.units = 'm^3/s' var_region[0] = 'subpolNAtl' - var_gyre[:, 0] = self._gyre(subpolNAtl, True) + var_gyre[:, 0] = self._gyre(subpol_n_atl, True) Log.debug('subpolNAtl: {0}', var_gyre[:, 0]) var_region[1] = 'subpolNPac' - var_gyre[:, 1] = self._gyre(subpolNPac, True) + var_gyre[:, 1] = self._gyre(subpol_n_pac, True) Log.debug('subpolNPac: {0}', var_gyre[:, 1]) var_region[2] = 'subtropNPac' - var_gyre[:, 2] = self._gyre(subtropNPac) + var_gyre[:, 2] = self._gyre(subtrop_n_pac) Log.debug('subtropNPac: {0}', var_gyre[:, 2]) var_region[3] = 'subtropSPac' - var_gyre[:, 3] = self._gyre(subtropSPac) + var_gyre[:, 3] = self._gyre(subtrop_s_pac) Log.debug('subtropSPac: {0}', var_gyre[:, 3]) var_region[4] = 'subtropNAtl' - var_gyre[:, 4] = self._gyre(subtropNAtl) + var_gyre[:, 4] = self._gyre(subtrop_n_atl) 
Log.debug('subtropNAtl: {0}', var_gyre[:, 4]) var_region[5] = 'subtropSAtl' - var_gyre[:, 5] = self._gyre(subtropSAtl) + var_gyre[:, 5] = self._gyre(subtrop_s_atl) Log.debug('subtropSAtl: {0}', var_gyre[:, 5]) var_region[6] = 'subtropInd' - var_gyre[:, 6] = self._gyre(subtropInd) + var_gyre[:, 6] = self._gyre(subtrop_ind) Log.debug('subtropInd: {0}', var_gyre[:, 6]) var_region[7] = 'ACC' - var_gyre[:, 7] = self._gyre(ACC) + var_gyre[:, 7] = self._gyre(acc) Log.debug('ACC: {0}', var_gyre[:, 7]) handler.close() diff --git a/earthdiagnostics/ocean/heatcontent.py b/earthdiagnostics/ocean/heatcontent.py index 81ef23ccd1cdcc0d19483bd540ca13bef7ef5610..476c639f242bae59cd8ed03fff04a040e14596ff 100644 --- a/earthdiagnostics/ocean/heatcontent.py +++ b/earthdiagnostics/ocean/heatcontent.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Compute the total ocean heat content""" import shutil import numpy as np @@ -63,7 +64,7 @@ class HeatContent(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -133,7 +134,7 @@ class HeatContent(Diagnostic): @classmethod def _read_level_values(cls): - handler = Utils.openCdf('mesh_zgr.nc') + handler = Utils.open_cdf('mesh_zgr.nc') if 'gdepw_1d' in handler.variables: depth_w = handler.variables['gdepw_1d'][0, :] elif 'gdepw_0' in handler.variables: @@ -150,11 +151,13 @@ class HeatContent(Diagnostic): return depth_t, depth_w def request_data(self): + """Request data required by the diagnostic""" self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) if self.mxloption != 0: self.mlotst = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" if self.box.min_depth == 0: # For cdftools, this is 
all levels box_save = None @@ -167,9 +170,7 @@ class HeatContent(Diagnostic): box=box_save, region=self.basin.name) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" nco = Utils.nco temperature_file = TempFile.get() Utils.copy_file(self.thetao.local_file, temperature_file) @@ -189,7 +190,7 @@ class HeatContent(Diagnostic): para.append('-mxloption') para.append(str(self.mxloption)) if self.basin != Basins().Global: - handler = Utils.openCdf('mask_regions.3d.nc') + handler = Utils.open_cdf('mask_regions.3d.nc') if self.basin.name not in handler.variables: raise Exception('Basin {0} is not defined on mask_regions.nc'.format(self.basin.name)) @@ -200,15 +201,15 @@ class HeatContent(Diagnostic): temp2 = TempFile.get() - cdftools.run('cdfheatc', options=para, input=temperature_file, output=temp2, input_option='-f') + cdftools.run('cdfheatc', options=para, input_file=temperature_file, output_file=temp2, input_option='-f') - results = Utils.openCdf(temp2) + results = Utils.open_cdf(temp2) heatcsum_temp = TempFile.get() heatcvmean_temp = TempFile.get() nco.ncks(input=temperature_file, output=heatcsum_temp, options=('-O -v time',)) shutil.copy(heatcsum_temp, heatcvmean_temp) - heatcsum_handler = Utils.openCdf(heatcsum_temp) + heatcsum_handler = Utils.open_cdf(heatcsum_temp) thc = heatcsum_handler.createVariable('heatcsum', float, 'time') thc.standard_name = "integral_of_sea_water_potential_temperature_expressed_as_heat_content" thc.long_name = "Total heat content" @@ -216,7 +217,7 @@ class HeatContent(Diagnostic): thc[:] = results.variables['heatc3d'][:, 0, 0] heatcsum_handler.close() - heatcvmean_handler = Utils.openCdf(heatcvmean_temp) + heatcvmean_handler = Utils.open_cdf(heatcvmean_temp) uhc = heatcvmean_handler.createVariable('heatcvmean', float, 'time') uhc.standard_name = "integral_of_sea_water_potential_temperature_expressed_as_heat_content" uhc.long_name = "Heat content per unit volume" diff --git 
a/earthdiagnostics/ocean/heatcontentlayer.py b/earthdiagnostics/ocean/heatcontentlayer.py index c64b19021e455ecec86dfb501e9f7923a1df205d..88a74a7ca7d3f93dac2862a529d1547e3eeb84c5 100644 --- a/earthdiagnostics/ocean/heatcontentlayer.py +++ b/earthdiagnostics/ocean/heatcontentlayer.py @@ -1,11 +1,12 @@ # coding=utf-8 +"""Point-wise Ocean Heat Content in a specified ocean thickness (J/m-2)""" import numpy as np -from earthdiagnostics.constants import Basins from earthdiagnostics.box import Box +from earthdiagnostics.constants import Basins from earthdiagnostics.diagnostic import Diagnostic, DiagnosticIntOption, DiagnosticBasinOption -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class HeatContentLayer(Diagnostic): @@ -54,7 +55,7 @@ class HeatContentLayer(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -71,38 +72,41 @@ class HeatContentLayer(Diagnostic): box.max_depth = options['max_depth'] job_list = list() - handler = Utils.openCdf('mesh_zgr.nc') + max_level, min_level, weight = cls._compute_weights(box) + + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): + job_list.append(HeatContentLayer(diags.data_manager, startdate, member, chunk, box, + weight, min_level, max_level)) + return job_list + + @classmethod + def _compute_weights(cls, box): + handler = Utils.open_cdf('mesh_zgr.nc') # mask = Utils.get_mask(options['basin']) mask = handler.variables['tmask'][:] - if 'e3t' in handler.variables: e3t = handler.variables['e3t'][:] elif 'e3t_0' in handler.variables: e3t = handler.variables['e3t_0'][:] else: raise Exception('e3t variable can not be found') - if 'gdepw' in handler.variables: depth = handler.variables['gdepw'][:] elif 'gdepw_0' 
in handler.variables: depth = handler.variables['gdepw_0'][:] else: raise Exception('gdepw variable can not be found') - e3t_3d = e3t.shape != depth.shape if e3t_3d: mask = e3t_3d * mask else: e3t = e3t[0, :] - while len(depth.shape) < 4: depth = np.expand_dims(depth, -1) handler.close() def calculate_weight(e3t_point, depth_point, mask_point): - """ - Calculates the weight for each cell - """ + """Calculate the weight for each cell""" if not mask_point: return 0 top = depth_point @@ -119,7 +123,6 @@ class HeatContentLayer(Diagnostic): calc = np.vectorize(calculate_weight, otypes='f') weight = calc(e3t, depth, mask) - # Now we will reduce to the levels with any weight != 0 to avoid loading too much data on memory levels = weight.shape[1] min_level = 0 @@ -129,30 +132,26 @@ class HeatContentLayer(Diagnostic): while max_level < (levels - 1) and weight[:, max_level + 1, :].any(): max_level += 1 weight = weight[:, min_level:max_level, :] - - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(HeatContentLayer(diags.data_manager, startdate, member, chunk, box, - weight, min_level, max_level)) - return job_list + return max_level, min_level, weight def request_data(self): + """Request data required by the diagnostic""" self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.heatc = self.declare_chunk(ModelingRealms.ocean, 'heatc', self.startdate, self.member, self.chunk, box=self.box) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" nco = Utils.nco thetao_file = TempFile.get() results = TempFile.get() Utils.copy_file(self.thetao.local_file, thetao_file) - handler = Utils.openCdf(thetao_file) + handler = Utils.open_cdf(thetao_file) Utils.convert_units(handler.variables['thetao'], 'K') heatc_sl = np.sum(handler.variables['thetao'][:, 
self.min_level:self.max_level, :] * self.weight, 1) handler.sync() @@ -161,7 +160,7 @@ class HeatContentLayer(Diagnostic): nco.ncks(input=thetao_file, output=results, options=('-O -v lon,lat,time',)) Utils.rename_variables(results, {'x': 'i', 'y': 'j'}, False, True) - handler_results = Utils.openCdf(results) + handler_results = Utils.open_cdf(results) handler_results.createVariable('heatc', float, ('time', 'j', 'i'), fill_value=1.e20) handler_results.sync() handler_results.variables['heatc'][:] = heatc_sl diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index dab72b363f7499d4a7157471be8632a03d20417d..56f478445889e22ce00395c2a6c09049ba16d7c2 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""SCRIP based interpolation""" import os import shutil import threading @@ -14,6 +15,7 @@ from earthdiagnostics.utils import Utils, TempFile class Interpolate(Diagnostic): """ 3-dimensional conservative interpolation to the regular atmospheric grid. 
+ It can also be used for 2D (i,j) variables :original author: Virginie Guemas @@ -72,17 +74,20 @@ class Interpolate(Diagnostic): self.variable, self.grid, self.invert_latitude, self.model_version, self.original_grid) + def __hash__(self): + return hash(str(self)) + @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: target_grid, variable, domain=ocean - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: target_grid, variable, domain=ocean + :type options: list[str] + :return: + """ options_available = (DiagnosticOption('target_grid'), DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variable'), DiagnosticDomainOption(default_value=ModelingRealms.ocean), @@ -100,23 +105,23 @@ class Interpolate(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.original = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.original_grid) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.regridded = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" variable_file = TempFile.get() Utils.copy_file(self.original.local_file, variable_file) Utils.rename_variables(variable_file, {'i': 'x', 'j': 'y'}, must_exist=False, rename_dimension=True) cdo = Utils.cdo nco = Utils.nco - handler = Utils.openCdf(variable_file) + handler = Utils.open_cdf(variable_file) if 'lev' in handler.dimensions: num_levels = handler.dimensions['lev'].size has_levels = True @@ -137,7 +142,7 @@ class Interpolate(Diagnostic): else: 
Utils.move_file(self._get_level_file(0), temp) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) if 'record' in handler.dimensions: handler.renameDimension('record', 'lev') handler.close() @@ -177,7 +182,7 @@ class Interpolate(Diagnostic): self.grid, lev + 1) if not os.path.isfile(weights_file): raise Exception('Level {0} weights file does not exist for model {1} ' - 'and grid {2}'.format(lev+1, self.model_version, self.grid)) + 'and grid {2}'.format(lev + 1, self.model_version, self.grid)) namelist_file = TempFile.get(suffix='') scrip_use_in = open(namelist_file, 'w') scrip_use_in.writelines("&remap_inputs\n") diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index b8b816207853e3529837b2dfc319cbdf5685fca9..5a09eaf3174905af3ec4a2b868cfc5405fb9ae39 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -1,16 +1,20 @@ # coding=utf-8 +"""CDO-based interpolation""" import os import numpy as np -from earthdiagnostics.diagnostic import * -from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms +from earthdiagnostics.constants import Basins +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticDomainOption, DiagnosticVariableListOption, \ + DiagnosticChoiceOption, DiagnosticBoolOption, DiagnosticOption +from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import Utils, TempFile class InterpolateCDO(Diagnostic): """ 3-dimensional conservative interpolation to the regular atmospheric grid. 
+ It can also be used for 2D (i,j) variables :original author: Javier Vegas-Regidor @@ -66,6 +70,9 @@ class InterpolateCDO(Diagnostic): self.variable == other.variable and self.mask_oceans == other.mask_oceans and self.grid == other.grid and \ self.original_grid == other.original_grid + def __hash__(self): + return hash(str(self)) + def __str__(self): return 'Interpolate with CDO Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} ' \ 'Variable: {0.domain}:{0.variable} Target grid: {0.grid} Original grid: {0.original_grid} ' \ @@ -74,14 +81,14 @@ class InterpolateCDO(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic - - :param diags: Diagnostics manager class - :type diags: Diags - :param options: target_grid, variable, domain=ocean - :type options: list[str] - :return: - """ + Create a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: target_grid, variable, domain=ocean + :type options: list[str] + :return: + """ options_available = (DiagnosticDomainOption(default_value=ModelingRealms.ocean), DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variables'), DiagnosticOption('target_grid', diags.config.experiment.atmos_grid.lower()), @@ -105,7 +112,7 @@ class InterpolateCDO(Diagnostic): else: startdate, member, chunk = diags.config.experiment.get_chunk_list()[0] weights_job = ComputeWeights(diags.data_manager, startdate, member, chunk, options['domain'], - options['variables'][0],target_grid, options['original_grid'], weights, + options['variables'][0], target_grid, options['original_grid'], weights, options['method']) for var in options['variables']: @@ -133,24 +140,15 @@ class InterpolateCDO(Diagnostic): def get_sample_grid_file(cls): temp = TempFile.get() - lat_name = 'lat' - handler = Utils.openCdf('mask.nc') - for lat_alias in ['lat', 'latitude']: - if lat_alias in handler.variables: 
- lat_name = lat_alias - break + handler = Utils.open_cdf('mask.nc') - lon_name = None - for lon_alias in ['lon', 'longitude']: - if lon_alias in handler.variables: - lon_name = lon_alias - break + lat_name, lon_name = cls._get_lat_lon_alias(handler) lon_bnds_name = '{0}_bnds'.format(lon_name) lat_bnds_name = '{0}_bnds'.format(lat_name) Utils.nco.ncks(input='mask.nc', output=temp, options=('-O -v tmask,{0},{1},gphif,glamf'.format(lat_name, lon_name),)) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) lon = handler.variables[lon_name] lon.units = "degrees_east" lon.long_name = "Longitude" @@ -193,6 +191,20 @@ class InterpolateCDO(Diagnostic): Utils.nco.ncks(input=temp, output=temp, options=('-O -x -v gphif,glamf',)) return temp + @classmethod + def _get_lat_lon_alias(cls, handler): + lat_name = None + for lat_alias in ['lat', 'latitude']: + if lat_alias in handler.variables: + lat_name = lat_alias + break + lon_name = None + for lon_alias in ['lon', 'longitude']: + if lon_alias in handler.variables: + lon_name = lon_alias + break + return lat_name, lon_name + @classmethod def _translate_ifs_grids_to_cdo_names(cls, target_grid): if target_grid.upper().startswith('T159L'): @@ -204,17 +216,17 @@ class InterpolateCDO(Diagnostic): return target_grid def request_data(self): + """Request data required by the diagnostic""" self.original = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.original_grid) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.regridded = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" variable_file = TempFile.get() Utils.copy_file(self.original.local_file, variable_file) Utils.rename_variables(variable_file, {'jpib': 'i', 'jpjb': 'j', 'x': 'i', 'y': 'j', @@ -222,21 +234,16 @@ class InterpolateCDO(Diagnostic): 
'SSTK_ens0': 'tos', 'SSTK_ens1': 'tos', 'SSTK_ens2': 'tos', 'nav_lat': 'lat', 'nav_lon': 'lon'}, must_exist=False, rename_dimension=True) - handler = Utils.openCdf(variable_file) + handler = Utils.open_cdf(variable_file) + lat_name, lon_name = self._get_lat_lon_alias(handler) var = handler.variables[self.variable] units = var.units coordinates = list() for dim in var.dimensions: if dim == 'i': - if 'lat' in handler.variables: - coordinates.append('lat') - else: - coordinates.append('latitude') + coordinates.append(lon_name) elif dim == 'j': - if 'lon' in handler.variables: - coordinates.append('lon') - else: - coordinates.append('longitude') + coordinates.append(lat_name) else: coordinates.append(dim) var.coordinates = ' '.join(coordinates) @@ -250,19 +257,46 @@ class InterpolateCDO(Diagnostic): temp = TempFile.get() Utils.cdo.remap(','.join((self.grid.split('_')[0], self.weights)), input=variable_file, output=temp) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) + handler.variables[self.variable].units = units handler.close() + if lat_name != 'lat': + Utils.rename_variables(temp, {'lat': lat_name, 'lon': lon_name}, True, True) + self.regridded.set_local_file(temp) class ComputeWeights(Diagnostic): + """ + Diagnostic used to compute interpolation weights + + Parameters + ---------- + data_manager: DataManager + startdate: str + member: int + chunk: int + domain: ModelingRealm + variable: str + target_grid: str + original_grid: str + weights_file: str + method: str + + """ + alias = 'computeinterpcdoweights' "Diagnostic alias for the configuration file" @classmethod def generate_jobs(cls, diags, options): + """ + Generate the instances of the diagnostics that will be run by the manager + + This method does not does anything as this diagnostic is not expected to be called by the users + """ pass def __init__(self, data_manager, startdate, member, chunk, domain, variable, target_grid, @@ -285,10 +319,10 @@ class ComputeWeights(Diagnostic): 
InterpolateCDO.compute_weights(self.method, self.grid, self.sample_data.local_file, self.weights_file) def request_data(self): + """Request data required by the diagnostic""" self.sample_data = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.original_grid) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" pass - - diff --git a/earthdiagnostics/ocean/mask_land.py b/earthdiagnostics/ocean/mask_land.py index a7af9aaa04719fc12837178e4a8af396dc3121f5..01062c11d7db87301933e9e28eb084939681b01b 100644 --- a/earthdiagnostics/ocean/mask_land.py +++ b/earthdiagnostics/ocean/mask_land.py @@ -1,17 +1,16 @@ # coding=utf-8 +"""Changes values present in the mask for NaNs""" +import numpy as np + from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableListOption, \ DiagnosticDomainOption, DiagnosticChoiceOption, DiagnosticOption from earthdiagnostics.utils import Utils, TempFile -import numpy as np class MaskLand(Diagnostic): """ Changes values present in the mask for NaNs - :created: February 2012 - :last modified: June 2016 - :param data_manager: data management object :type data_manager: DataManager :param startdate: startdate @@ -25,6 +24,7 @@ class MaskLand(Diagnostic): """ alias = 'maskland' + "Diagnostic alias for the configuration file" def __init__(self, data_manager, startdate, member, chunk, domain, variable, mask, grid): Diagnostic.__init__(self, data_manager) @@ -47,7 +47,7 @@ class MaskLand(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -77,30 +77,28 @@ class MaskLand(Diagnostic): @classmethod def _get_mask(cls, cell_point): - mask_file = Utils.openCdf('mask.nc') + mask_file = Utils.open_cdf('mask.nc') mask = 
mask_file.variables['{0}mask'.format(cell_point)][:].astype(float) mask[mask == 0] = np.nan mask_file.close() return mask - "Diagnostic alias for the configuration file" - def request_data(self): + """Request data required by the diagnostic""" self.var_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.masked_file = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() Utils.copy_file(self.var_file.local_file, temp) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) if 'lev' not in handler.dimensions: mask = self.mask[:, 0, ...] else: @@ -109,4 +107,3 @@ class MaskLand(Diagnostic): handler.close() self.masked_file.set_local_file(temp) - diff --git a/earthdiagnostics/ocean/maxmoc.py b/earthdiagnostics/ocean/maxmoc.py index fb86ecb5c636a968871486038871a083a9decb71..eba04a78977fe9779503ae04b0943d35ff39daa8 100644 --- a/earthdiagnostics/ocean/maxmoc.py +++ b/earthdiagnostics/ocean/maxmoc.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Compute an Atlantic MOC index""" import netCDF4 import numpy as np from bscearth.utils.log import Log @@ -8,12 +9,14 @@ from earthdiagnostics.constants import Basins from earthdiagnostics.diagnostic import Diagnostic, DiagnosticBasinOption, DiagnosticFloatOption from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class MaxMoc(Diagnostic): """ - Compute an Atlantic MOC index by finding the maximum of the annual + Compute an Atlantic MOC index + + It finds the maximum of the annual mean meridional overturning in a latitude / depth region :original author: Virginie Guemas @@ 
-59,7 +62,7 @@ class MaxMoc(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each complete year to compute the diagnostic + Create a job for each complete year to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -91,11 +94,12 @@ class MaxMoc(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_year(ModelingRealms.ocean, MaxMoc.vsftmyz, self.startdate, self.member, self.year) def declare_data_generated(self): - + """Declare data to be generated by the diagnostic""" self.results = {'vsftmyzmax': self.declare_year(ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, self.year, box=self.box, vartype=VariableType.STATISTIC), 'vsftmyzmaxlev': self.declare_year(ModelingRealms.ocean, 'vsftmyzmaxlev', self.startdate, @@ -114,21 +118,19 @@ class MaxMoc(Diagnostic): vartype=VariableType.STATISTIC)} def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" nco = Utils.nco temp = TempFile.get() Utils.copy_file(self.variable_file.local_file, temp) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) if 'i' in handler.dimensions: handler.close() nco.ncwa(input=temp, output=temp, options=('-O -a i',)) else: handler.close() - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) basin_index = np.where(handler.variables['basin'][:] == self.basin.name) if len(basin_index) == 0: raise Exception("Basin {1} is not defined in {0}", temp, self.basin.name) diff --git a/earthdiagnostics/ocean/mixedlayerheatcontent.py b/earthdiagnostics/ocean/mixedlayerheatcontent.py index e81624d2555f7eb4db05dcdbccaff430d2878e13..fa03a558bc8fdd789939ccb562b1b271d1aedefb 100644 --- a/earthdiagnostics/ocean/mixedlayerheatcontent.py +++ b/earthdiagnostics/ocean/mixedlayerheatcontent.py @@ -1,10 +1,11 @@ # coding=utf-8 +"""Compute mixed layer heat content""" import os -from earthdiagnostics.diagnostic 
import Diagnostic from earthdiagnostics import cdftools -from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.diagnostic import Diagnostic from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class MixedLayerHeatContent(Diagnostic): @@ -48,7 +49,7 @@ class MixedLayerHeatContent(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -64,22 +65,22 @@ class MixedLayerHeatContent(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) self.mlotst = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.ohcsum = self.declare_chunk(ModelingRealms.ocean, 'ohcvsumlotst', self.startdate, self.member, self.chunk) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temperature_file = TempFile.get() Utils.copy_file(self.thetao.local_file, temperature_file) Utils.nco.ncks(input=self.mlotst.local_file, output=temperature_file, options=('-A -v mlotst',)) temp = TempFile.get() - cdftools.run('cdfmxlheatc', input=temperature_file, output=temp) + cdftools.run('cdfmxlheatc', input_file=temperature_file, output_file=temp) os.remove(temperature_file) diff --git a/earthdiagnostics/ocean/mixedlayersaltcontent.py b/earthdiagnostics/ocean/mixedlayersaltcontent.py index ad350961fdcabed51b378f9dd2fd26bc79bef164..a6f7b4559ee8c209361cd050f67decce5ed93a59 100644 --- a/earthdiagnostics/ocean/mixedlayersaltcontent.py +++ b/earthdiagnostics/ocean/mixedlayersaltcontent.py @@ -1,9 +1,11 @@ # coding=utf-8 +"""Compute mixed layer salt 
content""" import os + from earthdiagnostics import cdftools from earthdiagnostics.diagnostic import Diagnostic -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class MixedLayerSaltContent(Diagnostic): @@ -25,6 +27,7 @@ class MixedLayerSaltContent(Diagnostic): :param chunk: chunk's number :type chunk: int """ + alias = 'mlotstsc' "Diagnostic alias for the configuration file" @@ -46,7 +49,7 @@ class MixedLayerSaltContent(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -62,22 +65,22 @@ class MixedLayerSaltContent(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.so = self.request_chunk(ModelingRealms.ocean, 'so', self.startdate, self.member, self.chunk) self.mlotst = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.sosum = self.declare_chunk(ModelingRealms.ocean, 'scvsummlotst', self.startdate, self.member, self.chunk) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" salinity_file = TempFile.get() Utils.copy_file(self.so.local_file, salinity_file) Utils.nco.ncks(input=self.mlotst.local_file, output=salinity_file, options=('-A -v mlotst',)) temp = TempFile.get() - cdftools.run('cdfmxlsaltc', input=salinity_file, output=temp) + cdftools.run('cdfmxlsaltc', input_file=salinity_file, output_file=temp) os.remove(salinity_file) Utils.rename_variables(temp, {'x': 'i', 'y': 'j', 'somxlsaltc': 'scvsummlotst'}, False, True) diff --git a/earthdiagnostics/ocean/moc.py b/earthdiagnostics/ocean/moc.py index 
7686e84b3a616984c53e070840220220e6a2860d..a300e8b113484709ab2fa2ec75786a86bb4ea3d8 100644 --- a/earthdiagnostics/ocean/moc.py +++ b/earthdiagnostics/ocean/moc.py @@ -1,12 +1,13 @@ # coding=utf-8 +"""Compute the MOC for oceanic basins""" import numpy as np from bscearth.utils.log import Log from earthdiagnostics import cdftools from earthdiagnostics.constants import Basins from earthdiagnostics.diagnostic import Diagnostic -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class Moc(Diagnostic): @@ -51,7 +52,7 @@ class Moc(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -67,24 +68,24 @@ class Moc(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(ModelingRealms.ocean, 'vo', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.results = self.declare_chunk(ModelingRealms.ocean, Moc.vsftmyz, self.startdate, self.member, self.chunk) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() Log.debug('Computing MOC') - cdftools.run('cdfmoc', input=self.variable_file.local_file, output=temp) + cdftools.run('cdfmoc', input_file=self.variable_file.local_file, output_file=temp) Utils.nco.ncks(input=self.variable_file.local_file, output=temp, options=('-A -v lev',)) Utils.convert2netcdf4(temp) Log.debug('Reformatting variables') - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) basins_list = [Basins().Global.name] if 'zomsfatl' in handler.variables: @@ -100,7 +101,7 @@ class Moc(Diagnostic): ('time', 'lev', 'i', 'j', 'basin'), 
fill_value=example._FillValue) - moc.units = Utils.convert_to_ASCII_if_possible(example.units) + moc.units = Utils.convert_to_ascii_if_possible(example.units) moc.add_offset = example.add_offset moc.scale_factor = example.scale_factor diff --git a/earthdiagnostics/ocean/mxl.py b/earthdiagnostics/ocean/mxl.py index b5e7c980355beccbf485606bbeaed5216c498375..1c733c83ebb514bcfc3cb61d6a89d07f3ea7ab58 100644 --- a/earthdiagnostics/ocean/mxl.py +++ b/earthdiagnostics/ocean/mxl.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Compute the mixed layer depth""" import os from earthdiagnostics import cdftools @@ -39,7 +40,7 @@ class Mxl(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -55,21 +56,21 @@ class Mxl(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.thetao_file = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) self.so_file = self.request_chunk(ModelingRealms.ocean, 'so', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.mlotst_file = self.declare_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() - cdftools.run('cdfmxl', input=[self.thetao_file, self.so_file], output=temp, options='-nc4') + cdftools.run('cdfmxl', input_file=[self.thetao_file, self.so_file], output_file=temp, options='-nc4') temp2 = TempFile.get() - source = Utils.openCdf(temp) - destiny = Utils.openCdf(temp2, 'w') + source = Utils.open_cdf(temp) + destiny = Utils.open_cdf(temp2, 'w') Utils.copy_variable(source, destiny, 'somxl010', must_exist=True, add_dimensions=True) Utils.copy_variable(source, destiny, 
'lat', must_exist=False) Utils.copy_variable(source, destiny, 'latitude', must_exist=False) diff --git a/earthdiagnostics/ocean/psi.py b/earthdiagnostics/ocean/psi.py index fd1ee553ead2b07aa9bca2191f6fbb98194d5b71..03107eda951397285a7fb177b5b3a58c74d94958 100644 --- a/earthdiagnostics/ocean/psi.py +++ b/earthdiagnostics/ocean/psi.py @@ -1,8 +1,9 @@ # coding=utf-8 +"""Compute the barotropic stream function""" from earthdiagnostics import cdftools from earthdiagnostics.diagnostic import Diagnostic -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class Psi(Diagnostic): @@ -45,7 +46,7 @@ class Psi(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -61,18 +62,19 @@ class Psi(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.uo = self.request_chunk(ModelingRealms.ocean, 'uo', self.startdate, self.member, self.chunk) self.vo = self.request_chunk(ModelingRealms.ocean, 'vo', self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.psi = self.declare_chunk(ModelingRealms.ocean, Psi.vsftbarot, self.startdate, self.member, self.chunk) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() - cdftools.run('cdfpsi', input=[self.uo.local_file, self.vo.local_file], output=temp, options='-mean -mask') + cdftools.run('cdfpsi', input_file=[self.uo.local_file, self.vo.local_file], output_file=temp, + options='-mean -mask') Utils.rename_variable(temp, 'sobarstf', Psi.vsftbarot) Utils.setminmax(temp, Psi.vsftbarot) self.psi.set_local_file(temp) diff --git a/earthdiagnostics/ocean/regionmean.py 
b/earthdiagnostics/ocean/regionmean.py index edc5e212884106d87483fe94adfce5545da0bc51..fa41fcf161a1ee5cc4563b4ab70fac2b2176a0c8 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -1,7 +1,13 @@ # coding=utf-8 -import os +"""Diagnostic to compute regional averages""" +import iris +import iris.util +import iris.coords +import iris.analysis +import iris.exceptions + +import numpy as np -from earthdiagnostics import cdftools from earthdiagnostics.box import Box from earthdiagnostics.constants import Basins from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticIntOption, DiagnosticDomainOption, \ @@ -12,8 +18,9 @@ from earthdiagnostics.utils import Utils, TempFile class RegionMean(Diagnostic): """ - Computes the mean value of the field (3D, weighted). For 3D fields, - a horizontal mean for each level is also given. If a spatial window + Computes the mean value of the field (3D, weighted). + + For 3D fields, a horizontal mean for each level is also given. If a spatial window is specified, the mean value is computed only in this window. 
:original author: Javier Vegas-Regidor @@ -37,35 +44,42 @@ class RegionMean(Diagnostic): alias = 'regmean' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, startdate, member, chunk, domain, variable, grid_point, box, save3d, basin, - variance, grid): + def __init__(self, data_manager, startdate, member, chunk, domain, variable, box, save3d, weights_file, + variance, basin): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member self.chunk = chunk self.domain = domain self.variable = variable - self.grid_point = grid_point.upper() self.box = box self.save3d = save3d - self.basin = basin + self.weights_file = weights_file self.variance = variance - self.grid = grid + self.basin = basin + self.declared = {} + self.lat_name = 'lat' + self.lon_name = 'lon' + def __eq__(self, other): + if type(self) is not type(other): + return False return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ self.box == other.box and self.variable == other.variable def __str__(self): return 'Region mean Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} Variable: {0.variable} ' \ - 'Grid point: {0.grid_point} Box: {0.box} Save 3D: {0.save3d} Save variance: {0.variance} ' \ - 'Original grid: {0.grid} Basin: {0.basin}'.format(self) + 'Box: {0.box} Save 3D: {0.save3d} Save variance: {0.variance}'.format(self) + + def __hash__(self): + return hash(str(self)) @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -77,8 +91,8 @@ class RegionMean(Diagnostic): DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticOption('grid_point', 'T'), DiagnosticBasinOption('basin', Basins().Global), - DiagnosticIntOption('min_depth', 0), - DiagnosticIntOption('max_depth', 0), + 
DiagnosticIntOption('min_depth', -1), + DiagnosticIntOption('max_depth', -1), DiagnosticBoolOption('save3D', True), DiagnosticBoolOption('variance', False), DiagnosticOption('grid', '')) @@ -88,94 +102,281 @@ class RegionMean(Diagnostic): box.min_depth = options['min_depth'] box.max_depth = options['max_depth'] + weights_file = TempFile.get() + weight_diagnostics = ComputeWeights(diags.data_manager, options['grid_point'], options['basin'], box, + weights_file) + job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(RegionMean(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], options['grid_point'], box, - options['save3D'], options['basin'], options['variance'], options['grid'])) + job = RegionMean(diags.data_manager, startdate, member, chunk, + options['domain'], options['variable'], box, + options['save3D'], weights_file, options['variance'], options['basin']) + job.add_subjob(weight_diagnostics) + job_list.append(job) + return job_list def request_data(self): - self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + """Request data required by the diagnostic""" + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" if self.box.min_depth == 0: # To cdftools, this means all levels box_save = None else: box_save = self.box - self.declare_var('mean', False, box_save) - self.declare_var('mean', True, box_save) + self._declare_var('mean', False, box_save) + self._declare_var('mean', True, box_save) if self.variance: - self.declare_var('var', False, box_save) - self.declare_var('var', True, box_save) + self._declare_var('var', False, box_save) + self._declare_var('var', True, box_save) def compute(self): - """ - Runs the diagnostic - """ - mean_file = 
TempFile.get() + """Run the diagnostic""" + iris.FUTURE.netcdf_promote = True + iris.FUTURE.netcdf_no_unlimited = True - variable_file = self.variable_file.local_file + has_levels = self._fix_file_metadata() - handler = Utils.openCdf(variable_file) - self.save3d &= 'lev' in handler.dimensions - handler.close() + data = self._load_data() - cdfmean_options = [self.variable, self.grid_point, 0, 0, 0, 0, self.box.min_depth, self.box.max_depth] - if self.variance: - cdfmean_options += ['-var'] - if self.basin != Basins().Global: - cdfmean_options.append('-M') - cdfmean_options.append('mask_regions.3d.nc') - cdfmean_options.append(self.basin.name) + weights = iris.load_cube(self.weights_file, 'weights').data + i_indexes = iris.load_cube(self.weights_file, 'i_indexes').data + j_indexes = iris.load_cube(self.weights_file, 'j_indexes').data + lev_limits = iris.load_cube(self.weights_file, 'lev_limits').data + + def selected_i(cell): + return cell.point - 1 in i_indexes + + def selected_j(cell): + return cell.point - 1 in j_indexes + + def selected_level(cell): + return lev_limits[0] <= cell.point <= lev_limits[1] - cdftools.run('cdfmean', input=variable_file, output=mean_file, options=cdfmean_options) - Utils.rename_variables(mean_file, {'gdept': 'lev', 'gdepw': 'lev'}, must_exist=False, rename_dimension=True) + data = data.extract(iris.Constraint(i=selected_i, j=selected_j, lev=selected_level)) + if has_levels: + self._meand_3d_variable(data, weights) + else: + self._mean_2d_var(data, weights) - self.send_var('mean', False, mean_file) - self.send_var('mean', True, mean_file) + def _mean_2d_var(self, data, weights): + mean = iris.cube.CubeList() + var = iris.cube.CubeList() + for time_slice in data.slices_over('time'): + mean.append(time_slice.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=weights)) + var.append(time_slice.collapsed(['latitude', 'longitude'], iris.analysis.VARIANCE, weights=weights)) + self._send_var('mean', False, mean.merge_cube()) + if 
self.variance: + self._send_var('var', False, var.merge_cube()) + def _meand_3d_variable(self, data, weights): + mean = iris.cube.CubeList() + mean3d = iris.cube.CubeList() + var = iris.cube.CubeList() + var3d = iris.cube.CubeList() + for time_slice in data.slices_over('time'): + mean.append(time_slice.collapsed(['latitude', 'longitude', 'depth'], + iris.analysis.MEAN, weights=weights)) + if self.save3d: + mean3d.append(time_slice.collapsed(['latitude', 'longitude'], + iris.analysis.MEAN, weights=weights)) + if self.variance: + var.append(time_slice.collapsed(['latitude', 'longitude', 'depth'], + iris.analysis.VARIANCE, weights=weights)) + if self.save3d: + var3d.append(time_slice.collapsed(['latitude', 'longitude'], + iris.analysis.VARIANCE, weights=weights)) + self._send_var('mean', True, mean3d) + self._send_var('mean', False, mean) if self.variance: - self.send_var('var', False, mean_file) - self.send_var('var', True, mean_file) + self._send_var('var', True, var3d) + + self._send_var('var', False, var) + + def _load_data(self): + def add_i_j(cube, field, filename): + if cube.var_name != self.variable: + return + if not cube.coords('i'): + index = field.dimensions.index('i') + i = np.arange(1, field.shape[index] + 1) + i_coord = iris.coords.DimCoord(i, var_name='i') + cube.add_dim_coord(i_coord, index) + if not cube.coords('j'): + index = field.dimensions.index('j') + i = np.arange(1, field.shape[index] + 1) + i_coord = iris.coords.DimCoord(i, var_name='j') + cube.add_dim_coord(i_coord, index) + if not cube.coords('lev'): + index = field.dimensions.index('lev') + i = np.arange(1, field.shape[index] + 1) + lev = iris.coords.AuxCoord(i, var_name='lev') + cube.add_aux_coord(lev, index) - os.remove(mean_file) + data = iris.load_cube(self.variable_file.local_file, + iris.AttributeConstraint(short_name=self.variable), + callback=add_i_j) + return data - def send_var(self, var, threed, mean_file): + def _fix_file_metadata(self): + handler = 
Utils.open_cdf(self.variable_file.local_file) + var = handler.variables[self.variable] + coordinates = '' + has_levels = False + for dimension in handler.variables.keys(): + if dimension in ['time', 'lev', 'lat', 'latitude', 'lon', 'longitude', 'i', 'j']: + coordinates += ' {0}'.format(dimension) + if dimension == 'lev': + has_levels = True + var.coordinates = coordinates + handler.close() + return has_levels + + def _declare_var(self, var, threed, box_save): if threed: if not self.save3d: return False - original_name = '{0}_{1}'.format(var, self.variable) final_name = '{1}3d{0}'.format(var, self.variable) - levels = ',lev' else: - original_name = '{0}_3D{1}'.format(var, self.variable) final_name = '{1}{0}'.format(var, self.variable) - levels = '' - - temp2 = TempFile.get() - Utils.nco.ncks(input=mean_file, output=temp2, options=('-v {0},lat,lon{1}'.format(original_name, levels),)) - handler = Utils.openCdf(temp2) - var_handler = handler.variables[original_name] - if hasattr(var_handler, 'valid_min'): - del var_handler.valid_min - if hasattr(var_handler, 'valid_max'): - del var_handler.valid_max - handler.close() - self.declared[final_name].set_local_file(temp2, diagnostic=self, rename_var=original_name, region=self.basin) - def declare_var(self, var, threed, box_save): + self.declared[final_name] = self.declare_chunk(ModelingRealms.ocean, final_name, self.startdate, self.member, + self.chunk, box=box_save, region=self.basin) + + def _send_var(self, var, threed, cube_list): if threed: - if not self.save3d: + if not self.save3d and threed: return False final_name = '{1}3d{0}'.format(var, self.variable) else: final_name = '{1}{0}'.format(var, self.variable) + cube = cube_list.merge_cube() + print(cube) + print(cube.data) + cube.var_name = 'result' + cube.remove_coord('latitude') + cube.remove_coord('longitude') + cube.remove_coord('depth') + cube.remove_coord('lev') + temp = TempFile.get() + iris.save(cube, temp) + self.declared[final_name].set_local_file(temp, 
diagnostic=self, rename_var='result', region=self.basin) - self.declared[final_name] = self.declare_chunk(ModelingRealms.ocean, final_name, self.startdate, self.member, - self.chunk, box=box_save, region=self.basin, grid=self.grid) +class ComputeWeights(Diagnostic): + """ + Diagnostic used to compute regional mean and sum weights + + Parameters + ---------- + data_manager: DataManager + grid_point: str + basin: int + weights_file: str + """ + + alias = 'computeregmeanweights' + "Diagnostic alias for the configuration file" + + @classmethod + def generate_jobs(cls, diags, options): + """ + Generate the instances of the diagnostics that will be run by the manager + + This method does not do anything as this diagnostic is not expected to be called by the users + """ + pass + + def __init__(self, data_manager, grid_point, basin, box, weights_file): + Diagnostic.__init__(self, data_manager) + self.weights_file = weights_file + self.basin = basin + self.grid_point = grid_point.lower() + self.box = box + + def __eq__(self, other): + if type(self) is not type(other): + return False + return self.weights_file == other.weights_file and self.basin == other.basin and \ + self.grid_point == other.grid_point and self.box != other.box + + def __str__(self): + return 'Computing weights for region averaging: Point {0.grid_point} Basin: {0.basin} Box: {0.box}'\ + .format(self) + + def __hash__(self): + return hash(str(self)) + + def compute(self): + """Compute weights""" + iris.FUTURE.netcdf_promote = True + iris.FUTURE.netcdf_no_unlimited = True + + mask = np.squeeze(Utils.get_mask(self.basin, True)) + surface_mask = mask[0, ...] 
+ i_indexes = np.where(np.any(surface_mask != 0, 0))[0] + j_indexes = np.where(np.any(surface_mask != 0, 1))[0] + mask_small = np.take(np.take(mask, i_indexes, 2), j_indexes, 1) + + e1 = self._try_load_cube(1) + e2 = self._try_load_cube(2) + e3 = self._try_load_cube(3) + depth = iris.util.squeeze(iris.load_cube('mesh_hgr.nc', 'gdept_0')) + if self.box.min_depth == -1: + min_level = 0 + else: + distance = abs((depth - self.box.min_depth).data) + min_level = np.argmin(distance) + + if self.box.max_depth == -1: + max_level = depth.shape[0] + else: + distance = abs((depth - self.box.max_depth).data) + max_level = np.argmin(distance) + + def selected_i(cell): + return cell.point - 1 in i_indexes + + def selected_j(cell): + return cell.point - 1 in j_indexes + + def selected_level(cell): + return min_level <= cell.point <= max_level + + e1_small = e1.extract(iris.Constraint(i=selected_i, j=selected_j)) + e2_small = e2.extract(iris.Constraint(i=selected_i, j=selected_j)) + e3_small = e3.extract(iris.Constraint(i=selected_i, j=selected_j, lev=selected_level)) + mask_small = mask_small[min_level:max_level, ...] 
+ + mask_small = e3_small * mask_small + e_small = e1_small * e2_small + for coord in e_small.coords(): + e_small.remove_coord(coord) + for coord in mask_small.coords(): + mask_small.remove_coord(coord) + weights = mask_small * e_small + weights.var_name = 'weights' + i_indexes = iris.cube.Cube(i_indexes, var_name='i_indexes') + j_indexes = iris.cube.Cube(j_indexes, var_name='j_indexes') + lev_limits = iris.cube.Cube([min_level, max_level], var_name='lev_limits') + iris.save((weights, i_indexes, j_indexes, lev_limits), self.weights_file) + + def _try_load_cube(self, number): + try: + cube = iris.load_cube('mesh_hgr.nc', 'e{0}{1}'.format(number, self.grid_point)) + except iris.exceptions.ConstraintMismatchError: + cube = iris.load_cube('mesh_hgr.nc', 'e{0}{1}_0'.format(number, self.grid_point)) + return iris.util.squeeze(cube) + + def request_data(self): + """Request data required by the diagnostic""" + pass + + def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" + pass diff --git a/earthdiagnostics/ocean/regionsum.py b/earthdiagnostics/ocean/regionsum.py index 52b49f91935b65e8485a6c86f58b0e0725f83f5f..6ba7a4e310461d84a36aa706ca8fb4bd237eccba 100644 --- a/earthdiagnostics/ocean/regionsum.py +++ b/earthdiagnostics/ocean/regionsum.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Diagnostic to calculate a region total""" import os from earthdiagnostics import cdftools @@ -12,8 +13,9 @@ from earthdiagnostics.utils import Utils, TempFile class RegionSum(Diagnostic): """ - Computes the mean value of the field (3D, weighted). For 3D fields, - a horizontal mean for each level is also given. If a spatial window + Computes the sum of the field (3D, weighted). + + For 3D fields, a horizontal mean for each level is also given. If a spatial window is specified, the mean value is computed only in this window. 
:original author: Javier Vegas-Regidor @@ -38,7 +40,7 @@ class RegionSum(Diagnostic): "Diagnostic alias for the configuration file" def __init__(self, data_manager, startdate, member, chunk, domain, variable, grid_point, box, save3d, basin, - variance, grid): + grid): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -49,10 +51,12 @@ class RegionSum(Diagnostic): self.box = box self.save3d = save3d self.basin = basin - self.variance = variance self.grid = grid self.declared = {} + self.lat_name = 'lat' + self.lon_name = 'lon' + def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ self.box == other.box and self.variable == other.variable @@ -65,7 +69,7 @@ class RegionSum(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -91,52 +95,58 @@ class RegionSum(Diagnostic): for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(RegionSum(diags.data_manager, startdate, member, chunk, options['domain'], options['variable'], options['grid_point'], box, - options['save3D'], options['basin'], options['variance'], options['grid'])) + options['save3D'], options['basin'], options['grid'])) return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" if self.box.min_depth == 0: # To cdftools, this means all levels box_save = None else: box_save = self.box - self.declare_var('sum', False, box_save) - self.declare_var('sum', True, box_save) + self._declare_var('sum', False, box_save) + 
self._declare_var('sum', True, box_save) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" mean_file = TempFile.get() variable_file = self.variable_file.local_file - handler = Utils.openCdf(variable_file) + handler = Utils.open_cdf(variable_file) self.save3d &= 'lev' in handler.dimensions + if "latitude" in handler.variables: + self.lat_name = 'latitude' + if "longitude" in handler.variables: + self.lon_name = 'longitude' + handler.close() - cdfmean_options = [self.variable, self.grid_point, 0, 0, 0, 0, self.box.min_depth, self.box.max_depth] - if self.variance: - cdfmean_options += ['-var'] + cdfmean_options = ['-v', self.variable, '-p', self.grid_point, + '-zoom', 0, 0, 0, 0, self.box.min_depth, self.box.max_depth] if self.basin != Basins().Global: cdfmean_options.append('-M') cdfmean_options.append('mask_regions.3d.nc') cdfmean_options.append(self.basin.name) - cdftools.run('cdfsum', input=variable_file, output=mean_file, options=cdfmean_options) + cdftools.run('cdfsum', input_file=variable_file, input_option='-f', output_file=mean_file, + options=cdfmean_options) Utils.rename_variables(mean_file, {'gdept': 'lev', 'gdepw': 'lev'}, must_exist=False, rename_dimension=True) - self.send_var('mean', False, mean_file) - self.send_var('mean', True, mean_file) + self._send_var(False, mean_file) + self._send_var(True, mean_file) os.remove(mean_file) - def send_var(self, var, threed, mean_file): + def _send_var(self, threed, mean_file): + var = 'sum' if threed: if not self.save3d: return False @@ -149,8 +159,9 @@ class RegionSum(Diagnostic): levels = '' temp2 = TempFile.get() - Utils.nco.ncks(input=mean_file, output=temp2, options=('-v {0},lat,lon{1}'.format(original_name, levels),)) - handler = Utils.openCdf(temp2) + Utils.nco.ncks(input=mean_file, output=temp2, + options=('-v {0},{2.lat_name},{2.lon_name}{1}'.format(original_name, levels, self),)) + handler = Utils.open_cdf(temp2) var_handler = handler.variables[original_name] if 
hasattr(var_handler, 'valid_min'): del var_handler.valid_min @@ -159,7 +170,7 @@ class RegionSum(Diagnostic): handler.close() self.declared[final_name].set_local_file(temp2, diagnostic=self, rename_var=original_name, region=self.basin) - def declare_var(self, var, threed, box_save): + def _declare_var(self, var, threed, box_save): if threed: if not self.save3d: return False @@ -169,4 +180,3 @@ class RegionSum(Diagnostic): self.declared[final_name] = self.declare_chunk(ModelingRealms.ocean, final_name, self.startdate, self.member, self.chunk, box=box_save, region=self.basin, grid=self.grid) - diff --git a/earthdiagnostics/ocean/rotation.py b/earthdiagnostics/ocean/rotation.py index bdd6132c4cba6c04878ded48c8961eef25b88f2e..b00a30b913d0bb9f686a68e336b7e6e53a2845ac 100644 --- a/earthdiagnostics/ocean/rotation.py +++ b/earthdiagnostics/ocean/rotation.py @@ -1,15 +1,16 @@ # coding=utf-8 +"""Rotate two u v variables to align with latitude and longitude""" import shutil + from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, DiagnosticVariableOption from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.modelingrealm import ModelingRealms class Rotation(Diagnostic): """ - Cuts a meridional or zonal section + Rotate two u v variables to align with latitude and longitude :original author: Virginie Guemas :contributor: Javier Vegas-Regidor @@ -45,7 +46,8 @@ class Rotation(Diagnostic): def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variableu == other.variableu and self.variablev == other.variablev + self.domain == other.domain and self.variableu == other.variableu and \ + self.variablev == other.variablev and self.executable == other.executable def __str__(self): return 'Rotate variables Startdate: {0} Member: {1} Chunk: {2} Variables: {3}:{4} , ' \ @@ -55,7 
+57,7 @@ class Rotation(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -63,9 +65,9 @@ class Rotation(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption(diags.data_manager.config.var_manager, 'variableu'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager, 'variableu'), DiagnosticVariableOption(diags.data_manager.config.var_manager, 'variablev'), - DiagnosticDomainOption(default_value=ModelingRealms.ocean), DiagnosticOption('executable', '/home/Earth/jvegas/pyCharm/cfutools/interpolation/rotateUVorca')) options = cls.process_options(options, options_available) @@ -78,21 +80,20 @@ class Rotation(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.ufile = self.request_chunk(self.domain, self.variableu, self.startdate, self.member, self.chunk) self.vfile = self.request_chunk(self.domain, self.variablev, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.urotated_file = self.declare_chunk(self.domain, self.variableu, self.startdate, self.member, self.chunk, grid='rotated') self.vrotated_file = self.declare_chunk(self.domain, self.variablev, self.startdate, self.member, self.chunk, grid='rotated') def compute(self): - """ - Runs the diagnostic - """ - - handler = Utils.openCdf(self.ufile.local_file) + """Run the diagnostic""" + handler = Utils.open_cdf(self.ufile.local_file) if 'lev' in handler.dimensions: self.num_levels = handler.dimensions['lev'].size self.has_levels = True @@ -107,12 +108,12 @@ class Rotation(Diagnostic): urotated = self._merge_levels(self.variableu, 'u') vrotated = self._merge_levels(self.variablev, 'v') - 
ufile_handler = Utils.openCdf(self.ufile.local_file) + ufile_handler = Utils.open_cdf(self.ufile.local_file) self._add_metadata_and_vars(ufile_handler, urotated, self.variableu) ufile_handler.close() self.urotated_file.set_local_file(urotated) - vfile_handler = Utils.openCdf(self.vfile.local_file) + vfile_handler = Utils.open_cdf(self.vfile.local_file) self._add_metadata_and_vars(vfile_handler, vrotated, self.variablev) vfile_handler.close() self.vrotated_file.set_local_file(urotated) @@ -122,7 +123,7 @@ class Rotation(Diagnostic): if self.has_levels: Utils.nco.ncecat(input=self._get_level_file(0, direction), output=temp, options=("-n {0},2,1 -v '{1}'".format(self.num_levels, var),)) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) if 'record' in handler.dimensions: handler.renameDimension('record', 'lev') handler.close() @@ -164,7 +165,7 @@ class Rotation(Diagnostic): return namelist_file def _add_metadata_and_vars(self, reference_file_handler, rotaded_file, var_name): - rotated_handler = Utils.openCdf(rotaded_file) + rotated_handler = Utils.open_cdf(rotaded_file) self._copy_extra_variables(reference_file_handler, rotated_handler) Utils.copy_attributes(rotated_handler.variables[var_name], reference_file_handler.variables[var_name], ('_FillValue',)) @@ -180,7 +181,3 @@ class Rotation(Diagnostic): self.tempTemplate = TempFile.get(suffix='_01.nc') # self.tempTemplate = 'temp_01.nc' return self.tempTemplate.replace('_01.nc', '_{1}_{0:02d}.nc'.format(lev + 1, direction)) - - - - diff --git a/earthdiagnostics/ocean/siasiesiv.py b/earthdiagnostics/ocean/siasiesiv.py index 21adfc0fa4fc605991e849711a16451e14276677..53b1c7281a0a2c79760a6476e44bf934978f0ac7 100644 --- a/earthdiagnostics/ocean/siasiesiv.py +++ b/earthdiagnostics/ocean/siasiesiv.py @@ -1,32 +1,37 @@ # coding=utf-8 +"""Compute the sea ice extent , area and volume in both hemispheres or a specified region""" import os -# noinspection PyUnresolvedReferences -import earthdiagnostics.cdftoolspython 
as cdftoolspython import netCDF4 import numpy as np from bscearth.utils.log import Log from earthdiagnostics.constants import Basins -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticBasinOption +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticBasinOption, DiagnosticBoolOption from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import Utils, TempFile +# noinspection PyUnresolvedReferences + + class Siasiesiv(Diagnostic): """ Compute the sea ice extent , area and volume in both hemispheres or a specified region. - - :original author: Virginie Guemas - :contributor: Neven Fuckar - :contributor: Ruben Cruz - :contributor: Javier Vegas-Regidor - - :created: April 2012 - :last modified: June 2016 - + Parameters + ---------- + data_manager: DataManager + startdate: str + member: int + chunk: int + domain: ModelingRealm + variable: str + basin: Basin + mask: numpy.array + omit_vol: bool """ + alias = 'siasiesiv' "Diagnostic alias for the configuration file" @@ -34,19 +39,7 @@ class Siasiesiv(Diagnostic): e2t = None gphit = None - def __init__(self, data_manager, startdate, member, chunk, basin, mask, var_manager): - """ - :param data_manager: data management object - :type data_manager: DataManager - :param startdate: startdate - :type startdate: str - :param member: member number - :type member: int - :param chunk: chunk's number - :type chunk: int - :param mask: mask to use - :type mask: numpy.array - """ + def __init__(self, data_manager, startdate, member, chunk, basin, mask, var_manager, omit_vol): Diagnostic.__init__(self, data_manager) self.basin = basin self.startdate = startdate @@ -55,17 +48,18 @@ class Siasiesiv(Diagnostic): self.mask = mask self.generated = {} self.var_manager = var_manager + self.omit_volume = omit_vol self.sic_varname = self.var_manager.get_variable('sic').short_name self.sit_varname = self.var_manager.get_variable('sit').short_name def __str__(self): - return 'Siasiesiv 
Startdate: {0} Member: {1} Chunk: {2} Basin: {3}'.format(self.startdate, self.member, - self.chunk, self.basin) + return 'Siasiesiv Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} ' \ + 'Basin: {0.basin} Omit volume: {0.omit_volume}'.format(self) @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -73,7 +67,8 @@ class Siasiesiv(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticBasinOption('basin', Basins().Global), ) + options_available = (DiagnosticBasinOption('basin', Basins().Global), + DiagnosticBoolOption('omit_volume', False)) options = cls.process_options(options, options_available) if options['basin'] is None: @@ -85,8 +80,8 @@ class Siasiesiv(Diagnostic): job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(Siasiesiv(diags.data_manager, startdate, member, chunk, options['basin'], mask, - diags.config.var_manager)) - mesh_handler = Utils.openCdf('mesh_hgr.nc') + diags.config.var_manager, options['omit_volume'])) + mesh_handler = Utils.open_cdf('mesh_hgr.nc') Siasiesiv.e1t = np.asfortranarray(mesh_handler.variables['e1t'][0, :]) Siasiesiv.e2t = np.asfortranarray(mesh_handler.variables['e2t'][0, :]) Siasiesiv.gphit = np.asfortranarray(mesh_handler.variables['gphit'][0, :]) @@ -95,17 +90,22 @@ class Siasiesiv(Diagnostic): return job_list def request_data(self): - self.sit = self.request_chunk(ModelingRealms.seaIce, self.sit_varname, - self.startdate, self.member, self.chunk) + """Request data required by the diagnostic""" + if not self.omit_volume: + self.sit = self.request_chunk(ModelingRealms.seaIce, self.sit_varname, + self.startdate, self.member, self.chunk) self.sic = self.request_chunk(ModelingRealms.seaIce, self.sic_varname, self.startdate, self.member, self.chunk) def 
declare_data_generated(self): - self._declare_var('sivols') + """Declare data to be generated by the diagnostic""" + if not self.omit_volume: + self._declare_var('sivols') + self._declare_var('sivoln') + self._declare_var('siareas') self._declare_var('siextents') - self._declare_var('sivoln') self._declare_var('siarean') self._declare_var('siextentn') @@ -114,35 +114,40 @@ class Siasiesiv(Diagnostic): self.chunk, region=self.basin.name) def compute(self): - """ - Runs the diagnostic - """ - sit_handler = Utils.openCdf(self.sit.local_file) - sit = np.asfortranarray(sit_handler.variables[self.sit_varname][:]) - timesteps = sit_handler.dimensions['time'].size - sit_handler.close() + """Run the diagnostic""" + import earthdiagnostics.cdftoolspython as cdftoolspython - sic_handler = Utils.openCdf(self.sic.local_file) + sic_handler = Utils.open_cdf(self.sic.local_file) Utils.convert_units(sic_handler.variables[self.sic_varname], '1.0') sic = np.asfortranarray(sic_handler.variables[self.sic_varname][:]) + timesteps = sic_handler.dimensions['time'].size sic_handler.close() + if self.omit_volume: + sit = sic + else: + sit_handler = Utils.open_cdf(self.sit.local_file) + sit = np.asfortranarray(sit_handler.variables[self.sit_varname][:]) + sit_handler.close() + result = np.empty((8, timesteps)) for t in range(0, timesteps): result[:, t] = cdftoolspython.icediag.icediags(Siasiesiv.e1t, Siasiesiv.e2t, self.mask, Siasiesiv.gphit, sit[t, :], sic[t, :]) - self._extract_variable_and_rename(result[4, :], 'sivols', '10^9 m3') self._extract_variable_and_rename(result[5, :], 'siareas', '10^9 m2') self._extract_variable_and_rename(result[7, :], 'siextents', '10^9 m2') - self._extract_variable_and_rename(result[0, :], 'sivoln', '10^9 m3') self._extract_variable_and_rename(result[1, :], 'siarean', '10^9 m2') self._extract_variable_and_rename(result[3, :], 'siextentn', '10^9 m2') + if not self.omit_volume: + self._extract_variable_and_rename(result[4, :], 'sivols', '10^9 m3') + 
self._extract_variable_and_rename(result[0, :], 'sivoln', '10^9 m3') + def _extract_variable_and_rename(self, values, cmor_name, units): temp = TempFile.get() - reference_handler = Utils.openCdf(self.sit.local_file) + reference_handler = Utils.open_cdf(self.sic.local_file) os.remove(temp) handler = netCDF4.Dataset(temp, 'w') @@ -159,4 +164,3 @@ class Siasiesiv(Diagnostic): new_var.valid_max = np.max(values) handler.close() self.generated[cmor_name].set_local_file(temp) - diff --git a/earthdiagnostics/ocean/verticalgradient.py b/earthdiagnostics/ocean/verticalgradient.py index 9cd9ab6101875ea64c2f3c2a788ff423fa23fdae..64a933ed8f677d3293464a558373923237d18de1 100644 --- a/earthdiagnostics/ocean/verticalgradient.py +++ b/earthdiagnostics/ocean/verticalgradient.py @@ -1,14 +1,14 @@ # coding=utf-8 +"""Calculate the gradient between 2 ocean levels""" from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticIntOption, DiagnosticVariableOption -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class VerticalGradient(Diagnostic): """ - Chooses vertical level in ocean, or vertically averages between - 2 or more ocean levels + Calculate the gradient between 2 ocean levels :original author: Virginie Guemas :contributor: Eleftheria Exarchou @@ -55,7 +55,7 @@ class VerticalGradient(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -81,27 +81,27 @@ class VerticalGradient(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data 
to be generated by the diagnostic""" self.gradient_file = self.declare_chunk(ModelingRealms.ocean, self.variable + 'vgrad', self.startdate, self.member, self.chunk, box=self.box) def compute(self): - """ - Runs the diagnostic - """ - handler = Utils.openCdf(self.variable_file) + """Run the diagnostic""" + handler = Utils.open_cdf(self.variable_file) if 'lev' not in handler.dimensions: raise Exception('Variable {0} does not have a level dimension') var_handler = handler.variables[self.variable] - upper_level = var_handler[:, self.box.min_depth-1, ...] - lower_level = var_handler[:, self.box.max_depth-1, ...] + upper_level = var_handler[:, self.box.min_depth - 1, ...] + lower_level = var_handler[:, self.box.max_depth - 1, ...] gradient = upper_level - lower_level temp = TempFile.get() - new_file = Utils.openCdf(temp, 'w') + new_file = Utils.open_cdf(temp, 'w') for var in handler.variables.keys(): if var in (self.variable, 'lev', 'lev_bnds'): continue @@ -114,4 +114,3 @@ class VerticalGradient(Diagnostic): new_var.standard_name += '_vertical_gradient' self.gradient_file.set_local_file(temp) - diff --git a/earthdiagnostics/ocean/verticalmean.py b/earthdiagnostics/ocean/verticalmean.py index 21bd4a88bc171a15fa6358364a23f99264b03328..cf05d5eb6b4bef756d69ad00e2ce26ee8da5a271 100644 --- a/earthdiagnostics/ocean/verticalmean.py +++ b/earthdiagnostics/ocean/verticalmean.py @@ -1,15 +1,15 @@ # coding=utf-8 +"""Chooses vertical level in ocean, or vertically averages between 2 or more ocean levels""" from earthdiagnostics import cdftools from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticIntOption, DiagnosticVariableOption -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class VerticalMean(Diagnostic): """ - Chooses vertical level in ocean, or vertically averages between - 2 or more ocean levels + Chooses vertical level 
in ocean, or vertically averages between 2 or more ocean levels :original author: Virginie Guemas :contributor: Eleftheria Exarchou @@ -56,7 +56,7 @@ class VerticalMean(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -82,19 +82,19 @@ class VerticalMean(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.results = self.declare_chunk(ModelingRealms.ocean, self.variable + 'vmean', self.startdate, self.member, self.chunk, box=self.box) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() - handler = Utils.openCdf(self.variable_file.local_file) + handler = Utils.open_cdf(self.variable_file.local_file) if self.box.min_depth is None: lev_min = handler.variables['lev'][0] else: @@ -106,8 +106,7 @@ class VerticalMean(Diagnostic): lev_max = self.box.max_depth handler.close() - cdftools.run('cdfvertmean', input=self.variable_file.local_file, output=temp, + cdftools.run('cdfvertmean', input_file=self.variable_file.local_file, output_file=temp, options=[self.variable, 'T', lev_min, lev_max, '-debug']) Utils.setminmax(temp, '{0}_vert_mean'.format(self.variable)) self.results.set_local_file(temp, rename_var='{0}_vert_mean'.format(self.variable)) - diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 8951f493229004fc2ba3da3d7274e3216c9dbb3f..ee9d2084256f3c3b924f28ec6d2d60d465ad0df9 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -1,10 +1,11 @@ # coding=utf-8 
+"""Averages vertically any given variable""" from earthdiagnostics import cdftools from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticFloatOption, DiagnosticDomainOption, \ DiagnosticVariableOption, DiagnosticChoiceOption -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class VerticalMeanMeters(Diagnostic): @@ -56,7 +57,7 @@ class VerticalMeanMeters(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -84,19 +85,19 @@ class VerticalMeanMeters(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.results = self.declare_chunk(self.domain, self.variable + 'vmean', self.startdate, self.member, self.chunk, box=self.box) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() - handler = Utils.openCdf(self.variable_file.local_file) + handler = Utils.open_cdf(self.variable_file.local_file) if self.box.min_depth is None: lev_min = handler.variables['lev'][0] else: @@ -108,7 +109,7 @@ class VerticalMeanMeters(Diagnostic): lev_max = self.box.max_depth handler.close() - cdftools.run('cdfvertmean', input=self.variable_file.local_file, output=temp, + cdftools.run('cdfvertmean', input_file=self.variable_file.local_file, output_file=temp, options=[self.variable, self.grid_point, lev_min, lev_max, '-debug']) Utils.setminmax(temp, '{0}_vert_mean'.format(self.variable)) self.results.set_local_file(temp, 
rename_var='{0}_vert_mean'.format(self.variable)) diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py index 4b318b2cafabbdac1ef3d3ed24e8e33f07b9ed79..42a8d3ce149a900fa10cd79e4261c9711ebbd243 100644 --- a/earthdiagnostics/publisher.py +++ b/earthdiagnostics/publisher.py @@ -1,28 +1,32 @@ # coding=utf-8 +"""Module to allow classes to communicate when an event is produced""" + + class Publisher(object): - """ - Base class to provide functionality to notify updates to other objects - """ + """Base class to provide functionality to notify updates to other objects""" + def __init__(self): self._subscribers = dict() def subscribe(self, who, callback=None): """ Add a suscriber to the current publisher - - :param who: subscriber to add - :type who: object - :param callback: method to execute when publisher updates - :type callback: callable | NoneType - """ + + Parameters + ---------- + who: object + Subscriber to add + callback: callable or None, optional + Callback to call + """ if callback is None: callback = getattr(who, 'update') self._subscribers[who] = callback def unsubscribe(self, who): """ - Removes a suscriber from the current publisher - + Remove a suscriber from the current publisher + :param who: suscriber to remove :type who: object """ @@ -31,16 +35,30 @@ class Publisher(object): def dispatch(self, *args): """ Notify update to all the suscribers - + :param args: arguments to pass """ - for subscriber, callback in self._subscribers.items(): + for callback in tuple(self._subscribers.values()): # noinspection PyCallingNonCallable callback(*args) @property def suscribers(self): + """List of suscribers of this publisher""" + return self._subscribers.keys() + + def only_suscriber(self, who): """ - List of suscribers of this publisher + Get if an object is the sole suscriber of this publisher + + Parameters + ---------- + who: object + + Returns + ------- + bool """ - return self._subscribers.keys() + if len(self._subscribers) != 1: + return 
+ return who in self._subscribers diff --git a/earthdiagnostics/singleton.py b/earthdiagnostics/singleton.py deleted file mode 100644 index 2cd1bb8d91a945eace966863d1f49184232378bd..0000000000000000000000000000000000000000 --- a/earthdiagnostics/singleton.py +++ /dev/null @@ -1,8 +0,0 @@ -# coding=utf-8 -class SingletonType(type): - def __call__(cls, *args): - try: - return cls.__instance - except AttributeError: - cls.__instance = super(SingletonType, cls).__call__(*args) - return cls.__instance diff --git a/earthdiagnostics/statistics/__init__.py b/earthdiagnostics/statistics/__init__.py index 2424b99360092efc33d5a92b8d03c888a3882ea1..bc73837e51ec509621271906b29e6c52e88e5b44 100644 --- a/earthdiagnostics/statistics/__init__.py +++ b/earthdiagnostics/statistics/__init__.py @@ -1,5 +1,2 @@ # coding=utf-8 -from monthlypercentile import MonthlyPercentile -from climatologicalpercentile import ClimatologicalPercentile -from daysoverpercentile import DaysOverPercentile -from discretize import Discretize +"""Package containing statistical diagnostics""" diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 229ad02073a7f156fd3db0064c31b6e8508249ad..8050f7ef9bfa39aa21d50eb09ad3549bc9f47f16 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Calculates the climatological percentiles for the given leadtime""" import iris import iris.coord_categorisation import iris.coords @@ -11,7 +12,7 @@ from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, Di DiagnosticIntOption, DiagnosticListIntOption from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import TempFile -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class ClimatologicalPercentile(Diagnostic): @@ -47,8 +48,8 @@ class 
ClimatologicalPercentile(Diagnostic): def __eq__(self, other): return self.domain == other.domain and self.variable == other.variable and \ - self.start_year == other.start_year and self.end_year == other.end_year and \ - self.forecast_month == other.forecast_month + self.start_year == other.start_year and self.end_year == other.end_year and \ + self.forecast_month == other.forecast_month def __str__(self): return 'Climatological percentile Variable: {0.domain}:{0.variable} Period: {0.start_year}-{0.end_year} ' \ @@ -57,7 +58,7 @@ class ClimatologicalPercentile(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -81,9 +82,18 @@ class ClimatologicalPercentile(Diagnostic): return job_list def requested_startdates(self): - return ['{0}{1:02}01'.format(year, self.forecast_month) for year in range(self.start_year, self.end_year+1)] + """ + Required startdates to compute the percentile + + Returns + ------- + list of str + + """ + return ['{0}{1:02}01'.format(year, self.forecast_month) for year in range(self.start_year, self.end_year + 1)] def request_data(self): + """Request data required by the diagnostic""" for startdate in self.requested_startdates(): if startdate not in self.leadtime_files: self.leadtime_files[startdate] = {} @@ -92,14 +102,13 @@ class ClimatologicalPercentile(Diagnostic): None, None, vartype=VariableType.STATISTIC) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month:02d}'.format(self) self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" 
iris.FUTURE.netcdf_promote = True self._get_distribution() percentile_values = self._calculate_percentiles() diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index aad012a492728d7d2a5c320441dc88c398b79e64..ad1b25f397c33b8e065f342afd5751759a882054 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Diagnostic to compute statistics about days above and below a set of precomputed percentiles""" import os import iris @@ -11,15 +12,19 @@ from bscearth.utils.date import parse_date, add_months from bscearth.utils.log import Log from iris.time import PartialDateTime -from earthdiagnostics.diagnostic import * +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticDomainOption, DiagnosticIntOption, \ + DiagnosticListIntOption, DiagnosticOption from earthdiagnostics.frequency import Frequencies from earthdiagnostics.statistics.climatologicalpercentile import ClimatologicalPercentile from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.variable import VariableType class DaysOverPercentile(Diagnostic): """ - Calculates the montlhy percentiles + Calculates the days over and below the calculated percentiles + + It uses the stored percentiles and the stored discretized variables :param data_manager: data management object :type data_manager: DataManager @@ -39,10 +44,13 @@ class DaysOverPercentile(Diagnostic): self.forecast_month = forecast_month self.startdate = startdate + self.lat_coord = None + self.lon_coord = None + def __eq__(self, other): return self.startdate == other.startdate and self.domain == other.domain and \ - self.variable == other.variable and self.start_year == other.start_year and \ - self.end_year == other.end_year + self.variable == other.variable and self.start_year == other.start_year and \ + self.end_year == other.end_year def __str__(self): return 'Days over 
percentile Startdate: {0.startdate} Variable: {0.domain}:{0.variable} ' \ @@ -51,7 +59,7 @@ class DaysOverPercentile(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -71,10 +79,11 @@ class DaysOverPercentile(Diagnostic): for forecast_month in options['forecast_month']: job_list.append(DaysOverPercentile(diags.data_manager, options['domain'], options['variable'], options['start_year'], options['end_year'], - startdate, forecast_month)) + startdate, forecast_month)) return job_list def request_data(self): + """Request data required by the diagnostic""" var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month:02d}'.format(self) self.percentiles_file = self.request_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology) @@ -82,6 +91,7 @@ class DaysOverPercentile(Diagnostic): self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, None, None) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" var_over = self.variable + '_daysover_q{0}_{1.start_year}-{1.end_year}' var_below = self.variable + '_daysbelow_q{0}_{1.start_year}-{1.end_year}' self.days_over_file = {} @@ -98,44 +108,13 @@ class DaysOverPercentile(Diagnostic): vartype=VariableType.STATISTIC) def compute(self): - """ - Runs the diagnostic - """ - iris.FUTURE.netcdf_promote = True - percentiles = iris.load_cube(self.percentiles_file.local_file) - - handler = Utils.openCdf(self.variable_file.local_file) - if 'realization' in handler.variables: - handler.variables[self.variable].coordinates = 'realization' - handler.close() - var = iris.load_cube(self.variable_file.local_file) - date = parse_date(self.startdate) - lead_date = add_months(date, 1, self.data_manager.config.experiment.calendar) - 
leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} - - def assign_leadtime(coord, x): - # noinspection PyBroadException - try: - leadtime_month = 1 - partial_date = leadtimes[leadtime_month] - while coord.units.num2date(x) >= partial_date: - leadtime_month += 1 - try: - partial_date = leadtimes[leadtime_month] - except KeyError: - new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) - partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) - leadtimes[leadtime_month] = partial_date - return leadtime_month - except Exception: - pass - iris.coord_categorisation.add_categorised_coord(var, 'leadtime', 'time', assign_leadtime) - iris.coord_categorisation.add_year(var, 'time') - iris.coord_categorisation.add_day_of_year(var, 'time') + """Run the diagnostic""" + leadtimes, percentiles, var = self._load_data() try: realization_coord = var.coord('realization') except iris.exceptions.CoordinateNotFoundError: realization_coord = None + self.lat_coord = var.coord('latitude') self.lon_coord = var.coord('longitude') results_over = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} @@ -165,22 +144,22 @@ class DaysOverPercentile(Diagnostic): # noinspection PyTypeChecker days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps) - result = self.create_results_cube(days_over, percentile, realization_coord, - time_coord, var_daysover, long_name_days_over) + result = self._create_results_cube(days_over, percentile, realization_coord, + time_coord, var_daysover, long_name_days_over) results_over[percentile].append(result) # noinspection PyTypeChecker days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps) - result = self.create_results_cube(days_below, percentile, realization_coord, - time_coord, var_days_below, long_name_days_below) + result = self._create_results_cube(days_below, percentile, realization_coord, + 
time_coord, var_days_below, long_name_days_below) results_below[percentile].append(result) Log.debug('Saving percentiles startdate {0}', self.startdate) for perc in ClimatologicalPercentile.Percentiles: iris.FUTURE.netcdf_no_unlimited = True - self.days_over_file[perc].set_local_file(self.save_to_file(perc, results_over, var_daysover), + self.days_over_file[perc].set_local_file(self._save_to_file(perc, results_over, var_daysover), rename_var=var_daysover) - self.days_below_file[perc].set_local_file(self.save_to_file(perc, results_below, var_days_below), + self.days_below_file[perc].set_local_file(self._save_to_file(perc, results_below, var_days_below), rename_var=var_days_below) del self.days_over_file @@ -188,15 +167,46 @@ class DaysOverPercentile(Diagnostic): del self.lat_coord del self.lon_coord - def save_to_file(self, perc, results_over, var_daysover): + def _load_data(self): + iris.FUTURE.netcdf_promote = True + percentiles = iris.load_cube(self.percentiles_file.local_file) + handler = Utils.open_cdf(self.variable_file.local_file) + if 'realization' in handler.variables: + handler.variables[self.variable].coordinates = 'realization' + handler.close() + var = iris.load_cube(self.variable_file.local_file) + date = parse_date(self.startdate) + lead_date = add_months(date, 1, self.data_manager.config.experiment.calendar) + leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} + + def assign_leadtime(coord, x): + leadtime_month = 1 + partial_date = leadtimes[leadtime_month] + while coord.units.num2date(x) >= partial_date: + leadtime_month += 1 + try: + partial_date = leadtimes[leadtime_month] + except KeyError: + new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) + partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) + leadtimes[leadtime_month] = partial_date + return leadtime_month + + iris.coord_categorisation.add_categorised_coord(var, 'leadtime', 'time', assign_leadtime) + 
iris.coord_categorisation.add_year(var, 'time') + iris.coord_categorisation.add_day_of_year(var, 'time') + return leadtimes, percentiles, var + + @staticmethod + def _save_to_file(perc, results_over, var_daysover): temp = TempFile.get() iris.save(results_over[perc].merge_cube(), temp, zlib=True, unlimited_dimensions=['time']) Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, must_exist=False, rename_dimension=True) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) if 'time' not in handler.dimensions: new_file = TempFile.get() - new_handler = Utils.openCdf(new_file, 'w') + new_handler = Utils.open_cdf(new_file, 'w') new_handler.createDimension('time', 1) for dimension in handler.dimensions: @@ -228,8 +238,8 @@ class DaysOverPercentile(Diagnostic): handler.close() return temp - def create_results_cube(self, days_over, percentile, realization_coord, time_coord, - var_name, long_name): + def _create_results_cube(self, days_over, percentile, realization_coord, time_coord, + var_name, long_name): result = iris.cube.Cube(days_over.astype(np.float32), var_name=var_name, long_name=long_name, units=1.0) if realization_coord is not None: result.add_aux_coord(realization_coord, 0) diff --git a/earthdiagnostics/statistics/discretize.py b/earthdiagnostics/statistics/discretize.py index c997e5ec73f3697f320deed2ee853210ef66876b..e577d90b1b2c501351a70de688400e49ebb1da8e 100644 --- a/earthdiagnostics/statistics/discretize.py +++ b/earthdiagnostics/statistics/discretize.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Diagnostic to discretize time series""" import math import cf_units @@ -17,7 +18,7 @@ from iris.time import PartialDateTime from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ DiagnosticIntOption, DiagnosticFloatOption from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class 
Discretize(Diagnostic): @@ -45,6 +46,7 @@ class Discretize(Diagnostic): self.realizations = None self.num_bins = num_bins self._bins = None + self.discretized_data = None self.cmor_var = data_manager.variable_list.get_variable(variable, silent=True) if not math.isnan(min_value): @@ -69,11 +71,18 @@ class Discretize(Diagnostic): self.process = psutil.Process() - def print_memory_used(self): + def _print_memory_used(self): Log.debug('Memory: {0:.2f} GB'.format(self.process.memory_info().rss / 1024.0**3)) @property def bins(self): + """ + Configuration of bins to use + + Returns + ------- + int or iterable of str + """ if self._bins is None: return self.num_bins return self._bins @@ -84,8 +93,11 @@ class Discretize(Diagnostic): def __eq__(self, other): return self.domain == other.domain and self.variable == other.variable and self.num_bins == other.num_bins and \ - self.min_value == other.min_value and self.max_value == other.max_value and \ - self.startdate == other.startdate + self.min_value == other.min_value and self.max_value == other.max_value and \ + self.startdate == other.startdate + + def __hash__(self): + return hash(str(self)) def __str__(self): return 'Discretizing variable: {0.domain}:{0.variable} Startdate: {0.startdate} ' \ @@ -94,7 +106,7 @@ class Discretize(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -117,35 +129,35 @@ class Discretize(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.original_data = self.request_chunk(self.domain, self.variable, self.startdate, None, None) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" var_name = '{0.variable}_dis'.format(self) self.discretized_data = self.declare_chunk(self.domain, var_name, self.startdate, None, None, 
vartype=VariableType.STATISTIC) def compute(self): - """ - Runs the diagnostic - """ - self.print_memory_used() + """Run the diagnostic""" + self._print_memory_used() iris.FUTURE.netcdf_promote = True self._load_cube() - self.print_memory_used() + self._print_memory_used() self._get_value_interval() - self.print_memory_used() + self._print_memory_used() Log.info('Range: [{0}, {1}]', self.min_value, self.max_value) self._get_distribution() - self.print_memory_used() + self._print_memory_used() self._save_results() - self.print_memory_used() + self._print_memory_used() del self.distribution del self.data_cube - self.print_memory_used() + self._print_memory_used() def _load_cube(self): - handler = Utils.openCdf(self.original_data.local_file) + handler = Utils.open_cdf(self.original_data.local_file) if 'realization' in handler.variables: handler.variables[self.variable].coordinates = 'realization' handler.close() @@ -178,9 +190,9 @@ class Discretize(Diagnostic): bins_bounds = np.zeros((self.num_bins, 2)) for x in range(self.num_bins): - bins[x] = (self.bins[x+1] - self.bins[x]) / 2 + self.bins[x] + bins[x] = (self.bins[x + 1] - self.bins[x]) / 2 + self.bins[x] bins_bounds[x, 0] = self.bins[x] - bins_bounds[x, 1] = self.bins[x+1] + bins_bounds[x, 1] = self.bins[x + 1] bins_coord = iris.coords.DimCoord(bins, var_name='bin', units=self.data_cube.units, bounds=bins_bounds) @@ -200,8 +212,8 @@ class Discretize(Diagnostic): units='months')) lead_date = add_months(date, leadtime - 1, self.data_manager.config.experiment.calendar) leadtime_cube.add_aux_coord(iris.coords.AuxCoord(cf_units.date2num(lead_date, - unit='days since 1950-01-01', - calendar="standard"), + unit='days since 1950-01-01', + calendar="standard"), var_name='time', units='days since 1950-01-01')) @@ -220,13 +232,13 @@ class Discretize(Diagnostic): if 'realization' in leadtime_cube.coords(): for realization_cube in self.data_cube.slices_over('realization'): Log.debug('Discretizing realization {0}', 
realization_cube.coord('realization').points[0]) - self.print_memory_used() + self._print_memory_used() if leadtime not in self.distribution: self.distribution[leadtime] = self._calculate_distribution(realization_cube) else: self.distribution[leadtime] += self._calculate_distribution(realization_cube) else: - self.print_memory_used() + self._print_memory_used() self.distribution[leadtime] = self._calculate_distribution(leadtime_cube) # noinspection PyTypeChecker @@ -251,12 +263,3 @@ class Discretize(Diagnostic): return histogram return np.apply_along_axis(calculate_histogram, 0, data_cube.data) - - - - - - - - - diff --git a/earthdiagnostics/statistics/monthlypercentile.py b/earthdiagnostics/statistics/monthlypercentile.py index 1ecb5c44276b68650216a0e79a7d239c13a499a6..9e3575bd9ea14d1a468d8b006feaf7095352bf8f 100644 --- a/earthdiagnostics/statistics/monthlypercentile.py +++ b/earthdiagnostics/statistics/monthlypercentile.py @@ -1,11 +1,13 @@ # coding=utf-8 +"""Calculates the montlhy percentiles""" +from calendar import monthrange + from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, DiagnosticListIntOption from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.variable_type import VariableType -from calendar import monthrange +from earthdiagnostics.variable import VariableType class MonthlyPercentile(Diagnostic): @@ -49,7 +51,7 @@ class MonthlyPercentile(Diagnostic): @classmethod def generate_jobs(cls, diags, options): """ - Creates a job for each chunk to compute the diagnostic + Create a job for each chunk to compute the diagnostic :param diags: Diagnostics manager class :type diags: Diags @@ -69,9 +71,11 @@ class MonthlyPercentile(Diagnostic): return job_list def request_data(self): + """Request data required by the diagnostic""" self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, 
self.member, self.chunk) def declare_data_generated(self): + """Declare data to be generated by the diagnostic""" self.max_file = self.declare_chunk(self.domain, self.variable_max, self.startdate, self.member, self.chunk, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) self.min_file = self.declare_chunk(self.domain, self.variable_min, self.startdate, self.member, self.chunk, @@ -88,21 +92,47 @@ class MonthlyPercentile(Diagnostic): @property def variable_max(self): + """ + Variable name for the maximum + + Returns + ------- + str + + """ return '{0}max'.format(self.variable) @property def variable_min(self): + """ + Variable name for the minimum + + Returns + ------- + str + + """ return '{0}min'.format(self.variable) def percentile(self, percentile): + """ + Variable name for the given percentile + + Parameters + ---------- + percentile: int + + Returns + ------- + str + + """ return '{0}_q{1}'.format(self.variable, percentile) def compute(self): - """ - Runs the diagnostic - """ + """Run the diagnostic""" temp = TempFile.get() - handler = Utils.openCdf(self.variable_file.local_file) + handler = Utils.open_cdf(self.variable_file.local_file) datetimes = Utils.get_datetime_from_netcdf(handler) handler.close() @@ -140,22 +170,19 @@ class MonthlyPercentile(Diagnostic): Log.debug('Computing percentile {0}', percentile) Utils.cdo.monpctl(str(percentile), input=[temp, monmin_file, monmax_file], output=temp) Utils.rename_variable(temp, 'lev', 'ensemble', False, True) - handler = Utils.openCdf(monmax_file) + handler = Utils.open_cdf(monmax_file) handler.variables[self.variable].long_name += ' {0} Percentile'.format(percentile) handler.close() self.percentiles[percentile].set_local_file(temp, rename_var=self.variable) Utils.rename_variable(monmax_file, 'lev', 'ensemble', False, True) - handler = Utils.openCdf(monmax_file) + handler = Utils.open_cdf(monmax_file) handler.variables[self.variable].long_name += ' Monthly Maximum' handler.close() 
self.max_file.set_local_file(monmax_file, rename_var=self.variable) Utils.rename_variable(monmin_file, 'lev', 'ensemble', False, True) - handler = Utils.openCdf(monmin_file) + handler = Utils.open_cdf(monmin_file) handler.variables[self.variable].long_name += ' Monthly Minimum' handler.close() self.min_file.set_local_file(monmin_file, rename_var=self.variable) - - - diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 8637121a55d0595b3e008cdf5c47675d0a7bbe29..b6606524453ee5785d197da28d259a8f1c23b32b 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -1,27 +1,32 @@ # coding=utf-8 +"""Data manager for THREDDS server""" import os +from datetime import datetime from time import strptime import iris import netCDF4 import numpy as np -from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date +from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date from bscearth.utils.log import Log -from iris.coords import DimCoord from cf_units import Unit +from iris.coords import DimCoord from datafile import DataFile, StorageStatus, LocalStatus from earthdiagnostics.datamanager import DataManager from earthdiagnostics.utils import TempFile, Utils -from datetime import datetime - -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class THREDDSManager(DataManager): """ - Data manager class for CMORized experiments + Data manager class for THREDDS + + Parameters + ---------- + config: Config """ + def __init__(self, config): super(THREDDSManager, self).__init__(config) self.server_url = config.thredds.server_url @@ -39,52 +44,27 @@ class THREDDSManager(DataManager): if self.config.data_type in ('obs', 'recon') and self.experiment.chunk_size != 1: raise Exception('For obs and recon data chunk_size must be always 1') - # noinspection PyUnusedLocal - def get_leadtimes(self, domain, variable, startdate, 
member, leadtimes, frequency=None, vartype=VariableType.MEAN): - - aggregation_path = self.get_var_url(variable, startdate, frequency, None, vartype) - startdate = parse_date(startdate) - start_chunk = chunk_start_date(startdate, self.experiment.num_chunks, self.experiment.chunk_size, - 'month', self.experiment.calendar) - end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) - - thredds_subset = THREDDSSubset(aggregation_path, "", variable, startdate, end_chunk) - selected_months = ','.join([str(add_months(startdate, i, self.experiment.calendar).month) for i in leadtimes]) - temp = TempFile.get() - if self.config.data_type == 'exp': - select_months = '-selmonth,{0} {1}'.format(selected_months, thredds_subset) - selected_years = ','.join([str(add_months(startdate, i, self.experiment.calendar).year) for i in leadtimes]) - Utils.cdo.selyear(selected_years, input=select_months, output=temp) - else: - Utils.cdo.selmonth(selected_months, input=thredds_subset, output=temp) - return temp - # noinspection PyUnusedLocal def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - vartype=VariableType.MEAN): - """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param domain: CMOR domain - :type domain: str - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch 
folder - :rtype: str + vartype=VariableType.MEAN, possible_versions=None): + """" + Check if a file exists in the storage + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None + box: Box or None + frequency: Frequency or None + vartype: VariableType + + Returns + ------- + bool """ aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) @@ -98,25 +78,24 @@ class THREDDSManager(DataManager): def get_file_path(self, startdate, domain, var, frequency, vartype, box=None, grid=None): """ - Returns the path to a concrete file - :param startdate: file's startdate - :type startdate: str - :param domain: file's domain - :type domain: str - :param var: file's var - :type var: str - :param frequency: file's frequency - :type frequency: Frequency - :param box: file's box - :type box: Box - :param grid: file's grid - :type grid: str - :return: path to the file - :rtype: str - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType + Return the path to a concrete file + + Parameters + ---------- + startdate: str + domain: ModelingRealm + var: str + frequency: Frequency + vartype: VariableType + box: Box or None, optional + grid: str or None, optional + + Returns + ------- + str + """ - if not frequency: + if frequency is None: frequency = self.config.frequency var = self._get_final_var_name(box, var) @@ -144,6 +123,7 @@ class THREDDSManager(DataManager): def get_year(self, domain, var, startdate, member, year, grid=None, box=None, vartype=VariableType.MEAN): """ Ge a file containing all the data for one year for one variable + :param domain: variable's domain :type domain: str :param var: variable's name @@ -163,12 +143,13 @@ class THREDDSManager(DataManager): :return: """ aggregation_path = self.get_var_url(var, startdate, None, box, vartype) - thredds_subset = THREDDSSubset(aggregation_path, "", var, datetime(year, 1, 1), datetime(year+1, 1, 1)) + thredds_subset 
= THREDDSSubset(aggregation_path, "", var, datetime(year, 1, 1), datetime(year + 1, 1, 1)) return thredds_subset.download() def get_var_url(self, var, startdate, frequency, box, vartype): """ Get url for dataset + :param var: variable to retrieve :type var: str :param startdate: startdate to retrieve @@ -181,7 +162,7 @@ class THREDDSManager(DataManager): :type vartype: VariableType :return: """ - if not frequency: + if frequency is None: frequency = self.config.frequency var = self._get_final_var_name(box, var) full_path = os.path.join(self.server_url, 'dodsC', self.config.data_type, self.experiment.institute, @@ -200,64 +181,27 @@ class THREDDSManager(DataManager): else: return '{0}.nc'.format(var) - def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None, - frequency=None, year=None, date_str=None, move_old=False, vartype=VariableType.MEAN): - """ - Creates the link of a given file from the CMOR repository. - - :param cmor_var: - :param move_old: - :param date_str: - :param year: if frequency is yearly, this parameter is used to give the corresponding year - :type year: int - :param domain: CMOR domain - :type domain: str - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: str - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - :param cmor_var: variable instance describing the selected variable - :type cmor_var: Variable - """ - # THREDDSManager does not require links - pass - def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, 
vartype=VariableType.MEAN): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + Request a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + Parameters + ---------- + domain: ModelingRealm + var: str + startdate: str + member: int + chunk: int + grid: str or None + box: Box or None + frequency: Frequency or None + vartype: VariableType or None + + Returns + ------- + DataFile - :param vartype: - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType - :return: path to the copy created on the scratch folder - :rtype: str """ aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) file_path = self.get_file_path(startdate, domain, var, frequency, vartype, box=box) @@ -275,10 +219,10 @@ class THREDDSManager(DataManager): def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN, diagnostic=None): """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + Copy a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - :param diagnostic: - :param region: + :param diagnostic: + :param region: :param domain: CMOR domain :type domain: Domain :param var: variable name @@ -290,11 +234,11 @@ class THREDDSManager(DataManager): :param chunk: file's chunk :type 
chunk: int :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType + :type grid: str|None :param box: file's box (only needed to retrieve sections or averages) :type box: Box :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType + :type frequency: Frequency|None :param vartype: Variable type (mean, statistic) :type vartype: VariableType :return: path to the copy created on the scratch folder @@ -320,23 +264,29 @@ class THREDDSManager(DataManager): class THREDDSError(Exception): + """Exception to be launched when a THREDDS related error is encounteredd""" + pass class THREDDSSubset(DataFile): + """ + Implementation of DataFile for the THREDDS server + + Parameters + ---------- + thredds_path: str + file_path: str + var: str + start_time: datetime + end_time: datetime + """ + def __init__(self, thredds_path, file_path, var, start_time, end_time): - """ - - :param thredds_path: - :param file_path: - :param var: - :type var: str - :param start_time: - :param end_time: - """ super(THREDDSSubset, self).__init__() self.thredds_path = thredds_path self.remote_file = file_path + self.local_file = None if '_f' in var: self.var = var[:var.index('_f')] self.hourly = var[var.index('_f'):] @@ -352,6 +302,15 @@ class THREDDSSubset(DataFile): return 'THREDDS {0.thredds_path} ({0.start_time}-{0.end_time})'.format(self) def download(self): + """ + Get data from the THREDDS server + + Raises + ------ + THREDDSError + If the data can not be downloaded + + """ try: Log.debug('Downloading thredds subset {0}...', self) iris.FUTURE.netcdf_promote = True @@ -364,22 +323,23 @@ class THREDDSSubset(DataFile): self.local_file = TempFile.get() iris.save(var_cube, self.local_file, zlib=True) if not Utils.check_netcdf_file(self.local_file): - raise Exception('netcdf check for downloaded file failed') + raise THREDDSError('netcdf check for downloaded file failed') Log.info('Request {0} ready!', 
self) self.local_status = LocalStatus.READY - except Exception as ex: + except THREDDSError as ex: Log.error('Can not retrieve {0} from server: {1}'.format(self, ex)) self.local_status = LocalStatus.FAILED - # noinspection PyUnusedLocal,PyMethodMayBeStatic - def _correct_cube(self, cube, field, filename): + # noinspection PyUnusedLocal + @staticmethod + def _correct_cube(cube, field, filename): if not cube.coords('time'): return time = cube.coord('time') if time.units.origin.startswith('month'): ref = strptime(time.units.origin[time.units.origin.index(' since ') + 7:], '%Y-%m-%d %H:%M:%S') helper = np.vectorize(lambda x: datetime(year=ref.tm_year + int(x) / 12, - month=int(x-1) % 12 + 1, + month=int(x - 1) % 12 + 1, day=ref.tm_mday)) times = np.round(time.points + ref.tm_mon) dates = helper(times) diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index 7c70b56e4d0e69a69d63a0a185e33d399db572be..bf44e4b442316ee4b4347ca1ef9221de9a11f927 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Common utilities for multiple topics that are not big enough to have their own module""" import datetime import os import re @@ -10,6 +11,7 @@ import tarfile import tempfile from contextlib import contextmanager +import cf_units import iris import iris.exceptions import netCDF4 @@ -18,7 +20,6 @@ import six import xxhash from bscearth.utils.log import Log from cdo import Cdo -from cfunits import Units from nco import Nco from earthdiagnostics.constants import Basins @@ -26,6 +27,7 @@ from earthdiagnostics.constants import Basins @contextmanager def suppress_stdout(): + """Redirect the standard output to devnull""" with open(os.devnull, "w") as devnull: old_stdout = sys.stdout sys.stdout = devnull @@ -35,14 +37,8 @@ def suppress_stdout(): sys.stdout = old_stdout -class File(object): - pass - - class Utils(object): - """ - Container class for miscellaneous utility methods - """ + """Container class for miscellaneous 
utility methods""" nco = Nco() """An instance of Nco class ready to be used""" @@ -50,26 +46,37 @@ class Utils(object): """An instance of Cdo class ready to be used""" @staticmethod - def get_mask(basin): + def get_mask(basin, with_levels=False): """ - Returns a numpy array containing the mask for the given basin + Return the mask for the given basin - :param basin: basin to retrieve - :type basin: Basin - :return: mask - :rtype: numpy.array - """ + Parameters + ---------- + basin: Basin + + Returns + ------- + numpy.array + + Raises + ------ + Exception: If mask.regions.nc is not available + """ basin = Basins().parse(basin) if basin != Basins().Global: try: - mask_handler = Utils.openCdf('mask_regions.nc') - mask = mask_handler.variables[basin.name][:, 0, :] + if with_levels: + mask_handler = Utils.open_cdf('mask_regions.3d.nc') + mask = mask_handler.variables[basin.name][0, ...] + else: + mask_handler = Utils.open_cdf('mask_regions.nc') + mask = mask_handler.variables[basin.name][:, 0, :] mask_handler.close() except IOError: raise Exception('File mask.regions.nc is required for basin {0}'.format(basin)) else: - mask_handler = Utils.openCdf('mask.nc') + mask_handler = Utils.open_cdf('mask.nc') mask = mask_handler.variables['tmask'][0, 0, :] mask_handler.close() return mask @@ -77,11 +84,13 @@ class Utils(object): @staticmethod def setminmax(filename, variable_list): """ - Sets the valid_max and valid_min values to the current max and min values on the file - :param filename: path to file - :type filename: str - :param variable_list: list of variables in which valid_min and valid_max will be set - :type variable_list: str | list + Set the valid_max and valid_min values to the current max and min values on the file + + Parameters + ---------- + filename: str + variable_list: str or iterable of str + """ # noinspection PyTypeChecker if isinstance(variable_list, six.string_types): @@ -89,7 +98,7 @@ class Utils(object): Log.info('Getting max and min values for {0}', ' 
'.join(variable_list)) - handler = Utils.openCdf(filename) + handler = Utils.open_cdf(filename) for variable in variable_list: var = handler.variables[variable] values = [np.max(var), np.min(var)] @@ -102,17 +111,22 @@ class Utils(object): @staticmethod def rename_variable(filepath, old_name, new_name, must_exist=True, rename_dimension=False): """ - Rename multiple variables from a NetCDF file - :param filepath: path to file - :type filepath: str - :param old_name: variable's name to change - :type old_name: str - :param new_name: new name - :type new_name: str - :param must_exist: if True, the function will raise an exception if the variable name does not exist - :type must_exist: bool - :param rename_dimension: if True, also rename dimensions with the same name - :type rename_dimension: bool + Rename variable from a NetCDF file + + This function is just a wrapper around Utils.rename_variables + + Parameters + ---------- + filepath: str + old_name: str + new_name: str + must_exist: bool, optional + rename_dimension: bool, optional + + See Also + -------- + Utils.rename_variables + """ Utils.rename_variables(filepath, {old_name: new_name}, must_exist, rename_dimension) @@ -120,19 +134,26 @@ class Utils(object): def rename_variables(filepath, dic_names, must_exist=True, rename_dimension=False): """ Rename multiple variables from a NetCDF file - :param filepath: path to file - :type filepath: str - :param dic_names: dictionary containing old names as keys and new names as values - :type dic_names: dict - :param must_exist: if True, the function will raise an exception if the variable name does not exist - :type must_exist: bool - :param rename_dimension: if True, also rename dimensions with the same name - :type rename_dimension: bool + + Parameters + ---------- + filepath: str + dic_names: dict of str: str + Gives the renaming to do in the form old_name: new_name + must_exist: bool, optional + rename_dimension: bool, optional + + Raises + ------- + ValueError + If 
any original name is the same as the new + Exception + If any requested variable does not exist and must_exist is True """ for old, new in six.iteritems(dic_names): if old == new: raise ValueError('{0} original name is the same as the new') - handler = Utils.openCdf(filepath) + handler = Utils.open_cdf(filepath) original_names = set(handler.variables.keys()).union(handler.dimensions.keys()) if not any((True for x in dic_names.keys() if x in original_names)): @@ -145,7 +166,7 @@ class Utils(object): temp = TempFile.get() shutil.copyfile(filepath, temp) - handler = Utils.openCdf(temp) + handler = Utils.open_cdf(temp) error = False try: @@ -160,16 +181,30 @@ class Utils(object): if error: Log.debug('First attemp to rename failed. Using secondary rename method for netCDF') - Utils._rename_vars_by_creating_new_file(dic_names, filepath, temp) + Utils._rename_by_new_file(dic_names, filepath, temp) Log.debug('Rename done') Utils.move_file(temp, filepath) @staticmethod def check_netcdf_file(filepath): + """ + Check if a NetCDF file is well stored + + This functions is used to check if a NetCDF file is corrupted. It prefers to raise a false postive than + to have false negatives. 
+ + Parameters + ---------- + filepath + + Returns + ------- + bool + """ with suppress_stdout(): try: - handler = Utils.openCdf(filepath) + handler = Utils.open_cdf(filepath) if 'time' in handler.variables: if handler.variables['time'].dimensions != ('time', ): handler.close() @@ -180,33 +215,37 @@ class Utils(object): cubes = iris.load(filepath) if len(cubes) == 0: return False - except iris.exceptions.IrisError as ex: - Log.debug('netCDF checks failed: {0}', ex) - return False - except RuntimeError as ex: - # HDF error, usually - Log.debug('netCDF checks failed: {0}', ex) - return False - except Exception as ex: - # HDF error, usually + except (iris.exceptions.IrisError, RuntimeError, OSError) as ex: Log.debug('netCDF checks failed: {0}', ex) return False return True @staticmethod def get_file_variables(filename): - handler = Utils.openCdf(filename) + """ + Get all the variables in a file + + Parameters + ---------- + filename + + Returns + ------- + iterable of str + + """ + handler = Utils.open_cdf(filename) variables = handler.variables.keys() handler.close() return variables @staticmethod - def _rename_vars_by_creating_new_file(dic_names, filepath, temp): - original_handler = Utils.openCdf(filepath) - new_handler = Utils.openCdf(temp, 'w') + def _rename_by_new_file(dic_names, filepath, temp): + original_handler = Utils.open_cdf(filepath) + new_handler = Utils.open_cdf(temp, 'w') for attribute in original_handler.ncattrs(): original = getattr(original_handler, attribute) - setattr(new_handler, attribute, Utils.convert_to_ASCII_if_possible(original)) + setattr(new_handler, attribute, Utils.convert_to_ascii_if_possible(original)) for dimension in original_handler.dimensions.keys(): Utils.copy_dimension(original_handler, new_handler, dimension, new_names=dic_names) for variable in original_handler.variables.keys(): @@ -216,15 +255,31 @@ class Utils(object): # noinspection PyPep8Naming @staticmethod - def convert_to_ASCII_if_possible(string, encoding='ascii'): + 
def convert_to_ascii_if_possible(string, encoding='ascii'): + u""" + Convert an Unicode string to ASCII if all characters can be translated. + + If a string can not be translated it is unchanged. It also automatically + replaces Bretonnière with Bretonniere + + Parameters + ---------- + string: unicode + encoding: str, optional + + Returns + ------- + str + + """ # noinspection PyTypeChecker if isinstance(string, six.string_types): try: return string.encode(encoding) except UnicodeEncodeError: if u'Bretonnière' in string: - string = string.replace(u'Bretonnière', 'Bretonniere') - return Utils.convert_to_ASCII_if_possible(string, encoding) + string = string.replace(u'Bretonnière', 'Bretonnière') + return Utils.convert_to_ascii_if_possible(string, encoding) return string @staticmethod @@ -239,21 +294,34 @@ class Utils(object): if old_name in handler.variables: if new_name not in handler.variables: handler.renameVariable(old_name, new_name) + for var in handler.variables: + if hasattr(var, 'coordinates') and " {0} ".format(old_name) in var.coordinates: + new_coordinates = var.coordinates.replace(" {0} ".format(old_name), + " {0} ".format(new_name)) + var.coordinates = Utils.convert_to_ascii_if_possible(new_coordinates) elif must_exist: raise Exception("Variable {0} does not exist in file {1}".format(old_name, filepath)) handler.sync() @staticmethod - def copy_file(source, destiny, save_hash=False, use_stored_hash=True): + def copy_file(source, destiny, save_hash=False, use_stored_hash=True, retrials=3): """ - Copies a file from source to destiny, creating dirs if necessary + Copy a file and compute a hash to check if the copy is equal to the source + + Parameters + ---------- + source: str + destiny: str + save_hash: bool, optional + If True, stores a copy of the hash + use_stored_hash: bool, optional + If True, try to use the stored value of the source hash instead of computing it + retrials: int, optional + Minimum value is 1 - :param save_hash: if True, stores hash 
value in a file - :type save_hash: bool - :param source: path to source - :type source: str - :param destiny: path to destiny - :type destiny: str + See Also + -------- + move_file """ dirname_path = os.path.dirname(destiny) if dirname_path and not os.path.exists(dirname_path): @@ -268,7 +336,9 @@ class Utils(object): Log.debug('Hashing original file... {0}', datetime.datetime.now()) hash_original = Utils.get_file_hash(source, use_stored=use_stored_hash) - retrials = 3 + if retrials < 1: + retrials = 1 + while hash_original != hash_destiny: if retrials == 0: raise Exception('Can not copy {0} to {1}'.format(source, destiny)) @@ -280,33 +350,56 @@ class Utils(object): Log.debug('Finished {0}', datetime.datetime.now()) @staticmethod - def move_file(source, destiny, save_hash=False): + def move_file(source, destiny, save_hash=False, retrials=3): """ - Moves a file from source to destiny, creating dirs if necessary + Move a file and compute a hash to check if the copy is equal to the source + + It is just a call to Utils.copy_file followed bu + + Parameters + ---------- + source: str + destiny: str + save_hash: bool, optional + If True, stores a copy of the hash + retrials: int, optional + Minimum value is 1 + + See Also + -------- + copy_file - :param source: path to source - :type source: str - :param destiny: path to destiny - :type destiny: str - :param save_hash: if True, stores hash value in a file - :type save_hash: bool """ - Utils.copy_file(source, destiny, save_hash) + Utils.copy_file(source, destiny, save_hash, retrials) os.remove(source) @staticmethod def remove_file(path): """ - Removes a file, checking before if its exists + Delete a file only if it previously exists + + Parameters + ---------- + path: str - :param path: path to file - :type path: str """ if os.path.isfile(path): os.remove(path) @staticmethod def copy_tree(source, destiny): + """ + Copy a full tree to a new location + + Parameters + ---------- + source: str + destiny: str + + See Also + 
-------- + move_tree + """ if not os.path.exists(destiny): os.makedirs(destiny) shutil.copystat(source, destiny) @@ -321,21 +414,33 @@ class Utils(object): @staticmethod def move_tree(source, destiny): + """ + Move a tree to a new location + + Parameters + ---------- + source: str + destiny: str + + See Also + ------- + copy_tree + """ Utils.copy_tree(source, destiny) shutil.rmtree(source) @staticmethod def get_file_hash(filepath, use_stored=False, save=False): """ - Returns the xxHash hash for the given filepath - :param filepath: path to the file to compute hash on - :type filepath:str - :param use_stored: if True, try to read the hash value from file - :type use_stored: bool - :param save: if True, stores hash value in a file - :type save: bool - :return: file's xxHash hash - :rtype: str + Get the xxHash hash for a given file + + Parameters + ---------- + filepath: str + use_stored: bool, optional + If True, tries to use the stored hash before computing it + save: bool, optional + If True, saves the hash to a file """ if use_stored: hash_file = Utils._get_hash_filename(filepath) @@ -368,14 +473,25 @@ class Utils(object): @staticmethod def execute_shell_command(command, log_level=Log.DEBUG): """ - Executes a sheel commandsi - :param command: command to execute + Execute shell command + + Writes the output to the log with the specified level + + Parameters + ---------- + command: str or iterable of str + log_level: int, optional + + Returns + ------- + iterable of str + Standard output of the command + + Raises + ------ + Utils.ExecutionError + If the command return value is non zero - Log.info('Detailed time for diagnostic class') - :param log_level: log level to use for command output - :type log_level: int - :return: command output - :rtype: list """ # noinspection PyTypeChecker if isinstance(command, six.string_types): @@ -386,26 +502,26 @@ class Utils(object): for line in comunicate: if not line: continue + if six.PY3: + line = str(line, encoding='UTF-8') if 
log_level != Log.NO_LOG: Log.log.log(log_level, line) output.append(line) if process.returncode != 0: - raise Utils.ExecutionError('Error executing {0}\n Return code: {1}'.format(' '.join(command), process.returncode)) + raise Utils.ExecutionError('Error executing {0}\n Return code: {1}'.format(' '.join(command), + str(process.returncode))) return output _cpu_count = None @staticmethod def available_cpu_count(): - """ - Number of available virtual or physical CPUs on this systemx - """ + """Number of available virtual or physical CPUs on this system""" if Utils._cpu_count is None: try: - m = re.search(r'(?m)^Cpus_allowed:\s*(.*)$', - open('/proc/self/status').read()) - if m: - res = bin(int(m.group(1).replace(',', ''), 16)).count('1') + match = re.search(r'(?m)^Cpus_allowed:\s*(.*)$', open('/proc/self/status').read()) + if match: + res = bin(int(match.group(1).replace(',', ''), 16)).count('1') if res > 0: Utils._cpu_count = res except IOError: @@ -420,12 +536,14 @@ class Utils(object): @staticmethod def convert2netcdf4(filetoconvert): """ - Checks if a file is in netCDF4 format and converts to netCDF4 if not + Convert a file to NetCDF4 - :param filetoconvert: file to convert - :type filetoconvert: str - """ + Conversion only performed if required. Deflation level set to 4 and shuffle activated. 
+ Parameters + ---------- + filetoconvert: str + """ if Utils._is_compressed_netcdf4(filetoconvert): return @@ -437,10 +555,12 @@ class Utils(object): @classmethod def _is_compressed_netcdf4(cls, filetoconvert): is_compressed = True - handler = Utils.openCdf(filetoconvert) + handler = Utils.open_cdf(filetoconvert) if not handler.file_format == 'NETCDF4': is_compressed = False + handler.close() else: + handler.close() ncdump_result = Utils.execute_shell_command('ncdump -hs {0}'.format(filetoconvert), Log.NO_LOG) ncdump_result = ncdump_result[0].replace('\t', '').split('\n') for var in handler.variables: @@ -450,37 +570,39 @@ class Utils(object): if not '{0}:_Shuffle = "true" ;'.format(var) in ncdump_result: is_compressed = False break - - handler.close() return is_compressed # noinspection PyPep8Naming @staticmethod - def openCdf(filepath, mode='a'): + def open_cdf(filepath, mode='a'): """ - Opens a netCDF file and returns a handler to it + Open a NetCDF file + + Parameters + ---------- + filepath: str + mode: str, optional - :param filepath: path to the file - :type filepath: str - :param mode: mode to open the file. 
By default, a (append) - :type mode: str - :return: handler to the file - :rtype: netCDF4.Dataset + Returns + ------- + netCDF4.Dataset """ return netCDF4.Dataset(filepath, mode) @staticmethod def get_datetime_from_netcdf(handler, time_variable='time'): """ - Gets a datetime array from a netCDF file + Get time from NetCDF files + + Parameters + ---------- + handler: netCDF4.Dataset + time_variable: str, optional - :param handler: file to read - :type handler: netCDF4.Dataset - :param time_variable: variable to read, by default 'time' - :type time_variable: str - :return: Datetime numpy array created from the values stored at the netCDF file - :rtype: np.array + Returns + ------- + numpy.array of Datetime """ var_time = handler.variables[time_variable] nctime = var_time[:] # get values @@ -495,22 +617,22 @@ class Utils(object): @staticmethod def copy_variable(source, destiny, variable, must_exist=True, add_dimensions=False, new_names=None): """ - Copies the given variable from source to destiny - - :param add_dimensions: if it's true, dimensions required by the variable will be automatically added to the - file. 
It will also add the dimension variable - :type add_dimensions: bool - :param source: origin file - :type source: netCDF4.Dataset - :param destiny: destiny file - :type destiny: netCDF4.Dataset - :param variable: variable to copy - :type variable: str - :param must_exist: if false, does not raise an error uf variable does not exist - :type must_exist: bool - :param new_names: dictionary containing variables to rename and new name as key-value pairs - :type new_names: dict - :return: + Copy the given variable from source to destiny + + Parameters + ---------- + source: netCDF4.Dataset + destiny: netCDF4.Dataset + variable: str + must_exist: bool, optional + add_dimensions: bool, optional + new_names: dict of str: str + + Raises + ------ + Exception + If dimensions are not correct in the destiny file and add_dimensions is False + """ if not must_exist and variable not in source.variables.keys(): return @@ -538,32 +660,41 @@ class Utils(object): original_var = source.variables[variable] new_var = destiny.createVariable(new_name, original_var.datatype, translated_dimensions) Utils.copy_attributes(new_var, original_var) + if hasattr(new_var, 'coordinates'): + coords = [new_names[coord] if coord in new_names else coord for coord in new_var.coordinates.split(' ')] + new_var.coordinates = Utils.convert_to_ascii_if_possible(' '.join(coords)) + new_var[:] = original_var[:] @staticmethod def copy_attributes(new_var, original_var, omitted_attributtes=None): + """ + Copy attributtes from one variable to another + + Parameters + ---------- + new_var: netCDF4.Variable + original_var: netCDF4.Variable + omitted_attributtes: iterable of str + Collection of attributtes that should not be copied + """ if omitted_attributtes is None: omitted_attributtes = [] - new_var.setncatts({k: Utils.convert_to_ASCII_if_possible(original_var.getncattr(k)) + new_var.setncatts({k: Utils.convert_to_ascii_if_possible(original_var.getncattr(k)) for k in original_var.ncattrs() if k not in 
omitted_attributtes}) @staticmethod def copy_dimension(source, destiny, dimension, must_exist=True, new_names=None): """ - Copies the given dimension from source to destiny, including dimension variables if present - - :param new_names: dictionary containing variables to rename and new name as key-value pairs - :type new_names: dict - :param source: origin file - :type source: netCDF4.Dataset - :param destiny: destiny file - :type destiny: netCDF4.Dataset - :param dimension: variable to copy - :type dimension: str - :param must_exist: if false, does not raise an error uf variable does not exist - :type must_exist: bool + Copy the given dimension from source to destiny, including dimension variables if present - :return: + Parameters + ---------- + source: netCDF4.Dataset + destiny: netCDF4.Dataset + dimension: str + must_exist: bool, optional + new_names: dict of str: str or None, optional """ if not must_exist and dimension not in source.dimensions.keys(): return @@ -585,16 +716,18 @@ class Utils(object): def concat_variables(source, destiny, remove_source=False): """ Add variables from a nc file to another - :param source: path to source file - :type source: str - :param destiny: path to destiny file - :type destiny: str - :param remove_source: if True, removes source file - :type remove_source: bool + + Parameters + ---------- + source: str + destiny: str + remove_source: bool + if True, removes source file + """ if os.path.exists(destiny): - handler_total = Utils.openCdf(destiny) - handler_variable = Utils.openCdf(source) + handler_total = Utils.open_cdf(destiny) + handler_variable = Utils.open_cdf(source) concatenated = dict() for var in handler_variable.variables: if var not in handler_total.variables: @@ -620,9 +753,8 @@ class Utils(object): Utils.convert2netcdf4(destiny) class ExecutionError(Exception): - """ - Exception to raise when a command execution fails - """ + """Exception to raise when a command execution fails""" + pass @classmethod @@ -638,53 
+770,70 @@ class Utils(object): @staticmethod def create_folder_tree(path): """ - Createas a fodle path will and parent directories if needed. - :param path: folder's path - :type path: str + Create a folder path with all parent directories if needed. + + Parameters + ---------- + path: str + """ if not os.path.exists(path): # noinspection PyBroadException try: os.makedirs(path) - except Exception: - # Here we can have a race condition. Let's check again for existence and rethrow if still not exists + except OSError: + # This could happen if two threads are tying to create the folder. + # Let's check again for existence and rethrow if still not exists if not os.path.isdir(path): raise @staticmethod def give_group_write_permissions(path): - st = os.stat(path) - if st.st_mode & stat.S_IWGRP: + """Give write permissions to the group""" + stats = os.stat(path) + if stats.st_mode & stat.S_IWGRP: return - os.chmod(path, st.st_mode | stat.S_IWGRP) + os.chmod(path, stats.st_mode | stat.S_IWGRP) @staticmethod def convert_units(var_handler, new_units, calendar=None, old_calendar=None): + """ + Convert units + + Parameters + ---------- + var_handler: Dataset + new_units: str + calendar: str + old_calendar: str + """ if new_units == var_handler.units: return if hasattr(var_handler, 'calendar'): old_calendar = var_handler.calendar - new_unit = Units(new_units, calendar=calendar) - old_unit = Units(var_handler.units, calendar=old_calendar) - var_handler[:] = Units.conform(var_handler[:], old_unit, new_unit, inplace=True) + new_unit = cf_units.Unit(new_units, calendar=calendar) + old_unit = cf_units.Unit(var_handler.units, calendar=old_calendar) + var_handler[:] = old_unit.convert(var_handler[:], new_unit, inplace=True) if 'valid_min' in var_handler.ncattrs(): - var_handler.valid_min = Units.conform(float(var_handler.valid_min), old_unit, new_unit, - inplace=True) + var_handler.valid_min = old_unit.convert(float(var_handler.valid_min), new_unit, + inplace=True) if 'valid_max' in 
var_handler.ncattrs(): - var_handler.valid_max = Units.conform(float(var_handler.valid_max), old_unit, new_unit, - inplace=True) + var_handler.valid_max = old_unit.convert(float(var_handler.valid_max), new_unit, + inplace=True) var_handler.units = new_units @staticmethod def untar(files, destiny_path): """ Untar files to a given destiny - :param files: files to unzip - :type files: list[Any] | Tuple[Any] - :param destiny_path: path to destination folder - :type destiny_path: str + + Parameters + ---------- + files: iterable of str + destiny_path: str + """ for filepath in files: Log.debug('Unpacking {0}', filepath) @@ -703,10 +852,11 @@ class Utils(object): def unzip(files, force=False): """ Unzip a list of files - :param files: files to unzip - :type files: list | str - :param force: if True, it will overwrite unzipped files - :type force: bool + + files: str or iterable of str + force: bool, optional + if True, it will overwrite unzipped files + """ # noinspection PyTypeChecker if isinstance(files, six.string_types): @@ -723,16 +873,13 @@ class Utils(object): raise Utils.UnzipException('Can not unzip {0}: {1}'.format(filepath, ex)) class UnzipException(Exception): - """ - Excpetion raised when unzip fails - """ + """Exception raised when unzip fails""" + pass class TempFile(object): - """ - Class to manage temporal files - """ + """Class to manage temporal files""" autoclean = True """ @@ -754,7 +901,7 @@ class TempFile(object): @staticmethod def get(filename=None, clean=None, suffix='.nc'): """ - Gets a new temporal filename, storing it for automated cleaning + Get a new temporal filename, storing it for automated cleaning :param suffix: :param filename: if it is not none, the function will use this filename instead of a random one @@ -770,9 +917,9 @@ class TempFile(object): if filename: path = os.path.join(TempFile.scratch_folder, filename) else: - fd, path = tempfile.mkstemp(dir=TempFile.scratch_folder, prefix=TempFile.prefix, suffix=suffix) + file_descriptor, 
path = tempfile.mkstemp(dir=TempFile.scratch_folder, prefix=TempFile.prefix, suffix=suffix) path = str(path) - os.close(fd) + os.close(file_descriptor) if clean: TempFile.files.append(path) @@ -781,11 +928,8 @@ class TempFile(object): @staticmethod def clean(): - """ - Removes all temporary files created with Tempfile until now - """ + """Remove all temporary files created with Tempfile until now""" for temp_file in TempFile.files: if os.path.exists(temp_file): os.remove(temp_file) TempFile.files = list() - diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index 588d18e24c9338f925fd481de9797f468daaef9c..fee14f52f230571f751303649bf6a5fe55edec50 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -1,4 +1,5 @@ # coding=utf-8 +"""Classes to manage variable definitions and aliases""" import csv import glob import json @@ -10,22 +11,32 @@ from bscearth.utils.log import Log from earthdiagnostics.constants import Basins from earthdiagnostics.frequency import Frequency from earthdiagnostics.modelingrealm import ModelingRealms +from concurrent.futures import ThreadPoolExecutor class VariableJsonException(Exception): + """Exception to be raised when an error related to the json reading is encountered""" + pass class VariableManager(object): + """Class for translating variable alias into standard names and provide the correct description for them""" + def __init__(self): self._cmor_tables_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cmor_tables') self._aliases_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'variable_alias') + self.clean() + + def clean(self): + """Clean all information contained in the variable manager""" self._dict_variables = {} + self._dict_aliases = {} self.tables = {} def get_variable(self, original_name, silent=False): """ - Returns the cmor variable instance given a variable name + Return the cmor variable instance given a variable name :param original_name: 
original variable's name :type original_name: str @@ -43,7 +54,7 @@ class VariableManager(object): def get_all_variables(self): """ - Returns all variables + Return all variables :return: CMOR variable list :rtype: set[Variable] @@ -53,7 +64,7 @@ class VariableManager(object): def get_variable_and_alias(self, original_name, silent=False): """ - Returns the cmor variable instance given a variable name + Return the cmor variable instance given a variable name :param original_name: original variable's name :type original_name: str @@ -71,13 +82,18 @@ class VariableManager(object): def load_variables(self, table_name): """ - Loads the CMOR csv and creates the variables dictionary + Load the CMOR csv and creates the variables dictionary + + Parameters + ---------- + table_name: str + """ self._dict_variables = dict() self._load_variable_list(table_name) self._load_missing_defaults() self._load_known_aliases(table_name) - self._construct_aliases_dict() + self.create_aliases_dict() def _load_variable_list(self, table_name): @@ -107,7 +123,7 @@ class VariableManager(object): return json_folder def _load_file(self, csv_table_path, default=False): - with open(self._get_csv_path(csv_table_path), 'rb') as csvfile: + with open(self._get_csv_path(csv_table_path), 'r') as csvfile: reader = csv.reader(csvfile, dialect='excel') for line in reader: if line[0] == 'Variable': @@ -119,19 +135,37 @@ class VariableManager(object): continue var.default = default - self._dict_variables[var.short_name.lower()] = var + self.register_variable(var) + + def register_variable(self, var): + """ + Register variable info + + Parameters + ---------- + var: Variable + + """ + self._dict_variables[var.short_name.lower()] = var def _load_json(self, json_folder): + executor = ThreadPoolExecutor() for file_name in os.listdir(json_folder): if file_name in ('CMIP6_grids.json', 'CMIP6_formula_terms.json'): continue - json_data = open(os.path.join(json_folder, file_name)).read() + + 
executor.submit(self._load_json_file, os.path.join(json_folder, file_name)) + executor.shutdown(True) + + def _load_json_file(self, json_path): + with open(json_path) as json_file: + json_data = json_file.read() try: data = json.loads(json_data) except ValueError: - continue + return if 'variable_entry' in data: - Log.debug('Parsing file {0}'.format(file_name)) + Log.debug('Parsing file {0}'.format(json_path)) table_id = data['Header']['table_id'][6:] table = CMORTable(table_id, Frequency(data['Header']['frequency']), data['Header']['table_date']) self.tables[table_id] = table @@ -149,7 +183,7 @@ class VariableManager(object): try: variable.parse_json(json_data[short_name], short_name) variable.add_table(table) - self._dict_variables[variable.short_name.lower()] = variable + self.register_variable(variable) except VariableJsonException: Log.error('Could not read variable {0}'.format(short_name)) @@ -159,18 +193,14 @@ class VariableManager(object): def _load_alias_csv(self, filename): file_path = self._get_aliases_csv_path(filename) - if not os.path.isfile(file_path): - return - with open(file_path, 'rb') as csvfile: + with open(file_path, 'r') as csvfile: reader = csv.reader(csvfile, dialect='excel') for line in reader: if line[0] == 'Aliases': continue - aliases = line[0].split(':') - if line[1] not in aliases: - aliases.append(line[1]) + aliases = self._get_aliases(line) cmor_vars = [] for alias in aliases: @@ -191,30 +221,42 @@ class VariableManager(object): Log.error('Aliases {0} can be be mapped to multiple variables ' '[{1}]'.format(aliases, ', '.join(map(str, cmor_vars)))) continue + cmor_var = cmor_vars[0] + self._register_aliases(aliases, cmor_var, line) - for alias in aliases: - if alias != cmor_var.short_name and alias in self._dict_variables: - Log.error('Alias {0} for variable {1} is already a different ' - 'variable!'.format(alias, cmor_var.short_name)) - continue - alias_object = VariableAlias(alias) - if line[2]: - alias_object.basin = 
Basins().parse(line[2]) - if line[3]: - alias_object.grid = line[3] - cmor_var.known_aliases.append(alias_object) + @staticmethod + def _get_aliases(line): + aliases = line[0].split(':') + if line[1] not in aliases: + aliases.append(line[1]) + return aliases + + def _register_aliases(self, aliases, cmor_var, line): + for alias in aliases: + if alias != cmor_var.short_name and alias in self._dict_variables: + Log.error('Alias {0} for variable {1} is already a different ' + 'variable!'.format(alias, cmor_var.short_name)) + continue + alias_object = VariableAlias(alias) + if line[2]: + alias_object.basin = Basins().parse(line[2]) + if line[3]: + alias_object.grid = line[3] + cmor_var.known_aliases.append(alias_object) def _get_aliases_csv_path(self, filename): csv_table_path = os.path.join(self._aliases_folder, '{0}.csv'.format(filename)) return csv_table_path - def _construct_aliases_dict(self): + def create_aliases_dict(self): + """Create aliases dictionary for the registered variables""" self._dict_aliases = {} for cmor_var_name in self._dict_variables: cmor_var = self._dict_variables[cmor_var_name] - if cmor_var_name not in cmor_var.known_aliases: - cmor_var.known_aliases.append(VariableAlias(cmor_var_name)) + base_alias = VariableAlias(cmor_var_name) + if base_alias not in cmor_var.known_aliases: + cmor_var.known_aliases.append(base_alias) for alias in cmor_var.known_aliases: self._dict_aliases[alias.alias] = (alias, cmor_var) @@ -237,48 +279,49 @@ class VariableManager(object): for row in data_sheet.rows: if row[1].value in excel.sheetnames: table_data[row[1].value] = (Frequency(row[2].value), 'Date missing') - for sheet_name in excel.sheetnames: - try: - sheet = excel.get_sheet_by_name(sheet_name) - if sheet.title == 'Primday': - pass - if sheet['A1'].value != 'Priority': - continue - table_frequency, table_date = table_data[sheet.title] - table = CMORTable(sheet.title, table_frequency, table_date) - self.tables[sheet.title] = table - for row in sheet.rows: - 
if row[0].value == 'Priority' or not row[5].value: - continue - - cmor_name = row[11].value - if not cmor_name: - cmor_name = row[5].value - - priority = int(row[0].value) - bsc_commitment = row[30].value - if bsc_commitment is not None and bsc_commitment.strip().lower() == 'false': - priority = priority + 3 - - if cmor_name.lower() in self._dict_variables: - var = self._dict_variables[cmor_name.lower()] - else: - var = Variable() - var.short_name = cmor_name - var.standard_name = row[6].value - var.long_name = row[1].value + sheet = excel.get_sheet_by_name(sheet_name) + if sheet.title == 'Primday': + pass + if sheet['A1'].value != 'Priority': + continue + self._load_xlsx_table(sheet, table_data) - var.domain = self._process_modelling_realm(var, row[12].value) + def _load_xlsx_table(self, sheet, table_data): + try: + table_frequency, table_date = table_data[sheet.title] + table = CMORTable(sheet.title, table_frequency, table_date) + self.tables[sheet.title] = table + for row in sheet.rows: + if row[0].value == 'Priority' or not row[5].value: + continue + self._parse_xlsx_var_row(row, table) + except Exception as ex: + Log.error('Table {0} can not be loaded: {1}', sheet.title, ex) + import traceback + traceback.print_exc() + + def _parse_xlsx_var_row(self, row, table): + cmor_name = row[11].value + if not cmor_name: + cmor_name = row[5].value + priority = int(row[0].value) + bsc_commitment = row[30].value + if bsc_commitment is not None and bsc_commitment.strip().lower() == 'false': + priority = priority + 3 + if cmor_name.lower() in self._dict_variables: + var = self._dict_variables[cmor_name.lower()] + else: + var = Variable() + var.short_name = cmor_name + var.standard_name = row[6].value + var.long_name = row[1].value - var.units = row[2].value - self._dict_variables[var.short_name.lower()] = var - var.add_table(table, priority) + var.domain = self._process_modelling_realm(var, row[12].value) - except Exception as ex: - Log.error('Table {0} can not be loaded: 
{1}', sheet_name, ex) - import traceback - traceback.print_exc() + var.units = row[2].value + self.register_variable(var) + var.add_table(table, priority) @staticmethod def _process_modelling_realm(var, value): @@ -293,7 +336,9 @@ class VariableManager(object): class Variable(object): """ - Class to characterize a CMOR variable. It also contains the static method to make the match between thje original + Class to characterize a CMOR variable. + + It also contains the static method to make the match between the original name and the standard name. Requires data _convetion to be available in cmor_tables to work. """ @@ -317,14 +362,34 @@ class Variable(object): self.tables = [] def add_table(self, table, priority=None): + """ + Add table to variable + + Parameters + ---------- + table: CMORTable + priority: int or None, optional + + """ self.tables.append((table, priority)) - def parse_json(self, json_var, key): + def parse_json(self, json_var, variable): + """ + Parse variable json + + Parameters + ---------- + json_var: dict of str: str + variable: str + + Returns + ------- + """ if 'out_name' in json_var: self.short_name = json_var['out_name'].strip() else: - raise VariableJsonException('Variable has no out name defined'.format(key)) + raise VariableJsonException('Variable {0} has no out name defined'.format(variable)) self.standard_name = json_var['standard_name'].strip() self.long_name = json_var['long_name'].strip() @@ -342,6 +407,18 @@ class Variable(object): self.priority = 1 def get_modelling_realm(self, domains): + """ + Get var modelling realm + + Parameters + ---------- + domains: iterable of str + + Returns + ------- + ModelingRealm or None + + """ if len(domains) > 1: Log.warning('Multiple modeling realms assigned to variable {0}: {1}. 
', self, domains) parsed = [] @@ -364,6 +441,14 @@ class Variable(object): return ModelingRealms.parse(domains[0]) def parse_csv(self, var_line): + """ + Fill the object information from a csv line + + Parameters + ---------- + var_line: list of str + + """ self.short_name = var_line[1].strip() self.standard_name = var_line[2].strip() self.long_name = var_line[3].strip() @@ -375,7 +460,27 @@ class Variable(object): self.grid = var_line[9].strip() def get_table(self, frequency, data_convention): - for table, priority in self.tables: + """ + Get a table object given the frequency and data_covention + + If the variable does not contain the table information, it uses the domain to make a guess + + Parameters + ---------- + frequency: Frequency + data_convention: str + + Returns + ------- + CMORTable + + Raises + ------ + ValueError + If a table can not be deduced from the given parameters + + """ + for table, _ in self.tables: if table.frequency == frequency: return table if self.domain: @@ -403,14 +508,21 @@ class Variable(object): class VariableAlias(object): """ - Class to characterize a CMOR variable. It also contains the static method to make the match between thje original + Class to characterize a CMOR variable. + + It also contains the static method to make the match between the original name and the standard name. Requires data _convetion to be available in cmor_tables to work. 
+ + Parameters + ---------- + alias: str + """ - def __init__(self, alias): + def __init__(self, alias, basin=None, grid=None): self.alias = alias - self.basin = None - self.grid = None + self.basin = basin + self.grid = grid def __str__(self): string = self.alias @@ -420,21 +532,53 @@ class VariableAlias(object): string += ' Grid: {0}'.format(self.grid) return string + def __eq__(self, other): + if other is None: + return False + return self.alias == other.alias and self.grid == other.grid and self.basin == other.basin + + def __ne__(self, other): + return not self == other + class CMORTable(object): + """ + Class to represent a CMOR table + + Parameters + ---------- + name: str + frequency: Frequency + date: str + """ + def __init__(self, name, frequency, date): self.name = name - self.frequency = Frequency.parse(frequency) + self.frequency = frequency self.date = date def __str__(self): return self.name def __repr__(self): - return '{0.name} ({0.frequency}, {0.date}'.format(self) + return '{0.name} ({0.frequency}, {0.date})'.format(self) def __lt__(self, other): return self.name < other.name +class VariableType(object): + """Enumeration of variable types""" + + MEAN = 1 + STATISTIC = 2 + @staticmethod + def to_str(vartype): + """Get str representation of vartype for the folder convention""" + if vartype == VariableType.MEAN: + return 'mean' + elif vartype == VariableType.STATISTIC: + return 'statistics' + else: + raise ValueError('Variable type {0} not supported'.format(vartype)) diff --git a/earthdiagnostics/variable_alias/cmip6.csv b/earthdiagnostics/variable_alias/cmip6.csv index e6c8a679f5ea3fc1969546c0f8a6de6d71769da4..62d12675ef58727d30e1f4709a216b96e5c596fa 100644 --- a/earthdiagnostics/variable_alias/cmip6.csv +++ b/earthdiagnostics/variable_alias/cmip6.csv @@ -78,4 +78,3 @@ hflx_rain_cea,hfrainds,, hflx_cal_cea,hfibthermds2d,, rain,prra,, calving,ficeberg2d,, - diff --git a/earthdiagnostics/variable_alias/meteofrance.csv 
b/earthdiagnostics/variable_alias/meteofrance.csv new file mode 100644 index 0000000000000000000000000000000000000000..abb99d5efa983b044037db11656d46a16a2934b3 --- /dev/null +++ b/earthdiagnostics/variable_alias/meteofrance.csv @@ -0,0 +1,4 @@ +Aliases,Shortname,Basin,Grid +iiceconc:siconc:soicecov:ileadfra,soicecov,, +ci,sic,,ifs +es,sbl,, diff --git a/earthdiagnostics/variable_alias/primavera.csv b/earthdiagnostics/variable_alias/primavera.csv index db38cd5c1c7448696e8583c0d94739cce1790e54..c64d2303eeda50a2316f2e7a08ca5907cb694a49 100644 --- a/earthdiagnostics/variable_alias/primavera.csv +++ b/earthdiagnostics/variable_alias/primavera.csv @@ -7,3 +7,81 @@ hflx_rain_cea,hfrainds,, hflx_cal_cea,hfibthermds2d,, rain,prra,, calving,ficeberg2d,, +alk,talk,, +oxygen,o2,, +calcite,calc,, +po4,po4,, +poc,poc,, +silicate,si,, +nanophy,nanophy,, +microzoo,zmicro,, +doc,dissoc,, +diaphy,phydiat,, +mesozoo,zmeso,, +dsi,dsi,, +dissfe,dfe,, +bfe,bfe,, +goc,goc,, +sfe,sfe,, +dfe,dfe,, +micrzoo,zmicro,, +nfe,nfe,, +nchl,nchl,, +dchl,chldiat,, +nitrate,no3,, +ammonium,nh4,, +pno3tot,pno3tot,, +psiltot,psiltot,, +palktot,palktot,, +pfertot,pfertot,, +tcflx,tcflx,, +tcflxcum,tcflxcum,, +c-export,c-export,, +tintpp,tintpp,, +tnfix,tnfix,, +tdenit,tdenit,, +intppnew,intppnew,, +inttppnew,inttppnew,, +inttpbfe,pbfe,, +intdic,intdic,, +o2min,o2min,, +zo2min,zo2min,, +intnfix,intpn2,, +intppphy,intppphy,, +intppphy2,intppdiat,, +ppphy ,ppphy ,, +ppphy2 ,pdi,, +intpp,intpp,, +inttpp,inttpp,, +intpbfe,intpbfe,, +intpbsi,intpbsi,, +intpbcal,intpbcal,, +cflx,cflx,, +remin,remin,, +denit,denit,, +nfix,nfix,, +sdenit,sdenit,, +dpco2,dpco2,, +epc100,epc100,, +expc,expc,, +par,par,, +lnnut,lnnut,, +ldnut,ldnut,, +lnfe,lnfe,, +ldfe,limfediat,, +lnlight,lnlight,, +ldlight,ldlight,, +graz1,graz1,, +graz2,graz2,, +mumax,mumax,, +mun,mun,, +mud,mud,, +ppnewn,ppnewn,, +ppnewd,ppnewd,, +alb_ice,sialb,, +qsr3d,rsdo,, +hflx_rnf_cea,hfrunoffds2d,, +hflx_rain_cea,hfrainds,, +hflx_cal_cea,hfibthermds2d,, 
+rain,prra,, +calving,ficeberg2d,, diff --git a/earthdiagnostics/variable_type.py b/earthdiagnostics/variable_type.py deleted file mode 100644 index 4b3f17daa0b515582f1870cbbc7f05dacfa3379a..0000000000000000000000000000000000000000 --- a/earthdiagnostics/variable_type.py +++ /dev/null @@ -1,13 +0,0 @@ -# coding=utf-8 -class VariableType(object): - MEAN = 1 - STATISTIC = 2 - - @staticmethod - def to_str(vartype): - if vartype == VariableType.MEAN: - return 'mean' - elif vartype == VariableType.STATISTIC: - return 'statistics' - else: - raise ValueError('Variable type {0} not supported'.format(vartype)) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 398486a4474974d5fa0b2d1240eca44e792961a8..385a5525e2c4cac75a510964b5d5c8d474525ca2 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -1,24 +1,31 @@ # coding=utf-8 +"""Earthdiagnostics workflow manager""" import datetime import operator import sys import threading import time import traceback - -from bscearth.utils.log import Log # noinspection PyCompatibility from concurrent.futures import ThreadPoolExecutor +from functools import cmp_to_key + +from bscearth.utils.log import Log from earthdiagnostics.datafile import StorageStatus, LocalStatus from earthdiagnostics.diagnostic import DiagnosticStatus, Diagnostic, DiagnosticOptionError -from earthdiagnostics.general import * -from earthdiagnostics.ocean import * -from earthdiagnostics.statistics import * from earthdiagnostics.utils import Utils, TempFile class WorkManager(object): + """ + Class to produce and control the workflow of EarthDiagnostics + + Parameters + ---------- + config: Config + data_manager: DataManager + """ def __init__(self, config, data_manager): self.jobs = None @@ -28,6 +35,7 @@ class WorkManager(object): self.data_manager = data_manager def prepare_job_list(self): + """Create the list of jobs to run""" self._register_diagnostics() list_jobs = list() for fulldiag in 
self.config.get_commands(): @@ -40,7 +48,8 @@ class WorkManager(object): for job in diag_class.generate_jobs(self, diag_options): list_jobs.append(job) for subjob in job.subjobs: - list_jobs.append(subjob) + if subjob not in list_jobs: + list_jobs.append(subjob) continue except DiagnosticOptionError as ex: Log.error('Can not configure diagnostic {0}: {1}', diag_options[0], ex) @@ -51,10 +60,18 @@ class WorkManager(object): self.jobs = list_jobs def run(self): + """ + Run all the diagnostics + + Returns + ------- + bool + Only True if all diagnostic were correctly executed + """ if len(self.jobs) == 0: Log.result('No diagnostics to run') return True - + start_time = datetime.datetime.now() Log.info("Starting to compute at {0}", start_time) self.threads = Utils.available_cpu_count() @@ -98,8 +115,8 @@ class WorkManager(object): finish_time = datetime.datetime.now() Log.result("Diagnostics finished at {0}", finish_time) Log.result("Elapsed time: {0}\n", finish_time - start_time) - self.print_errors() - self.print_stats() + self._print_errors() + self._print_stats() return not self.had_errors def _job_status_changed(self, job): @@ -110,7 +127,7 @@ class WorkManager(object): del self.data_manager.requested_files[request.remote_file] request.unsubscribe(self) request.clean_local() - self.check_completion() + self._check_completion() def _file_object_status_changed(self, file_object): Log.debug('Checking file {0}. 
Local status {0.local_status} Storage status{0.storage_status}', file_object) @@ -127,42 +144,54 @@ class WorkManager(object): del self.data_manager.requested_files[file_object.remote_file] file_object.unsubscribe(self) file_object.clean_local() - self.check_completion() + self._check_completion() - def check_completion(self): - counter = 0 - for job in self.jobs: - if job.status == DiagnosticStatus.READY: - counter += 1 - if counter > 20: - break + def _check_completion(self): + self._pause_downloader_if_required() - self.downloader.on_hold = counter > 20 + if self._jobs_running_or_ready(): + return False + if self._data_downloading_or_uploading(): + return False + + try: + self.lock.release() + except threading.ThreadError: + pass + return True + def _jobs_running_or_ready(self): for job in self.jobs: if job.status in (DiagnosticStatus.READY, DiagnosticStatus.RUNNING): - return False + return True if job.status == DiagnosticStatus.WAITING: if job.all_requests_in_storage(): - return False + return True + return False + def _data_downloading_or_uploading(self): for request in self.data_manager.requested_files.values(): if request.storage_status == StorageStatus.UPLOADING: - return False + return True if request.local_status == LocalStatus.DOWNLOADING: - return False + return True if request.upload_required(): - return False + return True if request.download_required(): - return False - try: - self.lock.release() - except Exception: - pass - return True + return True + return False + + def _pause_downloader_if_required(self): + counter = 0 + for job in self.jobs: + if job.status == DiagnosticStatus.READY: + counter += 1 + if counter > 20: + break + self.downloader.on_hold = counter > 20 - def print_stats(self): + def _print_stats(self): Log.info('Time consumed by each diagnostic class') Log.info('--------------------------------------') @@ -177,7 +206,7 @@ class WorkManager(object): for diag in sorted(times, key=operator.itemgetter(1)): Log.info('{0:23} {1:}', diag, 
times[diag]) - def print_errors(self): + def _print_errors(self): failed = [job for job in self.jobs if job.status == DiagnosticStatus.FAILED] if len(failed) == 0: return @@ -191,20 +220,20 @@ class WorkManager(object): @staticmethod def _run_job(job): - time = datetime.datetime.now() + start_time = datetime.datetime.now() try: Log.info('Starting {0}', job) job.status = DiagnosticStatus.RUNNING job.compute() except Exception as ex: - job.consumed_time = datetime.datetime.now() - time - exc_type, exc_value, exc_traceback = sys.exc_info() + job.consumed_time = datetime.datetime.now() - start_time + exc_type, _, exc_traceback = sys.exc_info() job.message = '{0}\n{1}'.format(ex, ''.join(traceback.format_tb(exc_traceback))) - Log.error('Job {0} failed: {1}', job, job.message ) + Log.error('Job {0} failed ({2}): {1}', job, job.message, exc_type) job.status = DiagnosticStatus.FAILED return False - job.consumed_time = datetime.datetime.now() - time + job.consumed_time = datetime.datetime.now() - start_time Log.result('Finished {0}', job) job.status = DiagnosticStatus.COMPLETED return True @@ -219,6 +248,11 @@ class WorkManager(object): @staticmethod def _register_stats_diagnostics(): + from earthdiagnostics.statistics.monthlypercentile import MonthlyPercentile + from earthdiagnostics.statistics.climatologicalpercentile import ClimatologicalPercentile + from earthdiagnostics.statistics.daysoverpercentile import DaysOverPercentile + from earthdiagnostics.statistics.discretize import Discretize + Diagnostic.register(MonthlyPercentile) Diagnostic.register(ClimatologicalPercentile) Diagnostic.register(DaysOverPercentile) @@ -226,6 +260,16 @@ class WorkManager(object): @staticmethod def _register_general_diagnostics(): + from earthdiagnostics.general.attribute import Attribute + from earthdiagnostics.general.timemean import DailyMean, MonthlyMean, YearlyMean + from earthdiagnostics.general.module import Module + from earthdiagnostics.general.rewrite import Rewrite + from 
earthdiagnostics.general.relink import Relink + from earthdiagnostics.general.relinkall import RelinkAll + from earthdiagnostics.general.scale import Scale + from earthdiagnostics.general.verticalmeanmetersiris import VerticalMeanMetersIris + from earthdiagnostics.general.simplify_dimensions import SimplifyDimensions + Diagnostic.register(DailyMean) Diagnostic.register(MonthlyMean) Diagnostic.register(YearlyMean) @@ -240,6 +284,28 @@ class WorkManager(object): @staticmethod def _register_ocean_diagnostics(): + from earthdiagnostics.ocean.mixedlayerheatcontent import MixedLayerHeatContent + from earthdiagnostics.ocean.mixedlayersaltcontent import MixedLayerSaltContent + from earthdiagnostics.ocean.siasiesiv import Siasiesiv + from earthdiagnostics.ocean.verticalmean import VerticalMean + from earthdiagnostics.ocean.verticalmeanmeters import VerticalMeanMeters + from earthdiagnostics.ocean.verticalgradient import VerticalGradient + from earthdiagnostics.ocean.interpolate import Interpolate + from earthdiagnostics.ocean.interpolatecdo import InterpolateCDO + from earthdiagnostics.ocean.moc import Moc + from earthdiagnostics.ocean.areamoc import AreaMoc + from earthdiagnostics.ocean.maxmoc import MaxMoc + from earthdiagnostics.ocean.psi import Psi + from earthdiagnostics.ocean.gyres import Gyres + from earthdiagnostics.ocean.convectionsites import ConvectionSites + from earthdiagnostics.ocean.cutsection import CutSection + from earthdiagnostics.ocean.averagesection import AverageSection + from earthdiagnostics.ocean.heatcontentlayer import HeatContentLayer + from earthdiagnostics.ocean.heatcontent import HeatContent + from earthdiagnostics.ocean.regionmean import RegionMean + from earthdiagnostics.ocean.regionsum import RegionSum + from earthdiagnostics.ocean.rotation import Rotation + Diagnostic.register(MixedLayerSaltContent) Diagnostic.register(Siasiesiv) Diagnostic.register(VerticalMean) @@ -258,11 +324,18 @@ class WorkManager(object): 
Diagnostic.register(HeatContentLayer) Diagnostic.register(HeatContent) Diagnostic.register(RegionMean) + Diagnostic.register(RegionSum) Diagnostic.register(Rotation) Diagnostic.register(VerticalGradient) class Downloader(object): + """ + Download manager for EarthDiagnostics + + We are not using a ThreadPoolExecutor because we want to be able to control priorities in the download + """ + def __init__(self): self._downloads = [] self._lock = threading.Lock() @@ -270,46 +343,18 @@ class Downloader(object): self.on_hold = False def start(self): - self._thread = threading.Thread(target=self.downloader) + """Create the downloader thread and initialize it""" + self._thread = threading.Thread(target=self._downloader) self._thread.start() def submit(self, datafile): + """Add a datafile to the download queue""" self._lock.acquire() self._downloads.append(datafile) self._lock.release() - def downloader(self): + def _downloader(self): try: - def suscribers_waiting(datafile): - waiting = 0 - for diag in datafile.suscribers: - if not isinstance(diag, Diagnostic): - continue - if diag.pending_requests() == 1: - waiting += 1 - return waiting - - def prioritize(datafile1, datafile2): - waiting = suscribers_waiting(datafile1) - suscribers_waiting(datafile2) - if waiting: - return -waiting - - suscribers = len(datafile1.suscribers) - len(datafile2.suscribers) - if suscribers: - return -suscribers - - if datafile1.size is None: - if datafile2.size is None: - return 0 - else: - return -1 - elif datafile2.size is None: - return 1 - size = datafile1.size - datafile2.size - if size: - return -size - return 0 - while True: with self._lock: if len(self._downloads) == 0 or self.on_hold: @@ -317,14 +362,46 @@ class Downloader(object): return time.sleep(0.01) continue - self._downloads.sort(prioritize) + self._downloads.sort(key=cmp_to_key(Downloader._prioritize)) datafile = self._downloads[0] self._downloads.remove(datafile) datafile.download() except Exception as ex: Log.critical('Unhandled 
error at downloader: {0}\n{1}', ex, traceback.print_exc()) + @staticmethod + def _suscribers_waiting(datafile): + waiting = 0 + for diag in datafile.suscribers: + if not isinstance(diag, Diagnostic): + continue + if diag.pending_requests() == 1: + waiting += 1 + return waiting + + @staticmethod + def _prioritize(datafile1, datafile2): + waiting = Downloader._suscribers_waiting(datafile1) - Downloader._suscribers_waiting(datafile2) + if waiting: + return -waiting + + suscribers = len(datafile1.suscribers) - len(datafile2.suscribers) + if suscribers: + return -suscribers + + if datafile1.size is None: + if datafile2.size is None: + return 0 + else: + return -1 + elif datafile2.size is None: + return 1 + size = datafile1.size - datafile2.size + if size: + return -size + return 0 + def shutdown(self): + """Stop the downloader after all downloads have finished""" self.stop = True self._thread.join() - diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..14d620055e705e0214f91ecc4d3b850a6bf96835 --- /dev/null +++ b/environment.yml @@ -0,0 +1,30 @@ +--- + +name: earthdiagnostics +channels: +- conda-forge + +dependencies: +- iris +- netcdf4 +- numpy +- cdo +- nco +- python-cdo +- coverage +- pygrib +- psutil +- six +- cf_units +- openpyxl +- mock +- cmake +- coverage + +- pip: + - bscearth.utils + - futures + - nco + - exrex + - xxhash + - codacy-coverage diff --git a/fix.py b/fix.py deleted file mode 100644 index b5444a530a0369baa12eb15ea9c03409abd817f9..0000000000000000000000000000000000000000 --- a/fix.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import shutil - -base_path = '/esarchive/exp/PREFACE/ctrl/cmorfiles/Cerfacs' -for experiment in os.listdir(base_path): - exp_path = os.path.join(base_path, experiment) - if not os.path.isdir(exp_path): - continue - for startdate in os.listdir(exp_path): - if not os.path.isdir(os.path.join(exp_path, startdate)): - continue - for freq in 
os.listdir(os.path.join(exp_path, startdate)): - for domain in os.listdir(os.path.join(exp_path, startdate, freq)): - for var in os.listdir(os.path.join(exp_path, startdate, freq, domain)): - for member in os.listdir(os.path.join(exp_path, startdate, freq, domain, var)): - for version in os.listdir(os.path.join(exp_path, startdate, freq, domain, var, member)): - for filename in os.listdir(os.path.join(exp_path, startdate, freq, domain, var, member, version)): - print(os.path.join(exp_path, startdate, freq, domain, var, member, version, filename)) - print(os.path.join(exp_path, startdate, freq, domain, var, member, version, - filename.replace('_CNRM-CM-HR_', '_CNRM-CM-HR_{0}_'.format(experiment)))) - print('') - shutil.move(os.path.join(exp_path, startdate, freq, domain, var, member, version, filename), - os.path.join(exp_path, startdate, freq, domain, var, member, version, - filename.replace('_CNRM-CM-HR_', '_CNRM-CM-HR_{0}_'.format(experiment)))) - - - # original_tos_path = os.path.join(exp_path, startdate, 'mon/atmos/tos') - # if os.path.isdir(original_tos_path): - # new_tos_path = os.path.join(exp_path, startdate, 'mon/ocean') - # if not os.path.isdir(new_tos_path): - # os.makedirs(new_tos_path) - # shutil.move(original_tos_path, new_tos_path) \ No newline at end of file diff --git a/launch_diags.sh b/launch_diags.sh index 477a1ca4069f72efad559e9052a14227487b5318..df9a004b1826525927f211e3cb86959793db7736 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -1,25 +1,23 @@ #!/usr/bin/env bash -#SBATCH -n 4 +#SBATCH -n 1 #SBATCH --time 7-00:00:00 #SBATCH --error=job.%J.err #SBATCH --output=job.%J.out -PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf -PATH_TO_DIAGNOSTICS=~jvegas/earthdiagnostics -PATH_TO_CONDAENV=diags +PATH_TO_CONF_FILE=~rfernand/run_earthdiagnostics/m04p/diags.conf +PATH_TO_DIAGNOSTICS=~jvegas/PycharmProjects/earthdiagnostics +PATH_TO_CONDAENV=/home/Earth/jvegas/.conda/envs/earthdiagnostics3/ module purge -module load NCO/4.5.4-foss-2015a 
-module load CDO/1.7.2-foss-2015a module load CDFTOOLS/3.0a8-foss-2015a module load Miniconda2 set -xv -source activate diags +source activate ${PATH_TO_CONDAENV} export PYTHONPATH=${PATH_TO_DIAGNOSTICS}:${PYTHONPATH} cd ${PATH_TO_DIAGNOSTICS}/earthdiagnostics/ diff --git a/patch.bash b/patch.bash deleted file mode 100755 index 60e7b8f885c9e7462391aee55c33bd030ec2869c..0000000000000000000000000000000000000000 --- a/patch.bash +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -o nounset -set -o errexit - -exp=a0fj -sd=19580101 -root=esarchive -mod=nemo - -if [ -d /${root}/exp/${mod}/${exp}/original_files ] -then - echo "folder already exists" - exit -fi - -mkdir -p /${root}/exp/${mod}/${exp}/original_files -mv /${root}/exp/${mod}/${exp}/${sd} /${root}/exp/${mod}/${exp}/original_files -ln -sf /${root}/exp/${mod}/${exp}/original_files/${sd} /${root}/exp/${mod}/${exp}/${sd} diff --git a/readthedocs.yml b/readthedocs.yml new file mode 100644 index 0000000000000000000000000000000000000000..40c095421d2ee9cb3d07b654aac4caf5f4a8c532 --- /dev/null +++ b/readthedocs.yml @@ -0,0 +1,2 @@ +conda: + file: environment.yml \ No newline at end of file diff --git a/setup.py b/setup.py index 0e68a0892e2f5df5df0cd7afc2a14c8a7475dd62..81ea7ac88c192975f366114399a03b20da94823f 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # coding=utf-8 -""" -Installation script for EarthDiagnostics package -""" +"""Installation script for EarthDiagnostics package""" from os import path @@ -27,7 +25,7 @@ setup( keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo>=1.3.4', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', - 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits', 'xxhash', 'six', 'psutil', + 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'xxhash', 'six', 'psutil', 'exrex'], packages=find_packages(), include_package_data=True, diff --git a/test/run_test.py b/test/run_test.py 
index 59eec7e98a18694833f660a50431d2ea696de630..12614344898e370d45c3c0b4c2849bfe73ac8884 100644 --- a/test/run_test.py +++ b/test/run_test.py @@ -3,18 +3,23 @@ Script to run the tests for EarthDiagnostics and generate the code coverage report """ + +import os +import sys +work_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +os.chdir(work_path) +print(work_path) import coverage import unittest -import os -work_path = os.path.abspath('.') -source_path = os.path.join(work_path, '..', 'earthdiagnostics', '*') -print(source_path) -cov = coverage.Coverage(include=source_path) -cov.set_option("run:branch", True) -cov.set_option("html:title", 'Coverage report for EarthDiagnostics') +cov = coverage.Coverage() cov.start() -suite = unittest.TestLoader().discover('.') +if len(sys.argv) == 1: + suite = unittest.TestLoader().discover('.') +else: + suite = unittest.TestLoader().discover('.', pattern=sys.argv[1]) + + unittest.TextTestRunner(verbosity=2).run(suite) cov.stop() diff --git a/test/unit/general/test_attribute.py b/test/unit/general/test_attribute.py index da6d4146b090a3e529678a332f1ba9fe705a8eb1..5616b347327da016bd16340d9543c307e9c0a72b 100644 --- a/test/unit/general/test_attribute.py +++ b/test/unit/general/test_attribute.py @@ -50,5 +50,5 @@ class TestAttribute(TestCase): def test_str(self): mixed = Attribute(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', 'att', 'value') - self.assertEquals(str(mixed), 'Write attributte output Startdate: 20010101 Member: 0 Chunk: 0 ' + self.assertEqual(str(mixed), 'Write attributte output Startdate: 20010101 Member: 0 Chunk: 0 ' 'Variable: atmos:var Attributte: att:value Grid: grid') diff --git a/test/unit/general/test_dailymean.py b/test/unit/general/test_dailymean.py index ec85d9dcea51e903b8be04b8f3c57b4a9769eb2b..8fa092c634374c89a8fc4f1eb060cee3aab1f2ec 100644 --- a/test/unit/general/test_dailymean.py +++ b/test/unit/general/test_dailymean.py @@ -1,12 +1,12 @@ # coding=utf-8 from 
unittest import TestCase -from earthdiagnostics.diagnostic import DiagnosticVariableOption -from earthdiagnostics.box import Box -from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.general.dailymean import DailyMean from mock import Mock, patch +from earthdiagnostics.box import Box +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.general.timemean import DailyMean from earthdiagnostics.modelingrealm import ModelingRealms @@ -51,5 +51,5 @@ class TestDailyMean(TestCase): def test_str(self): mixed = DailyMean(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', 'freq', '') - self.assertEquals(str(mixed), 'Calculate daily mean Startdate: 20000101 Member: 1 Chunk: 1 ' + self.assertEqual(str(mixed), 'Calculate daily mean Startdate: 20000101 Member: 1 Chunk: 1 ' 'Variable: ocean:var Original frequency: freq Grid: ') diff --git a/test/unit/general/test_module.py b/test/unit/general/test_module.py index e01ecf8c277db292cc8c786f399f473caa826229..218223b9f12e8ce9ecdc0f1613f5b698fe32f7dd 100644 --- a/test/unit/general/test_module.py +++ b/test/unit/general/test_module.py @@ -50,5 +50,5 @@ class TestModule(TestCase): def test_str(self): mixed = Module(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'varu', 'varv', 'varmodule', 'grid') - self.assertEquals(str(mixed), 'Calculate module Startdate: 20010101 Member: 0 Chunk: 0 ' + self.assertEqual(str(mixed), 'Calculate module Startdate: 20010101 Member: 0 Chunk: 0 ' 'Variables: atmos:varu,varv,varmodule Grid: grid') diff --git a/test/unit/general/test_monthlymean.py b/test/unit/general/test_monthlymean.py index b31561ac1fbfe9a9d8e40e80a9329b141cb27ed8..adc75cabdd11aa21322d7254d2aecb93df40cc3b 100644 --- a/test/unit/general/test_monthlymean.py +++ b/test/unit/general/test_monthlymean.py @@ -1,12 +1,12 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.diagnostic import 
DiagnosticVariableOption -from earthdiagnostics.box import Box -from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.general.monthlymean import MonthlyMean from mock import Mock, patch +from earthdiagnostics.box import Box +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.general.timemean import MonthlyMean from earthdiagnostics.modelingrealm import ModelingRealms @@ -30,14 +30,6 @@ class TestMonthlyMean(TestCase): @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): - - jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], MonthlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - Frequencies.daily, '')) - self.assertEqual(jobs[1], MonthlyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - Frequencies.daily, '')) - jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'monthly']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], MonthlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', @@ -59,5 +51,5 @@ class TestMonthlyMean(TestCase): MonthlyMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.mixed), 'Calculate monthly mean Startdate: 20000101 Member: 1 Chunk: 1 ' - 'Variable: ocean:var') + self.assertEqual(str(self.mixed), 'Calculate monthly mean Startdate: 20000101 Member: 1 Chunk: 1 ' + 'Variable: ocean:var Original frequency: freq Grid: ') diff --git a/test/unit/general/test_relink.py b/test/unit/general/test_relink.py index 835b0bcd6f944847430720abade1e52dc95357b3..970bb0ead44a43d94186e66ed979d12143f67d11 100644 --- a/test/unit/general/test_relink.py +++ b/test/unit/general/test_relink.py @@ -57,5 +57,5 @@ class TestRelink(TestCase): def test_str(self): mixed = 
Relink(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', True, 'grid') - self.assertEquals(str(mixed), 'Relink output Startdate: 20010101 Member: 0 Chunk: 0 Move old: True ' + self.assertEqual(str(mixed), 'Relink output Startdate: 20010101 Member: 0 Chunk: 0 Move old: True ' 'Variable: ocean:var Grid: grid') diff --git a/test/unit/general/test_relinkall.py b/test/unit/general/test_relinkall.py index cf8c9a163437d5d868401a54b6df62195d0e1f7a..22bd7612c8be3120d2fa0f569e832de2aca063ee 100644 --- a/test/unit/general/test_relinkall.py +++ b/test/unit/general/test_relinkall.py @@ -6,8 +6,6 @@ from earthdiagnostics.box import Box from earthdiagnostics.general.relinkall import RelinkAll from mock import Mock, patch -from earthdiagnostics.modelingrealm import ModelingRealms - class TestRelinkAll(TestCase): @@ -36,4 +34,4 @@ class TestRelinkAll(TestCase): def test_str(self): mixed = RelinkAll(self.data_manager, '20010101') - self.assertEquals(str(mixed), 'Relink all output Startdate: 20010101') + self.assertEqual(str(mixed), 'Relink all output Startdate: 20010101') diff --git a/test/unit/general/test_rewrite.py b/test/unit/general/test_rewrite.py index 202e2c39afddfaae8e23f3edd2be7ff3f74a336d..4a6fd038e538e97fd0e20ede8a214885f289dc15 100644 --- a/test/unit/general/test_rewrite.py +++ b/test/unit/general/test_rewrite.py @@ -47,5 +47,5 @@ class TestRewrite(TestCase): Rewrite.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.mixed), 'Rewrite output Startdate: 20000101 Member: 1 Chunk: 1 ' + self.assertEqual(str(self.mixed), 'Rewrite output Startdate: 20000101 Member: 1 Chunk: 1 ' 'Variable: atmos:var Grid: grid') diff --git a/test/unit/general/test_scale.py b/test/unit/general/test_scale.py index e7697cc26076c55e7a8d50ea65031a74305d3ac0..23e1f7b7be192429b26cc07d0342f28cb34b3f74 100644 --- a/test/unit/general/test_scale.py +++ b/test/unit/general/test_scale.py @@ -1,12 +1,12 @@ # 
coding=utf-8 from unittest import TestCase -from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError -from earthdiagnostics.box import Box -from earthdiagnostics.general.scale import Scale -from earthdiagnostics.frequency import Frequencies from mock import Mock, patch +from earthdiagnostics.box import Box +from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.general.scale import Scale from earthdiagnostics.modelingrealm import ModelingRealms @@ -32,40 +32,48 @@ class TestScale(TestCase): jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', '', - float('nan'), float('nan'), Frequencies.monthly)) + float('nan'), float('nan'), Frequencies.monthly, False)) self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', '', - float('nan'), float('nan'), Frequencies.monthly)) + float('nan'), float('nan'), Frequencies.monthly, False)) jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', - float('nan'), float('nan'), Frequencies.monthly)) + float('nan'), float('nan'), Frequencies.monthly, False)) self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', 'grid', - float('nan'), float('nan'), Frequencies.monthly)) + float('nan'), float('nan'), Frequencies.monthly, False)) jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', - 0, 
100, Frequencies.monthly)) + 0, 100, Frequencies.monthly, False)) self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', 'grid', - 0, 100, Frequencies.monthly)) + 0, 100, Frequencies.monthly, False)) jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100', '3hr']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', - 0, 100, Frequencies.three_hourly)) + 0, 100, Frequencies.three_hourly, False)) + self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', 'grid', + 0, 100, Frequencies.three_hourly, False)) + + jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100', '3hr', + True]) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', + 0, 100, Frequencies.three_hourly, True)) self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', 'grid', - 0, 100, Frequencies.three_hourly)) + 0, 100, Frequencies.three_hourly, True)) with self.assertRaises(DiagnosticOptionError): Scale.generate_jobs(self.diags, ['diagnostic']) - with self.assertRaises(DiagnosticOptionError): - Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100', '3hr', - 'extra']) + with self.assertRaises(DiagnosticOptionError): + Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100', '3hr', 'True', + 'extra']) def test_str(self): mixed = Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', 0, 100, - Frequencies.three_hourly) - self.assertEquals(str(mixed), 'Scale output Startdate: 20010101 Member: 0 Chunk: 0 Scale value: 0 Offset: 0 ' - 'Variable: atmos:var Frequency: 3hr') + Frequencies.three_hourly, 
False) + self.assertEqual(str(mixed), 'Scale output Startdate: 20010101 Member: 0 Chunk: 0 Scale value: 0 Offset: 0 ' + 'Variable: atmos:var Frequency: 3hr Apply mask: False') diff --git a/test/unit/general/test_select_levels.py b/test/unit/general/test_select_levels.py index 32e7424da69948ecf986eb8295b3e4a0b03bb168..bca61711b2bba7669207baa4d962106cd1bdccdc 100644 --- a/test/unit/general/test_select_levels.py +++ b/test/unit/general/test_select_levels.py @@ -62,5 +62,5 @@ class TestSelectLevels(TestCase): def test_str(self): mixed = SelectLevels(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', 0, 20) - self.assertEquals(str(mixed), 'Select levels Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + self.assertEqual(str(mixed), 'Select levels Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' 'Levels: 0-20 Grid: grid') diff --git a/test/unit/general/test_simplify_dimensions.py b/test/unit/general/test_simplify_dimensions.py index 429ad6f2344ed5d5dd4f67d38c89ed53b1ffd807..5a36288e1c878e7e1aa38bcff9863bdd68933b5a 100644 --- a/test/unit/general/test_simplify_dimensions.py +++ b/test/unit/general/test_simplify_dimensions.py @@ -19,6 +19,7 @@ class TestSimplifyDimensions(TestCase): self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) self.diags.config.experiment.startdates = ['20010101', ] self.diags.config.frequency = Frequencies.monthly + self.diags.config.data_convention = 'convention' self.box = Box() self.box.min_depth = 0 @@ -32,24 +33,24 @@ class TestSimplifyDimensions(TestCase): jobs = SimplifyDimensions.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], SimplifyDimensions(self.data_manager, '20010101', 0, 0, - ModelingRealms.atmos, 'var', '')) + ModelingRealms.atmos, 'var', '', 'convention')) self.assertEqual(jobs[1], SimplifyDimensions(self.data_manager, '20010101', 0, 1, - ModelingRealms.atmos, 'var', '')) 
+ ModelingRealms.atmos, 'var', '', 'convention')) jobs = SimplifyDimensions.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'grid']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], SimplifyDimensions(self.data_manager, '20010101', 0, 0, - ModelingRealms.atmos, 'var', 'grid')) + ModelingRealms.atmos, 'var', 'grid', 'convention')) self.assertEqual(jobs[1], SimplifyDimensions(self.data_manager, '20010101', 0, 1, - ModelingRealms.atmos, 'var', 'grid')) + ModelingRealms.atmos, 'var', 'grid', 'convention')) with self.assertRaises(DiagnosticOptionError): SimplifyDimensions.generate_jobs(self.diags, ['diagnostic']) - with self.assertRaises(DiagnosticOptionError): - SimplifyDimensions.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'grid', 'extra']) + with self.assertRaises(DiagnosticOptionError): + SimplifyDimensions.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'grid', 'extra']) def test_str(self): - mixed = SimplifyDimensions(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid') - self.assertEquals(str(mixed), 'Simplify dimension Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + mixed = SimplifyDimensions(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', 'convention') + self.assertEqual(str(mixed), 'Simplify dimension Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' 'Grid: grid') diff --git a/test/unit/general/test_verticalmeanmetersiris.py b/test/unit/general/test_verticalmeanmetersiris.py index cd2876fe9f2a62b91ee28616a5dee862b8f79dbf..ca972cac9a4ae91b57b6c10860530ce451f168ed 100644 --- a/test/unit/general/test_verticalmeanmetersiris.py +++ b/test/unit/general/test_verticalmeanmetersiris.py @@ -1,7 +1,7 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError +from earthdiagnostics.diagnostic import DiagnosticVariableListOption, DiagnosticOptionError from earthdiagnostics.box import 
Box from earthdiagnostics.general.verticalmeanmetersiris import VerticalMeanMetersIris from earthdiagnostics.frequency import Frequencies @@ -27,14 +27,14 @@ class TestVerticalMeanMetersIris(TestCase): def fake_parse(self, value): if not value: raise DiagnosticOptionError - return value + return [value] - @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) def test_generate_jobs(self): box = Box(True) - jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var']) + jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box)) @@ -45,14 +45,14 @@ class TestVerticalMeanMetersIris(TestCase): box.min_depth = 0 box.max_depth = 100 - jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100']) + jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', '0', '100']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box)) self.assertEqual(jobs[1], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', box)) - jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100', 'seaIce']) + jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'seaice', 'var', '0', '100']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, ModelingRealms.seaIce, 'var', box)) @@ -63,12 +63,13 @@ class TestVerticalMeanMetersIris(TestCase): VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic']) with self.assertRaises(DiagnosticOptionError): - VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100', 
'seaIce', 'extra']) + VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', '0', '100', 'seaIce', + 'extra']) def test_str(self): box = Box(True) box.min_depth = 0 box.max_depth = 100 mixed = VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', box) - self.assertEquals(str(mixed), 'Vertical mean meters Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + self.assertEqual(str(mixed), 'Vertical mean meters Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' 'Box: 0-100m') diff --git a/test/unit/general/test_yearlymean.py b/test/unit/general/test_yearlymean.py index dcf5ad75ea16b60f061c44d17e8eada524cdca56..36b48eaf8fca6a9ea1c83787c83db9342dc8ff38 100644 --- a/test/unit/general/test_yearlymean.py +++ b/test/unit/general/test_yearlymean.py @@ -1,12 +1,12 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.diagnostic import DiagnosticVariableOption -from earthdiagnostics.box import Box -from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.general.yearlymean import YearlyMean from mock import Mock, patch +from earthdiagnostics.box import Box +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.general.timemean import YearlyMean from earthdiagnostics.modelingrealm import ModelingRealms @@ -30,14 +30,6 @@ class TestYearlyMean(TestCase): @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): - - jobs = YearlyMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], YearlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - Frequencies.monthly, '')) - self.assertEqual(jobs[1], YearlyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - Frequencies.monthly, '')) - jobs = YearlyMean.generate_jobs(self.diags, ['diagnostic', 'atmos', 
'var', 'day']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], YearlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', @@ -59,5 +51,5 @@ class TestYearlyMean(TestCase): YearlyMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.mixed), 'Calculate yearly mean Startdate: 20000101 Member: 1 Chunk: 1 ' + self.assertEqual(str(self.mixed), 'Calculate yearly mean Startdate: 20000101 Member: 1 Chunk: 1 ' 'Variable: ocean:var Original frequency: freq Grid: ') diff --git a/test/unit/ocean/test_areamoc.py b/test/unit/ocean/test_areamoc.py index c1361b8393676bc6fee3106ebf1bd451b34abd92..648ce675f8c77d943bd77f3849294b43ac84a4f6 100644 --- a/test/unit/ocean/test_areamoc.py +++ b/test/unit/ocean/test_areamoc.py @@ -53,5 +53,5 @@ class TestAreaMoc(TestCase): AreaMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.psi), 'Area MOC Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0 ' + self.assertEqual(str(self.psi), 'Area MOC Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0 ' 'Basin: Atlantic') diff --git a/test/unit/ocean/test_averagesection.py b/test/unit/ocean/test_averagesection.py index d3be4b2bba95b3f66515ee07af45eab51d42b1d5..a5905589db9c993aac9ae483973e8255edcefcd1 100644 --- a/test/unit/ocean/test_averagesection.py +++ b/test/unit/ocean/test_averagesection.py @@ -51,5 +51,5 @@ class TestAverageSection(TestCase): def test_str(self): diag = AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', self.box, 'grid') - self.assertEquals(str(diag), 'Average section Startdate: 20010101 Member: 0 Chunk: 0 Box: 0N0E ' + self.assertEqual(str(diag), 'Average section Startdate: 20010101 Member: 0 Chunk: 0 Box: 0N0E ' 'Variable: ocean:var Grid: grid') diff --git a/test/unit/ocean/test_convectionsites.py b/test/unit/ocean/test_convectionsites.py index 
63710d054e6a4f937cb9c22e8e78d659c80f3c73..c9468b0880018ded6223864d2738aafeb30d0f26 100644 --- a/test/unit/ocean/test_convectionsites.py +++ b/test/unit/ocean/test_convectionsites.py @@ -25,4 +25,4 @@ class TestConvectionSites(TestCase): ConvectionSites.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.psi), 'Convection sites Startdate: 20000101 Member: 1 Chunk: 1') + self.assertEqual(str(self.psi), 'Convection sites Startdate: 20000101 Member: 1 Chunk: 1') diff --git a/test/unit/ocean/test_cutsection.py b/test/unit/ocean/test_cutsection.py index a658b06d5524c96b2e2bf4f73e32958fcffcef4d..e27102a0b1107e1f985e141cc8d39cf0383d32c9 100644 --- a/test/unit/ocean/test_cutsection.py +++ b/test/unit/ocean/test_cutsection.py @@ -51,5 +51,5 @@ class TestCutSection(TestCase): CutSection.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.psi), 'Cut section Startdate: 20000101 Member: 1 Chunk: 1 Variable: atmos:var ' + self.assertEqual(str(self.psi), 'Cut section Startdate: 20000101 Member: 1 Chunk: 1 Variable: atmos:var ' 'Zonal: True Value: 0') diff --git a/test/unit/ocean/test_gyres.py b/test/unit/ocean/test_gyres.py index 8a0255835c0373f317d9885864a2e5df14a10610..9d3a13d3c74df5fdd9ff0b19e5bb49816bd63820 100644 --- a/test/unit/ocean/test_gyres.py +++ b/test/unit/ocean/test_gyres.py @@ -26,4 +26,4 @@ class TestGyres(TestCase): Gyres.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.gyres), 'Gyres Startdate: 20000101 Member: 1 Chunk: 1 Model version: model_version') + self.assertEqual(str(self.gyres), 'Gyres Startdate: 20000101 Member: 1 Chunk: 1 Model version: model_version') diff --git a/test/unit/ocean/test_heatcontent.py b/test/unit/ocean/test_heatcontent.py index f842bc13c2dcedcdc0dc586216da4aabc7b9529f..a2387016bff607bf9f668ea4a8a0bed66ba69b42 100644 
--- a/test/unit/ocean/test_heatcontent.py +++ b/test/unit/ocean/test_heatcontent.py @@ -44,5 +44,5 @@ class TestHeatContent(TestCase): def test_str(self): diag = HeatContent(self.data_manager, '20010101', 0, 0, Basins().Global, -1, self.box, 1, 20) - self.assertEquals(str(diag), 'Heat content Startdate: 20010101 Member: 0 Chunk: 0 Mixed layer: -1 Box: 0-100 ' + self.assertEqual(str(diag), 'Heat content Startdate: 20010101 Member: 0 Chunk: 0 Mixed layer: -1 Box: 0-100 ' 'Basin: Global') diff --git a/test/unit/ocean/test_heatcontentlayer.py b/test/unit/ocean/test_heatcontentlayer.py index bf8135c65ed2a39b633660b410e14874f058766c..9c1974057b3981002292ff9d2c8c0485d893af8e 100644 --- a/test/unit/ocean/test_heatcontentlayer.py +++ b/test/unit/ocean/test_heatcontentlayer.py @@ -22,4 +22,4 @@ class TestHeatContentLayer(TestCase): def test_str(self): diag = HeatContentLayer(self.data_manager, '20000101', 1, 1, self.box, self.weight, 0, 10) - self.assertEquals(str(diag), 'Heat content layer Startdate: 20000101 Member: 1 Chunk: 1 Box: 0-100m') + self.assertEqual(str(diag), 'Heat content layer Startdate: 20000101 Member: 1 Chunk: 1 Box: 0-100m') diff --git a/test/unit/ocean/test_interpolate.py b/test/unit/ocean/test_interpolate.py index 4d19949ff4b0d09a33057008119c9e14c33f7493..8bd284dc6f382dd6ebeeea2b23b5c88d21f83b94 100644 --- a/test/unit/ocean/test_interpolate.py +++ b/test/unit/ocean/test_interpolate.py @@ -71,6 +71,6 @@ class TestInterpolate(TestCase): def test_str(self): diag = Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', 'model_version', True, 'original_grid') - self.assertEquals(str(diag), 'Interpolate Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + self.assertEqual(str(diag), 'Interpolate Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' 'Target grid: grid Invert lat: True Model: model_version ' 'Original grid: original_grid') diff --git a/test/unit/ocean/test_interpolatecdo.py 
b/test/unit/ocean/test_interpolatecdo.py index 04b085521407b34572a6edc95f238906de21b8b0..fccf47de39cee86b4a77ee18543246375ce2a2e2 100644 --- a/test/unit/ocean/test_interpolatecdo.py +++ b/test/unit/ocean/test_interpolatecdo.py @@ -24,10 +24,14 @@ class TestInterpolate(TestCase): raise DiagnosticOptionError return value.split('-') - @patch('earthdiagnostics.ocean.interpolatecdo.InterpolateCDO._compute_weights') + @patch('earthdiagnostics.ocean.interpolatecdo.InterpolateCDO.compute_weights') + @patch('earthdiagnostics.ocean.interpolatecdo.InterpolateCDO.get_sample_grid_file') @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) - def test_generate_jobs(self, mock_weights): + @patch('os.remove') + @patch('earthdiagnostics.utils.TempFile.get') + def test_generate_jobs(self, mock_weights, mock_grid_file, mock_remove, mock_get): mock_weights.return_value = None + mock_get.return_value = 'path_to_weights' jobs = InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var']) self.assertEqual(len(jobs), 2) @@ -84,6 +88,6 @@ class TestInterpolate(TestCase): def test_str(self): diag = InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'atmos_grid', 'model_version', False, 'orig', None) - self.assertEquals(str(diag), 'Interpolate with CDO Startdate: 20010101 Member: 0 Chunk: 0 Variable: ocean:var ' + self.assertEqual(str(diag), 'Interpolate with CDO Startdate: 20010101 Member: 0 Chunk: 0 Variable: ocean:var ' 'Target grid: atmos_grid Original grid: orig Mask ocean: False ' 'Model: model_version') diff --git a/test/unit/ocean/test_maskland.py b/test/unit/ocean/test_maskland.py index ede009114fda18e2582a6ce7ba7372520d436192..22f4994b02468d73c319642a3a59c0cb321b6393 100644 --- a/test/unit/ocean/test_maskland.py +++ b/test/unit/ocean/test_maskland.py @@ -62,4 +62,4 @@ class TestMaskLand(TestCase): def test_str(self): diag = MaskLand(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 't', 'grid') - 
self.assertEquals(str(diag), 'Land mask Startdate: 20010101 Member: 0 Chunk: 0 Variable: ocean:var Grid: grid') + self.assertEqual(str(diag), 'Land mask Startdate: 20010101 Member: 0 Chunk: 0 Variable: ocean:var Grid: grid') diff --git a/test/unit/ocean/test_maxmoc.py b/test/unit/ocean/test_maxmoc.py index a1141d01c047a4f00fa2a16f27a67b5327e70bf5..3224050ecb03509171c9497d29f4569b1e560866 100644 --- a/test/unit/ocean/test_maxmoc.py +++ b/test/unit/ocean/test_maxmoc.py @@ -63,5 +63,5 @@ class TestMaxMoc(TestCase): MaxMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.maxmoc), 'Max moc Startdate: 20000101 Member: 1 Year: 2000 ' + self.assertEqual(str(self.maxmoc), 'Max moc Startdate: 20000101 Member: 1 Year: 2000 ' 'Box: 0.0N0m Basin: Global') diff --git a/test/unit/ocean/test_mixedlayerheatcontent.py b/test/unit/ocean/test_mixedlayerheatcontent.py index b7cf564bc9cdfabe924327ea0602ea3bc6d84478..8d94bd8168f630d90dd9b925a40869c3b35b106b 100644 --- a/test/unit/ocean/test_mixedlayerheatcontent.py +++ b/test/unit/ocean/test_mixedlayerheatcontent.py @@ -26,4 +26,4 @@ class TestMixedLayerHeatContent(TestCase): MixedLayerHeatContent.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.mixed), 'Mixed layer heat content Startdate: 20000101 Member: 1 Chunk: 1') + self.assertEqual(str(self.mixed), 'Mixed layer heat content Startdate: 20000101 Member: 1 Chunk: 1') diff --git a/test/unit/ocean/test_mixedlayersaltcontent.py b/test/unit/ocean/test_mixedlayersaltcontent.py index 7aa42f6691eff2d80c4290b1fa5d1505794543d2..9832e3f8c34895bbb202636821527eba0ffe5fb6 100644 --- a/test/unit/ocean/test_mixedlayersaltcontent.py +++ b/test/unit/ocean/test_mixedlayersaltcontent.py @@ -26,4 +26,4 @@ class TestMixedLayerSaltContent(TestCase): MixedLayerSaltContent.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def 
test_str(self): - self.assertEquals(str(self.mixed), 'Mixed layer salt content Startdate: 20000101 Member: 1 Chunk: 1') + self.assertEqual(str(self.mixed), 'Mixed layer salt content Startdate: 20000101 Member: 1 Chunk: 1') diff --git a/test/unit/ocean/test_moc.py b/test/unit/ocean/test_moc.py index 0c05f8c4fe7dd3632cc5b662aacda78c95dbfe95..f86f41d470fb0316d205e236a14000e7b5594352 100644 --- a/test/unit/ocean/test_moc.py +++ b/test/unit/ocean/test_moc.py @@ -26,4 +26,4 @@ class TestMoc(TestCase): Moc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.mixed), 'MOC Startdate: 20000101 Member: 1 Chunk: 1') + self.assertEqual(str(self.mixed), 'MOC Startdate: 20000101 Member: 1 Chunk: 1') diff --git a/test/unit/ocean/test_mxl.py b/test/unit/ocean/test_mxl.py index ead3fbbccfb2bfc40b6075089b8dc166326dcba8..5b63aeea20063b1898e36e194252415e81afe0f5 100644 --- a/test/unit/ocean/test_mxl.py +++ b/test/unit/ocean/test_mxl.py @@ -25,4 +25,4 @@ class TestMxl(TestCase): def test_str(self): diag = Mxl(self.data_manager, '20010101', 0, 0) - self.assertEquals(str(diag), 'Mixed layer Startdate: 20010101 Member: 0 Chunk: 0') + self.assertEqual(str(diag), 'Mixed layer Startdate: 20010101 Member: 0 Chunk: 0') diff --git a/test/unit/ocean/test_psi.py b/test/unit/ocean/test_psi.py index 019e9339ca281274d59ba15b07bf190ee9b457da..1bdf597ceeb5a8be9161d85e96487e85a48badf6 100644 --- a/test/unit/ocean/test_psi.py +++ b/test/unit/ocean/test_psi.py @@ -22,4 +22,4 @@ class TestPsi(TestCase): Psi.generate_jobs(self.diags, ['diagnostic', 'badoption']) def test_str(self): - self.assertEquals(str(self.psi), 'PSI Startdate: 20000101 Member: 1 Chunk: 1') + self.assertEqual(str(self.psi), 'PSI Startdate: 20000101 Member: 1 Chunk: 1') diff --git a/test/unit/ocean/test_region_mean.py b/test/unit/ocean/test_region_mean.py index efeeb7d70b8da45b3baaceeff3e79a9bb0193305..2ee865fa474025d884c0dceb508fa2000036756a 100644 --- 
a/test/unit/ocean/test_region_mean.py +++ b/test/unit/ocean/test_region_mean.py @@ -26,29 +26,29 @@ class TestRegionMean(TestCase): def test_generate_jobs(self): box = Box() - box.min_depth = 0 - box.max_depth = 0 + box.min_depth = -1 + box.max_depth = -1 jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'T', - box, True, Basins().Global, False, '')) - self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'T', - box, True, Basins().Global, False, '')) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', - box, True, Basins().Global, False, '')) - self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', - box, True, Basins().Global, False, '')) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', - box, True, Basins().Global, False, '')) - 
self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', - box, True, Basins().Global, False, '')) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) box = Box() box.min_depth = 1.0 @@ -56,32 +56,32 @@ class TestRegionMean(TestCase): jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', - box, True, Basins().Global, False, '')) - self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', - box, True, Basins().Global, False, '')) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + box, True, 'weights', False, Basins().Global)) jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', 'false']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box, False, Basins().Global, False, '')) - self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', box, False, Basins().Global, False, '')) jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 
'ocean', 'var', 'U', 'global', '1', '10', 'false', 'True']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box, False, Basins().Global, True, '')) - self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', box, False, Basins().Global, True, '')) jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', 'false', 'True', 'grid']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box, False, Basins().Global, True, 'grid')) - self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', box, False, Basins().Global, True, 'grid')) with self.assertRaises(DiagnosticOptionError): @@ -96,7 +96,7 @@ class TestRegionMean(TestCase): box.min_depth = 1 box.max_depth = 10 - diag = RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', box, False, - Basins().Global, True, 'grid') - self.assertEquals(str(diag), 'Region mean Startdate: 20010101 Member: 0 Chunk: 0 Variable: var Grid point: U ' - 'Box: 1-10 Save 3D: False Save variance: True Original grid: grid Basin: Global') + diag = RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box, False, 'file', + True, Basins().Global) + self.assertEqual(str(diag), 'Region mean Startdate: 20010101 Member: 0 Chunk: 0 Variable: var ' + 'Box: 1-10 Save 3D: 
False Save variance: True') diff --git a/test/unit/ocean/test_rotation.py b/test/unit/ocean/test_rotation.py new file mode 100644 index 0000000000000000000000000000000000000000..08c61fb1203f151306ca46ce601cf1c90bb70ca6 --- /dev/null +++ b/test/unit/ocean/test_rotation.py @@ -0,0 +1,46 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.ocean.rotation import Rotation +from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.diagnostic import DiagnosticOptionError, DiagnosticVariableOption +from mock import Mock, patch + + +class TestMixedLayerHeatContent(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + self.mixed = Rotation(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'varu', 'varv', 'exe') + + def fake_parse(self, value): + if not value: + raise DiagnosticOptionError + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = Rotation.generate_jobs(self.diags, ['diagnostic', 'ocean', 'varu', 'varv']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Rotation(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'varu', 'varv', + '/home/Earth/jvegas/pyCharm/cfutools/interpolation/rotateUVorca')) + self.assertEqual(jobs[1], Rotation(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'varu', 'varv', + '/home/Earth/jvegas/pyCharm/cfutools/interpolation/rotateUVorca')) + + jobs = Rotation.generate_jobs(self.diags, ['diagnostic', 'ocean', 'varu', 'varv', 'exe']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], + Rotation(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'varu', 'varv', 'exe')) + self.assertEqual(jobs[1], + Rotation(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'varu', 'varv', 'exe')) + + with 
self.assertRaises(Exception): + Rotation.generate_jobs(self.diags, ['diagnostic', 'ocean', 'varu', 'varv', 'exe', 'extra']) + + def test_str(self): + self.assertEqual(str(self.mixed), 'Rotate variables Startdate: 20000101 Member: 1 Chunk: 1 Variables: ocean:varu , ocean:varv') diff --git a/test/unit/ocean/test_siasiesiv.py b/test/unit/ocean/test_siasiesiv.py index 00f7c68ca53e22c4ca2f10d90b1d4d89e4176236..59ac3043f13d5431f2d1ea745a70da3d49883255 100644 --- a/test/unit/ocean/test_siasiesiv.py +++ b/test/unit/ocean/test_siasiesiv.py @@ -1,9 +1,10 @@ # coding=utf-8 from unittest import TestCase +from mock import Mock + from earthdiagnostics.constants import Basins from earthdiagnostics.ocean.siasiesiv import Siasiesiv -from mock import Mock class TestSiasiesiv(TestCase): @@ -15,7 +16,9 @@ class TestSiasiesiv(TestCase): self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) self.mask = Mock() - self.psi = Siasiesiv(self.data_manager, '20000101', 1, 1, Basins().Global, self.mask) + self.var_manager = Mock() + self.psi = Siasiesiv(self.data_manager, '20000101', 1, 1, Basins().Global, self.mask, self.var_manager, False) def test_str(self): - self.assertEquals(str(self.psi), 'Siasiesiv Startdate: 20000101 Member: 1 Chunk: 1 Basin: Global') + self.assertEqual(str(self.psi), + 'Siasiesiv Startdate: 20000101 Member: 1 Chunk: 1 Basin: Global Omit volume: False') diff --git a/test/unit/ocean/test_vertical_gradient.py b/test/unit/ocean/test_vertical_gradient.py index e2d9d22d32358059c73899c1c7aee834400dddab..c55aeded3cf09c862ab7e7bcd37611a2f0129c52 100644 --- a/test/unit/ocean/test_vertical_gradient.py +++ b/test/unit/ocean/test_vertical_gradient.py @@ -1,8 +1,6 @@ # coding=utf-8 from unittest import TestCase from earthdiagnostics.ocean.verticalgradient import VerticalGradient -from earthdiagnostics.modelingrealm import ModelingRealms -from earthdiagnostics.constants import Basins from earthdiagnostics.box import Box from 
earthdiagnostics.diagnostic import DiagnosticOptionError, DiagnosticVariableOption from mock import Mock, patch @@ -62,4 +60,4 @@ class TestVerticalGradient(TestCase): box.max_depth = 10 diag = VerticalGradient(self.data_manager, '20010101', 0, 0, 'var', box) - self.assertEquals(str(diag), 'Vertical gradient Startdate: 20010101 Member: 0 Chunk: 0 Variable: var Box: 1-10') + self.assertEqual(str(diag), 'Vertical gradient Startdate: 20010101 Member: 0 Chunk: 0 Variable: var Box: 1-10') diff --git a/test/unit/ocean/test_verticalmean.py b/test/unit/ocean/test_verticalmean.py index fc501afda0a4b1c3842c8fd60b16d4fb88b03b9f..f6ae18c2cd6b6801dd73da3a71c26819dd85b38a 100644 --- a/test/unit/ocean/test_verticalmean.py +++ b/test/unit/ocean/test_verticalmean.py @@ -53,5 +53,5 @@ class TestVerticalMean(TestCase): VerticalMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.mixed), 'Vertical mean Startdate: 20000101 Member: 1 Chunk: 1 Variable: var ' + self.assertEqual(str(self.mixed), 'Vertical mean Startdate: 20000101 Member: 1 Chunk: 1 Variable: var ' 'Box: 0-100') diff --git a/test/unit/ocean/test_verticalmeanmeters.py b/test/unit/ocean/test_verticalmeanmeters.py index f696d20d4adc81e59cb62d700cd2fcba7a0f79d0..ff6580a2b9547a714c7fa7eb86b60d7dc87ced24 100644 --- a/test/unit/ocean/test_verticalmeanmeters.py +++ b/test/unit/ocean/test_verticalmeanmeters.py @@ -60,5 +60,5 @@ class TestVerticalMeanMeters(TestCase): VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.mixed), 'Vertical mean meters Startdate: 20000101 Member: 1 Chunk: 1 ' + self.assertEqual(str(self.mixed), 'Vertical mean meters Startdate: 20000101 Member: 1 Chunk: 1 ' 'Variable: ocean:var Box: 0-100m') diff --git a/test/unit/statistics/test_climatologicalpercentile.py b/test/unit/statistics/test_climatologicalpercentile.py index 
12752caab2bd8fcc7322b7e80130a939f4652029..3a905cbb743cbcd189d222ed514d96c5085db3a8 100644 --- a/test/unit/statistics/test_climatologicalpercentile.py +++ b/test/unit/statistics/test_climatologicalpercentile.py @@ -38,5 +38,5 @@ class TestClimatologicalPercentile(TestCase): diagnostic = ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, 11, self.diags.config.experiment) - self.assertEquals(str(diagnostic), 'Climatological percentile Variable: ocean:var Period: 2000-2001 ' + self.assertEqual(str(diagnostic), 'Climatological percentile Variable: ocean:var Period: 2000-2001 ' 'Forecast month: 11') diff --git a/test/unit/statistics/test_daysoverpercentile.py b/test/unit/statistics/test_daysoverpercentile.py index cc225cd64a589cb1f453e2582777841cc6b2c2ff..afc1a03ec546decb3f54248098c99026cdb9e76a 100644 --- a/test/unit/statistics/test_daysoverpercentile.py +++ b/test/unit/statistics/test_daysoverpercentile.py @@ -13,14 +13,15 @@ class TestDaysOverPercentile(TestCase): self.data_manager = Mock() self.diags = Mock() self.diags.config.experiment.get_chunk_list.return_value = (('20011101', 0, 0), ('20011101', 0, 1)) + self.diags.config.experiment.startdates = ('20001101', '20011101') def test_generate_jobs(self): jobs = DaysOverPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 'var', '2000', '2001', '11']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, - 2000, 11)) + '20001101', 11)) self.assertEqual(jobs[1], DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, - 2001, 11)) + '20011101', 11)) with self.assertRaises(Exception): DaysOverPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 'var', '2000', '2001']) @@ -29,5 +30,5 @@ class TestDaysOverPercentile(TestCase): def test_str(self): diagnostic = DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, '20001101', 11) - 
self.assertEquals(str(diagnostic), 'Days over percentile Startdate: 20001101 Variable: ocean:var ' + self.assertEqual(str(diagnostic), 'Days over percentile Startdate: 20001101 Variable: ocean:var ' 'Climatology: 2000-2001') diff --git a/test/unit/statistics/test_discretize.py b/test/unit/statistics/test_discretize.py index 402a3772aff95aa7a6cf001cb049d2034be8847a..872c510944c4e67c29fb0c2b78dc2c70e6f791ae 100644 --- a/test/unit/statistics/test_discretize.py +++ b/test/unit/statistics/test_discretize.py @@ -50,11 +50,11 @@ class TestClimatologicalPercentile(TestCase): def test_str(self): diagnostic = Discretize(self.data_manager, '20000101', ModelingRealms.ocean, 'var', 2000, 10, 40) - self.assertEquals(str(diagnostic), 'Discretizing variable: ocean:var Startdate: 20000101 Bins: 2000 ' + self.assertEqual(str(diagnostic), 'Discretizing variable: ocean:var Startdate: 20000101 Bins: 2000 ' 'Range: [10, 40]') diagnostic = Discretize(self.data_manager, '20000101', ModelingRealms.ocean, 'var', 2000, float('nan'), float('nan')) - self.assertEquals(str(diagnostic), 'Discretizing variable: ocean:var Startdate: 20000101 Bins: 2000 ' + self.assertEqual(str(diagnostic), 'Discretizing variable: ocean:var Startdate: 20000101 Bins: 2000 ' 'Range: [None, None]') diff --git a/test/unit/statistics/test_monthlypercentile.py b/test/unit/statistics/test_monthlypercentile.py index a902ec89098794bdca2baabce94be5d1aa3d4107..8127d5a4cab83216867dbedfb9e41b2b2f70d14c 100644 --- a/test/unit/statistics/test_monthlypercentile.py +++ b/test/unit/statistics/test_monthlypercentile.py @@ -37,5 +37,5 @@ class TestMonthlyPercentile(TestCase): MonthlyPercentile.generate_jobs(self.diags, ['monpercent', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.diagnostic), 'Monthly percentile Startdate: 20000101 Member: 1 Chunk: 1 ' + self.assertEqual(str(self.diagnostic), 'Monthly percentile Startdate: 20000101 Member: 1 Chunk: 1 ' 'Variable: ocean:var Percentiles: 10, 90') 
diff --git a/test/unit/test_box.py b/test/unit/test_box.py index 37b5ce4911333419281366b9d850e219c8aacb8d..8f69f2fa7068fdb6a4891319219515c3b4495da4 100644 --- a/test/unit/test_box.py +++ b/test/unit/test_box.py @@ -69,25 +69,25 @@ class TestBox(TestCase): Box().min_lon = 80 def test_get_lat_str(self): - self.assertEquals('20S0N', self.box1.get_lat_str()) - self.assertEquals('20N', self.box2.get_lat_str()) - self.assertEquals('', self.box3.get_lat_str()) - self.assertEquals('20S10S', self.box4.get_lat_str()) + self.assertEqual('20S0N', self.box1.get_lat_str()) + self.assertEqual('20N', self.box2.get_lat_str()) + self.assertEqual('', self.box3.get_lat_str()) + self.assertEqual('20S10S', self.box4.get_lat_str()) def test_get_lon_str(self): - self.assertEquals('20W0E', self.box1.get_lon_str()) - self.assertEquals('20E', self.box2.get_lon_str()) - self.assertEquals('', self.box3.get_lon_str()) - self.assertEquals('20W10W', self.box4.get_lon_str()) + self.assertEqual('20W0E', self.box1.get_lon_str()) + self.assertEqual('20E', self.box2.get_lon_str()) + self.assertEqual('', self.box3.get_lon_str()) + self.assertEqual('20W10W', self.box4.get_lon_str()) def test_get_depth_str(self): - self.assertEquals('0-20', self.box1.get_depth_str()) - self.assertEquals('20m', self.box2.get_depth_str()) - self.assertEquals('', self.box3.get_depth_str()) - self.assertEquals('0-20', self.box4.get_depth_str()) + self.assertEqual('0-20', self.box1.get_depth_str()) + self.assertEqual('20m', self.box2.get_depth_str()) + self.assertEqual('', self.box3.get_depth_str()) + self.assertEqual('0-20', self.box4.get_depth_str()) def test__str__(self): - self.assertEquals('20S0N20W0E0-20', str(self.box1)) - self.assertEquals('20N20E20m', str(self.box2)) - self.assertEquals('', str(self.box3)) - self.assertEquals('20S10S20W10W0-20', str(self.box4)) + self.assertEqual('20S0N20W0E0-20', str(self.box1)) + self.assertEqual('20N20E20m', str(self.box2)) + self.assertEqual('', str(self.box3)) + 
self.assertEqual('20S10S20W10W0-20', str(self.box4)) diff --git a/test/unit/test_cdftools.py b/test/unit/test_cdftools.py index 28de4ac34e9b60726ddbcac9e1d7d3e7f38a8d1f..26150c2f0dc91abdbbe4d1bacd0ea02cbe012f73 100644 --- a/test/unit/test_cdftools.py +++ b/test/unit/test_cdftools.py @@ -1,10 +1,10 @@ # coding=utf-8 +import os from unittest import TestCase -import os +import mock from earthdiagnostics.cdftools import CDFTools -import mock # noinspection PyUnusedLocal @@ -13,53 +13,54 @@ def bad_file(path, access=None): class TestCDFTools(TestCase): - # noinspection PyUnusedLocal @mock.patch('os.path.isfile', side_effect=bad_file) @mock.patch('os.access', side_effect=bad_file) @mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') def test_run(self, mock_path, mock_exists, execute_mock): - self.cdftools = CDFTools('/test/path') + self.cdftools = CDFTools('') execute_mock.return_value = ['Command output'] with self.assertRaises(ValueError): - self.cdftools.run('badcommand', input='input_file', output='output_file') + self.cdftools.run('badcommand', input_file='input_file', output_file='output_file') with self.assertRaises(ValueError): - self.cdftools.run('command', input='badinput_file', output='output_file') + self.cdftools.run('command', input_file='badinput_file', output_file='output_file') with self.assertRaises(ValueError): - self.cdftools.run('command', input=['input_file', 'badinput_file'], output='output_file') + self.cdftools.run('command', input_file=['input_file', 'badinput_file'], output_file='output_file') with self.assertRaises(ValueError): - self.cdftools.run('command', input='input_file', output='input_file') + self.cdftools.run('command', input_file='input_file', output_file='input_file') with self.assertRaises(Exception): - self.cdftools.run('command', input='input_file', output='badoutput_file') + self.cdftools.run('command', input_file='input_file', output_file='badoutput_file') - self.cdftools.run('command', input='input_file', 
output='output_file') - self.cdftools.run('command', input='input_file') - self.cdftools.run('command', input=None) - self.cdftools.run('command', input=['input_file', 'input_file2']) - self.cdftools.run('command', input='input_file', options='-o -p') - self.cdftools.run('command', input='input_file', options=('-o', '-p')) + self.cdftools.run('command', input_file='input_file', output_file='output_file') + self.cdftools.run('command', input_option='-i', input_file='input_file', output_file='output_file') + self.cdftools.run('command', input_file='input_file') + self.cdftools.run('command', input_file=None) + self.cdftools.run('command', input_file=['input_file', 'input_file2']) + self.cdftools.run('command', input_file='input_file', options='-o -p') + self.cdftools.run('command', input_file='input_file', options=('-o', '-p')) # noinspection PyUnusedLocal @mock.patch('os.path.isfile', side_effect=bad_file) @mock.patch('os.access', side_effect=bad_file) @mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') - def test_run(self, mock_path, mock_exists, execute_mock): - self.cdftools = CDFTools('') + def test_run_with_path(self, mock_path, mock_exists, execute_mock): + self.cdftools = CDFTools('/some/path') execute_mock.return_value = ['Command output'] with self.assertRaises(ValueError): - self.cdftools.run('badcommand', input='input_file', output='output_file') + self.cdftools.run('badcommand', input_file='input_file', output_file='output_file') with self.assertRaises(ValueError): - self.cdftools.run('command', input='badinput_file', output='output_file') + self.cdftools.run('command', input_file='badinput_file', output_file='output_file') with self.assertRaises(ValueError): - self.cdftools.run('command', input=['input_file', 'badinput_file'], output='output_file') + self.cdftools.run('command', input_file=['input_file', 'badinput_file'], output_file='output_file') with self.assertRaises(ValueError): - self.cdftools.run('command', input='input_file', 
output='input_file') + self.cdftools.run('command', input_file='input_file', output_file='input_file') with self.assertRaises(Exception): - self.cdftools.run('command', input='input_file', output='badoutput_file') + self.cdftools.run('command', input_file='input_file', output_file='badoutput_file') - self.cdftools.run('command', input='input_file', output='output_file') - self.cdftools.run('command', input='input_file') - self.cdftools.run('command', input=None) - self.cdftools.run('command', input=['input_file', 'input_file2']) - self.cdftools.run('command', input='input_file', options='-o -p') - self.cdftools.run('command', input='input_file', options=('-o', '-p')) + self.cdftools.run('command', input_file='input_file', output_file='output_file') + self.cdftools.run('command', input_option='-i', input_file='input_file', output_file='output_file') + self.cdftools.run('command', input_file='input_file') + self.cdftools.run('command', input_file=None) + self.cdftools.run('command', input_file=['input_file', 'input_file2']) + self.cdftools.run('command', input_file='input_file', options='-o -p') + self.cdftools.run('command', input_file='input_file', options=('-o', '-p')) diff --git a/test/unit/test_cmormanager.py b/test/unit/test_cmormanager.py new file mode 100644 index 0000000000000000000000000000000000000000..6a32c3674e1d621236fbd2c95f65a71e03b8d398 --- /dev/null +++ b/test/unit/test_cmormanager.py @@ -0,0 +1,838 @@ +# coding=utf-8 +import os +import shutil +import tempfile +from unittest import TestCase + +import mock +from mock import Mock + +from earthdiagnostics.cmormanager import CMORManager, MergeYear +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestCMORManager(TestCase): + + def setUp(self): + self.config = Mock() + self.config.data_convention = 'specs' + self.config.frequency = '6hr' + self.config.data_type = 'exp' + self.config.experiment.expid = 'expid' + self.config.experiment.model = 'model' + 
self.config.experiment.experiment_name = 'expname' + self.config.experiment.institute = 'institute' + self.config.experiment.member_count_start = 0 + self.config.experiment.chunk_size = 12 + self.config.experiment.num_chunks = 1 + self.config.experiment.calendar = 'standard' + self.config.experiment.atmos_timestep = 3 + self.config.experiment.ocean_timestep = 6 + + self.config.cmor.initialization_number = 1 + self.config.cmor.version = '' + self.config.cmor.default_ocean_grid = 'ocean_grid' + self.config.cmor.default_atmos_grid = 'atmos_grid' + self.config.cmor.activity = 'activity' + self.config.cmor.force = False + self.config.cmor.force_untar = False + self.config.cmor.append_startdate = False + + self.tmp_dir = tempfile.mkdtemp() + os.mkdir(os.path.join(self.tmp_dir, self.config.experiment.expid)) + self.config.data_dir = self.tmp_dir + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + + def test_find_data(self): + cmor_manager = CMORManager(self.config) + self.assertEqual(cmor_manager.cmor_path, os.path.join(self.tmp_dir, 'expid')) + + def test_find_data_fail(self): + os.rmdir(os.path.join(self.tmp_dir, self.config.experiment.expid)) + with self.assertRaises(Exception): + CMORManager(self.config) + + def test_find_data_with_model(self): + os.makedirs(os.path.join(self.tmp_dir, 'model', self.config.experiment.expid)) + os.rmdir(os.path.join(self.tmp_dir, self.config.experiment.expid)) + cmor_manager = CMORManager(self.config) + self.assertEqual(cmor_manager.cmor_path, os.path.join(self.tmp_dir, 'model', 'expid')) + + def test_find_data_with_ecearth_fix(self): + self.config.experiment.model = 'EC-Earth' + os.makedirs(os.path.join(self.tmp_dir, 'ecearth', self.config.experiment.expid)) + os.rmdir(os.path.join(self.tmp_dir, self.config.experiment.expid)) + cmor_manager = CMORManager(self.config) + self.assertEqual(cmor_manager.cmor_path, os.path.join(self.tmp_dir, 'ecearth', 'expid')) + + def test_find_data_with_type_and_model(self): + 
os.makedirs(os.path.join(self.tmp_dir, 'exp', 'model', self.config.experiment.expid)) + os.rmdir(os.path.join(self.tmp_dir, self.config.experiment.expid)) + cmor_manager = CMORManager(self.config) + self.assertEqual(cmor_manager.cmor_path, os.path.join(self.tmp_dir, 'exp', 'model', 'expid')) + + def test_get_file_path_bad_convention(self): + self.config.cmor.version = 'version' + self.config.data_convention = 'bad_convention' + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + with self.assertRaises(ValueError): + cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, 'mon') + + def test_get_file_path_specs(self): + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S19900101/mon/' + 'ocean/var/r2i1p1/' + 'var_Omon_model_expname_S19900101_r2i1p1_198901-198912.nc')) + + def test_get_file_path_specs_non_cmor(self): + cmor_manager = CMORManager(self.config) + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', None, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S19900101/mon/' + 'ocean/var/r2i1p1/' + 'var_Omon_model_expname_S19900101_r2i1p1_198901-198912.nc')) + def test_get_file_path_specs_empty_time_info(self): + cmor_manager = CMORManager(self.config) + with self.assertRaises(ValueError): + cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', None, None, + 'mon') + + def test_get_file_path_preface(self): + self._configure_preface() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + 
cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/expname/S19900101/mon/' + 'ocean/var/r2i1p1/' + 'var_Omon_model_expname_S19900101_r2i1p1_198901_198912.nc')) + + def test_get_file_path_specs_version(self): + self.config.cmor.version = 'version' + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S19900101/mon/' + 'ocean/var/r2i1p1/version/' + 'var_Omon_model_expname_S19900101_r2i1p1_198901-198912.nc')) + + def test_get_file_path_specs_grid(self): + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, + 'mon', 'grid') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S19900101/mon/' + 'ocean/var/grid/r2i1p1/' + 'var_Omon_model_expname_S19900101_r2i1p1_198901-198912.nc')) + + def test_get_file_path_specs_year(self): + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, None, + 'year', year='1998') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S19900101/year/' + 'ocean/var/r2i1p1/' + 'var_Omon_model_expname_S19900101_r2i1p1_1998.nc')) + + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', None, ModelingRealms.ocean, 
'var', cmor_var, + 1, 'mon', year='1998') + + def test_get_file_path_raise_incompatible_date_info(self): + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + frequency = Mock() + frequency.frequency = 'monthly' + frequency.__str__ = Mock() + frequency.__str__.return_value = 'frequency' + + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', 1, ModelingRealms.ocean, 'var', cmor_var, + None, frequency, year='1998') + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', 1, ModelingRealms.ocean, 'var', cmor_var, + 1, frequency, year='1998') + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', 1, ModelingRealms.ocean, 'var', cmor_var, + 1, frequency, date_str='1998') + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', 1, ModelingRealms.ocean, 'var', cmor_var, + None, frequency, year='1998', date_str='1998') + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', 1, ModelingRealms.ocean, 'var', cmor_var, + 1, frequency, year='1998', date_str='1998') + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', 1, ModelingRealms.ocean, 'var', cmor_var, + None, frequency) + + def test_get_file_path_specs_date_str(self): + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, None, + 'mon', date_str='date_str') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S19900101/mon/' + 'ocean/var/r2i1p1/' + 'var_Omon_model_expname_S19900101_r2i1p1_date_str.nc')) + + def test_get_file_path_primavera(self): + self._configure_primavera() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon 
+ file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/activity/institute/model/expname/' + 'r2i1p1f1/Omon/var/ocean_grid/version/' + 'var_Omon_model_expname_r2i1p1f1_ocean_grid_198901-198912.nc')) + + def test_get_file_path_primavera_with_startdate(self): + self._configure_primavera() + self.config.cmor.append_startdate = True + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/activity/institute/model/expnameS19900101/' + 'r2i1p1f1/Omon/var/ocean_grid/version/' + 'var_Omon_model_expnameS19900101_r2i1p1f1_ocean_grid_198901-198912.nc')) + + def test_get_file_path_primavera_no_cmor(self): + self._configure_primavera() + cmor_manager = CMORManager(self.config) + frequency = Mock() + frequency.__str__ = Mock() + frequency.__str__.return_value = 'mon' + frequency.frequency = 'mon' + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', None, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/activity/institute/model/expname/' + 'r2i1p1f1/Omon/var/ocean_grid/version/' + 'var_Omon_model_expname_r2i1p1f1_ocean_grid_198901-198912.nc')) + + def test_get_file_path_no_version_primavera(self): + self._configure_primavera() + self.config.cmor.version = '' + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + frequency = Mock() + frequency.__str__ = Mock() + frequency.__str__.return_value = 'frequency' + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', 1, ModelingRealms.ocean, 'var', cmor_var, + 0, frequency) + + 
def _configure_primavera(self): + self.config.data_convention = 'primavera' + self.config.cmor.version = 'version' + + def _configure_meteofrance(self): + self.config.data_convention = 'meteofrance' + + def _configure_preface(self): + self.config.data_convention = 'preface' + + def test_get_file_path_primavera_grid(self): + self._configure_primavera() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, 0, + 'mon', 'grid') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/' + 'Omon/var/grid/version/' + 'var_Omon_model_expname_r2i1p1f1_grid_198901-198912.nc')) + + def test_get_file_path_primavera_atmos(self): + self._configure_primavera() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.atmos, 'var', cmor_var, 0, + 'mon') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/activity/institute/model/expname/' + 'r2i1p1f1/Omon/var/atmos_grid/version/' + 'var_Omon_model_expname_r2i1p1f1_atmos_grid_198901-198912.nc')) + + def test_get_file_path_primavera_year(self): + self._configure_primavera() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, None, + 'year', year='1998') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/Omon/' + 'var/ocean_grid/version/' + 'var_Omon_model_expname_r2i1p1f1_ocean_grid_1998.nc')) + + self.assertRaises(ValueError, cmor_manager.get_file_path, '19900101', None, 
ModelingRealms.ocean, 'var', cmor_var, + 1, 'mon', year='1998') + + def test_get_file_path_primavera_date_str(self): + self._configure_primavera() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('19900101', 1, ModelingRealms.ocean, 'var', cmor_var, None, + 'mon', date_str='date_str') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/' + 'Omon/var/ocean_grid/version/' + 'var_Omon_model_expname_r2i1p1f1_ocean_grid_date_str.nc')) + + def test_file_exists(self): + with mock.patch('os.path.isfile') as isfile: + cmor_manager = CMORManager(self.config) + isfile.return_value = True + self.assertTrue(cmor_manager.file_exists(ModelingRealms.ocean, 'var', '20011101', 1, 1)) + isfile.return_value = False + self.assertFalse(cmor_manager.file_exists(ModelingRealms.ocean, 'var', '20011101', 1, 1)) + + def test_file_exists_multiple_versions(self): + with mock.patch('os.path.isfile') as isfile: + cmor_manager = CMORManager(self.config) + isfile.return_value = True + self.assertTrue(cmor_manager.file_exists(ModelingRealms.ocean, 'var', '20011101', 1, 1, + possible_versions=('version1', 'version2'))) + isfile.return_value = False + self.assertFalse(cmor_manager.file_exists(ModelingRealms.ocean, 'var', '20011101', 1,1, + possible_versions=('version1', 'version2'))) + + def test_get_file_path_meteofrance(self): + self._configure_meteofrance() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + file_path = cmor_manager.get_file_path('20110101', 1, ModelingRealms.ocean, 'soicecov', cmor_var, 1, + 'day') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expname/HA/2011/soicecov_day_201101_01.nc')) + + file_path = cmor_manager.get_file_path('20110101', 1, ModelingRealms.ocean, 'soicecov', 
cmor_var, 2, + 'day') + self.assertEqual(file_path, + os.path.join(self.tmp_dir, 'expname/HA/2011/soicecov_day_201201_01.nc')) + + def test_get_file_path_bad_convention(self): + self.config.data_convention = 'badconvention' + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + with self.assertRaises(ValueError): + cmor_manager.get_file_path('20110101', 1, ModelingRealms.ocean, 'soicecov', cmor_var, 1, + 'day') + + def test_create_link(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', None, False, None) + filename = os.path.basename(path) + self.assertTrue(os.path.islink(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h', filename))) + + def test_create_link_no_source(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + os.remove(path) + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + with self.assertRaises(ValueError): + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', None, False, None) + + def test_create_link_exists(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + filename = os.path.basename(path) + + os.makedirs(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h')) + os.symlink(path, os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h', filename)) + + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', None, False, None) + 
self.assertTrue(os.path.islink(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h', filename))) + + def test_create_link_default_is_link(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + os.makedirs(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-original_f6h')) + os.symlink(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-original_f6h'), + os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h')) + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', None, False, None) + filename = os.path.basename(path) + self.assertTrue(os.path.islink(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h', filename))) + + def test_create_link_move_old(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + os.makedirs(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h')) + fd = open(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h', 'var_20001101.nc'), 'w') + fd.close() + + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', None, True, None) + filename = os.path.basename(path) + self.assertTrue(os.path.islink(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h', filename))) + self.assertTrue(os.path.isfile(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'old_var_f6h', + 'var_20001101.nc'))) + + def test_create_link_with_grid(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 
'var', 'grid', False, None) + filename = os.path.basename(path) + var_mainfolder = os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h') + var_grid_folder = os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-grid_f6h') + self.assertTrue(os.path.islink(var_mainfolder)) + self.assertTrue(os.path.islink(os.path.join(var_grid_folder, filename))) + self.assertTrue(os.readlink(var_mainfolder), var_grid_folder) + + def test_create_link_with_grid_default_is_link(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + os.makedirs(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-original_f6h')) + os.symlink(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-original_f6h'), + os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h')) + + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', 'grid', False, None) + filename = os.path.basename(path) + var_mainfolder = os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h') + var_grid_folder = os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-grid_f6h') + self.assertTrue(os.path.islink(var_mainfolder)) + self.assertTrue(os.path.islink(os.path.join(var_grid_folder, filename))) + self.assertTrue(os.readlink(var_mainfolder), var_grid_folder) + + def test_create_link_with_grid_default_is_folder(self): + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + os.makedirs(os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h')) + + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', 'grid', False, None) + filename = os.path.basename(path) + var_mainfolder = os.path.join(self.tmp_dir, 'expid', 
'frequency_folder', 'var_f6h') + var_grid_folder = os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-grid_f6h') + self.assertTrue(os.path.islink(var_mainfolder)) + self.assertTrue(os.path.islink(os.path.join(var_grid_folder, filename))) + self.assertTrue(os.readlink(var_mainfolder), var_grid_folder) + + def test_create_link_meteofrance(self): + self._configure_meteofrance() + cmor_manager = CMORManager(self.config) + file_descriptor, path = tempfile.mkstemp(dir=self.tmp_dir) + os.close(file_descriptor) + frequency = Mock() + frequency.folder_name.return_value = 'frequency_folder' + cmor_manager.create_link(ModelingRealms.ocean, path, frequency, 'var', 'grid', False, None) + filename = os.path.basename(path) + var_mainfolder = os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var_f6h') + var_grid_folder = os.path.join(self.tmp_dir, 'expid', 'frequency_folder', 'var-grid_f6h') + self.assertFalse(os.path.islink(var_mainfolder)) + self.assertFalse(os.path.islink(os.path.join(var_grid_folder, filename))) + + @mock.patch('earthdiagnostics.cmormanager.Cmorizer', autospec=True) + def test_prepare_cmorize(self, mock_cmor): + mock_instance = mock_cmor.return_value + cmor_manager = CMORManager(self.config) + self.config.experiment.get_member_list.return_value = (('20000101', 2),) + cmor_manager.prepare() + mock_instance.cmorize_ocean.assert_called_once() + mock_instance.cmorize_atmos.assert_called_once() + + @mock.patch('earthdiagnostics.cmormanager.Cmorizer', autospec=True) + def test_prepare_cmorize_force(self, mock_cmor): + mock_instance = mock_cmor.return_value + self.config.cmor.force = True + cmor_manager = CMORManager(self.config) + self.config.experiment.get_member_list.return_value = (('20000101', 2),) + cmor_manager.prepare() + mock_instance.cmorize_ocean.assert_called_once() + mock_instance.cmorize_atmos.assert_called_once() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.is_cmorized') + 
@mock.patch('earthdiagnostics.cmormanager.CMORManager.create_links') + @mock.patch('earthdiagnostics.utils.Utils.unzip') + @mock.patch('earthdiagnostics.utils.Utils.untar') + def test_prepare_cmorize_force_untar(self, mock_untar, mock_unzip, mock_create_links, mock_cmorized): + original_cmor_path = os.path.join(self.config.data_dir, self.config.experiment.expid, + 'original_files', 'cmorfiles') + os.makedirs(original_cmor_path) + self.config.experiment.get_member_str.return_value = 'r1i1p1' + self.config.experiment.get_chunk_start_str.return_value = '20000101' + cmor_prefix = 'CMORT_{0}_{1}_{2}_{3}-'.format(self.config.experiment.expid, '20000101', 'r1i1p1', + '20000101') + tempfile.mkstemp('.tar.gz', cmor_prefix, original_cmor_path) + tempfile.mkstemp('.tar', cmor_prefix, original_cmor_path) + mock_cmorized.return_value = True + self.config.cmor.force_untar = True + cmor_manager = CMORManager(self.config) + self.config.experiment.get_member_list.return_value = (('20000101', 2),) + cmor_manager.prepare() + mock_create_links.assert_called_once() + mock_unzip.assert_called_once() + mock_untar.assert_called_once() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.is_cmorized') + def test_prepare_already_cmorized(self, mock_cmorized): + mock_cmorized.return_value = True + self.config.cmor.force_untar = False + cmor_manager = CMORManager(self.config) + self.config.experiment.get_member_list.return_value = (('20000101', 2),) + cmor_manager.prepare() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.is_cmorized') + def test_prepare_cmorization_not_requested(self, mock_cmorized): + mock_cmorized.return_value = False + self.config.cmor.chunk_cmorization_requested.return_value = False + cmor_manager = CMORManager(self.config) + self.config.experiment.get_member_list.return_value = (('20000101', 2),) + cmor_manager.prepare() + + def test_prepare_meteofrance(self): + self._configure_meteofrance() + cmor_manager = CMORManager(self.config) + 
cmor_manager.prepare() + + def test_is_cmorized_false(self): + cmor_manager = CMORManager(self.config) + self.assertFalse(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + @mock.patch('os.path.isfile') + def test_is_cmorized_true(self, mock_is_file): + mock_is_file.return_value = True + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + self.config.var_manager.get_variable.return_value = cmor_var + self.config.cmor.min_cmorized_vars = 1 + cmor_manager = CMORManager(self.config) + os.makedirs(os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S20000101/mon/ocean/var')) + self.assertTrue(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + @mock.patch('os.path.isfile') + def test_is_cmorized_not_enough_vars(self, mock_is_file): + mock_is_file.return_value = True + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + self.config.var_manager.get_variable.return_value = cmor_var + self.config.cmor.min_cmorized_vars = 2 + cmor_manager = CMORManager(self.config) + os.makedirs(os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S20000101/mon/ocean/var')) + self.assertFalse(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + def test_is_cmorized_not_domain_folder(self): + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + self.config.var_manager.get_variable.return_value = cmor_var + self.config.cmor.min_cmorized_vars = 2 + cmor_manager = CMORManager(self.config) + os.makedirs(os.path.join(self.tmp_dir, 'expid/cmorfiles/institute/model/expname/S20000101/mon')) + self.assertFalse(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + @mock.patch('os.path.isfile') + def test_is_cmorized_true_primavera(self, mock_is_file): + self._configure_primavera() + mock_is_file.return_value = True + cmor_var = Mock() + omon = Mock() + omon.name = 
'Omon' + cmor_var.get_table.return_value = omon + self.config.var_manager.get_variable.return_value = cmor_var + self.config.cmor.min_cmorized_vars = 1 + cmor_manager = CMORManager(self.config) + os.makedirs(os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/Omon/var')) + self.assertTrue(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + @mock.patch('os.path.isfile') + def test_is_cmorized_false_primavera(self, mock_is_file): + self._configure_primavera() + mock_is_file.return_value = False + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + self.config.var_manager.get_variable.return_value = cmor_var + self.config.cmor.min_cmorized_vars = 1 + cmor_manager = CMORManager(self.config) + os.makedirs(os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/Omon/var')) + self.assertFalse(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + def test_is_cmorized_false_not_member_folder_primavera(self): + self._configure_primavera() + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + self.config.var_manager.get_variable.return_value = cmor_var + self.config.cmor.min_cmorized_vars = 1 + cmor_manager = CMORManager(self.config) + os.makedirs(os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/')) + self.assertFalse(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + def test_is_cmorized_false_not_table_folder_primavera(self): + self._configure_primavera() + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + self.config.var_manager.get_variable.return_value = cmor_var + self.config.cmor.min_cmorized_vars = 1 + cmor_manager = CMORManager(self.config) + os.makedirs(os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1')) + 
self.assertFalse(cmor_manager.is_cmorized('20000101', 1, 1, ModelingRealms.ocean)) + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + @mock.patch('earthdiagnostics.cmormanager.CMORManager.get_file_path') + def test_link_file(self, mock_get_file_path, mock_create_link): + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + cmor_manager.link_file(ModelingRealms.ocean, 'var', cmor_var, '20010101', 1, 1) + mock_get_file_path.assert_called_once() + mock_create_link.assert_called_once() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + @mock.patch('earthdiagnostics.cmormanager.CMORManager.get_file_path') + def test_link_file_frequency(self, mock_get_file_path, mock_create_link): + frequency = Mock() + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + cmor_manager.link_file(ModelingRealms.ocean, 'var', cmor_var, '20010101', 1, 1, frequency=frequency) + mock_get_file_path.assert_called_once() + mock_create_link.assert_called_once() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + @mock.patch('earthdiagnostics.cmormanager.CMORManager.get_file_path') + def test_link_file_default_freq(self, mock_get_file_path, mock_create_link): + cmor_manager = CMORManager(self.config) + cmor_var = Mock() + cmor_manager.link_file(ModelingRealms.ocean, 'var', cmor_var, '20010101', 1, 1) + mock_get_file_path.assert_called_once() + mock_create_link.assert_called_once() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + def test_create_links_specs(self, mock_create_link): + member_path = os.path.join(self.tmp_dir, + 'expid/cmorfiles/institute/model/expname/S20010101/mon/ocean/var/r2i1p1') + os.makedirs(member_path) + tempfile.mkstemp(dir=member_path) + cmor_manager = CMORManager(self.config) + cmor_manager.create_links('20010101', 1) + mock_create_link.assert_called() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + def test_create_links_specs_member_not_found(self, 
mock_create_link): + member_path = os.path.join(self.tmp_dir, + 'expid/cmorfiles/institute/model/expname/S20010101/mon/ocean/var/r1i1p1') + os.makedirs(member_path) + tempfile.mkstemp(dir=member_path) + cmor_manager = CMORManager(self.config) + cmor_manager.create_links('20010101', 1) + mock_create_link.assert_not_called() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + def test_create_links_specs_with_grid(self, mock_create_link): + member_path = os.path.join(self.tmp_dir, + 'expid/cmorfiles/institute/model/expname/S20010101/mon/ocean/var/r2i1p1/grid') + os.makedirs(member_path) + tempfile.mkstemp(dir=member_path) + cmor_manager = CMORManager(self.config) + cmor_manager.create_links('20010101', 1) + mock_create_link.assert_called() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + def test_create_links_primavera(self, mock_create_link): + self._configure_primavera() + member_path = os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/Omon/var/gn') + os.makedirs(member_path) + self.config.var_manager.tables = {'Omon': Mock()} + tempfile.mkstemp(dir=member_path) + cmor_manager = CMORManager(self.config) + cmor_manager.create_links('20010101', 1) + mock_create_link.assert_called() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + def test_create_links_with_version_primavera(self, mock_create_link): + self._configure_primavera() + member_path = os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/Omon/var/gn/version') + os.makedirs(member_path) + self.config.var_manager.tables = {'Omon': Mock()} + tempfile.mkstemp(dir=member_path) + cmor_manager = CMORManager(self.config) + cmor_manager.create_links('20010101', 1) + mock_create_link.assert_called() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + def test_create_links_with_version_primavera_no_member(self, mock_create_link): + 
self._configure_primavera() + member_path = os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/r2i1p1f1/Omon/var/gn/version') + os.makedirs(member_path) + self.config.var_manager.tables = {'Omon': Mock()} + tempfile.mkstemp(dir=member_path) + cmor_manager = CMORManager(self.config) + cmor_manager.create_links('20010101') + mock_create_link.assert_called() + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.create_link') + def test_create_links_member_not_found_primavera(self, mock_create_link): + self._configure_primavera() + member_path = os.path.join(self.tmp_dir, + 'expid/cmorfiles/activity/institute/model/expname/r1i1p1f1/Omon/var/gn') + os.makedirs(member_path) + self.config.var_manager.tables = {'Omon': Mock()} + tempfile.mkstemp(dir=member_path) + cmor_manager = CMORManager(self.config) + cmor_manager.create_links('20010101', 1) + mock_create_link.assert_not_called() + + def test_create_links_meteofrance(self): + self._configure_meteofrance() + cmor_manager = CMORManager(self.config) + with self.assertRaises(ValueError): + cmor_manager.create_links('20010101', 1) + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.get_file_path') + def test_request_chunk(self, mock_get_file_path): + mock_get_file_path.return_value = '/path/to/file' + cmor_manager = CMORManager(self.config) + datafile = cmor_manager.request_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1) + self.assertEqual(datafile.remote_file, '/path/to/file') + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.get_file_path') + def test_declare_chunk(self, mock_get_file_path): + mock_get_file_path.return_value = '/path/to/file' + cmor_manager = CMORManager(self.config) + datafile = cmor_manager.declare_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1) + self.assertEqual(datafile.remote_file, '/path/to/file') + + mock_frequency = Mock() + datafile = cmor_manager.declare_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1, + frequency=mock_frequency) + 
self.assertEqual(datafile.remote_file, '/path/to/file') + + self.config.var_manager.get_variable.return_value = None + datafile = cmor_manager.declare_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1) + self.assertEqual(datafile.remote_file, '/path/to/file') + + + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.get_file_path') + def test_request_year(self, mock_get_file_path): + self.config.experiment.get_year_chunks.return_value = (1, 2) + mock_get_file_path.return_value = '/path/to/file' + mock_diagnostic = Mock() + cmor_manager = CMORManager(self.config) + datafile = cmor_manager.request_year(mock_diagnostic, ModelingRealms.ocean, 'var', '20010101', 1, 2000) + self.assertEqual(datafile.remote_file, '/path/to/file') + + @mock.patch('earthdiagnostics.cmormanager.CMORManager.get_file_path') + def test_declare_year(self, mock_get_file_path): + mock_get_file_path.return_value = '/path/to/file' + cmor_manager = CMORManager(self.config) + datafile = cmor_manager.declare_year(ModelingRealms.ocean, 'var', '20010101', 1, 2001) + self.assertEqual(datafile.remote_file, '/path/to/file') + + self.config.var_manager.get_variable.return_value = None + datafile = cmor_manager.declare_year(ModelingRealms.ocean, 'var', '20010101', 1, 2001) + self.assertEqual(datafile.remote_file, '/path/to/file') + + +class TestMergeYear(TestCase): + + def setUp(self): + self.data_manager = Mock() + + def test_generate_jobs(self): + self.assertIsNone(MergeYear.generate_jobs(None, None)) + + def test_eq(self): + self.assertEqual(MergeYear(self.data_manager, ModelingRealms.ocean, 'var', 'startdate', 1, 1998), + MergeYear(self.data_manager, ModelingRealms.ocean, 'var', 'startdate', 1, 1998)) + self.assertEqual(MergeYear(self.data_manager, ModelingRealms.ocean, 'var', 'startdate', 1, 1998, 'grid'), + MergeYear(self.data_manager, ModelingRealms.ocean, 'var', 'startdate', 1, 1998, 'grid')) + + + diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 
419be475362582c7df34169f4a640748ad298e9e..b1ee9671af1d38017c22320ca37567adf74e3b8a 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -1,7 +1,10 @@ # coding=utf-8 from unittest import TestCase +import datetime +import mock +import os -from earthdiagnostics.config import CMORConfig, ConfigException, THREDDSConfig, ReportConfig, ExperimentConfig +from earthdiagnostics.config import CMORConfig, ConfigException, THREDDSConfig, ReportConfig, ExperimentConfig, Config from earthdiagnostics.frequency import Frequencies from earthdiagnostics.modelingrealm import ModelingRealms @@ -24,9 +27,10 @@ class VariableManagerMock(object): return var -class ParserMock(object): +class ParserMock(mock.Mock): - def __init__(self): + def __init__(self, **kwargs): + super(mock.Mock, self).__init__(**kwargs) self._values = {} def add_value(self, section, var, value): @@ -44,12 +48,14 @@ class ParserMock(object): def get_bool_option(self, section, var, default): return self.get_value(section, var, default) - def get_path_option(self, section, var, default): + def get_path_option(self, section, var, default=""): return self.get_value(section, var, default) def get_int_option(self, section, var, default=0): return self.get_value(section, var, default) + def get_choice_option(self, section, var, choices, default, ignore_case=True): + return self.get_value(section, var, default) def get_int_list_option(self, section, var, default=list(), separator=' '): try: @@ -66,6 +72,14 @@ class ParserMock(object): def get_option(self, section, var, default=None): return self.get_value(section, var, default) + def has_section(self, section): + start = '{0}:'.format(section) + return any(x.startswith(start) for x in self._values) + + def options(self, section): + start = '{0}:'.format(section) + return [x[len(start):] for x in self._values if x.startswith(start)] + class TestCMORConfig(TestCase): @@ -75,25 +89,26 @@ class TestCMORConfig(TestCase): def test_basic_config(self): config = 
CMORConfig(self.mock_parser, self.var_manager) - self.assertEquals(config.ocean, True) - self.assertEquals(config.atmosphere, True) - self.assertEquals(config.force, False) - self.assertEquals(config.force_untar, False) - self.assertEquals(config.use_grib, True) - self.assertEquals(config.activity, 'CMIP') - self.assertEquals(config.associated_experiment, 'to be filled') - self.assertEquals(config.associated_model, 'to be filled') - self.assertEquals(config.initialization_description, 'to be filled') - self.assertEquals(config.initialization_method, '1') - self.assertEquals(config.initialization_number, 1) - self.assertEquals(config.source, 'to be filled') - self.assertEquals(config.version, '') - self.assertEquals(config.physics_version, '1') - self.assertEquals(config.physics_description, 'to be filled') - self.assertEquals(config.filter_files, '') - self.assertEquals(config.default_atmos_grid, 'gr') - self.assertEquals(config.default_ocean_grid, 'gn') - self.assertEquals(config.min_cmorized_vars, 10) + self.assertEqual(config.ocean, True) + self.assertEqual(config.atmosphere, True) + self.assertEqual(config.force, False) + self.assertEqual(config.force_untar, False) + self.assertEqual(config.use_grib, True) + self.assertEqual(config.activity, 'CMIP') + self.assertEqual(config.associated_experiment, 'to be filled') + self.assertEqual(config.associated_model, 'to be filled') + self.assertEqual(config.initialization_description, 'to be filled') + self.assertEqual(config.initialization_method, '1') + self.assertEqual(config.initialization_number, 1) + self.assertEqual(config.source, 'to be filled') + self.assertEqual(config.version, '') + self.assertEqual(config.physics_version, '1') + self.assertEqual(config.physics_description, 'to be filled') + self.assertEqual(config.filter_files, '') + self.assertEqual(config.default_atmos_grid, 'gr') + self.assertEqual(config.default_ocean_grid, 'gn') + self.assertEqual(config.min_cmorized_vars, 10) + 
self.assertEqual(config.append_startdate, False) def test_cmorize(self): config = CMORConfig(self.mock_parser, self.var_manager) @@ -184,62 +199,70 @@ class TestCMORConfig(TestCase): def test_hourly_vars(self): config = CMORConfig(self.mock_parser, self.var_manager) - self.assertEquals(config.get_variables(Frequencies.six_hourly), {}) + self.assertEqual(config.get_variables(Frequencies.six_hourly), {}) self.mock_parser.add_value('CMOR', 'ATMOS_HOURLY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') config = CMORConfig(self.mock_parser, self.var_manager) - self.assertEquals(config.get_variables(Frequencies.six_hourly), {128: None, + self.assertEqual(config.get_variables(Frequencies.six_hourly), {128: None, 129: '1', 130: '1,2', 131: '1,2,3,4,5,6,7,8,9', 132: '0,5'}) - self.assertEquals(config.get_levels(Frequencies.six_hourly, 128), None) - self.assertEquals(config.get_levels(Frequencies.six_hourly, 129), '1') - self.assertEquals(config.get_levels(Frequencies.six_hourly, 130), '1,2') - self.assertEquals(config.get_levels(Frequencies.six_hourly, 131), '1,2,3,4,5,6,7,8,9',) - self.assertEquals(config.get_levels(Frequencies.six_hourly, 132), '0,5') + self.assertEqual(config.get_levels(Frequencies.six_hourly, 128), None) + self.assertEqual(config.get_levels(Frequencies.six_hourly, 129), '1') + self.assertEqual(config.get_levels(Frequencies.six_hourly, 130), '1,2') + self.assertEqual(config.get_levels(Frequencies.six_hourly, 131), '1,2,3,4,5,6,7,8,9',) + self.assertEqual(config.get_levels(Frequencies.six_hourly, 132), '0,5') def test_daily_vars(self): config = CMORConfig(self.mock_parser, self.var_manager) - self.assertEquals(config.get_variables(Frequencies.daily), {}) + self.assertEqual(config.get_variables(Frequencies.daily), {}) self.mock_parser.add_value('CMOR', 'ATMOS_DAILY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') config = CMORConfig(self.mock_parser, self.var_manager) - self.assertEquals(config.get_variables(Frequencies.daily), {128: None, + 
self.assertEqual(config.get_variables(Frequencies.daily), {128: None, 129: '1', 130: '1,2', 131: '1,2,3,4,5,6,7,8,9', 132: '0,5'}) - self.assertEquals(config.get_levels(Frequencies.daily, 128), None) - self.assertEquals(config.get_levels(Frequencies.daily, 129), '1') - self.assertEquals(config.get_levels(Frequencies.daily, 130), '1,2') - self.assertEquals(config.get_levels(Frequencies.daily, 131), '1,2,3,4,5,6,7,8,9',) - self.assertEquals(config.get_levels(Frequencies.daily, 132), '0,5') + self.assertEqual(config.get_levels(Frequencies.daily, 128), None) + self.assertEqual(config.get_levels(Frequencies.daily, 129), '1') + self.assertEqual(config.get_levels(Frequencies.daily, 130), '1,2') + self.assertEqual(config.get_levels(Frequencies.daily, 131), '1,2,3,4,5,6,7,8,9',) + self.assertEqual(config.get_levels(Frequencies.daily, 132), '0,5') def test_monthly_vars(self): config = CMORConfig(self.mock_parser, self.var_manager) - self.assertEquals(config.get_variables(Frequencies.monthly), {}) + self.assertEqual(config.get_variables(Frequencies.monthly), {}) self.mock_parser.add_value('CMOR', 'ATMOS_MONTHLY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') config = CMORConfig(self.mock_parser, self.var_manager) - self.assertEquals(config.get_variables(Frequencies.monthly), {128: None, + self.assertEqual(config.get_variables(Frequencies.monthly), {128: None, 129: '1', 130: '1,2', 131: '1,2,3,4,5,6,7,8,9', 132: '0,5'}) - self.assertEquals(config.get_levels(Frequencies.monthly, 128), None) - self.assertEquals(config.get_levels(Frequencies.monthly, 129), '1') - self.assertEquals(config.get_levels(Frequencies.monthly, 130), '1,2') - self.assertEquals(config.get_levels(Frequencies.monthly, 131), '1,2,3,4,5,6,7,8,9',) - self.assertEquals(config.get_levels(Frequencies.monthly, 132), '0,5') + self.assertEqual(config.get_levels(Frequencies.monthly, 128), None) + self.assertEqual(config.get_levels(Frequencies.monthly, 129), '1') + 
self.assertEqual(config.get_levels(Frequencies.monthly, 130), '1,2') + self.assertEqual(config.get_levels(Frequencies.monthly, 131), '1,2,3,4,5,6,7,8,9',) + self.assertEqual(config.get_levels(Frequencies.monthly, 132), '0,5') def test_bad_frequency_vars(self): config = CMORConfig(self.mock_parser, self.var_manager) with self.assertRaises(ValueError): - self.assertEquals(config.get_variables(Frequencies.climatology), {}) + config.get_variables(Frequencies.climatology) + + def test_requested_codes(self): + self.mock_parser.add_value('CMOR', 'ATMOS_HOURLY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') + self.mock_parser.add_value('CMOR', 'ATMOS_DAILY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') + self.mock_parser.add_value('CMOR', 'ATMOS_MONTHLY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') + config = CMORConfig(self.mock_parser, self.var_manager) + + self.assertEqual(config.get_requested_codes(), {128, 129, 130, 131, 132}) class TestTHREDDSConfig(TestCase): @@ -249,12 +272,12 @@ class TestTHREDDSConfig(TestCase): def test_basic_config(self): config = THREDDSConfig(self.mock_parser) - self.assertEquals(config.server_url, '') + self.assertEqual(config.server_url, '') def test_url(self): self.mock_parser.add_value('THREDDS', 'SERVER_URL', 'test_url') config = THREDDSConfig(self.mock_parser) - self.assertEquals(config.server_url, 'test_url') + self.assertEqual(config.server_url, 'test_url') class TestReportConfig(TestCase): @@ -264,17 +287,18 @@ class TestReportConfig(TestCase): def test_basic_config(self): config = ReportConfig(self.mock_parser) - self.assertEquals(config.path, '') - self.assertEquals(config.maximum_priority, 10) + self.assertEqual(config.path, '') + self.assertEqual(config.maximum_priority, 10) def test_path(self): self.mock_parser.add_value('REPORT', 'PATH', 'new_path') config = ReportConfig(self.mock_parser) - self.assertEquals(config.path, 'new_path') + self.assertEqual(config.path, 'new_path') def test_priority(self): + 
self.mock_parser.add_value('REPORT', 'MAXIMUM_PRIORITY', 3) config = ReportConfig(self.mock_parser) - self.assertEquals(config.maximum_priority, 3) + self.assertEqual(config.maximum_priority, 3) class TestExperimentConfig(TestCase): @@ -283,37 +307,225 @@ class TestExperimentConfig(TestCase): self.mock_parser = ParserMock() def test_basic_config(self): - config = ExperimentConfig(self.mock_parser) - - self.assertEquals(config.startdates, []) - self.assertEquals(config.members, []) - self.assertEquals(config.chunk_size, 0) - self.assertEquals(config.num_chunks, 0) - - self.assertEquals(config.atmos_grid, '') - self.assertEquals(config.atmos_timestep, 6) - self.assertEquals(config.ocean_timestep, 6) - - def test_cmor_version_required(self): - self.mock_parser.add_value('CMOR', 'VERSION', '20001101') - self.mock_parser.add_value('EXPERIMENT', 'DATA_CONVENTION', 'Primavera') - config = ExperimentConfig(self.mock_parser) - self.assertEquals(config.path, 'new_path') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + + self.assertEqual(config.startdates, []) + self.assertEqual(config.members, []) + self.assertEqual(config.chunk_size, 0) + self.assertEqual(config.num_chunks, 0) + + self.assertEqual(config.atmos_grid, '') + self.assertEqual(config.atmos_timestep, 6) + self.assertEqual(config.ocean_timestep, 6) + + def test_members(self): + self.mock_parser.add_value('EXPERIMENT', 'MEMBERS', 'fc0 1') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.members, [0, 1]) + + self.mock_parser.add_value('EXPERIMENT', 'MEMBERS', 'fc00 fc01') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.members, [0, 1]) + + self.mock_parser.add_value('EXPERIMENT', 'MEMBERS', 'fc1-fc3') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.members, [1, 2, 3]) + + self.mock_parser.add_value('EXPERIMENT', 'MEMBERS', '1-3') + config = ExperimentConfig() + 
config.parse_ini(self.mock_parser) + self.assertEqual(config.members, [1, 2, 3]) def test_startdates(self): self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '20001101 20011101') - config = ExperimentConfig(self.mock_parser) - self.assertEquals(config.startdates, ['20001101', '20011101']) + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, ['20001101', '20011101']) self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '200(0|1)1101') - config = ExperimentConfig(self.mock_parser) - self.assertEquals(config.startdates, ['20001101', '20011101']) + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, ['20001101', '20011101']) self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '200[0-2](02|05|08|11)01') - config = ExperimentConfig(self.mock_parser) - print(config.startdates) - self.assertEquals(config.startdates, [u'20000201', u'20000501', u'20000801', u'20001101', u'20010201', + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, [u'20000201', u'20000501', u'20000801', u'20001101', u'20010201', u'20010501', u'20010801', u'20011101', u'20020201', u'20020501', u'20020801', u'20021101']) + def test_auto_startdates(self): + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '{20001101,20011101,1Y}') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, ['20001101', '20011101']) + + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '{20001101,20011101,6M} ') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, ['20001101', '20010501', '20011101']) + + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '{20001101,20001201,1W}') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, ['20001101', '20001108', '20001115', '20001122', '20001129']) + + 
self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '{20001101,20001201,W}') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, ['20001101', '20001108', '20001115', '20001122', '20001129']) + + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '{20001101,20001201,7D}') + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.startdates, ['20001101', '20001108', '20001115', '20001122', '20001129']) + + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '{20001101,20001201,7F}') + with self.assertRaises(ConfigException): + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + + def test_get_member_str(self): + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.get_member_str(1), 'fc1') + + def test_get_full_years(self): + self.mock_parser.add_value('EXPERIMENT', 'CHUNK_SIZE', 3) + self.mock_parser.add_value('EXPERIMENT', 'CHUNKS', 15) + + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.get_full_years('20000601'), [2001, 2002, 2003, 2004]) + self.assertEqual(config.get_full_years('20000101'), [2000, 2001, 2002, 2003]) + + def test_get_year_chunks(self): + self.mock_parser.add_value('EXPERIMENT', 'CHUNK_SIZE', 3) + self.mock_parser.add_value('EXPERIMENT', 'CHUNKS', 13) + + config = ExperimentConfig() + config.parse_ini(self.mock_parser) + self.assertEqual(config.get_year_chunks('20000601', 2003), [11, 12, 13]) + self.assertEqual(config.get_year_chunks('20000601', 2001), [3, 4, 5, 6, 7]) + self.assertEqual(config.get_year_chunks('20000101', 2000), [1, 2, 3, 4]) + + self.assertEqual(config.get_year_chunks('20000601', 2000), [1, 2, 3]) + self.assertEqual(config.get_year_chunks('20000601', 1999), []) + + def test_get_chunk_list(self): + config = ExperimentConfig() + config.startdates = ('20010101', ) + config.members = (0, 1, 2) + config.chunk_list = [0] + config.num_chunks = 2 
+ self.assertEqual(config.get_chunk_list(), [('20010101', 0, 0), ('20010101', 1, 0), ('20010101', 2, 0)]) + + config.chunk_list = [] + self.assertEqual(config.get_chunk_list(), [('20010101', 0, 1), ('20010101', 0, 2), ('20010101', 1, 1), + ('20010101', 1, 2), ('20010101', 2, 1), ('20010101', 2, 2)]) + + def test_get_member_list(self): + config = ExperimentConfig() + config.startdates = ('20010101', ) + config.members = (0, 1, 2) + self.assertEqual(config.get_member_list(), [('20010101', 0), ('20010101', 1), ('20010101', 2)]) + + def test_get_chunk_start_str(self): + config = ExperimentConfig() + self.mock_parser.add_value('EXPERIMENT', 'CHUNK_SIZE', 12) + self.mock_parser.add_value('EXPERIMENT', 'CHUNKS', 3) + config.parse_ini(self.mock_parser) + self.assertEqual(config.get_chunk_start_str('20001101', 3), '20021101') + + def test_get_chunk_start_str_datetime(self): + config = ExperimentConfig() + self.mock_parser.add_value('EXPERIMENT', 'CHUNK_SIZE', 12) + self.mock_parser.add_value('EXPERIMENT', 'CHUNKS', 3) + date = datetime.datetime(year=2000, month=11, day=1) + config.parse_ini(self.mock_parser) + self.assertEqual(config.get_chunk_start_str(date, 3), '20021101') + + def test_get_chunk_end_str(self): + config = ExperimentConfig() + self.mock_parser.add_value('EXPERIMENT', 'CHUNK_SIZE', 12) + self.mock_parser.add_value('EXPERIMENT', 'CHUNKS', 3) + config.parse_ini(self.mock_parser) + self.assertEqual(config.get_chunk_end_str('20001101', 3), '20031101') + + +class TestConfig(TestCase): + + def setUp(self): + self.mock_parser = ParserMock() + self.mock_parser.add_value('DIAGNOSTICS', 'FREQUENCY', 'mon') + self.mock_parser.add_value('DIAGNOSTICS', 'DIAGS', 'diag1 diag2') + self.mock_parser.add_value('DIAGNOSTICS', 'SCRATCH_DIR', 'scratch') + self._environ = dict(os.environ) + + def tearDown(self): + os.environ.clear() + os.environ.update(self._environ) + + def _parse(self, config): + def mock_new(): + return self.mock_parser + + def mock_new_exp(): + mock_exp = 
mock.Mock() + mock_exp.expid = 'expid' + return mock_exp + with mock.patch('earthdiagnostics.config.ConfigParser', new=mock_new): + with mock.patch('earthdiagnostics.config.VariableManager'): + with mock.patch('earthdiagnostics.config.ExperimentConfig', new=mock_new_exp): + with mock.patch('earthdiagnostics.config.CMORConfig'): + with mock.patch('earthdiagnostics.config.THREDDSConfig'): + config.parse('path') + + def test_diags(self): + config = Config() + self.mock_parser.add_value('DIAGNOSTICS', 'DIAGS', 'diag1 diag2,opt1,opt2 # Commented diag') + self._parse(config) + self.assertEqual(config.get_commands(), (['diag1', 'diag2,opt1,opt2'])) + + def test_parse(self): + config = Config() + self._parse(config) + self.assertEqual(config.frequency, Frequencies.monthly) + self.assertEqual(config.auto_clean, True) + self.assertEqual(config.cdftools_path, '') + self.assertEqual(config.con_files, '') + self.assertEqual(config.data_adaptor, 'CMOR') + self.assertEqual(config.get_commands(), (['diag1', 'diag2'])) + + def test_alias(self): + config = Config() + self.mock_parser.add_value('ALIAS', 'diag1', 'diag3') + self._parse(config) + self.assertEqual(config.get_commands(), ['diag3', 'diag2']) + + def test_auto_clean_ram_disk(self): + config = Config() + self.mock_parser.add_value('DIAGNOSTICS', 'AUTO_CLEAN', False) + self.mock_parser.add_value('DIAGNOSTICS', 'USE_RAMDISK', True) + self._parse(config) + self.assertEqual(config.auto_clean, True) + self.assertEqual(config.use_ramdisk, True) + + def test_data_convention_primavera(self): + config = Config() + self.mock_parser.add_value('DIAGNOSTICS', 'DATA_CONVENTION', 'primavera') + self._parse(config) + self.assertEqual(config.data_convention, 'primavera') + self.assertEqual(config.scratch_masks, '/scratch/Earth/ocean_masks/primavera') + namelist = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', + 'earthdiagnostics/CDFTOOLS_primavera.namlist')) + self.assertEqual(os.environ['NAM_CDF_NAMES'], namelist) + + 
+ + diff --git a/test/unit/test_constants.py b/test/unit/test_constants.py index c538ee09d04f1308204d62eecf9361be2e475c33..943a2b3e722674948e22275ee23a11e612863545 100644 --- a/test/unit/test_constants.py +++ b/test/unit/test_constants.py @@ -1,7 +1,7 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.constants import Basin +from earthdiagnostics.constants import Basin, Basins class TestBasin(TestCase): @@ -10,11 +10,17 @@ class TestBasin(TestCase): self.basin = Basin('Basin') def test_name(self): - self.assertEquals('Basin', self.basin.name) + self.assertEqual('Basin', self.basin.name) def test__eq__(self): self.assertTrue(Basin('Basin') == self.basin) self.assertFalse(Basin('bas') == self.basin) def test__str__(self): - self.assertEquals(str(self.basin), 'Basin') + self.assertEqual(str(self.basin), 'Basin') + + +class TestBasins(TestCase): + + def test_singleton(self): + self.assertIs(Basins(), Basins()) diff --git a/test/unit/test_data_manager.py b/test/unit/test_data_manager.py index 6bdc84b39140e42b8613a147af1acee8e886297a..00d03b8c1edae697eb2aea6586e00a565466f33a 100644 --- a/test/unit/test_data_manager.py +++ b/test/unit/test_data_manager.py @@ -2,37 +2,68 @@ from unittest import TestCase +from mock import Mock + +from earthdiagnostics.datamanager import DataManager +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestDataManager(TestCase): + + def setUp(self): + self.config = Mock() + self.config.experiment.atmos_timestep = 3 + self.config.experiment.ocean_timestep = 6 + + self.data_manager = DataManager(self.config) + + def test_prepare(self): + self.data_manager.prepare() + + def test_create_link(self): + self.data_manager.create_link(None, '', None, '', '', False, Mock()) + + def test_link_file(self): + self.data_manager.link_file(None, '', None, '', 0, 0) + + def test_file_exists(self): + with self.assertRaises(NotImplementedError): + self.data_manager.file_exists(None, '', '', 0, 0) + + def 
test_request_chunk(self): + with self.assertRaises(NotImplementedError): + self.data_manager.request_chunk(None, '', '', 0, 0) + + def test_request_year(self): + with self.assertRaises(NotImplementedError): + self.data_manager.request_year(None, '', '', '', 0, 0) + + def test_declare_chunk(self): + with self.assertRaises(NotImplementedError): + self.data_manager.declare_chunk(None, '', '', 0, 0) + + def test_declare_year(self): + with self.assertRaises(NotImplementedError): + self.data_manager.declare_year(None, '', '', 0, 0) + + def test_get_varfolder(self): + self.assertEqual(self.data_manager.get_varfolder(ModelingRealms.ocean, 'var'), + 'var_f6h') + self.assertEqual(self.data_manager.get_varfolder(ModelingRealms.atmos, 'var'), + 'var_f3h') + frequency = Mock() + frequency.__str__ = Mock() + frequency.__str__.return_value = '3hr' + frequency.frequency = '3hr' + self.assertEqual(self.data_manager.get_varfolder(ModelingRealms.atmos, 'var', frequency=frequency), + 'var') + + def test_get_final_varname(self): + self.assertEqual(self.data_manager._get_final_var_name(None, 'var'), 'var') + box = Mock() + box.get_lon_str.return_value = '_lon' + box.get_lat_str.return_value = '_lat' + box.get_depth_str.return_value = '_depth' + self.assertEqual(self.data_manager._get_final_var_name(box, 'var'), 'var_lon_lat_depth') + -from earthdiagnostics.datamanager import UnitConversion - - -class TestConversion(TestCase): - - def test__init(self): - conversion = UnitConversion('km', 'm', 1000, 0) - self.assertEqual(conversion.source, 'km') - self.assertEqual(conversion.destiny, 'm') - self.assertEqual(conversion.factor, 1000) - self.assertEqual(conversion.offset, 0) - - def test_add_conversion(self): - UnitConversion._dict_conversions = dict() - conversion = UnitConversion('km', 'm', 1000, 0) - UnitConversion.add_conversion(conversion) - self.assertIs(UnitConversion._dict_conversions[('km', 'm')], conversion) - UnitConversion._dict_conversions = dict() - - def 
test_get_factor_offset(self): - UnitConversion._dict_conversions = dict() - conversion = UnitConversion('km', 'm', 1000, 0) - UnitConversion.add_conversion(conversion) - self.assertEqual(UnitConversion.get_conversion_factor_offset('km', 'm'), (1000, 0)) - self.assertEqual(UnitConversion.get_conversion_factor_offset('m', 'km'), (1 / 1000.0, 0)) - self.assertEqual(UnitConversion.get_conversion_factor_offset('1e3 m', 'km'), (1, 0)) - self.assertEqual(UnitConversion.get_conversion_factor_offset('10^3 m', 'km'), (1, 0)) - self.assertEqual(UnitConversion.get_conversion_factor_offset('km', '1e3 m'), (1, 0)) - self.assertEqual(UnitConversion.get_conversion_factor_offset('km', '10^3 m'), (1, 0)) - self.assertEqual(UnitConversion.get_conversion_factor_offset('m', 'm'), (1, 0)) - self.assertEqual(UnitConversion.get_conversion_factor_offset('m²', 'km'), (None, None)) - - UnitConversion._dict_conversions = dict() diff --git a/test/unit/test_datafile.py b/test/unit/test_datafile.py new file mode 100644 index 0000000000000000000000000000000000000000..0b47650e6f28a3fc78a7bce7d17e6c76ad6c7a96 --- /dev/null +++ b/test/unit/test_datafile.py @@ -0,0 +1,82 @@ +# coding=utf-8 +from unittest import TestCase +from mock import Mock + +from earthdiagnostics.datafile import UnitConversion, DataFile, LocalStatus, StorageStatus + + +class TestConversion(TestCase): + + def test__init(self): + conversion = UnitConversion('km', 'm', 1000, 0) + self.assertEqual(conversion.source, 'km') + self.assertEqual(conversion.destiny, 'm') + self.assertEqual(conversion.factor, 1000) + self.assertEqual(conversion.offset, 0) + + def test_add_conversion(self): + UnitConversion._dict_conversions = dict() + conversion = UnitConversion('km', 'm', 1000, 0) + UnitConversion.add_conversion(conversion) + self.assertIs(UnitConversion._dict_conversions[('km', 'm')], conversion) + UnitConversion._dict_conversions = dict() + + def test_get_factor_offset(self): + UnitConversion._dict_conversions = dict() + conversion = 
UnitConversion('km', 'm', 1000, 0) + UnitConversion.add_conversion(conversion) + self.assertEqual(UnitConversion.get_conversion_factor_offset('km', 'm'), (1000, 0)) + self.assertEqual(UnitConversion.get_conversion_factor_offset('m', 'km'), (1 / 1000.0, 0)) + self.assertEqual(UnitConversion.get_conversion_factor_offset('1e3 m', 'km'), (1, 0)) + self.assertEqual(UnitConversion.get_conversion_factor_offset('10^3 m', 'km'), (1, 0)) + self.assertEqual(UnitConversion.get_conversion_factor_offset('km', '1e3 m'), (1, 0)) + self.assertEqual(UnitConversion.get_conversion_factor_offset('km', '10^3 m'), (1, 0)) + self.assertEqual(UnitConversion.get_conversion_factor_offset('m', 'm'), (1, 0)) + self.assertEqual(UnitConversion.get_conversion_factor_offset('m²', 'km'), (None, None)) + + UnitConversion._dict_conversions = dict() + + +class TestDatafile(TestCase): + + def setUp(self): + self.data_file = DataFile() + + def test_download_required(self): + self.assertFalse(self.data_file.download_required()) + + self.data_file.local_status = LocalStatus.READY + self.assertFalse(self.data_file.download_required()) + + self.data_file.local_status = LocalStatus.COMPUTING + self.assertFalse(self.data_file.download_required()) + + self.data_file.local_status = LocalStatus.PENDING + self.data_file.storage_status = StorageStatus.READY + self.assertTrue(self.data_file.download_required()) + + self.data_file.local_status = LocalStatus.PENDING + self.data_file.storage_status = StorageStatus.UPLOADING + self.assertFalse(self.data_file.download_required()) + + def test_upload_required(self): + self.assertFalse(self.data_file.upload_required()) + + self.data_file.local_status = LocalStatus.READY + self.assertFalse(self.data_file.upload_required()) + + self.data_file.local_status = LocalStatus.COMPUTING + self.assertFalse(self.data_file.upload_required()) + + self.data_file.local_status = LocalStatus.READY + self.data_file.storage_status = StorageStatus.PENDING + 
self.assertTrue(self.data_file.upload_required()) + + self.data_file.local_status = LocalStatus.PENDING + self.data_file.storage_status = StorageStatus.READY + self.assertFalse(self.data_file.upload_required()) + + def test_add_modifier(self): + self.assertFalse(self.data_file.has_modifiers()) + self.data_file.add_modifier(Mock()) + self.assertTrue(self.data_file.has_modifiers()) diff --git a/test/unit/test_diagnostic.py b/test/unit/test_diagnostic.py index ae9921fa920affc0833b87dffbdf5d5899c6b806..b1ff6c82e7fef19c093785783716f0f9fb3e40e1 100644 --- a/test/unit/test_diagnostic.py +++ b/test/unit/test_diagnostic.py @@ -271,7 +271,7 @@ class TestDiagnosticVariableListOption(TestCase): var_manager_mock = Mock() var_manager_mock.get_variable.side_effect = (self.get_var_mock('var1'), self.get_var_mock('var2')) diag = DiagnosticVariableListOption(var_manager_mock, 'variables') - self.assertEqual(['var1', 'var2'], diag.parse('var1-var2')) + self.assertEqual(['var1', 'var2'], diag.parse('var1:var2')) def test_parse_one(self): var_manager_mock = Mock() @@ -362,7 +362,7 @@ class TestDiagnostic(TestCase): Diagnostic(None).compute() def test_repr(self): - self.assertEquals(Diagnostic(None).__repr__(), str(Diagnostic(None))) + self.assertEqual(Diagnostic(None).__repr__(), str(Diagnostic(None))) def test_empty_process_options(self): self.assertEqual(len(Diagnostic.process_options(('diag_name',), tuple())), 0) diff --git a/test/unit/test_earthdiags.py b/test/unit/test_earthdiags.py index 122793ec6e24f2c02de20bfbb1c7f2d8132d9d4f..95468b71af8911089fd5083b6f05a1ef04baa881 100644 --- a/test/unit/test_earthdiags.py +++ b/test/unit/test_earthdiags.py @@ -1,8 +1,15 @@ -# # coding=utf-8 -# from unittest import TestCase +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.earthdiags import EarthDiags + # -# from earthdiagnostics.earthdiags import EarthDiags +# class TestEarthDiags(TestCase): # +# def setUp(self): +# self.earthdiags = EarthDiags('path/to/conf') # -# 
class TestEarthDiags(TestCase): -# pass +# self.earthdiags.config() +# +# def test_clean(self): +# self.earthdiags.parse_args() diff --git a/test/unit/test_frequency.py b/test/unit/test_frequency.py index 34e32a3695d7f2f364100573dfbbbad5fe7cda7e..b92069e348857283a1bae08b7994181810170ed4 100644 --- a/test/unit/test_frequency.py +++ b/test/unit/test_frequency.py @@ -2,11 +2,16 @@ from unittest import TestCase from earthdiagnostics.frequency import Frequency -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class TestFrequency(TestCase): + def test_parse(self): + freq = Frequency('m') + self.assertEqual(Frequency.parse('m'), freq) + self.assertIs(Frequency.parse(freq), freq) + def test_not_supported(self): with self.assertRaises(ValueError): Frequency('badfreq') @@ -20,6 +25,9 @@ class TestFrequency(TestCase): def test_get_daily_mean(self): self.assertEqual(Frequency('d').folder_name(VariableType.MEAN), 'daily_mean') + def test_get_weekly_mean(self): + self.assertEqual(Frequency('w').folder_name(VariableType.MEAN), 'weekly_mean') + def test_get_daily_stats(self): self.assertEqual(Frequency('d').folder_name(VariableType.STATISTIC), 'daily_statistics') diff --git a/test/unit/test_modelling_realm.py b/test/unit/test_modelling_realm.py index eec9d0cec9a9055ef6df139f05bf97bd307aedf8..3752346f9d46106d6338064eec46d6b8a37ecf11 100644 --- a/test/unit/test_modelling_realm.py +++ b/test/unit/test_modelling_realm.py @@ -8,9 +8,9 @@ from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms class TestModellingRealms(TestCase): def test_parse(self): - self.assertEquals(ModelingRealms.parse('atmos'), ModelingRealms.atmos) - self.assertEquals(ModelingRealms.parse('atmoschem'), ModelingRealms.atmosChem) - self.assertEquals(ModelingRealms.parse('atmoSChem'), ModelingRealms.atmosChem) + self.assertEqual(ModelingRealms.parse('atmos'), ModelingRealms.atmos) + self.assertEqual(ModelingRealms.parse('atmoschem'), 
ModelingRealms.atmosChem) + self.assertEqual(ModelingRealms.parse('atmoSChem'), ModelingRealms.atmosChem) with self.assertRaises(ValueError): ModelingRealms.parse('badrealm') @@ -20,6 +20,9 @@ class TestModellingRealm(TestCase): def setUp(self): self.realm = ModelingRealm('ocean') + def test__repr__(self): + self.assertEqual(str(self.realm), repr(self.realm)) + def test_constructor_fail_on_bad_realm(self): with self.assertRaises(ValueError): ModelingRealm('badrealm') diff --git a/test/unit/test_obsreconmanager.py b/test/unit/test_obsreconmanager.py new file mode 100644 index 0000000000000000000000000000000000000000..05d98f3527e9627a16607068f40a58af60163eb5 --- /dev/null +++ b/test/unit/test_obsreconmanager.py @@ -0,0 +1,142 @@ +# coding=utf-8 +import os +import shutil +import tempfile +from unittest import TestCase +import mock +from mock import Mock + +from earthdiagnostics.obsreconmanager import ObsReconManager +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestObsReconManager(TestCase): + + def setUp(self): + self.config = Mock() + self.config.data_convention = 'specs' + self.config.data_type = 'exp' + self.config.experiment.expid = 'expid' + self.config.experiment.model = 'model' + self.config.experiment.experiment_name = 'expname' + self.config.experiment.institute = 'institute' + self.config.experiment.member_count_start = 0 + self.config.experiment.chunk_size = 12 + self.config.experiment.num_chunks = 1 + self.config.experiment.calendar = 'standard' + self.config.experiment.atmos_timestep = 3 + self.config.experiment.ocean_timestep = 6 + + frequency = Mock() + frequency.__str__ = Mock() + frequency.__str__.return_value = 'mon' + frequency.folder_name.return_value = 'monthly_mean' + + self.config.frequency = frequency + + self.config.cmor.initialization_number = 1 + self.config.cmor.version = '' + self.config.cmor.default_ocean_grid = 'ocean_grid' + self.config.cmor.default_atmos_grid = 'atmos_grid' + self.config.cmor.activity = 
'activity' + self.config.cmor.force = False + self.config.cmor.force_untar = False + + self.tmp_dir = tempfile.mkdtemp() + self._make_data_folder() + self.config.data_dir = self.tmp_dir + + def _make_data_folder(self): + os.makedirs(os.path.join(self.tmp_dir, self.config.data_type, self.config.experiment.institute, + self.config.experiment.model, self.config.experiment.expid)) + + def tearDown(self): + if os.path.exists(self.tmp_dir): + shutil.rmtree(self.tmp_dir) + + def test_not_data_dir(self): + shutil.rmtree(self.tmp_dir) + with self.assertRaises(Exception): + ObsReconManager(self.config) + + def test_bad_num_chunks(self): + self.config.experiment.chunk_size = 3 + self.config.data_type = 'obs' + self._make_data_folder() + with self.assertRaises(Exception): + ObsReconManager(self.config) + + def test_get_file_path(self): + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + frequency = Mock() + frequency.__str__ = Mock() + frequency.__str__.return_value = 'frequency' + frequency.folder_name.return_value = 'folder_name' + + manager = ObsReconManager(self.config) + self.assertEqual(manager.get_file_path('19900101', ModelingRealms.ocean, 'var', frequency, None), + os.path.join(self.tmp_dir, 'exp/institute/model/folder_name/var/var_19900101.nc')) + + def test_get_file_path_default_freq(self): + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + + manager = ObsReconManager(self.config) + self.assertEqual(manager.get_file_path('19900101', ModelingRealms.ocean, 'var', None, None), + os.path.join(self.tmp_dir, 'exp/institute/model/monthly_mean/var/var_19900101.nc')) + + def test_get_file_path_no_startdate(self): + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + + manager = ObsReconManager(self.config) + self.assertEqual(manager.get_file_path(None, ModelingRealms.ocean, 'var', None, None), + os.path.join(self.tmp_dir, 
'exp/institute/model/monthly_mean/var/var.nc')) + + def test_get_file_path_recon_weekly(self): + cmor_var = Mock() + omon = Mock() + omon.name = 'Omon' + cmor_var.get_table.return_value = omon + + self.config.data_type = 'recon' + self.config.experiment.chunk_size = 1 + self._make_data_folder() + + manager = ObsReconManager(self.config) + self.assertEqual(manager.get_file_path('19900101', ModelingRealms.ocean, 'var', None, None), + os.path.join(self.tmp_dir, 'recon/institute/model/monthly_mean/var/var_199001.nc')) + + @mock.patch('earthdiagnostics.obsreconmanager.ObsReconManager.get_file_path') + def test_declare_chunk(self, mock_get_file_path): + mock_get_file_path.return_value = '/path/to/file' + cmor_manager = ObsReconManager(self.config) + datafile = cmor_manager.declare_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1) + self.assertEqual(datafile.remote_file, '/path/to/file') + + mock_frequency = Mock() + datafile = cmor_manager.declare_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1, + frequency=mock_frequency) + self.assertEqual(datafile.remote_file, '/path/to/file') + + self.config.var_manager.get_variable.return_value = None + datafile = cmor_manager.declare_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1) + self.assertEqual(datafile.remote_file, '/path/to/file') + + @mock.patch('earthdiagnostics.obsreconmanager.ObsReconManager.get_file_path') + def test_request_chunk(self, mock_get_file_path): + mock_get_file_path.return_value = '/path/to/file' + cmor_manager = ObsReconManager(self.config) + datafile = cmor_manager.request_chunk(ModelingRealms.ocean, 'var', '20010101', 1, 1) + self.assertEqual(datafile.remote_file, '/path/to/file') + + + + diff --git a/test/unit/test_publisher.py b/test/unit/test_publisher.py index 5fe325ff819c7840370abbab9e7cc41d8e09d326..a91248322496017267a5de99c41f79642c9f8a3b 100644 --- a/test/unit/test_publisher.py +++ b/test/unit/test_publisher.py @@ -35,3 +35,14 @@ class TestPublisher(TestCase): pub.dispatch(1, 2, 3) 
suscriber.callback.assert_called_with(1, 2, 3) + def test_only_suscriber(self): + suscriber = Mock() + pub = Publisher() + pub.subscribe(suscriber, callback=suscriber.callback) + self.assertTrue(pub.only_suscriber(suscriber)) + + suscriber2 = Mock() + pub.subscribe(suscriber2, callback=suscriber.callback) + self.assertFalse(pub.only_suscriber(suscriber)) + self.assertFalse(pub.only_suscriber(suscriber2)) + diff --git a/test/unit/test_utils.py b/test/unit/test_utils.py index 70664ab41837731c0925b21b35b0f64376ad8f9f..2162cab9b43f653e2f30cfeee4be8d3f6ef6b7ef 100644 --- a/test/unit/test_utils.py +++ b/test/unit/test_utils.py @@ -11,17 +11,17 @@ class TestTempFile(TestCase): TempFile.prefix = 'prefix' def test_get(self): - self.assertEquals(TempFile.get('tempfile', clean=False), '/tmp/tempfile') - self.assertEquals(TempFile.get('tempfile2', clean=True), '/tmp/tempfile2') + self.assertEqual(TempFile.get('tempfile', clean=False), '/tmp/tempfile') + self.assertEqual(TempFile.get('tempfile2', clean=True), '/tmp/tempfile2') self.assertNotIn('/tmp/tempfile', TempFile.files) self.assertIn('/tmp/tempfile2', TempFile.files) TempFile.autoclean = True - self.assertEquals(TempFile.get('tempfile3'), '/tmp/tempfile3') + self.assertEqual(TempFile.get('tempfile3'), '/tmp/tempfile3') self.assertIn('/tmp/tempfile3', TempFile.files) TempFile.autoclean = False - self.assertEquals(TempFile.get('tempfile4'), '/tmp/tempfile4') + self.assertEqual(TempFile.get('tempfile4'), '/tmp/tempfile4') self.assertNotIn('/tmp/tempfile4', TempFile.files) with mock.patch('tempfile.mkstemp') as mkstemp_mock: @@ -46,7 +46,7 @@ class TestTempFile(TestCase): TempFile.get('tempfile') TempFile.get('tempfile2') TempFile.clean() - self.assertEquals(len(TempFile.files), 0) + self.assertEqual(len(TempFile.files), 0) class TestUtils(TestCase): diff --git a/test/unit/test_variable.py b/test/unit/test_variable.py index c53baa633654b4b31525c08e25dad727b52f863e..6fd11e7b85714b7b18dbe3d76663ad4efac66f3b 100644 --- 
a/test/unit/test_variable.py +++ b/test/unit/test_variable.py @@ -2,7 +2,7 @@ from mock import Mock from unittest import TestCase -from earthdiagnostics.variable import CMORTable, VariableAlias, Variable, VariableJsonException +from earthdiagnostics.variable import CMORTable, VariableAlias, Variable, VariableJsonException, VariableManager from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.constants import Basins from earthdiagnostics.frequency import Frequencies @@ -14,18 +14,41 @@ class TestCMORTable(TestCase): self.frequency = Mock() def test_str(self): - self.assertEquals(str(CMORTable('name', 'm', 'Month YEAR')), 'name') + self.assertEqual(str(CMORTable('name', 'm', 'Month YEAR')), 'name') + + def test_repr(self): + self.assertEqual(repr(CMORTable('name', 'm', 'Month YEAR')), 'name (m, Month YEAR)') + + def test_lt(self): + self.assertLess(CMORTable('a', 'm', 'Month YEAR'), CMORTable('b', 'm', 'Month YEAR')) class TestVariableAlias(TestCase): def test_str(self): alias = VariableAlias('alias') - self.assertEquals(str(alias), 'alias') + self.assertEqual(str(alias), 'alias') + alias.basin = 'basin' + self.assertEqual(str(alias), 'alias Basin: basin') + alias.grid = 'grid' + self.assertEqual(str(alias), 'alias Basin: basin Grid: grid') + + def test_eq(self): + alias = VariableAlias('alias') + self.assertEqual(alias, VariableAlias('alias')) + alias.basin = 'basin' + self.assertEqual(alias, VariableAlias('alias', 'basin')) + alias.grid = 'grid' + self.assertEqual(alias, VariableAlias('alias', 'basin', 'grid')) + self.assertNotEqual(alias, None) + + def test_ne(self): + alias = VariableAlias('alias') + self.assertNotEqual(alias, VariableAlias('alias2')) alias.basin = 'basin' - self.assertEquals(str(alias), 'alias Basin: basin') + self.assertNotEqual(alias, VariableAlias('alias', 'basin2')) alias.grid = 'grid' - self.assertEquals(str(alias), 'alias Basin: basin Grid: grid') + self.assertNotEqual(alias, VariableAlias('alias', 'basin', 
'grid2')) class TestVariable(TestCase): @@ -182,3 +205,74 @@ class TestVariable(TestCase): var = Variable() with self.assertRaises(ValueError): var.get_table(Frequencies.daily, 'specs') + + +class TestVariableManager(TestCase): + + def setUp(self): + self.var_manager = VariableManager() + + def tearDown(self): + self.var_manager.clean() + + def test_load_primavera(self): + self.var_manager.load_variables('primavera') + + def test_load_cmip6(self): + self.var_manager.load_variables('cmip6') + + def test_load_specs(self): + self.var_manager.load_variables('specs') + + def test_load_preface(self): + self.var_manager.load_variables('preface') + + def test_bad_load(self): + with self.assertRaises(Exception): + self.var_manager.load_variables('badconvention') + + def test_get_variable(self): + var1 = self._get_var_mock('var1', ['var1_alias']) + self.var_manager.register_variable(var1) + self.var_manager.create_aliases_dict() + + self.assertIs(self.var_manager.get_variable('var1'), var1) + self.assertIs(self.var_manager.get_variable('var1_alias'), var1) + + self.assertIsNone(self.var_manager.get_variable('var2')) + self.assertIsNone(self.var_manager.get_variable('var2', True)) + + def test_get_variable_and_alias(self): + var1 = self._get_var_mock('var1', ['var1_alias']) + self.var_manager.register_variable(var1) + self.var_manager.create_aliases_dict() + + self.assertEqual(self.var_manager.get_variable_and_alias('var1'), (VariableAlias('var1'), var1)) + self.assertEqual(self.var_manager.get_variable_and_alias('var1_alias'), (VariableAlias('var1_alias'), var1)) + + self.assertEqual(self.var_manager.get_variable_and_alias('var2'), (None, None)) + self.assertEqual(self.var_manager.get_variable_and_alias('var2', True), (None, None)) + + def _get_var_mock(self, name, aliases): + var1 = Mock() + var1.short_name = name + + def get_alias_mock(alias): + alias_mock = Mock() + alias_mock.alias = alias + alias_mock.grid = None + alias_mock.basin = None + return alias_mock + + 
var1.known_aliases = [get_alias_mock(alias) for alias in [name] + aliases] + return var1 + + def test_get_all_variables(self): + var1 = self._get_var_mock('var1', ['var1_alias']) + var2 = self._get_var_mock('var2', ['var2_alias']) + + self.var_manager.register_variable(var1) + self.var_manager.register_variable(var2) + self.var_manager.create_aliases_dict() + + self.assertEqual(self.var_manager.get_all_variables(), [var1, var2]) diff --git a/test/unit/test_variable_type.py b/test/unit/test_variable_type.py index 28dd44f56c34599ce1c9d293d6a1a430e71709d7..5b98e6aef1ab19b87b19da8040fbe6ba818f5b46 100644 --- a/test/unit/test_variable_type.py +++ b/test/unit/test_variable_type.py @@ -1,7 +1,7 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.variable_type import VariableType +from earthdiagnostics.variable import VariableType class TestVariableType(TestCase): diff --git a/test/unit/test_workmanager.py b/test/unit/test_workmanager.py new file mode 100644 index 0000000000000000000000000000000000000000..ad5c88b64a2073169d534892f99a2041f3f9ae73 --- /dev/null +++ b/test/unit/test_workmanager.py @@ -0,0 +1,54 @@ +# coding=utf-8 +from unittest import TestCase +from earthdiagnostics.work_manager import Downloader, WorkManager +from earthdiagnostics.diagnostic import Diagnostic +from mock import Mock + + +class TestDownloader(TestCase): + + def setUp(self): + self.downloader = Downloader() + + def test_start_and_stop(self): + self.downloader.start() + self.assertTrue(self.downloader._thread.is_alive()) + self.downloader.shutdown() + self.assertFalse(self.downloader._thread.is_alive()) + + def test_submit(self): + datafile = Mock() + self.downloader.submit(datafile) + + def test_download(self): + datafile = Mock() + self.downloader.submit(datafile) + self.downloader.start() + self.downloader.shutdown() + + datafile.download.assert_called_once() + + +class TestWorkManager(TestCase): + + + def setUp(self): + self.config = Mock() + self.data_manager = Mock() + 
self.work_manager = WorkManager(self.config, self.data_manager) + + def test_prepare_job_list(self): + + class Diag1(Diagnostic): + alias = 'diag1' + + @classmethod + def generate_jobs(cls, diags, options): + cls.process_options(options, []) + diag = Diag1(self.data_manager) + diag.add_subjob(Diag1(self.data_manager)) + return (diag,) + + self.config.get_commands.return_value = ['diag1', 'baddiag', 'diag1,badoption'] + Diagnostic.register(Diag1) + self.work_manager.prepare_job_list()