diff --git a/CHANGELOG.md b/CHANGELOG.md index 9314db36784f5fbe0a0d49f42c993b9a301554bf..5d55066268e29f54f152e3812e772083687590e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,19 @@ # NES CHANGELOG -### 0.0.0 - 2022/04/20 - - - Skeleton \ No newline at end of file +### 0.0.1 +* Changes and new features: + * First beta release + * Open NetCDF: + * Regular Latitude-Longitude + * Rotated Lat-Lon + * Statistics: + * Daily_mean + * Daily_max + * Daily_min + * Last time step + * Parallelization: + * By time axis + * By Y axis + * By X axis + * Create NetCDF: + * Regular Latitude-Longitude \ No newline at end of file diff --git a/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd b/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd index 256a6b05f01553700688ba425987c5e52884b6d9..7b2fd04640aa2992d698f7e91096c94c676a4c83 100644 --- a/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd +++ b/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd @@ -30,8 +30,6 @@ module load cfunits/1.8-foss-2019b-Python-3.7.4 module load xarray/0.19.0-foss-2019b-Python-3.7.4 export PYTHONPATH=/gpfs/scratch/bsc32/bsc32538/NES_tests/NES:${PYTHONPATH} -export PYTHONPATH=/gpfs/scratch/bsc32/bsc32538/SNES_tests/NES/nes:${PYTHONPATH} - # DON'T USE ADDRESS BELOW. 
# DO USE TOKEN BELOW diff --git a/Jupyter_notebooks/NES_simple_test.ipynb b/Jupyter_notebooks/NES_simple_test.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b0eca7fee0afad70b6d657a84995bcc5155959e4 --- /dev/null +++ b/Jupyter_notebooks/NES_simple_test.ipynb @@ -0,0 +1,501 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NES - NetCDF for Earth Science" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from nes import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Open NetCDF" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 164 ms, sys: 120 ms, total: 284 ms\n", + "Wall time: 14.5 s\n" + ] + } + ], + "source": [ + "cams_file = \"/gpfs/scratch/bsc32/bsc32538/a4mg/nmmb-monarch/ARCHIVE/000/2022050312/MONARCH_d01_2022050312.nc\"\n", + "%time nessy = open_netcdf(path=cams_file, info=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nessy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time\n", + "NES.time : list of time steps (datetime)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[datetime.datetime(2022, 5, 3, 12, 0),\n", + " datetime.datetime(2022, 5, 3, 13, 0),\n", + " datetime.datetime(2022, 5, 3, 14, 0),\n", + " datetime.datetime(2022, 5, 3, 15, 0),\n", + " datetime.datetime(2022, 5, 3, 16, 0),\n", + " datetime.datetime(2022, 5, 3, 17, 0),\n", + " datetime.datetime(2022, 5, 3, 18, 0),\n", + " datetime.datetime(2022, 5, 3, 19, 0),\n", + " datetime.datetime(2022, 5, 3, 20, 
0),\n", + " datetime.datetime(2022, 5, 3, 21, 0),\n", + " datetime.datetime(2022, 5, 3, 22, 0),\n", + " datetime.datetime(2022, 5, 3, 23, 0),\n", + " datetime.datetime(2022, 5, 4, 0, 0),\n", + " datetime.datetime(2022, 5, 4, 1, 0),\n", + " datetime.datetime(2022, 5, 4, 2, 0),\n", + " datetime.datetime(2022, 5, 4, 3, 0),\n", + " datetime.datetime(2022, 5, 4, 4, 0),\n", + " datetime.datetime(2022, 5, 4, 5, 0),\n", + " datetime.datetime(2022, 5, 4, 6, 0),\n", + " datetime.datetime(2022, 5, 4, 7, 0),\n", + " datetime.datetime(2022, 5, 4, 8, 0),\n", + " datetime.datetime(2022, 5, 4, 9, 0),\n", + " datetime.datetime(2022, 5, 4, 10, 0),\n", + " datetime.datetime(2022, 5, 4, 11, 0),\n", + " datetime.datetime(2022, 5, 4, 12, 0),\n", + " datetime.datetime(2022, 5, 4, 13, 0),\n", + " datetime.datetime(2022, 5, 4, 14, 0),\n", + " datetime.datetime(2022, 5, 4, 15, 0),\n", + " datetime.datetime(2022, 5, 4, 16, 0),\n", + " datetime.datetime(2022, 5, 4, 17, 0),\n", + " datetime.datetime(2022, 5, 4, 18, 0),\n", + " datetime.datetime(2022, 5, 4, 19, 0),\n", + " datetime.datetime(2022, 5, 4, 20, 0),\n", + " datetime.datetime(2022, 5, 4, 21, 0),\n", + " datetime.datetime(2022, 5, 4, 22, 0),\n", + " datetime.datetime(2022, 5, 4, 23, 0),\n", + " datetime.datetime(2022, 5, 5, 0, 0),\n", + " datetime.datetime(2022, 5, 5, 1, 0),\n", + " datetime.datetime(2022, 5, 5, 2, 0),\n", + " datetime.datetime(2022, 5, 5, 3, 0),\n", + " datetime.datetime(2022, 5, 5, 4, 0),\n", + " datetime.datetime(2022, 5, 5, 5, 0),\n", + " datetime.datetime(2022, 5, 5, 6, 0),\n", + " datetime.datetime(2022, 5, 5, 7, 0),\n", + " datetime.datetime(2022, 5, 5, 8, 0),\n", + " datetime.datetime(2022, 5, 5, 9, 0),\n", + " datetime.datetime(2022, 5, 5, 10, 0),\n", + " datetime.datetime(2022, 5, 5, 11, 0),\n", + " datetime.datetime(2022, 5, 5, 12, 0),\n", + " datetime.datetime(2022, 5, 5, 13, 0),\n", + " datetime.datetime(2022, 5, 5, 14, 0),\n", + " datetime.datetime(2022, 5, 5, 15, 0),\n", + " datetime.datetime(2022, 5, 5, 
16, 0),\n", + " datetime.datetime(2022, 5, 5, 17, 0),\n", + " datetime.datetime(2022, 5, 5, 18, 0),\n", + " datetime.datetime(2022, 5, 5, 19, 0),\n", + " datetime.datetime(2022, 5, 5, 20, 0),\n", + " datetime.datetime(2022, 5, 5, 21, 0),\n", + " datetime.datetime(2022, 5, 5, 22, 0),\n", + " datetime.datetime(2022, 5, 5, 23, 0),\n", + " datetime.datetime(2022, 5, 6, 0, 0),\n", + " datetime.datetime(2022, 5, 6, 1, 0),\n", + " datetime.datetime(2022, 5, 6, 2, 0),\n", + " datetime.datetime(2022, 5, 6, 3, 0),\n", + " datetime.datetime(2022, 5, 6, 4, 0),\n", + " datetime.datetime(2022, 5, 6, 5, 0),\n", + " datetime.datetime(2022, 5, 6, 6, 0),\n", + " datetime.datetime(2022, 5, 6, 7, 0),\n", + " datetime.datetime(2022, 5, 6, 8, 0),\n", + " datetime.datetime(2022, 5, 6, 9, 0),\n", + " datetime.datetime(2022, 5, 6, 10, 0),\n", + " datetime.datetime(2022, 5, 6, 11, 0),\n", + " datetime.datetime(2022, 5, 6, 12, 0),\n", + " datetime.datetime(2022, 5, 6, 13, 0),\n", + " datetime.datetime(2022, 5, 6, 14, 0),\n", + " datetime.datetime(2022, 5, 6, 15, 0),\n", + " datetime.datetime(2022, 5, 6, 16, 0),\n", + " datetime.datetime(2022, 5, 6, 17, 0),\n", + " datetime.datetime(2022, 5, 6, 18, 0),\n", + " datetime.datetime(2022, 5, 6, 19, 0),\n", + " datetime.datetime(2022, 5, 6, 20, 0),\n", + " datetime.datetime(2022, 5, 6, 21, 0),\n", + " datetime.datetime(2022, 5, 6, 22, 0),\n", + " datetime.datetime(2022, 5, 6, 23, 0),\n", + " datetime.datetime(2022, 5, 7, 0, 0),\n", + " datetime.datetime(2022, 5, 7, 1, 0),\n", + " datetime.datetime(2022, 5, 7, 2, 0),\n", + " datetime.datetime(2022, 5, 7, 3, 0),\n", + " datetime.datetime(2022, 5, 7, 4, 0),\n", + " datetime.datetime(2022, 5, 7, 5, 0),\n", + " datetime.datetime(2022, 5, 7, 6, 0),\n", + " datetime.datetime(2022, 5, 7, 7, 0),\n", + " datetime.datetime(2022, 5, 7, 8, 0),\n", + " datetime.datetime(2022, 5, 7, 9, 0),\n", + " datetime.datetime(2022, 5, 7, 10, 0),\n", + " datetime.datetime(2022, 5, 7, 11, 0),\n", + " datetime.datetime(2022, 
5, 7, 12, 0),\n", + " datetime.datetime(2022, 5, 7, 13, 0),\n", + " datetime.datetime(2022, 5, 7, 14, 0),\n", + " datetime.datetime(2022, 5, 7, 15, 0),\n", + " datetime.datetime(2022, 5, 7, 16, 0),\n", + " datetime.datetime(2022, 5, 7, 17, 0),\n", + " datetime.datetime(2022, 5, 7, 18, 0),\n", + " datetime.datetime(2022, 5, 7, 19, 0),\n", + " datetime.datetime(2022, 5, 7, 20, 0),\n", + " datetime.datetime(2022, 5, 7, 21, 0),\n", + " datetime.datetime(2022, 5, 7, 22, 0),\n", + " datetime.datetime(2022, 5, 7, 23, 0),\n", + " datetime.datetime(2022, 5, 8, 0, 0)]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nessy.time\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Level, Latitude, Longitude" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': masked_array(data=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,\n", + " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],\n", + " mask=False,\n", + " fill_value=999999,\n", + " dtype=int32),\n", + " 'dimensions': ('lm',),\n", + " 'units': '',\n", + " 'long_name': 'layer id'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nessy.lev" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': masked_array(\n", + " data=[[16.371021, 16.43293 , 16.494629, ..., 16.494629, 16.43293 ,\n", + " 16.371021],\n", + " [16.503883, 16.565914, 16.627739, ..., 16.627739, 16.565918,\n", + " 16.503883],\n", + " [16.636723, 16.69888 , 16.760828, ..., 16.760828, 16.698881,\n", + " 16.636723],\n", + " ...,\n", + " [58.41168 , 58.525536, 58.63936 , ..., 58.63936 , 58.525547,\n", + " 58.41168 ],\n", + " [58.49049 , 58.604454, 58.718372, ..., 58.718372, 58.604454,\n", + " 58.49049 ],\n", + " [58.56883 , 58.6829 , 58.796925, ..., 
58.796925, 58.682903,\n", + " 58.56883 ]],\n", + " mask=False,\n", + " fill_value=1e+20,\n", + " dtype=float32),\n", + " 'dimensions': ('rlat', 'rlon'),\n", + " 'long_name': 'latitude',\n", + " 'units': 'degrees_north',\n", + " 'standard_name': 'latitude',\n", + " 'coordinates': 'lon lat'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nessy.lat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Variables\n", + "\n", + "- List of variables in lazy mode: No data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['lmp', 'IM', 'JM', 'LM', 'IHRST', 'I_PAR_STA', 'J_PAR_STA', 'NPHS', 'NCLOD', 'NHEAT', 'NPREC', 'NRDLW', 'NRDSW', 'NSRFC', 'AVGMAXLEN', 'MDRMINout', 'MDRMAXout', 'MDIMINout', 'MDIMAXout', 'IDAT', 'DXH', 'SG1', 'SG2', 'DSG1', 'DSG2', 'SGML1', 'SGML2', 'SLDPTH', 'ISLTYP', 'IVGTYP', 'NCFRCV', 'NCFRST', 'FIS', 'GLAT', 'GLON', 'PD', 'VLAT', 'VLON', 'ACPREC', 'CUPREC', 'MIXHT', 'PBLH', 'RLWTOA', 'RSWIN', 'U10', 'USTAR', 'V10', 'RMOL', 'T2', 'relative_humidity_2m', 'T', 'U', 'V', 'SH2O', 'SMC', 'STC', 'AERO_ACPREC', 'AERO_CUPREC', 'AERO_DEPDRY', 'AERO_OPT_R', 'DRE_SW_TOA', 'DRE_SW_SFC', 'DRE_LW_TOA', 'DRE_LW_SFC', 'ENG_SW_SFC', 'ADRYDEP', 'WETDEP', 'PH_NO2', 'HSUM', 'POLR', 'aerosol_optical_depth_dim', 'aerosol_optical_depth', 'satellite_AOD_dim', 'satellite_AOD', 'aerosol_loading_dim', 'aerosol_loading', 'clear_sky_AOD_dim', 'clear_sky_AOD', 'layer_thickness', 'mid_layer_pressure', 'interface_pressure', 'relative_humidity', 'mid_layer_height', 'mid_layer_height_agl', 'air_density', 'dry_pm10_mass', 'dry_pm2p5_mass', 'QC', 'QR', 'QS', 'QG', 'aero_dust_001', 'aero_dust_002', 'aero_dust_003', 'aero_dust_004', 'aero_dust_005', 'aero_dust_006', 'aero_dust_007', 'aero_dust_008', 'aero_ssa_001', 'aero_ssa_002', 'aero_ssa_003', 'aero_ssa_004', 'aero_ssa_005', 'aero_ssa_006', 'aero_ssa_007', 
'aero_ssa_008', 'aero_om_001', 'aero_om_002', 'aero_om_003', 'aero_om_004', 'aero_om_005', 'aero_om_006', 'aero_bc_001', 'aero_bc_002', 'aero_so4_001', 'aero_no3_001', 'aero_no3_002', 'aero_no3_003', 'aero_nh4_001', 'aero_unsp_001', 'aero_unsp_002', 'aero_unsp_003', 'aero_unsp_004', 'aero_unsp_005', 'aero_pol_001', 'aero_pol_002', 'aero_pol_003', 'aero_pol_004', 'aero_pol_005', 'aero_pol_006', 'aero_pol_007', 'aero_pol_008', 'aero_pol_009', 'aero_pol_010', 'NO2', 'NO', 'O3', 'NO3', 'N2O5', 'HNO3', 'HONO', 'PNA', 'H2O2', 'NTR', 'ROOH', 'FORM', 'ALD2', 'ALDX', 'PAR', 'CO', 'MEPX', 'MEOH', 'FACD', 'PAN', 'PACD', 'AACD', 'PANX', 'OLE', 'ETH', 'IOLE', 'TOL', 'CRES', 'OPEN', 'MGLY', 'XYL', 'ISOP', 'ISPD', 'TERP', 'SO2', 'SULF', 'ETOH', 'ETHA', 'CL2', 'HOCL', 'FMCL', 'HCL', 'BENZENE', 'SESQ', 'NH3', 'DMS', 'SOAP_I', 'SOAP_T', 'SOAP_F', 'SOAP_A', 'O', 'O1D', 'OH', 'HO2', 'XO2', 'XO2N', 'MEO2', 'HCO3', 'C2O3', 'CXO3', 'ROR', 'TO2', 'TOLRO2', 'CRO', 'XYLRO2', 'ISOPRXN', 'TRPRXN', 'SULRXN', 'CL', 'CLO', 'TOLNRXN', 'TOLHRXN', 'XYLNRXN', 'XYLHRXN', 'BENZRO2', 'BNZNRXN', 'BNZHRXN', 'SESQRXN', 'aerosol_extinction_dim', 'aerosol_extinction_DUST_1', 'aerosol_extinction_DUST_2', 'aerosol_extinction_DUST_3', 'aerosol_extinction_DUST_4', 'aerosol_extinction_DUST_5', 'aerosol_extinction_DUST_6', 'aerosol_extinction_DUST_7', 'aerosol_extinction_DUST_8', 'aerosol_extinction_SALT_total', 'aerosol_extinction_OM_total', 'aerosol_extinction_BC_total', 'aerosol_extinction_SO4_total', 'aerosol_extinction_NO3_total', 'aerosol_extinction_NH4_total', 'aerosol_extinction_UNSPC_1', 'aerosol_extinction_UNSPC_2', 'aerosol_extinction_UNSPC_3', 'aerosol_extinction_UNSPC_4', 'aerosol_extinction_UNSPC_5', 'aerosol_extinction_POLLEN_total'])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nessy.variables.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"{'O3': {'data': None,\n", + " 'dimensions': ('time', 'lm', 'rlat', 'rlon'),\n", + " 'long_name': 'TRACERS_054',\n", + " 'units': 'unknown',\n", + " 'standard_name': 'TRACERS_054',\n", + " 'coordinates': 'lon lat',\n", + " 'grid_mapping': 'rotated_pole'}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Selecting only one variable and descarting the rest.\n", + "nessy.keep_vars('O3')\n", + "nessy.variables" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rank 000: Loading O3 var (1/1)\n", + "Rank 000: Loaded O3 var ((109, 24, 361, 467))\n", + "CPU times: user 1.22 s, sys: 6.8 s, total: 8.02 s\n", + "Wall time: 41.7 s\n" + ] + } + ], + "source": [ + "# Loading variable data from NetCDF file\n", + "%time nessy.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(109, 24, 361, 467)\n", + "('time', 'lm', 'rlat', 'rlon')\n" + ] + } + ], + "source": [ + "print(nessy.variables['O3']['data'].shape)\n", + "print(nessy.variables['O3']['dimensions'])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.13 s, sys: 601 ms, total: 1.73 s\n", + "Wall time: 14.4 s\n" + ] + } + ], + "source": [ + "# Writing NetCDF\n", + "%time nessy.to_netcdf('o3_test.nc')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 440 ms, sys: 80.1 ms, total: 520 ms\n", + "Wall time: 522 ms\n" + ] + } + ], + "source": [ + "%time nessy.daily_statistic(op=\"mean\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(5, 24, 361, 467)\n", + "('time', 'lm', 'rlat', 'rlon')\n" + ] + } + ], + "source": [ + "print(nessy.variables['O3']['data'].shape)\n", + "print(nessy.variables['O3']['dimensions'])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 43 ms, sys: 32.2 ms, total: 75.1 ms\n", + "Wall time: 693 ms\n" + ] + } + ], + "source": [ + "%time nessy.to_netcdf('o3_daily_mean_test.nc')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Metadata 'cell_methods': time: mean (interval: 1hr)\n", + "Time: [datetime.datetime(2022, 5, 3, 0, 0), datetime.datetime(2022, 5, 4, 0, 0), datetime.datetime(2022, 5, 5, 0, 0), datetime.datetime(2022, 5, 6, 0, 0), datetime.datetime(2022, 5, 7, 0, 0)]\n", + "Time bounds: 5\n", + "[datetime.datetime(2022, 5, 3, 12, 0), datetime.datetime(2022, 5, 3, 23, 0)]\n" + ] + } + ], + "source": [ + "print(\"Metadata 'cell_methods':\", nessy.variables['O3']['cell_methods'])\n", + "\n", + "print(\"Time:\", nessy.time)\n", + "print(\"Time bounds:\", len(nessy.time_bnds))\n", + "\n", + "print(nessy.time_bnds[0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/README.md b/README.md index c49354d3c212bcb364b78046449d1ad191a2aa2f..95ad6c17febf3a5bd178894d65f3ee51bcaac208 100644 --- a/README.md +++ b/README.md 
@@ -1,3 +1,5 @@ # NES -NetCDF for Earth Science \ No newline at end of file +NetCDF for Earth Science + +test local \ No newline at end of file diff --git a/nes/__init__.py b/nes/__init__.py index ec5b69a4a8a379747c9cd8a93d12feb59fff91ef..c91cab9952be3171cd2733482352f949d4253e94 100644 --- a/nes/__init__.py +++ b/nes/__init__.py @@ -1,4 +1,4 @@ __date__ = "2022-MM-DD" -__version__ = "0.0.0" +__version__ = "0.0.1" from nes.load_nes import open_netcdf diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 6e4163aecadd76160f063e4db6c1cca685eb3b09..6e0dd878e0baaff7f61bd137a98fcc01646fe4ef 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -15,6 +15,7 @@ from numpy.ma.core import MaskError from copy import deepcopy import datetime + class Nes(object): """ @@ -81,7 +82,7 @@ class Nes(object): Tuple with the name of the dimensions of the Longitude values """ def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None): + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create=False): """ Initialize the Nes class @@ -105,6 +106,7 @@ class Nes(object): Indicates the parallelization method that you want. 
Default over Y axis accepted values: ['Y', 'T'] """ + # MPI Initialization if comm is None: self.comm = MPI.COMM_WORLD @@ -153,7 +155,7 @@ class Nes(object): # Axis limits self.parallel_method = parallel_method - + self.read_axis_limits = self.set_read_axis_limits() # Dimensions screening @@ -162,7 +164,7 @@ class Nes(object): self.lat = self._get_coordinate_values(self._lat, 'Y') self.lon = self._get_coordinate_values(self._lon, 'X') self.time_bnds = self._time_bnds - + self.write_axis_limits = self.set_write_axis_limits() # NetCDF attributes @@ -286,8 +288,9 @@ class Nes(object): self._time_bnds = deepcopy(time_bnds) self.time_bnds = deepcopy(time_bnds) else: - msg = 'WARNING!!! ' - msg += 'The given time bounds list has a different length than the time array. Time bounds will not be set.' + msg = "WARNING!!! " + msg += "The given time bounds list has a different length than the time array. " + msg += "(time:{0}, bnds:{1}). Time bounds will not be set.".format(len(self._time), len(time_bnds)) warnings.warn(msg) else: msg = 'WARNING!!! ' @@ -296,29 +299,201 @@ class Nes(object): warnings.warn(msg) return None - def get_time_id(self, hours, first=True): + def free_vars(self, var_list): """ - Get the index of the corresponding time value. + Erase the selected variables from the variables information. Parameters ---------- - hours : int - Number of hours to avoid - first : bool - Indicates if youy want to avoid from the first hours (True) or from the last (False) - Default: True + var_list : list, str + List (or single string) of the variables to be loaded + """ + if isinstance(var_list, str): + var_list = [var_list] + + if self.is_xarray: + self.dataset = self.dataset.drop_vars(var_list) + self.variables = self.__get_lazy_variables() + else: + for var_name in var_list: + del self.variables[var_name] + return None + + def keep_vars(self, var_list): + """ + Keep the selected variables and erases the rest. 
+ + Parameters + ---------- + var_list : list, str + List (or single string) of the variables to be loaded + """ + if isinstance(var_list, str): + var_list = [var_list] + + to_remove = list(set(self.variables.keys()).difference(set(var_list))) + + self.free_vars(to_remove) + return None + + def get_time_interval(self): + """ + Calculate the interrval of hours between time steps Returns ------- int - Possition of the time array + Number of hours between time steps + """ + time_interval = self._time[1] - self._time[0] + time_interval = int(time_interval.seconds // 3600) + return time_interval + + # ================================================================================================================== + # Statistics + # ================================================================================================================== + + def last_time_step(self): + """ + Modify variables to keep only the last time step + """ + if self.parallel_method == 'T': + raise NotImplementedError("Statistics are not implemented on time axis paralelitation method.") + aux_time = self._time[0].replace(hour=0, minute=0, second=0, microsecond=0) + self._time = [aux_time] + self.time = [aux_time] + + for var_name, var_info in self.variables.items(): + if var_info['data'] is None: + self.load(var_name) + aux_data = var_info['data'][-1, :] + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]['data'] = aux_data + self.hours_start = 0 + self.hours_end = 0 + return None + + def daily_statistic(self, op, type_op='calendar'): """ - from datetime import timedelta - if first: - idx = self._time.index(self._time[0] + timedelta(hours=hours)) + Calculate daily statistic + + Parameters + ---------- + op : str + Statistic to perform. Accepted values: "max", "mean" and "min" + type_op : str + Type of statistic to perform. 
Accepted values: "calendar", "alltsteps", and "withoutt0" + - "calendar": Calculate the statistic using the time metadata. + It will avoid single time step by day calculations + - "alltsteps": Calculate a single time statistic with all the time steps. + - "withoutt0": Calculate a single time statistic with all the time steps avoiding the first one. + """ + if self.parallel_method == 'T': + raise NotImplementedError("Statistics are not implemented on time axis paralelitation method.") + time_interval = self.get_time_interval() + if type_op == 'calendar': + aux_time_bounds = [] + aux_time = [] + day_list = [date_aux.day for date_aux in self.time] + for var_name, var_info in self.variables.items(): + if var_info['data'] is None: + self.load(var_name) + stat_data = None + for day in np.unique(day_list): + idx_first = next(i for i, val in enumerate(day_list, 0) if val == day) + idx_last = len(day_list) - next(i for i, val in enumerate(reversed(day_list), 1) if val == day) + if idx_first != idx_last:  # To avoid single time step statistic + if idx_last != len(day_list): + if op == 'mean': + data_aux = var_info['data'][idx_first:idx_last + 1, :, :, :].mean(axis=0) + elif op == 'max': + data_aux = var_info['data'][idx_first:idx_last + 1, :, :, :].max(axis=0) + elif op == 'min': + data_aux = var_info['data'][idx_first:idx_last + 1, :, :, :].min(axis=0) + else: + raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op)) + aux_time_bounds.append([self.time[idx_first], self.time[idx_last]]) + else: + if op == 'mean': + data_aux = var_info['data'][idx_first:, :, :, :].mean(axis=0) + elif op == 'max': + data_aux = var_info['data'][idx_first:, :, :, :].max(axis=0) + elif op == 'min': + data_aux = var_info['data'][idx_first:, :, :, :].min(axis=0) + else: + raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op)) + aux_time_bounds.append([self.time[idx_first], self.time[-1]]) + + data_aux = data_aux.reshape((1, data_aux.shape[0], 
data_aux.shape[1], data_aux.shape[2])) + aux_time.append(self.time[idx_first].replace(hour=0, minute=0, second=0)) + # Append over time dimension + if stat_data is None: + stat_data = data_aux.copy() + else: + stat_data = np.vstack([stat_data, data_aux]) + self.variables[var_name]['data'] = stat_data + self.variables[var_name]['cell_methods'] = "time: {0} (interval: {1}hr)".format(op, time_interval) + self.time = aux_time + self._time = self.time + + self.set_time_bnds(aux_time_bounds) + + elif type_op == 'alltsteps': + for var_name, var_info in self.variables.items(): + if var_info['data'] is None: + self.load(var_name) + if op == 'mean': + aux_data = var_info['data'].mean(axis=0) + elif op == 'max': + aux_data = var_info['data'].max(axis=0) + elif op == 'min': + aux_data = var_info['data'].min(axis=0) + else: + raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op)) + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]['data'] = aux_data + self.variables[var_name]['cell_methods'] = "time: {0} (interval: {1}hr)".format(op, time_interval) + + aux_time = self.time[0].replace(hour=0, minute=0, second=0, microsecond=0) + aux_time_bounds = [[self.time[0], self.time[-1]]] + self.time = [aux_time] + self._time = self.time + + self.set_time_bnds(aux_time_bounds) + + elif type_op == 'withoutt0': + for var_name, var_info in self.variables.items (): + if var_info['data'] is None: + self.load(var_name) + if op == 'mean': + aux_data = var_info['data'][1:, :].mean(axis=0) + elif op == 'max': + aux_data = var_info['data'][1:, :].max(axis=0) + elif op == 'min': + aux_data = var_info['data'][1:, :].min(axis=0) + else: + raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op)) + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) + 
self.variables[var_name]['data'] = aux_data + self.variables[var_name]['cell_methods'] = "time: {0} (interval: {1}hr)".format(op, time_interval) + aux_time = self._time[1].replace(hour=0, minute=0, second=0, microsecond=0) + aux_time_bounds = [[self._time[1], self._time[-1]]] + self.time = [aux_time] + self._time = self.time + + self.set_time_bnds(aux_time_bounds) else: - idx = self._time.index(self._time[-1] - timedelta(hours=hours)) + 1 - return idx + raise NotImplementedError("Statistic operation type '{0}' is not implemented.".format(type_op)) + self.hours_start = 0 + self.hours_end = 0 + return None + + # ================================================================================================================== + # Reading + # ================================================================================================================== def set_read_axis_limits(self): """ @@ -334,7 +509,7 @@ class Nes(object): 'y_min': None, 'y_max': None, 'z_min': None, 'z_max': None, 't_min': None, 't_max': None} - + if self.parallel_method == 'Y': y_len = self._lat['data'].shape[0] axis_limits['y_min'] = (y_len // self.size) * self.rank @@ -374,41 +549,29 @@ class Nes(object): axis_limits['z_max'] = self.last_level return axis_limits - def set_write_axis_limits(self): + def get_time_id(self, hours, first=True): """ - Calculate the 4D writing axis limits + Get the index of the corresponding time value. + + Parameters + ---------- + hours : int + Number of hours to avoid + first : bool + Indicates if youy want to avoid from the first hours (True) or from the last (False) + Default: True Returns ------- - dict - Dictionary with the 4D limits of the rank data to write. 
- t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max + int + Possition of the time array """ - axis_limits = {'x_min': None, 'x_max': None, - 'y_min': None, 'y_max': None, - 'z_min': None, 'z_max': None, - 't_min': None, 't_max': None} - - if self.parallel_method == 'Y': - y_len = self._lat['data'].shape[0] - axis_limits['y_min'] = (y_len // self.size) * self.rank - if self.rank + 1 < self.size: - axis_limits['y_max'] = (y_len // self.size) * (self.rank + 1) - elif self.parallel_method == 'X': - x_len = self._lon['data'].shape[-1] - axis_limits['x_min'] = (x_len // self.size) * self.rank - if self.rank + 1 < self.size: - axis_limits['x_max'] = (x_len // self.size) * (self.rank + 1) - elif self.parallel_method == 'T': - t_len = len(self._time) - axis_limits['t_min'] = ((t_len // self.size) * self.rank) - if self.rank + 1 < self.size: - axis_limits['t_max'] = (t_len // self.size) * (self.rank + 1) - + from datetime import timedelta + if first: + idx = self._time.index(self._time[0] + timedelta(hours=hours)) else: - raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=['X', 'Y', 'T'])) - return axis_limits + idx = self._time.index(self._time[-1] - timedelta(hours=hours)) + 1 + return idx def open(self): """ @@ -747,42 +910,45 @@ class Nes(object): gl_attrs[attrname] = getattr(self.netcdf, attrname) return gl_attrs - def free_vars(self, var_list): - """ - Erase the selected variables from the variables information. 
+ # ================================================================================================================== + # Writing + # ================================================================================================================== - Parameters - ---------- - var_list : list, str - List (or single string) of the variables to be loaded - """ - if isinstance(var_list, str): - var_list = [var_list] - - if self.is_xarray: - self.dataset = self.dataset.drop_vars(var_list) - self.variables = self.__get_lazy_variables() - else: - for var_name in var_list: - del self.variables[var_name] - return None - - def keep_vars(self, var_list): + def set_write_axis_limits(self): """ - Keep the selected variables and erases the rest. + Calculate the 4D writing axis limits - Parameters - ---------- - var_list : list, str - List (or single string) of the variables to be loaded + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to write. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max """ - if isinstance(var_list, str): - var_list = [var_list] + axis_limits = {'x_min': None, 'x_max': None, + 'y_min': None, 'y_max': None, + 'z_min': None, 'z_max': None, + 't_min': None, 't_max': None} - to_remove = list(set(self.variables.keys()).difference(set(var_list))) + if self.parallel_method == 'Y': + y_len = self._lat['data'].shape[0] + axis_limits['y_min'] = (y_len // self.size) * self.rank + if self.rank + 1 < self.size: + axis_limits['y_max'] = (y_len // self.size) * (self.rank + 1) + elif self.parallel_method == 'X': + x_len = self._lon['data'].shape[-1] + axis_limits['x_min'] = (x_len // self.size) * self.rank + if self.rank + 1 < self.size: + axis_limits['x_max'] = (x_len // self.size) * (self.rank + 1) + elif self.parallel_method == 'T': + t_len = len(self._time) + axis_limits['t_min'] = ((t_len // self.size) * self.rank) + if self.rank + 1 < self.size: + axis_limits['t_max'] = (t_len // self.size) * (self.rank + 1) - 
self.free_vars(to_remove) - return None + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=['X', 'Y', 'T'])) + return axis_limits def _create_dimensions(self, netcdf): """