From fb35627d570c4cb379490a2d5f6d2385ea2dfd03 Mon Sep 17 00:00:00 2001 From: Okke van Eck Date: Tue, 16 Jan 2024 14:15:05 +0100 Subject: [PATCH] Initial version of ecTrans from https://github.com/ddegrauwe/ectrans at commit (so including) 53848f47e40f3b598ceda09634e1f840ab7e2ebb --- .github/tools/install-fftw.sh | 99 + .github/tools/install-intel-oneapi.sh | 14 + .github/tools/install-mpi.sh | 166 + .github/tools/install-nvhpc.sh | 107 + .github/tools/reduce-output.sh | 43 + .github/workflows/build.yml | 196 + AUTHORS | 27 + CMakeLists.txt | 208 + ENV_lumi | 25 + LICENSE | 190 + README.md | 137 +- README_lumi_lam.md | 344 ++ VERSION | 1 + .../FindCUDAToolkit.cmake | 1026 +++++ cmake/FindOpenACC.cmake | 309 ++ cmake/FindPackageHandleStandardArgs.cmake | 605 +++ cmake/FindPackageMessage.cmake | 48 + cmake/ectrans-import.cmake.in | 65 + cmake/ectrans_add_library.cmake | 35 + cmake/ectrans_compile_options.cmake | 53 + cmake/ectrans_find_lapack.cmake | 71 + cmake/ectrans_macros.cmake | 30 + ...rans_target_fortran_module_directory.cmake | 38 + cmake/project_summary.cmake | 53 + src/CMakeLists.txt | 17 + src/etrans/CMakeLists.txt | 17 + src/etrans/cpu/CMakeLists.txt | 61 + src/etrans/cpu/aux/ellips.F90 | 8 + src/etrans/cpu/aux/ellips.h | 87 + src/etrans/cpu/aux/ellips64.F90 | 8 + src/etrans/cpu/aux/extper_mod.F90 | 144 + src/etrans/cpu/external/edir_trans.F90 | 651 +++ src/etrans/cpu/external/edir_transad.F90 | 493 +++ src/etrans/cpu/external/edist_grid.F90 | 136 + src/etrans/cpu/external/edist_spec.F90 | 195 + src/etrans/cpu/external/egath_grid.F90 | 129 + src/etrans/cpu/external/egath_spec.F90 | 194 + src/etrans/cpu/external/egpnorm_trans.F90 | 93 + src/etrans/cpu/external/einv_trans.F90 | 769 ++++ src/etrans/cpu/external/einv_transad.F90 | 609 +++ src/etrans/cpu/external/esetup_trans.F90 | 301 ++ src/etrans/cpu/external/especnorm.F90 | 136 + src/etrans/cpu/external/etrans_end.F90 | 147 + src/etrans/cpu/external/etrans_inq.F90 | 539 +++ src/etrans/cpu/external/etrans_release.F90 | 51 + src/etrans/cpu/internal/cpl_int_mod.F90 | 33 + src/etrans/cpu/internal/easre1ad_mod.F90 | 80 + src/etrans/cpu/internal/easre1b_mod.F90 | 93 + src/etrans/cpu/internal/easre1bad_mod.F90 | 97 + .../cpu/internal/edealloc_resol_mod.F90 | 102 + .../cpu/internal/edir_trans_ctl_mod.F90 | 202 + .../cpu/internal/edir_trans_ctlad_mod.F90 | 194 + .../cpu/internal/edist_spec_control_mod.F90 | 3 + src/etrans/cpu/internal/efsc_mod.F90 | 110 + src/etrans/cpu/internal/efscad_mod.F90 | 121 + src/etrans/cpu/internal/eftdir_ctl_mod.F90 | 215 + src/etrans/cpu/internal/eftdir_ctlad_mod.F90 | 202 + src/etrans/cpu/internal/eftdirad_mod.F90 | 120 + src/etrans/cpu/internal/eftinv_ctl_mod.F90 | 274 ++ src/etrans/cpu/internal/eftinv_ctlad_mod.F90 | 296 ++ src/etrans/cpu/internal/eftinvad_mod.F90 | 129 + .../cpu/internal/egath_spec_control_mod.F90 | 204 + .../cpu/internal/einv_trans_ctl_mod.F90 | 298 ++ .../cpu/internal/einv_trans_ctlad_mod.F90 | 292 ++ src/etrans/cpu/internal/eledir_mod.F90 | 99 + src/etrans/cpu/internal/eledirad_mod.F90 | 118 + src/etrans/cpu/internal/eleinv_mod.F90 | 105 + src/etrans/cpu/internal/eleinvad_mod.F90 | 115 + src/etrans/cpu/internal/eltdir_ctl_mod.F90 | 121 + src/etrans/cpu/internal/eltdir_ctlad_mod.F90 | 109 + src/etrans/cpu/internal/eltdir_mod.F90 | 184 + src/etrans/cpu/internal/eltdirad_mod.F90 | 166 + src/etrans/cpu/internal/eltinv_ctl_mod.F90 | 138 + src/etrans/cpu/internal/eltinv_ctlad_mod.F90 | 116 + src/etrans/cpu/internal/eltinv_mod.F90 | 213 + src/etrans/cpu/internal/eltinvad_mod.F90 | 252 ++ src/etrans/cpu/internal/eprfi1_mod.F90 | 105 + src/etrans/cpu/internal/eprfi1ad_mod.F90 | 103 + src/etrans/cpu/internal/eprfi1b_mod.F90 | 110 + src/etrans/cpu/internal/eprfi1bad_mod.F90 | 110 + src/etrans/cpu/internal/eprfi2_mod.F90 | 85 + src/etrans/cpu/internal/eprfi2ad_mod.F90 | 82 + src/etrans/cpu/internal/eprfi2b_mod.F90 | 92 + src/etrans/cpu/internal/eprfi2bad_mod.F90 | 90 + src/etrans/cpu/internal/eset_resol_mod.F90 | 71 + src/etrans/cpu/internal/esetup_dims_mod.F90 | 46 + src/etrans/cpu/internal/esetup_geom_mod.F90 | 66 + src/etrans/cpu/internal/espnorm_ctl_mod.F90 | 64 + src/etrans/cpu/internal/espnormc_mod.F90 | 3 + src/etrans/cpu/internal/espnormd_mod.F90 | 55 + src/etrans/cpu/internal/espnsde_mod.F90 | 101 + src/etrans/cpu/internal/espnsdead_mod.F90 | 112 + src/etrans/cpu/internal/eupdsp_mod.F90 | 141 + src/etrans/cpu/internal/eupdspad_mod.F90 | 145 + src/etrans/cpu/internal/eupdspb_mod.F90 | 105 + src/etrans/cpu/internal/eupdspbad_mod.F90 | 133 + src/etrans/cpu/internal/euvtvd_comm_mod.F90 | 128 + src/etrans/cpu/internal/euvtvd_mod.F90 | 111 + src/etrans/cpu/internal/euvtvdad_mod.F90 | 128 + src/etrans/cpu/internal/evdtuv_mod.F90 | 125 + src/etrans/cpu/internal/evdtuvad_comm_mod.F90 | 163 + src/etrans/cpu/internal/evdtuvad_mod.F90 | 151 + src/etrans/cpu/internal/suefft_mod.F90 | 114 + src/etrans/cpu/internal/suemp_trans_mod.F90 | 267 ++ .../cpu/internal/suemp_trans_preleg_mod.F90 | 240 ++ src/etrans/cpu/internal/suemplat_mod.F90 | 252 ++ src/etrans/cpu/internal/suemplatb_mod.F90 | 236 ++ src/etrans/cpu/internal/suestaonl_mod.F90 | 451 ++ src/etrans/cpu/internal/tpmald_dim.F90 | 23 + src/etrans/cpu/internal/tpmald_distr.F90 | 23 + src/etrans/cpu/internal/tpmald_fft.F90 | 20 + src/etrans/cpu/internal/tpmald_fields.F90 | 17 + src/etrans/cpu/internal/tpmald_geo.F90 | 22 + src/etrans/cpu/internal/tpmald_tcdis.F90 | 13 + src/etrans/gpu/CMakeLists.txt | 113 + src/etrans/gpu/aux/ellips.F90 | 8 + src/etrans/gpu/aux/ellips.h | 87 + src/etrans/gpu/aux/ellips64.F90 | 8 + src/etrans/gpu/aux/extper_mod.F90 | 144 + src/etrans/gpu/external/edir_trans.F90 | 681 +++ src/etrans/gpu/external/edir_transad.F90 | 493 +++ src/etrans/gpu/external/edist_grid.F90 | 136 + src/etrans/gpu/external/edist_spec.F90 | 181 + src/etrans/gpu/external/egath_grid.F90 | 129 + src/etrans/gpu/external/egath_spec.F90 | 194 + src/etrans/gpu/external/egpnorm_trans.F90 | 445 ++ src/etrans/gpu/external/einv_trans.F90 | 815 ++++ src/etrans/gpu/external/einv_transad.F90 | 609 +++ src/etrans/gpu/external/esetup_trans.F90 | 430 ++ src/etrans/gpu/external/especnorm.F90 | 136 + src/etrans/gpu/external/etrans_end.F90 | 153 + src/etrans/gpu/external/etrans_inq.F90 | 539 +++ src/etrans/gpu/external/etrans_release.F90 | 51 + src/etrans/gpu/internal/cpl_int_mod.F90 | 33 + src/etrans/gpu/internal/easre1ad_mod.F90 | 80 + src/etrans/gpu/internal/easre1b_mod.F90 | 107 + src/etrans/gpu/internal/easre1bad_mod.F90 | 86 + .../gpu/internal/edealloc_resol_mod.F90 | 102 + .../gpu/internal/edir_trans_ctl_mod.F90 | 202 + .../gpu/internal/edir_trans_ctlad_mod.F90 | 194 + .../gpu/internal/edist_spec_control_mod.F90 | 202 + src/etrans/gpu/internal/efourier_in_mod.F90 | 81 + src/etrans/gpu/internal/efourier_out_mod.F90 | 76 + src/etrans/gpu/internal/efsc_mod.F90 | 121 + src/etrans/gpu/internal/efscad_mod.F90 | 121 + src/etrans/gpu/internal/eftdata_mod.F90 | 8 + src/etrans/gpu/internal/eftdir_ctl_mod.F90 | 229 + src/etrans/gpu/internal/eftdir_ctlad_mod.F90 | 192 + src/etrans/gpu/internal/eftdir_mod.F90 | 95 + src/etrans/gpu/internal/eftdirad_mod.F90 | 120 + src/etrans/gpu/internal/eftinv_ctl_mod.F90 | 305 ++ src/etrans/gpu/internal/eftinv_ctlad_mod.F90 | 284 ++ src/etrans/gpu/internal/eftinv_mod.F90 | 119 + src/etrans/gpu/internal/eftinvad_mod.F90 | 129 + .../gpu/internal/egath_spec_control_mod.F90 | 205 + .../gpu/internal/einv_trans_ctl_mod.F90 | 303 ++ .../gpu/internal/einv_trans_ctlad_mod.F90 | 292 ++ src/etrans/gpu/internal/eledir_mod.F90 | 111 + src/etrans/gpu/internal/eledirad_mod.F90 | 99 + src/etrans/gpu/internal/eleinv_mod.F90 | 141 + src/etrans/gpu/internal/eleinvad_mod.F90 | 95 + src/etrans/gpu/internal/eltdata_mod.F90 | 10 + src/etrans/gpu/internal/eltdir_ctl_mod.F90 | 128 + src/etrans/gpu/internal/eltdir_ctlad_mod.F90 | 104 + src/etrans/gpu/internal/eltdir_mod.F90 | 225 + src/etrans/gpu/internal/eltdirad_mod.F90 | 166 + src/etrans/gpu/internal/eltinv_ctl_mod.F90 | 147 + src/etrans/gpu/internal/eltinv_ctlad_mod.F90 | 113 + src/etrans/gpu/internal/eltinv_mod.F90 | 245 ++ src/etrans/gpu/internal/eltinvad_mod.F90 | 252 ++ src/etrans/gpu/internal/eprfi1_mod.F90 | 3 + src/etrans/gpu/internal/eprfi1ad_mod.F90 | 103 + src/etrans/gpu/internal/eprfi1b_mod.F90 | 131 + src/etrans/gpu/internal/eprfi1bad_mod.F90 | 110 + src/etrans/gpu/internal/eprfi2_mod.F90 | 83 + src/etrans/gpu/internal/eprfi2ad_mod.F90 | 82 + src/etrans/gpu/internal/eprfi2b_mod.F90 | 105 + src/etrans/gpu/internal/eprfi2bad_mod.F90 | 90 + src/etrans/gpu/internal/eset_resol_mod.F90 | 78 + src/etrans/gpu/internal/esetup_dims_mod.F90 | 46 + src/etrans/gpu/internal/esetup_geom_mod.F90 | 66 + src/etrans/gpu/internal/espnorm_ctl_mod.F90 | 61 + src/etrans/gpu/internal/espnormc_mod.F90 | 85 + src/etrans/gpu/internal/espnormd_mod.F90 | 55 + src/etrans/gpu/internal/espnsde_mod.F90 | 109 + src/etrans/gpu/internal/espnsdead_mod.F90 | 112 + src/etrans/gpu/internal/eupdsp_mod.F90 | 142 + src/etrans/gpu/internal/eupdspad_mod.F90 | 145 + src/etrans/gpu/internal/eupdspb_mod.F90 | 113 + src/etrans/gpu/internal/eupdspbad_mod.F90 | 133 + src/etrans/gpu/internal/euvtvd_comm_mod.F90 | 173 + src/etrans/gpu/internal/euvtvd_mod.F90 | 127 + src/etrans/gpu/internal/euvtvdad_mod.F90 | 128 + src/etrans/gpu/internal/evdtuv_mod.F90 | 188 + src/etrans/gpu/internal/evdtuvad_comm_mod.F90 | 154 + src/etrans/gpu/internal/evdtuvad_mod.F90 | 151 + src/etrans/gpu/internal/suefft_mod.F90 | 68 + src/etrans/gpu/internal/suemp_trans_mod.F90 | 267 ++ .../gpu/internal/suemp_trans_preleg_mod.F90 | 255 ++ src/etrans/gpu/internal/suemplat_mod.F90 | 252 ++ src/etrans/gpu/internal/suemplatb_mod.F90 | 236 ++ src/etrans/gpu/internal/suestaonl_mod.F90 | 462 ++ src/etrans/gpu/internal/tpmald_dim.F90 | 23 + src/etrans/gpu/internal/tpmald_distr.F90 | 27 + src/etrans/gpu/internal/tpmald_fft.F90 | 20 + src/etrans/gpu/internal/tpmald_fields.F90 | 20 + src/etrans/gpu/internal/tpmald_geo.F90 | 22 + src/etrans/gpu/internal/tpmald_tcdis.F90 | 13 + src/etrans/include/etrans/edir_trans.h | 135 + src/etrans/include/etrans/edir_transad.h | 131 + src/etrans/include/etrans/edist_grid.h | 57 + src/etrans/include/etrans/edist_spec.h | 59 + src/etrans/include/etrans/egath_grid.h | 56 + src/etrans/include/etrans/egath_spec.h | 64 + src/etrans/include/etrans/egpnorm_trans.h | 59 + src/etrans/include/etrans/einv_trans.h | 151 + src/etrans/include/etrans/einv_transad.h | 150 + src/etrans/include/etrans/esetup_trans.h | 86 + src/etrans/include/etrans/especnorm.h | 56 + src/etrans/include/etrans/etrans_end.h | 41 + src/etrans/include/etrans/etrans_inq.h | 172 + src/etrans/include/etrans/etrans_release.h | 6 + src/etrans/programs/CMakeLists.txt | 62 + src/etrans/programs/aatestprog.F90 | 566 +++ src/etrans/programs/test_adjoint.F90 | 377 ++ src/etrans/programs/trinfo.F90 | 209 + src/programs/CMakeLists.txt | 154 + src/programs/ectrans-benchmark.F90 | 1370 ++++++ src/programs/ectrans-lam-benchmark.F90 | 1493 +++++++ src/programs/ectrans.in | 119 + src/trans/CMakeLists.txt | 23 + src/trans/cpu/CMakeLists.txt | 73 + src/trans/cpu/algor/bluestein_mod.F90 | 387 ++ src/trans/cpu/algor/butterfly_alg_mod.F90 | 1222 ++++++ src/trans/cpu/algor/fft992.F90 | 2327 ++++++++++ src/trans/cpu/algor/fft992_cc.F90 | 139 + src/trans/cpu/algor/interpol_decomp_mod.F90 | 246 ++ src/trans/cpu/algor/seefmm_mix.F90 | 553 +++ src/trans/cpu/algor/set99.F90 | 82 + src/trans/cpu/algor/set99b.F90 | 81 + src/trans/cpu/algor/wts500_mod.F90 | 3765 +++++++++++++++++ src/trans/cpu/external/dir_trans.F90 | 508 +++ src/trans/cpu/external/dir_transad.F90 | 506 +++ src/trans/cpu/external/dist_grid.F90 | 147 + src/trans/cpu/external/dist_grid_32.F90 | 140 + src/trans/cpu/external/dist_spec.F90 | 214 + src/trans/cpu/external/gath_grid.F90 | 140 + src/trans/cpu/external/gath_grid_32.F90 | 140 + src/trans/cpu/external/gath_spec.F90 | 194 + src/trans/cpu/external/get_current.F90 | 67 + src/trans/cpu/external/gpnorm_trans.F90 | 99 + src/trans/cpu/external/ini_spec_dist.F90 | 90 + src/trans/cpu/external/inv_trans.F90 | 621 +++ src/trans/cpu/external/inv_transad.F90 | 619 +++ src/trans/cpu/external/setup_trans.F90 | 430 ++ src/trans/cpu/external/setup_trans0.F90 | 213 + src/trans/cpu/external/specnorm.F90 | 144 + src/trans/cpu/external/sugawc.F90 | 102 + src/trans/cpu/external/trans_end.F90 | 140 + src/trans/cpu/external/trans_inq.F90 | 535 +++ src/trans/cpu/external/trans_pnm.F90 | 199 + src/trans/cpu/external/trans_release.F90 | 61 + src/trans/cpu/external/vordiv_to_uv.F90 | 179 + src/trans/cpu/internal/abort_trans_mod.F90 | 39 + src/trans/cpu/internal/asre1_mod.F90 | 92 + src/trans/cpu/internal/asre1ad_mod.F90 | 92 + src/trans/cpu/internal/asre1b_mod.F90 | 107 + src/trans/cpu/internal/asre1bad_mod.F90 | 108 + src/trans/cpu/internal/cdmap_mod.F90 | 178 + src/trans/cpu/internal/cpledn_mod.F90 | 134 + src/trans/cpu/internal/dealloc_resol_mod.F90 | 209 + src/trans/cpu/internal/dir_trans_ctl_mod.F90 | 195 + .../cpu/internal/dir_trans_ctlad_mod.F90 | 194 + .../cpu/internal/dist_grid_32_ctl_mod.F90 | 262 ++ src/trans/cpu/internal/dist_grid_ctl_mod.F90 | 280 ++ .../cpu/internal/dist_spec_control_mod.F90 | 420 ++ src/trans/cpu/internal/eq_regions_mod.F90 | 443 ++ src/trans/cpu/internal/field_split_mod.F90 | 140 + src/trans/cpu/internal/fourier_in_mod.F90 | 74 + src/trans/cpu/internal/fourier_inad_mod.F90 | 74 + src/trans/cpu/internal/fourier_out_mod.F90 | 73 + src/trans/cpu/internal/fourier_outad_mod.F90 | 73 + src/trans/cpu/internal/fsc_mod.F90 | 192 + src/trans/cpu/internal/fscad_mod.F90 | 146 + src/trans/cpu/internal/fspgl_int_mod.F90 | 111 + src/trans/cpu/internal/ftdir_ctl_mod.F90 | 196 + src/trans/cpu/internal/ftdir_ctlad_mod.F90 | 187 + src/trans/cpu/internal/ftdir_mod.F90 | 124 + src/trans/cpu/internal/ftdirad_mod.F90 | 121 + src/trans/cpu/internal/ftinv_ctl_mod.F90 | 297 ++ src/trans/cpu/internal/ftinv_ctlad_mod.F90 | 305 ++ src/trans/cpu/internal/ftinv_mod.F90 | 116 + src/trans/cpu/internal/ftinvad_mod.F90 | 126 + .../cpu/internal/gath_grid_32_ctl_mod.F90 | 278 ++ src/trans/cpu/internal/gath_grid_ctl_mod.F90 | 296 ++ .../cpu/internal/gath_spec_control_mod.F90 | 233 + src/trans/cpu/internal/gawl_mod.F90 | 118 + .../cpu/internal/gpnorm_trans_ctl_mod.F90 | 457 ++ src/trans/cpu/internal/inigptr_mod.F90 | 92 + src/trans/cpu/internal/inv_trans_ctl_mod.F90 | 299 ++ .../cpu/internal/inv_trans_ctlad_mod.F90 | 296 ++ src/trans/cpu/internal/ldfou2_mod.F90 | 97 + src/trans/cpu/internal/ldfou2ad_mod.F90 | 97 + src/trans/cpu/internal/ledir_mod.F90 | 279 ++ src/trans/cpu/internal/ledirad_mod.F90 | 207 + src/trans/cpu/internal/leinv_mod.F90 | 221 + src/trans/cpu/internal/leinvad_mod.F90 | 197 + src/trans/cpu/internal/ltdir_ctl_mod.F90 | 109 + src/trans/cpu/internal/ltdir_ctlad_mod.F90 | 110 + src/trans/cpu/internal/ltdir_mod.F90 | 199 + src/trans/cpu/internal/ltdirad_mod.F90 | 188 + src/trans/cpu/internal/ltinv_ctl_mod.F90 | 152 + src/trans/cpu/internal/ltinv_ctlad_mod.F90 | 124 + src/trans/cpu/internal/ltinv_mod.F90 | 336 ++ src/trans/cpu/internal/ltinvad_mod.F90 | 239 ++ src/trans/cpu/internal/myrecvset_mod.F90 | 83 + src/trans/cpu/internal/mysendset_mod.F90 | 80 + src/trans/cpu/internal/pe2set_mod.F90 | 121 + src/trans/cpu/internal/pre_suleg_mod.F90 | 71 + src/trans/cpu/internal/prepsnm_mod.F90 | 86 + src/trans/cpu/internal/prfi1_mod.F90 | 115 + src/trans/cpu/internal/prfi1ad_mod.F90 | 113 + src/trans/cpu/internal/prfi1b_mod.F90 | 121 + src/trans/cpu/internal/prfi1bad_mod.F90 | 112 + src/trans/cpu/internal/prfi2_mod.F90 | 99 + src/trans/cpu/internal/prfi2ad_mod.F90 | 91 + src/trans/cpu/internal/prfi2b_mod.F90 | 99 + src/trans/cpu/internal/prfi2bad_mod.F90 | 99 + src/trans/cpu/internal/read_legpol_mod.F90 | 286 ++ src/trans/cpu/internal/set2pe_mod.F90 | 131 + src/trans/cpu/internal/set_resol_mod.F90 | 77 + src/trans/cpu/internal/setup_dims_mod.F90 | 50 + src/trans/cpu/internal/setup_geom_mod.F90 | 110 + src/trans/cpu/internal/shuffle_mod.F90 | 137 + src/trans/cpu/internal/spnorm_ctl_mod.F90 | 62 + src/trans/cpu/internal/spnormc_mod.F90 | 89 + src/trans/cpu/internal/spnormd_mod.F90 | 66 + src/trans/cpu/internal/spnsde_mod.F90 | 121 + src/trans/cpu/internal/spnsdead_mod.F90 | 119 + src/trans/cpu/internal/sufft_mod.F90 | 106 + src/trans/cpu/internal/sugaw_mod.F90 | 431 ++ src/trans/cpu/internal/suleg_mod.F90 | 1207 ++++++ src/trans/cpu/internal/sump_trans0_mod.F90 | 115 + src/trans/cpu/internal/sump_trans_mod.F90 | 276 ++ .../cpu/internal/sump_trans_preleg_mod.F90 | 149 + src/trans/cpu/internal/sumplat_mod.F90 | 256 ++ src/trans/cpu/internal/sumplatb_mod.F90 | 226 + src/trans/cpu/internal/sumplatbeq_mod.F90 | 289 ++ src/trans/cpu/internal/sumplatf_mod.F90 | 150 + src/trans/cpu/internal/supol_mod.F90 | 173 + src/trans/cpu/internal/supolf_mod.F90 | 284 ++ src/trans/cpu/internal/sustaonl_mod.F90 | 457 ++ src/trans/cpu/internal/sutrle_mod.F90 | 366 ++ src/trans/cpu/internal/suwavedi_mod.F90 | 183 + src/trans/cpu/internal/tpm_constants.F90 | 20 + src/trans/cpu/internal/tpm_ctl.F90 | 43 + src/trans/cpu/internal/tpm_dim.F90 | 51 + src/trans/cpu/internal/tpm_distr.F90 | 169 + src/trans/cpu/internal/tpm_fft.F90 | 37 + src/trans/cpu/internal/tpm_fftw.F90 | 505 +++ src/trans/cpu/internal/tpm_fields.F90 | 38 + src/trans/cpu/internal/tpm_flt.F90 | 74 + src/trans/cpu/internal/tpm_gen.F90 | 45 + src/trans/cpu/internal/tpm_geometry.F90 | 37 + src/trans/cpu/internal/tpm_pol.F90 | 123 + src/trans/cpu/internal/tpm_trans.F90 | 58 + src/trans/cpu/internal/trgtol_mod.F90 | 838 ++++ src/trans/cpu/internal/trltog_mod.F90 | 833 ++++ src/trans/cpu/internal/trltom_mod.F90 | 149 + src/trans/cpu/internal/trmtol_mod.F90 | 155 + src/trans/cpu/internal/updsp_mod.F90 | 166 + src/trans/cpu/internal/updspad_mod.F90 | 178 + src/trans/cpu/internal/updspb_mod.F90 | 155 + src/trans/cpu/internal/updspbad_mod.F90 | 160 + src/trans/cpu/internal/uvtvd_mod.F90 | 144 + src/trans/cpu/internal/uvtvdad_mod.F90 | 139 + src/trans/cpu/internal/vd2uv_ctl_mod.F90 | 81 + src/trans/cpu/internal/vd2uv_mod.F90 | 155 + src/trans/cpu/internal/vdtuv_mod.F90 | 149 + src/trans/cpu/internal/vdtuvad_mod.F90 | 145 + src/trans/cpu/internal/write_legpol_mod.F90 | 229 + src/trans/cpu/sharedmem/sharedmem.c | 28 + src/trans/cpu/sharedmem/sharedmem_mod.F90 | 315 ++ src/trans/gpu/#CMakeLists.txt# | 124 + src/trans/gpu/CMakeLists.txt | 145 + .../external/fourier/create_plan_ffth.hip.cpp | 189 + .../fourier/destroy_plan_ffth.hip.cpp | 95 + .../fourier/execute_plan_ffth.hip.cpp | 173 + .../external/fourier/execute_plan_ffth.hip.h | 10 + src/trans/gpu/algor/interface/dbfgsl.h | 16 + src/trans/gpu/algor/interface/dpseuclid.h | 11 + src/trans/gpu/algor/interface/dysave.h | 27 + src/trans/gpu/algor/interface/eigsol.h | 17 + src/trans/gpu/algor/interface/intavg.h | 9 + src/trans/gpu/algor/interface/layeravg.h | 9 + src/trans/gpu/algor/interface/minv.h | 13 + src/trans/gpu/algor/interface/minv_8.h | 13 + src/trans/gpu/algor/interface/minv_caller.h | 9 + src/trans/gpu/algor/interface/multvdv.h | 8 + src/trans/gpu/algor/interface/mxmaop.h | 17 + src/trans/gpu/algor/interface/mxptma.h | 16 + src/trans/gpu/algor/interface/mxtrma.h | 14 + src/trans/gpu/algor/interface/mxture.h | 16 + src/trans/gpu/algor/interface/mxturhd.h | 14 + src/trans/gpu/algor/interface/mxturs.h | 14 + src/trans/gpu/algor/interface/n1cg1.h | 40 + src/trans/gpu/algor/interface/n1cga.h | 53 + src/trans/gpu/algor/interface/si_mxptco.h | 16 + src/trans/gpu/algor/interface/simplico.h | 19 + src/trans/gpu/algor/interface/sublayer.h | 9 + src/trans/gpu/algor/interface/suher.h | 18 + src/trans/gpu/algor/interface/suhert.h | 14 + src/trans/gpu/algor/interface/suhes.h | 14 + src/trans/gpu/algor/interface/tridia.h | 25 + .../gpu/algor/module/butterfly_alg_mod.F90 | 1148 +++++ src/trans/gpu/algor/module/dilatation_mod.F90 | 485 +++ src/trans/gpu/algor/module/hip_device_mod.F90 | 56 + .../gpu/algor/module/hip_device_mod.F90~ | 40 + .../algor/module/hipblasDgemmBatched.hip.cpp | 141 + .../algor/module/hipblasSgemmBatched.hip.cpp | 125 + src/trans/gpu/algor/module/hipblas_mod.F90 | 158 + .../gpu/algor/module/interpol_decomp_mod.F90 | 245 ++ .../algor/module/rocblasDgemmBatched.hip.cpp | 162 + .../algor/module/rocblasSgemmBatched.hip.cpp | 155 + src/trans/gpu/algor/module/rocblas_mod.F90 | 158 + src/trans/gpu/algor/module/seefmm_mix.F90 | 548 +++ src/trans/gpu/algor/module/wts500_mod.F90 | 3764 ++++++++++++++++ src/trans/gpu/external/dir_trans.F90 | 536 +++ src/trans/gpu/external/dir_transad.F90 | 506 +++ src/trans/gpu/external/dist_grid.F90 | 147 + src/trans/gpu/external/dist_grid_32.F90 | 140 + src/trans/gpu/external/dist_spec.F90 | 201 + src/trans/gpu/external/gath_grid.F90 | 140 + src/trans/gpu/external/gath_grid_32.F90 | 140 + src/trans/gpu/external/gath_spec.F90 | 194 + src/trans/gpu/external/get_current.F90 | 67 + src/trans/gpu/external/gpnorm_trans.F90 | 522 +++ src/trans/gpu/external/ini_spec_dist.F90 | 90 + src/trans/gpu/external/inv_trans.F90 | 661 +++ src/trans/gpu/external/inv_transad.F90 | 619 +++ src/trans/gpu/external/setup_trans.F90 | 988 +++++ src/trans/gpu/external/setup_trans0.F90 | 257 ++ src/trans/gpu/external/specnorm.F90 | 144 + src/trans/gpu/external/sugawc.F90 | 102 + src/trans/gpu/external/trans_end.F90 | 203 + src/trans/gpu/external/trans_inq.F90 | 529 +++ src/trans/gpu/external/trans_pnm.F90 | 200 + src/trans/gpu/external/trans_release.F90 | 61 + src/trans/gpu/external/vordiv_to_uv.F90 | 179 + src/trans/gpu/internal/abort_trans_mod.F90 | 39 + src/trans/gpu/internal/asre1_mod.F90 | 98 + src/trans/gpu/internal/asre1ad_mod.F90 | 92 + src/trans/gpu/internal/asre1b_mod.F90 | 125 + src/trans/gpu/internal/asre1bad_mod.F90 | 108 + src/trans/gpu/internal/cdmap_mod.F90 | 178 + src/trans/gpu/internal/cpledn_mod.F90 | 134 + .../gpu/internal/cuda_gemm_batched_mod.F90 | 248 ++ src/trans/gpu/internal/dealloc_resol_mod.F90 | 192 + src/trans/gpu/internal/dir_trans_ctl_mod.F90 | 307 ++ .../gpu/internal/dir_trans_ctlad_mod.F90 | 194 + .../gpu/internal/dist_grid_32_ctl_mod.F90 | 258 ++ src/trans/gpu/internal/dist_grid_ctl_mod.F90 | 275 ++ .../gpu/internal/dist_spec_control_mod.F90 | 233 + src/trans/gpu/internal/eq_regions_mod.F90 | 443 ++ src/trans/gpu/internal/field_split_mod.F90 | 140 + src/trans/gpu/internal/fourier_in_mod.F90 | 118 + src/trans/gpu/internal/fourier_inad_mod.F90 | 74 + src/trans/gpu/internal/fourier_out_mod.F90 | 119 + src/trans/gpu/internal/fourier_outad_mod.F90 | 73 + src/trans/gpu/internal/fsc_mod.F90 | 226 + src/trans/gpu/internal/fscad_mod.F90 | 146 + src/trans/gpu/internal/fspgl_int_mod.F90 | 157 + src/trans/gpu/internal/ftdir_ctl_mod.F90 | 266 ++ src/trans/gpu/internal/ftdir_ctlad_mod.F90 | 187 + src/trans/gpu/internal/ftdir_mod.F90 | 180 + src/trans/gpu/internal/ftdirad_mod.F90 | 94 + src/trans/gpu/internal/ftinv_ctl_mod.F90 | 274 ++ src/trans/gpu/internal/ftinv_ctlad_mod.F90 | 293 ++ src/trans/gpu/internal/ftinv_mod.F90 | 176 + src/trans/gpu/internal/ftinvad_mod.F90 | 94 + .../gpu/internal/gath_grid_32_ctl_mod.F90 | 277 ++ src/trans/gpu/internal/gath_grid_ctl_mod.F90 | 290 ++ .../gpu/internal/gath_spec_control_mod.F90 | 233 + src/trans/gpu/internal/gawl_mod.F90 | 118 + src/trans/gpu/internal/gstats_label_ifs.F90 | 955 +++++ src/trans/gpu/internal/inigptr_mod.F90 | 92 + src/trans/gpu/internal/inv_trans_ctl_mod.F90 | 320 ++ .../gpu/internal/inv_trans_ctlad_mod.F90 | 296 ++ src/trans/gpu/internal/ldfou2_mod.F90 | 135 + src/trans/gpu/internal/ldfou2ad_mod.F90 | 97 + src/trans/gpu/internal/ledir_mod.F90 | 501 +++ src/trans/gpu/internal/ledirad_mod.F90 | 207 + src/trans/gpu/internal/leinv_mod.F90 | 353 ++ src/trans/gpu/internal/leinvad_mod.F90 | 197 + src/trans/gpu/internal/ltdir_ctl_mod.F90 | 122 + src/trans/gpu/internal/ltdir_ctlad_mod.F90 | 110 + src/trans/gpu/internal/ltdir_mod.F90 | 210 + src/trans/gpu/internal/ltdirad_mod.F90 | 189 + src/trans/gpu/internal/ltinv_ctl_mod.F90 | 141 + src/trans/gpu/internal/ltinv_ctlad_mod.F90 | 119 + src/trans/gpu/internal/ltinv_mod.F90 | 368 ++ src/trans/gpu/internal/ltinvad_mod.F90 | 236 ++ src/trans/gpu/internal/myrecvset_mod.F90 | 83 + src/trans/gpu/internal/mysendset_mod.F90 | 80 + src/trans/gpu/internal/parkind_ectrans.F90 | 38 + src/trans/gpu/internal/pe2set_mod.F90 | 121 + src/trans/gpu/internal/pre_suleg_mod.F90 | 71 + src/trans/gpu/internal/prepsnm_mod.F90 | 105 + src/trans/gpu/internal/prfi1_mod.F90 | 114 + src/trans/gpu/internal/prfi1ad_mod.F90 | 111 + src/trans/gpu/internal/prfi1b_mod.F90 | 224 + src/trans/gpu/internal/prfi1bad_mod.F90 | 112 + src/trans/gpu/internal/prfi2_mod.F90 | 100 + src/trans/gpu/internal/prfi2ad_mod.F90 | 91 + src/trans/gpu/internal/prfi2b_mod.F90 | 136 + src/trans/gpu/internal/prfi2bad_mod.F90 | 99 + src/trans/gpu/internal/read_legpol_mod.F90 | 286 ++ src/trans/gpu/internal/set2pe_mod.F90 | 131 + src/trans/gpu/internal/set_resol_mod.F90 | 72 + src/trans/gpu/internal/setup_dims_mod.F90 | 50 + src/trans/gpu/internal/setup_geom_mod.F90 | 110 + src/trans/gpu/internal/shuffle_mod.F90 | 137 + src/trans/gpu/internal/spnorm_ctl_mod.F90 | 62 + src/trans/gpu/internal/spnormc_mod.F90 | 90 + src/trans/gpu/internal/spnormd_mod.F90 | 66 + src/trans/gpu/internal/spnsde_mod.F90 | 211 + src/trans/gpu/internal/spnsdead_mod.F90 | 119 + src/trans/gpu/internal/sufft_mod.F90 | 47 + src/trans/gpu/internal/sugaw_mod.F90 | 431 ++ src/trans/gpu/internal/suleg_mod.F90 | 1207 ++++++ src/trans/gpu/internal/sump_trans0_mod.F90 | 115 + src/trans/gpu/internal/sump_trans_mod.F90 | 276 ++ .../gpu/internal/sump_trans_preleg_mod.F90 | 149 + src/trans/gpu/internal/sumplat_mod.F90 | 256 ++ src/trans/gpu/internal/sumplatb_mod.F90 | 226 + src/trans/gpu/internal/sumplatbeq_mod.F90 | 289 ++ src/trans/gpu/internal/sumplatf_mod.F90 | 150 + src/trans/gpu/internal/supol_mod.F90 | 173 + src/trans/gpu/internal/supolf_mod.F90 | 284 ++ src/trans/gpu/internal/sustaonl_mod.F90 | 457 ++ src/trans/gpu/internal/sutrle_mod.F90 | 366 ++ src/trans/gpu/internal/suwavedi_mod.F90 | 183 + src/trans/gpu/internal/tpm_constants.F90 | 20 + src/trans/gpu/internal/tpm_ctl.F90 | 43 + src/trans/gpu/internal/tpm_dim.F90 | 58 + src/trans/gpu/internal/tpm_distr.F90 | 189 + src/trans/gpu/internal/tpm_fft.F90 | 29 + src/trans/gpu/internal/tpm_ffth.F90 | 239 ++ src/trans/gpu/internal/tpm_fields.F90 | 123 + src/trans/gpu/internal/tpm_flt.F90 | 67 + src/trans/gpu/internal/tpm_gen.F90 | 45 + src/trans/gpu/internal/tpm_geometry.F90 | 44 + src/trans/gpu/internal/tpm_pol.F90 | 123 + src/trans/gpu/internal/tpm_trans.F90 | 67 + src/trans/gpu/internal/trgtol_mod.F90 | 1638 +++++++ src/trans/gpu/internal/trltog_mod.F90 | 1636 +++++++ src/trans/gpu/internal/trltom_mod.F90 | 374 ++ src/trans/gpu/internal/trmtol_mod.F90 | 359 ++ src/trans/gpu/internal/updsp_mod.F90 | 183 + src/trans/gpu/internal/updspad_mod.F90 | 178 + src/trans/gpu/internal/updspb_mod.F90 | 160 + src/trans/gpu/internal/updspb_vd_mod.F90 | 170 + src/trans/gpu/internal/updspbad_mod.F90 | 160 + src/trans/gpu/internal/uvtvd_mod.F90 | 210 + src/trans/gpu/internal/uvtvdad_mod.F90 | 139 + src/trans/gpu/internal/vd2uv_ctl_mod.F90 | 81 + src/trans/gpu/internal/vd2uv_mod.F90 | 157 + src/trans/gpu/internal/vdtuv_mod.F90 | 226 + src/trans/gpu/internal/vdtuvad_mod.F90 | 145 + src/trans/gpu/internal/write_legpol_mod.F90 | 229 + .../gpu/internal_reducedmem/asre1b_mod.F90 | 108 + .../internal_reducedmem/dir_trans_ctl_mod.F90 | 209 + .../internal_reducedmem/fourier_in_mod.F90 | 100 + .../internal_reducedmem/fourier_out_mod.F90 | 97 + .../gpu/internal_reducedmem/ftdir_ctl_mod.F90 | 222 + .../gpu/internal_reducedmem/ftdir_mod.F90 | 169 + .../gpu/internal_reducedmem/ftinv_ctl_mod.F90 | 276 ++ .../gpu/internal_reducedmem/ftinv_mod.F90 | 161 + .../internal_reducedmem/inv_trans_ctl_mod.F90 | 298 ++ .../gpu/internal_reducedmem/leinv_mod.F90 | 327 ++ .../gpu/internal_reducedmem/ltdir_ctl_mod.F90 | 98 + .../gpu/internal_reducedmem/ltdir_mod.F90 | 223 + .../gpu/internal_reducedmem/ltinv_ctl_mod.F90 | 113 + .../gpu/internal_reducedmem/ltinv_mod.F90 | 316 ++ .../gpu/internal_reducedmem/prfi2b_mod.F90 | 112 + .../gpu/internal_reducedmem/spnsde_mod.F90 | 173 + .../gpu/internal_reducedmem/trgtol_mod.F90 | 1610 +++++++ .../gpu/internal_reducedmem/trltog_mod.F90 | 1578 +++++++ .../gpu/internal_reducedmem/updsp_mod.F90 | 186 + .../gpu/internal_reducedmem/updspb_mod.F90 | 140 + .../gpu/internal_reducedmem/uvtvd_mod.F90 | 198 + .../gpu/internal_reducedmem/vdtuv_mod.F90 | 194 + src/trans/gpu/sharedmem/sharedmem.c | 28 + src/trans/gpu/sharedmem/sharedmem_mod.F90 | 315 ++ src/trans/include/ectrans/dir_trans.h | 142 + src/trans/include/ectrans/dir_transad.h | 141 + src/trans/include/ectrans/dist_grid.h | 70 + src/trans/include/ectrans/dist_grid_32.h | 69 + src/trans/include/ectrans/dist_spec.h | 73 + src/trans/include/ectrans/gath_grid.h | 69 + src/trans/include/ectrans/gath_grid_32.h | 69 + src/trans/include/ectrans/gath_spec.h | 73 + src/trans/include/ectrans/get_current.h | 53 + src/trans/include/ectrans/gpnorm_trans.h | 69 + src/trans/include/ectrans/ini_spec_dist.h | 72 + src/trans/include/ectrans/inv_trans.h | 163 + src/trans/include/ectrans/inv_transad.h | 160 + src/trans/include/ectrans/setup_trans.h | 116 + src/trans/include/ectrans/setup_trans0.h | 90 + src/trans/include/ectrans/specnorm.h | 69 + src/trans/include/ectrans/sugawc.h | 60 + src/trans/include/ectrans/trans_end.h | 50 + src/trans/include/ectrans/trans_inq.h | 187 + src/trans/include/ectrans/trans_pnm.h | 60 + src/trans/include/ectrans/trans_release.h | 16 + src/trans/include/ectrans/vordiv_to_uv.h | 68 + src/transi/CMakeLists.txt | 34 + src/transi/include/ectrans/transi.h | 1079 +++++ src/transi/include/ectrans/version.h | 45 + src/transi/transi.c | 268 ++ src/transi/transi.h | 1079 +++++ src/transi/transi_module.F90 | 2268 ++++++++++ src/transi/version.c.in | 62 + src/transi/version.h | 45 + tests/CMakeLists.txt | 171 + tests/test-install.sh.in | 66 + tests/test_install/CMakeLists.txt | 40 + tests/test_install/main.F90 | 26 + tests/test_install/transi_gptosp.c | 195 + tests/test_install/transi_sptogp.c | 165 + tests/trans/test_adjoint.F90 | 365 ++ tests/transi/transi_test.c | 159 + tests/transi/transi_test.h | 57 + tests/transi/transi_test_invtrans_adjoint.c | 223 + tests/transi/transi_test_io.c | 301 ++ tests/transi/transi_test_lonlat.c | 300 ++ tests/transi/transi_test_lonlat_diff_incr.c | 201 + tests/transi/transi_test_memory.c | 77 + tests/transi/transi_test_memory_lonlat.c | 75 + tests/transi/transi_test_program.c | 354 ++ tests/transi/transi_test_timings.c | 96 + tests/transi/transi_test_vordiv_to_UV.c | 67 + toolchain_lumi.cmake | 47 + 643 files changed, 132808 insertions(+), 59 deletions(-) create mode 100755 .github/tools/install-fftw.sh create mode 100755 .github/tools/install-intel-oneapi.sh create mode 100755 .github/tools/install-mpi.sh create mode 100755 .github/tools/install-nvhpc.sh create mode 100755 .github/tools/reduce-output.sh create mode 100644 .github/workflows/build.yml create mode 100644 AUTHORS create mode 100644 CMakeLists.txt create mode 100644 ENV_lumi create mode 100644 LICENSE create mode 100644 README_lumi_lam.md create mode 100644 VERSION create mode 100644 cmake/FindCUDAToolkit-cmake-3.24/FindCUDAToolkit.cmake create mode 100644 cmake/FindOpenACC.cmake create mode 100644 cmake/FindPackageHandleStandardArgs.cmake create mode 100644 cmake/FindPackageMessage.cmake create mode 100644 cmake/ectrans-import.cmake.in create mode 100644 cmake/ectrans_add_library.cmake create mode 100644 cmake/ectrans_compile_options.cmake create mode 100644 cmake/ectrans_find_lapack.cmake create mode 100644 cmake/ectrans_macros.cmake create mode 100644 cmake/ectrans_target_fortran_module_directory.cmake create mode 100644 cmake/project_summary.cmake create mode 100644 src/CMakeLists.txt create mode 100644 src/etrans/CMakeLists.txt create mode 100644 src/etrans/cpu/CMakeLists.txt create mode 100644 src/etrans/cpu/aux/ellips.F90 create mode 100644 src/etrans/cpu/aux/ellips.h create mode 100644 src/etrans/cpu/aux/ellips64.F90 create mode 100644 src/etrans/cpu/aux/extper_mod.F90 create mode 100644 src/etrans/cpu/external/edir_trans.F90 create mode 100644 src/etrans/cpu/external/edir_transad.F90 create mode 100644 src/etrans/cpu/external/edist_grid.F90 create mode 100644 src/etrans/cpu/external/edist_spec.F90 create mode 100644 src/etrans/cpu/external/egath_grid.F90 create mode 100644 src/etrans/cpu/external/egath_spec.F90 create mode 100644 src/etrans/cpu/external/egpnorm_trans.F90 create mode 100644 src/etrans/cpu/external/einv_trans.F90 create mode 100644 src/etrans/cpu/external/einv_transad.F90 create mode 100644 src/etrans/cpu/external/esetup_trans.F90 create mode 100644 src/etrans/cpu/external/especnorm.F90 create mode 100644 src/etrans/cpu/external/etrans_end.F90 create mode 100644 src/etrans/cpu/external/etrans_inq.F90 create mode 100644 src/etrans/cpu/external/etrans_release.F90 create mode 100644 src/etrans/cpu/internal/cpl_int_mod.F90 create mode 100644 src/etrans/cpu/internal/easre1ad_mod.F90 create mode 100644 src/etrans/cpu/internal/easre1b_mod.F90 create mode 100644 src/etrans/cpu/internal/easre1bad_mod.F90 create mode 100644 src/etrans/cpu/internal/edealloc_resol_mod.F90 create mode 100644 src/etrans/cpu/internal/edir_trans_ctl_mod.F90 create mode 100644 src/etrans/cpu/internal/edir_trans_ctlad_mod.F90 create mode 100644 src/etrans/cpu/internal/edist_spec_control_mod.F90 create mode 100644 src/etrans/cpu/internal/efsc_mod.F90 create mode 100644 src/etrans/cpu/internal/efscad_mod.F90 create mode 100644 src/etrans/cpu/internal/eftdir_ctl_mod.F90 create mode 100644 src/etrans/cpu/internal/eftdir_ctlad_mod.F90 create mode 100644 src/etrans/cpu/internal/eftdirad_mod.F90 create mode 100644 src/etrans/cpu/internal/eftinv_ctl_mod.F90 create mode 100644 src/etrans/cpu/internal/eftinv_ctlad_mod.F90 create mode 100644 src/etrans/cpu/internal/eftinvad_mod.F90 create mode 100644 src/etrans/cpu/internal/egath_spec_control_mod.F90 create mode 100644 src/etrans/cpu/internal/einv_trans_ctl_mod.F90 create mode 100644 src/etrans/cpu/internal/einv_trans_ctlad_mod.F90 create mode 100644 src/etrans/cpu/internal/eledir_mod.F90 create mode 100644 src/etrans/cpu/internal/eledirad_mod.F90 create mode 100644 src/etrans/cpu/internal/eleinv_mod.F90 create mode 100644 src/etrans/cpu/internal/eleinvad_mod.F90 create mode 100644 src/etrans/cpu/internal/eltdir_ctl_mod.F90 create mode 100644 src/etrans/cpu/internal/eltdir_ctlad_mod.F90 create mode 100644 src/etrans/cpu/internal/eltdir_mod.F90 create mode 100644 src/etrans/cpu/internal/eltdirad_mod.F90 create mode 100644 src/etrans/cpu/internal/eltinv_ctl_mod.F90 create mode 100644 src/etrans/cpu/internal/eltinv_ctlad_mod.F90 create mode 100644 src/etrans/cpu/internal/eltinv_mod.F90 create mode 100644 src/etrans/cpu/internal/eltinvad_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi1_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi1ad_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi1b_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi1bad_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi2_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi2ad_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi2b_mod.F90 create mode 100644 src/etrans/cpu/internal/eprfi2bad_mod.F90 create mode 100644 src/etrans/cpu/internal/eset_resol_mod.F90 create mode 100644 src/etrans/cpu/internal/esetup_dims_mod.F90 create mode 100644 src/etrans/cpu/internal/esetup_geom_mod.F90 create mode 100644 src/etrans/cpu/internal/espnorm_ctl_mod.F90 create mode 100644 src/etrans/cpu/internal/espnormc_mod.F90 create mode 100644 src/etrans/cpu/internal/espnormd_mod.F90 create mode 100644 src/etrans/cpu/internal/espnsde_mod.F90 create mode 100644 src/etrans/cpu/internal/espnsdead_mod.F90 create mode 100644 src/etrans/cpu/internal/eupdsp_mod.F90 create mode 100644 src/etrans/cpu/internal/eupdspad_mod.F90 create mode 100644 src/etrans/cpu/internal/eupdspb_mod.F90 create mode 100644 src/etrans/cpu/internal/eupdspbad_mod.F90 create mode 100644 src/etrans/cpu/internal/euvtvd_comm_mod.F90 create mode 100644 src/etrans/cpu/internal/euvtvd_mod.F90 create mode 100644 src/etrans/cpu/internal/euvtvdad_mod.F90 create mode 100644 src/etrans/cpu/internal/evdtuv_mod.F90 create mode 100644 src/etrans/cpu/internal/evdtuvad_comm_mod.F90 create mode 100644 src/etrans/cpu/internal/evdtuvad_mod.F90 create mode 100644 src/etrans/cpu/internal/suefft_mod.F90 create mode 100644 src/etrans/cpu/internal/suemp_trans_mod.F90 create mode 100644 src/etrans/cpu/internal/suemp_trans_preleg_mod.F90 create mode 100644 src/etrans/cpu/internal/suemplat_mod.F90 create mode 100644 src/etrans/cpu/internal/suemplatb_mod.F90 create mode 100644 src/etrans/cpu/internal/suestaonl_mod.F90 create mode 100644 src/etrans/cpu/internal/tpmald_dim.F90 create mode 100644 src/etrans/cpu/internal/tpmald_distr.F90 create mode 100644 src/etrans/cpu/internal/tpmald_fft.F90 create mode 100644 src/etrans/cpu/internal/tpmald_fields.F90 create mode 100644 src/etrans/cpu/internal/tpmald_geo.F90 create mode 100644 src/etrans/cpu/internal/tpmald_tcdis.F90 create mode 100644 src/etrans/gpu/CMakeLists.txt create mode 100644 src/etrans/gpu/aux/ellips.F90 create mode 100644 src/etrans/gpu/aux/ellips.h create mode 100644 src/etrans/gpu/aux/ellips64.F90 create mode 100644 src/etrans/gpu/aux/extper_mod.F90 create mode 100644 src/etrans/gpu/external/edir_trans.F90 create mode 100644 src/etrans/gpu/external/edir_transad.F90 create mode 100644 src/etrans/gpu/external/edist_grid.F90 create mode 100644 src/etrans/gpu/external/edist_spec.F90 create mode 100644 src/etrans/gpu/external/egath_grid.F90 create mode 100644 src/etrans/gpu/external/egath_spec.F90 create mode 100644 src/etrans/gpu/external/egpnorm_trans.F90 create mode 100644 src/etrans/gpu/external/einv_trans.F90 create mode 100644 src/etrans/gpu/external/einv_transad.F90 create mode 100644 src/etrans/gpu/external/esetup_trans.F90 create mode 100644 src/etrans/gpu/external/especnorm.F90 create mode 100644 src/etrans/gpu/external/etrans_end.F90 create mode 100644 src/etrans/gpu/external/etrans_inq.F90 create mode 100644 src/etrans/gpu/external/etrans_release.F90 create mode 100644 src/etrans/gpu/internal/cpl_int_mod.F90 create mode 100644 src/etrans/gpu/internal/easre1ad_mod.F90 create mode 100644 src/etrans/gpu/internal/easre1b_mod.F90 create mode 100644 src/etrans/gpu/internal/easre1bad_mod.F90 create mode 100644 src/etrans/gpu/internal/edealloc_resol_mod.F90 create mode 100644 src/etrans/gpu/internal/edir_trans_ctl_mod.F90 create mode 100644 src/etrans/gpu/internal/edir_trans_ctlad_mod.F90 create mode 100644 src/etrans/gpu/internal/edist_spec_control_mod.F90 create mode 100644 src/etrans/gpu/internal/efourier_in_mod.F90 create mode 100644 src/etrans/gpu/internal/efourier_out_mod.F90 create mode 100644 src/etrans/gpu/internal/efsc_mod.F90 create mode 100644 src/etrans/gpu/internal/efscad_mod.F90 create mode 100644 src/etrans/gpu/internal/eftdata_mod.F90 create mode 100644 src/etrans/gpu/internal/eftdir_ctl_mod.F90 create mode 100644 src/etrans/gpu/internal/eftdir_ctlad_mod.F90 create mode 100644 src/etrans/gpu/internal/eftdir_mod.F90 create mode 100644 src/etrans/gpu/internal/eftdirad_mod.F90 create mode 100644 src/etrans/gpu/internal/eftinv_ctl_mod.F90 create mode 100644 src/etrans/gpu/internal/eftinv_ctlad_mod.F90 create mode 100644 src/etrans/gpu/internal/eftinv_mod.F90 create mode 100644 src/etrans/gpu/internal/eftinvad_mod.F90 create mode 100644 src/etrans/gpu/internal/egath_spec_control_mod.F90 create mode 100644 src/etrans/gpu/internal/einv_trans_ctl_mod.F90 create mode 100644 src/etrans/gpu/internal/einv_trans_ctlad_mod.F90 create mode 100644 src/etrans/gpu/internal/eledir_mod.F90 create mode 100644 src/etrans/gpu/internal/eledirad_mod.F90 create mode 100644 src/etrans/gpu/internal/eleinv_mod.F90 create mode 100644 src/etrans/gpu/internal/eleinvad_mod.F90 create mode 100644 src/etrans/gpu/internal/eltdata_mod.F90 create mode 100644 src/etrans/gpu/internal/eltdir_ctl_mod.F90 create mode 100644 src/etrans/gpu/internal/eltdir_ctlad_mod.F90 create mode 100644 src/etrans/gpu/internal/eltdir_mod.F90 create mode 100644 src/etrans/gpu/internal/eltdirad_mod.F90 create mode 100644 src/etrans/gpu/internal/eltinv_ctl_mod.F90 create mode 100644 src/etrans/gpu/internal/eltinv_ctlad_mod.F90 create mode 100644 src/etrans/gpu/internal/eltinv_mod.F90 create mode 100644 src/etrans/gpu/internal/eltinvad_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi1_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi1ad_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi1b_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi1bad_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi2_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi2ad_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi2b_mod.F90 create mode 100644 src/etrans/gpu/internal/eprfi2bad_mod.F90 create mode 100644 src/etrans/gpu/internal/eset_resol_mod.F90 create mode 100644 src/etrans/gpu/internal/esetup_dims_mod.F90 create mode 100644 src/etrans/gpu/internal/esetup_geom_mod.F90 create mode 100644 src/etrans/gpu/internal/espnorm_ctl_mod.F90 create mode 100644 src/etrans/gpu/internal/espnormc_mod.F90 create mode 100644 src/etrans/gpu/internal/espnormd_mod.F90 create mode 100644 src/etrans/gpu/internal/espnsde_mod.F90 create mode 100644 src/etrans/gpu/internal/espnsdead_mod.F90 create mode 100644 src/etrans/gpu/internal/eupdsp_mod.F90 create mode 100644 src/etrans/gpu/internal/eupdspad_mod.F90 create mode 100644 src/etrans/gpu/internal/eupdspb_mod.F90 create mode 100644 src/etrans/gpu/internal/eupdspbad_mod.F90 create mode 100644 src/etrans/gpu/internal/euvtvd_comm_mod.F90 create mode 100644 src/etrans/gpu/internal/euvtvd_mod.F90 create mode 100644 src/etrans/gpu/internal/euvtvdad_mod.F90 create mode 100644 src/etrans/gpu/internal/evdtuv_mod.F90 create mode 100644 src/etrans/gpu/internal/evdtuvad_comm_mod.F90 create mode 100644 src/etrans/gpu/internal/evdtuvad_mod.F90 create mode 100644 src/etrans/gpu/internal/suefft_mod.F90 create mode 100644 src/etrans/gpu/internal/suemp_trans_mod.F90 create mode 100644 src/etrans/gpu/internal/suemp_trans_preleg_mod.F90 create mode 100644 src/etrans/gpu/internal/suemplat_mod.F90 create mode 100644 src/etrans/gpu/internal/suemplatb_mod.F90 create mode 100644 src/etrans/gpu/internal/suestaonl_mod.F90 create mode 100644 src/etrans/gpu/internal/tpmald_dim.F90 create mode 100644 src/etrans/gpu/internal/tpmald_distr.F90 create mode 100644 src/etrans/gpu/internal/tpmald_fft.F90 create mode 100644 src/etrans/gpu/internal/tpmald_fields.F90 create mode 100644 src/etrans/gpu/internal/tpmald_geo.F90 create mode 100644 src/etrans/gpu/internal/tpmald_tcdis.F90 create mode 100644 src/etrans/include/etrans/edir_trans.h create mode 100644 src/etrans/include/etrans/edir_transad.h create mode 100644 src/etrans/include/etrans/edist_grid.h create mode 100644 src/etrans/include/etrans/edist_spec.h create mode 100644 src/etrans/include/etrans/egath_grid.h create mode 100644 src/etrans/include/etrans/egath_spec.h create mode 100644 src/etrans/include/etrans/egpnorm_trans.h create mode 100644 src/etrans/include/etrans/einv_trans.h create mode 100644 src/etrans/include/etrans/einv_transad.h create mode 100644 src/etrans/include/etrans/esetup_trans.h create mode 100644 src/etrans/include/etrans/especnorm.h create mode 100644 src/etrans/include/etrans/etrans_end.h create mode 100644 src/etrans/include/etrans/etrans_inq.h create mode 100644 src/etrans/include/etrans/etrans_release.h create mode 100644 src/etrans/programs/CMakeLists.txt create mode 100644 src/etrans/programs/aatestprog.F90 create mode 100644 src/etrans/programs/test_adjoint.F90 create mode 100644 src/etrans/programs/trinfo.F90 create mode 100644 src/programs/CMakeLists.txt create mode 100644 src/programs/ectrans-benchmark.F90 create mode 100644 src/programs/ectrans-lam-benchmark.F90 create mode 100755 src/programs/ectrans.in create mode 100644 src/trans/CMakeLists.txt create mode 100644 src/trans/cpu/CMakeLists.txt create mode 100644 src/trans/cpu/algor/bluestein_mod.F90 create mode 100644 src/trans/cpu/algor/butterfly_alg_mod.F90 create mode 100644 src/trans/cpu/algor/fft992.F90 create mode 100644 src/trans/cpu/algor/fft992_cc.F90 create mode 100644 src/trans/cpu/algor/interpol_decomp_mod.F90 create mode 100644 src/trans/cpu/algor/seefmm_mix.F90 create mode 100644 src/trans/cpu/algor/set99.F90 create mode 100644 src/trans/cpu/algor/set99b.F90 create mode 100644 src/trans/cpu/algor/wts500_mod.F90 create mode 100644 src/trans/cpu/external/dir_trans.F90 create mode 100644 src/trans/cpu/external/dir_transad.F90 create mode 100644 src/trans/cpu/external/dist_grid.F90 create mode 100644 src/trans/cpu/external/dist_grid_32.F90 create mode 100644 src/trans/cpu/external/dist_spec.F90 create mode 100644 src/trans/cpu/external/gath_grid.F90 create mode 100644 src/trans/cpu/external/gath_grid_32.F90 create mode 100644 src/trans/cpu/external/gath_spec.F90 create mode 100644 src/trans/cpu/external/get_current.F90 create mode 100644 src/trans/cpu/external/gpnorm_trans.F90 create mode 100644 src/trans/cpu/external/ini_spec_dist.F90 create mode 100644 src/trans/cpu/external/inv_trans.F90 create mode 100644 src/trans/cpu/external/inv_transad.F90 create mode 100644 src/trans/cpu/external/setup_trans.F90 create mode 100644 src/trans/cpu/external/setup_trans0.F90 create mode 100644 src/trans/cpu/external/specnorm.F90 create mode 100644 src/trans/cpu/external/sugawc.F90 create mode 100644 src/trans/cpu/external/trans_end.F90 create mode 100644 src/trans/cpu/external/trans_inq.F90 create mode 100644 src/trans/cpu/external/trans_pnm.F90 create mode 100644 src/trans/cpu/external/trans_release.F90 create mode 100644 src/trans/cpu/external/vordiv_to_uv.F90 create mode 100644 src/trans/cpu/internal/abort_trans_mod.F90 create mode 100644 src/trans/cpu/internal/asre1_mod.F90 create mode 100644 src/trans/cpu/internal/asre1ad_mod.F90 create mode 100644 src/trans/cpu/internal/asre1b_mod.F90 create mode 100644 src/trans/cpu/internal/asre1bad_mod.F90 create mode 100644 src/trans/cpu/internal/cdmap_mod.F90 create mode 100644 src/trans/cpu/internal/cpledn_mod.F90 create mode 100644 src/trans/cpu/internal/dealloc_resol_mod.F90 create mode 100644 src/trans/cpu/internal/dir_trans_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/dir_trans_ctlad_mod.F90 create mode 100644 src/trans/cpu/internal/dist_grid_32_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/dist_grid_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/dist_spec_control_mod.F90 create mode 100644 src/trans/cpu/internal/eq_regions_mod.F90 create mode 100644 src/trans/cpu/internal/field_split_mod.F90 create mode 100644 src/trans/cpu/internal/fourier_in_mod.F90 create mode 100644 src/trans/cpu/internal/fourier_inad_mod.F90 create mode 100644 src/trans/cpu/internal/fourier_out_mod.F90 create mode 100644 src/trans/cpu/internal/fourier_outad_mod.F90 create mode 100644 src/trans/cpu/internal/fsc_mod.F90 create mode 100644 src/trans/cpu/internal/fscad_mod.F90 create mode 100644 src/trans/cpu/internal/fspgl_int_mod.F90 create mode 100644 src/trans/cpu/internal/ftdir_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/ftdir_ctlad_mod.F90 create mode 100644 src/trans/cpu/internal/ftdir_mod.F90 create mode 100644 src/trans/cpu/internal/ftdirad_mod.F90 create mode 100644 src/trans/cpu/internal/ftinv_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/ftinv_ctlad_mod.F90 create mode 100644 src/trans/cpu/internal/ftinv_mod.F90 create mode 100644 src/trans/cpu/internal/ftinvad_mod.F90 create mode 100644 src/trans/cpu/internal/gath_grid_32_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/gath_grid_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/gath_spec_control_mod.F90 create mode 100644 src/trans/cpu/internal/gawl_mod.F90 create mode 100644 src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/inigptr_mod.F90 create mode 100644 src/trans/cpu/internal/inv_trans_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/inv_trans_ctlad_mod.F90 create mode 100644 src/trans/cpu/internal/ldfou2_mod.F90 create mode 100644 src/trans/cpu/internal/ldfou2ad_mod.F90 create mode 100644 src/trans/cpu/internal/ledir_mod.F90 create mode 100644 src/trans/cpu/internal/ledirad_mod.F90 create mode 100644 src/trans/cpu/internal/leinv_mod.F90 create mode 100644 src/trans/cpu/internal/leinvad_mod.F90 create mode 100644 src/trans/cpu/internal/ltdir_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/ltdir_ctlad_mod.F90 create mode 100644 src/trans/cpu/internal/ltdir_mod.F90 create mode 100644 src/trans/cpu/internal/ltdirad_mod.F90 create mode 100644 src/trans/cpu/internal/ltinv_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/ltinv_ctlad_mod.F90 create mode 100644 src/trans/cpu/internal/ltinv_mod.F90 create mode 100644 src/trans/cpu/internal/ltinvad_mod.F90 create mode 100644 src/trans/cpu/internal/myrecvset_mod.F90 create mode 100644 src/trans/cpu/internal/mysendset_mod.F90 create mode 100644 src/trans/cpu/internal/pe2set_mod.F90 create mode 100644 src/trans/cpu/internal/pre_suleg_mod.F90 create mode 100644 src/trans/cpu/internal/prepsnm_mod.F90 create mode 100644 src/trans/cpu/internal/prfi1_mod.F90 create mode 100644 src/trans/cpu/internal/prfi1ad_mod.F90 create mode 100644 src/trans/cpu/internal/prfi1b_mod.F90 create mode 100644 src/trans/cpu/internal/prfi1bad_mod.F90 create mode 100644 src/trans/cpu/internal/prfi2_mod.F90 create mode 100644 src/trans/cpu/internal/prfi2ad_mod.F90 create mode 100644 src/trans/cpu/internal/prfi2b_mod.F90 create mode 100644 src/trans/cpu/internal/prfi2bad_mod.F90 create mode 100644 src/trans/cpu/internal/read_legpol_mod.F90 create mode 100644 src/trans/cpu/internal/set2pe_mod.F90 create mode 100644 src/trans/cpu/internal/set_resol_mod.F90 create mode 100644 src/trans/cpu/internal/setup_dims_mod.F90 create mode 100644 src/trans/cpu/internal/setup_geom_mod.F90 create mode 100644 src/trans/cpu/internal/shuffle_mod.F90 create mode 100644 src/trans/cpu/internal/spnorm_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/spnormc_mod.F90 create mode 100644 src/trans/cpu/internal/spnormd_mod.F90 create mode 100644 src/trans/cpu/internal/spnsde_mod.F90 create mode 100644 src/trans/cpu/internal/spnsdead_mod.F90 create mode 100644 src/trans/cpu/internal/sufft_mod.F90 create mode 100644 src/trans/cpu/internal/sugaw_mod.F90 create mode 100644 src/trans/cpu/internal/suleg_mod.F90 create mode 100644 src/trans/cpu/internal/sump_trans0_mod.F90 create mode 100644 src/trans/cpu/internal/sump_trans_mod.F90 create mode 100644 src/trans/cpu/internal/sump_trans_preleg_mod.F90 create mode 100644 src/trans/cpu/internal/sumplat_mod.F90 create mode 100644 src/trans/cpu/internal/sumplatb_mod.F90 create mode 100644 src/trans/cpu/internal/sumplatbeq_mod.F90 create mode 100644 src/trans/cpu/internal/sumplatf_mod.F90 create mode 100644 src/trans/cpu/internal/supol_mod.F90 create mode 100644 src/trans/cpu/internal/supolf_mod.F90 create mode 100644 src/trans/cpu/internal/sustaonl_mod.F90 create mode 100644 src/trans/cpu/internal/sutrle_mod.F90 create mode 100644 src/trans/cpu/internal/suwavedi_mod.F90 create mode 100644 src/trans/cpu/internal/tpm_constants.F90 create mode 100644 src/trans/cpu/internal/tpm_ctl.F90 create mode 100644 src/trans/cpu/internal/tpm_dim.F90 create mode 100644 src/trans/cpu/internal/tpm_distr.F90 create mode 100644 src/trans/cpu/internal/tpm_fft.F90 create mode 100644 src/trans/cpu/internal/tpm_fftw.F90 create mode 100644 src/trans/cpu/internal/tpm_fields.F90 create mode 100644 src/trans/cpu/internal/tpm_flt.F90 create mode 100644 src/trans/cpu/internal/tpm_gen.F90 create mode 100644 src/trans/cpu/internal/tpm_geometry.F90 create mode 100644 src/trans/cpu/internal/tpm_pol.F90 create mode 100644 src/trans/cpu/internal/tpm_trans.F90 create mode 100644 src/trans/cpu/internal/trgtol_mod.F90 create mode 100644 src/trans/cpu/internal/trltog_mod.F90 create mode 100644 src/trans/cpu/internal/trltom_mod.F90 create mode 100644 src/trans/cpu/internal/trmtol_mod.F90 create mode 100644 src/trans/cpu/internal/updsp_mod.F90 create mode 100644 src/trans/cpu/internal/updspad_mod.F90 create mode 100644 src/trans/cpu/internal/updspb_mod.F90 create mode 100644 src/trans/cpu/internal/updspbad_mod.F90 create mode 100644 src/trans/cpu/internal/uvtvd_mod.F90 create mode 100644 src/trans/cpu/internal/uvtvdad_mod.F90 create mode 100644 src/trans/cpu/internal/vd2uv_ctl_mod.F90 create mode 100644 src/trans/cpu/internal/vd2uv_mod.F90 create mode 100644 src/trans/cpu/internal/vdtuv_mod.F90 create mode 100644 src/trans/cpu/internal/vdtuvad_mod.F90 create mode 100644 src/trans/cpu/internal/write_legpol_mod.F90 create mode 100644 src/trans/cpu/sharedmem/sharedmem.c create mode 100644 src/trans/cpu/sharedmem/sharedmem_mod.F90 create mode 100644 src/trans/gpu/#CMakeLists.txt# create mode 100644 src/trans/gpu/CMakeLists.txt create mode 100644 src/trans/gpu/algor/external/fourier/create_plan_ffth.hip.cpp create mode 100644 src/trans/gpu/algor/external/fourier/destroy_plan_ffth.hip.cpp create mode 100644 src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.cpp create mode 100644 src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.h create mode 100644 src/trans/gpu/algor/interface/dbfgsl.h create mode 100644 src/trans/gpu/algor/interface/dpseuclid.h create mode 100644 src/trans/gpu/algor/interface/dysave.h create mode 100644 src/trans/gpu/algor/interface/eigsol.h create mode 100644 src/trans/gpu/algor/interface/intavg.h create mode 100644 src/trans/gpu/algor/interface/layeravg.h create mode 100644 src/trans/gpu/algor/interface/minv.h create mode 100644 src/trans/gpu/algor/interface/minv_8.h create mode 100644 src/trans/gpu/algor/interface/minv_caller.h create mode 100644 src/trans/gpu/algor/interface/multvdv.h create mode 100644 src/trans/gpu/algor/interface/mxmaop.h create mode 100644 src/trans/gpu/algor/interface/mxptma.h create mode 100644 src/trans/gpu/algor/interface/mxtrma.h create mode 100644 src/trans/gpu/algor/interface/mxture.h create mode 100644 src/trans/gpu/algor/interface/mxturhd.h create mode 100644 src/trans/gpu/algor/interface/mxturs.h create mode 100644 src/trans/gpu/algor/interface/n1cg1.h create mode 100644 src/trans/gpu/algor/interface/n1cga.h create mode 100644 src/trans/gpu/algor/interface/si_mxptco.h create mode 100644 src/trans/gpu/algor/interface/simplico.h create mode 100644 src/trans/gpu/algor/interface/sublayer.h create mode 100644 src/trans/gpu/algor/interface/suher.h create mode 100644 src/trans/gpu/algor/interface/suhert.h create mode 100644 src/trans/gpu/algor/interface/suhes.h create mode 100644 src/trans/gpu/algor/interface/tridia.h create mode 100644 src/trans/gpu/algor/module/butterfly_alg_mod.F90 create mode 100644 src/trans/gpu/algor/module/dilatation_mod.F90 create mode 100644 src/trans/gpu/algor/module/hip_device_mod.F90 create mode 100644 src/trans/gpu/algor/module/hip_device_mod.F90~ create mode 100644 src/trans/gpu/algor/module/hipblasDgemmBatched.hip.cpp create mode 100644 src/trans/gpu/algor/module/hipblasSgemmBatched.hip.cpp create mode 100644 src/trans/gpu/algor/module/hipblas_mod.F90 create mode 100644 src/trans/gpu/algor/module/interpol_decomp_mod.F90 create mode 100644 src/trans/gpu/algor/module/rocblasDgemmBatched.hip.cpp create mode 100644 src/trans/gpu/algor/module/rocblasSgemmBatched.hip.cpp create mode 100644 src/trans/gpu/algor/module/rocblas_mod.F90 create mode 100644 src/trans/gpu/algor/module/seefmm_mix.F90 create mode 100644 src/trans/gpu/algor/module/wts500_mod.F90 create mode 100755 src/trans/gpu/external/dir_trans.F90 create mode 100755 src/trans/gpu/external/dir_transad.F90 create mode 100755 src/trans/gpu/external/dist_grid.F90 create mode 100755 src/trans/gpu/external/dist_grid_32.F90 create mode 100755 src/trans/gpu/external/dist_spec.F90 create mode 100755 src/trans/gpu/external/gath_grid.F90 create mode 100755 src/trans/gpu/external/gath_grid_32.F90 create mode 100755 src/trans/gpu/external/gath_spec.F90 create mode 100755 src/trans/gpu/external/get_current.F90 create mode 100755 src/trans/gpu/external/gpnorm_trans.F90 create mode 100755 src/trans/gpu/external/ini_spec_dist.F90 create mode 100755 src/trans/gpu/external/inv_trans.F90 create mode 100755 src/trans/gpu/external/inv_transad.F90 create mode 100755 src/trans/gpu/external/setup_trans.F90 create mode 100755 src/trans/gpu/external/setup_trans0.F90 create mode 100755 src/trans/gpu/external/specnorm.F90 create mode 100755 src/trans/gpu/external/sugawc.F90 create mode 100755 src/trans/gpu/external/trans_end.F90 create mode 100755 src/trans/gpu/external/trans_inq.F90 create mode 100755 src/trans/gpu/external/trans_pnm.F90 create mode 100755 src/trans/gpu/external/trans_release.F90 create mode 100755 src/trans/gpu/external/vordiv_to_uv.F90 create mode 100755 src/trans/gpu/internal/abort_trans_mod.F90 create mode 100755 src/trans/gpu/internal/asre1_mod.F90 create mode 100755 src/trans/gpu/internal/asre1ad_mod.F90 create mode 100755 src/trans/gpu/internal/asre1b_mod.F90 create mode 100755 src/trans/gpu/internal/asre1bad_mod.F90 create mode 100755 src/trans/gpu/internal/cdmap_mod.F90 create mode 100755 src/trans/gpu/internal/cpledn_mod.F90 create mode 100755 src/trans/gpu/internal/cuda_gemm_batched_mod.F90 create mode 100755 src/trans/gpu/internal/dealloc_resol_mod.F90 create mode 100755 src/trans/gpu/internal/dir_trans_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/dir_trans_ctlad_mod.F90 create mode 100755 src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/dist_grid_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/dist_spec_control_mod.F90 create mode 100755 src/trans/gpu/internal/eq_regions_mod.F90 create mode 100755 src/trans/gpu/internal/field_split_mod.F90 create mode 100755 src/trans/gpu/internal/fourier_in_mod.F90 create mode 100755 src/trans/gpu/internal/fourier_inad_mod.F90 create mode 100755 src/trans/gpu/internal/fourier_out_mod.F90 create mode 100755 src/trans/gpu/internal/fourier_outad_mod.F90 create mode 100755 src/trans/gpu/internal/fsc_mod.F90 create mode 100755 src/trans/gpu/internal/fscad_mod.F90 create mode 100755 src/trans/gpu/internal/fspgl_int_mod.F90 create mode 100755 src/trans/gpu/internal/ftdir_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/ftdir_ctlad_mod.F90 create mode 100755 src/trans/gpu/internal/ftdir_mod.F90 create mode 100755 src/trans/gpu/internal/ftdirad_mod.F90 create mode 100755 src/trans/gpu/internal/ftinv_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/ftinv_ctlad_mod.F90 create mode 100755 src/trans/gpu/internal/ftinv_mod.F90 create mode 100755 src/trans/gpu/internal/ftinvad_mod.F90 create mode 100755 src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/gath_grid_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/gath_spec_control_mod.F90 create mode 100755 src/trans/gpu/internal/gawl_mod.F90 create mode 100644 src/trans/gpu/internal/gstats_label_ifs.F90 create mode 100755 src/trans/gpu/internal/inigptr_mod.F90 create mode 100755 src/trans/gpu/internal/inv_trans_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/inv_trans_ctlad_mod.F90 create mode 100755 src/trans/gpu/internal/ldfou2_mod.F90 create mode 100755 src/trans/gpu/internal/ldfou2ad_mod.F90 create mode 100755 src/trans/gpu/internal/ledir_mod.F90 create mode 100755 src/trans/gpu/internal/ledirad_mod.F90 create mode 100755 src/trans/gpu/internal/leinv_mod.F90 create mode 100755 src/trans/gpu/internal/leinvad_mod.F90 create mode 100755 src/trans/gpu/internal/ltdir_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/ltdir_ctlad_mod.F90 create mode 100755 src/trans/gpu/internal/ltdir_mod.F90 create mode 100755 src/trans/gpu/internal/ltdirad_mod.F90 create mode 100755 src/trans/gpu/internal/ltinv_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/ltinv_ctlad_mod.F90 create mode 100755 src/trans/gpu/internal/ltinv_mod.F90 create mode 100755 src/trans/gpu/internal/ltinvad_mod.F90 create mode 100755 src/trans/gpu/internal/myrecvset_mod.F90 create mode 100755 src/trans/gpu/internal/mysendset_mod.F90 create mode 100644 src/trans/gpu/internal/parkind_ectrans.F90 create mode 100755 src/trans/gpu/internal/pe2set_mod.F90 create mode 100755 src/trans/gpu/internal/pre_suleg_mod.F90 create mode 100755 src/trans/gpu/internal/prepsnm_mod.F90 create mode 100755 src/trans/gpu/internal/prfi1_mod.F90 create mode 100755 src/trans/gpu/internal/prfi1ad_mod.F90 create mode 100755 src/trans/gpu/internal/prfi1b_mod.F90 create mode 100755 src/trans/gpu/internal/prfi1bad_mod.F90 create mode 100755 src/trans/gpu/internal/prfi2_mod.F90 create mode 100755 src/trans/gpu/internal/prfi2ad_mod.F90 create mode 100755 src/trans/gpu/internal/prfi2b_mod.F90 create mode 100755 src/trans/gpu/internal/prfi2bad_mod.F90 create mode 100755 src/trans/gpu/internal/read_legpol_mod.F90 create mode 100755 src/trans/gpu/internal/set2pe_mod.F90 create mode 100755 src/trans/gpu/internal/set_resol_mod.F90 create mode 100755 src/trans/gpu/internal/setup_dims_mod.F90 create mode 100755 src/trans/gpu/internal/setup_geom_mod.F90 create mode 100755 src/trans/gpu/internal/shuffle_mod.F90 create mode 100755 src/trans/gpu/internal/spnorm_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/spnormc_mod.F90 create mode 100755 src/trans/gpu/internal/spnormd_mod.F90 create mode 100755 src/trans/gpu/internal/spnsde_mod.F90 create mode 100755 src/trans/gpu/internal/spnsdead_mod.F90 create mode 100755 src/trans/gpu/internal/sufft_mod.F90 create mode 100755 src/trans/gpu/internal/sugaw_mod.F90 create mode 100755 src/trans/gpu/internal/suleg_mod.F90 create mode 100755 src/trans/gpu/internal/sump_trans0_mod.F90 create mode 100755 src/trans/gpu/internal/sump_trans_mod.F90 create mode 100755 src/trans/gpu/internal/sump_trans_preleg_mod.F90 create mode 100755 src/trans/gpu/internal/sumplat_mod.F90 create mode 100755 src/trans/gpu/internal/sumplatb_mod.F90 create mode 100755 src/trans/gpu/internal/sumplatbeq_mod.F90 create mode 100755 src/trans/gpu/internal/sumplatf_mod.F90 create mode 100755 src/trans/gpu/internal/supol_mod.F90 create mode 100755 src/trans/gpu/internal/supolf_mod.F90 create mode 100755 src/trans/gpu/internal/sustaonl_mod.F90 create mode 100755 src/trans/gpu/internal/sutrle_mod.F90 create mode 100755 src/trans/gpu/internal/suwavedi_mod.F90 create mode 100755 src/trans/gpu/internal/tpm_constants.F90 create mode 100755 src/trans/gpu/internal/tpm_ctl.F90 create mode 100755 src/trans/gpu/internal/tpm_dim.F90 create mode 100755 src/trans/gpu/internal/tpm_distr.F90 create mode 100755 src/trans/gpu/internal/tpm_fft.F90 create mode 100755 src/trans/gpu/internal/tpm_ffth.F90 create mode 100755 src/trans/gpu/internal/tpm_fields.F90 create mode 100755 src/trans/gpu/internal/tpm_flt.F90 create mode 100755 src/trans/gpu/internal/tpm_gen.F90 create mode 100755 src/trans/gpu/internal/tpm_geometry.F90 create mode 100755 src/trans/gpu/internal/tpm_pol.F90 create mode 100755 src/trans/gpu/internal/tpm_trans.F90 create mode 100755 src/trans/gpu/internal/trgtol_mod.F90 create mode 100755 src/trans/gpu/internal/trltog_mod.F90 create mode 100755 src/trans/gpu/internal/trltom_mod.F90 create mode 100755 src/trans/gpu/internal/trmtol_mod.F90 create mode 100755 src/trans/gpu/internal/updsp_mod.F90 create mode 100755 src/trans/gpu/internal/updspad_mod.F90 create mode 100755 src/trans/gpu/internal/updspb_mod.F90 create mode 100755 src/trans/gpu/internal/updspb_vd_mod.F90 create mode 100755 src/trans/gpu/internal/updspbad_mod.F90 create mode 100755 src/trans/gpu/internal/uvtvd_mod.F90 create mode 100755 src/trans/gpu/internal/uvtvdad_mod.F90 create mode 100755 src/trans/gpu/internal/vd2uv_ctl_mod.F90 create mode 100755 src/trans/gpu/internal/vd2uv_mod.F90 create mode 100755 src/trans/gpu/internal/vdtuv_mod.F90 create mode 100755 src/trans/gpu/internal/vdtuvad_mod.F90 create mode 100755 src/trans/gpu/internal/write_legpol_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/asre1b_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/dir_trans_ctl_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/fourier_in_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/fourier_out_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ftdir_ctl_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ftdir_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ftinv_ctl_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ftinv_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/inv_trans_ctl_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/leinv_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ltdir_ctl_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ltdir_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ltinv_ctl_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/ltinv_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/prfi2b_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/spnsde_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/trgtol_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/trltog_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/updsp_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/updspb_mod.F90 create mode 100644 src/trans/gpu/internal_reducedmem/uvtvd_mod.F90 create mode 100755 src/trans/gpu/internal_reducedmem/vdtuv_mod.F90 create mode 100644 src/trans/gpu/sharedmem/sharedmem.c create mode 100644 src/trans/gpu/sharedmem/sharedmem_mod.F90 create mode 100644 src/trans/include/ectrans/dir_trans.h create mode 100644 src/trans/include/ectrans/dir_transad.h create mode 100644 src/trans/include/ectrans/dist_grid.h create mode 100644 src/trans/include/ectrans/dist_grid_32.h create mode 100644 src/trans/include/ectrans/dist_spec.h create mode 100644 src/trans/include/ectrans/gath_grid.h create mode 100644 src/trans/include/ectrans/gath_grid_32.h create mode 100644 src/trans/include/ectrans/gath_spec.h create mode 100644 src/trans/include/ectrans/get_current.h create mode 100644 src/trans/include/ectrans/gpnorm_trans.h create mode 100644 src/trans/include/ectrans/ini_spec_dist.h create mode 100644 src/trans/include/ectrans/inv_trans.h create mode 100644 src/trans/include/ectrans/inv_transad.h create mode 100644 src/trans/include/ectrans/setup_trans.h create mode 100644 src/trans/include/ectrans/setup_trans0.h create mode 100644 src/trans/include/ectrans/specnorm.h create mode 100644 src/trans/include/ectrans/sugawc.h create mode 100644 src/trans/include/ectrans/trans_end.h create mode 100644 src/trans/include/ectrans/trans_inq.h create mode 100644 src/trans/include/ectrans/trans_pnm.h create mode 100644 src/trans/include/ectrans/trans_release.h create mode 100644 src/trans/include/ectrans/vordiv_to_uv.h create mode 100644 src/transi/CMakeLists.txt create mode 100644 src/transi/include/ectrans/transi.h create mode 100644 src/transi/include/ectrans/version.h create mode 100644 src/transi/transi.c create mode 100644 src/transi/transi.h create mode 100644 src/transi/transi_module.F90 create mode 100644 src/transi/version.c.in create mode 100644 src/transi/version.h create mode 100644 tests/CMakeLists.txt create mode 100755 tests/test-install.sh.in create mode 100644 tests/test_install/CMakeLists.txt create mode 100644 tests/test_install/main.F90 create mode 100644 tests/test_install/transi_gptosp.c create mode 100644 tests/test_install/transi_sptogp.c create mode 100644 tests/trans/test_adjoint.F90 create mode 100644 tests/transi/transi_test.c create mode 100644 tests/transi/transi_test.h create mode 100644 tests/transi/transi_test_invtrans_adjoint.c create mode 100644 tests/transi/transi_test_io.c create mode 100644 tests/transi/transi_test_lonlat.c create mode 100644 tests/transi/transi_test_lonlat_diff_incr.c create mode 100644 tests/transi/transi_test_memory.c create mode 100644 tests/transi/transi_test_memory_lonlat.c create mode 100644 tests/transi/transi_test_program.c create mode 100644 tests/transi/transi_test_timings.c create mode 100644 tests/transi/transi_test_vordiv_to_UV.c create mode 100644 toolchain_lumi.cmake diff --git a/.github/tools/install-fftw.sh b/.github/tools/install-fftw.sh new file mode 100755 index 0000000..9fe750f --- /dev/null +++ b/.github/tools/install-fftw.sh @@ -0,0 +1,99 @@ +#! /usr/bin/env bash + +# (C) Copyright 2013 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. + +set +x +set -e -o pipefail + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export PATH=$SCRIPTDIR:$PATH + +# Some defaults for the arguments +PREFIX=$(pwd)/install +fftw_version=3.3.10 +fftw_configure="--enable-shared" +fftw_with_single=false + +while [ $# != 0 ]; do + case "$1" in + "--prefix") + PREFIX="$2"; shift + ;; + "--version") + fftw_version="$2"; shift + ;; + "--with-single") + fftw_with_single=true; + ;; + *) + echo "Unrecognized argument '$1'" + exit 1 + ;; + esac + shift +done + +echo "Installing FFTW version ${fftw_version}" + +fftw_installed=${PREFIX}/fftw-${fftw_version}-installed +if [[ -f "${fftw_installed}" ]]; then + echo "FFTW ${fftw_version} is already installed at ${PREFIX}" + exit +fi + +os=$(uname) +case "$os" in + Darwin) + brew ls --versions fftw || brew install fftw + exit + ;; + *) + ;; +esac + + +if [ -z "${TMPDIR+x}" ]; then + TMPDIR=${HOME}/tmp +fi +mkdir -p ${TMPDIR}/downloads + +fftw_tarball_url=http://www.fftw.org/fftw-${fftw_version}.tar.gz +fftw_tarball=$TMPDIR/downloads/fftw-${fftw_version}.tar.gz +fftw_dir=$TMPDIR/downloads/fftw-${fftw_version} + +echo "+ curl -L ${fftw_tarball_url} > ${fftw_tarball}" +curl -L ${fftw_tarball_url} > ${fftw_tarball} +echo "+ tar xzf ${fftw_tarball} -C ${TMPDIR}/downloads" +tar xzf ${fftw_tarball} -C ${TMPDIR}/downloads +echo "+ cd ${fftw_dir}" +cd ${fftw_dir} +echo "+ ./configure --prefix=${PREFIX} ${fftw_configure}" +./configure --prefix=${PREFIX} ${fftw_configure} +echo "+ make -j8" +make -j8 +echo "+ make install" +make install + +if $fftw_with_single; then + # Now again in single precision + make clean + echo "+ ./configure --prefix=${PREFIX} ${fftw_configure} --enable-float" + ./configure --prefix=${PREFIX} ${fftw_configure} --enable-float + echo "+ make -j8" + make -j8 + echo "+ make install" + make install +fi + + +echo "+ rm -rf \${fftw_tarball} \${fftw_dir}" +rm -rf ${fftw_tarball} ${fftw_dir} + +echo "+ touch ${fftw_installed}" +touch ${fftw_installed} diff --git a/.github/tools/install-intel-oneapi.sh b/.github/tools/install-intel-oneapi.sh new file mode 100755 index 0000000..78af1ae --- /dev/null +++ b/.github/tools/install-intel-oneapi.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +KEY=GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB +wget https://apt.repos.intel.com/intel-gpg-keys/$KEY +sudo apt-key add $KEY +rm $KEY +echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list +sudo apt-get update +sudo apt-get install \ + intel-oneapi-compiler-fortran \ + intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic \ + intel-oneapi-mpi \ + intel-oneapi-mpi-devel \ + intel-oneapi-mkl diff --git a/.github/tools/install-mpi.sh b/.github/tools/install-mpi.sh new file mode 100755 index 0000000..03a5e3a --- /dev/null +++ b/.github/tools/install-mpi.sh @@ -0,0 +1,166 @@ +#!/bin/bash + + +set +x +set -e -o pipefail + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export PATH=$SCRIPTDIR:$PATH + +# Some defaults for the arguments +PREFIX=$(pwd)/${MPI} +mpi_override=false +MPI=openmpi + +while [ $# != 0 ]; do + case "$1" in + "--prefix") + PREFIX="$2"; shift + ;; + "--override") + mpi_override=true; + ;; + "--version") + mpi_version="$2"; shift + ;; + "--mpi") + MPI="$2"; shift + ;; + *) + echo "Unrecognized argument '$1'" + exit 1 + ;; + esac + shift +done + +os=$(uname) +OMPIVER=4.1.1 +MPICHVER=3.4.2 + +if [ ! -z ${mpi_version+x} ]; then + if [[ "${MPI}" =~ [Oo][Pp][Ee][Nn]\-?[Mm][Pp][Ii] ]]; then + OMPIVER=${mpi_version} + fi + if [[ "${MPI}" =~ [Mm][Pp][Ii][Cc][Hh] ]]; then + MPICHVER=${mpi_version} + fi +fi + + +mkdir -p ${PREFIX} +touch ${PREFIX}/env.sh + +MPI_INSTALLED=false + +case "$os" in + Darwin) + case "$MPI" in + mpich) + brew ls --versions mpich || brew install mpich + ;; + openmpi) + brew ls --versions openmpi || brew install openmpi + echo "localhost slots=72" >> $(brew --prefix)/etc/openmpi-default-hostfile + # workaround for open-mpi/omp#7516 + echo "setting the mca gds to hash..." + echo "gds = hash" >> $(brew --prefix)/etc/pmix-mca-params.conf + + # workaround for open-mpi/ompi#5798 + echo "setting the mca btl_vader_backing_directory to /tmp..." + echo "btl_vader_backing_directory = /tmp" >> $(brew --prefix)/etc/openmpi-mca-params.conf + ;; + *) + echo "Unknown MPI implementation: $MPI" + exit 1 + ;; + esac + ;; + + Linux) + if [ -n "${MPI_HOME}" ]; then + echo "MPI is already installed at MPI_HOME=${MPI_HOME}." + echo "Not taking any action." + exit 0 + fi + if [ -n "${I_MPI_ROOT}" ]; then + echo "MPI is already installed at I_MPI_ROOT=${I_MPI_ROOT}." + echo "Not taking any action." + exit 0 + fi + case "$MPI" in + mpich) + if [ -f ${PREFIX}/include/mpi.h ]; then + echo "${PREFIX}/include/mpi.h found" + fi + if [ -f ${PREFIX}/lib/libmpich.so ]; then + echo "${PREFIX}/lib/libmpich.so found -- nothing to build." + else + echo "Downloading mpich source..." + wget http://www.mpich.org/static/downloads/${MPICHVER}/mpich-${MPICHVER}.tar.gz + tar xfz mpich-${MPICHVER}.tar.gz + rm mpich-${MPICHVER}.tar.gz + echo "Configuring and building mpich..." + cd mpich-${MPICHVER} + unset F90 + unset F90FLAGS + ${SCRIPTDIR}/reduce-output.sh ./configure \ + --prefix=${PREFIX} \ + --enable-static=false \ + --enable-alloca=true \ + --enable-threads=single \ + --enable-fortran=yes \ + --enable-fast=all \ + --enable-g=none \ + --enable-timing=none + ${SCRIPTDIR}/reduce-output.sh make -j48 + ${SCRIPTDIR}/reduce-output.sh make install + MPI_INSTALLED=true + cd - + rm -rf mpich-${MPICHVER} + fi + ;; + openmpi) + if [ -f ${PREFIX}/include/mpi.h ]; then + echo "openmpi/include/mpi.h found." + fi + if [ -f ${PREFIX}/lib/libmpi.so ] || [ -f ${PREFIX}/lib64/libmpi.so ]; then + echo "libmpi.so found -- nothing to build." + else + echo "Downloading openmpi source..." + wget --no-check-certificate https://www.open-mpi.org/software/ompi/v4.1/downloads/openmpi-$OMPIVER.tar.gz + tar -zxf openmpi-$OMPIVER.tar.gz + rm openmpi-$OMPIVER.tar.gz + echo "Configuring and building openmpi..." + cd openmpi-$OMPIVER + ${SCRIPTDIR}/reduce-output.sh ./configure --prefix=${PREFIX} + ${SCRIPTDIR}/reduce-output.sh make -j4 + ${SCRIPTDIR}/reduce-output.sh make install + MPI_INSTALLED=true + echo "localhost slots=72" >> ${PREFIX}/etc/openmpi-default-hostfile + cd - + rm -rf openmpi-$OMPIVER + fi + ;; + *) + echo "Unknown MPI implementation: $MPI" + exit 1 + ;; + esac + ;; + + *) + echo "Unknown operating system: $os" + exit 1 + ;; +esac + + +if ${MPI_INSTALLED} ; then +cat > ${PREFIX}/env.sh << EOF +export MPI_HOME=${PREFIX} +export PATH=\${MPI_HOME}/bin:\${PATH} +EOF +echo "Please source ${PREFIX}/env.sh, containing:" +cat ${PREFIX}/env.sh +fi diff --git a/.github/tools/install-nvhpc.sh b/.github/tools/install-nvhpc.sh new file mode 100755 index 0000000..8a8f332 --- /dev/null +++ b/.github/tools/install-nvhpc.sh @@ -0,0 +1,107 @@ +#!/bin/sh + +# Install NVHPC +# https://github.com/nemequ/pgi-travis +# +# Originally written for Squash by +# Evan Nemerson. For documentation, bug reports, support requests, +# etc. please use . +# +# To the extent possible under law, the author(s) of this script have +# waived all copyright and related or neighboring rights to this work. +# See for +# details. + +version=21.9 + +TEMPORARY_FILES="${TMPDIR:-/tmp}" +export NVHPC_INSTALL_DIR=$(pwd)/nvhpc-install +export NVHPC_SILENT=true +while [ $# != 0 ]; do + case "$1" in + "--prefix") + export NVHPC_INSTALL_DIR="$2"; shift + ;; + "--tmpdir") + TEMPORARY_FILES="$2"; shift + ;; + "--verbose") + export NVHPC_SILENT=false; + ;; + "--version") + version="$2"; shift + ;; + *) + echo "Unrecognized argument '$1'" + exit 1 + ;; + esac + shift +done + +case "$(uname -m)" in + x86_64|ppc64le|aarch64) + ;; + *) + echo "Unknown architecture: $(uname -m)" >&2 + exit 1 + ;; +esac + +if [ -d "${NVHPC_INSTALL_DIR}" ]; then + if [[ $(find "${NVHPC_INSTALL_DIR}" -name "nvc" | wc -l) == 1 ]]; then + echo "NVHPC already installed at ${NVHPC_INSTALL_DIR}" + exit + fi +fi + +# Example download URL for version 21.9 +# https://developer.download.nvidia.com/hpc-sdk/21.9/nvhpc_2020_219_Linux_x86_64_cuda_11.0.tar.gz + +ver="$(echo $version | tr -d . )" +URL=$(curl -s "https://developer.nvidia.com/nvidia-hpc-sdk-$ver-downloads" | grep -oP "https://developer.download.nvidia.com/hpc-sdk/([0-9]{2}\.[0-9]+)/nvhpc_([0-9]{4})_([0-9]+)_Linux_$(uname -m)_cuda_([0-9\.]+).tar.gz" | sort | tail -1) +FOLDER="$(basename "$(echo "${URL}" | grep -oP '[^/]+$')" .tar.gz)" + +if [ ! -d "${TEMPORARY_FILES}/${FOLDER}" ]; then + echo "Downloading ${TEMPORARY_FILES}/${FOLDER} from URL [${URL}]" + mkdir -p ${TEMPORARY_FILES} + curl --location \ + --user-agent "pgi-travis (https://github.com/nemequ/pgi-travis)" \ + "${URL}" | tar zx -C "${TEMPORARY_FILES}" +else + echo "Download already present in ${TEMPORARY_FILES}/${FOLDER}" +fi + +echo "+ ${TEMPORARY_FILES}/${FOLDER}/install" +"${TEMPORARY_FILES}/${FOLDER}/install" + +#comment out to cleanup +#rm -rf "${TEMPORARY_FILES}/${FOLDER}" + +NVHPC_VERSION=$(basename "${NVHPC_INSTALL_DIR}"/Linux_$(uname -m)/*.*/) + +# Use gcc which is available in PATH +${NVHPC_INSTALL_DIR}/Linux_$(uname -m)/${NVHPC_VERSION}/compilers/bin/makelocalrc \ + -x ${NVHPC_INSTALL_DIR}/Linux_$(uname -m)/${NVHPC_VERSION}/compilers/bin \ + -gcc $(which gcc) \ + -gpp $(which g++) \ + -g77 $(which gfortran) + +cat > ${NVHPC_INSTALL_DIR}/env.sh << EOF +### Variables +export NVHPC_INSTALL_DIR=${NVHPC_INSTALL_DIR} +export NVHPC_VERSION=${NVHPC_VERSION} +export NVHPC_DIR=\${NVHPC_INSTALL_DIR}/Linux_$(uname -m)/\${NVHPC_VERSION} + +### Compilers +export PATH=\${NVHPC_DIR}/compilers/bin:\${PATH} +export NVHPC_LIBRARY_PATH=\${NVHPC_DIR}/compilers/lib +export LD_LIBRARY_PATH=\${NVHPC_LIBRARY_PATH} + +### MPI +export MPI_HOME=\${NVHPC_DIR}/comm_libs/mpi +export PATH=\${MPI_HOME}/bin:\${PATH} +EOF + +cat ${NVHPC_INSTALL_DIR}/env.sh + diff --git a/.github/tools/reduce-output.sh b/.github/tools/reduce-output.sh new file mode 100755 index 0000000..5faa2a3 --- /dev/null +++ b/.github/tools/reduce-output.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# (C) Copyright 2013 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. + + +# Abort on Error +set -e + +PING_SLEEP=30s + +dump_output() { + echo " ++ Tailing the last 100 lines of output from $BUILD_OUTPUT" + tail -100 $BUILD_OUTPUT +} +error_handler() { + echo ERROR: An error was encountered with the build. + kill $PING_LOOP_PID + dump_output + exit 1 +} +# If an error occurs, run our error handler to output a tail of the build +trap 'error_handler' ERR + +# Set up a repeating loop to display some output regularly. +bash -c "while true; do sleep $PING_SLEEP; echo \" ++ \$(date) - running ... \"; done" & +PING_LOOP_PID=$! +BUILD_OUTPUT=build-$PING_LOOP_PID.out +touch $BUILD_OUTPUT +echo " + $@" +echo " ++ Output redirected to $BUILD_OUTPUT" +$@ >> $BUILD_OUTPUT 2>&1 + +# The build finished without returning an error so dump a tail of the output +dump_output + +# nicely terminate the ping output loop +kill $PING_LOOP_PID diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..792cc6d --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,196 @@ +name: build + +# Controls when the action will run +on: + + # Trigger the workflow on all pushes, except on tag creation + push: + branches: + - '**' + tags-ignore: + - '**' + + # Trigger the workflow on all pull requests + pull_request: ~ + + # Allow workflow to be dispatched on demand + workflow_dispatch: ~ + +env: + ECTRANS_TOOLS: ${{ github.workspace }}/.github/tools + CTEST_PARALLEL_LEVEL: 1 + CACHE_SUFFIX: v1 # Increase to force new cache to be created + +jobs: + ci: + name: ci + + strategy: + fail-fast: false # false: try to complete all jobs + + matrix: + build_type: [Release,Debug] + name: + - linux gnu-10 + - linux clang-12 + - linux nvhpc-21.9 + - linux intel + - macos + + include: + + - name: linux gnu-10 + os: ubuntu-20.04 + compiler: gnu-10 + compiler_cc: gcc-10 + compiler_cxx: g++-10 + compiler_fc: gfortran-10 + ctest_options: -E memory + caching: true + + - name: linux clang-12 + os: ubuntu-20.04 + compiler: clang-12 + compiler_cc: clang-12 + compiler_cxx: clang++-12 + compiler_fc: gfortran-10 + ctest_options: -E memory + caching: true + + - name: linux clang-12 + build_type: Release + os: ubuntu-20.04 + compiler: clang-12 + compiler_cc: clang-12 + compiler_cxx: clang++-12 + compiler_fc: gfortran-10 + ctest_options: -E memory + caching: true + + - name: linux nvhpc-21.9 + os: ubuntu-20.04 + compiler: nvhpc-21.9 + compiler_cc: nvc + compiler_cxx: nvc++ + compiler_fc: nvfortran + cmake_options: -DCMAKE_CXX_FLAGS=--diag_suppress177 + ctest_options: -E memory + caching: false + + - name : linux intel + os: ubuntu-20.04 + compiler: intel-oneapi + compiler_cc: icc + compiler_cxx: icpc + compiler_fc: ifort + caching: true + + - name: macos + # Xcode compiler requires empty environment variables, so we pass null (~) here + os: macos-11 + compiler: clang-12 + compiler_cc: ~ + compiler_cxx: ~ + compiler_fc: gfortran-11 + caching: true + + runs-on: ${{ matrix.os }} + steps: + - name: Checkout Repository + uses: actions/checkout@v2 + + - name: Environment + run: | + echo "DEPS_DIR=${{ runner.temp }}/deps" >> $GITHUB_ENV + echo "CC=${{ matrix.compiler_cc }}" >> $GITHUB_ENV + echo "CXX=${{ matrix.compiler_cxx }}" >> $GITHUB_ENV + echo "FC=${{ matrix.compiler_fc }}" >> $GITHUB_ENV + + if [[ "${{ matrix.os }}" =~ macos ]]; then + brew install ninja + else + sudo apt-get update + sudo apt-get install ninja-build + fi + + printenv + + - name: Cache Dependencies + # There seems to be a problem with cached NVHPC dependencies, leading to SIGILL perhaps due to slightly different architectures + if: matrix.caching + id: deps-cache + uses: pat-s/always-upload-cache@v2.1.5 + with: + path: ${{ env.DEPS_DIR }} + key: deps-${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.build_type }}-${{ env.CACHE_SUFFIX }} + + - name: Install NVHPC compiler + if: contains( matrix.compiler, 'nvhpc' ) + shell: bash -eux {0} + run: | + ${ECTRANS_TOOLS}/install-nvhpc.sh --prefix /opt/nvhpc + source /opt/nvhpc/env.sh + echo "${NVHPC_DIR}/compilers/bin" >> $GITHUB_PATH + [ -z ${MPI_HOME+x} ] || echo "MPI_HOME=${MPI_HOME}" >> $GITHUB_ENV + + - name: Install Intel oneAPI compiler + if: contains( matrix.compiler, 'intel' ) + run: | + ${ECTRANS_TOOLS}/install-intel-oneapi.sh + source /opt/intel/oneapi/setvars.sh + printenv >> $GITHUB_ENV + echo "CACHE_SUFFIX=$(icc -dumpversion)" >> $GITHUB_ENV + + - name: Install MPI + shell: bash -eux {0} + run: | + FCFLAGS=-fPIC CFLAGS=-fPIC FFLAGS=-fPIC ${ECTRANS_TOOLS}/install-mpi.sh --mpi openmpi --prefix ${DEPS_DIR}/openmpi + [ -f ${DEPS_DIR}/openmpi/env.sh ] && source ${DEPS_DIR}/openmpi/env.sh + [ -z ${MPI_HOME+x} ] || echo "MPI_HOME=${MPI_HOME}" >> $GITHUB_ENV + + - name: Install FFTW + shell: bash -eux {0} + run: | + ${ECTRANS_TOOLS}/install-fftw.sh --version 3.3.10 --with-single --prefix ${DEPS_DIR}/fftw + echo "FFTW_ROOT=${DEPS_DIR}/fftw" >> $GITHUB_ENV + + - name: Set Build & Test Environment + run: | + + # Add mpirun to path for testing + [ -z ${MPI_HOME+x} ] || echo "${MPI_HOME}/bin" >> $GITHUB_PATH + + + - name: Build & Test + id: build-test + uses: ecmwf-actions/build-package@v2 + with: + self_coverage: true + force_build: true + cache_suffix: "${{ matrix.build_type }}-${{ env.CACHE_SUFFIX }}" + recreate_cache: ${{ matrix.caching == false }} + dependencies: | + ecmwf/ecbuild + ecmwf/eckit + ecmwf/fckit + ecmwf-ifs/fiat + dependency_branch: develop + dependency_cmake_options: | + ecmwf/eckit: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF -DENABLE_ECKIT_CMD=OFF -DENABLE_ECKIT_SQL=OFF -DENABLE_MPI=ON -DENABLE_OMP=OFF" + ecmwf/fckit: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF" + ecmwf-ifs/fiat: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF -DENABLE_MPI=ON" + cmake_options: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ${{ matrix.cmake_options }} -DENABLE_MPI=ON -DENABLE_FFTW=ON" + ctest_options: "${{ matrix.ctest_options }}" + + - name: Verify tools + run: | + export PATH=${{ steps.build-test.outputs.bin_path }}:$PATH + + echo "+ ectrans --info" + ectrans --info + +# - name: Codecov Upload +# if: steps.build-test.outputs.coverage_file +# uses: codecov/codecov-action@v2 +# with: +# files: ${{ steps.build-test.outputs.coverage_file }} diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..c8f0a1c --- /dev/null +++ b/AUTHORS @@ -0,0 +1,27 @@ +Authors and Contributors +======================== + +- W. Deconinck (ECMWF) +- D. Dent (ECMWF) +- P. Dueben (ECMWF) +- R. El Khatib (Meteo France) +- J. Hague (ECMWF) +- M. Hamrud (ECMWF) +- L. Isaksen (ECMWF) +- G. Mozdzynski (ECMWF) +- P. Marguinaud (Meteo France) +- A. Mueller (ECMWF) +- M. Hortal (ECMWF) +- P. Courtier (ECMWF) +- D. Degrauwe (RMI) +- D. Giard (Meteo France) +- G. Radnoti (ECMWF) +- D. Salmond (ECMWF) +- Y. Seity (Meteo France) +- F. Vana (ECMWF) +- N. Wedi (ECMWF) +- T. Wilhelmsson (ECMWF) +- K. Yessad (Meteo France) + +If you have contributed to this project, +please add your name in the above alphabetical list. diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ba30d5b --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,208 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +cmake_minimum_required( VERSION 3.18 FATAL_ERROR ) +# CMake 3.17 adds INTERFACE link options which get propagated to the link stage, +# even if the target is linked in privately +# CMake 3.18 allows "LINK_LANG_AND_ID" generator expression. + +find_package( ecbuild 3.4 REQUIRED HINTS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../ecbuild ) + +project( ectrans LANGUAGES C Fortran CXX ) + + + +include( ectrans_macros ) + +ecbuild_enable_fortran( REQUIRED NO_MODULE_DIRECTORY ) + +### Find (optional) dependencies + +ecbuild_find_package( NAME fiat REQUIRED ) + +ecbuild_add_option( FEATURE MPI + DESCRIPTION "Support for MPI distributed memory parallelism" + CONDITION fiat_HAVE_MPI ) + +ecbuild_add_option( FEATURE OMP + DEFAULT ON + DESCRIPTION "Support for OpenMP shared memory parallelism" + REQUIRED_PACKAGES "OpenMP COMPONENTS Fortran" ) + +ecbuild_add_option( FEATURE DOUBLE_PRECISION + DEFAULT OFF + DESCRIPTION "Support for Double Precision" ) + +ecbuild_add_option( FEATURE SINGLE_PRECISION + DEFAULT ON + DESCRIPTION "Support for Single Precision" ) + +if( HAVE_SINGLE_PRECISION ) + set( single "single" ) +endif() +set( HAVE_dp ${HAVE_DOUBLE_PRECISION} ) +set( HAVE_sp ${HAVE_SINGLE_PRECISION} ) + +ecbuild_add_option( FEATURE MKL + DESCRIPTION "Use MKL for BLAS and/or FFTW" + DEFAULT OFF + REQUIRED_PACKAGES "MKL QUIET" ) + +if( NOT HAVE_MKL ) + option( FFTW_ENABLE_MKL OFF ) +endif() + +if( ENABLE_GPU OR ECTRANS_ENABLE_GPU ) # must be explicitly turned ON as FEATURE GPU is OFF by default (see below) + #include(CheckLanguage) + #check_language(HIP) + #if(CMAKE_HIP_COMPILER) + # enable_language(HIP) + #else() + # ecbuild_critical("No HIP compiler found") + #endif() + #if(NOT DEFINED HIP_PATH) + #if(NOT DEFINED ENV{HIP_PATH}) + #set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") + #else() + #set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + #endif() + #endif() + #list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} /opt/rocm) + #set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) + set(CMAKE_MODULE_PATH $ENV{HIP_ROOT}/cmake ${CMAKE_MODULE_PATH}) + ecbuild_warn_var(CMAKE_MODULE_PATH) + find_package(HIP) + #if( HIP_FOUND ) + find_package(hipfft) + find_package(hipblas) + find_package(rocblas) + find_package(rocfft) + #endif() + + + find_package(OpenACC COMPONENTS Fortran C CXX) + + find_package(MPI COMPONENTS Fortran CXX) + + if( NOT CMAKE_HIP_COMPILER) + ecbuild_info("No HIP compiler found") + endif() + if(FALSE) + if( NOT TARGET HIP::hipblas ) + ecbuild_info("No target HIP::hipblas") + endif() + if( NOT TARGET HIP::hipfft ) + ecbuild_info("No target HIP::hipfft") + endif() + endif() + if( NOT TARGET OpenACC::OpenACC_Fortran ) + ecbuild_info("No target OpenACC:OpenACC_Fortran") + endif() + if( NOT TARGET MPI::MPI_Fortran ) + ecbuild_info("No target MPI::MPI_Fortran") + endif() + if( NOT TARGET MPI::MPI_CXX ) + ecbuild_info("No target MPI::MPI_CXX") + endif() +endif() + +ecbuild_add_option( FEATURE FFTW + DEFAULT ON + DESCRIPTION "Support for fftw" + REQUIRED_PACKAGES "FFTW COMPONENTS double ${single}" ) + +ecbuild_add_option( FEATURE TRANSI + DEFAULT ON + DESCRIPTION "Compile TransI C-interface to trans" + CONDITION HAVE_DOUBLE_PRECISION ) + +ecbuild_add_option( FEATURE CPU + DEFAULT OFF + DESCRIPTION "Compile CPU version of ectrans" + ) + +ecbuild_add_option( FEATURE GPU + DEFAULT OFF + DESCRIPTION "Compile GPU version of ectrans (Requires OpenACC or sufficient OpenMP offloading support and MPI and CUDA. For now only NVHPC supported.)" + CONDITION + HIP_FOUND + AND HAVE_MPI AND TARGET MPI::MPI_CXX AND TARGET MPI::MPI_Fortran + # AND TARGET HIP::hipblas AND TARGET HIP::hipfft + ) + +ecbuild_add_option( FEATURE ACCGPU + DEFAULT ON + DESCRIPTION "Support for using GPUs with OpenACC" + CONDITION HAVE_GPU + AND TARGET OpenACC::OpenACC_Fortran + #REQUIRED_PACKAGES OpenACC + ) + +ecbuild_add_option( FEATURE OMPGPU + DEFAULT ON + DESCRIPTION "Support for using GPUs with OpenMP offloading" + CONDITION HAVE_GPU + AND TARGET OpenMP::OpenMP_Fortran + REQUIRED_PACKAGES "OpenMP COMPONENTS Fortran" ) + +ecbuild_add_option( FEATURE GPU_AWARE_MPI + DEFAULT ON + CONDITION HAVE_GPU + DESCRIPTION "Enable CUDA-aware MPI") + +ecbuild_add_option( FEATURE GPU_REDUCED_MEMORY + DEFAULT OFF + CONDITION HAVE_GPU + DESCRIPTION "Use (slower) code that reduces memory requirements on GPU" ) + + +set( HAVE_acc ${HAVE_ACCGPU} ) +set( HAVE_omp ${HAVE_OMPGPU} ) + +ecbuild_add_option( FEATURE ETRANS + DEFAULT OFF + DESCRIPTION "Include Limited-Area-Model Transforms" ) + +# Following is NVHPC compiler specific and should really be coming from external input +if( CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC" ) + if( HAVE_ACCGPU ) + set( acc_flags -acc -gpu=lineinfo,deepcopy,fastmath,nordc ) + set( acc_link_flags -acc ) + # Pass cmake command-line option "-DCMAKE_Fortran_FLAGS=-Minfo=acc" for diagnostics info + endif() + if( HAVE_OMPGPU ) + set( omp_flags -mp=gpu -gpu=lineinfo,fastmath -Minfo=mp ) + set( omp_link_flags -mp=gpu ) + endif() +endif() + +if( HAVE_GPU ) + #enable_language( HIP ) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70 80) + endif() +endif() + +#ectrans_find_lapack() + +### Add sources and tests +include( ectrans_compile_options ) + +add_subdirectory( src ) +add_subdirectory( tests ) + +### Export +if( BUILD_SHARED_LIBS ) + set( PACKAGE_REQUIRES_PRIVATE_DEPENDENCIES 0 ) +else() + set( PACKAGE_REQUIRES_PRIVATE_DEPENDENCIES 1 ) +endif() + +ecbuild_install_project( NAME ${PROJECT_NAME} ) + +ecbuild_print_summary() diff --git a/ENV_lumi b/ENV_lumi new file mode 100644 index 0000000..ce77d70 --- /dev/null +++ b/ENV_lumi @@ -0,0 +1,25 @@ +module purge +module load LUMI/22.08 partition/G +module load PrgEnv-cray/8.3.3 cce/15.0.0 +module load rocm/5.2.3 + +export BASEDIR=/project/project_462000140/${USER}/ectrans/lam/ +export SOURCEDIR=${BASEDIR}/sources +export BUILDDIR=${BASEDIR}/build +export INSTALLDIR=${BASEDIR}/install +export TOOLCHAIN_FILE=${SOURCEDIR}/ectrans/toolchain_lumi.cmake + +mkdir -p ${BASEDIR} ${SOURCEDIR} ${BUILDDIR} ${INSTALLDIR} + +export CC=cc +export CXX=CC +export FC=ftn +export PATH=${PATH}:${INSTALLDIR}/ecbuild/bin/ + +export ECBUILD_TOOLCHAIN="${TOOLCHAIN_FILE}" +export CMAKE_BUILD_TYPE=RelWithDebInfo +export HIP_ROOT=${ROCM_PATH}/hip + +cd ${BASEDIR} + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..af6ca02 --- /dev/null +++ b/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 1996-2018 ECMWF + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 58296c0..39d2721 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,111 @@ -# ecTrans Dwarf +ecTrans +******* +Introduction +============ +ecTrans is the global spherical Harmonics transforms library, extracted from the IFS. +It contains both CPU and GPU (Nvidia) code-paths. +The CPU version uses a hybrid of MPI and OpenMP parallelisation strategies, while the GPU version combines MPI and OpenACC. +A default installation builds both CPU libraries (trans_sp, trans_dp) and various flavours of GPU libraries in (trans_gpu_{sp/dp} shared library, trans_gpu_static_{sp/dp} static library, trans_gpu_static_CA_{sp/dp} static library requiring CUDA-aware MPI implementation), as well as a C interface to the double-precision version (transi_dp). A simple benchmark driver is also built against each of these libraries, allowing simple testing of the transforms. -## Getting started +License +======= -To make it easy for you to get started with GitLab, here's a list of recommended next steps. +ecTrans is distributed under the Apache License Version 2.0. +See `LICENSE` file for details. -Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)! +Installing ecTrans +================== -## Add your files +Supported Platforms +------------------- -- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files -- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command: +- Linux +- Apple MacOS -``` -cd existing_repo -git remote add origin https://earth.bsc.es/gitlab/ovaneck/ectrans-dwarf.git -git branch -M main -git push -uf origin main -``` +Other UNIX-like operating systems may work too out of the box. -## Integrate with your tools +The GPU codepath has only been tested with NVHPC compilers on recent Nvidia GPUs. -- [ ] [Set up project integrations](https://earth.bsc.es/gitlab/ovaneck/ectrans-dwarf/-/settings/integrations) +Requirements +------------ +- Fortran compiler with OpenMP support +- C compiler +- FIAT (see https://github.com/ecmwf-ifs/fiat ) +- ecBuild (see https://github.com/ecmwf/ecbuild) +- CMake (see https://cmake.org) +- A BLAS library -## Collaborate with your team +Further optional recommended dependencies: +- FFTW (http://www.fftw.org) -- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/) -- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html) -- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically) -- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/) -- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html) +For the GPU libraries : +- Fortran compiler with OpenACC support +- CUDA toolkit (compiler, and CUBLAS and CUFFT libraries) -## Test and Deploy +Building ecTrans +---------------- -Use the built-in continuous integration in GitLab. +Building and installing Trans happens via CMake, which provides automatic detection for +third-party libraries in standard locations and helps cross-plaform portability. +There are multiple ways to help CMake discover packages in non-standard locations. +One explicit way is to e.g. set environment variables for each dependency. -- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html) -- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/) -- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html) -- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/) -- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html) +Environment variables -*** + $ export ecbuild_ROOT= + $ export fiat_ROOT= + $ export CC= + $ export FC= -# Editing this README +You must compile FIAT out-of-source, so create a build-directory (anywhere) -When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template. + $ mkdir build && cd build + +Configuration of the build happens through standard CMake -## Suggestions for a good README -Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information. + $ cmake -## Name -Choose a self-explaining name for your project. +Extra options can be added to the `cmake` command to control the build: -## Description -Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors. + - `-DCMAKE_BUILD_TYPE=` default=RelWithDebInfo (typically `-O2 -g`) + - `-DENABLE_TESTS=` default=ON + - `-DENABLE_SINGLE_PRECISION=` default=ON + - `-DENABLE_DOUBLE_PRECISION=` default=ON + - `-DENABLE_TRANSI=` default=ON + - `-DENABLE_MKL=` default=ON + - `-DENABLE_FFTW=` default=ON + - `-DENABLE_GPU=` default=OFF + - `-DCMAKE_INSTALL_PREFIX=` -## Badges -On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge. +More options to control compilation flags, only when defaults are not sufficient -## Visuals -Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method. + - `-DCMAKE_Fortran_FLAGS=` + - `-DCMAKE_C_FLAGS=` -## Installation -Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection. +Additional option for the GPU code, allowing to reduce memory consumption on the GPU at the +cost of slower execution: -## Usage -Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README. + - `-DENABLE_REDUCED_MEMORY=ON` -## Support -Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc. +Once this has finished successfully, run ``make`` and ``make install``. -## Roadmap -If you have ideas for releases in the future, it is a good idea to list them in the README. +Optionally, tests can be run to check succesful compilation, when the feature TESTS is enabled (`-DENABLE_TESTS=ON`, default ON) -## Contributing -State if you are open to contributions and what your requirements are for accepting them. + $ ctest -For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self. +The benchmark drivers are found in the bin directory. +A brief description of available command-line arguments can be obtained with e.g. +ectrans-benchmark-sp --help -You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser. +Reporting Bugs +============== -## Authors and acknowledgment -Show your appreciation to those who have contributed to the project. +TODO -## License -For open source projects, say how it is licensed. +Contributing +============ + +TODO -## Project status -If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers. diff --git a/README_lumi_lam.md b/README_lumi_lam.md new file mode 100644 index 0000000..b7b7314 --- /dev/null +++ b/README_lumi_lam.md @@ -0,0 +1,344 @@ +# Limited-Area Model spectral transforms on LUMI + +## Usage (aka the most interesting part) + +### (re)Compilation + + source ~dadegrau2/ENV_lumi + rm -rf ${BUILDDIR}/ectrans ${INSTALLDIR}/ectrans + mkdir -p ${BUILDDIR}/ectrans + cd ${BUILDDIR}/ectrans + ecbuild --prefix=${INSTALLDIR}/ectrans -Dfiat_ROOT=${INSTALLDIR}/fiat -DBUILD_SHARED_LIBS=OFF -DENABLE_FFTW=OFF -DENABLE_GPU=ON -DENABLE_OMPGPU=OFF -DENABLE_ACCGPU=ON -DENABLE_TESTS=OFF -DENABLE_GPU_AWARE_MPI=ON -DENABLE_CPU=ON -DENABLE_ETRANS=ON ${SOURCEDIR}/ectrans + make -j32 + make -j32 # note: dependencies aren't worked out entirely correctly by cmake, therefore a second make is necessary. + make install + +### Test run on single GPU + +Allocate GPU resource with + + salloc --nodes=1 --ntasks-per-node=1 --cpus-per-task=1 --gpus-per-node=1 --account=project_462000140 --partition=standard-g --time=04:00:00 --mem=0 + +Recompile/run with + + cd ${BASEDIR}/test/ + make -j16 -C ${BUILDDIR}/ectrans/ install + + args="--truncation 79 --nproma 32 --vordiv --scders --uvders --nfld 1 --nlev 10 --norms --check 10" + srun ${INSTALLDIR}/ectrans/bin/ectrans-benchmark-sp ${args} # run global benchmark on CPU + srun ${INSTALLDIR}/ectrans/bin/ectrans-benchmark-gpu-sp-acc ${args} # run global benchmark on GPU + + args="--nlon 128 --nlat 128 --nproma 32 --vordiv --scders --uvders --nfld 5 --nlev 10 --norms --check 10" + srun ${INSTALLDIR}/ectrans/bin/ectrans-lam-benchmark-sp ${args} # run LAM benchmark on CPU + srun ${INSTALLDIR}/ectrans/bin/ectrans-lam-benchmark-gpu-sp-acc ${args} # run LAM benchmark on GPU + +Note: recompiling like this may not be sufficient when modifying e.g. hip.cc files; do a full recompilation (above) in this case. + +Note: a bug still resides in the global gpu runs when setting nfld>1. + +### Test run on multiple GPUs + +Allocate GPU resources with + + salloc --nodes=1 --ntasks-per-node=8 --gpus-per-node=8 --account=project_462000140 --partition=standard-g --time=04:00:00 --mem=0 + +Make sure to set + + export MPICH_GPU_SUPPORT_ENABLED=1 + CPU_BIND="map_cpu:48,56,16,24,1,8,32,40" + +Then launch global cpu/gpu runs with + + args="--truncation 79 --nproma 32 --vordiv --scders --uvders --nfld 1 --nlev 10 --norms --check 10" + srun --cpu-bind=${CPU_BIND} ./select_gpu ${INSTALLDIR}/ectrans/bin/ectrans-benchmark-sp ${args} + srun --cpu-bind=${CPU_BIND} ./select_gpu ${INSTALLDIR}/ectrans/bin/ectrans-benchmark-gpu-sp-acc ${args} + +while the LAM cases are launched with + + args="--nlon 256 --nlat 256 --nproma 32 --niter 1 --vordiv --scders --uvders --nfld 5 --nlev 80 --norms --check 10 --dump-values" + srun --cpu-bind=${CPU_BIND} ./select_gpu ${INSTALLDIR}/ectrans/bin/ectrans-lam-benchmark-sp ${args} + srun --cpu-bind=${CPU_BIND} ./select_gpu ${INSTALLDIR}/ectrans/bin/ectrans-lam-benchmark-gpu-sp-acc ${args} + +The `select_gpu` wrapper can be found on the lumi documentation pages. + +Small note: the `nfld>1` case gives a crash in the global run, both on cpu and on gpu. + +## Prerequisites: ecbuild and fiat + +### ecbuild installation + + cd ${SOURCEDIR} + git clone https://github.com/ecmwf/ecbuild.git + cd ecbuild + git checkout master + sed -i -e "s/-Gfast//" cmake/compiler_flags/Cray_Fortran.cmake # remove obsolete switch -Gfast + mkdir -p ${BUILDDIR}/ecbuild + cd ${BUILDDIR}/ecbuild + ${SOURCEDIR}/ecbuild/bin/ecbuild --prefix=${INSTALLDIR}/ecbuild ${SOURCEDIR}/ecbuild + make + make install + +### fiat installation +With Cray compiler on lumi, one gets into trouble with OpenMP: for some reason, during linking the openmp library isn't found... This is solved by adding `${OpenMP_C_FLAGS}` in `programs/CMakeLists.txt`: `target_link_libraries( fiat-printbinding ${OpenMP_C_FLAGS} OpenMP::OpenMP_C )` + + cd ${SOURCEDIR} + git clone https://github.com/ecmwf-ifs/fiat + cd fiat + git checkout main + # ADD OpenMP::OpenMP_C here! + rm -rf ${BUILDDIR}/fiat + mkdir -p ${BUILDDIR}/fiat + cd ${BUILDDIR}/fiat + ecbuild -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=${INSTALLDIR}/fiat -DENABLE_MPI=ON -DBUILD_SHARED_LIBS=OFF -DENABLE_TESTS=OFF ${SOURCEDIR}/fiat + make -j16 + make install + +## Code organization and data layout + +Just for reference: a somewhat simplified overview of routines and data layout + +### Original CPU code + + +Inverse transforms: +``` +einv_trans # input: PSPVOR(NLEV,NSPEC2), PSPDIV(NLEV,NSPEC2), PSPSC3A(NFLD*NLEV,NSPEC2) + einv_trans_ctl + eltinv_ctl + DO JM=1,NX_l # loop over x-wavenumbers + eltinv # north-south transforms + eprfi1b # transpose to (NY,NFLD) + eleinv + plan_fft + execute_fft # small-batched transform along y with stride=1, distance=ny+2, lot=nfld + easre1b # transpose to FOUBUF_IN(NFLD,NY) + ENDDO + trmtol # inter-GPU communications to FOUBUF(NFLD,NY) + + eftinv_ctl + DO JGL=1,NY_l # loop over latitudes + fourier_in # copy FOUBUF to ZGTF(NFLD,NX,JGL) + eftinv + plan_fft + execute_fft # small-batched transform along x with stride=nfld,distance=1, lot=NFLD + ENDDO + trltog # inter-GPU comms and transpose to PGP3A(NPROMA,NLEV*NFLD,NBLK),PGPUV(NPROMA,NLEV*2,NBLK) +``` + +Direct transforms: +``` +edir_trans_ctl # input PGP3A(NPROMA,NLEV,NFLD,NBLK) + eftdir_ctl + trgtol # inter-GPU comms and transposition to ZGTF(NFLD,NX) + DO JGL=1,NY_l # loop over latitudes + ftdir + plan_fft + execute_fft # batched transform along x with stride=NFLD, distance=1, lot=NFLD + fourier_out # copy to FOUBUF_IN(NFLD,NX) + ENDDO + eltdir_ctl + trltom # inter-GPU comms to FOUBUF(NFLD,NY) + DO JM=1,NX_l # loop over x-wavenumbers + eltdir + eprfi2b # transpose to PFFT(NY,NFLD) + eledir + plan_fft + execute_fft # batched transform along y with stride=1, distance=ny+2, lot=NFLD + eupdsp + eupdspb # transpose to PSPSC3A(NFLD,NSPEC2) +``` + +Two aspects of this code and data organization are quite striking: (i) 2x3 transpositions are performed. On CPU this doesn't seem to be very expensive; (ii) the Fourier transforms are performed inside loops over JGL and JM. This means that the batch size of the transforms is quite limited. + +### GPU code + +To unleash the computing power of GPUs, as much as possible parallellism should be exposed. In the case of the spectral transforms, this means the batch size should be as large as possible. Instead of using the loops over JGL and JM and small batch sizes as for the GPU code, the following organization is taken, which removes the loops and increases the batch sizes. + +Inverse transforms: +``` +einv_trans # input: PSPVOR(NLEV,NSPEC2), PSPDIV(NLEV,NSPEC2), PSPSC3A(NFLD*NLEV,NSPEC2) + einv_trans_ctl + eltinv_ctl + eltinv # north-south transforms + eprfi1b # transpose to (NY,NFLD) + eleinv + plan_fft + execute_fft # batched transform along y with stride=1, distance=ny+2 + easre1b # transpose to FOUBUF_IN(NFLD,NY) + trmtol # inter-GPU communications to FOUBUF(NFLD,NY) + + eftinv_ctl + efourier_in # transpose FOUBUF to ZGTF(NX,NFLD) + eftinv + plan_fft + execute_fft # batched transform along x with stride=1,distance=nx+2 + trltog # inter-GPU comms to PGP3A(NPROMA,NLEV*NFLD,NBLK),PGPUV(NPROMA,NLEV*2,NBLK) +``` + +Direct transforms: +``` +edir_trans_ctl # input PGP3A(NPROMA,NFLD*NLEV,NBLK) + eftdir_ctl + trgtol # inter-GPU comms to ZGTF(NX,NY_l*NFLD) + eftdir + plan_fft + execute_fft # batched transform along x with stride=1, distance=nx+2, lot=NY_l*NFLD + efourier_out # transpose to FOUBUF_IN(NFLD,NY_l,NX) + eltdir_ctl + trltom # inter-GPU comms to FOUBUF(NFLD*NX_l,NY) + eltdir + eprfi2b # transpose to PFFT(NY,NX_l*NFLD) + eledir + plan_fft + execute_fft # batched transform along y with stride=1, distance=ny+2, lot=NX_l*NFLD + eupdsp + eupdspb # transpose to PSPSC3A(NFLD,NSPEC2) +``` + +Although the number of transpositions remains the same (2x3), they are performed at a different place. This is necessary because a batched Fourier transform must be performed either on the first dimension, or on the last dimension. As a consequence, the input to `trltog` and the output from `trgtol` is transposed w.r.t. the CPU version and a nasty switch `LDTRANSPOSED` is necessary in those routines. + + +## Optimizations + +Just documenting some ideas... + +### Tiling transpositions + +The most expensive routines (especially on GPU) are those where data are transposed, i.e. where the leading dimension changes. The reason is that a transposition is very much nonlocal in terms of data access: coalesced data access is only possible for the input or for the output, but not for both. + +One possibility to solve this is to use a tiled approach, either coded explicitly, as explained [here](https://developer.nvidia.com/blog/efficient-matrix-transpose-cuda-cc/), or by using the OpenACC `tile` directive. How well this directive is treated by the compiler (as compared to using shared memory) is to be tested. Also the optimal tile size in the `tile` directive is machine-dependent and could be optimized. + +### Avoiding transpositions + +Considering the direct transforms, at least one transposition is necessary, because the leading dimension of the input is `NPROMA` ($x$), while the leading dimension of the output is `NFLD`. But the other two transpositions (in `eprfi2b` and `eupdspb`) may be avoided. As a matter of fact, in the nvidia-optimized branch by Lukas, they are avoided. + +The problem is that these transpositions are necessary to maximize the batch size of the Fourier transforms. A different possibility is to keep the loop over `JM` as in the CPU code, but to use different GPU streams to treat the different x-wavenumbers. According to Lukas, the performance of a batched FFT is better than that of multiplexed FFTs in different streams, but if this avoids two transpositions, it may well be worth the effort. + +This can be tested in a small toy application, where the performance of taking FFTs over different dimensions is considered: + +* batched along leading dimension: +``` +Z(NY,NX_l,NFLD) # input/output data +plan_fft +execute_fft(Z) # stride=1, distance=NY, lot=NX_l*NFLD +``` +* batched along trailing dimension: +``` +Z(NX_l,NFLD,NY) # input/output data +plan_fft +execute_fft(Z) # stride=NX_l*NFLD, distance=1, lot=NX_l*NFLD +``` +* streamed along trailing dimension +``` +Z(NX_l,NY,NFLD) # input/output data +DO JFLD=1,NFLD + hipfft_setstream(JFLD) # ... or something like that + plan_fft + execute_fft(Z(1,1,JFLD)) # stride=NX_l, distance=1, lot=NX_l +ENDDO +``` +* streamed along leading dimension +``` +Z(NX_l,NY,NFLD) # input/output data +DO JX=1,NX_l + hipfft_setstream(JX) # ... or something like that + plan_fft + execute_fft(Z(JX,1,1)) # stride=NX_l, distance=NX_l*NY, lot=NFLD +ENDDO +``` + +Realistic dimension values are something like `NX_l=128`, `NFLD=240`, `NY=1024`. + +Besides avoiding transpositions, if the multiplexed transforms perform well, it would also allow to remove the nasty `LDTRANSPOSED` switch in `trltog` and `trgtol`. + +### Revise triple loop in trgtol/trltog + +In `trltog` and `trgtol`, data layout is changed from `(NPROMA,NFLD,NBLK)` to `(NX,NFLD)`. It may be better to loop over `NX` and calculate the indices `JLON` and `JBLK`, instead of the current approach which uses a double loop over `JLON` and `JBLK`, calculating `JX` on the go. This is also how it's done in the nvidia-opt branch. + +### Overlapping host-device transfers and communications + +Considering the direct transforms, in the current code, copying of output data from the device to the host only starts after all calculations are finished: `edir_trans`, there is + +``` +!$ACC data copyin (PGP ) if (present (PGP )) +!$ACC data copyin (PGPUV) if (present (PGPUV)) +!$ACC data copyin (PGP3A) if (present (PGP3A)) +!$ACC data copyin (PGP3B) if (present (PGP3B)) +!$ACC data copyin (PGP2 ) if (present (PGP2 )) +!$ACC data copyout (PSPVOR ) if (present (PSPVOR )) +!$ACC data copyout (PSPDIV ) if (present (PSPDIV )) +!$ACC data copyout (PSPSCALAR) if (present (PSPSCALAR)) +!$ACC data copyout (PSPSC3A ) if (present (PSPSC3A )) +!$ACC data copyout (PSPSC3B ) if (present (PSPSC3B )) +!$ACC data copyout (PSPSC2 ) if (present (PSPSC2 )) +CALL EDIR_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +``` + +However, looking inside the code deeper, it becomes apparent that calculations on `PSPVOR` have already finished well before the end of `edir_trans_ctl`. So the transfer of `PSPVOR `from the device to the host could already start earlier, while processing of the other fields `PSPDIV`, `PSPSC3A`, `PSPSC2` still continues. In terms of code, this would require changing the `copyout` clauses in `edir_trans` to `create` clauses, and to put an `update host async(1)` clause right after the processing of `PSPVOR` in `eupdsp` (and same for the other fields). + +Completely symmetric actions could be taken for the input data of the inverse transforms in `eltinv`. + +### Merging small kernels + +The most costly kernel `EASRE1B` is way too small: it has an arithmetic intensity if 0.05, the GPU is starving and waiting all the time. We should try to merge the kernel with a larger routine, like what is done in the Nvidia optimized version. +The same holds for all the other small kernels (`fourier_in`, `fourier_out`, ...) + + +## History + +### Different existing repositories contain elements for this: +* IAL/ectrans_withlam:withlam +integration of CPU lam sources in ectrans +* anmrde/ectrans:gpu_omp +OpenACC/(OpenMP)/hip branch of global with Cray compiler +* ddegrauwe/etrans:gpu_daand_lam +OpenACC/cufft branch for Nvidia compiler +* ddegrauwe/ectrans:gpu_daand_lam +modifs to global Nvidia-targeted code for lam: + * LDGW switch to work on transposed arrays e.g. PGLAT in TRGTOL + * generalizing FFTC plans to have stride + * switch LALLOPERM2 (only used in etrans, not in ectrans) +* ddegrauwe/ectrans:gpu_lumi +OpenACC/rocfft branch for Cray compiler + +### Merge plan: +1. Start from anmrde/ectrans:gpu_omp +2. integrate LAM (CPU) by *merging* in IAL/ectrans_withlam:withlam +3. move to GPU by *merging* ddegrauwe/ectrans:gpu_daand_lam +4. introduce optimizations by Lukas M. + +### 1. Andreas' branch + +When enabling GPU-aware MPI communications, Crya/Lumi complains about quite a lot of scalars not being present in OpenACC regions. Fixes for this were committed here. + + +### 2. Integrate LAM sources (CPU) + +I created a new branch gpu-omp-daand-lam for this. `etrans` sources were taken from git@github.com:ACCORD-NWP/ectrans_withlam.git + +Some incompatibilities due to version differences were solved as follows: +* NSTACK_MEMORY_TR isn't present in TPM_GEN (due to ectrans not being the latest IAL version). This was removed from eftdir_ctl, eftinv_ctl, eftdir_ctlad, eftinv_ctlad +* egath_spec was rewritten; not compatible with gath_spec_control that's in ectrans; I put back the version of egath_spec from cy43t2. +* same for edist_grid + +### 3. LAM-GPU changes +Put the cpu-specific sources in `cpu`, and put OpenACC/cuFFT sources from Thomas B. in `gpu`. + +Changes done for hipfft: +* remove all references to cuda, including tpm_fftc, fftc (cuda fft data type) +* removed LALLOPERM2 (assuming .FALSE.) +* removed LDGW, transposing ZGTF everywhere. \ No newline at end of file diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..3eefcb9 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +1.0.0 diff --git a/cmake/FindCUDAToolkit-cmake-3.24/FindCUDAToolkit.cmake b/cmake/FindCUDAToolkit-cmake-3.24/FindCUDAToolkit.cmake new file mode 100644 index 0000000..970d8e9 --- /dev/null +++ b/cmake/FindCUDAToolkit-cmake-3.24/FindCUDAToolkit.cmake @@ -0,0 +1,1026 @@ +# Copy of CMake version 3.24 which adds support for finding cublas and cufft in +# directory parallel to the cuda libraries. +# This file can be deleted when using CMake 3.22 + +ecbuild_warn("Using FindCUDAToolkit backported from cmake 3.24") + +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindCUDAToolkit +--------------- + +.. versionadded:: 3.17 + +This script locates the NVIDIA CUDA toolkit and the associated libraries, but +does not require the ``CUDA`` language be enabled for a given project. This +module does not search for the NVIDIA CUDA Samples. + +.. versionadded:: 3.19 + QNX support. + +Search Behavior +^^^^^^^^^^^^^^^ + +The CUDA Toolkit search behavior uses the following order: + +1. If the ``CUDA`` language has been enabled we will use the directory + containing the compiler as the first search location for ``nvcc``. + +2. If the ``CUDAToolkit_ROOT`` cmake configuration variable (e.g., + ``-DCUDAToolkit_ROOT=/some/path``) *or* environment variable is defined, it + will be searched. If both an environment variable **and** a + configuration variable are specified, the *configuration* variable takes + precedence. + + The directory specified here must be such that the executable ``nvcc`` or + the appropriate ``version.txt`` file can be found underneath the specified + directory. + +3. If the CUDA_PATH environment variable is defined, it will be searched + for ``nvcc``. + +4. The user's path is searched for ``nvcc`` using :command:`find_program`. If + this is found, no subsequent search attempts are performed. Users are + responsible for ensuring that the first ``nvcc`` to show up in the path is + the desired path in the event that multiple CUDA Toolkits are installed. + +5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is + used. No subsequent search attempts are performed. No default symbolic link + location exists for the Windows platform. + +6. The platform specific default install locations are searched. If exactly one + candidate is found, this is used. The default CUDA Toolkit install locations + searched are: + + +-------------+-------------------------------------------------------------+ + | Platform | Search Pattern | + +=============+=============================================================+ + | macOS | ``/Developer/NVIDIA/CUDA-X.Y`` | + +-------------+-------------------------------------------------------------+ + | Other Unix | ``/usr/local/cuda-X.Y`` | + +-------------+-------------------------------------------------------------+ + | Windows | ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y`` | + +-------------+-------------------------------------------------------------+ + + Where ``X.Y`` would be a specific version of the CUDA Toolkit, such as + ``/usr/local/cuda-9.0`` or + ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0`` + + .. note:: + + When multiple CUDA Toolkits are installed in the default location of a + system (e.g., both ``/usr/local/cuda-9.0`` and ``/usr/local/cuda-10.0`` + exist but the ``/usr/local/cuda`` symbolic link does **not** exist), this + package is marked as **not** found. + + There are too many factors involved in making an automatic decision in + the presence of multiple CUDA Toolkits being installed. In this + situation, users are encouraged to either (1) set ``CUDAToolkit_ROOT`` or + (2) ensure that the correct ``nvcc`` executable shows up in ``$PATH`` for + :command:`find_program` to find. + +Arguments +^^^^^^^^^ + +``[]`` + The ``[]`` argument requests a version with which the package found + should be compatible. See :ref:`find_package version format ` + for more details. + +Options +^^^^^^^ + +``REQUIRED`` + If specified, configuration will error if a suitable CUDA Toolkit is not + found. + +``QUIET`` + If specified, the search for a suitable CUDA Toolkit will not produce any + messages. + +``EXACT`` + If specified, the CUDA Toolkit is considered found only if the exact + ``VERSION`` specified is recovered. + +Imported targets +^^^^^^^^^^^^^^^^ + +An :ref:`imported target ` named ``CUDA::toolkit`` is provided. + +This module defines :prop_tgt:`IMPORTED` targets for each +of the following libraries that are part of the CUDAToolkit: + +- :ref:`CUDA Runtime Library` +- :ref:`CUDA Driver Library` +- :ref:`cuBLAS` +- :ref:`cuFFT` +- :ref:`cuRAND` +- :ref:`cuSOLVER` +- :ref:`cuSPARSE` +- :ref:`cuPTI` +- :ref:`NPP` +- :ref:`nvBLAS` +- :ref:`nvGRAPH` +- :ref:`nvJPEG` +- :ref:`nvidia-ML` +- :ref:`nvRTC` +- :ref:`nvToolsExt` +- :ref:`OpenCL` +- :ref:`cuLIBOS` + +.. _`cuda_toolkit_rt_lib`: + +CUDA Runtime Library +"""""""""""""""""""" + +The CUDA Runtime library (cudart) are what most applications will typically +need to link against to make any calls such as `cudaMalloc`, and `cudaFree`. + +Targets Created: + +- ``CUDA::cudart`` +- ``CUDA::cudart_static`` + +.. _`cuda_toolkit_driver_lib`: + +CUDA Driver Library +"""""""""""""""""""" + +The CUDA Driver library (cuda) are used by applications that use calls +such as `cuMemAlloc`, and `cuMemFree`. + +Targets Created: + +- ``CUDA::cuda_driver`` + +.. _`cuda_toolkit_cuBLAS`: + +cuBLAS +"""""" + +The `cuBLAS `_ library. + +Targets Created: + +- ``CUDA::cublas`` +- ``CUDA::cublas_static`` +- ``CUDA::cublasLt`` starting in CUDA 10.1 +- ``CUDA::cublasLt_static`` starting in CUDA 10.1 + +.. _`cuda_toolkit_cuFFT`: + +cuFFT +""""" + +The `cuFFT `_ library. + +Targets Created: + +- ``CUDA::cufft`` +- ``CUDA::cufftw`` +- ``CUDA::cufft_static`` +- ``CUDA::cufft_static_nocallback`` starting in CUDA 9.2, requires CMake 3.23+ +- ``CUDA::cufftw_static`` + +cuRAND +"""""" + +The `cuRAND `_ library. + +Targets Created: + +- ``CUDA::curand`` +- ``CUDA::curand_static`` + +.. _`cuda_toolkit_cuSOLVER`: + +cuSOLVER +"""""""" + +The `cuSOLVER `_ library. + +Targets Created: + +- ``CUDA::cusolver`` +- ``CUDA::cusolver_static`` + +.. _`cuda_toolkit_cuSPARSE`: + +cuSPARSE +"""""""" + +The `cuSPARSE `_ library. + +Targets Created: + +- ``CUDA::cusparse`` +- ``CUDA::cusparse_static`` + +.. _`cuda_toolkit_cupti`: + +cupti +""""" + +The `NVIDIA CUDA Profiling Tools Interface `_. + +Targets Created: + +- ``CUDA::cupti`` +- ``CUDA::cupti_static`` + +.. _`cuda_toolkit_NPP`: + +NPP +""" + +The `NPP `_ libraries. + +Targets Created: + +- `nppc`: + + - ``CUDA::nppc`` + - ``CUDA::nppc_static`` + +- `nppial`: Arithmetic and logical operation functions in `nppi_arithmetic_and_logical_operations.h` + + - ``CUDA::nppial`` + - ``CUDA::nppial_static`` + +- `nppicc`: Color conversion and sampling functions in `nppi_color_conversion.h` + + - ``CUDA::nppicc`` + - ``CUDA::nppicc_static`` + +- `nppicom`: JPEG compression and decompression functions in `nppi_compression_functions.h` + Removed starting in CUDA 11.0, use :ref:`nvJPEG` instead. + + - ``CUDA::nppicom`` + - ``CUDA::nppicom_static`` + +- `nppidei`: Data exchange and initialization functions in `nppi_data_exchange_and_initialization.h` + + - ``CUDA::nppidei`` + - ``CUDA::nppidei_static`` + +- `nppif`: Filtering and computer vision functions in `nppi_filter_functions.h` + + - ``CUDA::nppif`` + - ``CUDA::nppif_static`` + +- `nppig`: Geometry transformation functions found in `nppi_geometry_transforms.h` + + - ``CUDA::nppig`` + - ``CUDA::nppig_static`` + +- `nppim`: Morphological operation functions found in `nppi_morphological_operations.h` + + - ``CUDA::nppim`` + - ``CUDA::nppim_static`` + +- `nppist`: Statistics and linear transform in `nppi_statistics_functions.h` and `nppi_linear_transforms.h` + + - ``CUDA::nppist`` + - ``CUDA::nppist_static`` + +- `nppisu`: Memory support functions in `nppi_support_functions.h` + + - ``CUDA::nppisu`` + - ``CUDA::nppisu_static`` + +- `nppitc`: Threshold and compare operation functions in `nppi_threshold_and_compare_operations.h` + + - ``CUDA::nppitc`` + - ``CUDA::nppitc_static`` + +- `npps`: + + - ``CUDA::npps`` + - ``CUDA::npps_static`` + +.. _`cuda_toolkit_nvBLAS`: + +nvBLAS +"""""" + +The `nvBLAS `_ libraries. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvblas`` + +.. _`cuda_toolkit_nvGRAPH`: + +nvGRAPH +""""""" + +The `nvGRAPH `_ library. +Removed starting in CUDA 11.0 + +Targets Created: + +- ``CUDA::nvgraph`` +- ``CUDA::nvgraph_static`` + + +.. _`cuda_toolkit_nvJPEG`: + +nvJPEG +"""""" + +The `nvJPEG `_ library. +Introduced in CUDA 10. + +Targets Created: + +- ``CUDA::nvjpeg`` +- ``CUDA::nvjpeg_static`` + +.. _`cuda_toolkit_nvRTC`: + +nvRTC +""""" + +The `nvRTC `_ (Runtime Compilation) library. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvrtc`` + +.. _`cuda_toolkit_nvml`: + +nvidia-ML +""""""""" + +The `NVIDIA Management Library `_. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvml`` + +.. _`cuda_toolkit_nvToolsExt`: + +nvToolsExt +"""""""""" + +The `NVIDIA Tools Extension `_. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvToolsExt`` + +.. _`cuda_toolkit_opencl`: + +OpenCL +"""""" + +The `NVIDIA OpenCL Library `_. +This is a shared library only. + +Targets Created: + +- ``CUDA::OpenCL`` + +.. _`cuda_toolkit_cuLIBOS`: + +cuLIBOS +""""""" + +The cuLIBOS library is a backend thread abstraction layer library which is +static only. The ``CUDA::cublas_static``, ``CUDA::cusparse_static``, +``CUDA::cufft_static``, ``CUDA::curand_static``, and (when implemented) NPP +libraries all automatically have this dependency linked. + +Target Created: + +- ``CUDA::culibos`` + +**Note**: direct usage of this target by consumers should not be necessary. + +.. _`cuda_toolkit_cuRAND`: + + + +Result variables +^^^^^^^^^^^^^^^^ + +``CUDAToolkit_FOUND`` + A boolean specifying whether or not the CUDA Toolkit was found. + +``CUDAToolkit_VERSION`` + The exact version of the CUDA Toolkit found (as reported by + ``nvcc --version`` or ``version.txt``). + +``CUDAToolkit_VERSION_MAJOR`` + The major version of the CUDA Toolkit. + +``CUDAToolkit_VERSION_MINOR`` + The minor version of the CUDA Toolkit. + +``CUDAToolkit_VERSION_PATCH`` + The patch version of the CUDA Toolkit. + +``CUDAToolkit_BIN_DIR`` + The path to the CUDA Toolkit library directory that contains the CUDA + executable ``nvcc``. + +``CUDAToolkit_INCLUDE_DIRS`` + The path to the CUDA Toolkit ``include`` folder containing the header files + required to compile a project linking against CUDA. + +``CUDAToolkit_LIBRARY_DIR`` + The path to the CUDA Toolkit library directory that contains the CUDA + Runtime library ``cudart``. + +``CUDAToolkit_LIBRARY_ROOT`` + .. versionadded:: 3.18 + + The path to the CUDA Toolkit directory containing the nvvm directory and + version.txt. + +``CUDAToolkit_TARGET_DIR`` + The path to the CUDA Toolkit directory including the target architecture + when cross-compiling. When not cross-compiling this will be equivalent to + the parent directory of ``CUDAToolkit_BIN_DIR``. + +``CUDAToolkit_NVCC_EXECUTABLE`` + The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may + **not** be the same as + :variable:`CMAKE_CUDA_COMPILER _COMPILER>`. ``nvcc`` must be + found to determine the CUDA Toolkit version as well as determining other + features of the Toolkit. This variable is set for the convenience of + modules that depend on this one. + + +#]=======================================================================] + +# NOTE: much of this was simply extracted from FindCUDA.cmake. + +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# Copyright (c) 2007-2009 +# Scientific Computing and Imaging Institute, University of Utah +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +############################################################################### + +# The toolkit is located during compiler detection for CUDA and stored in CMakeCUDACompiler.cmake as +# CMAKE_CUDA_COMPILER_TOOLKIT_ROOT and CMAKE_CUDA_COMPILER_LIBRARY_ROOT. +# We compute the rest based on those here to avoid re-searching and to avoid finding a possibly +# different installation. +if(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT) + set(CUDAToolkit_ROOT_DIR "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") + set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}") + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_ROOT_DIR}/bin") + set(CUDAToolkit_NVCC_EXECUTABLE "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}") + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}") + + if(CUDAToolkit_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + endif() +else() + function(_CUDAToolkit_find_root_dir ) + cmake_parse_arguments(arg "" "" "SEARCH_PATHS;FIND_FLAGS" ${ARGN}) + + if(NOT CUDAToolkit_BIN_DIR) + if(NOT CUDAToolkit_SENTINEL_FILE) + find_program(CUDAToolkit_NVCC_EXECUTABLE + NAMES nvcc nvcc.exe + PATHS ${arg_SEARCH_PATHS} + ${arg_FIND_FLAGS} + ) + endif() + + if(NOT CUDAToolkit_NVCC_EXECUTABLE) + find_file(CUDAToolkit_SENTINEL_FILE + NAMES version.txt + PATHS ${arg_SEARCH_PATHS} + NO_DEFAULT_PATH + ) + endif() + + if(EXISTS "${CUDAToolkit_NVCC_EXECUTABLE}") + # If NVCC exists then invoke it to find the toolkit location. + # This allows us to support wrapper scripts (e.g. ccache or colornvcc), CUDA Toolkit, + # NVIDIA HPC SDK, and distro's splayed layouts + execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "-v" "__cmake_determine_cuda" + OUTPUT_VARIABLE _CUDA_NVCC_OUT ERROR_VARIABLE _CUDA_NVCC_OUT) + if(_CUDA_NVCC_OUT MATCHES "\\#\\$ TOP=([^\r\n]*)") + get_filename_component(CUDAToolkit_BIN_DIR "${CMAKE_MATCH_1}/bin" ABSOLUTE) + else() + get_filename_component(CUDAToolkit_BIN_DIR "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY) + endif() + unset(_CUDA_NVCC_OUT) + + mark_as_advanced(CUDAToolkit_BIN_DIR) + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}" CACHE PATH "" FORCE) + endif() + + if(CUDAToolkit_SENTINEL_FILE) + get_filename_component(CUDAToolkit_BIN_DIR ${CUDAToolkit_SENTINEL_FILE} DIRECTORY ABSOLUTE) + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}/bin") + + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}" CACHE PATH "" FORCE) + mark_as_advanced(CUDAToolkit_BIN_DIR) + endif() + endif() + + if(CUDAToolkit_BIN_DIR) + get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) + set(CUDAToolkit_ROOT_DIR "${CUDAToolkit_ROOT_DIR}" PARENT_SCOPE) + endif() + + endfunction() + + function(_CUDAToolkit_find_version_file result_variable) + # We first check for a non-scattered installation to prefer it over a scattered installation. + if(CUDAToolkit_ROOT AND EXISTS "${CUDAToolkit_ROOT}/version.txt") + set(${result_variable} "${CUDAToolkit_ROOT}/version.txt" PARENT_SCOPE) + elseif(CUDAToolkit_ROOT_DIR AND EXISTS "${CUDAToolkit_ROOT_DIR}/version.txt") + set(${result_variable} "${CUDAToolkit_ROOT_DIR}/version.txt" PARENT_SCOPE) + elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt") + set(${result_variable} "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt" PARENT_SCOPE) + elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt") + set(${result_variable} "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt" PARENT_SCOPE) + endif() + endfunction() + + # For NVCC we can easily deduce the SDK binary directory from the compiler path. + if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + get_filename_component(CUDAToolkit_BIN_DIR "${CMAKE_CUDA_COMPILER}" DIRECTORY) + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}" CACHE PATH "") + # Try language provided path first. + _CUDAToolkit_find_root_dir(SEARCH_PATHS "${CUDAToolkit_BIN_DIR}" FIND_FLAGS NO_DEFAULT_PATH) + mark_as_advanced(CUDAToolkit_BIN_DIR) + endif() + + # Try user provided path + if(NOT CUDAToolkit_ROOT_DIR AND CUDAToolkit_ROOT) + _CUDAToolkit_find_root_dir(SEARCH_PATHS "${CUDAToolkit_ROOT}" FIND_FLAGS PATH_SUFFIXES bin NO_DEFAULT_PATH) + endif() + if(NOT CUDAToolkit_ROOT_DIR) + _CUDAToolkit_find_root_dir(FIND_FLAGS PATHS ENV CUDA_PATH PATH_SUFFIXES bin) + endif() + + # If the user specified CUDAToolkit_ROOT but the toolkit could not be found, this is an error. + if(NOT CUDAToolkit_ROOT_DIR AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT})) + # Declare error messages now, print later depending on find_package args. + set(fail_base "Could not find nvcc executable in path specified by") + set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}") + set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}") + + if(CUDAToolkit_FIND_REQUIRED) + if(DEFINED CUDAToolkit_ROOT) + message(FATAL_ERROR ${cuda_root_fail}) + elseif(DEFINED ENV{CUDAToolkit_ROOT}) + message(FATAL_ERROR ${env_cuda_root_fail}) + endif() + else() + if(NOT CUDAToolkit_FIND_QUIETLY) + if(DEFINED CUDAToolkit_ROOT) + message(STATUS ${cuda_root_fail}) + elseif(DEFINED ENV{CUDAToolkit_ROOT}) + message(STATUS ${env_cuda_root_fail}) + endif() + endif() + set(CUDAToolkit_FOUND FALSE) + unset(fail_base) + unset(cuda_root_fail) + unset(env_cuda_root_fail) + return() + endif() + endif() + + # CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults. + # + # - Linux: /usr/local/cuda-X.Y + # - macOS: /Developer/NVIDIA/CUDA-X.Y + # - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y + # + # We will also search the default symlink location /usr/local/cuda first since + # if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked + # directory is the desired location. + if(NOT CUDAToolkit_ROOT_DIR) + if(UNIX) + if(NOT APPLE) + set(platform_base "/usr/local/cuda-") + else() + set(platform_base "/Developer/NVIDIA/CUDA-") + endif() + else() + set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v") + endif() + + # Build out a descending list of possible cuda installations, e.g. + file(GLOB possible_paths "${platform_base}*") + # Iterate the glob results and create a descending list. + set(versions) + foreach(p ${possible_paths}) + # Extract version number from end of string + string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p}) + if(IS_DIRECTORY ${p} AND p_version) + list(APPEND versions ${p_version}) + endif() + endforeach() + + # Sort numerically in descending order, so we try the newest versions first. + list(SORT versions COMPARE NATURAL ORDER DESCENDING) + + # With a descending list of versions, populate possible paths to search. + set(search_paths) + foreach(v ${versions}) + list(APPEND search_paths "${platform_base}${v}") + endforeach() + + # Force the global default /usr/local/cuda to the front on Unix. + if(UNIX) + list(INSERT search_paths 0 "/usr/local/cuda") + endif() + + # Now search for the toolkit again using the platform default search paths. + _CUDAToolkit_find_root_dir(SEARCH_PATHS "${search_paths}" FIND_FLAGS PATH_SUFFIXES bin) + + # We are done with these variables now, cleanup for caller. + unset(platform_base) + unset(possible_paths) + unset(versions) + unset(search_paths) + + if(NOT CUDAToolkit_ROOT_DIR) + if(CUDAToolkit_FIND_REQUIRED) + message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.") + elseif(NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.") + endif() + + set(CUDAToolkit_FOUND FALSE) + return() + endif() + endif() + + _CUDAToolkit_find_version_file( _CUDAToolkit_version_file ) + if(_CUDAToolkit_version_file) + # CUDAToolkit_LIBRARY_ROOT contains the device library and version file. + get_filename_component(CUDAToolkit_LIBRARY_ROOT "${_CUDAToolkit_version_file}" DIRECTORY ABSOLUTE) + endif() + unset(_CUDAToolkit_version_file) + + if(CUDAToolkit_NVCC_EXECUTABLE AND + CMAKE_CUDA_COMPILER_VERSION AND + CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER) + # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value + # This if statement will always match, but is used to provide variables for MATCH 1,2,3... + if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}") + endif() + elseif(CUDAToolkit_NVCC_EXECUTABLE) + # Compute the version by invoking nvcc + execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) + if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + unset(NVCC_OUT) + else() + _CUDAToolkit_find_version_file(version_file) + if(version_file) + file(READ "${version_file}" VERSION_INFO) + if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + endif() + endif() +endif() + +# Find target directory when crosscompiling. +if(CMAKE_CROSSCOMPILING) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") + # Support for NVPACK + set(CUDAToolkit_TARGET_NAME "armv7-linux-androideabi") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + if(ANDROID_ARCH_NAME STREQUAL "arm64") + set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi") + elseif (CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CUDAToolkit_TARGET_NAME "aarch64-qnx") + else() + set(CUDAToolkit_TARGET_NAME "aarch64-linux") + endif(ANDROID_ARCH_NAME STREQUAL "arm64") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(CUDAToolkit_TARGET_NAME "x86_64-linux") + endif() + + if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + # add known CUDA target root path to the set of directories we search for programs, libraries and headers + list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}") + + # Mark that we need to pop the root search path changes after we have + # found all cuda libraries so that searches for our cross-compilation + # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or + # PATh + set(_CUDAToolkit_Pop_ROOT_PATH True) + endif() +endif() + +# If not already set we can simply use the toolkit root or it's a scattered installation. +if(NOT CUDAToolkit_TARGET_DIR) + # Not cross compiling + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}") + # Now that we have the real ROOT_DIR, find components inside it. + list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) + + # Mark that we need to pop the prefix path changes after we have + # found the cudart library. + set(_CUDAToolkit_Pop_Prefix True) +endif() + +# CUDAToolkit_TARGET_DIR always points to the directory containing the include directory. +# On a scattered installation /usr, on a non-scattered something like /usr/local/cuda or /usr/local/cuda-10.2/targets/aarch64-linux. +if(EXISTS "${CUDAToolkit_TARGET_DIR}/include/cuda_runtime.h") + set(CUDAToolkit_INCLUDE_DIR "${CUDAToolkit_TARGET_DIR}/include") +elseif(NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS "Unable to find cuda_runtime.h in \"${CUDAToolkit_TARGET_DIR}/include\" for CUDAToolkit_INCLUDE_DIR.") +endif() + +# The NVHPC layout moves math library headers and libraries to a sibling directory. +# Create a separate variable so this directory can be selectively added to math targets. +if(NOT EXISTS "${CUDAToolkit_INCLUDE_DIR}/cublas_v2.h") + set(CUDAToolkit_MATH_INCLUDE_DIR "${CUDAToolkit_TARGET_DIR}/../../math_libs/include") + cmake_path(NORMAL_PATH CUDAToolkit_MATH_INCLUDE_DIR) + if(NOT EXISTS "${CUDAToolkit_MATH_INCLUDE_DIR}/cublas_v2.h") + if(NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS "Unable to find cublas_v2.h in either \"${CUDAToolkit_INCLUDE_DIR}\" or \"${CUDAToolkit_MATH_INCLUDE_DIR}\"") + endif() + unset(CUDAToolkit_MATH_INCLUDE_DIR) + endif() +endif() + +# Find the CUDA Runtime Library libcudart +find_library(CUDA_CUDART + NAMES cudart + PATH_SUFFIXES lib64 lib/x64 +) +find_library(CUDA_CUDART + NAMES cudart + PATH_SUFFIXES lib64/stubs lib/x64/stubs +) + +if(NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS "Unable to find cudart library.") +endif() + +if(_CUDAToolkit_Pop_Prefix) + list(REMOVE_AT CMAKE_PREFIX_PATH -1) + unset(_CUDAToolkit_Pop_Prefix) +endif() + +#----------------------------------------------------------------------------- +# Perform version comparison and validate all required variables are set. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CUDAToolkit + REQUIRED_VARS + CUDAToolkit_INCLUDE_DIR + CUDA_CUDART + CUDAToolkit_BIN_DIR + VERSION_VAR + CUDAToolkit_VERSION +) + +unset(CUDAToolkit_ROOT_DIR) +mark_as_advanced(CUDA_CUDART + CUDAToolkit_INCLUDE_DIR + CUDAToolkit_NVCC_EXECUTABLE + CUDAToolkit_SENTINEL_FILE + ) + +#----------------------------------------------------------------------------- +# Construct result variables +if(CUDAToolkit_FOUND) + set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR}) + get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE) +endif() + +#----------------------------------------------------------------------------- +# Construct import targets +if(CUDAToolkit_FOUND) + + function(_CUDAToolkit_find_and_add_import_lib lib_name) + cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES;EXTRA_INCLUDE_DIRS" ${ARGN}) + + set(search_names ${lib_name} ${arg_ALT}) + + find_library(CUDA_${lib_name}_LIBRARY + NAMES ${search_names} + HINTS ${CUDAToolkit_LIBRARY_DIR} + ENV CUDA_PATH + PATH_SUFFIXES nvidia/current lib64 lib/x64 lib + ${arg_EXTRA_PATH_SUFFIXES} + ) + # Don't try any stub directories until we have exhausted all other + # search locations. + find_library(CUDA_${lib_name}_LIBRARY + NAMES ${search_names} + HINTS ${CUDAToolkit_LIBRARY_DIR} + ENV CUDA_PATH + PATH_SUFFIXES lib64/stubs lib/x64/stubs lib/stubs stubs + # Support NVHPC splayed math library layout + ../../math_libs/${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}/lib64 + ../../math_libs/lib64 + ) + + mark_as_advanced(CUDA_${lib_name}_LIBRARY) + + if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) + add_library(CUDA::${lib_name} UNKNOWN IMPORTED) + target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + if(DEFINED CUDAToolkit_MATH_INCLUDE_DIR) + string(FIND ${CUDA_${lib_name}_LIBRARY} "math_libs" math_libs) + if(NOT ${math_libs} EQUAL -1) + target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_MATH_INCLUDE_DIR}") + endif() + endif() + set_property(TARGET CUDA::${lib_name} PROPERTY IMPORTED_LOCATION "${CUDA_${lib_name}_LIBRARY}") + foreach(dep ${arg_DEPS}) + if(TARGET CUDA::${dep}) + target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep}) + endif() + endforeach() + if(arg_EXTRA_INCLUDE_DIRS) + target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${arg_EXTRA_INCLUDE_DIRS}") + endif() + endif() + endfunction() + + if(NOT TARGET CUDA::toolkit) + add_library(CUDA::toolkit IMPORTED INTERFACE) + target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + endif() + + _CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda) + + _CUDAToolkit_find_and_add_import_lib(cudart) + _CUDAToolkit_find_and_add_import_lib(cudart_static) + + # setup dependencies that are required for cudart_static when building + # on linux. These are generally only required when using the CUDA toolkit + # when CUDA language is disabled + if(NOT TARGET CUDA::cudart_static_deps + AND TARGET CUDA::cudart_static) + + add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) + target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps) + + if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER)) + find_package(Threads REQUIRED) + target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) + endif() + + if(UNIX AND NOT APPLE AND NOT (CMAKE_SYSTEM_NAME STREQUAL "QNX")) + # On Linux, you must link against librt when using the static cuda runtime. + find_library(CUDAToolkit_rt_LIBRARY rt) + mark_as_advanced(CUDAToolkit_rt_LIBRARY) + if(NOT CUDAToolkit_rt_LIBRARY) + message(WARNING "Could not find librt library, needed by CUDA::cudart_static") + else() + target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY}) + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library + foreach (cuda_lib cublasLt cublas cufft curand cusparse nppc nvjpeg) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos) + endforeach() + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.0.0) + # cublas depends on cublasLt + # https://docs.nvidia.com/cuda/archive/11.0/cublas/index.html#static-library + _CUDAToolkit_find_and_add_import_lib(cublas DEPS cublasLt) + _CUDAToolkit_find_and_add_import_lib(cublas_static DEPS cublasLt_static) + endif() + + # cuFFTW depends on cuFFT + _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft) + _CUDAToolkit_find_and_add_import_lib(cufftw_static DEPS cufft_static) + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 9.2) + _CUDAToolkit_find_and_add_import_lib(cufft_static_nocallback DEPS culibos) + endif() + + # cuSOLVER depends on cuBLAS, and cuSPARSE + _CUDAToolkit_find_and_add_import_lib(cusolver DEPS cublas cusparse) + _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cublas_static cusparse_static culibos) + + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 10.1.2) + # cusolver depends on liblapack_static.a starting with CUDA 10.1 update 2, + # https://docs.nvidia.com/cuda/archive/11.5.0/cusolver/index.html#static-link-lapack + _CUDAToolkit_find_and_add_import_lib(cusolver_lapack_static ALT lapack_static) # implementation detail static lib + _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cusolver_lapack_static) + endif() + + if(CUDAToolkit_VERSION VERSION_GREATER 11.2.1) + # cusolver depends on libcusolver_metis and cublasLt + # https://docs.nvidia.com/cuda/archive/11.2.2/cusolver/index.html#link-dependency + _CUDAToolkit_find_and_add_import_lib(cusolver DEPS cublasLt) + + _CUDAToolkit_find_and_add_import_lib(cusolver_metis_static ALT metis_static) # implementation detail static lib + _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cusolver_metis_static cublasLt_static) + endif() + + # nvGRAPH depends on cuRAND, and cuSOLVER. + _CUDAToolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver) + _CUDAToolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static cusolver_static) + + # Process the majority of the NPP libraries. + foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static) + endforeach() + + find_path(CUDAToolkit_CUPTI_INCLUDE_DIR cupti.h PATHS + "${CUDAToolkit_ROOT_DIR}/extras/CUPTI/include" + "${CUDAToolkit_INCLUDE_DIR}/../extras/CUPTI/include" + "${CUDATookit_INCLUDE_DIR}" + NO_DEFAULT_PATH) + + if(CUDAToolkit_CUPTI_INCLUDE_DIR) + _CUDAToolkit_find_and_add_import_lib(cupti + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/ + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + _CUDAToolkit_find_and_add_import_lib(cupti_static + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/ + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + endif() + + _CUDAToolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver) + + _CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml) + + if(WIN32) + # nvtools can be installed outside the CUDA toolkit directory + # so prefer the NVTOOLSEXT_PATH windows only environment variable + # In addition on windows the most common name is nvToolsExt64_1 + find_library(CUDA_nvToolsExt_LIBRARY + NAMES nvToolsExt64_1 nvToolsExt64 nvToolsExt + PATHS ENV NVTOOLSEXT_PATH + ENV CUDA_PATH + PATH_SUFFIXES lib/x64 lib + ) + endif() + _CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64) + + _CUDAToolkit_find_and_add_import_lib(OpenCL) +endif() + +if(_CUDAToolkit_Pop_ROOT_PATH) + list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0) + unset(_CUDAToolkit_Pop_ROOT_PATH) +endif() diff --git a/cmake/FindOpenACC.cmake b/cmake/FindOpenACC.cmake new file mode 100644 index 0000000..c3fd1db --- /dev/null +++ b/cmake/FindOpenACC.cmake @@ -0,0 +1,309 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindOpenACC +----------- + +.. versionadded:: 3.10 + +Detect OpenACC support by the compiler. + +This module can be used to detect OpenACC support in a compiler. +If the compiler supports OpenACC, the flags required to compile with +OpenACC support are returned in variables for the different languages. +Currently, only NVHPC, PGI, GNU and Cray compilers are supported. + +Imported Targets +^^^^^^^^^^^^^^^^ + +.. versionadded:: 3.16 + +The module provides :prop_tgt:`IMPORTED` targets: + +``OpenACC::OpenACC_`` + Target for using OpenACC from ````. + +Variables +^^^^^^^^^ + +This module will set the following variables per language in your +project, where ```` is one of C, CXX, or Fortran: + +``OpenACC__FOUND`` + Variable indicating if OpenACC support for ```` was detected. +``OpenACC__FLAGS`` + OpenACC compiler flags for ````, separated by spaces. +``OpenACC__OPTIONS`` + .. versionadded:: 3.16 + + OpenACC compiler flags for ````, as a list. Suitable for usage + with target_compile_options or target_link_options. + +The module will also try to provide the OpenACC version variables: + +``OpenACC__SPEC_DATE`` + Date of the OpenACC specification implemented by the ```` compiler. +``OpenACC__VERSION_MAJOR`` + Major version of OpenACC implemented by the ```` compiler. +``OpenACC__VERSION_MINOR`` + Minor version of OpenACC implemented by the ```` compiler. +``OpenACC__VERSION`` + OpenACC version implemented by the ```` compiler. + +The specification date is formatted as given in the OpenACC standard: +``yyyymm`` where ``yyyy`` and ``mm`` represents the year and month of +the OpenACC specification implemented by the ```` compiler. + +Input Variables +^^^^^^^^^^^^^^^ + +``OpenACC_ACCEL_TARGET=`` +If set, will the correct target accelerator flag set to the will +be returned with OpenACC__FLAGS. +#]=======================================================================] + + +message("") +message("=================================" ) +message(" ENTERING FindOpenACC.cmake") +message("=================================" ) +message("") + + +set(OpenACC_C_CXX_TEST_SOURCE +" +int main(){ +#ifdef _OPENACC + return 0; +#else + breaks_on_purpose +#endif +} +" +) +set(OpenACC_Fortran_TEST_SOURCE +" +program test +#ifndef _OPENACC + breaks_on_purpose +#endif +endprogram test +" +) +set(OpenACC_C_CXX_CHECK_VERSION_SOURCE +" +#include +const char accver_str[] = { 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'A', + 'C', 'C', '-', 'd', 'a', 't', 'e', '[', + ('0' + ((_OPENACC/100000)%10)), + ('0' + ((_OPENACC/10000)%10)), + ('0' + ((_OPENACC/1000)%10)), + ('0' + ((_OPENACC/100)%10)), + ('0' + ((_OPENACC/10)%10)), + ('0' + ((_OPENACC/1)%10)), + ']', '\\0' }; +int main() +{ + puts(accver_str); + return 0; +} +") +set(OpenACC_Fortran_CHECK_VERSION_SOURCE +" + program acc_ver + implicit none + integer, parameter :: zero = ichar('0') + character, dimension(25), parameter :: accver_str =& + (/ 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'A', 'C', 'C', '-',& + 'd', 'a', 't', 'e', '[',& + char(zero + mod(_OPENACC/100000, 10)),& + char(zero + mod(_OPENACC/10000, 10)),& + char(zero + mod(_OPENACC/1000, 10)),& + char(zero + mod(_OPENACC/100, 10)),& + char(zero + mod(_OPENACC/10, 10)),& + char(zero + mod(_OPENACC/1, 10)), ']' /) + print *, accver_str + end program acc_ver +" +) + + +function(_OPENACC_WRITE_SOURCE_FILE LANG SRC_FILE_CONTENT_VAR SRC_FILE_NAME SRC_FILE_FULLPATH) + set(WORK_DIR ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenACC) + if("${LANG}" STREQUAL "C") + set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.c") + file(WRITE "${SRC_FILE}" "${OpenACC_C_CXX_${SRC_FILE_CONTENT_VAR}}") + elseif("${LANG}" STREQUAL "CXX") + set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.cpp") + file(WRITE "${SRC_FILE}" "${OpenACC_C_CXX_${SRC_FILE_CONTENT_VAR}}") + elseif("${LANG}" STREQUAL "Fortran") + set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.F90") + file(WRITE "${SRC_FILE}_in" "${OpenACC_Fortran_${SRC_FILE_CONTENT_VAR}}") + configure_file("${SRC_FILE}_in" "${SRC_FILE}" @ONLY) + endif() + set(${SRC_FILE_FULLPATH} "${SRC_FILE}" PARENT_SCOPE) +endfunction() + + +function(_OPENACC_GET_FLAGS_CANDIDATE LANG FLAG_VAR) + set(ACC_FLAG_NVHPC "-acc") + set(ACC_FLAG_PGI "-acc") + set(ACC_FLAG_GNU "-fopenacc") + set(ACC_FLAG_Cray "-h acc") + + if(DEFINED ACC_FLAG_${CMAKE_${LANG}_COMPILER_ID}) + set("${FLAG_VAR}" "${ACC_FLAG_${CMAKE_${LANG}_COMPILER_ID}}" PARENT_SCOPE) + else() + # Fall back to a few common flags. + set("${FLAG_VAR}" ${ACC_FLAG_GNU} ${ACC_FLAG_PGI}) + endif() + +endfunction() + + +function(_OPENACC_GET_ACCEL_TARGET_FLAG LANG TARGET FLAG_VAR) + # Find target accelerator flags. + set(ACC_TARGET_FLAG_NVHPC "-ta") + set(ACC_TARGET_FLAG_PGI "-ta") + if(DEFINED ACC_TARGET_FLAG_${CMAKE_${LANG}_COMPILER_ID}) + set("${FLAG_VAR}" "${ACC_TARGET_FLAG_${CMAKE_${LANG}_COMPILER_ID}}=${TARGET}" PARENT_SCOPE) + endif() +endfunction() + + +function(_OPENACC_GET_VERBOSE_FLAG LANG FLAG_VAR) + # Find compiler's verbose flag for OpenACC. + set(ACC_VERBOSE_FLAG_NVHPC "-Minfo=accel") + set(ACC_VERBOSE_FLAG_PGI "-Minfo=accel") + if(DEFINED ACC_VERBOSE_FLAG_${CMAKE_${LANG}_COMPILER_ID}) + set("${FLAG_VAR}" "${ACC_VERBOSE_FLAG_${CMAKE_${LANG}_COMPILER_ID}}" PARENT_SCOPE) + endif() +endfunction() + + +function(_OPENACC_GET_FLAGS LANG FLAG_VAR) + set(FLAG_CANDIDATES "") + _OPENACC_GET_FLAGS_CANDIDATE("${LANG}" FLAG_CANDIDATES) + _OPENACC_WRITE_SOURCE_FILE("${LANG}" "TEST_SOURCE" OpenACCTryFlag _OPENACC_TEST_SRC) + + message("FLAG_CANDIDATES = ${FLAG_CANDIDATES}") + + foreach(FLAG IN LISTS FLAG_CANDIDATES) + message("try_compile with ${FLAG}") + try_compile(OpenACC_FLAG_TEST_RESULT ${CMAKE_BINARY_DIR} ${_OPENACC_TEST_SRC} + CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${FLAG}" + OUTPUT_VARIABLE OpenACC_TRY_COMPILE_OUTPUT + ) + message("OpenACC_TRY_COMPILE_OUTPUT = ${OpenACC_TRY_COMPILE_OUTPUT}") + if(OpenACC_FLAG_TEST_RESULT) + set("${FLAG_VAR}" "${FLAG}") + if(DEFINED OpenACC_ACCEL_TARGET) + _OPENACC_GET_ACCEL_TARGET_FLAG("${LANG}" "${OpenACC_ACCEL_TARGET}" TARGET_FLAG) + string(APPEND "${FLAG_VAR}" " ${TARGET_FLAG}") + endif() + + if(CMAKE_VERBOSE_MAKEFILE) + # -Minfo=accel prints out OpenACC's messages on optimizations. + _OPENACC_GET_VERBOSE_FLAG("${LANG}" OpenACC_VERBOSE_FLAG) + string(APPEND "${FLAG_VAR}" " ${OpenACC_VERBOSE_FLAG}") + endif() + set("${FLAG_VAR}" "${${FLAG_VAR}}" PARENT_SCOPE) + break() + endif() + endforeach() + +endfunction() + + +function(_OPENACC_GET_SPEC_DATE LANG SPEC_DATE) + _OPENACC_WRITE_SOURCE_FILE("${LANG}" "CHECK_VERSION_SOURCE" OpenACCCheckVersion _OPENACC_TEST_SRC) + + set(BIN_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenACC/accver_${LANG}.bin") + try_compile(OpenACC_SPECTEST_${LANG} "${CMAKE_BINARY_DIR}" "${_OPENACC_TEST_SRC}" + CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OpenACC_${LANG}_FLAGS}" + COPY_FILE ${BIN_FILE} + OUTPUT_VARIABLE OUTPUT) + + if(${OpenACC_SPECTEST_${LANG}}) + file(STRINGS ${BIN_FILE} specstr LIMIT_COUNT 1 REGEX "INFO:OpenACC-date") + set(regex_spec_date ".*INFO:OpenACC-date\\[0*([^]]*)\\].*") + if("${specstr}" MATCHES "${regex_spec_date}") + set(${SPEC_DATE} "${CMAKE_MATCH_1}" PARENT_SCOPE) + endif() + endif() +endfunction() + + +macro(_OPENACC_SET_VERSION_BY_SPEC_DATE LANG) + set(OpenACC_SPEC_DATE_MAP + # Combined versions, 2.5 onwards + "201510=2.5" + # 2013 08 is the corrected version. + "201308=2.0" + "201306=2.0" + "201111=1.0" + ) + + string(REGEX MATCHALL "${OpenACC_${LANG}_SPEC_DATE}=([0-9]+)\\.([0-9]+)" _version_match "${OpenACC_SPEC_DATE_MAP}") + if(NOT _version_match STREQUAL "") + set(OpenACC_${LANG}_VERSION_MAJOR ${CMAKE_MATCH_1}) + set(OpenACC_${LANG}_VERSION_MINOR ${CMAKE_MATCH_2}) + set(OpenACC_${LANG}_VERSION "${OpenACC_${LANG}_VERSION_MAJOR}.${OpenACC_${LANG}_VERSION_MINOR}") + else() + unset(OpenACC_${LANG}_VERSION_MAJOR) + unset(OpenACC_${LANG}_VERSION_MINOR) + unset(OpenACC_${LANG}_VERSION) + endif() + unset(_version_match) + unset(OpenACC_SPEC_DATE_MAP) +endmacro() + + +include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) +foreach (LANG IN ITEMS C CXX Fortran) + if(CMAKE_${LANG}_COMPILER_LOADED) + set(OpenACC_${LANG}_FIND_QUIETLY ${OpenACC_FIND_QUIETLY}) + set(OpenACC_${LANG}_FIND_REQUIRED ${OpenACC_FIND_REQUIRED}) + set(OpenACC_${LANG}_FIND_VERSION ${OpenACC_FIND_VERSION}) + set(OpenACC_${LANG}_FIND_VERSION_EXACT ${OpenACC_FIND_VERSION_EXACT}) + + if(NOT DEFINED OpenACC_${LANG}_FLAGS) + _OPENACC_GET_FLAGS("${LANG}" OpenACC_${LANG}_FLAGS) + endif() + message("OpenACC_Fortran_FLAGS = ${OpenACC_Fortran_FLAGS}") + if(NOT DEFINED OpenACC_${LANG}_OPTIONS) + separate_arguments(OpenACC_${LANG}_OPTIONS NATIVE_COMMAND "${OpenACC_${LANG}_FLAGS}") + endif() + _OPENACC_GET_SPEC_DATE("${LANG}" OpenACC_${LANG}_SPEC_DATE) + _OPENACC_SET_VERSION_BY_SPEC_DATE("${LANG}") + + find_package_handle_standard_args(OpenACC_${LANG} + NAME_MISMATCHED + REQUIRED_VARS OpenACC_${LANG}_FLAGS + VERSION_VAR OpenACC_${LANG}_VERSION + ) + endif() +endforeach() + +foreach (LANG IN ITEMS C CXX Fortran) + if(OpenACC_${LANG}_FOUND AND NOT TARGET OpenACC::OpenACC_${LANG}) + add_library(OpenACC::OpenACC_${LANG} INTERFACE IMPORTED) + endif() + if(OpenACC_${LANG}_LIBRARIES) + set_property(TARGET OpenACC::OpenACC_${LANG} PROPERTY + INTERFACE_LINK_LIBRARIES "${OpenACC_${LANG}_LIBRARIES}") + endif() + if(OpenACC_${LANG}_FLAGS) + set_property(TARGET OpenACC::OpenACC_${LANG} PROPERTY + INTERFACE_COMPILE_OPTIONS "$<$:${OpenACC_${LANG}_OPTIONS}>") + set_property(TARGET OpenACC::OpenACC_${LANG} PROPERTY + INTERFACE_LINK_OPTIONS "$<$:${OpenACC_${LANG}_OPTIONS}>") + unset(_OpenACC_${LANG}_OPTIONS) + endif() +endforeach() + +unset(OpenACC_C_CXX_TEST_SOURCE) +unset(OpenACC_Fortran_TEST_SOURCE) +unset(OpenACC_C_CXX_CHECK_VERSION_SOURCE) +unset(OpenACC_Fortran_CHECK_VERSION_SOURCE) diff --git a/cmake/FindPackageHandleStandardArgs.cmake b/cmake/FindPackageHandleStandardArgs.cmake new file mode 100644 index 0000000..fbcf7cd --- /dev/null +++ b/cmake/FindPackageHandleStandardArgs.cmake @@ -0,0 +1,605 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindPackageHandleStandardArgs +----------------------------- + +This module provides functions intended to be used in :ref:`Find Modules` +implementing :command:`find_package()` calls. + +.. command:: find_package_handle_standard_args + + This command handles the ``REQUIRED``, ``QUIET`` and version-related + arguments of :command:`find_package`. It also sets the + ``_FOUND`` variable. The package is considered found if all + variables listed contain valid results, e.g. valid filepaths. + + There are two signatures: + + .. code-block:: cmake + + find_package_handle_standard_args( + (DEFAULT_MSG|) + ... + ) + + find_package_handle_standard_args( + [FOUND_VAR ] + [REQUIRED_VARS ...] + [VERSION_VAR ] + [HANDLE_VERSION_RANGE] + [HANDLE_COMPONENTS] + [CONFIG_MODE] + [NAME_MISMATCHED] + [REASON_FAILURE_MESSAGE ] + [FAIL_MESSAGE ] + ) + + The ``_FOUND`` variable will be set to ``TRUE`` if all + the variables ``...`` are valid and any optional + constraints are satisfied, and ``FALSE`` otherwise. A success or + failure message may be displayed based on the results and on + whether the ``REQUIRED`` and/or ``QUIET`` option was given to + the :command:`find_package` call. + + The options are: + + ``(DEFAULT_MSG|)`` + In the simple signature this specifies the failure message. + Use ``DEFAULT_MSG`` to ask for a default message to be computed + (recommended). Not valid in the full signature. + + ``FOUND_VAR `` + .. deprecated:: 3.3 + + Specifies either ``_FOUND`` or + ``_FOUND`` as the result variable. This exists only + for compatibility with older versions of CMake and is now ignored. + Result variables of both names are always set for compatibility. + + ``REQUIRED_VARS ...`` + Specify the variables which are required for this package. + These may be named in the generated failure message asking the + user to set the missing variable values. Therefore these should + typically be cache entries such as ``FOO_LIBRARY`` and not output + variables like ``FOO_LIBRARIES``. + + .. versionchanged:: 3.18 + If ``HANDLE_COMPONENTS`` is specified, this option can be omitted. + + ``VERSION_VAR `` + Specify the name of a variable that holds the version of the package + that has been found. This version will be checked against the + (potentially) specified required version given to the + :command:`find_package` call, including its ``EXACT`` option. + The default messages include information about the required + version and the version which has been actually found, both + if the version is ok or not. + + ``HANDLE_VERSION_RANGE`` + .. versionadded:: 3.19 + + Enable handling of a version range, if one is specified. Without this + option, a developer warning will be displayed if a version range is + specified. + + ``HANDLE_COMPONENTS`` + Enable handling of package components. In this case, the command + will report which components have been found and which are missing, + and the ``_FOUND`` variable will be set to ``FALSE`` + if any of the required components (i.e. not the ones listed after + the ``OPTIONAL_COMPONENTS`` option of :command:`find_package`) are + missing. + + ``CONFIG_MODE`` + Specify that the calling find module is a wrapper around a + call to ``find_package( NO_MODULE)``. This implies + a ``VERSION_VAR`` value of ``_VERSION``. The command + will automatically check whether the package configuration file + was found. + + ``REASON_FAILURE_MESSAGE `` + .. versionadded:: 3.16 + + Specify a custom message of the reason for the failure which will be + appended to the default generated message. + + ``FAIL_MESSAGE `` + Specify a custom failure message instead of using the default + generated message. Not recommended. + + ``NAME_MISMATCHED`` + .. versionadded:: 3.17 + + Indicate that the ```` does not match + ``${CMAKE_FIND_PACKAGE_NAME}``. This is usually a mistake and raises a + warning, but it may be intentional for usage of the command for components + of a larger package. + +Example for the simple signature: + +.. code-block:: cmake + + find_package_handle_standard_args(LibXml2 DEFAULT_MSG + LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR) + +The ``LibXml2`` package is considered to be found if both +``LIBXML2_LIBRARY`` and ``LIBXML2_INCLUDE_DIR`` are valid. +Then also ``LibXml2_FOUND`` is set to ``TRUE``. If it is not found +and ``REQUIRED`` was used, it fails with a +:command:`message(FATAL_ERROR)`, independent whether ``QUIET`` was +used or not. If it is found, success will be reported, including +the content of the first ````. On repeated CMake runs, +the same message will not be printed again. + +.. note:: + + If ```` does not match ``CMAKE_FIND_PACKAGE_NAME`` for the + calling module, a warning that there is a mismatch is given. The + ``FPHSA_NAME_MISMATCHED`` variable may be set to bypass the warning if using + the old signature and the ``NAME_MISMATCHED`` argument using the new + signature. To avoid forcing the caller to require newer versions of CMake for + usage, the variable's value will be used if defined when the + ``NAME_MISMATCHED`` argument is not passed for the new signature (but using + both is an error).. + +Example for the full signature: + +.. code-block:: cmake + + find_package_handle_standard_args(LibArchive + REQUIRED_VARS LibArchive_LIBRARY LibArchive_INCLUDE_DIR + VERSION_VAR LibArchive_VERSION) + +In this case, the ``LibArchive`` package is considered to be found if +both ``LibArchive_LIBRARY`` and ``LibArchive_INCLUDE_DIR`` are valid. +Also the version of ``LibArchive`` will be checked by using the version +contained in ``LibArchive_VERSION``. Since no ``FAIL_MESSAGE`` is given, +the default messages will be printed. + +Another example for the full signature: + +.. code-block:: cmake + + find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4) + find_package_handle_standard_args(Automoc4 CONFIG_MODE) + +In this case, a ``FindAutmoc4.cmake`` module wraps a call to +``find_package(Automoc4 NO_MODULE)`` and adds an additional search +directory for ``automoc4``. Then the call to +``find_package_handle_standard_args`` produces a proper success/failure +message. + +.. command:: find_package_check_version + + .. versionadded:: 3.19 + + Helper function which can be used to check if a ```` is valid + against version-related arguments of :command:`find_package`. + + .. code-block:: cmake + + find_package_check_version( + [HANDLE_VERSION_RANGE] + [RESULT_MESSAGE_VARIABLE ] + ) + + The ```` will hold a boolean value giving the result of the check. + + The options are: + + ``HANDLE_VERSION_RANGE`` + Enable handling of a version range, if one is specified. Without this + option, a developer warning will be displayed if a version range is + specified. + + ``RESULT_MESSAGE_VARIABLE `` + Specify a variable to get back a message describing the result of the check. + +Example for the usage: + +.. code-block:: cmake + + find_package_check_version(1.2.3 result HANDLE_VERSION_RANGE + RESULT_MESSAGE_VARIABLE reason) + if (result) + message (STATUS "${reason}") + else() + message (FATAL_ERROR "${reason}") + endif() +#]=======================================================================] + +include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake) + + +cmake_policy(PUSH) +# numbers and boolean constants +cmake_policy (SET CMP0012 NEW) +# IN_LIST operator +cmake_policy (SET CMP0057 NEW) + + +# internal helper macro +macro(_FPHSA_FAILURE_MESSAGE _msg) + set (__msg "${_msg}") + if (FPHSA_REASON_FAILURE_MESSAGE) + string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n") + endif() + if (${_NAME}_FIND_REQUIRED) + message(FATAL_ERROR "${__msg}") + else () + if (NOT ${_NAME}_FIND_QUIETLY) + message(STATUS "${__msg}") + endif () + endif () +endmacro() + + +# internal helper macro to generate the failure message when used in CONFIG_MODE: +macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE) + # _CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found: + if(${_NAME}_CONFIG) + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})") + else() + # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version. + # List them all in the error message: + if(${_NAME}_CONSIDERED_CONFIGS) + set(configsText "") + list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount) + math(EXPR configsCount "${configsCount} - 1") + foreach(currentConfigIndex RANGE ${configsCount}) + list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename) + list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version) + string(APPEND configsText "\n ${filename} (version ${version})") + endforeach() + if (${_NAME}_NOT_FOUND_MESSAGE) + if (FPHSA_REASON_FAILURE_MESSAGE) + string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ") + else() + set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}") + endif() + else() + string(APPEND configsText "\n") + endif() + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}") + + else() + # Simple case: No Config-file was found at all: + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}") + endif() + endif() +endmacro() + + +function(FIND_PACKAGE_CHECK_VERSION version result) + cmake_parse_arguments (PARSE_ARGV 2 FPCV "HANDLE_VERSION_RANGE;NO_AUTHOR_WARNING_VERSION_RANGE" "RESULT_MESSAGE_VARIABLE" "") + + if (FPCV_UNPARSED_ARGUMENTS) + message (FATAL_ERROR "find_package_check_version(): ${FPCV_UNPARSED_ARGUMENTS}: unexpected arguments") + endif() + if ("RESULT_MESSAGE_VARIABLE" IN_LIST FPCV_KEYWORDS_MISSING_VALUES) + message (FATAL_ERROR "find_package_check_version(): RESULT_MESSAGE_VARIABLE expects an argument") + endif() + + set (${result} FALSE PARENT_SCOPE) + if (FPCV_RESULT_MESSAGE_VARIABLE) + unset (${FPCV_RESULT_MESSAGE_VARIABLE} PARENT_SCOPE) + endif() + + if (_CMAKE_FPHSA_PACKAGE_NAME) + set (package "${_CMAKE_FPHSA_PACKAGE_NAME}") + elseif (CMAKE_FIND_PACKAGE_NAME) + set (package "${CMAKE_FIND_PACKAGE_NAME}") + else() + message (FATAL_ERROR "find_package_check_version(): Cannot be used outside a 'Find Module'") + endif() + + if (NOT FPCV_NO_AUTHOR_WARNING_VERSION_RANGE + AND ${package}_FIND_VERSION_RANGE AND NOT FPCV_HANDLE_VERSION_RANGE) + message(AUTHOR_WARNING + "`find_package()` specify a version range but the option " + "HANDLE_VERSION_RANGE` is not passed to `find_package_check_version()`. " + "Only the lower endpoint of the range will be used.") + endif() + + + set (version_ok FALSE) + unset (version_msg) + + if (FPCV_HANDLE_VERSION_RANGE AND ${package}_FIND_VERSION_RANGE) + if ((${package}_FIND_VERSION_RANGE_MIN STREQUAL "INCLUDE" + AND version VERSION_GREATER_EQUAL ${package}_FIND_VERSION_MIN) + AND ((${package}_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" + AND version VERSION_LESS_EQUAL ${package}_FIND_VERSION_MAX) + OR (${package}_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" + AND version VERSION_LESS ${package}_FIND_VERSION_MAX))) + set (version_ok TRUE) + set(version_msg "(found suitable version \"${version}\", required range is \"${${package}_FIND_VERSION_RANGE}\")") + else() + set(version_msg "Found unsuitable version \"${version}\", required range is \"${${package}_FIND_VERSION_RANGE}\"") + endif() + elseif (DEFINED ${package}_FIND_VERSION) + if(${package}_FIND_VERSION_EXACT) # exact version required + # count the dots in the version string + string(REGEX REPLACE "[^.]" "" version_dots "${version}") + # add one dot because there is one dot more than there are components + string(LENGTH "${version_dots}." version_dots) + if (version_dots GREATER ${package}_FIND_VERSION_COUNT) + # Because of the C++ implementation of find_package() ${package}_FIND_VERSION_COUNT + # is at most 4 here. Therefore a simple lookup table is used. + if (${package}_FIND_VERSION_COUNT EQUAL 1) + set(version_regex "[^.]*") + elseif (${package}_FIND_VERSION_COUNT EQUAL 2) + set(version_regex "[^.]*\\.[^.]*") + elseif (${package}_FIND_VERSION_COUNT EQUAL 3) + set(version_regex "[^.]*\\.[^.]*\\.[^.]*") + else() + set(version_regex "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*") + endif() + string(REGEX REPLACE "^(${version_regex})\\..*" "\\1" version_head "${version}") + if (NOT ${package}_FIND_VERSION VERSION_EQUAL version_head) + set(version_msg "Found unsuitable version \"${version}\", but required is exact version \"${${package}_FIND_VERSION}\"") + else () + set(version_ok TRUE) + set(version_msg "(found suitable exact version \"${_FOUND_VERSION}\")") + endif () + else () + if (NOT ${package}_FIND_VERSION VERSION_EQUAL version) + set(version_msg "Found unsuitable version \"${version}\", but required is exact version \"${${package}_FIND_VERSION}\"") + else () + set(version_ok TRUE) + set(version_msg "(found suitable exact version \"${version}\")") + endif () + endif () + else() # minimum version + if (${package}_FIND_VERSION VERSION_GREATER version) + set(version_msg "Found unsuitable version \"${version}\", but required is at least \"${${package}_FIND_VERSION}\"") + else() + set(version_ok TRUE) + set(version_msg "(found suitable version \"${version}\", minimum required is \"${${package}_FIND_VERSION}\")") + endif() + endif() + else () + set(version_ok TRUE) + set(version_msg "(found version \"${version}\")") + endif() + + set (${result} ${version_ok} PARENT_SCOPE) + if (FPCV_RESULT_MESSAGE_VARIABLE) + set (${FPCV_RESULT_MESSAGE_VARIABLE} "${version_msg}" PARENT_SCOPE) + endif() +endfunction() + + +function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG) + + # Set up the arguments for `cmake_parse_arguments`. + set(options CONFIG_MODE HANDLE_COMPONENTS NAME_MISMATCHED HANDLE_VERSION_RANGE) + set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR) + set(multiValueArgs REQUIRED_VARS) + + # Check whether we are in 'simple' or 'extended' mode: + set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} ) + list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX) + + unset(FPHSA_NAME_MISMATCHED_override) + if (DEFINED FPHSA_NAME_MISMATCHED) + # If the variable NAME_MISMATCHED variable is set, error if it is passed as + # an argument. The former is for old signatures, the latter is for new + # signatures. + list(FIND ARGN "NAME_MISMATCHED" name_mismatched_idx) + if (NOT name_mismatched_idx EQUAL "-1") + message(FATAL_ERROR + "The `NAME_MISMATCHED` argument may only be specified by the argument or " + "the variable, not both.") + endif () + + # But use the variable if it is not an argument to avoid forcing minimum + # CMake version bumps for calling modules. + set(FPHSA_NAME_MISMATCHED_override "${FPHSA_NAME_MISMATCHED}") + endif () + + if(${INDEX} EQUAL -1) + set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG}) + set(FPHSA_REQUIRED_VARS ${ARGN}) + set(FPHSA_VERSION_VAR) + else() + cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN}) + + if(FPHSA_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"") + endif() + + if(NOT FPHSA_FAIL_MESSAGE) + set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG") + endif() + + # In config-mode, we rely on the variable _CONFIG, which is set by find_package() + # when it successfully found the config-file, including version checking: + if(FPHSA_CONFIG_MODE) + list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG) + list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS) + set(FPHSA_VERSION_VAR ${_NAME}_VERSION) + endif() + + if(NOT FPHSA_REQUIRED_VARS AND NOT FPHSA_HANDLE_COMPONENTS) + message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()") + endif() + endif() + + if (DEFINED FPHSA_NAME_MISMATCHED_override) + set(FPHSA_NAME_MISMATCHED "${FPHSA_NAME_MISMATCHED_override}") + endif () + + if (DEFINED CMAKE_FIND_PACKAGE_NAME + AND NOT FPHSA_NAME_MISMATCHED + AND NOT _NAME STREQUAL CMAKE_FIND_PACKAGE_NAME) + message(AUTHOR_WARNING + "The package name passed to `find_package_handle_standard_args` " + "(${_NAME}) does not match the name of the calling package " + "(${CMAKE_FIND_PACKAGE_NAME}). This can lead to problems in calling " + "code that expects `find_package` result variables (e.g., `_FOUND`) " + "to follow a certain pattern.") + endif () + + if (${_NAME}_FIND_VERSION_RANGE AND NOT FPHSA_HANDLE_VERSION_RANGE) + message(AUTHOR_WARNING + "`find_package()` specify a version range but the module ${_NAME} does " + "not support this capability. Only the lower endpoint of the range " + "will be used.") + endif() + + # to propagate package name to FIND_PACKAGE_CHECK_VERSION + set(_CMAKE_FPHSA_PACKAGE_NAME "${_NAME}") + + # now that we collected all arguments, process them + + if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG") + set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}") + endif() + + if (FPHSA_REQUIRED_VARS) + list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR) + endif() + + string(TOUPPER ${_NAME} _NAME_UPPER) + string(TOLOWER ${_NAME} _NAME_LOWER) + + if(FPHSA_FOUND_VAR) + set(_FOUND_VAR_UPPER ${_NAME_UPPER}_FOUND) + set(_FOUND_VAR_MIXED ${_NAME}_FOUND) + if(FPHSA_FOUND_VAR STREQUAL _FOUND_VAR_MIXED OR FPHSA_FOUND_VAR STREQUAL _FOUND_VAR_UPPER) + set(_FOUND_VAR ${FPHSA_FOUND_VAR}) + else() + message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_FOUND_VAR_MIXED}\" and \"${_FOUND_VAR_UPPER}\" are valid names.") + endif() + else() + set(_FOUND_VAR ${_NAME_UPPER}_FOUND) + endif() + + # collect all variables which were not found, so they can be printed, so the + # user knows better what went wrong (#6375) + set(MISSING_VARS "") + set(DETAILS "") + # check if all passed variables are valid + set(FPHSA_FOUND_${_NAME} TRUE) + foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS}) + if(NOT ${_CURRENT_VAR}) + set(FPHSA_FOUND_${_NAME} FALSE) + string(APPEND MISSING_VARS " ${_CURRENT_VAR}") + else() + string(APPEND DETAILS "[${${_CURRENT_VAR}}]") + endif() + endforeach() + if(FPHSA_FOUND_${_NAME}) + set(${_NAME}_FOUND TRUE) + set(${_NAME_UPPER}_FOUND TRUE) + else() + set(${_NAME}_FOUND FALSE) + set(${_NAME_UPPER}_FOUND FALSE) + endif() + + # component handling + unset(FOUND_COMPONENTS_MSG) + unset(MISSING_COMPONENTS_MSG) + + if(FPHSA_HANDLE_COMPONENTS) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(${_NAME}_${comp}_FOUND) + + if(NOT DEFINED FOUND_COMPONENTS_MSG) + set(FOUND_COMPONENTS_MSG "found components:") + endif() + string(APPEND FOUND_COMPONENTS_MSG " ${comp}") + + else() + + if(NOT DEFINED MISSING_COMPONENTS_MSG) + set(MISSING_COMPONENTS_MSG "missing components:") + endif() + string(APPEND MISSING_COMPONENTS_MSG " ${comp}") + + if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_NAME}_FOUND FALSE) + string(APPEND MISSING_VARS " ${comp}") + endif() + + endif() + endforeach() + set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}") + string(APPEND DETAILS "[c${COMPONENT_MSG}]") + endif() + + # version handling: + set(VERSION_MSG "") + set(VERSION_OK TRUE) + + # check with DEFINED here as the requested or found version may be "0" + if (DEFINED ${_NAME}_FIND_VERSION) + if(DEFINED ${FPHSA_VERSION_VAR}) + set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}}) + if (FPHSA_HANDLE_VERSION_RANGE) + set (FPCV_HANDLE_VERSION_RANGE HANDLE_VERSION_RANGE) + else() + set(FPCV_HANDLE_VERSION_RANGE NO_AUTHOR_WARNING_VERSION_RANGE) + endif() + find_package_check_version ("${_FOUND_VERSION}" VERSION_OK RESULT_MESSAGE_VARIABLE VERSION_MSG + ${FPCV_HANDLE_VERSION_RANGE}) + else() + # if the package was not found, but a version was given, add that to the output: + if(${_NAME}_FIND_VERSION_EXACT) + set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")") + elseif (FPHSA_HANDLE_VERSION_RANGE AND ${_NAME}_FIND_VERSION_RANGE) + set(VERSION_MSG "(Required is version range \"${${_NAME}_FIND_VERSION_RANGE}\")") + else() + set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")") + endif() + endif() + else () + # Check with DEFINED as the found version may be 0. + if(DEFINED ${FPHSA_VERSION_VAR}) + set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")") + endif() + endif () + + if(VERSION_OK) + string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]") + else() + set(${_NAME}_FOUND FALSE) + endif() + + + # print the result: + if (${_NAME}_FOUND) + FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}") + else () + + if(FPHSA_CONFIG_MODE) + _FPHSA_HANDLE_FAILURE_CONFIG_MODE() + else() + if(NOT VERSION_OK) + set(RESULT_MSG) + if (_FIRST_REQUIRED_VAR) + string (APPEND RESULT_MSG "found ${${_FIRST_REQUIRED_VAR}}") + endif() + if (COMPONENT_MSG) + if (RESULT_MSG) + string (APPEND RESULT_MSG ", ") + endif() + string (APPEND RESULT_MSG "${FOUND_COMPONENTS_MSG}") + endif() + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (${RESULT_MSG})") + else() + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}") + endif() + endif() + + endif () + + set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE) + set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE) +endfunction() + + +cmake_policy(POP) diff --git a/cmake/FindPackageMessage.cmake b/cmake/FindPackageMessage.cmake new file mode 100644 index 0000000..0628b98 --- /dev/null +++ b/cmake/FindPackageMessage.cmake @@ -0,0 +1,48 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindPackageMessage +------------------ + +.. code-block:: cmake + + find_package_message( "message for user" "find result details") + +This function is intended to be used in FindXXX.cmake modules files. +It will print a message once for each unique find result. This is +useful for telling the user where a package was found. The first +argument specifies the name (XXX) of the package. The second argument +specifies the message to display. The third argument lists details +about the find result so that if they change the message will be +displayed again. The macro also obeys the QUIET argument to the +find_package command. + +Example: + +.. code-block:: cmake + + if(X11_FOUND) + find_package_message(X11 "Found X11: ${X11_X11_LIB}" + "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]") + else() + ... + endif() +#]=======================================================================] + +function(find_package_message pkg msg details) + # Avoid printing a message repeatedly for the same find result. + if(NOT ${pkg}_FIND_QUIETLY) + string(REPLACE "\n" "" details "${details}") + set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg}) + if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}") + # The message has not yet been printed. + message(STATUS "${msg}") + + # Save the find details in the cache to avoid printing the same + # message again. + set("${DETAILS_VAR}" "${details}" + CACHE INTERNAL "Details about finding ${pkg}") + endif() + endif() +endfunction() diff --git a/cmake/ectrans-import.cmake.in b/cmake/ectrans-import.cmake.in new file mode 100644 index 0000000..88a6964 --- /dev/null +++ b/cmake/ectrans-import.cmake.in @@ -0,0 +1,65 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Import for trans package +# +# This file is included during +# +# find_package( ectrans [COMPONENTS (double|single|transi)] [QUIET] [REQUIRED] ) +# +# Supported COMPONENTS: double single transi +# +# If available following targets will be exported: +# - trans_dp Double precision trans library +# - trans_sp Single precision trans library +# - transi_dp Double precision transi library (C interface to trans_dp) +# + +################################################################## +## Export project variables + +set( ectrans_VERSION_STR @ectrans_VERSION_STR@ ) +set( ectrans_HAVE_MPI @ectrans_HAVE_MPI@ ) +set( ectrans_HAVE_OMP @ectrans_HAVE_OMP@ ) +set( ectrans_HAVE_FFTW @ectrans_HAVE_FFTW@ ) +set( ectrans_HAVE_TRANSI @ectrans_HAVE_TRANSI@ ) +set( ectrans_HAVE_SINGLE_PRECISION @ectrans_HAVE_SINGLE_PRECISION@ ) +set( ectrans_HAVE_DOUBLE_PRECISION @ectrans_HAVE_DOUBLE_PRECISION@ ) +set( ectrans_REQUIRES_PRIVATE_DEPENDENCIES @PACKAGE_REQUIRES_PRIVATE_DEPENDENCIES@ ) + +if( NOT ${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY ) + message( STATUS "Found ectrans version ${ectrans_VERSION_STR}" ) +endif() + +################################################################## +## Export project dependencies + +include( CMakeFindDependencyMacro ) +if( ectrans_REQUIRES_PRIVATE_DEPENDENCIES OR CMAKE_Fortran_COMPILER_LOADED ) + if( NOT CMAKE_Fortran_COMPILER_LOADED ) + enable_language( Fortran ) + endif() + if( trans_HAVE_OMP AND NOT TARGET OpenMP::OpenMP_Fortran ) + find_dependency( OpenMP COMPONENTS Fortran ) + endif() + find_dependency( fiat HINTS ${CMAKE_CURRENT_LIST_DIR}/../fiat @fiat_DIR@ ) +endif() + + +################################################################## +## Handle components + +set( ${CMAKE_FIND_PACKAGE_NAME}_single_FOUND ${ectrans_HAVE_SINGLE_PRECISION} ) +set( ${CMAKE_FIND_PACKAGE_NAME}_double_FOUND ${ectrans_HAVE_DOUBLE_PRECISION} ) +set( ${CMAKE_FIND_PACKAGE_NAME}_transi_FOUND ${ectrans_HAVE_TRANSI} ) + +foreach( _component ${${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS} ) + if( NOT ${CMAKE_FIND_PACKAGE_NAME}_${_component}_FOUND AND ${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED ) + message( SEND_ERROR "ectrans was not build with support for COMPONENT ${_component}" ) + endif() +endforeach() diff --git a/cmake/ectrans_add_library.cmake b/cmake/ectrans_add_library.cmake new file mode 100644 index 0000000..38751ef --- /dev/null +++ b/cmake/ectrans_add_library.cmake @@ -0,0 +1,35 @@ +function( ectrans_add_library ) + + set( options ) + set( single_value_args TYPE TARGET ) + set( multi_value_args SOURCES LINKER_LANGUAGE ) + cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${_FIRST_ARG} ${ARGN} ) + + + if( NOT DEFINED _PAR_SOURCES ) + # ecbuild_add_library requires a "SOURCES" keyword, which should not be required for linking an object library + if( NOT DEFINED _PAR_LINKER_LANGUAGE ) + ecbuild_error("LINKER_LANGUAGE keyword missing") + endif() + set( dummy_source ${CMAKE_CURRENT_BINARY_DIR}/${_PAR_TARGET}_dummy.F90 ) + if( NOT EXISTS ${dummy_source} ) + file( WRITE ${dummy_source} "subroutine ${_PAR_TARGET}_dummy; end subroutine" ) + endif() + ecbuild_add_library( ${ARGV} SOURCES ${dummy_source} ) + else() + ecbuild_add_library( ${ARGV} ) + endif() + + if( _PAR_TYPE STREQUAL "OBJECT" ) + # ecbuild support for exporting object libraries is missing. For now do it manually. + install( TARGETS ${_PAR_TARGET} + EXPORT ${PROJECT_NAME}-targets + RUNTIME DESTINATION ${INSTALL_BIN_DIR} + LIBRARY DESTINATION ${INSTALL_LIB_DIR} + ARCHIVE DESTINATION ${INSTALL_LIB_DIR} + ) + export( TARGETS ${_PAR_TARGET} APPEND FILE "${PROJECT_TARGETS_FILE}" ) + endif() + +endfunction() + diff --git a/cmake/ectrans_compile_options.cmake b/cmake/ectrans_compile_options.cmake new file mode 100644 index 0000000..f8e008c --- /dev/null +++ b/cmake/ectrans_compile_options.cmake @@ -0,0 +1,53 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + + +if( CMAKE_Fortran_COMPILER_ID MATCHES "XL" ) + ecbuild_add_fortran_flags("-qextname -qnobindcextname") +endif() + +# gfortran 10 has become stricter with argument matching +if( CMAKE_Fortran_COMPILER_ID MATCHES "GNU" + AND NOT CMAKE_Fortran_COMPILER_VERSION VERSION_LESS 10 ) + ecbuild_add_fortran_flags("-fallow-argument-mismatch") +endif() + +if( CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC" ) + ecbuild_add_fortran_flags("-Mlarge_arrays") + + # should really be part of configuration, or ecbuild default? + ecbuild_add_fortran_flags("-traceback" BUILD DEBUG ) + ecbuild_add_fortran_flags("-fast" BUILD RELEASE ) + ecbuild_add_fortran_flags("-gopt -fast" BUILD RELWITHDEBINFO ) +endif() + + + +macro( ectrans_add_compile_options ) + set( options ) + set( single_value_args FLAGS ) + set( multi_value_args SOURCES ) + cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${_FIRST_ARG} ${ARGN} ) + if(_PAR_UNPARSED_ARGUMENTS) + ecbuild_critical("Unknown keywords given to ectrans_add_compile_flags(): \"${_PAR_UNPARSED_ARGUMENTS}\"") + endif() + if(NOT _PAR_SOURCES) + ecbuild_critical("SOURCES keyword missing to ectrans_add_compile_flags()") + endif() + if(NOT _PAR_FLAGS) + ecbuild_critical("FLAGS keyword missing to ectrans_add_compile_flags()") + endif() + foreach( _file ${_PAR_SOURCES} ) + ecbuild_warn("Adding custom compile flags for file ${_file} : [${_PAR_FLAGS}]") + if( NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${_file} ) + ecbuild_error("${_file} does not exist") + endif() + set_source_files_properties( ${_file} PROPERTIES COMPILE_FLAGS "${_PAR_FLAGS}" ) + endforeach() +endmacro() + diff --git a/cmake/ectrans_find_lapack.cmake b/cmake/ectrans_find_lapack.cmake new file mode 100644 index 0000000..293207a --- /dev/null +++ b/cmake/ectrans_find_lapack.cmake @@ -0,0 +1,71 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +macro( ectrans_find_lapack ) + # This macro sets the LAPACK_LIBRARIES variable + # IF MKL is preferred, unless ENABLE_MKL=OFF + + if( HAVE_MKL ) + set( LAPACK_LIBRARIES ${MKL_LIBRARIES} ) + else() + # Following libsci code should disappear soon, with more recent cmake versions (needs more investigation) + set( _cray_libsci_loaded $ENV{CRAY_LIBSCI_DIR} ) + if( _cray_libsci_loaded ) + set( _CRAY_PRGENV $ENV{PE_ENV} ) + string( TOLOWER "${_CRAY_PRGENV}" _cray_prgenv ) + set( LAPACK_LIBRARIES sci_${_cray_prgenv} ) + ecbuild_debug( "LAPACK found, already loaded as part of Cray's libsci" ) + else() + ecbuild_find_package( NAME LAPACK REQUIRED ) + if( TARGET lapack ) + ecbuild_debug( "LAPACK found as CMake target lapack" ) + set( LAPACK_LIBRARIES lapack ) + endif() + endif() + endif() + ecbuild_debug_var( LAPACK_LIBRARIES ) + + set( LAPACK_sp ${LAPACK_LIBRARIES} CACHE STRING "ectrans: Double precision LAPACK libraries" ) + set( LAPACK_dp ${LAPACK_LIBRARIES} CACHE STRING "ectrans: Single precision LAPACK libraries" ) + + set( LAPACK_LINK PRIVATE ) + + ### Following is a hack that should be removed when there is no more Cray computer at ECMWF + # It allows to use a different LAPACK library for single and double precision, to be able to + # stay bitreproducible for double precision in operations of CY47R1 + + set( _cray_libsci_loaded $ENV{CRAY_LIBSCI_DIR} ) + + if( CMAKE_Fortran_COMPILER_ID MATCHES "Cray" ) + if( HAVE_MKL AND ECTRANS_CRAYHACK_DOUBLE_PRECISION_WITHOUT_MKL ) + # Following libsci code should disappear soon, with more recent cmake versions (needs more investigation) + if( _cray_libsci_loaded ) + set( _CRAY_PRGENV $ENV{PE_ENV} ) + string( TOLOWER "${_CRAY_PRGENV}" _cray_prgenv ) + set( LAPACK_dp sci_${_cray_prgenv} ) + ecbuild_debug( "LAPACK found, already loaded as part of Cray's libsci" ) + else() + ecbuild_find_package( NAME LAPACK REQUIRED ) + set( LAPACK_dp ${LAPACK_LIBRARIES} ) + if( TARGET lapack ) + ecbuild_debug( "LAPACK found as CMake target lapack" ) + set( LAPACK_dp lapack ) + endif() + endif() + endif() + endif() + + if( _cray_libsci_loaded ) + if( NOT LAPACK_sp MATCHES "sci" OR NOT LAPACK_dp MATCHES "sci" ) + ecbuild_warn( "Danger! Cray's libsci is loaded, which is different from selected LAPACK. " + "No guarantees on link order can be made for the final executable.") + set( LAPACK_LINK PUBLIC ) + endif() + endif() + +endmacro() diff --git a/cmake/ectrans_macros.cmake b/cmake/ectrans_macros.cmake new file mode 100644 index 0000000..2665bb8 --- /dev/null +++ b/cmake/ectrans_macros.cmake @@ -0,0 +1,30 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +if( CMAKE_VERSION VERSION_LESS 3.22.0 ) + # FindCUDAToolkit from cmake < 3.22 does not support recent NVHPC directory structures + set( CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/FindCUDAToolkit-cmake-3.24 ${CMAKE_MODULE_PATH} ) +endif() + +### Workaround to extract GIT_SHA1 from parent directory +if( NOT ${PROJECT_NAME}_GIT_SHA1 ) + get_filename_component( PARENT_DIR ${PROJECT_SOURCE_DIR} DIRECTORY ) + if( EXISTS ${PARENT_DIR}/.git ) + get_filename_component( PARENT_REPOSITORY_NAME ${PARENT_DIR} NAME_WE ) + get_git_head_revision( GIT_REFSPEC ${PROJECT_NAME}_GIT_SHA1 ) + string( SUBSTRING "${${PROJECT_NAME}_GIT_SHA1}" 0 7 ${PROJECT_NAME}_GIT_SHA1_SHORT ) + set( ${PROJECT_NAME}_GIT_SHA1_SHORT "${PARENT_REPOSITORY_NAME}/${${PROJECT_NAME}_GIT_SHA1_SHORT}" ) + set( ${PROJECT_NAME}_GIT_SHA1 "${PARENT_REPOSITORY_NAME}/${${PROJECT_NAME}_GIT_SHA1}" ) + endif() +endif() + +include( ectrans_target_fortran_module_directory ) +include( ectrans_find_lapack ) +include( ectrans_add_library ) +include( CheckLanguage ) + diff --git a/cmake/ectrans_target_fortran_module_directory.cmake b/cmake/ectrans_target_fortran_module_directory.cmake new file mode 100644 index 0000000..14405ca --- /dev/null +++ b/cmake/ectrans_target_fortran_module_directory.cmake @@ -0,0 +1,38 @@ +# (C) Copyright 2005- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +macro( ectrans_target_fortran_module_directory ) + set( options NO_MODULE_DIRECTORY ) + set( single_value_args TARGET MODULE_DIRECTORY INSTALL_DIRECTORY ) + set( multi_value_args "" ) + cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${_FIRST_ARG} ${ARGN} ) + + if( NOT _PAR_TARGET ) + ecbuild_critical( "Missing argument TARGET" ) + endif() + + if( _PAR_NO_MODULE_DIRECTORY ) + set_target_properties( ${_PAR_TARGET} PROPERTIES Fortran_MODULE_DIRECTORY "" ) + else() + if( NOT _PAR_MODULE_DIRECTORY ) + ecbuild_critical( "Missing argument MODULE_DIRECTORY" ) + endif() + set_target_properties( ${_PAR_TARGET} PROPERTIES Fortran_MODULE_DIRECTORY ${_PAR_MODULE_DIRECTORY} ) + target_include_directories( ${_PAR_TARGET} PUBLIC $ ) + endif() + + if( ECBUILD_INSTALL_FORTRAN_MODULES ) + if( _PAR_INSTALL_DIRECTORY ) + target_include_directories( ${_PAR_TARGET} PUBLIC $ ) + install( DIRECTORY ${_PAR_MODULE_DIRECTORY}/ + DESTINATION ${_PAR_INSTALL_DIRECTORY} + COMPONENT modules ) + endif() + endif() + +endmacro() diff --git a/cmake/project_summary.cmake b/cmake/project_summary.cmake new file mode 100644 index 0000000..eba63a5 --- /dev/null +++ b/cmake/project_summary.cmake @@ -0,0 +1,53 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +ecbuild_info( "build type : [${CMAKE_BUILD_TYPE}]" ) +set( Fortran_flags_str "Fortran flags" ) +set( C_flags_str "C flags " ) +set( CXX_flags_str "C++ flags " ) + + +#message("") +#message( "=================================" ) +#set( lang "Fortran" ) +#message("${CMAKE_${lang}_FLAGS}") +#message("${CMAKE_${lang}_FLAGS_${CMAKE_BUILD_TYPE_CAPS}}") +#message("${${PNAME}_${lang}_FLAGS}") +#message("${${PNAME}_${lang}_FLAGS_${CMAKE_BUILD_TYPE_CAPS}}") +#message( "=================================" ) +#message("") + +string( TOUPPER ${PROJECT_NAME} PNAME ) + foreach( lang Fortran C CXX ) + set( flags "${CMAKE_${lang}_FLAGS} ${CMAKE_${lang}_FLAGS_${CMAKE_BUILD_TYPE_CAPS}} ${${PNAME}_${lang}_FLAGS} ${${PNAME}_${lang}_FLAGS_${CMAKE_BUILD_TYPE_CAPS}}" ) + string(REGEX REPLACE "[ ]+" " " flags ${flags}) + string(STRIP "${flags}" flags) +ecbuild_info( "${${lang}_flags_str} : [${flags}]" ) + endforeach() +ecbuild_info( "OMP" ) + foreach( lang Fortran ) +ecbuild_info( " OpenMP_${lang}_FLAGS : [${OpenMP_${lang}_FLAGS}]" ) + endforeach() +ecbuild_info( "ACC" ) + foreach( lang Fortran ) +ecbuild_info( " OpenACC_${lang}_FLAGS : [${OpenACC_${lang}_FLAGS}]" ) + endforeach() +ecbuild_info( "BLAS/LAPACK" ) + if( HAVE_SINGLE_PRECISION AND HAVE_DOUBLE_PRECISION AND ECTRANS_CRAYHACK_DOUBLE_PRECISION_WITHOUT_MKL ) +ecbuild_info( " trans_dp : [${LAPACK_dp}]" ) +ecbuild_info( " trans_sp : [${LAPACK_sp}]" ) + else() +ecbuild_info( " LAPACK_LIBRARIES : [${LAPACK_LIBRARIES}]" ) + endif() +ecbuild_info( "FFTW" ) +ecbuild_info( " FFTW_LIBRARIES : [${FFTW_LIBRARIES}]" ) +ecbuild_info( "---------------------------------------------------------" ) + + + + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..47a25b5 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,17 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +add_subdirectory( trans ) +add_subdirectory( programs ) +if( HAVE_TRANSI ) + add_subdirectory(transi) +endif() + +if( HAVE_ETRANS ) + add_subdirectory(etrans) +endif() diff --git a/src/etrans/CMakeLists.txt b/src/etrans/CMakeLists.txt new file mode 100644 index 0000000..29f854d --- /dev/null +++ b/src/etrans/CMakeLists.txt @@ -0,0 +1,17 @@ +if( HAVE_CPU) + add_subdirectory( cpu ) +endif() + +if( HAVE_GPU ) + add_subdirectory( gpu ) +endif() + +## Install trans interface + +file( GLOB etrans_interface include/etrans/* ) +install( + FILES ${etrans_interface} + DESTINATION include/etrans +) + +add_subdirectory( programs ) diff --git a/src/etrans/cpu/CMakeLists.txt b/src/etrans/cpu/CMakeLists.txt new file mode 100644 index 0000000..013434d --- /dev/null +++ b/src/etrans/cpu/CMakeLists.txt @@ -0,0 +1,61 @@ +# Create library etrans_${prec}.so + +if( HAVE_SINGLE_PRECISION ) + list( APPEND precs sp ) +endif() + +if( HAVE_DOUBLE_PRECISION ) + list( APPEND precs dp ) +endif() + +ecbuild_list_add_pattern( LIST etrans_src + GLOB + external/*.F90 + internal/*.F90 + aux/*.F90 + QUIET + )# + +foreach ( prec IN LISTS precs ) + + ecbuild_add_library( TARGET etrans_${prec} + + PUBLIC_INCLUDES + $ + $ + $ + $ + $ + + PUBLIC_LIBS + fiat + parkind_${prec} + + SOURCES + ${etrans_src} + + INSTALL_HEADERS_LIST + ${etrans_hdr} + + HEADER_DESTINATION include/etrans + #TYPE STATIC + ) + + + if( HAVE_FFTW ) + target_link_libraries( etrans_${prec} PRIVATE ${FFTW_LIBRARIES} ) + target_include_directories( etrans_${prec} PRIVATE ${FFTW_INCLUDE_DIRS} ) + target_compile_definitions( etrans_${prec} PRIVATE WITH_FFTW ) + endif() + + ectrans_target_fortran_module_directory( + TARGET etrans_${prec} + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/etrans_${prec} + INSTALL_DIRECTORY module/etrans_${prec} + ) + + if( HAVE_OMP ) + target_link_libraries( etrans_${prec} PRIVATE OpenMP::OpenMP_Fortran ) + endif() + +endforeach() diff --git a/src/etrans/cpu/aux/ellips.F90 b/src/etrans/cpu/aux/ellips.F90 new file mode 100644 index 0000000..e3af473 --- /dev/null +++ b/src/etrans/cpu/aux/ellips.F90 @@ -0,0 +1,8 @@ +! Oct-2012 P. Marguinaud 64b LFI + +#undef JLIK +#undef _ELLIPS_ +#define JLIK JPIM +#define _ELLIPS_ ELLIPS +#include "ellips.h" + diff --git a/src/etrans/cpu/aux/ellips.h b/src/etrans/cpu/aux/ellips.h new file mode 100644 index 0000000..b196ebf --- /dev/null +++ b/src/etrans/cpu/aux/ellips.h @@ -0,0 +1,87 @@ +! Jan-2011 P. Marguinaud Interface to thread-safe FA +SUBROUTINE _ELLIPS_ (KSMAX,KMSMAX,KNTMP,KMTMP) +USE PARKIND1, ONLY : JPRB, JPIM, JPIB, JPRD +USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +!USE LFI_PRECISION +IMPLICIT NONE +! +! ***ELLIPS*** - General routine for computing elliptic truncation +! +! Purpose. +! -------- +! Computation of zonal and meridional limit wavenumbers within the ellipse +! Interface: +! ---------- +! *CALL* *ELLIPS * +! +! Explicit arguments : +! -------------------- +! +! Implicit arguments : +! -------------------- +! +! +! Method. +! ------- +! See documentation +! +! Externals. NONE. +! ---------- +! +! Reference. +! ---------- +! ARPEGE/ALADIN documentation +! +! Author. +! ------- +! G. Radnoti LACE 97/04/04 +! +! Modifications. +!------------------------------------------------------------- +! J.Vivoda, 99/05/19 treating NSMAX=0 and NMSMAX=0 +! +! +INTEGER (KIND=JLIK) KSMAX, KMSMAX +INTEGER (KIND=JLIK) KNTMP(0:KMSMAX),KMTMP(0:KSMAX) +! +INTEGER (KIND=JLIK) JM, JN +! +REAL (KIND=JPRD) ZEPS, ZKN, ZKM, ZAUXIL +! +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +IF (LHOOK) CALL DR_HOOK('ELLIPS',0,ZHOOK_HANDLE) +ZEPS=1.E-10 +ZAUXIL=0. +! +! 1. Computing meridional limit wavenumbers along zonal wavenumbers +! +DO JM=1,KMSMAX-1 +ZKN = REAL(KSMAX,JPRD)/REAL(KMSMAX,JPRD)* & +& SQRT(MAX(ZAUXIL,REAL(KMSMAX**2-JM**2,JPRD))) + KNTMP(JM)=INT(ZKN+ZEPS, JLIK) +ENDDO + +IF( KMSMAX.EQ.0 )THEN + KNTMP(0)=KSMAX +ELSE + KNTMP(0)=KSMAX + KNTMP(KMSMAX)=0 +ENDIF +! +! 2. Computing zonal limit wavenumbers along meridional wavenumbers +! +DO JN=1,KSMAX-1 + ZKM = REAL(KMSMAX,JPRD)/REAL(KSMAX,JPRD)* & + & SQRT(MAX(ZAUXIL,REAL(KSMAX**2-JN**2,JPRD))) + KMTMP(JN)=INT(ZKM+ZEPS, JLIK) +ENDDO + +IF( KSMAX.EQ.0 )THEN + KMTMP(0)=KMSMAX +ELSE + KMTMP(0)=KMSMAX + KMTMP(KSMAX)=0 +ENDIF +! +IF (LHOOK) CALL DR_HOOK('ELLIPS',1,ZHOOK_HANDLE) +END diff --git a/src/etrans/cpu/aux/ellips64.F90 b/src/etrans/cpu/aux/ellips64.F90 new file mode 100644 index 0000000..0839382 --- /dev/null +++ b/src/etrans/cpu/aux/ellips64.F90 @@ -0,0 +1,8 @@ +! Oct-2012 P. Marguinaud 64b LFI + +#undef JLIK +#undef _ELLIPS_ +#define JLIK JPIB +#define _ELLIPS_ ELLIPS64 +#include "ellips.h" + diff --git a/src/etrans/cpu/aux/extper_mod.F90 b/src/etrans/cpu/aux/extper_mod.F90 new file mode 100644 index 0000000..37cd07e --- /dev/null +++ b/src/etrans/cpu/aux/extper_mod.F90 @@ -0,0 +1,144 @@ +MODULE EXTPER_MOD +CONTAINS +SUBROUTINE EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& + & KPOINTERS,KALFA) + +! purpose : +! -------- +! Make spline extension. + +! *CALL* *EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& +! & KPOINTERS,KALFA) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! PWORK : Input: values in C U I area +! : Output: input+(spline extension on the E area) +! KDIM : Dimension of the C U I U E unit of work (one row or one m) +! KPSTA : Position where the unit of work starts +! KPOINTS : Position where the unit of work ends +! KFLDS : number of 2D fields +! KUNITS : Number of units of work +! KPOINTERS : Array of pointers for the units of work +! KALFA : boundary condition of a spline: +! = 0 ... natural spline +! = 1 ... boundary condition computed differentially +! (additional option) +! references : +! ---------- + +! author : +! ------ +! M. Hortal 03-11-2009 +! ----------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN +USE TPM_DISTR + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KDIM +INTEGER(KIND=JPIM),INTENT(IN) :: KPSTA +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTS +INTEGER(KIND=JPIM),INTENT(IN) :: KFLDS +INTEGER(KIND=JPIM),INTENT(IN) :: KUNITS +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTERS(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KALFA + +! arrays : +! -------- +INTEGER(KIND=JPIM) :: IENDX, IENDY, JFL, JLAT, JLON, IA + +REAL(KIND=JPRB) :: ZA, ZB, ZC, ZD, ZEPSA, ZEPSB, ZJ, ZK, ZKP1,& + & ZLAM, ZLAMB, ZM1, ZM2, ZMM, ZNY +REAL(KIND=JPRB) :: ZMAX(KUNITS), ZMIN(KUNITS) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EXTPER',0,ZHOOK_HANDLE) + +!* 0. Security +! -------- + +IF(UBOUND(PWORK,1) < KFLDS) THEN + CALL ABOR1(' EXTPER, PWORK first dimension too small') +ENDIF +IF(UBOUND(PWORK,2) < KDIM+2) THEN + !WRITE(NOUT,*) ' UBOUND(PWORK,2)=',UBOUND(PWORK,2),' KDIM=',KDIM,' KUNITS=',& + ! &KUNITS + CALL ABOR1(' EXTPER, PWORK second dimension too small') +ENDIF +IF(UBOUND(KPOINTERS,1) < KUNITS) THEN + CALL ABOR1(' EXTPER, KPOINTERS too small') +ENDIF +IF(UBOUND(PWORK,2) < KPOINTERS(KUNITS)+KDIM) THEN + !WRITE(NERR,*) ' EXTPER, KUNITS=',KUNITS,' KPOINTERS=',KPOINTERS(1:KUNITS),& + ! &' KDIM=',KDIM,' UBOUND(PWORK,2)=',UBOUND(PWORK,2) + CALL ABOR1(' EXTPER, value of KPOINTERS too large') +ENDIF + +!* 1. Spline Extension. +! ------------------- + +DO JFL = 1, KFLDS + + ZK = REAL(KDIM-KPOINTS+1,JPRB) + ZKP1 = ZK + 1.0_JPRB + ZLAMB = ZK/ZKP1 + ZNY = REAL(KALFA,JPRB)/ZKP1 + + DO JLAT=1,KUNITS + ZEPSA = & + &((PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK -& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1))*6._JPRB/ZKP1 -& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-2)) + + ZEPSB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA) -& + & (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPSTA+2)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ZMM + ZA = PWORK(JFL,KPOINTERS(JLAT)+KPOINTS) + ZB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK-& + & (2.0_JPRB*ZM1 + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + + DO JLON=KPOINTERS(JLAT)+KPOINTS+1,KPOINTERS(JLAT)+KDIM + + ZJ = REAL(JLON - (KPOINTERS(JLAT)+KPOINTS),JPRB) + PWORK(JFL,JLON) = ZA + ZJ * (ZB + ZJ * (ZC + ZD * ZJ)) + ENDDO + ENDDO + + +ENDDO + +IF (LHOOK) CALL DR_HOOK('EXTPER',1,ZHOOK_HANDLE) +END SUBROUTINE EXTPER +END MODULE EXTPER_MOD diff --git a/src/etrans/cpu/external/edir_trans.F90 b/src/etrans/cpu/external/edir_trans.F90 new file mode 100644 index 0000000..d083891 --- /dev/null +++ b/src/etrans/cpu/external/edir_trans.F90 @@ -0,0 +1,651 @@ +SUBROUTINE EDIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! PMEANU(:),PMEANV(:) - mean wind +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTL - control of Legendre transform +! EFTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: 01-03-13 adaptation to aladin +! P. Smolikova 02-09-30 : AUX_PROC for d4 in NH +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 19-04-2013 Comparison of ubound(pspdiv,1) +! with ubound(pspvor,1) +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIR_TRANS_CTL_MOD ,ONLY : EDIR_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +INTEGER, SAVE :: number_of_calls=0 + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',0,ZHOOK_HANDLE) + + +#ifndef gnarls +write (20,*) __FILE__, __LINE__ +number_of_calls=number_of_calls+1 +write (20,*) 'number of calls = ',number_of_calls +call flush(20) +write (0,*) __FILE__, __LINE__ +write (0,*) 'number of calls = ',number_of_calls +call flush(0) +#endif + +CALL GSTATS(1808,0) +CALL ESET_RESOL(KRESOL) + + +! check if args are contiguous +if ( present(PSPVOR) ) THEN + write (20,*) 'shape(PSPVOR) = ',SHAPE(PSPVOR) + !if (.not. is_contiguous(PSPVOR) ) call abort_trans('PSPVOR not contiguous') +endif +if ( present(PSPDIV) ) THEN + write (20,*) 'shape(PSPDIV) = ',SHAPE(PSPDIV) + !if (.not. is_contiguous(PSPDIV) ) call abort_trans('PSPDIV not contiguous') +endif +if ( present(PSPSCALAR) ) THEN + write (20,*) 'shape(PSPDIV) = ',SHAPE(PSPSCALAR) + !if (.not. is_contiguous(PSPSCALAR) ) call abort_trans('PSPSCALAR not contiguous') +endif +if ( present(PSPSC3A) ) THEN + write (20,*) 'shape(PSPSC3A) = ',SHAPE(PSPSC3A) + !if (.not. is_contiguous(PSPSC3A) ) call abort_trans('PSPSC3A not contiguous') +endif +if ( present(PSPSC3B) ) THEN + write (20,*) 'shape(PSPSC3B) = ',SHAPE(PSPSC3B) + !if (.not. is_contiguous(PSPSC3B) ) call abort_trans('PSPSC3B not contiguous') +endif +if ( present(PSPSC2) ) THEN + write (20,*) 'shape(PSPSC2) = ',SHAPE(PSPSC2) + !if (.not. is_contiguous(PSPSC2) ) call abort_trans('PSPSC2 not contiguous') +endif +if ( present(PGP) ) THEN + write (20,*) 'shape(PGP) = ',SHAPE(PGP) + !if (.not. is_contiguous(PGP) ) call abort_trans('PGP not contiguous') +endif +if ( present(PGPUV) ) THEN + write (20,*) 'shape(PGPUV) = ',SHAPE(PGPUV) + !if (.not. is_contiguous(PGPUV) ) call abort_trans('PGPUV not contiguous') +endif +if ( present(PGP3A) ) THEN + write (20,*) 'shape(PGP3A) = ',SHAPE(PGP3A) + !if (.not. is_contiguous(PGP3A) ) call abort_trans('PGP3A not contiguous') +endif +if ( present(PGP3B) ) THEN + write (20,*) 'shape(PGP3B) = ',SHAPE(PGP3B) + !if (.not. is_contiguous(PGP3B) ) call abort_trans('PGP3B not contiguous') +endif +if ( present(PGP2) ) THEN + write (20,*) 'shape(PGP2) = ',SHAPE(PGP2) + !if (.not. is_contiguous(PGP2) ) call abort_trans('PGP2 not contiguous') +endif +if ( present(PMEANU) ) THEN + write (20,*) 'shape(PMEANU) = ',SHAPE(PMEANU) + !if (.not. is_contiguous(PMEANU) ) call abort_trans('PMEANU not contiguous') +endif +if ( present(PMEANV) ) THEN + write (20,*) 'shape(PMEANV) = ',SHAPE(PMEANV) + !if (.not. is_contiguous(PMEANV) ) call abort_trans('PMEANV not contiguous') +endif +if ( present(KVSETUV) ) THEN + write (20,*) 'KVSETU = ',(KVSETUV) +endif +if ( present(KVSETSC) ) THEN + write (20,*) 'KVSETSC = ',(KVSETSC) +endif +if ( present(KVSETSC3A) ) THEN + write (20,*) 'KVSETSC3A = ',(KVSETSC3A) +endif +if ( present(KVSETSC3B) ) THEN + write (20,*) 'KVSETSC3B = ',(KVSETSC3B) +endif +if ( present(KVSETSC2) ) THEN + write (20,*) 'KVSETSC2 = ',(KVSETSC2) +endif +if ( present(KPROMA) ) THEN + write (20,*) 'KPROMA = ',KPROMA +endif + + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +! This is for use in TRGTOL which is shared with adjoint inverse transform +LSCDERS=.FALSE. +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANS : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= UBOUND(PSPVOR,1)) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1808,1) + +! ------------------------------------------------------------------ + +CALL EDIR_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) + + + +#ifndef gnarls +if ( number_of_calls > -1 ) then + +write (20,*) __FILE__, __LINE__ + +write (20,*) 'EDIR_TRANS INPUT:' +if ( present(PGP) ) then + write (20,*) 'PGP = '; write (20,'(4E24.13)') PGP(1:4,:,1) +endif +if ( present(PGPUV) ) then + write (20,*) 'PGPUV = '; write (20,'(4E24.13)') PGPUV(1:4,:,:,1) +endif +if ( present(PGP3A) ) then + write (20,*) 'PGP3A = '; write (20,'(4E24.13)') PGP3A(1:4,:,:,1) +endif +if ( present(PGP3B) ) then + write (20,*) 'PGP3B = '; write (20,'(4E24.13)') PGP3B(1:4,:,:,1) +endif +if ( present(PGP2) ) then + write (20,*) 'PGP2 = '; write (20,'(4E24.13)') PGP2(1:4,:,1) +endif + + +write (20,*) 'EDIR_TRANS OUTPUT:' +if ( present(PSPVOR) ) then + write (20,*) 'PSPVOR = '; write (20,'(4E24.13)') PSPVOR(:,1:20:4) +endif +if ( present(PSPDIV) ) then + write (20,*) 'PSPDIV = '; write (20,'(4E24.13)') PSPDIV(:,1:20:4) +endif +if ( present(PSPSCALAR) ) then + write (20,*) 'PSPSCALAR = '; write (20,'(4E24.13)') PSPSCALAR(:,1:20:4) +endif +if ( present(PSPSC3A) ) then + write (20,*) 'PSPSC3A = '; write (20,'(4E24.13)') PSPSC3A(:,1:20:4,:) +endif +if ( present(PSPSC3B) ) then + write (20,*) 'PSPSC3B = '; write (20,'(4E24.13)') PSPSC3B(:,1:20:4,:) +endif +if ( present(PSPSC2) ) then + write (20,*) 'PSPSC2 = '; write (20,'(4E24.13)') PSPSC2(:,1:20:4) +endif +if ( present(PMEANU) ) then + write (20,*) 'PMEANU = '; write (20,'(4E24.13)') PMEANU(:) +endif +if ( present(PMEANV) ) then + write (20,*) 'PMEANV = '; write (20,'(4E24.13)') PMEANV(:) +endif + +endif + + +if ( number_of_calls == -1 ) then + write (0,*) 'aborting at call number ',number_of_calls + call abort_trans('hold it') +endif +#endif + + + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE EDIR_TRANS diff --git a/src/etrans/cpu/external/edir_transad.F90 b/src/etrans/cpu/external/edir_transad.F90 new file mode 100644 index 0000000..beac97c --- /dev/null +++ b/src/etrans/cpu/external/edir_transad.F90 @@ -0,0 +1,493 @@ +SUBROUTINE EDIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EDIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EDIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIR_TRANS_CTLAD - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIR_TRANS_CTLAD_MOD ,ONLY : EDIR_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANV(:) +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',0,ZHOOK_HANDLE) + +CALL GSTATS(1810,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +LSCDERS=.FALSE. ! This is for use in TRLTOG which is shared with inverse transform +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETUV CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1810,1) + +! Perform transform + +CALL EDIR_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE EDIR_TRANSAD + diff --git a/src/etrans/cpu/external/edist_grid.F90 b/src/etrans/cpu/external/edist_grid.F90 new file mode 100644 index 0000000..f7e0988 --- /dev/null +++ b/src/etrans/cpu/external/edist_grid.F90 @@ -0,0 +1,136 @@ +SUBROUTINE EDIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *EDIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL EDIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE DIST_GRID_CTL_MOD ,ONLY : DIST_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS, IFLDS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_GRID: KFROM TOO SHORT!') +ENDIF +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'EDIST_GRID:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('EDIST_GRID:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'EDIST_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'EDIST_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('EDIST_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'EDIST_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('EDIST_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('EDIST_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (UBOUND (KSORT, 1) /= UBOUND (PGP, 2)) THEN + CALL ABORT_TRANS('EDIST_GRID: DIMENSION MISMATCH KSORT, PGP') + ENDIF +ENDIF + +CALL DIST_GRID_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP,KSORT) +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_GRID + diff --git a/src/etrans/cpu/external/edist_spec.F90 b/src/etrans/cpu/external/edist_spec.F90 new file mode 100644 index 0000000..eeaa512 --- /dev/null +++ b/src/etrans/cpu/external/edist_spec.F90 @@ -0,0 +1,195 @@ +SUBROUTINE EDIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSORT) + +!**** *EDIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL EDIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument (change the order of fields) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR ,ONLY : DALD + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE DIST_SPEC_CONTROL_MOD ,ONLY : DIST_SPEC_CONTROL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM),INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL,INTENT(IN) :: LDIM1_IS_FLD +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFDISTG) +INTEGER(KIND=JPIM) :: IFSEND,IFRECV,J, IFLD, ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G, ISPEC2MX +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) :: IUMPP(NPRTRW) +INTEGER(KIND=JPIM) :: IPTRMS(NPRTRW) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IALLMS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IKN(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD=.TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD=LDIM1_IS_FLD +IF(LLDIM1_IS_FLD) THEN + IFLD=1 + ICOEFF=2 +ELSE + IFLD=2 + ICOEFF=1 +ENDIF + +ISMAX = RALD%NMSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +ALLOCATE(IALLMS(ISMAX+1)) +ALLOCATE(IKN(0:ISMAX)) +ISPEC2 = D%NSPEC2 +ISPEC2_G = R%NSPEC2_G +IPOSSP(:) = D%NPOSSP(:) +IDIM0G(:) = D%NDIM0G(:) +ISPEC2MX = D%NSPEC2MX +IUMPP(:) = D%NUMPP(:) +IALLMS(:) = D%NALLMS(:) +IPTRMS(:) = D%NPTRMS(:) +DO J=0,ISMAX + IKN(J)=2*DALD%NCPL2M(J) +ENDDO + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KFROM TOO SHORT!') +ENDIF + +IFSEND = 0 +IFRECV = 0 + +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'EDIST_SPEC:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('EDIST_SPEC:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EDIST_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFSEND) THEN + WRITE(NERR,*)'EDIST_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFSEND + CALL ABORT_TRANS('EDIST_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*)'EDIST_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFDISTG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EDIST_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EDIST_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFRECV = IFRECV+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFDISTG) +ELSE + IFRECV = KFDISTG + IVSET(:) = MYSETV +ENDIF + +IF(IFRECV > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELDS TO RECEIVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFRECV) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (.NOT. PRESENT (PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: KSORT REQUIRES PSPEC') + ENDIF + IF (UBOUND (KSORT, 1) /= UBOUND (PSPEC, IFLD)) THEN + CALL ABORT_TRANS('EDIST_SPEC: DIMENSION MISMATCH KSORT, PSPEC') + ENDIF +ENDIF + +CALL DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,IVSET,PSPEC,LLDIM1_IS_FLD,& + & ISMAX,ISPEC2,ISPEC2MX,ISPEC2_G,IPOSSP,IDIM0G,IUMPP,IALLMS,IPTRMS,IKN,KSORT) +DEALLOCATE(IDIM0G) +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_SPEC + diff --git a/src/etrans/cpu/external/egath_grid.F90 b/src/etrans/cpu/external/egath_grid.F90 new file mode 100644 index 0000000..05455b5 --- /dev/null +++ b/src/etrans/cpu/external/egath_grid.F90 @@ -0,0 +1,129 @@ +SUBROUTINE EGATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *EGATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL EGATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GATH_GRID_CTL_MOD ,ONLY : GATH_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM),INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_GRID + diff --git a/src/etrans/cpu/external/egath_spec.F90 b/src/etrans/cpu/external/egath_spec.F90 new file mode 100644 index 0000000..c225eb9 --- /dev/null +++ b/src/etrans/cpu/external/egath_spec.F90 @@ -0,0 +1,194 @@ +SUBROUTINE EGATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,KMSMAX,LDZA0IP) + +!**** *EGATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero imaginary part of first coefficients + +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- EGATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Modified 03-09-30 Y. Seity, bug correction IFSEND=0 +! R. El Khatib 23-Oct-2012 Monkey business +! P.Marguinaud 10-Oct-2013 Add an option to set (or not) first +! coefficients imaginary part to zero +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EGATH_SPEC_CONTROL_MOD ,ONLY : EGATH_SPEC_CONTROL + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFGATHG) +INTEGER(KIND=JPIM) :: IFRECV,IFSEND,J +INTEGER(KIND=JPIM) :: IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, IMSMAX, ISPEC2, ISPEC2_G +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: ICPL2M(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KTO TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IMSMAX = RALD%NMSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +IF(PRESENT(KMSMAX)) IMSMAX = KMSMAX +ALLOCATE(IDIM0G(0:IMSMAX)) +ALLOCATE(ICPL2M(0:IMSMAX)) +IF(IMSMAX /= RALD%NMSMAX .OR. ISMAX /= R%NSMAX) THEN + CALL ABORT_TRANS('EGATH_SPEC:TRUNCATION CHANGE NOT YET CODED') +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) + ICPL2M(:) = DALD%NCPL2M(:) +ENDIF + +IFSEND = 0 +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'EGATH_SPEC:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('EGATH_SPEC:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EGATH_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFRECV) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFRECV + CALL ABORT_TRANS('EGATH_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EGATH_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFGATHG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFSEND = IFSEND+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFGATHG) +ELSEIF(NPRTRV > 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET MISSING, NPRTRV ',NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET MISSING, NPRTRV > 1') +ELSE + IFSEND = KFGATHG + IVSET(:) = 1 +ENDIF + +IF(IFSEND > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELDS TO RECIEVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFSEND) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EGATH_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL EGATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,IVSET,PSPEC,LLDIM1_IS_FLD,& + & IMSMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,ICPL2M,LDZA0IP) +DEALLOCATE(IDIM0G) +DEALLOCATE(ICPL2M) + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC + diff --git a/src/etrans/cpu/external/egpnorm_trans.F90 b/src/etrans/cpu/external/egpnorm_trans.F90 new file mode 100644 index 0000000..3c2b329 --- /dev/null +++ b/src/etrans/cpu/external/egpnorm_trans.F90 @@ -0,0 +1,93 @@ +SUBROUTINE EGPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *EGPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms + +!** Interface. +! ---------- +! CALL EGPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! George Mozdzynski *ECMWF* + +! Modifications. +! -------------- +! Original : 19th Sept 2008 +! R. El Khatib 07-08-2009 Optimisation directive for NEC +! R. El Khatib 16-Sep-2019 merge with global model code +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD + +!ifndef INTERFACE + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DIM ,ONLY : R +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GPNORM_TRANS_CTL_MOD, ONLY : GPNORM_TRANS_CTL +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL ,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KRESOL + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: JGL +REAL(KIND=JPRB) :: ZW(R%NDGL) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',0,ZHOOK_HANDLE) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +DO JGL=1,R%NDGL + ZW(1:)=1._JPRB/G%NLOEN(JGL) +ENDDO +CALL GPNORM_TRANS_CTL(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,ZW(1:R%NDGL)) + +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + + +END SUBROUTINE EGPNORM_TRANS diff --git a/src/etrans/cpu/external/einv_trans.F90 b/src/etrans/cpu/external/einv_trans.F90 new file mode 100644 index 0000000..3ec6e53 --- /dev/null +++ b/src/etrans/cpu/external/einv_trans.F90 @@ -0,0 +1,769 @@ +SUBROUTINE EINV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EINV_TRANS_CTL_MOD ,ONLY : EINV_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +INTEGER, SAVE :: number_of_calls=0 + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',0,ZHOOK_HANDLE) + + +#ifndef gnarls + +write (20,*) __FILE__, __LINE__ +number_of_calls=number_of_calls+1 +write (20,*) 'number of calls = ',number_of_calls +call flush(20) +write (0,*) __FILE__, __LINE__ +write (0,*) 'number of calls = ',number_of_calls +call flush(0) +#endif + +! check if args are contiguous +if ( present(PSPVOR) ) THEN + write (20,*) 'shape(PSPVOR) = ',shape(PSPVOR) + !if (.not. is_contiguous(PSPVOR) ) call abort_trans('PSPVOR not contiguous') +endif +if ( present(PSPDIV) ) THEN + write (20,*) 'shape(PSPDIV) = ',shape(PSPDIV) + !if (.not. is_contiguous(PSPDIV) ) call abort_trans('PSPDIV not contiguous') +endif +if ( present(PSPSCALAR) ) THEN + write (20,*) 'shape(PSPSCALAR) = ',shape(PSPSCALAR) + !if (.not. is_contiguous(PSPSCALAR) ) call abort_trans('PSPSCALAR not contiguous') +endif +if ( present(PSPSC3A) ) THEN + write (20,*) 'shape(PSPSC3A) = ',shape(PSPSC3A) + !if (.not. is_contiguous(PSPSC3A) ) call abort_trans('PSPSC3A not contiguous') +endif +if ( present(PSPSC3B) ) THEN + write (20,*) 'shape(PSPSC3B) = ',shape(PSPSC3B) + !if (.not. is_contiguous(PSPSC3B) ) call abort_trans('PSPSC3B not contiguous') +endif +if ( present(PSPSC2) ) THEN + write (20,*) 'shape(PSPSC2) = ',shape(PSPSC2) + !if (.not. is_contiguous(PSPSC2) ) call abort_trans('PSPSC2 not contiguous') +endif +if ( present(PGP) ) THEN + write (20,*) 'shape(PGP) = ',shape(PGP) + !if (.not. is_contiguous(PGP) ) call abort_trans('PGP not contiguous') +endif +if ( present(PGPUV) ) THEN + write (20,*) 'shape(PGPUV) = ',shape(PGPUV) + !if (.not. is_contiguous(PGPUV) ) call abort_trans('PGPUV not contiguous') +endif +if ( present(PGP3A) ) THEN + write (20,*) 'shape(PGP3A) = ',shape(PGP3A) + !if (.not. is_contiguous(PGP3A) ) call abort_trans('PGP3A not contiguous') +endif +if ( present(PGP3B) ) THEN + write (20,*) 'shape(PGP3B) = ',shape(PGP3B) + !if (.not. is_contiguous(PGP3B) ) call abort_trans('PGP3B not contiguous') +endif +if ( present(PGP2) ) THEN + write (20,*) 'shape(PGP2) = ',shape(PGP2) + !if (.not. is_contiguous(PGP2) ) call abort_trans('PGP2 not contiguous') +endif +if ( present(PMEANU) ) THEN + write (20,*) 'shape(PMEANU) = ',shape(PMEANU) + !if (.not. is_contiguous(PMEANU) ) call abort_trans('PMEANU not contiguous') +endif +if ( present(PMEANV) ) THEN + write (20,*) 'shape(PMEANV) = ',shape(PMEANV) + !if (.not. is_contiguous(PMEANV) ) call abort_trans('PMEANV not contiguous') +endif + +if ( present(KVSETUV) ) THEN + write (20,*) 'KVSETU = ',(KVSETUV) +endif +if ( present(KVSETSC) ) THEN + write (20,*) 'KVSETSC = ',(KVSETSC) +endif +if ( present(KVSETSC3A) ) THEN + write (20,*) 'KVSETSC3A = ',(KVSETSC3A) +endif +if ( present(KVSETSC3B) ) THEN + write (20,*) 'KVSETSC3B = ',(KVSETSC3B) +endif +if ( present(KVSETSC2) ) THEN + write (20,*) 'KVSETSC2 = ',(KVSETSC2) +endif +if ( present(KPROMA) ) THEN + write (20,*) 'KPROMA = ',KPROMA +endif +if ( present(LDSCDERS) ) THEN + write (20,*) 'LDSCDERS = ',LDSCDERS +endif +if ( present(LDVORGP) ) THEN + write (20,*) 'LDVORGP = ',LDVORGP +endif +if ( present(LDDIVGP) ) THEN + write (20,*) 'LDDIVGP = ',LDDIVGP +endif +if ( present(LDUVDER) ) THEN + write (20,*) 'LDUVDER = ',LDUVDER +endif + + +CALL GSTATS(1807,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF(.NOT. PRESENT(PSPSCALAR) ) THEN + CALL ABORT_TRANS('INV_TRANS : KVSETSC PRESENT BUT PSPSCALAR MISSING') + ENDIF + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS_G > 0 ) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPDIV TOO SHORT') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANS:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1807,1) + +! ------------------------------------------------------------------ + +! Perform transform +CALL EINV_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV ) + + +#ifndef gnarls + +if ( number_of_calls > -1 ) then + +write (20,*) __FILE__, __LINE__ + +write (20,*) 'EINV_TRANS INPUT:' +if ( present(PSPVOR) ) then + write (20,*) 'PSPVOR = '; write (20,'(4E24.13)') (PSPVOR(:,1:20:4)) +endif +if ( present(PSPDIV) ) then + write (20,*) 'PSPDIV = '; write (20,'(4E24.13)') (PSPDIV(:,1:20:4)) +endif +if ( present(PSPSCALAR) ) then + write (20,*) 'PSPSCALAR = '; write (20,'(4E24.13)') (PSPSCALAR(:,1:20:4)) +endif +if ( present(PSPSC3A) ) then + write (20,*) 'PSPSC3A = '; write (20,'(4E24.13)') (PSPSC3A(:,1:20:4,:)) +endif +if ( present(PSPSC3B) ) then + write (20,*) 'PSPSC3B = '; write (20,'(4E24.13)') (PSPSC3B(:,1:20:4,:)) +endif +if ( present(PSPSC2) ) then + write (20,*) 'PSPSC2 = '; write (20,'(4E24.13)') (PSPSC2(:,1:20:4)) +endif +if ( present(PMEANU) ) then + write (20,*) 'PMEANU = '; write (20,'(4E24.13)') (PMEANU(:)) +endif +if ( present(PMEANV) ) then + write (20,*) 'PMEANV = '; write (20,'(4E24.13)') (PMEANV(:)) +endif + +write (20,*) 'EINV_TRANS OUTPUT:' +if ( present(PGP) ) then + write (20,*) 'PGP = '; write (20,'(4E24.13)') (PGP(1:4,:,1)) +endif +if ( present(PGPUV) ) then + write (20,*) 'PGPUV = '; write (20,'(4E24.13)') (PGPUV(1:4,:,:,1)) +endif +if ( present(PGP3A) ) then + write (20,*) 'PGP3A = '; write (20,'(4E24.13)') (PGP3A(1:4,:,:,1)) +endif +if ( present(PGP3B) ) then + write (20,*) 'PGP3B = '; write (20,'(4E24.13)') (PGP3B(1:4,:,:,1)) +endif +if ( present(PGP2) ) then + write (20,*) 'PGP2 = '; write (20,'(4E24.13)') (PGP2(1:4,:,1)) +endif + +endif + +if ( number_of_calls == -1 ) then + write (0,*) 'aborting at call number ',number_of_calls + call abort_trans('hold it') +endif + +#endif + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANS + diff --git a/src/etrans/cpu/external/einv_transad.F90 b/src/etrans/cpu/external/einv_transad.F90 new file mode 100644 index 0000000..0f38dd3 --- /dev/null +++ b/src/etrans/cpu/external/einv_transad.F90 @@ -0,0 +1,609 @@ +SUBROUTINE EINV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EINV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: like in direct code: IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EINV_TRANS_CTLAD_MOD ,ONLY : EINV_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',0,ZHOOK_HANDLE) +CALL GSTATS(1809,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING") + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'EINV_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPVOR TOO SHORT") + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPDIV MISSING") + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPDIV TOO SHORT") + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'EINV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('EINV_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('EINV_TRANSAD:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1809,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL EINV_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANSAD + diff --git a/src/etrans/cpu/external/esetup_trans.F90 b/src/etrans/cpu/external/esetup_trans.F90 new file mode 100644 index 0000000..8109289 --- /dev/null +++ b/src/etrans/cpu/external/esetup_trans.F90 @@ -0,0 +1,301 @@ +SUBROUTINE ESETUP_TRANS(KMSMAX,KSMAX,KDGL,KDGUX,KLOEN,LDSPLIT,& + & KTMAX,KRESOL,PEXWN,PEYWN,PWEIGHT,LDGRIDONLY,KNOEXTZL,KNOEXTZG, & + & LDUSEFFTW) +!**** *ESETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL ESETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space +! LDGRIDONLY - true if only grid space is required + + +! LDSPLIT describe the distribution among processors of +! grid-point data and has no relevance if you are using a single processor + +! LDUSEFFTW - Use FFTW for FFTs + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESETUP_DIMS - setup distribution independent dimensions +! SUEMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! ESETUP_GEOM - Compute arrays related to grid-point geometry +! SUEMP_TRANS - Second part of setup of distributed environment +! SUEFFT - setup for FFT + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 02-04-11 A. Bogatchev: Passing of TCDIS +! 02-11-14 C. Fischer: soften test on KDGL +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Aug-2012 %LAM in GEOM_TYPE +! R. El Khatib 14-Jun-2013 LENABLED +! R. El Khatib 01-Sep-2015 Support for FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPRINTLEV, MSETUP0, & + & NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +USE TPM_FLT ,ONLY : FLT_RESOL +USE TPM_CTL ,ONLY : CTL_RESOL + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : ALDDISTR_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +USE TPMALD_FIELDS ,ONLY : ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESETUP_DIMS_MOD ,ONLY : ESETUP_DIMS +USE SUEMP_TRANS_MOD ,ONLY : SUEMP_TRANS +USE SUEMP_TRANS_PRELEG_MOD ,ONLY : SUEMP_TRANS_PRELEG +!USE SULEG_MOD +USE ESETUP_GEOM_MOD ,ONLY : ESETUP_GEOM +USE SUEFFT_MOD ,ONLY : SUEFFT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Dummy arguments +INTEGER(KIND=JPIM),INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +LOGICAL ,OPTIONAL,INTENT(IN) :: LDGRIDONLY +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEXWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEYWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZG +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUSEFFTW + +!ifndef INTERFACE + +! Local variables +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',0,ZHOOK_HANDLE) + +IF(MSETUP0 == 0) THEN + CALL ABORT_TRANS('ESETUP_TRANS: SETUP_TRANS0 HAS TO BE CALLED BEFORE ESETUP_TRANS') +ENDIF +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE ESETUP_TRANS ===' + +! Allocate resolution dependent structures common to global and LAM +IF(.NOT. ALLOCATED(DIM_RESOL)) THEN + NDEF_RESOL = 1 + ALLOCATE(DIM_RESOL(NMAX_RESOL)) + ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(GEOM_RESOL(NMAX_RESOL)) + ALLOCATE(DISTR_RESOL(NMAX_RESOL)) + ALLOCATE(FFT_RESOL(NMAX_RESOL)) + ALLOCATE(FFTB_RESOL(NMAX_RESOL)) +#ifdef WITH_FFTW + ALLOCATE(FFTW_RESOL(NMAX_RESOL)) +#endif + ALLOCATE(FLT_RESOL(NMAX_RESOL)) + ALLOCATE(CTL_RESOL(NMAX_RESOL)) + GEOM_RESOL(:)%LAM=.FALSE. + ALLOCATE(LENABLED(NMAX_RESOL)) + LENABLED(:)=.FALSE. +ELSE + NDEF_RESOL = NDEF_RESOL+1 + IF(NDEF_RESOL > NMAX_RESOL) THEN + CALL ABORT_TRANS('ESETUP_TRANS:NDEF_RESOL > NMAX_RESOL') + ENDIF +ENDIF +! Allocate LAM-specific resolution dependent structures +IF(.NOT. ALLOCATED(ALDDIM_RESOL)) THEN + ALLOCATE(ALDDIM_RESOL(NMAX_RESOL)) + ALLOCATE(ALDFIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(ALDGEO_RESOL(NMAX_RESOL)) + ALLOCATE(ALDDISTR_RESOL(NMAX_RESOL)) + ALLOCATE(ALDFFT_RESOL(NMAX_RESOL)) +ENDIF + + +IF (PRESENT(KRESOL)) THEN + KRESOL=NDEF_RESOL +ENDIF + +! Point at structures due to be initialized +CALL ESET_RESOL(NDEF_RESOL) +IF(LLP1) WRITE(NOUT,*) '=== DEFINING RESOLUTION ',NCUR_RESOL + +! Defaults for optional arguments + +G%LREDUCED_GRID = .FALSE. +D%LGRIDONLY = .FALSE. +D%LSPLIT = .FALSE. +TALD%LFFT992=.TRUE. ! Use FFT992 interface for FFTs +#ifdef WITH_FFTW +TW%LFFTW=.FALSE. ! Use FFTW interface for FFTs +#endif + +! NON-OPTIONAL ARGUMENTS +R%NSMAX = KSMAX +RALD%NMSMAX=KMSMAX +RALD%NDGUX=KDGUX +R%NDGL = KDGL +RALD%NDGLSUR=KDGL+2 +R%NDLON =KLOEN(1) + +! IMPLICIT argument : +G%LAM = .TRUE. + +IF (KDGL <= 0) THEN + CALL ABORT_TRANS ('ESETUP_TRANS: KDGL IS NOT A POSITIVE NUMBER') +ENDIF + +! Optional arguments + +ALLOCATE(G%NLOEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'NLOEN ',SIZE(G%NLOEN ),SHAPE(G%NLOEN ) + +IF (G%LREDUCED_GRID) THEN + G%NLOEN(:) = KLOEN(1:R%NDGL) +ELSE + G%NLOEN(:) = R%NDLON +ENDIF + +IF(PRESENT(LDSPLIT)) THEN + D%LSPLIT = LDSPLIT +ENDIF + +IF(PRESENT(KTMAX)) THEN + R%NTMAX = KTMAX +ELSE + R%NTMAX = R%NSMAX +ENDIF +IF(R%NTMAX /= R%NSMAX) THEN + !This SHOULD work but I don't know how to test it /MH + WRITE(NERR,*) 'R%NTMAX /= R%NSMAX',R%NTMAX,R%NSMAX + CALL ABORT_TRANS('ESETUP_TRANS:R%NTMAX /= R%NSMAX HAS NOT BEEN VALIDATED') +ENDIF + +IF(PRESENT(PWEIGHT)) THEN + D%LWEIGHTED_DISTR = .TRUE. + IF( D%LWEIGHTED_DISTR .AND. .NOT.D%LSPLIT )THEN + CALL ABORT_TRANS('SETUP_TRANS: LWEIGHTED_DISTR=T AND LSPLIT=F NOT SUPPORTED') + ENDIF + IF(SIZE(PWEIGHT) /= SUM(G%NLOEN(:)) )THEN + CALL ABORT_TRANS('SETUP_TRANS:SIZE(PWEIGHT) /= SUM(G%NLOEN(:))') + ENDIF + ALLOCATE(D%RWEIGHT(SIZE(PWEIGHT))) + D%RWEIGHT(:)=PWEIGHT(:) +ELSE + D%LWEIGHTED_DISTR = .FALSE. +ENDIF + +IF(PRESENT(LDGRIDONLY)) THEN + D%LGRIDONLY=LDGRIDONLY +ENDIF + +IF (PRESENT(KNOEXTZL)) THEN + R%NNOEXTZL=KNOEXTZL +ELSE + R%NNOEXTZL=0 +ENDIF + +IF (PRESENT(KNOEXTZG)) THEN + R%NNOEXTZG=KNOEXTZG +ELSE + R%NNOEXTZG=0 +ENDIF + +#ifdef WITH_FFTW +IF(PRESENT(LDUSEFFTW)) THEN + TW%LFFTW=LDUSEFFTW +ENDIF +#endif + +IF(PRESENT(LDUSEFFTW)) THEN + TALD%LFFT992=.NOT.LDUSEFFTW +ELSE + TALD%LFFT992=.TRUE. +ENDIF + +! Setup resolution dependent structures +! ------------------------------------- + +! Setup distribution independent dimensions +CALL ESETUP_DIMS +IF (PRESENT(PEXWN)) GALD%EXWN=PEXWN +IF (PRESENT(PEYWN)) GALD%EYWN=PEYWN + +! First part of setup of distributed environment +CALL SUEMP_TRANS_PRELEG + +CALL GSTATS(1802,0) +! Compute arrays related to grid-point geometry +CALL ESETUP_GEOM +! Second part of setup of distributed environment +CALL SUEMP_TRANS +! Initialize Fast Fourier Transform package +CALL SUEFFT +CALL GSTATS(1802,1) + +! Signal the current resolution is active +LENABLED(NDEF_RESOL)=.TRUE. + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +!endif INTERFACE + +END SUBROUTINE ESETUP_TRANS + diff --git a/src/etrans/cpu/external/especnorm.F90 b/src/etrans/cpu/external/especnorm.F90 new file mode 100644 index 0000000..f816ee4 --- /dev/null +++ b/src/etrans/cpu/external/especnorm.F90 @@ -0,0 +1,136 @@ +SUBROUTINE ESPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *ESPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL ESPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESPNORM_CTL_MOD ,ONLY : ESPNORM_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KMASTER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PNORM(:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IMASTER,IFLD,IFLD_G,J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ESPECNORM',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +! Set defaults +IMASTER = 1 +IFLD = 0 + +IF(PRESENT(KMASTER)) THEN + IMASTER = KMASTER +ENDIF + +IF(PRESENT(KVSET)) THEN + IFLD_G = UBOUND(KVSET,1) + DO J=1,IFLD_G + IF(KVSET(J) > NPRTRV) THEN + WRITE(NERR,*) 'ESPECNORM:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('ESPECNORM:KVSET TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFLD = IFLD+1 + ENDIF + ENDDO +ELSE + IF(PRESENT(PSPEC)) THEN + IFLD = UBOUND(PSPEC,1) + ENDIF + IFLD_G = IFLD +ENDIF + +IF(NPRTRV >1) THEN + IF(IFLD > 0 .AND. .NOT. PRESENT(KVSET)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IFLD > 0 AND NOT PRESENT(KVSET)',& + & NPRTRV,IFLD + CALL ABORT_TRANS('ESPECNORM: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF +IF(MYPROC == IMASTER) THEN + IF(.NOT. PRESENT(PNORM)) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM NOT PRESENT') + ENDIF + IF(UBOUND(PNORM,1) < IFLD_G) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM TOO SMALL') + ENDIF +ENDIF +IF(IFLD > 0 ) THEN + IF(.NOT. PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('ESPECNORM: PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,1) < IFLD) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,2) < D%NSPEC2) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL ESPNORM_CTL(PSPEC,IFLD,IFLD_G,KVSET,IMASTER,PMET,PNORM) +IF (LHOOK) CALL DR_HOOK('ESPECNORM',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPECNORM diff --git a/src/etrans/cpu/external/etrans_end.F90 b/src/etrans/cpu/external/etrans_end.F90 new file mode 100644 index 0000000..18905e4 --- /dev/null +++ b/src/etrans/cpu/external/etrans_end.F90 @@ -0,0 +1,147 @@ +SUBROUTINE ETRANS_END(CDMODE) + +!**** *ETRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL ETRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 after G.Radnoti +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Jul-2013 LENABLED +! R. El Khatib 01-Set-2015 Support for FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : MSETUP0, NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL, NPRCIDS +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +USE TPM_FLT ,ONLY : S, FLT_RESOL +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL + +IMPLICIT NONE + +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE +! Local variables +CHARACTER*5 :: CLMODE +INTEGER(KIND=JPIM) :: JRES +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ETRANS_END',0,ZHOOK_HANDLE) +CLMODE='FINAL' +IF (PRESENT(CDMODE)) CLMODE=CDMODE +IF (CLMODE == 'FINAL') THEN + DO JRES=1,NDEF_RESOL + CALL EDEALLOC_RESOL(JRES) + ENDDO + NULLIFY(R) + IF (ALLOCATED(DIM_RESOL)) DEALLOCATE(DIM_RESOL) + NULLIFY(RALD) + IF (ALLOCATED(ALDDIM_RESOL)) DEALLOCATE(ALDDIM_RESOL) +!EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF +!TPM_DISTR + NULLIFY(D) + IF (ALLOCATED(DISTR_RESOL)) DEALLOCATE(DISTR_RESOL) + NULLIFY(DALD) + IF (ALLOCATED(ALDDISTR_RESOL)) DEALLOCATE(ALDDISTR_RESOL) +!TPM_FFT + NULLIFY(T) + IF (ALLOCATED(FFT_RESOL)) DEALLOCATE(FFT_RESOL) + NULLIFY(TB) + IF( ALLOCATED(FFTB_RESOL) ) DEALLOCATE(FFTB_RESOL) +#ifdef WITH_FFTW + !TPM_FFTW + NULLIFY(TW) + DEALLOCATE(FFTW_RESOL) +#endif +!TPM_FLT + NULLIFY(S) + IF (ALLOCATED(FLT_RESOL)) DEALLOCATE(FLT_RESOL) + NULLIFY(TALD) + IF (ALLOCATED(ALDFFT_RESOL)) DEALLOCATE(ALDFFT_RESOL) + +!TPM_FIELDS + NULLIFY(F) + IF (ALLOCATED(FIELDS_RESOL)) DEALLOCATE(FIELDS_RESOL) + NULLIFY(FALD) + IF (ALLOCATED(ALDFIELDS_RESOL)) DEALLOCATE(ALDFIELDS_RESOL) + +!TPM_GEOMETRY + NULLIFY(G) + IF(ALLOCATED(GEOM_RESOL)) DEALLOCATE(GEOM_RESOL) + NULLIFY(GALD) + IF(ALLOCATED(ALDGEO_RESOL)) DEALLOCATE(ALDGEO_RESOL) +!TPM_TRANS + IF(ALLOCATED(FOUBUF_IN)) DEALLOCATE(FOUBUF_IN) + IF(ALLOCATED(FOUBUF)) DEALLOCATE(FOUBUF) + + IF (ALLOCATED(LENABLED)) DEALLOCATE(LENABLED) + MSETUP0 = 0 + NMAX_RESOL = 0 + NCUR_RESOL = 0 + NDEF_RESOL = 0 +ENDIF + +IF (CLMODE == 'FINAL' .OR. CLMODE == 'INTER') THEN + !EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF + !TPM_DISTR + IF (ALLOCATED(NPRCIDS)) DEALLOCATE(NPRCIDS) +ENDIF +IF (LHOOK) CALL DR_HOOK('ETRANS_END',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_END + diff --git a/src/etrans/cpu/external/etrans_inq.F90 b/src/etrans/cpu/external/etrans_inq.F90 new file mode 100644 index 0000000..1d580d6 --- /dev/null +++ b/src/etrans/cpu/external/etrans_inq.F90 @@ -0,0 +1,539 @@ +SUBROUTINE ETRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + & KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + & KMYMS,KESM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + & KULTPP,KPTRLS,& + & KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + & LDSPLITLAT,LDLINEAR_GRID,& + & KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,KDEF_RESOL,LDLAM,& + & PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KCPL2M,KCPL4M ,KPROCM) + +!**** *ETRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL ETRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resolution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension +! N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KESM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation - n direction +! KMSMAX - spectral truncation - m direction +! KNVALUE - n value for each KSPEC2 spectral coeffient +! KMVALUE - m value for each KSPEC2 spectral coeffient +! LDLINEAR_GRID : .TRUE. if the grid is linear + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLEPINM - Eigen-values of the inverse Laplace operator + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing with TFL 36R4 +! R. El Khatib 08-Aug-2012 KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,LDLAM,KDEF_RESOL,LDLINEAR_GRID +! T. Dalkilic 28-Aug-2012 KCPL4M +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NDEF_RESOL +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPRTRW, MYSETV, MYSETW +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_DISTR ,ONLY : DALD +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPMALD_FIELDS + +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS_EW, N_REGIONS_NS +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2G +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KNUMP +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOT +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTG +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTMX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KMYMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KESM0(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KUMPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPOSSP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KALLMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSTA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KONL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KULTPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL,INTENT(INOUT) :: LDSPLITLAT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL :: PGW(:) ! Argument NOT used +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLEI3 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPOLEGL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPMS(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL2M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL4M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPROCM(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMVALUE(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PLEPINM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLINEAR_GRID +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1,IU2 +INTEGER(KIND=JPIM) :: IC, JN, JMLOC, IM, JJ, JM +INTEGER(KIND=JPIM) :: ISMAX(0:R%NSMAX),ISNAX(0:RALD%NMSMAX),ICPLM(0:RALD%NMSMAX) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IF(PRESENT(KSPEC)) KSPEC = D%NSPEC +IF(PRESENT(KSPEC2)) KSPEC2 = D%NSPEC2 +IF(PRESENT(KSPEC2G)) KSPEC2G = R%NSPEC2_G +IF(PRESENT(KSPEC2MX)) KSPEC2MX = D%NSPEC2MX +IF(PRESENT(KNUMP)) KNUMP = D%NUMP +IF(PRESENT(KGPTOT)) KGPTOT = D%NGPTOT +IF(PRESENT(KGPTOTG)) KGPTOTG = D%NGPTOTG +IF(PRESENT(KGPTOTMX)) KGPTOTMX = D%NGPTOTMX +IF(PRESENT(KFRSTLOFF)) KFRSTLOFF = D%NFRSTLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPRTRW)) KPRTRW = NPRTRW +IF(PRESENT(KMYSETW)) KMYSETW = MYSETW +IF(PRESENT(KMYSETV)) KMYSETV = MYSETV +IF(PRESENT(KMY_REGION_NS)) KMY_REGION_NS = MY_REGION_NS +IF(PRESENT(KMY_REGION_EW)) KMY_REGION_EW = MY_REGION_EW +IF(PRESENT(LDLAM)) LDLAM = G%LAM +IF(PRESENT(KDEF_RESOL)) KDEF_RESOL = NDEF_RESOL + +IF(PRESENT(KGPTOTL)) THEN + IF(UBOUND(KGPTOTL,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KGPTOTL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 2 TOO SMALL') + ELSE + KGPTOTL(1:N_REGIONS_NS,1:N_REGIONS_EW) = D%NGPTOTL(:,:) + ENDIF +ENDIF + +IF(PRESENT(KMYMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KMYMS,1) < D%NUMP) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS TOO SMALL') + ELSE + KMYMS(1:D%NUMP) = D%MYMS(:) + ENDIF +ENDIF + +IF(PRESENT(KESM0)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KESM0,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 TOO SMALL') + ELSE + KESM0(0:RALD%NMSMAX) = DALD%NESM0(:) + ENDIF +ENDIF + +IF(PRESENT(KCPL2M)) THEN + IF(UBOUND(KCPL2M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL2M TOO SMALL') + ELSE + KCPL2M(0:RALD%NMSMAX) = DALD%NCPL2M(:) + ENDIF +ENDIF +IF(PRESENT(KPROCM)) THEN + IF(UBOUND(KPROCM,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPROCM TOO SMALL') + ELSE + KPROCM(0:RALD%NMSMAX) = D%NPROCM(:) + ENDIF +ENDIF + +IF(PRESENT(KUMPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KUMPP,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP TOO SMALL') + ELSE + KUMPP(1:NPRTRW) = D%NUMPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPOSSP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPOSSP,1) < NPRTRW+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP TOO SMALL') + ELSE + KPOSSP(1:NPRTRW+1) = D%NPOSSP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRMS,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS TOO SMALL') + ELSE + KPTRMS(1:NPRTRW) = D%NPTRMS(:) + ENDIF +ENDIF + +IF(PRESENT(KALLMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KALLMS,1) < RALD%NMSMAX+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS TOO SMALL') + ELSE + KALLMS(1:RALD%NMSMAX+1) = D%NALLMS(:) + ENDIF +ENDIF + +IF(PRESENT(KDIM0G)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KDIM0G,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G TOO SMALL') + ELSE + KDIM0G(0:RALD%NMSMAX) = D%NDIM0G(0:RALD%NMSMAX) + ENDIF +ENDIF + +IF(PRESENT(KFRSTLAT)) THEN + IF(UBOUND(KFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KFRSTLAT TOO SMALL') + ELSE + KFRSTLAT(1:N_REGIONS_NS) = D%NFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KLSTLAT)) THEN + IF(UBOUND(KLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KLSTLAT TOO SMALL') + ELSE + KLSTLAT(1:N_REGIONS_NS) = D%NLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLAT)) THEN + IF(UBOUND(KPTRLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLAT TOO SMALL') + ELSE + KPTRLAT(1:R%NDGL) = D%NPTRLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRFRSTLAT)) THEN + IF(UBOUND(KPTRFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRFRSTLAT TOO SMALL') + ELSE + KPTRFRSTLAT(1:N_REGIONS_NS) = D%NPTRFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLSTLAT)) THEN + IF(UBOUND(KPTRLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLSTLAT TOO SMALL') + ELSE + KPTRLSTLAT(1:N_REGIONS_NS) = D%NPTRLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KSTA)) THEN + IF(UBOUND(KSTA,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 1 TOO SMALL') + ELSEIF(UBOUND(KSTA,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 2 TOO SMALL') + ELSE + KSTA(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NSTA(:,:) + ENDIF +ENDIF + +IF(PRESENT(KONL)) THEN + IF(UBOUND(KONL,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KONL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 2 TOO SMALL') + ELSE + KONL(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NONL(:,:) + ENDIF +ENDIF + +IF(PRESENT(LDSPLITLAT)) THEN + IF(UBOUND(LDSPLITLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: LDSPLITLAT TOO SMALL') + ELSE + LDSPLITLAT(1:R%NDGL) = D%LSPLITLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KULTPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KULTPP,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP TOO SMALL') + ELSE + KULTPP(1:NPRTRNS) = D%NULTPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRLS,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS TOO SMALL') + ELSE + KPTRLS(1:NPRTRNS) = D%NPTRLS(:) + ENDIF +ENDIF + +IF(PRESENT(PMU)) THEN + IF(UBOUND(PMU,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: PMU TOO SMALL') + ELSE + PMU(1:R%NDGL) = F%RMU + ENDIF +ENDIF + +IF(PRESENT(PRPNM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF + IU1 = UBOUND(PRPNM,1) + IU2 = UBOUND(PRPNM,2) + IF(IU1 < R%NDGNH) THEN + CALL ABORT_TRANS('ETRANS_INQ:FIRST DIM. OF PRNM TOO SMALL') + ELSE + IU1 = MIN(IU1,R%NLEI3) + IU2 = MIN(IU2,D%NSPOLEGL) + PRPNM(1:IU1,1:IU2) = F%RPNM(1:IU1,1:IU2) + ENDIF +ENDIF +IF(PRESENT(KLEI3)) THEN + KLEI3=R%NLEI3 +ENDIF +IF(PRESENT(KSPOLEGL)) THEN + KSPOLEGL=D%NSPOLEGL +ENDIF +IF(PRESENT(KPMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPMS,1) < R%NSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS TOO SMALL') + ELSE + KPMS(0:R%NSMAX) = D%NPMS(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KSMAX)) KSMAX = R%NSMAX +IF(PRESENT(KMSMAX)) KMSMAX = RALD%NMSMAX +IF(PRESENT(PLEPINM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(PLEPINM,1) < R%NSPEC_G/2) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM TOO SMALL') + ELSEIF (LBOUND(PLEPINM,1) /= -1) THEN + CALL ABORT_TRANS('ETRANS_INQ: LOWER BOUND OF PLEPINM SHOULD BE -1') + ELSE + PLEPINM(:) = FALD%RLEPINM(:) + ENDIF +ENDIF +IF(PRESENT(KNVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KNVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KNVALUE(IC+JJ)=JN + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KMVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KMVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KMVALUE(IC+JJ)=IM + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KCPL4M)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KCPL4M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + KCPL4M(JM) = 4*(ISNAX(JM)+1) + ENDDO + ENDIF +ENDIF + + +IF(PRESENT(LDLINEAR_GRID)) THEN + LDLINEAR_GRID = R%NSMAX > (R%NDGL -1)/3 .OR. RALD%NMSMAX > (R%NDLON -1)/3 +ENDIF + + +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_INQ diff --git a/src/etrans/cpu/external/etrans_release.F90 b/src/etrans/cpu/external/etrans_release.F90 new file mode 100644 index 0000000..ea4f5a8 --- /dev/null +++ b/src/etrans/cpu/external/etrans_release.F90 @@ -0,0 +1,51 @@ +SUBROUTINE ETRANS_RELEASE(KRESOL) + +!**** *ETRANS_RELEASE* - release a spectral resolution + +! Purpose. +! -------- +! Release all arrays related to a given resolution tag + +!** Interface. +! ---------- +! CALL ETRANS_RELEASE + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL + +!endif INTERFACE + +! ------------------------------------------------------------------ + +CALL EDEALLOC_RESOL(KRESOL) + +! ------------------------------------------------------------------ + +END SUBROUTINE ETRANS_RELEASE diff --git a/src/etrans/cpu/internal/cpl_int_mod.F90 b/src/etrans/cpu/internal/cpl_int_mod.F90 new file mode 100644 index 0000000..2b55a5b --- /dev/null +++ b/src/etrans/cpu/internal/cpl_int_mod.F90 @@ -0,0 +1,33 @@ +MODULE CPL_INT_MOD +CONTAINS +SUBROUTINE CPL_INT(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,CPL_PROC,KPTRGP) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE +INTEGER(KIND=JPIM), INTENT(IN) :: KENDROWL +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KFFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEN +INTEGER(KIND=JPIM), INTENT(IN) :: KSTA(KENDROWL) +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB), INTENT(INOUT) :: PGTF(KFIELDS,KLEN) +EXTERNAL CPL_PROC + +INTEGER(KIND=JPIM) :: IPTRGP(KFIELDS) +INTEGER(KIND=JPIM) :: J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +!-------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',0,ZHOOK_HANDLE) +IF(PRESENT(KPTRGP)) THEN + IPTRGP(:)=KPTRGP(1:KFIELDS) +ELSE + DO J=1,KFIELDS + IPTRGP(J)=J + ENDDO +ENDIF +CALL CPL_PROC(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,IPTRGP) +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',1,ZHOOK_HANDLE) +END SUBROUTINE CPL_INT +END MODULE CPL_INT_MOD diff --git a/src/etrans/cpu/internal/easre1ad_mod.F90 b/src/etrans/cpu/internal/easre1ad_mod.F90 new file mode 100644 index 0000000..b382d78 --- /dev/null +++ b/src/etrans/cpu/internal/easre1ad_mod.F90 @@ -0,0 +1,80 @@ +MODULE EASRE1AD_MOD +CONTAINS +SUBROUTINE EASRE1AD(KM,KMLOC,KF_OUT_LT,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_TRANS +USE EASRE1BAD_MOD ,ONLY : EASRE1BAD + +!**** *EASRE1AD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *EASRE1AD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. EASRE1BAD - basic recombination routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1AD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_OUT_LT + +REAL(KIND=JPRB) , INTENT(OUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: IFLDS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EASRE1AD_MOD:EASRE1AD',0,ZHOOK_HANDLE) +IFLDS = KF_OUT_LT + +CALL EASRE1BAD(IFLDS,KM,KMLOC,PIA) +IF (LHOOK) CALL DR_HOOK('EASRE1AD_MOD:EASRE1AD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1AD +END MODULE EASRE1AD_MOD diff --git a/src/etrans/cpu/internal/easre1b_mod.F90 b/src/etrans/cpu/internal/easre1b_mod.F90 new file mode 100644 index 0000000..cae14b3 --- /dev/null +++ b/src/etrans/cpu/internal/easre1b_mod.F90 @@ -0,0 +1,93 @@ +MODULE EASRE1B_MOD +CONTAINS +SUBROUTINE EASRE1B(KFC,KM,KMLOC,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +!**** *ASRE1B* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1B(..) + +! Explicit arguments : +! ------------------- KFC - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 26-Aug-2021 Optimizations +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +REAL(KIND=JPRB), INTENT(IN) :: PIA(RALD%NDGLSUR+R%NNOEXTZG,KFC) + +INTEGER(KIND=JPIM) :: JFLD, JGL ,IPROC +INTEGER(KIND=JPIM) :: IISTAN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',0,ZHOOK_HANDLE) +#ifdef __INTEL_COMPILER +!$OMP SIMD PRIVATE(JGL) +DO JFLD=1,KFC + DO JGL=1,R%NDGL + FOUBUF_IN((D%NSTAGT0B(D%NPROCL(JGL))+D%NPNTGTB1(KMLOC,JGL))*KFC+JFLD)=PIA(JGL,JFLD) + ENDDO +ENDDO +#else +DO JGL=1,R%NDGL + IPROC=D%NPROCL(JGL) + IISTAN=(D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*KFC + DO JFLD =1,KFC + FOUBUF_IN(IISTAN+JFLD)=PIA(JGL,JFLD) + ENDDO +ENDDO +#endif +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1B +END MODULE EASRE1B_MOD diff --git a/src/etrans/cpu/internal/easre1bad_mod.F90 b/src/etrans/cpu/internal/easre1bad_mod.F90 new file mode 100644 index 0000000..0aa6f34 --- /dev/null +++ b/src/etrans/cpu/internal/easre1bad_mod.F90 @@ -0,0 +1,97 @@ +MODULE EASRE1BAD_MOD +CONTAINS +SUBROUTINE EASRE1BAD(KFC,KM,KMLOC,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +!**** *EASRE1BAD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *EASRE1BAD(..) + +! Explicit arguments : +! ------------------- KFC - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1BAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 26-Aug-2021 Optimizations +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC + +REAL(KIND=JPRB), INTENT(OUT) :: PIA(RALD%NDGLSUR+R%NNOEXTZG,KFC) + +INTEGER(KIND=JPIM) :: JFLD, JGL ,IPROC +INTEGER(KIND=JPIM) :: IISTAN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EASRE1BAD_MOD:EASRE1BAD',0,ZHOOK_HANDLE) +#ifdef __INTEL_COMPILER +!$OMP SIMD PRIVATE(JGL) +DO JFLD =1,KFC + DO JGL=1,R%NDGL + PIA(JGL,JFLD)=FOUBUF_IN((D%NSTAGT0B(D%NPROCL(JGL))+D%NPNTGTB1(KMLOC,JGL))*KFC+JFLD) + ENDDO +ENDDO +#else +DO JGL=1,R%NDGL + IPROC=D%NPROCL(JGL) + DO JFLD =1,KFC + IISTAN=(D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*KFC + PIA(JGL,JFLD)=FOUBUF_IN(IISTAN+JFLD) + ENDDO +ENDDO +#endif +IF (LHOOK) CALL DR_HOOK('EASRE1BAD_MOD:EASRE1BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1BAD +END MODULE EASRE1BAD_MOD diff --git a/src/etrans/cpu/internal/edealloc_resol_mod.F90 b/src/etrans/cpu/internal/edealloc_resol_mod.F90 new file mode 100644 index 0000000..5d341b9 --- /dev/null +++ b/src/etrans/cpu/internal/edealloc_resol_mod.F90 @@ -0,0 +1,102 @@ +MODULE EDEALLOC_RESOL_MOD +CONTAINS +SUBROUTINE EDEALLOC_RESOL(KRESOL) + +!**** *EDEALLOC_RESOL_MOD* - Deallocations of a resolution + +! Purpose. +! -------- +! Release allocated arrays for a given resolution + +!** Interface. +! ---------- +! CALL EDEALLOC_RESOL_MOD + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 from etrans_end +! B. Bochenek (Apr 2015): Phasing: update +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LENABLED, NOUT +USE TPM_DISTR ,ONLY : D +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FFT ,ONLY : T +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW,DESTROY_PLANS_FFTW +#endif +USE TPM_FLT ,ONLY : S + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KRESOL + +! ------------------------------------------------------------------ + +IF (.NOT.LENABLED(KRESOL)) THEN + + WRITE(UNIT=NOUT,FMT='('' EDEALLOC_RESOL WARNING: KRESOL = '',I3,'' ALREADY DISABLED '')') KRESOL + +ELSE + + CALL ESET_RESOL(KRESOL) + + !TPM_DISTR + DEALLOCATE(D%NFRSTLAT,D%NLSTLAT,D%NPTRLAT,D%NPTRFRSTLAT,D%NPTRLSTLAT) + DEALLOCATE(D%LSPLITLAT,D%NSTA,D%NONL,D%NGPTOTL,D%NPROCA_GP) + + IF(D%LWEIGHTED_DISTR) THEN + DEALLOCATE(D%RWEIGHT) + ENDIF + + IF(.NOT.D%LGRIDONLY) THEN + + DEALLOCATE(D%MYMS,D%NUMPP,D%NPOSSP,D%NPROCM,D%NDIM0G,D%NASM0,D%NATM0) + DEALLOCATE(D%NLATLS,D%NLATLE,D%NPMT,D%NPMS,D%NPMG,D%NULTPP,D%NPROCL) + DEALLOCATE(D%NPTRLS,D%NALLMS,D%NPTRMS,D%NSTAGT0B,D%NSTAGT1B,D%NPNTGTB0) + DEALLOCATE(D%NPNTGTB1,D%NLTSFTB,D%NLTSGTB,D%MSTABF) + DEALLOCATE(D%NSTAGTF) + + !TPM_FFT + DEALLOCATE(T%TRIGS,T%NFAX) +#ifdef WITH_FFTW + !TPM_FFTW + IF( TW%LFFTW )THEN + CALL DESTROY_PLANS_FFTW + ENDIF +#endif + !TPM_GEOMETRY + DEALLOCATE(G%NMEN,G%NDGLU) + + ELSE + + DEALLOCATE(G%NLOEN) + + ENDIF + + LENABLED(KRESOL)=.FALSE. + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EDEALLOC_RESOL +END MODULE EDEALLOC_RESOL_MOD diff --git a/src/etrans/cpu/internal/edir_trans_ctl_mod.F90 b/src/etrans/cpu/internal/edir_trans_ctl_mod.F90 new file mode 100644 index 0000000..34c6db0 --- /dev/null +++ b/src/etrans/cpu/internal/edir_trans_ctl_mod.F90 @@ -0,0 +1,202 @@ +MODULE EDIR_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EDIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS_CTL* - Control routine for direct spectral transform. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PMEANU,PMEANV - mean winds +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! G. Radnoti 01-03-13 adaptation to aladin +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 02-09-30 : P. Smolikova AUX_PROC for d4 in NH +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTDIR_CTL_MOD ,ONLY : ELTDIR_CTL +USE EFTDIR_CTL_MOD ,ONLY : EFTDIR_CTL + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP,& + & AUX_PROC=AUX_PROC) + ENDIF + CALL ELTDIR_CTL(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV,AUX_PROC=AUX_PROC) + ENDDO +ELSE + + ! No splitting of fields, transform done in one go + + CALL EFTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,IF_GPB,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2,& + & AUX_PROC=AUX_PROC) + + CALL ELTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV,& + & AUX_PROC=AUX_PROC) + +ENDIF +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIR_TRANS_CTL +END MODULE EDIR_TRANS_CTL_MOD diff --git a/src/etrans/cpu/internal/edir_trans_ctlad_mod.F90 b/src/etrans/cpu/internal/edir_trans_ctlad_mod.F90 new file mode 100644 index 0000000..34de8ee --- /dev/null +++ b/src/etrans/cpu/internal/edir_trans_ctlad_mod.F90 @@ -0,0 +1,194 @@ +MODULE EDIR_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE EDIR_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +!**** *EDIR_TRANS_CTLAD* - Control routine for direct spectral transform-adj. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTDIR_CTLAD_MOD ,ONLY : ELTDIR_CTLAD +USE EFTDIR_CTLAD_MOD ,ONLY : EFTDIR_CTLAD + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTLAD_MOD:EDIR_TRANS_CTLAD',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL ELTDIR_CTLAD(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL ELTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + + CALL EFTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,IF_GPB,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) +ENDIF +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTLAD_MOD:EDIR_TRANS_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIR_TRANS_CTLAD +END MODULE EDIR_TRANS_CTLAD_MOD diff --git a/src/etrans/cpu/internal/edist_spec_control_mod.F90 b/src/etrans/cpu/internal/edist_spec_control_mod.F90 new file mode 100644 index 0000000..ce55ba9 --- /dev/null +++ b/src/etrans/cpu/internal/edist_spec_control_mod.F90 @@ -0,0 +1,3 @@ +MODULE EDIST_SPEC_CONTROL_MOD + ! dead code - merged with DIST_SPEC_CONTROL_MOD +END MODULE EDIST_SPEC_CONTROL_MOD diff --git a/src/etrans/cpu/internal/efsc_mod.F90 b/src/etrans/cpu/internal/efsc_mod.F90 new file mode 100644 index 0000000..77ab471 --- /dev/null +++ b/src/etrans/cpu/internal/efsc_mod.F90 @@ -0,0 +1,110 @@ +MODULE EFSC_MOD +CONTAINS +SUBROUTINE EFSC(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *FSC - Division by a*cos(theta), east-west derivatives + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSC(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G +USE TPMALD_GEO ,ONLY : GALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(IN ) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + +INTEGER(KIND=JPIM) :: JF,IGLG,II,IR,JM +REAL(KIND=JPRB) :: ZIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',0,ZHOOK_HANDLE) +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) + +! ------------------------------------------------------------------ + +!* EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + ZIM=REAL(JM,JPRB)*GALD%EXWN + IR = ISTAGTF+2*JM+1 + II = IR+1 +! use unroll to provoke vectorization of outer loop +!cdir unroll=4 + DO JF=1,2*KF_UV + PUVDERS(JF,IR) = -PUV(JF,II)*ZIM + PUVDERS(JF,II) = PUV(JF,IR)*ZIM + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + ZIM=REAL(JM,JPRB)*GALD%EXWN + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,KF_SCALARS + PEWDERS(JF,IR) = -PSCALAR(JF,II)*ZIM + PEWDERS(JF,II) = PSCALAR(JF,IR)*ZIM + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFSC +END MODULE EFSC_MOD diff --git a/src/etrans/cpu/internal/efscad_mod.F90 b/src/etrans/cpu/internal/efscad_mod.F90 new file mode 100644 index 0000000..4b335f4 --- /dev/null +++ b/src/etrans/cpu/internal/efscad_mod.F90 @@ -0,0 +1,121 @@ +MODULE EFSCAD_MOD +CONTAINS +SUBROUTINE EFSCAD(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *EFSCAD - Division by a*cos(theta), east-west derivatives - adjoint + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL EFSCAD(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G + +USE TPMALD_GEO ,ONLY : GALD + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + +INTEGER(KIND=JPIM) :: JF,IGLG,II,IR,JM + +REAL(KIND=JPRB) :: ZIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFSCAD_MOD:EFSCAD',0,ZHOOK_HANDLE) +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + + ZIM=REAL(JM,JPRB)*GALD%EXWN + + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,2*KF_UV + + PUV(JF,II) = PUV(JF,II) - ZIM*PUVDERS(JF,IR) + PUV(JF,IR) = PUV(JF,IR) + ZIM*PUVDERS(JF,II) + + PUVDERS(JF,IR) = 0.0_JPRB + PUVDERS(JF,II) = 0.0_JPRB + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + + ZIM=REAL(JM,JPRB)*GALD%EXWN + + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,KF_SCALARS + + PSCALAR(JF,II) = PSCALAR(JF,II) - ZIM* PEWDERS(JF,IR) + PSCALAR(JF,IR) = PSCALAR(JF,IR) + ZIM* PEWDERS(JF,II) + + PEWDERS(JF,IR) = 0.0_JPRB + PEWDERS(JF,II) = 0.0_JPRB + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EFSCAD_MOD:EFSCAD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EFSCAD +END MODULE EFSCAD_MOD diff --git a/src/etrans/cpu/internal/eftdir_ctl_mod.F90 b/src/etrans/cpu/internal/eftdir_ctl_mod.F90 new file mode 100644 index 0000000..8738f4c --- /dev/null +++ b/src/etrans/cpu/internal/eftdir_ctl_mod.F90 @@ -0,0 +1,215 @@ +MODULE EFTDIR_CTL_MOD +CONTAINS +SUBROUTINE EFTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,AUX_PROC) + +!**** *EFTDIR_CTL - Direct Fourier transform control + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTL(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_GPB - total global number of output gridpoint fields +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-03-13 adaptation to aladin (coupling) +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 19-11-01 : G. Radnoti bug corection by introducing cpl_int interface +! 02-09-30 : P. Smolikova AUX_PROC for d4 in NH +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN ,ONLY : NSTACK_MEMORY_TR +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE TRGTOL_MOD ,ONLY : TRGTOL +USE FOURIER_OUT_MOD ,ONLY : FOURIER_OUT +USE FTDIR_MOD ,ONLY : FTDIR +USE EXTPER_MOD ,ONLY : EXTPER +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +! Local variables +!REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS,D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER, CONTIGUOUS :: ZGTF(:,:) +REAL(KIND=JPRB) :: ZDUM +INTEGER(KIND=JPIM) :: IST,INUL,JGL,IGL,IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTL_MOD:EFTDIR_CTL',0,ZHOOK_HANDLE) + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +!IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +!ELSE +! ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) +!! Now, force the OS to allocate this shared array right now, not when it starts +!! to be used which is an OPEN-MP loop, that would cause a threads +!! synchronization lock : +! IF (KF_FS > 0 .AND. D%NLENGTF > 0) THEN +! ZGTF_HEAP(1,1)=HUGE(1._JPRB) +! ENDIF +! ZGTF => ZGTF_HEAP(:,:) +!ENDIF + +! Transposition + +CALL GSTATS(158,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(158,1) +CALL GSTATS(106,0) + +! Periodization of auxiliary fields in x direction +IF(R%NNOEXTZL>0) THEN + CALL EXTPER(ZGTF,R%NDLON+R%NNOEXTZL,1,R%NDLON,KF_FS,D%NDGL_FS,D%NSTAGTF,0) +ELSE + IF (PRESENT(AUX_PROC)) THEN + CALL AUX_PROC(ZGTF,ZDUM,KF_FS,D%NLENGTF,1,D%NDGL_FS,0,.TRUE.,& + & D%NSTAGTF,INUL,INUL,INUL) + ENDIF +ENDIF + +! Fourier transform + +IBLEN=D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here +ENDIF + +CALL GSTATS(1640,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + IF(KF_FS>0) THEN + CALL FTDIR(ZGTF,KF_FS,IGL) + ENDIF + +! Save Fourier data in FOUBUF_IN + + CALL FOURIER_OUT(ZGTF,KF_FS,IGL) +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1640,1) +CALL GSTATS(106,1) +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTL_MOD:EFTDIR_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIR_CTL +END MODULE EFTDIR_CTL_MOD diff --git a/src/etrans/cpu/internal/eftdir_ctlad_mod.F90 b/src/etrans/cpu/internal/eftdir_ctlad_mod.F90 new file mode 100644 index 0000000..2cb60db --- /dev/null +++ b/src/etrans/cpu/internal/eftdir_ctlad_mod.F90 @@ -0,0 +1,202 @@ +MODULE EFTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE EFTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTDIR_CTLAD - Direct Fourier transform control - adjoint + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL EFTDIR_CTLAD(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! EFTDIRAD - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 19-11-01 G. Radnoti bug correction by introducing CPL_INT interface +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 05-03-15 remove HLOMP +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN ,ONLY : NSTACK_MEMORY_TR +USE TPM_DISTR ,ONLY : D + +USE TRLTOG_MOD ,ONLY : TRLTOG +USE FOURIER_OUTAD_MOD ,ONLY : FOURIER_OUTAD +USE EFTDIRAD_MOD ,ONLY : EFTDIRAD +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP2(:,:,:) + +! Local variables +!REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS,D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER :: ZGTF(:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: JGL,IGL,J1,J2 +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTLAD_MOD:EFTDIR_CTLAD',0,ZHOOK_HANDLE) +CALL GSTATS(133,0) + +!IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +!ELSE +! ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) +! ZGTF => ZGTF_HEAP(:,:) +!ENDIF + +ZGTF(:,:)=0._JPRB + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IVSETSC(:) = -1 +ENDIF +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +CALL GSTATS(1642,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + CALL FOURIER_OUTAD(ZGTF,KF_FS,IGL) + +! Fourier transform + + IF(KF_FS>0) THEN + CALL EFTDIRAD(ZGTF,KF_FS,IGL) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1642,1) +CALL GSTATS(133,1) + +! Transposition + +CALL GSTATS(183,0) +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +CALL GSTATS(183,1) +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTLAD_MOD:EFTDIR_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIR_CTLAD +END MODULE EFTDIR_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eftdirad_mod.F90 b/src/etrans/cpu/internal/eftdirad_mod.F90 new file mode 100644 index 0000000..f0c1721 --- /dev/null +++ b/src/etrans/cpu/internal/eftdirad_mod.F90 @@ -0,0 +1,120 @@ +MODULE EFTDIRAD_MOD +CONTAINS +SUBROUTINE EFTDIRAD(PREEL,KFIELDS,KGL) + +!**** *EFTDIRAD - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform - adjoint +! -------- + +!** Interface. +! ---------- +! CALL EFTDIRAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_TRANS +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE TPM_DIM ,ONLY : R +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFTDIRAD_MOD:EFTDIRAD',0,ZHOOK_HANDLE) + +ITYPE = 1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG) +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO +DO JJ=1,1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = 2.0_JPRB * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +#ifdef WITH_FFTW +IF( .NOT. TW%LFFTW )THEN +#endif + + IF( T%LUSEFFT992(KGL) )THEN + + CALL FFT992(PREEL(1,IOFF),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + + ELSE + + CALL BLUESTEIN_FFT(TB,ILOEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + + ENDIF + +#ifdef WITH_FFTW +ELSE + + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + +ENDIF +#endif + + + ! Change of metric (not in forward routine) +ZNORM=1.0_JPRB/(2.0_JPRB*REAL(ILOEN,JPRB)) +DO JJ=1,ILOEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = ZNORM * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EFTDIRAD_MOD:EFTDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIRAD +END MODULE EFTDIRAD_MOD diff --git a/src/etrans/cpu/internal/eftinv_ctl_mod.F90 b/src/etrans/cpu/internal/eftinv_ctl_mod.F90 new file mode 100644 index 0000000..71ba600 --- /dev/null +++ b/src/etrans/cpu/internal/eftinv_ctl_mod.F90 @@ -0,0 +1,274 @@ +MODULE EFTINV_CTL_MOD +CONTAINS +SUBROUTINE EFTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTINV_CTL - Inverse Fourier transform control + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINV_CTL(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Hello : 03-10-14 old way of calling +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NERR !,NSTACK_MEMORY_TR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +USE TPM_DISTR ,ONLY : D + +USE FOURIER_IN_MOD ,ONLY : FOURIER_IN +USE EFSC_MOD ,ONLY : EFSC +USE FTINV_MOD ,ONLY : FTINV +USE TRLTOG_MOD ,ONLY : TRLTOG +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) + +!REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS,D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER :: ZGTF(:,:) +REAL(KIND=JPRB),TARGET :: ZDUM(1,D%NLENGTF) +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! 1. Copy Fourier data to local array + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD:EFTINV_CTL',0,ZHOOK_HANDLE) +CALL GSTATS(107,0) + +!IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +!ELSE +! ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) +!! Now, force the OS to allocate this shared array right now, not when it starts +!! to be used which is an OPEN-MP loop, that would cause a threads synchronization lock : +! IF (KF_FS > 0 .AND. D%NLENGTF > 0) THEN +! ZGTF_HEAP(1,1)=HUGE(1._JPRB) +! ENDIF +! ZGTF => ZGTF_HEAP(:,:) +!ENDIF + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + IST = IST+KF_SCALARS + ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(1:1,:) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZEWDERS => ZDUM(1:1,:) + ENDIF +ENDIF + +CALL GSTATS(1639,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + CALL FOURIER_IN(ZGTF,KF_OUT_LT,IGL) + +! 2. Fourier space computations + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL EFSC(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 3. Fourier transform + IF(KF_FS > 0) THEN + CALL FTINV(ZGTF,KF_FS,IGL) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1639,1) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF +CALL GSTATS(107,1) + +! 4. Transposition + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSEIF(PRESENT(KVSETSC2).OR.PRESENT(KVSETSC3A)& + & .OR.PRESENT(KVSETSC3B)) THEN + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTL_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(157,0) +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(157,1) + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD:EFTINV_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINV_CTL +END MODULE EFTINV_CTL_MOD diff --git a/src/etrans/cpu/internal/eftinv_ctlad_mod.F90 b/src/etrans/cpu/internal/eftinv_ctlad_mod.F90 new file mode 100644 index 0000000..0e1a543 --- /dev/null +++ b/src/etrans/cpu/internal/eftinv_ctlad_mod.F90 @@ -0,0 +1,296 @@ +MODULE EFTINV_CTLAD_MOD +CONTAINS +SUBROUTINE EFTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTINV_CTLAD - Inverse Fourier transform control - adjoint + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINV_CTLAD(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NERR !,NSTACK_MEMORY_TR +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP +USE TPM_DISTR ,ONLY : D + +USE FOURIER_INAD_MOD ,ONLY : FOURIER_INAD +USE EFSCAD_MOD ,ONLY : EFSCAD +USE EFTINVAD_MOD ,ONLY : EFTINVAD +USE TRGTOL_MOD ,ONLY : TRGTOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE EXTPER_MOD ,ONLY : EXTPER +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! ------------------------------------------------------------------ + +!REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS,D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER :: ZGTF(:,:) +REAL(KIND=JPRB),TARGET :: ZDUM(1,D%NLENGTF) +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IST, IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! 4. Transposition + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTLAD_MOD:EFTINV_CTLAD',0,ZHOOK_HANDLE) + +!IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +!ELSE +! ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) +! ZGTF => ZGTF_HEAP(:,:) +!ENDIF + +ZGTF(:,:)=0._JPRB + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF + +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV_CTLAD:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTLAD_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(182,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(182,1) + +! Periodization of auxiliary fields in x direction +IF(R%NNOEXTZL>0) THEN + CALL EXTPER(ZGTF,R%NDLON+R%NNOEXTZL,1,R%NDLON,KF_FS,D%NDGL_FS,D%NSTAGTF,0) +ENDIF + + +! 3. Fourier transform + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + IST = IST+KF_SCALARS + ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(1:1,:) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZEWDERS => ZDUM(1:1,:) + ENDIF +ENDIF + +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here +ENDIF + +CALL GSTATS(132,0) + +CALL GSTATS(1641,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + IF(KF_FS > 0) THEN + CALL EFTINVAD(ZGTF,KF_FS,IGL) + ENDIF + +! 2. Fourier space computations + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL EFSCAD(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 1. Copy Fourier data to local array + + CALL FOURIER_INAD(ZGTF,KF_OUT_LT,IGL) + +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1641,1) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF + +CALL GSTATS(132,1) +IF (LHOOK) CALL DR_HOOK('EFTINV_CTLAD_MOD:EFTINV_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINV_CTLAD +END MODULE EFTINV_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eftinvad_mod.F90 b/src/etrans/cpu/internal/eftinvad_mod.F90 new file mode 100644 index 0000000..38cce57 --- /dev/null +++ b/src/etrans/cpu/internal/eftinvad_mod.F90 @@ -0,0 +1,129 @@ +MODULE EFTINVAD_MOD +CONTAINS +SUBROUTINE EFTINVAD(PREEL,KFIELDS,KGL) + +!**** *EFTINVAD - Inverse Fourier transform - adjoint + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINVAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFTINVAD_MOD:EFTINVAD',0,ZHOOK_HANDLE) + +ITYPE =-1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG)+R%NNOEXTZL +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 + +! ! Change of metric (not in forward routine) + +#ifdef WITH_FFTW +IF( .NOT. TW%LFFTW )THEN +#endif + + IF( T%LUSEFFT992(KGL) )THEN + + CALL FFT992(PREEL(1,IOFF),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + + ELSE + + CALL BLUESTEIN_FFT(TB,ILOEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + DO JJ=1,ICLEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ)=PREEL(JF,IOFF-1+JJ)/REAL(ILOEN,JPRB) + ENDDO + ENDDO + + ENDIF + +#ifdef WITH_FFTW +ELSE + + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + +ENDIF +#endif + +ZNORM=2.0_JPRB*REAL(ILOEN,JPRB) +DO JJ=1,1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = (ZNORM/2.0_JPRB) * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +DO JJ=3,ILOEN+1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = ZNORM * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EFTINVAD_MOD:EFTINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINVAD +END MODULE EFTINVAD_MOD diff --git a/src/etrans/cpu/internal/egath_spec_control_mod.F90 b/src/etrans/cpu/internal/egath_spec_control_mod.F90 new file mode 100644 index 0000000..6b7f35a --- /dev/null +++ b/src/etrans/cpu/internal/egath_spec_control_mod.F90 @@ -0,0 +1,204 @@ +MODULE EGATH_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE EGATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& + & KMSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,KCPL2M,LDZA0IP) + +!**** *EGATH_SPEC_CONTROL* - Gather global spectral array from processors + +! Purpose. +! -------- +! Routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be distributed +! KTO(:) - Processor responsible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero unused spectral coefficients + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_NON_BLOCKING_STANDARD, JP_BLOCKING_STANDARD + +!USE TPM_GEN +!USE TPM_DIM +USE TPM_DISTR ,ONLY : MTAGDISTSP, NPRCIDS, NPRTRW, MYSETV, MYPROC, NPROC + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SET2PE_MOD ,ONLY : SET2PE +USE SUWAVEDI_MOD + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KCPL2M(0:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +REAL(KIND=JPRB) :: ZFLD(KSPEC2,KFGATHG) +REAL(KIND=JPRB),ALLOCATABLE :: ZRECV(:,:) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ISP,ILENR,ISTA,ISTP,ISENDREQ(KFGATHG),IPOS0,JNM +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G),IMYFIELDS + +! ------------------------------------------------------------------ + + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JM=1,KSPEC2_G + DO JFLD=1,KFGATHG + PSPECG(JFLD,JM) =PSPEC(JFLD,JM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JFLD=1,KFGATHG + DO JM=1,KSPEC2_G + PSPECG(JM,JFLD) =PSPEC(JM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + IF(IMYFIELDS>0) THEN + ALLOCATE(ZRECV(KSPEC2_G,IMYFIELDS)) + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KMSMAX + DO JN=0,KCPL2M(JM)/2-1 + IDIST(II+1) = KDIM0G(JM)+4*JN + IDIST(II+2) = KDIM0G(JM)+4*JN+1 + IDIST(II+3) = KDIM0G(JM)+4*JN+2 + IDIST(II+4) = KDIM0G(JM)+4*JN+3 + II = II+4 + ENDDO + ENDDO + CALL GSTATS(1804,1) + ENDIF + + CALL GSTATS_BARRIER(788) + + !Send + CALL GSTATS(810,0) + IFLDS = 0 + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + + IFLDS = IFLDS+1 + ISND = KTO(JFLD) + ITAG = MTAGDISTSP+JFLD+17 + IF(LDIM1_IS_FLD) THEN + ZFLD(1:KSPEC2,IFLDS)=PSPEC(IFLDS,1:KSPEC2) + CALL MPL_SEND(ZFLD(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='EGATH_SPEC_CONTROL') + ELSE + CALL MPL_SEND(PSPEC(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='EGATH_SPEC_CONTROL') + ENDIF + ENDIF + ENDDO + ENDIF + + ! Recieve + IFLDR = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IBSET = KVSET(JFLD) + IFLDR = IFLDR+1 + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(IRCV,0,0,JA,IBSET) + ITAG = MTAGDISTSP+JFLD+17 + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + CALL MPL_RECV(ZRECV(ISTA:ISTP,IFLDR),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & + &CDSTRING='EGATH_SPEC_CONTROL') + IF( ILENR /= ILEN )THEN + WRITE(0,'("EGATH_SPEC_CONTROL: JFLD=",I4," JA=",I4," ILEN=",I10," ILENR=",I10)')& + &JFLD,JA,ILEN,ILENR + CALL ABORT_TRANS('EGATH_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + + ! Check for completion of sends + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JFLD), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ENDIF + ENDDO + ENDIF + CALL GSTATS(810,1) + CALL GSTATS_BARRIER2(788) + + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JNM,II,JN,ISP) + DO JFLD=1,IMYFIELDS + IF(LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + PSPECG(JFLD,JNM) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ELSE + DO JNM=1,KSPEC2_G + PSPECG(JNM,JFLD) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + IF(ALLOCATED(ZRECV)) DEALLOCATE(ZRECV) + + !Synchronize processors + CALL GSTATS(785,0) + CALL MPL_BARRIER(CDSTRING='EGATH_SPEC_CONTROL:') + CALL GSTATS(785,1) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC_CONTROL +END MODULE EGATH_SPEC_CONTROL_MOD + diff --git a/src/etrans/cpu/internal/einv_trans_ctl_mod.F90 b/src/etrans/cpu/internal/einv_trans_ctl_mod.F90 new file mode 100644 index 0000000..fde4b80 --- /dev/null +++ b/src/etrans/cpu/internal/einv_trans_ctl_mod.F90 @@ -0,0 +1,298 @@ +MODULE EINV_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EINV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PSPMEANU,PSPMEANV) + +!**** *EINV_TRANS_CTL* - Control routine for inverse spectral transform. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTINV_CTL_MOD ,ONLY : ELTINV_CTL +USE EFTINV_CTL_MOD ,ONLY : EFTINV_CTL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G + +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL ELTINV_CTL(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PSPMEANU,PSPMEANV=PSPMEANV,FSPGL_PROC=FSPGL_PROC) + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL ELTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PSPMEANU,PSPMEANV=PSPMEANV,FSPGL_PROC=FSPGL_PROC) + + CALL EFTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +ENDIF +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EINV_TRANS_CTL +END MODULE EINV_TRANS_CTL_MOD diff --git a/src/etrans/cpu/internal/einv_trans_ctlad_mod.F90 b/src/etrans/cpu/internal/einv_trans_ctlad_mod.F90 new file mode 100644 index 0000000..aa00708 --- /dev/null +++ b/src/etrans/cpu/internal/einv_trans_ctlad_mod.F90 @@ -0,0 +1,292 @@ +MODULE EINV_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE EINV_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +!**** *EINV_TRANS_CTLAD* - Control routine for inverse spectral transform adj. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTLAD - control of Legendre transform +! FTINV_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTINV_CTLAD_MOD ,ONLY : ELTINV_CTLAD +USE EFTINV_CTLAD_MOD ,ONLY : EFTINV_CTLAD +! + +IMPLICIT NONE + +! Declaration of arguments +! +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTLAD_MOD:EINV_TRANS_CTLAD',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + CALL ELTINV_CTLAD(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL EFTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + + CALL ELTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV ) +ENDIF +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTLAD_MOD:EINV_TRANS_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EINV_TRANS_CTLAD +END MODULE EINV_TRANS_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eledir_mod.F90 b/src/etrans/cpu/internal/eledir_mod.F90 new file mode 100644 index 0000000..d1c6765 --- /dev/null +++ b/src/etrans/cpu/internal/eledir_mod.F90 @@ -0,0 +1,99 @@ +MODULE ELEDIR_MOD +CONTAINS +SUBROUTINE ELEDIR(KM,KFC,KLED2,PFFT) + +!**** *ELEDIR* - Direct meridional transform. + +! Purpose. +! -------- +! Direct meridional tranform of state variables. + +!** Interface. +! ---------- +! CALL ELEDIR(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - matrix multiply +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-01-28 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - NTMAX instead of NSMAX +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +USE TPMALD_FFT ,ONLY : TALD +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +#endif +USE TPMALD_DIM ,ONLY : RALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KFC,KLED2 +REAL(KIND=JPRB) , INTENT(INOUT) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +! ------------------------------------------------------------------ + +!* 1. PERFORM FOURIER TRANFORM. +! -------------------------- + +IF (KFC>0) THEN + ITYPE=-1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG + IF( TALD%LFFT992 )THEN + CALL FFT992(PFFT,TALD%TRIGSE,TALD%NFAXE,1,ICLEN,IRLEN,KFC,ITYPE) +#ifdef WITH_FFTW + ELSEIF( TW%LFFTW )THEN + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,LL_ALL,PFFT) +#endif + ELSE + CALL ABORT_TRANS('ELEDIR_MOD:ELEDIR: NO FFT PACKAGE SELECTED') + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEDIR +END MODULE ELEDIR_MOD diff --git a/src/etrans/cpu/internal/eledirad_mod.F90 b/src/etrans/cpu/internal/eledirad_mod.F90 new file mode 100644 index 0000000..e7e8f3d --- /dev/null +++ b/src/etrans/cpu/internal/eledirad_mod.F90 @@ -0,0 +1,118 @@ +MODULE ELEDIRAD_MOD +CONTAINS +SUBROUTINE ELEDIRAD(KM,KFC,KLED2,PFFT) + +!**** *ELEDIRAD* - Direct Legendre transform. + +! Purpose. +! -------- +! Direct Legendre tranform of state variables. + +!** Interface. +! ---------- +! CALL ELEDIRAD(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - matrix multiply +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-01-28 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - NTMAX instead of NSMAX +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib : fix missing support for FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +#endif +USE TPMALD_FFT ,ONLY : TALD +USE TPMALD_DIM ,ONLY : RALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 + +REAL(KIND=JPRB), INTENT(INOUT) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +INTEGER(KIND=JPIM) :: JF, JJ +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELEDIRAD_MOD:ELEDIRAD',0,ZHOOK_HANDLE) + +IF (KFC>0) THEN + DO JJ=1,1 + DO JF=1,KFC + PFFT(JJ,JF) = 2.0_JPRB * PFFT(JJ,JF) + ENDDO + ENDDO + ITYPE=1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG + IF( TALD%LFFT992 )THEN + CALL FFT992(PFFT,TALD%TRIGSE,TALD%NFAXE,1,RALD%NDGLSUR+R%NNOEXTZG,IRLEN,KFC,ITYPE) +#ifdef WITH_FFTW + ELSEIF( TW%LFFTW )THEN + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,LL_ALL,PFFT) +#endif + ELSE + CALL ABORT_TRANS('ELEDIR_MOD:ELEDIR: NO FFT PACKAGE SELECTED') + ENDIF + ZNORM=1.0_JPRB/(2.0_JPRB*REAL(R%NDGL+R%NNOEXTZG,JPRB)) + DO JJ=1,R%NDGL+R%NNOEXTZG + DO JF=1,KFC + PFFT(JJ,JF) = ZNORM * PFFT(JJ,JF) + ENDDO + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELEDIRAD_MOD:ELEDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEDIRAD +END MODULE ELEDIRAD_MOD diff --git a/src/etrans/cpu/internal/eleinv_mod.F90 b/src/etrans/cpu/internal/eleinv_mod.F90 new file mode 100644 index 0000000..2fd953a --- /dev/null +++ b/src/etrans/cpu/internal/eleinv_mod.F90 @@ -0,0 +1,105 @@ +MODULE ELEINV_MOD +CONTAINS +SUBROUTINE ELEINV(KM,KFC,KF_OUT_LT,PIA) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +#endif +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FFT ,ONLY : TALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(INOUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',0,ZHOOK_HANDLE) + +IF (KFC>0) THEN + ITYPE=1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG + IF( TALD%LFFT992 )THEN + CALL FFT992(PIA,TALD%TRIGSE,TALD%NFAXE,1,RALD%NDGLSUR+R%NNOEXTZG,IRLEN,KFC,ITYPE) + +#ifdef WITH_FFTW + ELSEIF( TW%LFFTW )THEN + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,LL_ALL,PIA) +#endif + ELSE + CALL ABORT_TRANS('ELEINV_MOD:ELEINV: NO FFT PACKAGE SELECTED') + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',1,ZHOOK_HANDLE) + +END SUBROUTINE ELEINV +END MODULE ELEINV_MOD diff --git a/src/etrans/cpu/internal/eleinvad_mod.F90 b/src/etrans/cpu/internal/eleinvad_mod.F90 new file mode 100644 index 0000000..ab7a4c9 --- /dev/null +++ b/src/etrans/cpu/internal/eleinvad_mod.F90 @@ -0,0 +1,115 @@ +MODULE ELEINVAD_MOD +CONTAINS +SUBROUTINE ELEINVAD(KM,KFC,KF_OUT_LT,PIA) + +!**** *ELEINVAD* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL ELEINVAD(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINVAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +USE TPMALD_FFT ,ONLY : TALD +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +#endif +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FFT ,ONLY : TALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(OUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +INTEGER(KIND=JPIM) :: JJ, JF +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELEINVAD_MOD:ELEINVAD',0,ZHOOK_HANDLE) + +IF (KFC>0) THEN + ITYPE=-1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG + IF( TALD%LFFT992 )THEN + CALL FFT992(PIA,TALD%TRIGSE,TALD%NFAXE,1,ICLEN,IRLEN,KFC,ITYPE) +#ifdef WITH_FFTW + ELSEIF( TW%LFFTW )THEN + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,LL_ALL,PIA) +#endif + ELSE + CALL ABORT_TRANS('ELEDIR_MOD:ELEINVAD: NO FFT PACKAGE SELECTED') + ENDIF + ZNORM=2.0_JPRB*REAL(R%NDGL+R%NNOEXTZG,JPRB) + DO JJ=1,1 + DO JF=1,KFC + PIA(JJ,JF) = (ZNORM/2.0_JPRB) * PIA(JJ,JF) + ENDDO + ENDDO + DO JJ=3,R%NDGL+R%NNOEXTZG+1 + DO JF=1,KFC + PIA(JJ,JF) = ZNORM * PIA(JJ,JF) + ENDDO + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELEINVAD_MOD:ELEINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEINVAD +END MODULE ELEINVAD_MOD diff --git a/src/etrans/cpu/internal/eltdir_ctl_mod.F90 b/src/etrans/cpu/internal/eltdir_ctl_mod.F90 new file mode 100644 index 0000000..01a6473 --- /dev/null +++ b/src/etrans/cpu/internal/eltdir_ctl_mod.F90 @@ -0,0 +1,121 @@ +MODULE ELTDIR_CTL_MOD +CONTAINS +SUBROUTINE ELTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV,AUX_PROC) + +!**** *ELTDIR_CTL* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL ELTDIR_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_FS - number of fields in Fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! KFLDPTRUV(:) - field pointer for vorticity and divergence (input) +! KFLDPTRSC(:) - field pointer for scalarvalued fields (input) +! PSPMEANU(:),PSPMEANV(:) - mean winds + +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTDIR_MOD ,ONLY : ELTDIR +USE EUVTVD_COMM_MOD , ONLY : EUVTVD_COMM +USE TRLTOM_MOD ,ONLY : TRLTOM +USE MPL_MODULE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2,INUL +REAL(KIND=JPRB) :: ZDUM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTL_MOD:ELTDIR_CTL',0,ZHOOK_HANDLE) +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! enforce allocation here +ENDIF +CALL GSTATS(153,0) +CALL TRLTOM(FOUBUF_IN,FOUBUF,2*KF_FS) +CALL GSTATS(153,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) + +! Periodization of auxiliary fields in y direction + +IF (PRESENT(AUX_PROC)) THEN + CALL AUX_PROC(ZDUM,FOUBUF,2*KF_FS,1,IBLEN,0,D%NUMP,.FALSE.,& + & INUL,D%NPROCL,D%NSTAGT0B,D%NPNTGTB1) +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1645,0) +IF (KF_FS>0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL ELTDIR(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + ENDDO +!$OMP END PARALLEL DO + IF (KF_UV > 0) THEN + if ( present(kfldptruv) ) then ! daand: no idea why this is needed, but I get crash on lumi otherwise. + CALL EUVTVD_COMM(KF_UV,PSPMEANU,PSPMEANV,KFLDPTRUV) + else + CALL EUVTVD_COMM(KF_UV,PSPMEANU,PSPMEANV) + endif + ENDIF +ENDIF +CALL GSTATS(1645,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTL_MOD:ELTDIR_CTL',1,ZHOOK_HANDLE) + +! ----------------------------------------------------------------- + +END SUBROUTINE ELTDIR_CTL +END MODULE ELTDIR_CTL_MOD diff --git a/src/etrans/cpu/internal/eltdir_ctlad_mod.F90 b/src/etrans/cpu/internal/eltdir_ctlad_mod.F90 new file mode 100644 index 0000000..3433e8c --- /dev/null +++ b/src/etrans/cpu/internal/eltdir_ctlad_mod.F90 @@ -0,0 +1,109 @@ +MODULE ELTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE ELTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +!**** *ELTDIR_CTLAD* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL LTDIR_CTLAD(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) + +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTDIRAD_MOD ,ONLY : ELTDIRAD +USE TRMTOL_MOD ,ONLY : TRMTOL + + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTLAD_MOD:ELTDIR_CTLAD',0,ZHOOK_HANDLE) +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here +ENDIF +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1646,0) +IF(KF_FS > 0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL ELTDIRAD(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC, PSPMEANU,PSPMEANV) + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1646,1) + +CALL GSTATS(181,0) +CALL TRMTOL(FOUBUF,FOUBUF_IN,2*KF_FS) +CALL GSTATS(181,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTLAD_MOD:ELTDIR_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIR_CTLAD +END MODULE ELTDIR_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eltdir_mod.F90 b/src/etrans/cpu/internal/eltdir_mod.F90 new file mode 100644 index 0000000..01a9a1e --- /dev/null +++ b/src/etrans/cpu/internal/eltdir_mod.F90 @@ -0,0 +1,184 @@ +MODULE ELTDIR_MOD +CONTAINS +SUBROUTINE ELTDIR(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD + +USE EPRFI2_MOD ,ONLY : EPRFI2 +USE ELEDIR_MOD ,ONLY : ELEDIR +USE EUVTVD_MOD +USE EUPDSP_MOD ,ONLY : EUPDSP +USE EXTPER_MOD ,ONLY : EXTPER + +! +!**** *ELTDIR* - Control of Direct Legendre transform step + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *ELTDIR(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! EPRFI2 - prepares the Fourier work arrays for model variables +! ELEDIR - direct Legendre transform +! EUVTVD - +! EUPDSP - updating of spectral arrays (fields) +! EUVTVD_COMM - +! EXTPER - + + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! 01-03-14 G. Radnoti aladin version +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: IFC, IINDEX(2*KF_FS), JF, JDIM +INTEGER(KIND=JPIM) :: IFLD, IR, J +INTEGER(KIND=JPIM) :: IUS,IVS,IVORS,IDIVS + +REAL(KIND=JPRB) :: ZFFT(RALD%NDGLSUR+R%NNOEXTZG,KLED2,D%NUMP) +REAL(KIND=JPRB) :: ZVODI(RALD%NDGLSUR+R%NNOEXTZG,MAX(4*KF_UV,1),D%NUMP) + +! Only if R%NNOEXTZG > 0 : +REAL(KIND=JPRB) :: ZFFT2(KLED2,(RALD%NDGLSUR+R%NNOEXTZG)*MIN(1,R%NNOEXTZG)) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',0,ZHOOK_HANDLE) + +IUS = 1 +IVS = 2*KF_UV+1 +IVORS = IUS +IDIVS = IVS +IFC = 2*KF_FS + +!* 1. PREPARE WORK ARRAYS. +! -------------------- + +CALL EPRFI2(KM,KMLOC,KF_FS,ZFFT(:,:,KMLOC)) + +!* 2. PERIODICIZATION IN Y DIRECTION +! ------------------------------ + +IF(R%NNOEXTZG>0) THEN + DO JF = 1,IFC + DO JDIM = 1,R%NDGL + ZFFT2(JF,JDIM)=ZFFT(JDIM,JF,KMLOC) + ENDDO + ENDDO + IINDEX(1)=0 + CALL EXTPER(ZFFT2(:,:),R%NDGL+R%NNOEXTZG,1,R%NDGL,IFC,1,IINDEX,0) + DO JF = 1,IFC + DO JDIM = 1,R%NDGL+R%NNOEXTZG + ZFFT(JDIM,JF,KMLOC) = ZFFT2(JF,JDIM) + ENDDO + ENDDO +ENDIF + +!* 3. DIRECT LEGENDRE TRANSFORM. +! -------------------------- + +CALL ELEDIR(KM,IFC,KLED2,ZFFT(:,:,KMLOC)) + +!* 4. COMPUTE VORTICITY AND DIVERGENCE AND STORE MEAN WIND ON TASK OWNING WAVE 0 +! -------------------------------------------------------------------------- + +IF( KF_UV > 0 ) THEN + CALL EUVTVD(KM,KMLOC,KF_UV,ZFFT(:,IUS:,KMLOC),ZFFT(:,IVS:,KMLOC),& + & ZVODI(:,IVORS:,KMLOC),ZVODI(:,IDIVS:,KMLOC)) + IF (KM == 0) THEN + IF (PRESENT(KFLDPTRUV)) THEN + DO J = 1, KF_UV + IR = 2*J-1 + IFLD=KFLDPTRUV(J) + PSPMEANU(IFLD)=ZFFT(1,IUS-1+IR,KMLOC) + PSPMEANV(IFLD)=ZFFT(1,IVS-1+IR,KMLOC) + ENDDO + ELSE + DO J = 1, KF_UV + IR = 2*J-1 + PSPMEANU(J)=ZFFT(1,IUS-1+IR,KMLOC) + PSPMEANV(J)=ZFFT(1,IVS-1+IR,KMLOC) + ENDDO + ENDIF + ENDIF +ENDIF + +!* 5. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL EUPDSP(KM,KF_UV,KF_SCALARS,ZFFT(:,:,KMLOC),ZVODI(:,:,KMLOC), & + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,KFLDPTRUV,KFLDPTRSC) + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIR +END MODULE ELTDIR_MOD diff --git a/src/etrans/cpu/internal/eltdirad_mod.F90 b/src/etrans/cpu/internal/eltdirad_mod.F90 new file mode 100644 index 0000000..fd11df0 --- /dev/null +++ b/src/etrans/cpu/internal/eltdirad_mod.F90 @@ -0,0 +1,166 @@ +MODULE ELTDIRAD_MOD +CONTAINS +SUBROUTINE ELTDIRAD(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD + +USE EPRFI2AD_MOD ,ONLY : EPRFI2AD +USE ELEDIRAD_MOD ,ONLY : ELEDIRAD +USE EUVTVDAD_MOD +USE EUPDSPAD_MOD ,ONLY : EUPDSPAD + + +!**** *ELTDIRAD* - Control of Direct Legendre transform step - adjoint + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *ELTDIRAD(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! EPRFI2AD - prepares the Fourier work arrays for model variables. +! ELEDIRAD - direct Legendre transform +! EUVTVDAD - +! EUPDSPAD - updating of spectral arrays (fields) + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ +! +IMPLICIT NONE +! +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: IFC +INTEGER(KIND=JPIM) :: IUS,IUE,IVS,IVE,IVORS,IVORE,IDIVS,IDIVE + +REAL(KIND=JPRB) :: ZFFT(RALD%NDGLSUR+R%NNOEXTZG,KLED2) +REAL(KIND=JPRB) :: ZVODI(RALD%NDGLSUR+R%NNOEXTZG,MAX(4*KF_UV,1)) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PREPARE LEGENDRE POLONOMIALS AND EPSNM +! -------------------------------------- + +IF (LHOOK) CALL DR_HOOK('ELTDIRAD_MOD:ELTDIRAD',0,ZHOOK_HANDLE) +ZFFT=0.0_JPRB +ZVODI=0.0_JPRB + +! ------------------------------------------------------------------ + +!* 6. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL EUPDSPAD(KM,KF_UV,KF_SCALARS,ZFFT,ZVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +! ------------------------------------------------------------------ + +!* 5. COMPUTE VORTICITY AND DIVERGENCE. +! --------------------------------- +IF( KF_UV > 0 ) THEN + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV +! SET PART OF ZFFT CONTAINING U AND V TO 0. + ZFFT(:,IUS:IVE) = 0.0_JPRB + CALL EUVTVDAD(KM,KMLOC,KF_UV,KFLDPTRUV,ZFFT(:,IUS:IUE),ZFFT(:,IVS:IVE),& + & ZVODI(:,IVORS:IVORE),ZVODI(:,IDIVS:IDIVE),PSPMEANU,PSPMEANV) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. DIRECT LEGENDRE TRANSFORM. +! -------------------------- +IFC = 2*KF_FS +CALL ELEDIRAD(KM,IFC,KLED2,ZFFT) + +! ------------------------------------------------------------------ + +!* 3. FOURIER SPACE COMPUTATIONS. +! --------------------------- + +! ------------------------------------------------------------------ + +!* 2. PREPARE WORK ARRAYS. +! -------------------- + +CALL EPRFI2AD(KM,KMLOC,KF_FS,ZFFT) +IF (LHOOK) CALL DR_HOOK('ELTDIRAD_MOD:ELTDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIRAD +END MODULE ELTDIRAD_MOD + diff --git a/src/etrans/cpu/internal/eltinv_ctl_mod.F90 b/src/etrans/cpu/internal/eltinv_ctl_mod.F90 new file mode 100644 index 0000000..f9f8c7c --- /dev/null +++ b/src/etrans/cpu/internal/eltinv_ctl_mod.F90 @@ -0,0 +1,138 @@ +MODULE ELTINV_CTL_MOD +CONTAINS +SUBROUTINE ELTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV,FSPGL_PROC) + +!**** *ELTINV_CTL* - Control routine for inverse Legandre transform. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTINV_MOD ,ONLY : ELTINV +USE TRMTOL_MOD ,ONLY : TRMTOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINV_CTL_MOD:ELTINV_CTL',0,ZHOOK_HANDLE) +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IDIM1 = 2*KF_OUT_LT +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! to force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! to force allocation here +ENDIF +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! to force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! to force allocation here +ENDIF + +IF(KF_OUT_LT > 0) THEN +CALL GSTATS(1647,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL ELTINV(IM,JM,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1647,1) +ENDIF + +!write (*,*) __FILE__, __LINE__ +!write (*,*)' FOUBUF_IN = ',FOUBUF_IN +!call flush(6) + +CALL GSTATS(152,0) +CALL TRMTOL(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) +CALL GSTATS(152,1) + +!write (*,*) __FILE__, __LINE__ +!write (*,*)' FOUBUF = ',FOUBUF +!call flush(6) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +IF (LHOOK) CALL DR_HOOK('ELTINV_CTL_MOD:ELTINV_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV_CTL +END MODULE ELTINV_CTL_MOD diff --git a/src/etrans/cpu/internal/eltinv_ctlad_mod.F90 b/src/etrans/cpu/internal/eltinv_ctlad_mod.F90 new file mode 100644 index 0000000..43e8f4c --- /dev/null +++ b/src/etrans/cpu/internal/eltinv_ctlad_mod.F90 @@ -0,0 +1,116 @@ +MODULE ELTINV_CTLAD_MOD +CONTAINS +SUBROUTINE ELTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +!**** *ELTINV_CTLAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D +USE ELTINVAD_MOD ,ONLY : ELTINVAD +USE TRLTOM_MOD ,ONLY : TRLTOM +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: IBLEN, ILEI2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINV_CTLAD_MOD:ELTINV_CTLAD',0,ZHOOK_HANDLE) + +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here +ENDIF +CALL GSTATS(180,0) +CALL TRLTOM(FOUBUF,FOUBUF_IN,2*KF_OUT_LT) +CALL GSTATS(180,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) + +CALL GSTATS(1648,0) +IF(KF_OUT_LT > 0) THEN + CALL ELTINVAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) +ENDIF +CALL GSTATS(1648,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +IF (LHOOK) CALL DR_HOOK('ELTINV_CTLAD_MOD:ELTINV_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV_CTLAD +END MODULE ELTINV_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eltinv_mod.F90 b/src/etrans/cpu/internal/eltinv_mod.F90 new file mode 100644 index 0000000..524ace8 --- /dev/null +++ b/src/etrans/cpu/internal/eltinv_mod.F90 @@ -0,0 +1,213 @@ +MODULE ELTINV_MOD +CONTAINS +SUBROUTINE ELTINV(KM,KMLOC,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPMALD_DIM ,ONLY : RALD +USE EPRFI1B_MOD ,ONLY : EPRFI1B +USE EVDTUV_MOD ,ONLY : EVDTUV +USE ESPNSDE_MOD ,ONLY : ESPNSDE +USE ELEINV_MOD ,ONLY : ELEINV +USE EASRE1B_MOD ,ONLY : EASRE1B +USE FSPGL_INT_MOD ,ONLY : FSPGL_INT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!**** *LTINV* - Inverse Legendre transform + +! Purpose. +! -------- +! Tranform from Laplace space to Fourier space, compute U and V +! and north/south derivatives of state variables. + +!** Interface. +! ---------- +! *CALL* *LTINV(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : The Laplace arrays of the model. +! -------------------- The values of the Legendre polynomials +! The grid point arrays of the model +! Method. +! ------- + +! Externals. +! ---------- + +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1B - prepares the spectral fields +! VDTUV - compute u and v from vorticity and divergence +! SPNSDE - compute north-south derivatives +! LEINV - Inverse Legendre transform +! ASRE1 - recombination of symmetric/antisymmetric part + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 26-Aug-2021 Optimization for EASRE1B +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 +INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(RALD%NDGLSUR+R%NNOEXTZG,KLEI2) + +INTEGER(KIND=JPIM) :: IFC, ISTA +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: IFIRST, ILAST,IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + + +! ------------------------------------------------------------------ + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV +ZIA=0.0_JPRB +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL EPRFI1B(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1B(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV + CALL EVDTUV(KM,KF_UV,KFLDPTRUV,ZIA(:,IVORL:IVORU),ZIA(:,IDIVL:IDIVU),& + & ZIA(:,IUL:IUU),ZIA(:,IVL:IVU),PSPMEANU,PSPMEANV) + +ENDIF + +IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + IF(ILAST /= 8*KF_UV+2*KF_SCALARS) THEN + WRITE(0,*) 'LTINV:KF_UV,KF_SCALARS,ILAST ',KF_UV,KF_SCALARS,ILAST + CALL ABORT_TRANS('LTINV_MOD:ILAST /= 8*KF_UV+2*KF_SCALARS') + ENDIF +ENDIF + +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL ESPNSDE(KM,KF_SCALARS,ZIA(:,ISL:ISU),ZIA(:,IDL:IDU)) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF + +CALL ELEINV(KM,IFC,KF_OUT_LT,ZIA(:,ISTA:ISTA+IFC-1)) + +! ------------------------------------------------------------------ + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + +CALL EASRE1B(IFC,KM,KMLOC,ZIA(:,ISTA:ISTA+IFC-1)) +! ------------------------------------------------------------------ + +! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + +IF(PRESENT(FSPGL_PROC)) THEN + CALL FSPGL_INT(KM,KMLOC,KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,FSPGL_PROC,& + & KFLDPTRUV,KFLDPTRSC) +ENDIF +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV +END MODULE ELTINV_MOD + diff --git a/src/etrans/cpu/internal/eltinvad_mod.F90 b/src/etrans/cpu/internal/eltinvad_mod.F90 new file mode 100644 index 0000000..a332b2e --- /dev/null +++ b/src/etrans/cpu/internal/eltinvad_mod.F90 @@ -0,0 +1,252 @@ +MODULE ELTINVAD_MOD +CONTAINS +SUBROUTINE ELTINVAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +!**** *ELTINVAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL ELTINVAD(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINVAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn add KMLOC to EVDTUVAD call +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR + +USE EASRE1BAD_MOD ,ONLY : EASRE1BAD +USE ELEINVAD_MOD ,ONLY : ELEINVAD +USE EPRFI1BAD_MOD ,ONLY : EPRFI1BAD +USE ESPNSDEAD_MOD ,ONLY : ESPNSDEAD +USE EVDTUVAD_MOD ,ONLY : EVDTUVAD +USE EVDTUVAD_COMM_MOD +USE EXTPER_MOD ,ONLY : EXTPER + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(RALD%NDGLSUR+R%NNOEXTZG,KLEI2,D%NUMP) +REAL(KIND=JPRB) :: ZIA2(KLEI2,RALD%NDGLSUR+R%NNOEXTZG) + +INTEGER(KIND=JPIM) :: IFC, ISTA, IINDEX(2*KF_OUT_LT), JF, JDIM, IM, JM +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: ILAST,IFIRST,IDIM1,IDIM3,J3 + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINVAD_MOD:ELTINVAD',0,ZHOOK_HANDLE) + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV +ENDIF +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 +ENDIF + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM,JF,JDIM,IINDEX,ZIA2) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + +! 7. OPTIONAL COMPUTATIONS IN FOURIER SPACE +! -------------------------------------- + +!commented IF(PRESENT(FSPGL_PROC)) THEN +!commented CALL FSPGL_INT(IM,JM,FSPGL_PROC) +!commented ENDIF + + +!* 6. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + + ZIA(:,:,JM)=0.0_JPRB + CALL EASRE1BAD(IFC,IM,JM,ZIA(:,ISTA:ISTA+IFC-1,JM)) + + +!* 5. PERIODICIZATION IN Y DIRECTION +! ------------------------------ + + IF(R%NNOEXTZG>0) THEN + DO JF = 1,IFC + DO JDIM = 1,R%NDGL + ZIA2(JF,JDIM)=ZIA(JDIM,JF,JM) + ENDDO + ENDDO + IINDEX(1)=0 + CALL EXTPER(ZIA2(:,:),R%NDGL+R%NNOEXTZG,1,R%NDGL,IFC,1,IINDEX,0) + DO JF = 1,IFC + DO JDIM = 1,R%NDGL+R%NNOEXTZG + ZIA(JDIM,JF,JM) = ZIA2(JF,JDIM) + ENDDO + ENDDO + ENDIF + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + + CALL ELEINVAD(IM,IFC,KF_OUT_LT,ZIA(:,ISTA:ISTA+IFC-1,JM)) + + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + + ZIA(:,1:ISTA-1,JM) = 0.0_JPRB + + IF (KF_UV > 0) THEN + CALL EVDTUVAD(IM,JM,KF_UV,KFLDPTRUV,ZIA(:,IVORL:IVORU,JM),ZIA(:,IDIVL:IDIVU,JM),& + & ZIA(:,IUL:IUU,JM),ZIA(:,IVL:IVU,JM),PSPMEANU,PSPMEANV) + ENDIF + + +ENDDO +!$OMP END PARALLEL DO + +!* 2. COMMUNICATION OF MEAN WIND +! -------------------------- + +IF (KF_UV > 0) THEN + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL EVDTUVAD_COMM(IM,JM,KF_UV,KFLDPTRUV,PSPMEANU,PSPMEANV) + ENDDO +ENDIF + +!* 2. PREPARE SPECTRAL FIELDS +! ----------------------- + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM,IFIRST,ILAST,IDIM1,IDIM3) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + + IFIRST = 1 + ILAST = 4*KF_UV + IF (KF_UV > 0) THEN + CALL EPRFI1BAD(IM,ZIA(:,IVORL:IVORU,JM),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1BAD(IM,ZIA(:,IDIVL:IDIVU,JM),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV + ENDIF + + IF (KF_SCDERS > 0) THEN + CALL ESPNSDEAD(IM,KF_SCALARS,ZIA(:,ISL:ISU,JM),ZIA(:,IDL:IDU,JM)) + ENDIF + + IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + ENDIF + +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('ELTINVAD_MOD:ELTINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINVAD +END MODULE ELTINVAD_MOD diff --git a/src/etrans/cpu/internal/eprfi1_mod.F90 b/src/etrans/cpu/internal/eprfi1_mod.F90 new file mode 100644 index 0000000..3e3feca --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1_mod.F90 @@ -0,0 +1,105 @@ +MODULE EPRFI1_MOD +CONTAINS +SUBROUTINE EPRFI1(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DISTR +!USE TPM_TRANS + +USE EPRFI1B_MOD ,ONLY : EPRFI1B + +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1 in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI1_MOD:EPRFI1',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL EPRFI1B(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1B(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1B(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1_MOD:EPRFI1',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1 +END MODULE EPRFI1_MOD + diff --git a/src/etrans/cpu/internal/eprfi1ad_mod.F90 b/src/etrans/cpu/internal/eprfi1ad_mod.F90 new file mode 100644 index 0000000..ad7cd17 --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1ad_mod.F90 @@ -0,0 +1,103 @@ +MODULE EPRFI1AD_MOD +CONTAINS +SUBROUTINE EPRFI1AD(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DISTR +!USE TPM_TRANS + +USE EPRFI1BAD_MOD ,ONLY : EPRFI1BAD + +!**** *EPRFI1AD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1AD(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1AD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ +! +IMPLICIT NONE +! +! +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI1AD_MOD:EPRFI1AD',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL EPRFI1BAD(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1BAD(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1BAD(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1AD_MOD:EPRFI1AD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1AD +END MODULE EPRFI1AD_MOD diff --git a/src/etrans/cpu/internal/eprfi1b_mod.F90 b/src/etrans/cpu/internal/eprfi1b_mod.F90 new file mode 100644 index 0000000..1a64daf --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1b_mod.F90 @@ -0,0 +1,110 @@ +MODULE EPRFI1B_MOD +CONTAINS +SUBROUTINE EPRFI1B(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_DISTR +USE TPMALD_DISTR ,ONLY : DALD +! +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1B(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF,IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',0,ZHOOK_HANDLE) +ILCM = DALD%NCPL2M(KM) +IOFF = DALD%NESM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 + PIA(J ,IR) = PSPEC(IFLD,INM ) + PIA(J+1,IR) = PSPEC(IFLD,INM+1) + PIA(J ,II) = PSPEC(IFLD,INM+2) + PIA(J+1,II) = PSPEC(IFLD,INM+3) + ENDDO + ENDDO + +ELSE + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 + !DIR$ IVDEP + !OCL NOVREC + !cdir unroll=4 + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + PIA(J ,IR) = PSPEC(JFLD,INM ) + PIA(J+1,IR) = PSPEC(JFLD,INM+1) + PIA(J ,II) = PSPEC(JFLD,INM+2) + PIA(J+1,II) = PSPEC(JFLD,INM+3) + ENDDO + ENDDO + +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1B +END MODULE EPRFI1B_MOD diff --git a/src/etrans/cpu/internal/eprfi1bad_mod.F90 b/src/etrans/cpu/internal/eprfi1bad_mod.F90 new file mode 100644 index 0000000..81a31ea --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1bad_mod.F90 @@ -0,0 +1,110 @@ +MODULE EPRFI1BAD_MOD +CONTAINS +SUBROUTINE EPRFI1BAD(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPMALD_DISTR ,ONLY : DALD + +!**** *EPRFI1BAD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1BAD(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1BAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF, IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EPRFI1BAD_MOD:EPRFI1BAD',0,ZHOOK_HANDLE) +ILCM=DALD%NCPL2M(KM) + +IOFF = DALD%NESM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 + + PSPEC(IFLD,INM ) = PSPEC(IFLD,INM ) + PIA(J ,IR) + PSPEC(IFLD,INM+1) = PSPEC(IFLD,INM+1) + PIA(J+1,IR) + PSPEC(IFLD,INM+2) = PSPEC(IFLD,INM+2) + PIA(J ,II) + PSPEC(IFLD,INM+3) = PSPEC(IFLD,INM+3) + PIA(J+1,II) + + ENDDO + ENDDO +ELSE + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + + PSPEC(JFLD,INM ) = PSPEC(JFLD,INM ) + PIA(J ,IR) + PSPEC(JFLD,INM+1) = PSPEC(JFLD,INM+1) + PIA(J+1,IR) + PSPEC(JFLD,INM+2) = PSPEC(JFLD,INM+2) + PIA(J ,II) + PSPEC(JFLD,INM+3) = PSPEC(JFLD,INM+3) + PIA(J+1,II) + + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1BAD_MOD:EPRFI1BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1BAD +END MODULE EPRFI1BAD_MOD diff --git a/src/etrans/cpu/internal/eprfi2_mod.F90 b/src/etrans/cpu/internal/eprfi2_mod.F90 new file mode 100644 index 0000000..21a031a --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2_mod.F90 @@ -0,0 +1,85 @@ +MODULE EPRFI2_MOD +CONTAINS +SUBROUTINE EPRFI2(KM,KMLOC,KF_FS,PFFT) + +!**** *EPRFI2* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. PRFI2B - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_TRANS + +USE EPRFI2B_MOD ,ONLY : EPRFI2B +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + +REAL(KIND=JPRB) , INTENT(OUT) :: PFFT(:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL EPRFI2B(KF_FS,KM,KMLOC,PFFT) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2 +END MODULE EPRFI2_MOD diff --git a/src/etrans/cpu/internal/eprfi2ad_mod.F90 b/src/etrans/cpu/internal/eprfi2ad_mod.F90 new file mode 100644 index 0000000..186dc29 --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2ad_mod.F90 @@ -0,0 +1,82 @@ +MODULE EPRFI2AD_MOD +CONTAINS +SUBROUTINE EPRFI2AD(KM,KMLOC,KF_FS,PFFT) + +!**** *EPRFI2AD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2AD(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. EPRFI2BAD - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE EPRFI2BAD_MOD ,ONLY : EPRFI2BAD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL EPRFI2BAD(KF_FS,KM,KMLOC,PFFT) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2AD +END MODULE EPRFI2AD_MOD diff --git a/src/etrans/cpu/internal/eprfi2b_mod.F90 b/src/etrans/cpu/internal/eprfi2b_mod.F90 new file mode 100644 index 0000000..6c304d8 --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2b_mod.F90 @@ -0,0 +1,92 @@ +MODULE EPRFI2B_MOD +CONTAINS +SUBROUTINE EPRFI2B(KFIELD,KM,KMLOC,PFFT) + +!**** *EPRFI2B* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D +!USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB) , INTENT(OUT) :: PFFT(:,:) +INTEGER(KIND=JPIM) :: ISTAN, JF, JGL +INTEGER(KIND=JPIM) :: IJR,IJI +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',0,ZHOOK_HANDLE) + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ + +!DIR$ IVDEP +!OCL NOVREC +DO JGL=1,R%NDGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + DO JF =1,KFIELD + IJR = 2*(JF-1)+1 + IJI = IJR+1 + PFFT(JGL,IJR) = FOUBUF(ISTAN+IJR) + PFFT(JGL,IJI) = FOUBUF(ISTAN+IJI) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2B +END MODULE EPRFI2B_MOD diff --git a/src/etrans/cpu/internal/eprfi2bad_mod.F90 b/src/etrans/cpu/internal/eprfi2bad_mod.F90 new file mode 100644 index 0000000..4086566 --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2bad_mod.F90 @@ -0,0 +1,90 @@ +MODULE EPRFI2BAD_MOD +CONTAINS +SUBROUTINE EPRFI2BAD(KFIELD,KM,KMLOC,PFFT) + +!**** *EPRFI2BAD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : FOUBUF +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: ISTAN, JF, JGL + +INTEGER(KIND=JPIM) :: IJR,IJI +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('EPRFI2BAD_MOD:EPRFI2BAD',0,ZHOOK_HANDLE) +DO JGL=1,R%NDGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + DO JF =1,KFIELD + IJR = 2*(JF-1)+1 + IJI = IJR+1 + FOUBUF(ISTAN+IJR) = PFFT(JGL,IJR) + FOUBUF(ISTAN+IJI) = PFFT(JGL,IJI) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EPRFI2BAD_MOD:EPRFI2BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2BAD +END MODULE EPRFI2BAD_MOD diff --git a/src/etrans/cpu/internal/eset_resol_mod.F90 b/src/etrans/cpu/internal/eset_resol_mod.F90 new file mode 100644 index 0000000..b5f1434 --- /dev/null +++ b/src/etrans/cpu/internal/eset_resol_mod.F90 @@ -0,0 +1,71 @@ +MODULE ESET_RESOL_MOD +CONTAINS +SUBROUTINE ESET_RESOL(KRESOL) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL +USE TPM_DIM ,ONLY : R, DIM_RESOL +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +! Local varaibles +INTEGER(KIND=JPIM) :: IRESOL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',0,ZHOOK_HANDLE) +IF(MSETUP0 == 0) CALL ABORT_TRANS('ESET_RESOL:TRANS NOT SETUP') +IRESOL = 1 +IF(PRESENT(KRESOL)) THEN + IRESOL = KRESOL + IF(KRESOL < 1 .OR. KRESOL > NMAX_RESOL) THEN + WRITE(NOUT,*)'ESET_RESOL: UNKNOWN RESOLUTION ',KRESOL,NMAX_RESOL + CALL ABORT_TRANS('ESET_RESOL:KRESOL < 1 .OR. KRESOL > NMAX_RESOL') + ENDIF +ENDIF +IF(IRESOL /= NCUR_RESOL) THEN + NCUR_RESOL = IRESOL + R => DIM_RESOL(NCUR_RESOL) + F => FIELDS_RESOL(NCUR_RESOL) + G => GEOM_RESOL(NCUR_RESOL) + D => DISTR_RESOL(NCUR_RESOL) + T => FFT_RESOL(NCUR_RESOL) + TB => FFTB_RESOL(NCUR_RESOL) +#ifdef WITH_FFTW + TW => FFTW_RESOL(NCUR_RESOL) +#endif + + RALD => ALDDIM_RESOL(NCUR_RESOL) + DALD => ALDDISTR_RESOL(NCUR_RESOL) + TALD => ALDFFT_RESOL(NCUR_RESOL) + FALD => ALDFIELDS_RESOL(NCUR_RESOL) + GALD => ALDGEO_RESOL(NCUR_RESOL) + +ENDIF +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',1,ZHOOK_HANDLE) + +END SUBROUTINE ESET_RESOL +END MODULE ESET_RESOL_MOD diff --git a/src/etrans/cpu/internal/esetup_dims_mod.F90 b/src/etrans/cpu/internal/esetup_dims_mod.F90 new file mode 100644 index 0000000..077f274 --- /dev/null +++ b/src/etrans/cpu/internal/esetup_dims_mod.F90 @@ -0,0 +1,46 @@ +MODULE ESETUP_DIMS_MOD +CONTAINS +SUBROUTINE ESETUP_DIMS + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JN,ISPOLEG +INTEGER(KIND=JPIM) :: ISMAX(0:R%NSMAX),ISNAX(0:RALD%NMSMAX) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_DIMS_MOD:ESETUP_DIMS',0,ZHOOK_HANDLE) +ISPOLEG = 0 +DO JM=0,R%NSMAX + DO JN=JM,R%NTMAX+1 + ISPOLEG = ISPOLEG+1 + ENDDO +ENDDO +R%NSPOLEG = ISPOLEG +CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) +R%NSPEC_G=0 +DO JM=0,RALD%NMSMAX + R%NSPEC_G=R%NSPEC_G+2*(ISNAX(JM)+1) +ENDDO +R%NSPEC2_G = R%NSPEC_G*2 + +R%NDGNH = (R%NDGL+1)/2 + +R%NLEI1 = R%NSMAX+4+MOD(R%NSMAX+4+1,2) +R%NLEI3 = R%NDGNH+MOD(R%NDGNH+2,2) + +R%NLED3 = R%NTMAX+2+MOD(R%NTMAX+3,2) +R%NLED4 = R%NTMAX+3+MOD(R%NTMAX+4,2) +IF (LHOOK) CALL DR_HOOK('ESETUP_DIMS_MOD:ESETUP_DIMS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESETUP_DIMS +END MODULE ESETUP_DIMS_MOD diff --git a/src/etrans/cpu/internal/esetup_geom_mod.F90 b/src/etrans/cpu/internal/esetup_geom_mod.F90 new file mode 100644 index 0000000..a93c67d --- /dev/null +++ b/src/etrans/cpu/internal/esetup_geom_mod.F90 @@ -0,0 +1,66 @@ +MODULE ESETUP_GEOM_MOD +CONTAINS +SUBROUTINE ESETUP_GEOM + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: IDGLU(0:RALD%NMSMAX,R%NDGNH) +INTEGER(KIND=JPIM) :: JGL,JM + +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_GEOM_MOD:ESETUP_GEOM',0,ZHOOK_HANDLE) +IF(.NOT.D%LGRIDONLY) THEN +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_GEOM ===' + +ALLOCATE (G%NMEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'G%NMEN ',SIZE(G%NMEN ),SHAPE(G%NMEN ) +G%NMEN(:)=RALD%NMSMAX +IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JGL,G%NLOEN,G%NMEN) '')') + WRITE(NOUT,FMT='(8(1X,''('',I4,I4,I4,'')''))')& + & (JGL,G%NLOEN(JGL),G%NMEN(JGL),JGL=1,R%NDGL) +ENDIF +ALLOCATE(G%NDGLU(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'G%NDGLU ',SIZE(G%NDGLU ),SHAPE(G%NDGLU ) +IDGLU(:,:) = 0 +G%NDGLU(:) = 0 +DO JGL=1,R%NDGNH + DO JM=0,G%NMEN(JGL) + IDGLU(JM,JGL) = 1 + ENDDO +ENDDO +DO JM=0,RALD%NMSMAX + DO JGL=1,R%NDGNH + G%NDGLU(JM) = G%NDGLU(JM)+IDGLU(JM,JGL) + ENDDO +ENDDO +IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JM,G%NDGLU) '')') + WRITE(NOUT,FMT='(10(1X,''('',I4,I4,'')''))')& + & (JM,G%NDGLU(JM),JM=0,RALD%NMSMAX) +ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('ESETUP_GEOM_MOD:ESETUP_GEOM',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE ESETUP_GEOM +END MODULE ESETUP_GEOM_MOD diff --git a/src/etrans/cpu/internal/espnorm_ctl_mod.F90 b/src/etrans/cpu/internal/espnorm_ctl_mod.F90 new file mode 100644 index 0000000..074a762 --- /dev/null +++ b/src/etrans/cpu/internal/espnorm_ctl_mod.F90 @@ -0,0 +1,64 @@ +MODULE ESPNORM_CTL_MOD +CONTAINS +SUBROUTINE ESPNORM_CTL(PSPEC,KFLD,KFLD_G,KVSET,KMASTER,PMET,PNORM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MYSETV, MYPROC + +USE ESPNORMD_MOD ,ONLY : ESPNORMD +USE SPNORMC_MOD ,ONLY : SPNORMC + +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFLD,KFLD_G +INTEGER(KIND=JPIM) :: IVSET(KFLD_G) + +REAL(KIND=JPRB) :: ZMET(0:R%NSPEC_G) + +REAL(KIND=JPRB) :: ZSM(KFLD,D%NUMP) + +REAL(KIND=JPRB) :: ZGM(KFLD_G,0:RALD%NMSMAX) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE1 + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',0,ZHOOK_HANDLE) +IF(PRESENT(KVSET)) THEN + IVSET(:) = KVSET(:) +ELSE + IVSET(:) = MYSETV +ENDIF + +IF(PRESENT(PMET)) THEN + ZMET(:) = PMET(:) +ELSE + ZMET(:) = 1.0_JPRB +ENDIF + +CALL ESPNORMD(PSPEC,KFLD,ZMET,ZSM) + +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:SPNORMC',0,ZHOOK_HANDLE1) +CALL SPNORMC(ZSM,KFLD_G,IVSET,KMASTER,RALD%NMSMAX,ZGM) +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:SPNORMC',1,ZHOOK_HANDLE1) + +IF(MYPROC == KMASTER) THEN + PNORM(1:KFLD_G) = SUM(ZGM,DIM=2) + PNORM(1:KFLD_G) = SQRT(PNORM(1:KFLD_G)) +ENDIF +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORM_CTL +END MODULE ESPNORM_CTL_MOD diff --git a/src/etrans/cpu/internal/espnormc_mod.F90 b/src/etrans/cpu/internal/espnormc_mod.F90 new file mode 100644 index 0000000..4b56285 --- /dev/null +++ b/src/etrans/cpu/internal/espnormc_mod.F90 @@ -0,0 +1,3 @@ +MODULE ESPNORMC_MOD + ! dead code +END MODULE ESPNORMC_MOD diff --git a/src/etrans/cpu/internal/espnormd_mod.F90 b/src/etrans/cpu/internal/espnormd_mod.F90 new file mode 100644 index 0000000..75e245a --- /dev/null +++ b/src/etrans/cpu/internal/espnormd_mod.F90 @@ -0,0 +1,55 @@ +MODULE ESPNORMD_MOD +CONTAINS +SUBROUTINE ESPNORMD(PSPEC,KFLD,PMET,PSM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PMET(0:R%NSPEC_G) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD +REAL(KIND=JPRB) ,INTENT(OUT) :: PSM(:,:) +INTEGER(KIND=JPIM) :: JM ,JFLD ,JN ,IM ,ISP +INTEGER(KIND=JPIM) :: IN,ISPE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',0,ZHOOK_HANDLE) + +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JM,IM,JN,ISP,JFLD,IN,ISPE) +DO JM=1,D%NUMP + PSM(:,JM) = 0.0_JPRB + IM = D%MYMS(JM) + + IN=DALD%NCPL2M(IM)/2 - 1 + DO JN=0,IN + ISP=DALD%NESM0(IM) + (JN)*4 + ISPE=DALD%NPME (IM) + JN + DO JFLD=1,KFLD + PSM(JFLD,JM) =PSM(JFLD,JM)& + & + PMET(ISPE) *& + & ( PSPEC(JFLD,ISP )**2 + PSPEC(JFLD,ISP+1)**2 +& + & PSPEC(JFLD,ISP+2)**2 + PSPEC(JFLD,ISP+3)**2 ) + + ENDDO + ENDDO + +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORMD +END MODULE ESPNORMD_MOD + diff --git a/src/etrans/cpu/internal/espnsde_mod.F90 b/src/etrans/cpu/internal/espnsde_mod.F90 new file mode 100644 index 0000000..9160e61 --- /dev/null +++ b/src/etrans/cpu/internal/espnsde_mod.F90 @@ -0,0 +1,101 @@ +MODULE ESPNSDE_MOD +CONTAINS +SUBROUTINE ESPNSDE(KM,KF_SCALARS,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_TRANS +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_GEO ,ONLY : GALD + + +!**** *SPNSDE* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDE(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDE in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(IN) :: PF(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PNSD(:,:) + +INTEGER(KIND=JPIM) :: J, JN,IN +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',0,ZHOOK_HANDLE) +DO JN=1,DALD%NCPL2M(KM),2 + IN =(JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + DO J=1,2*KF_SCALARS + PNSD(JN ,J) = -ZIN*PF(JN+1,J) + PNSD(JN+1,J) = ZIN*PF(JN,J) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNSDE +END MODULE ESPNSDE_MOD diff --git a/src/etrans/cpu/internal/espnsdead_mod.F90 b/src/etrans/cpu/internal/espnsdead_mod.F90 new file mode 100644 index 0000000..3ca9ded --- /dev/null +++ b/src/etrans/cpu/internal/espnsdead_mod.F90 @@ -0,0 +1,112 @@ +MODULE ESPNSDEAD_MOD +CONTAINS +SUBROUTINE ESPNSDEAD(KM,KF_SCALARS,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_TRANS + +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_GEO ,ONLY : GALD + + +!**** *ESPNSDEAD* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL ESPNSDEAD(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDEAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(INOUT) :: PF(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PNSD(:,:) +INTEGER(KIND=JPIM) :: ISKIP, J, JN +INTEGER(KIND=JPIM) :: IN +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +IF (LHOOK) CALL DR_HOOK('ESPNSDEAD_MOD:ESPNSDEAD',0,ZHOOK_HANDLE) +IF(KM == 0) THEN + ISKIP = 1 +ELSE + ISKIP = 1 +ENDIF + +DO JN=1,DALD%NCPL2M(KM),2 + + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + + DO J=1,2*KF_SCALARS,ISKIP + + PF(JN+1,J) = PF(JN+1,J)-ZIN*PNSD(JN ,J) + PF(JN ,J) = PF(JN ,J)+ZIN*PNSD(JN+1,J) + + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('ESPNSDEAD_MOD:ESPNSDEAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNSDEAD +END MODULE ESPNSDEAD_MOD diff --git a/src/etrans/cpu/internal/eupdsp_mod.F90 b/src/etrans/cpu/internal/eupdsp_mod.F90 new file mode 100644 index 0000000..210ac4f --- /dev/null +++ b/src/etrans/cpu/internal/eupdsp_mod.F90 @@ -0,0 +1,141 @@ +MODULE EUPDSP_MOD +CONTAINS +SUBROUTINE EUPDSP(KM,KF_UV,KF_SCALARS,PFFT,PVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *EUPDSP* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL EUPDSP(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE EUPDSPB_MOD ,ONLY : EUPDSPB +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PVODI(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND,IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',0,ZHOOK_HANDLE) +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUPDSPB(KM,KF_UV,PVODI(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL EUPDSPB(KM,KF_UV,PVODI(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL EUPDSPB(KM,KF_SCALARS,PFFT(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(KM,IDIM1,PFFT(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSP +END MODULE EUPDSP_MOD diff --git a/src/etrans/cpu/internal/eupdspad_mod.F90 b/src/etrans/cpu/internal/eupdspad_mod.F90 new file mode 100644 index 0000000..8f1699a --- /dev/null +++ b/src/etrans/cpu/internal/eupdspad_mod.F90 @@ -0,0 +1,145 @@ +MODULE EUPDSPAD_MOD +CONTAINS +SUBROUTINE EUPDSPAD(KM,KF_UV,KF_SCALARS,PFFT,PVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *EUPDSPAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL EUPDSPAD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPADB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPADB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE EUPDSPBAD_MOD ,ONLY : EUPDSPBAD +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS + +REAL(KIND=JPRB) , INTENT(OUT) :: PFFT(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PVODI(:,:) + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND +INTEGER(KIND=JPIM) :: IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF (LHOOK) CALL DR_HOOK('EUPDSPAD_MOD:EUPDSPAD',0,ZHOOK_HANDLE) +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUPDSPBAD(KM,KF_UV,PVODI(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL EUPDSPBAD(KM,KF_UV,PVODI(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL EUPDSPBAD(KM,KF_SCALARS,PFFT(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPAD_MOD:EUPDSPAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSPAD +END MODULE EUPDSPAD_MOD diff --git a/src/etrans/cpu/internal/eupdspb_mod.F90 b/src/etrans/cpu/internal/eupdspb_mod.F90 new file mode 100644 index 0000000..37601c8 --- /dev/null +++ b/src/etrans/cpu/internal/eupdspb_mod.F90 @@ -0,0 +1,105 @@ +MODULE EUPDSPB_MOD +CONTAINS +SUBROUTINE EUPDSPB(KM,KFIELD,POA,PSPEC,KFLDPTR) + +!**** *EUPDSPB* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL EUPDSPB(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_DISTR +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB) ,INTENT(IN) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN,IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE SPECTRAL FIELDS. +! ----------------------- +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',0,ZHOOK_HANDLE) +IF(PRESENT(KFLDPTR)) THEN + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + IFLD = KFLDPTR(JFLD) + PSPEC(IFLD,INM) =POA(JN,IR) + PSPEC(IFLD,INM+1) =POA(JN+1,IR) + PSPEC(IFLD,INM+2) =POA(JN,II) + PSPEC(IFLD,INM+3) =POA(JN+1,II) + ENDDO + ENDDO +ELSE + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 +! use unroll to provoke vectorization of outer loop +!cdir unroll=4 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + PSPEC(JFLD,INM) =POA(JN,IR) + PSPEC(JFLD,INM+1) =POA(JN+1,IR) + PSPEC(JFLD,INM+2) =POA(JN,II) + PSPEC(JFLD,INM+3) =POA(JN+1,II) + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',1,ZHOOK_HANDLE) + +END SUBROUTINE EUPDSPB +END MODULE EUPDSPB_MOD diff --git a/src/etrans/cpu/internal/eupdspbad_mod.F90 b/src/etrans/cpu/internal/eupdspbad_mod.F90 new file mode 100644 index 0000000..894f002 --- /dev/null +++ b/src/etrans/cpu/internal/eupdspbad_mod.F90 @@ -0,0 +1,133 @@ +MODULE EUPDSPBAD_MOD +CONTAINS +SUBROUTINE EUPDSPBAD(KM,KFIELD,POA,PSPEC,KFLDPTR) + +!**** *EUPDSPBAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL EUPDSPBAD(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_DISTR + +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB) ,INTENT(OUT) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN,IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 0. NOTE. +! ----- + +! The following transfer reads : +! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) +! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) +! with n from m to NSMAX +! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. +! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) +! nn is the loop index. + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUPDSPBAD_MOD:EUPDSPBAD',0,ZHOOK_HANDLE) +POA(:,:) = 0.0_JPRB + +IF(PRESENT(KFLDPTR)) THEN + + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + IFLD = KFLDPTR(JFLD) +!DIR$ IVDEP +!OCL NOVREC + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 + POA(JN,IR) = PSPEC(IFLD,INM) + POA(JN+1,IR) = PSPEC(IFLD,INM+1) + POA(JN,II) = PSPEC(IFLD,INM+2) + POA(JN+1,II) = PSPEC(IFLD,INM+3) + PSPEC(IFLD,INM )= 0.0_JPRB + PSPEC(IFLD,INM+1)= 0.0_JPRB + PSPEC(IFLD,INM+2)= 0.0_JPRB + PSPEC(IFLD,INM+3)= 0.0_JPRB + ENDDO + ENDDO + +ELSE + + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + POA(JN,IR) = PSPEC(JFLD,INM) + POA(JN+1,IR) = PSPEC(JFLD,INM+1) + POA(JN,II) = PSPEC(JFLD,INM+2) + POA(JN+1,II) = PSPEC(JFLD,INM+3) + PSPEC(JFLD,INM )= 0.0_JPRB + PSPEC(JFLD,INM+1)= 0.0_JPRB + PSPEC(JFLD,INM+2)= 0.0_JPRB + PSPEC(JFLD,INM+3)= 0.0_JPRB + ENDDO + ENDDO + +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPBAD_MOD:EUPDSPBAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSPBAD +END MODULE EUPDSPBAD_MOD diff --git a/src/etrans/cpu/internal/euvtvd_comm_mod.F90 b/src/etrans/cpu/internal/euvtvd_comm_mod.F90 new file mode 100644 index 0000000..d5ecef8 --- /dev/null +++ b/src/etrans/cpu/internal/euvtvd_comm_mod.F90 @@ -0,0 +1,128 @@ +MODULE EUVTVD_COMM_MOD +CONTAINS +SUBROUTINE EUVTVD_COMM(KFIELD,PSPMEANU,PSPMEANV,KFLDPTR) +!**** *EUVTVD_COMM* - Communicate mean wind + +! Purpose. +! -------- + +!** Interface. +! ---------- +! CALL EUVTVD_COMM(KFIELD,PSPMEANU,PSPMEANV,KFLDPTR) + +! Explicit arguments : +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! R. El Khatib 12-Jan-2020 Fix missing finalization of communications +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM +USE TPM_FIELDS +USE TPM_DISTR +USE TPMALD_GEO +USE TPMALD_DISTR +USE MPL_MODULE +USE SET2PE_MOD +USE ABORT_TRANS_MOD +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(INOUT) :: PSPMEANU(KFIELD) +REAL(KIND=JPRB), INTENT(INOUT) :: PSPMEANV(KFIELD) +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(KFIELD) + +INTEGER(KIND=JPIM) :: J, JA,ITAG,ILEN,IFLD,ISND, IM, JM + +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRW) + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',0,ZHOOK_HANDLE) + +!* 1. COMMUNICATE MEAN WIND +! --------------------- + + +IF (D%NPROCM(0) == MYSETW) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + ZSPU(J)=PSPMEANU(IFLD) + ZSPU(KFIELD+J)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + ZSPU(J)=PSPMEANU(J) + ZSPU(KFIELD+J)=PSPMEANV(J) + ENDDO + ENDIF + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL SET2PE(ISND,0,0,JA,MYSETV) + ISND=NPRCIDS(ISND) + ITAG=1 + CALL MPL_SEND(ZSPU(1:2*KFIELD),KDEST=ISND,KTAG=ITAG, & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVD_COMM:') + ENDIF + ENDDO + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVD_COMM:') + ENDIF + ENDDO +ELSE + CALL SET2PE(ISND,0,0,D%NPROCM(0),MYSETV) + ITAG=1 + CALL MPL_RECV(ZSPU(1:2*KFIELD),KSOURCE=NPRCIDS(ISND),KTAG=ITAG,KOUNT=ILEN, CDSTRING='EUVTVD_COMM:') + IF (ILEN /= 2*KFIELD) CALL ABORT_TRANS('EUVTVD_COMM: RECV INVALID RECEIVE MESSAGE LENGHT') + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=ZSPU(J) + PSPMEANV(IFLD)=ZSPU(KFIELD+J) + ENDDO + ELSE + DO J=1,KFIELD + PSPMEANU(J)=ZSPU(J) + PSPMEANV(J)=ZSPU(KFIELD+J) + ENDDO + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD_COMM + +END MODULE EUVTVD_COMM_MOD diff --git a/src/etrans/cpu/internal/euvtvd_mod.F90 b/src/etrans/cpu/internal/euvtvd_mod.F90 new file mode 100644 index 0000000..38d918d --- /dev/null +++ b/src/etrans/cpu/internal/euvtvd_mod.F90 @@ -0,0 +1,111 @@ +MODULE EUVTVD_MOD +CONTAINS +SUBROUTINE EUVTVD(KM,KMLOC,KFIELD,PU,PV,PVOR,PDIV) + +!**** *EUVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX - calculation part. + +!** Interface. +! ---------- +! CALL EUVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PU(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PV(:,:) +REAL(KIND=JPRB), INTENT(OUT):: PVOR(:,:) +REAL(KIND=JPRB), INTENT(OUT):: PDIV(:,:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN + +REAL(KIND=JPRB) :: ZKM, ZIN + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',0,ZHOOK_HANDLE) + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR=2*J-1 + II=IR+1 + DO JN=1,R%NDGL+R%NNOEXTZG + PDIV(JN,IR)=-ZKM*PU(JN,II) + PDIV(JN,II)= ZKM*PU(JN,IR) + PVOR(JN,IR)=-ZKM*PV(JN,II) + PVOR(JN,II)= ZKM*PV(JN,IR) + ENDDO +ENDDO +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN=(JN-1)/2 + ZIN=REAL(IN,JPRB)*GALD%EYWN + PVOR(JN,J )=PVOR(JN ,J)+ZIN*PU(JN+1,J) + PVOR(JN+1,J)=PVOR(JN+1,J)-ZIN*PU(JN ,J) + PDIV(JN,J )=PDIV(JN ,J)-ZIN*PV(JN+1,J) + PDIV(JN+1,J)=PDIV(JN+1,J)+ZIN*PV(JN ,J) + ENDDO +ENDDO + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD +END MODULE EUVTVD_MOD diff --git a/src/etrans/cpu/internal/euvtvdad_mod.F90 b/src/etrans/cpu/internal/euvtvdad_mod.F90 new file mode 100644 index 0000000..8b72f99 --- /dev/null +++ b/src/etrans/cpu/internal/euvtvdad_mod.F90 @@ -0,0 +1,128 @@ +MODULE EUVTVDAD_MOD +CONTAINS +SUBROUTINE EUVTVDAD(KM,KMLOC,KFIELD,KFLDPTR,PU,PV,PVOR,PDIV,PSPMEANU,PSPMEANV) + +!**** *EUVTVDAD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX. + +!** Interface. +! ---------- +! CALL EUVTVDAD() + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 G. Radnoti: b-level conform mean wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn removed erasing of mean wind +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_FIELDS + +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD, KM, KMLOC +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(INOUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, IFLD + +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVDAD_MOD:EUVTVDAD',0,ZHOOK_HANDLE) + +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IR=2*J-1 + IFLD=KFLDPTR(J) + PU(1,IR)=PSPMEANU(IFLD) + PV(1,IR)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + IR=2*J-1 + PU(1,IR)=PSPMEANU(J) + PV(1,IR)=PSPMEANV(J) + ENDDO + ENDIF +ENDIF + +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN=(JN-1)/2 + ZIN=REAL(IN,JPRB)*GALD%EYWN + PU(JN+1,J) = PU(JN+1,J) + ZIN * PVOR(JN ,J) + PU(JN ,J) = PU(JN ,J) - ZIN * PVOR(JN+1,J) + PV(JN+1,J) = PV(JN+1,J) - ZIN * PDIV(JN ,J) + PV(JN ,J) = PV(JN ,J) + ZIN * PDIV(JN+1,J) + ENDDO +ENDDO + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR=2*J-1 + II=IR+1 + DO JN=1,R%NDGL+R%NNOEXTZG + PU(JN,II) = PU(JN,II) - ZKM * PDIV(JN,IR) + PU(JN,IR) = PU(JN,IR) + ZKM * PDIV(JN,II) + PV(JN,II) = PV(JN,II) - ZKM * PVOR(JN,IR) + PV(JN,IR) = PV(JN,IR) + ZKM * PVOR(JN,II) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EUVTVDAD_MOD:EUVTVDAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUVTVDAD +END MODULE EUVTVDAD_MOD diff --git a/src/etrans/cpu/internal/evdtuv_mod.F90 b/src/etrans/cpu/internal/evdtuv_mod.F90 new file mode 100644 index 0000000..33f9f4e --- /dev/null +++ b/src/etrans/cpu/internal/evdtuv_mod.F90 @@ -0,0 +1,125 @@ +MODULE EVDTUV_MOD +CONTAINS +SUBROUTINE EVDTUV(KM,KFIELD,KFLDPTR,PVOR,PDIV,PU,PV,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD + +!**** *VDTUV* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUV(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUV in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(IN) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IN, IFLD + +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',0,ZHOOK_HANDLE) +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PU(JN ,J) = -ZIN*PVOR(JN+1,J) + PU(JN+1,J) = ZIN*PVOR(JN,J) + PV(JN ,J) = -ZIN*PDIV(JN+1,J) + PV(JN+1,J) = ZIN*PDIV(JN,J) + ENDDO +ENDDO +DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JN=1,DALD%NCPL2M(KM) + IJ=(JN-1)/2 + PU(JN,IR)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*(-ZKM*PDIV(JN,II)-PU(JN,IR)) + PU(JN,II)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*( ZKM*PDIV(JN,IR)-PU(JN,II)) + PV(JN,IR)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*(-ZKM*PVOR(JN,II)+PV(JN,IR)) + PV(JN,II)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*( ZKM*PVOR(JN,IR)+PV(JN,II)) + ENDDO +ENDDO +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J = 1, KFIELD + IR = 2*J-1 + IFLD=KFLDPTR(J) + PU(1,IR)=PSPMEANU(IFLD) + PV(1,IR)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J = 1, KFIELD + IR = 2*J-1 + PU(1,IR)=PSPMEANU(J) + PV(1,IR)=PSPMEANV(J) + ENDDO + ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',1,ZHOOK_HANDLE) + +END SUBROUTINE EVDTUV +END MODULE EVDTUV_MOD diff --git a/src/etrans/cpu/internal/evdtuvad_comm_mod.F90 b/src/etrans/cpu/internal/evdtuvad_comm_mod.F90 new file mode 100644 index 0000000..492a01b --- /dev/null +++ b/src/etrans/cpu/internal/evdtuvad_comm_mod.F90 @@ -0,0 +1,163 @@ +MODULE EVDTUVAD_COMM_MOD +CONTAINS +SUBROUTINE EVDTUVAD_COMM(KM,KMLOC,KFIELD,KFLDPTR,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM +USE TPM_FIELDS +USE TPM_DISTR + +USE TPMALD_FIELDS +USE TPMALD_GEO +USE TPMALD_DISTR + +USE MPL_MODULE +USE ABORT_TRANS_MOD +USE SET2PE_MOD + + +!**** *EVDTUVAD_COMM* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space communicate the mean winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL EVDTUVAD_COMM(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn Fix mean wind for NPRTRW > 1 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! R. El Khatib 12-Jan-2020 Fix missing finalization of communications +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD, KMLOC + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IFLD + +INTEGER(KIND=JPIM) :: IN +INTEGER(KIND=JPIM) :: ISND, JA, ITAG, ILEN + +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRW) + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_COMM_MOD:EVDTUVAD_COMM',0,ZHOOK_HANDLE) + +IF (NPRTRW > 1 .AND. KFIELD > 0) THEN + IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + ZSPU(J)=PSPMEANU(IFLD) + ZSPU(KFIELD+J)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + ZSPU(J)=PSPMEANU(J) + ZSPU(KFIELD+J)=PSPMEANV(J) + ENDDO + ENDIF + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL SET2PE(ISND,0,0,JA,MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+ISND + CALL MPL_SEND(ZSPU(1:2*KFIELD),KDEST=ISND,KTAG=ITAG, & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA), & + & CDSTRING='EVDTUVAD_COMM:') + ENDIF + ENDDO + ELSE + IF (KMLOC == 1) THEN + IF (D%NPROCM(0) /= MYSETW) THEN + CALL SET2PE(ISND,0,0,D%NPROCM(0),MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+MYPROC + CALL MPL_RECV(ZSPU(1:2*KFIELD),KSOURCE=ISND,KTAG=ITAG,KOUNT=ILEN,CDSTRING='EVDTUVAD_COMM:') + IF (ILEN /= 2*KFIELD) THEN + CALL ABORT_TRANS('EVDTUVAD_COMM: RECV INVALID RECEIVE MESSAGE LENGTH') + ENDIF + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=ZSPU(J) + PSPMEANV(IFLD)=ZSPU(KFIELD+J) + ENDDO + ELSE + DO J=1,KFIELD + PSPMEANU(J)=ZSPU(J) + PSPMEANV(J)=ZSPU(KFIELD+J) + ENDDO + ENDIF + ENDIF + ENDIF + ENDIF + IF (KM == 0) THEN + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVDAD_COMM:') + ENDIF + ENDDO + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_COMM_MOD:EVDTUVAD_COMM',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EVDTUVAD_COMM +END MODULE EVDTUVAD_COMM_MOD diff --git a/src/etrans/cpu/internal/evdtuvad_mod.F90 b/src/etrans/cpu/internal/evdtuvad_mod.F90 new file mode 100644 index 0000000..a34135f --- /dev/null +++ b/src/etrans/cpu/internal/evdtuvad_mod.F90 @@ -0,0 +1,151 @@ +MODULE EVDTUVAD_MOD +CONTAINS +SUBROUTINE EVDTUVAD(KM,KMLOC,KFIELD,KFLDPTR,PVOR,PDIV,PU,PV,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC + +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD + +!**** *EVDTUVAD* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL EVDTUVAD(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn Fix mean wind for NPRTRW > 1 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD, KMLOC +REAL(KIND=JPRB), INTENT(INOUT) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IFLD + +INTEGER(KIND=JPIM) :: IN +INTEGER(KIND=JPIM) :: ISND, JA, ITAG, ILEN + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_MOD:EVDTUVAD',0,ZHOOK_HANDLE) + +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J = 1, KFIELD + IR = 2*J-1 + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=PU(1,IR) + PSPMEANV(IFLD)=PV(1,IR) + ENDDO + ELSE + DO J = 1, KFIELD + IR = 2*J-1 + PSPMEANU(J)=PU(1,IR) + PSPMEANV(J)=PV(1,IR) + ENDDO + ENDIF +ENDIF + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JN=1,DALD%NCPL2M(KM) + IJ=(JN-1)/2 + PDIV(JN,II)=PDIV(JN,II)-ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,IR) + PU(JN,IR)=-FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,IR) + + PDIV(JN,IR)=PDIV(JN,IR)+ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,II) + PU(JN,II)=-FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,II) + + PVOR(JN,II)=PVOR(JN,II)-ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,IR) + PV(JN,IR)=FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,IR) + + PVOR(JN,IR)=PVOR(JN,IR)+ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,II) + PV(JN,II)=FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,II) + + ENDDO +ENDDO + +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PVOR(JN+1,J) = PVOR(JN+1,J)-ZIN*PU(JN ,J) + PVOR(JN ,J) = PVOR(JN ,J)+ZIN*PU(JN+1,J) + PDIV(JN+1,J) = PDIV(JN+1,J)-ZIN*PV(JN ,J) + PDIV(JN ,J) = PDIV(JN ,J)+ZIN*PV(JN+1,J) + ENDDO +ENDDO + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_MOD:EVDTUVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EVDTUVAD +END MODULE EVDTUVAD_MOD diff --git a/src/etrans/cpu/internal/suefft_mod.F90 b/src/etrans/cpu/internal/suefft_mod.F90 new file mode 100644 index 0000000..96d4879 --- /dev/null +++ b/src/etrans/cpu/internal/suefft_mod.F90 @@ -0,0 +1,114 @@ +MODULE SUEFFT_MOD +CONTAINS +SUBROUTINE SUEFFT + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, INIT_PLANS_FFTW +#endif +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_INIT, FFTB_TYPE +! + +USE TPMALD_FFT ,ONLY : TALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JGL,IGLG, ILATS +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEFFT_MOD:SUEFFT',0,ZHOOK_HANDLE) + +IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEFFT ===' + +#ifdef WITH_FFTW + IF(TW%LFFTW)THEN + + CALL INIT_PLANS_FFTW(MAX(R%NDLON+R%NNOEXTZL,R%NDGL+R%NNOEXTZG)) + + ELSE + + NULLIFY(TW%FFTW_PLANS) +#endif + + ALLOCATE(T%TRIGS(R%NDLON+R%NNOEXTZL,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%TRIGS ',SIZE(T%TRIGS),SHAPE(T%TRIGS) + ALLOCATE(T%NFAX(19,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%NFAX ',SIZE(T%NFAX),SHAPE(T%NFAX) + ALLOCATE(T%LUSEFFT992(D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%LUSEFFT992',SIZE(T%LUSEFFT992),SHAPE(T%LUSEFFT992) + + ! + ! create TRIGS and NFAX for latitude lengths supported by FFT992, + ! that is just with factors 2, 3 or 5 + ! + + T%LBLUESTEIN=.FALSE. + ILATS=0 + DO JGL=1,D%NDGL_FS + IGLG = D%NPTRLS(MYSETW)+JGL-1 + IF (G%NLOEN(IGLG)>1) THEN + CALL SET99B(T%TRIGS(1,JGL),T%NFAX(1,JGL),G%NLOEN(IGLG)+R%NNOEXTZL,T%LUSEFFT992(JGL)) + IF( .NOT.T%LUSEFFT992(JGL) )THEN + ILATS=ILATS+1 + T%LBLUESTEIN=.TRUE. + ENDIF + ENDIF + ENDDO + + ! + ! we only initialise for bluestein if there are latitude lengths + ! not supported by FFT992 + ! + + IF( T%LBLUESTEIN )THEN + TB%NDLON=R%NDLON + TB%NLAT_COUNT=ILATS + ILATS=0 + ALLOCATE(TB%NLATS(TB%NLAT_COUNT)) + DO JGL=1,D%NDGL_FS + IF( .NOT.T%LUSEFFT992(JGL) )THEN + ILATS=ILATS+1 + TB%NLATS(ILATS)=R%NDLON+R%NNOEXTZL + ENDIF + ENDDO + CALL BLUESTEIN_INIT(TB) + ENDIF + +#ifdef WITH_FFTW + + ENDIF +#endif + + IF(TALD%LFFT992)THEN + ALLOCATE(TALD%TRIGSE(R%NDGL+R%NNOEXTZG)) + IF(LLP2)WRITE(NOUT,9) 'TALD%TRIGSE ',SIZE(TALD%TRIGSE),SHAPE(TALD%TRIGSE) + ALLOCATE(TALD%NFAXE(19)) + IF(LLP2)WRITE(NOUT,9) 'TALD%NFAXE ',SIZE(TALD%NFAXE),SHAPE(TALD%NFAXE) + CALL SET99(TALD%TRIGSE,TALD%NFAXE,R%NDGL+R%NNOEXTZG) + ENDIF + +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEFFT_MOD:SUEFFT',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEFFT +END MODULE SUEFFT_MOD diff --git a/src/etrans/cpu/internal/suemp_trans_mod.F90 b/src/etrans/cpu/internal/suemp_trans_mod.F90 new file mode 100644 index 0000000..ae689f5 --- /dev/null +++ b/src/etrans/cpu/internal/suemp_trans_mod.F90 @@ -0,0 +1,267 @@ +MODULE SUEMP_TRANS_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS + +! Set up distributed environment for the transform package (part 2) +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, NPRTRNS, NPRTRV, NPRTRW, MYSETW, NPROC, MYPROC +USE TPMALD_DIM ,ONLY : RALD +!USE TPMALD_DISTR +!USE SUWAVEDI_MOD +!USE PE2SET_MOD +USE SUMPLATF_MOD ,ONLY : SUMPLATF +USE SUEMPLAT_MOD ,ONLY : SUEMPLAT +USE SUESTAONL_MOD ,ONLY : SUESTAONL +USE MYSENDSET_MOD ,ONLY : MYSENDSET +USE MYRECVSET_MOD ,ONLY : MYRECVSET +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JMLOC +INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM +INTEGER(KIND=JPIM) :: I1,I2,I3,IAUX0,IAUX1,JA1 +INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF +INTEGER(KIND=JPIM), ALLOCATABLE :: IGPTOTL(:,:) + +REAL(KIND=JPRB) :: ZMEDIAP + +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPRB),ALLOCATABLE :: ZDUM(:) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',0,ZHOOK_HANDLE) +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS ===' + +IF(.NOT.D%LGRIDONLY) THEN + +ALLOCATE(D%NULTPP(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NULTPP ',SIZE(D%NULTPP ),SHAPE(D%NULTPP ) +ALLOCATE(D%NPTRLS(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLS ',SIZE(D%NPTRLS ),SHAPE(D%NPTRLS ) +ALLOCATE(D%NPROCL(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCL ',SIZE(D%NPROCL ),SHAPE(D%NPROCL ) + +CALL SUMPLATF(R%NDGL,NPRTRNS,MYSETW,D%NULTPP,D%NPROCL,D%NPTRLS) +D%NDGL_FS = D%NULTPP(MYSETW) + +! Help arrays for spectral to fourier space transposition +ALLOCATE(D%NLTSGTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSGTB ',SIZE(D%NLTSGTB),SHAPE(D%NLTSGTB) +ALLOCATE(D%NLTSFTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSFTB ',SIZE(D%NLTSFTB),SHAPE(D%NLTSFTB) +ALLOCATE(D%NSTAGT0B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT0B ',SIZE(D%NSTAGT0B),SHAPE(D%NSTAGT0B) +ALLOCATE(D%NSTAGT1B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT1B ',SIZE(D%NSTAGT1B),SHAPE(D%NSTAGT1B) +ALLOCATE(D%MSTABF (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%MSTABF ',SIZE(D%MSTABF),SHAPE(D%MSTABF) + +D%NLTSGTB(:) = 0 +DO JGL=1,D%NDGL_FS + IGL = D%NPTRLS(MYSETW)+JGL-1 + DO JM=0,G%NMEN(IGL) + D%NLTSGTB(D%NPROCM(JM)) = D%NLTSGTB(D%NPROCM(JM))+1 + ENDDO +ENDDO +DO JA=1,NPRTRW + IPLAT = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA)+JGL-1 + DO JM=1,D%NUMP + IF(IGL > R%NDGNH-G%NDGLU(D%MYMS(JM)) .AND. IGL <= R%NDGNH+G%NDGLU(D%MYMS(JM))) THEN + IPLAT = IPLAT + 1 + ENDIF + ENDDO + ENDDO + D%NLTSFTB(JA) = IPLAT +ENDDO + +DO JA=1,NPRTRW-1 + ISENDSET = MYSENDSET(NPRTRW,MYSETW,JA) + IRECVSET = MYRECVSET(NPRTRW,MYSETW,JA) + D%MSTABF(IRECVSET) = ISENDSET +ENDDO +D%MSTABF(MYSETW) = MYSETW + +ALLOCATE(D%NPNTGTB0(0:RALD%NMSMAX,D%NDGL_FS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB0 ',SIZE(D%NPNTGTB0 ),SHAPE(D%NPNTGTB0 ) +ALLOCATE(D%NPNTGTB1(D%NUMP,R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB1 ',SIZE(D%NPNTGTB1 ),SHAPE(D%NPNTGTB1 ) + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(MYSETW) + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JML=D%NPTRMS(JA),D%NPTRMS(JA)+D%NUMPP(JA)-1 + IM = D%NALLMS(JML) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB0(IM,JGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB0(IM,JGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA) + JGL - 1 + DO JM=1,D%NUMP + IM = D%MYMS(JM) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB1(JM,IGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB1(JM,IGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO + +IAUX0 = 0 +IAUX1 = 0 +DO JA=1,NPRTRNS-1 + I1 = MYSENDSET(NPRTRNS,MYSETW,JA) + I2 = MYRECVSET(NPRTRNS,MYSETW,JA) + DO JA1=1,NPRTRNS-1 + IF(MYSENDSET(NPRTRNS,MYSETW,JA1) == I2) I3 =MYRECVSET(NPRTRNS,MYSETW,JA1) + ENDDO + IAUX0 = MAX(D%NLTSFTB(I1),D%NLTSGTB(I2),IAUX0) + IAUX1 = MAX(D%NLTSGTB(I2),D%NLTSFTB(I3),IAUX1) +ENDDO +IAUX0 = MAX(D%NLTSGTB(MYSETW),IAUX0) +IAUX1 = MAX(D%NLTSGTB(MYSETW),IAUX1) +DO JA=1,NPRTRNS+1 + D%NSTAGT0B(JA) = (JA-1)*IAUX0 + D%NSTAGT1B(JA) = (JA-1)*IAUX1 +ENDDO +D%NLENGT0B = IAUX0*NPRTRNS +D%NLENGT1B = IAUX1*NPRTRNS + +ENDIF + +! GRIDPOINT SPACE + +ALLOCATE(D%NFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NFRSTLAT ',SIZE(D%NFRSTLAT ),SHAPE(D%NFRSTLAT ) +ALLOCATE(D%NLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NLSTLAT ',SIZE(D%NLSTLAT ),SHAPE(D%NLSTLAT ) +ALLOCATE(D%NPTRLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLAT ',SIZE(D%NPTRLAT ),SHAPE(D%NPTRLAT ) +ALLOCATE(D%NPTRFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRFRSTLAT',SIZE(D%NPTRFRSTLAT),SHAPE(D%NPTRFRSTLAT) +ALLOCATE(D%NPTRLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9)'D%NPTRLSTLAT',SIZE(D%NPTRLSTLAT),SHAPE(D%NPTRLSTLAT) +ALLOCATE(D%LSPLITLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%LSPLITLAT',SIZE(D%LSPLITLAT),SHAPE(D%LSPLITLAT) +ALLOCATE(D%NPROCA_GP(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCA_GP',SIZE(D%NPROCA_GP),SHAPE(D%NPROCA_GP) + + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + ALLOCATE(ZDUM(1)) + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & ZDUM,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ELSE + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & D%RWEIGHT,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ENDIF +D%NDGL_GP = D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF + +IF (LLP1) THEN + IF(.NOT.D%LGRIDONLY) THEN + WRITE(NOUT,FMT='(/'' OUTPUT FROM ROUTINE SUEMPLAT: ''/)') + WRITE(NOUT,FMT='('' D%NULTPP '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NULTPP(1:NPRTRNS) + WRITE(NOUT,FMT='('' D%NPROCL '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPROCL(1:R%NDGL) + ENDIF + WRITE(NOUT,FMT='('' D%NFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NFRSTLOFF D%NPTRFLOFF '')') + WRITE(NOUT,FMT='(2(1X,I6))') D%NFRSTLOFF, D%NPTRFLOFF + WRITE(NOUT,FMT='('' D%NPTRLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%LSPLITLAT '')') + WRITE(NOUT,FMT='(50(1X,L1))') D%LSPLITLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%NPTRFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NPTRLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='(/)') +ENDIF +ALLOCATE(D%NSTA(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTA ',SIZE(D%NSTA ),SHAPE(D%NSTA ) +ALLOCATE(D%NONL(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NONL ',SIZE(D%NONL ),SHAPE(D%NONL ) + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,ZDUM,ZMEDIAP,D%NPROCA_GP) +ELSE + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,D%RWEIGHT,ZMEDIAP,D%NPROCA_GP) +ENDIF +! IGPTOTL is the number of grid points in each individual processor +ALLOCATE(IGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IGPTOTL(:,:)=0 +DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + IGPTOT = 0 + DO JGL=D%NPTRFRSTLAT(JA),D%NPTRLSTLAT(JA) + IGPTOT = IGPTOT+D%NONL(JGL,JB) + ENDDO + IGPTOTL(JA,JB) = IGPTOT + ENDDO +ENDDO +D%NGPTOT = IGPTOTL(MY_REGION_NS,MY_REGION_EW) +D%NGPTOTMX = MAXVAL(IGPTOTL) +D%NGPTOTG = SUM(IGPTOTL) +ALLOCATE(D%NGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NGPTOTL ',SIZE(D%NGPTOTL ),SHAPE(D%NGPTOTL ) +D%NGPTOTL(:,:) = IGPTOTL(:,:) + +IF(.NOT.D%LGRIDONLY) THEN +ALLOCATE(D%NSTAGTF(D%NDGL_FS)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGTF ',SIZE(D%NSTAGTF ),SHAPE(D%NSTAGTF ) +IOFF = 0 +DO JGL=1,D%NDGL_FS + D%NSTAGTF(JGL) = IOFF + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IOFF = IOFF + G%NLOEN(IGL)+3+R%NNOEXTZL +ENDDO +D%NLENGTF = IOFF +ENDIF + +IF(ALLOCATED(ZDUM)) DEALLOCATE(ZDUM) +DEALLOCATE(IGPTOTL) +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS +END MODULE SUEMP_TRANS_MOD + diff --git a/src/etrans/cpu/internal/suemp_trans_preleg_mod.F90 b/src/etrans/cpu/internal/suemp_trans_preleg_mod.F90 new file mode 100644 index 0000000..34f3fb7 --- /dev/null +++ b/src/etrans/cpu/internal/suemp_trans_preleg_mod.F90 @@ -0,0 +1,240 @@ +MODULE SUEMP_TRANS_PRELEG_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS_PRELEG + +! Set up distributed environment for the transform package (part 1) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRW, NPRTRV, MYSETW + +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD + +!USE SUWAVEDI_MOD +!USE ABORT_TRANS_MOD + +IMPLICIT NONE + + INTEGER(KIND=JPIM) :: JA,JM,JMLOC,JW,JV,ILATPP,IRESTL,IMLOC,IDT,INM,JN,IM,ILAST + + LOGICAL :: LLP1,LLP2 + + INTEGER(KIND=JPIM) :: ISPEC(NPRTRW),IMYMS(RALD%NMSMAX+1),IKNTMP(0:RALD%NMSMAX) + INTEGER(KIND=JPIM) :: IKMTMP(0:R%NSMAX),ISPEC2P + INTEGER(KIND=JPIM) :: IC(NPRTRW) + INTEGER(KIND=JPIM) :: IMDIM,IL,IND,IK,IPOS,IKM + REAL(KIND=JPRB) :: ZLEPDIM + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + ! ------------------------------------------------------------------ + + IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',0,ZHOOK_HANDLE) + + IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS_PRELEG ===' + + !* 1. Initialize partitioning of wave numbers to PEs ! + ! ---------------------------------------------- + + ALLOCATE(D%NASM0(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NASM0 ',SIZE(D%NASM0 ),SHAPE(D%NASM0 ) + + ALLOCATE(DALD%NESM0(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NESM0 ',SIZE(DALD%NESM0 ),SHAPE(DALD%NESM0 ) + + ALLOCATE(D%NATM0(0:R%NTMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NATM0 ',SIZE(D%NATM0 ),SHAPE(D%NATM0 ) + ALLOCATE(D%NUMPP(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NUMPP ',SIZE(D%NUMPP ),SHAPE(D%NUMPP ) + ALLOCATE(D%NPOSSP(NPRTRW+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NPOSSP',SIZE(D%NPOSSP ),SHAPE(D%NPOSSP ) + + ALLOCATE(D%NPROCM(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPROCM',SIZE(D%NPROCM ),SHAPE(D%NPROCM ) + + ALLOCATE(DALD%NPME(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NPME',SIZE(DALD%NPME),SHAPE(DALD%NPME) + ALLOCATE(DALD%NCPL2M(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NCPL2M',SIZE(DALD%NCPL2M),SHAPE(DALD%NCPL2M) + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,IKNTMP,IKMTMP) + DALD%NPME(0)=1 + DO JM=1,RALD%NMSMAX + DALD%NPME(JM)=DALD%NPME(JM-1)+IKNTMP(JM-1)+1 + ENDDO + DO JM=0,RALD%NMSMAX + DALD%NCPL2M(JM) = 2*(IKNTMP(JM)+1) + ENDDO + ALLOCATE(FALD%RLEPINM(R%NSPEC_G/2)) + IF(LLP2)WRITE(NOUT,9) 'FALD%RLEPINM',SIZE(FALD%RLEPINM),SHAPE(FALD%RLEPINM) + DO JM=0,RALD%NMSMAX + DO JN=1,IKNTMP(JM) + ZLEPDIM=-((REAL(JM,JPRB)**2)*(GALD%EXWN**2)+& + & (REAL(JN,JPRB)**2)*(GALD%EYWN**2)) + FALD%RLEPINM(DALD%NPME(JM)+JN)=1./ZLEPDIM + ENDDO + ENDDO + DO JM=1,RALD%NMSMAX + ZLEPDIM=-(REAL(JM,JPRB)**2)*(GALD%EXWN**2) + FALD%RLEPINM(DALD%NPME(JM))=1./ZLEPDIM + ENDDO + FALD%RLEPINM(DALD%NPME(0))=0. + + D%NUMPP(:) = 0 + ISPEC(:) = 0 + DALD%NESM0(:)=-99 + + IMDIM = 0 + IL = 1 + IND = 1 + IK = 0 + IPOS = 1 + DO JM=0,RALD%NMSMAX + IK = IK + IND + IF (IK > NPRTRW) THEN + IK = NPRTRW + IND = -1 + ELSEIF (IK < 1) THEN + IK = 1 + IND = 1 + ENDIF + + IKM =DALD%NCPL2M(JM)/2 -1 + D%NPROCM(JM) = IK + ISPEC(IK) = ISPEC(IK)+IKM+1 + D%NUMPP(IK) = D%NUMPP(IK)+1 + IF (IK == MYSETW) THEN + IMDIM = IMDIM + IKM+1 + IMYMS(IL) = JM + DALD%NESM0(JM) = IPOS + IPOS = IPOS+(IKM+1)*4 + IL = IL+1 + ENDIF + ENDDO + D%NPOSSP(1) = 1 + ISPEC2P = 4*ISPEC(1) + D%NSPEC2MX = ISPEC2P + DO JA=2,NPRTRW + D%NPOSSP(JA) = D%NPOSSP(JA-1)+ISPEC2P + ISPEC2P = 4*ISPEC(JA) + D%NSPEC2MX=MAX(D%NSPEC2MX,ISPEC2P) + ENDDO + D%NPOSSP(NPRTRW+1) = D%NPOSSP(NPRTRW)+ISPEC2P + + D%NSPEC2 = 4*IMDIM + D%NSPEC=D%NSPEC2 + + D%NUMP = D%NUMPP (MYSETW) + ALLOCATE(D%MYMS(D%NUMP)) + IF(LLP2)WRITE(NOUT,9) 'D%MYMS ',SIZE(D%MYMS ),SHAPE(D%MYMS ) + D%MYMS(:) = IMYMS(1:D%NUMP) + D%NUMTP = D%NUMP + + ! pointer to the first wave number of a given wave-set in NALLMS array + ALLOCATE(D%NPTRMS(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NPTRMS ',SIZE(D%NPTRMS ),SHAPE(D%NPTRMS ) + D%NPTRMS(:) = 1 + DO JA=2,NPRTRW + D%NPTRMS(JA) = D%NPTRMS(JA-1)+D%NUMPP(JA-1) + ENDDO + ! D%NALLMS : wave numbers for all wave-set concatenated together to give all + ! wave numbers in wave-set order. + ALLOCATE(D%NALLMS(RALD%NMSMAX+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NALLMS ',SIZE(D%NALLMS ),SHAPE(D%NALLMS ) + IC(:) = 0 + DO JM=0,RALD%NMSMAX + D%NALLMS(IC(D%NPROCM(JM))+D%NPTRMS(D%NPROCM(JM))) = JM + IC(D%NPROCM(JM)) = IC(D%NPROCM(JM))+1 + ENDDO + ALLOCATE(D%NDIM0G(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NDIM0G ',SIZE(D%NDIM0G ),SHAPE(D%NDIM0G ) + IPOS = 1 + DO JA=1,NPRTRW + DO JMLOC=1,D%NUMPP(JA) + IM = D%NALLMS(D%NPTRMS(JA)+JMLOC-1) + D%NDIM0G(IM) = IPOS + IPOS = IPOS+2*DALD%NCPL2M(IM) + ENDDO + ENDDO + +ALLOCATE(D%NLATLS(NPRTRW,NPRTRV)) +IF(LLP2)WRITE(NOUT,9) 'D%NLATLS',SIZE(D%NLATLS ),SHAPE(D%NLATLS ) +ALLOCATE(D%NLATLE(NPRTRW,NPRTRV)) +IF(LLP2)WRITE(NOUT,9) 'D%NLATLE',SIZE(D%NLATLE ),SHAPE(D%NLATLE ) + +D%NLATLS(:,:) = 9999 +D%NLATLE(:,:) = -1 + +ILATPP = R%NDGL/NPRTRW +IRESTL = R%NDGL-NPRTRW*ILATPP +DO JW=1,NPRTRW + IF (JW > IRESTL) THEN + D%NLATLS(JW,1) = IRESTL*(ILATPP+1)+(JA-IRESTL-1)*ILATPP+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP-1 + ELSE + D%NLATLS(JW,1) = (JA-1)*(ILATPP+1)+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP + ENDIF +ENDDO +ILAST=0 +DO JW=1,NPRTRW + ILATPP = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)/NPRTRV + IRESTL = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)-NPRTRV*ILATPP + DO JV=1,NPRTRV + IF (JV > IRESTL) THEN + D%NLATLS(JW,JV) = IRESTL*(ILATPP+1)+(JV-IRESTL-1)*ILATPP+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP-1 + ELSE + D%NLATLS(JW,JV) = (JV-1)*(ILATPP+1)+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP + ENDIF + ENDDO + ILAST=D%NLATLE(JW,NPRTRV) +ENDDO +IF (LLP1) THEN + DO JW=1,NPRTRW + DO JV=1,NPRTRV + WRITE(NOUT,'(" JW=",I6," JV=",I6," D%NLATLS=",I6," D%NLATLE=",I6)')& + & JW,JV,D%NLATLS(JW,JV),D%NLATLE(JW,JV) + ENDDO + ENDDO +ENDIF + +ALLOCATE(D%NPMT(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMT ',SIZE(D%NPMT ),SHAPE(D%NPMT ) +ALLOCATE(D%NPMS(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMS ',SIZE(D%NPMS ),SHAPE(D%NPMS ) +ALLOCATE(D%NPMG(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMG ',SIZE(D%NPMG ),SHAPE(D%NPMG ) +IDT = R%NTMAX-R%NSMAX +INM = 0 +DO JMLOC=1,D%NUMP + IMLOC = D%MYMS(JMLOC) + + INM = INM+R%NTMAX+2-IMLOC +ENDDO +INM = 0 +DO JM=0,R%NSMAX + + INM = INM+R%NTMAX+2-JM +ENDDO + +D%NLEI3D = (R%NLEI3-1)/NPRTRW+1 + +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS_PRELEG +END MODULE SUEMP_TRANS_PRELEG_MOD diff --git a/src/etrans/cpu/internal/suemplat_mod.F90 b/src/etrans/cpu/internal/suemplat_mod.F90 new file mode 100644 index 0000000..c06f316 --- /dev/null +++ b/src/etrans/cpu/internal/suemplat_mod.F90 @@ -0,0 +1,252 @@ +MODULE SUEMPLAT_MOD +CONTAINS +SUBROUTINE SUEMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,LDSPLITLAT,KMYPROC,KLOEN,KDGUX) + +!**** *SUEMPLAT * - Initialize gridpoint distrbution in N-S direction + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *SUEMPLAT * + +! Explicit arguments - input : +! -------------------- +! KDGL -last latitude +! KPROC -total number of processors +! KPROCA -number of processors in A direction +! KMYSETA -process number in A direction +! LDSPLIT -true for latitudes shared between sets +! PWEIGHT -weight per grid-point if weighted +! distribution +! LDEQ_REGIONS -true if eq_regions partitioning +! LDWEIGHTED_DISTR -true if weighted distribution + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KRESTM -number of PEs with one extra point +! KFRSTLAT -first latitude row on processor +! KLSTLAT -last latitude row on processor +! KFRSTLOFF -offset for first latitude in set +! KPROCAGP -number of grid points per A set +! KPTRLAT -pointer to start of latitude +! KPTRFRSTLAT-pointer to first latitude +! KPTRLSTLAT -pointer to last latitude +! KPTRFLOFF -offset for pointer to first latitude +! LDSPLITLAT -true for latitudes which are split +! PMEDIAP -mean weight per PE if weighted +! distribution +! + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. SUMPLATB and SUEMPLATB. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! David Dent:97-06-02 parameters KFRSTLAT etc added +! JF. Estrade:97-11-13 Adaptation to ALADIN case +! J.Boutahar: 98-07-06 phasing with CY19 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings +! (correct computation of extrapolar latitudes for KPROCL). +! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. +! - merge old sumplat.F and suemplat.F +! - gather 'lelam' code and 'not lelam' code. +! - clean (useless duplication of variables, non doctor features). +! - remodularise according to lelam/not lelam +! -> lelam features in new routine suemplatb.F, +! not lelam features in new routine sumplatb.F +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 20-Sep-2010 Phasing cy37 +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV + +USE SUEMPLATB_MOD ,ONLY : SUEMPLATB +USE SUMPLATBEQ_MOD ,ONLY : SUMPLATBEQ +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) +REAL(KIND=JPRB),INTENT(OUT) :: PMEDIAP +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) + +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL,INTENT(OUT) :: LDSPLITLAT(:) +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(IN) :: KMYPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(KDGL) +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX + +! === END OF INTERFACE BLOCK === +INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) + +INTEGER(KIND=JPIM) :: IPTRLATITUDE, JA, JGL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +LOGICAL :: LLFOURIER +LOGICAL :: LLDEBUG=.FALSE. + +! ----------------------------------------------------------------- + +!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF +! KMEDIAP, KRESTM, INDIC, ILAST. +! ----------------------------------------- + +IF (LHOOK) CALL DR_HOOK('SUEMPLAT_MOD:SUEMPLAT',0,ZHOOK_HANDLE) + +INDIC(:)=0 +ILAST(:)=0 + +IF(LDWEIGHTED_DISTR.AND..NOT.LDEQ_REGIONS)THEN + CALL ABORT_TRANS ('SUEMPLAT: LDWEIGHTED_DISTR=T AND LDEQ_REGIONS=F NOT SUPPORTED') +ENDIF + +IF( LDEQ_REGIONS )THEN + CALL SUMPLATBEQ(1,KDGL,KPROC,KPROCA,KLOEN,LDSPLIT,LDEQ_REGIONS,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,INDIC,ILAST) +ELSE + LLFOURIER=.FALSE. +!REK commented out for now ... monkey business to be done again, should lead to the use of sumplatb +!REK CALL SUMPLATB(1,KDGL,KPROCA,G%NLOEN,LDSPLIT,LLFOURIER,& +!REK &KMEDIAP,KRESTM,INDIC,ILAST) + CALL SUEMPLATB(1,KDGL,KPROCA,KLOEN,LDSPLIT,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,INDIC,ILAST,KDGUX) +ENDIF + +! ----------------------------------------------------------------- + +!* 2. CODE NOT DEPENDING ON 'LELAM': COMPUTATION OF +! KFRSTLAT TO LDSPLITLAT. +! --------------------------------------------- + +! * Computation of first and last latitude of processor sets +! ----------- in grid-point-space ----------------------- +IF(KMYPROC==1.AND.LLDEBUG)THEN + WRITE(0,'("")') + WRITE(0,'("SUEMPLAT_MOD:LDWEIGHTED_DISTR=",L1)')LDWEIGHTED_DISTR + WRITE(0,'("")') + DO JA=1,KPROCA + WRITE(0,'("SUEMPLAT_MOD: JA=",I3," ILAST=",I3," INDIC=",I3)')& + &JA,ILAST(JA),INDIC(JA) + ENDDO + WRITE(0,'("")') + IF( LDEQ_REGIONS .AND. LDSPLIT )THEN + DO JA=1,KPROCA + WRITE(0,'("SUEMPLAT_MOD: JA=",I3," KPROCAGP=",I8)')& + &JA,KPROCAGP(JA) + ENDDO + WRITE(0,'("")') + ENDIF +ENDIF +KFRSTLAT(1) = 1 +KLSTLAT(KPROCA) = KDGL +DO JA=1,KPROCA-1 + IF(KMYPROC==1 .AND. NPRINTLEV > 1)THEN + WRITE(NOUT,'("SUEMPLAT_MOD: JA=",I3," ILAST=",I3," INDIC=",I3)')& + &JA,ILAST(JA),INDIC(JA) + ENDIF + IF ((.NOT. LDSPLIT) .OR. INDIC(JA) == 0) THEN + KFRSTLAT(JA+1) = ILAST(JA) + 1 + KLSTLAT(JA) = ILAST(JA) + ELSE + KFRSTLAT(JA+1) = INDIC(JA) + KLSTLAT(JA) = INDIC(JA) + ENDIF +ENDDO +KFRSTLOFF=KFRSTLAT(KMYSETA)-1 + +! * Initialise following data structures:- +! NPTRLAT (pointer to the start of each latitude) +! LSPLITLAT (TRUE if latitude is split over two A sets) +! NPTRFRSTLAT (pointer to the first latitude of each A set) +! NPTRLSTLAT (pointer to the last latitude of each A set) + +DO JGL=1,KDGL + KPTRLAT (JGL)=-999 + LDSPLITLAT(JGL)=.FALSE. +ENDDO +IPTRLATITUDE=0 +DO JA=1,KPROCA + DO JGL=KFRSTLAT(JA),KLSTLAT(JA) + IPTRLATITUDE=IPTRLATITUDE+1 + LDSPLITLAT(JGL)=.TRUE. + IF( KPTRLAT(JGL) == -999 )THEN + KPTRLAT(JGL)=IPTRLATITUDE + LDSPLITLAT(JGL)=.FALSE. + ENDIF + ENDDO +ENDDO +DO JA=1,KPROCA + IF( LDSPLITLAT(KFRSTLAT(JA)) .AND. JA /= 1 )THEN + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA))+1 + ELSE + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA)) + ENDIF + IF( LDSPLITLAT(KLSTLAT(JA)) .AND. JA == KPROCA)THEN + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA))+1 + ELSE + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA)) + ENDIF +ENDDO +KPTRFLOFF=KPTRFRSTLAT(KMYSETA)-1 +IF(KMYPROC==1 .AND. NPRINTLEV > 1)THEN + DO JGL=1,KDGL + WRITE(NOUT,'("SUEMPLAT_MOD: JGL=",I3," KPTRLAT=",I3," LDSPLITLAT=",L4)')& + & JGL,KPTRLAT(JGL),LDSPLITLAT(JGL) + ENDDO + DO JA=1,KPROCA + WRITE(NOUT,'("SUEMPLAT_MOD: JA=",I3," KFRSTLAT=",I3," KLSTLAT=",I3,& + & " KPTRFRSTLAT=",I3," KPTRLSTLAT=",I3)')& + & JA,KFRSTLAT(JA),KLSTLAT(JA),KPTRFRSTLAT(JA),KPTRLSTLAT(JA) + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMPLAT_MOD:SUEMPLAT',1,ZHOOK_HANDLE) +END SUBROUTINE SUEMPLAT +END MODULE SUEMPLAT_MOD + diff --git a/src/etrans/cpu/internal/suemplatb_mod.F90 b/src/etrans/cpu/internal/suemplatb_mod.F90 new file mode 100644 index 0000000..a736177 --- /dev/null +++ b/src/etrans/cpu/internal/suemplatb_mod.F90 @@ -0,0 +1,236 @@ +MODULE SUEMPLATB_MOD +CONTAINS +SUBROUTINE SUEMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,KINDIC,KLAST,KDGUX) + +!**** *SUMPLATB * - Routine to initialize parallel environment + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *SUMPLATB * + +! Explicit arguments - input : +! -------------------- +! KDGSA -first latitude (grid-space) +! (may be different from NDGSAG) +! KDGL -last latitude +! KPROCA -number of processors in A direction +! KLOENG -actual number of longitudes per latitude. +! LDSPLIT -true for latitudes shared between sets +! KDGUX -last latitude for meaningful computations +! (suggested to pass NDGUX in gp-space, NDGL in Fourier space +! for having a good load-balance) +! PWEIGHT -weight per grid-point if weighted distribution +! LDWEIGHTED_DISTR -true if weighted distribution` + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KPROCAGP -number of grid points per A set +! KRESTM -number of PEs with one extra point +! KINDIC -intermediate quantity for 'sumplat' +! KLAST -intermediate quantity for 'sumplat' +! PMEDIAP -mean weight per PE if weighted distribution + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! K. YESSAD (after old version of sumplat.F). + +! Modifications. +! -------------- +! Original : 98-12-07 +! G. Radnoti: 03-03-03: Semi-merge with sumplatb, only difference: +! NS-partitioning according to NDGUX +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 21-Sep-2010 phasing CY37 +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(KPROCA) +REAL(KIND=JPRB),INTENT(IN) :: PMEDIAP + +INTEGER(KIND=JPIM) :: IPP1(KPROCA),ILAST1(KPROCA) +INTEGER(KIND=JPIM) :: IPP(KPROCA) +INTEGER(KIND=JPIM) :: IFIRST(KPROCA) + +INTEGER(KIND=JPIM) :: ICOMP, IGL, IMAXI, IMAXIOL, IMEDIA, ITOT, JA, JGL,& + & ILAST,IREST,ILIMIT,IFRST +LOGICAL :: LLDONE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ----------------------------------------------------------------- + +!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. +! ---------------------------------------------- + +! * Computation of KMEDIAP and KRESTM. + +IF (LHOOK) CALL DR_HOOK('SUEMPLATB_MOD:SUEMPLATB',0,ZHOOK_HANDLE) +IF (LDWEIGHTED_DISTR) THEN + CALL ABORT_TRANS ('SUMPLATBEQ: ALADIN CODE IS NOT PREPARED FOR WEIGHTED DISTRIBUTION') +ENDIF +IMEDIA = SUM(KLOENG(KDGSA:KDGUX)) +KMEDIAP = IMEDIA / KPROCA +IF (KMEDIAP < KLOENG(KDGL/2)) THEN + CALL ABORT_TRANS ('SUMPLATB: KPROCA TOO BIG FOR THIS RESOLUTION') +ENDIF +KRESTM = IMEDIA - KMEDIAP * KPROCA +IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 + +! * Computation of intermediate quantities KINDIC and KLAST + +IF (LDSPLIT) THEN + + IREST = 0 + ILAST =0 + DO JA=1,KPROCA + IF (JA <= KRESTM .OR. KRESTM == 0) THEN + ICOMP = KMEDIAP + ELSE + ICOMP = KMEDIAP - 1 + ENDIF + ITOT = IREST + IGL = ILAST+1 + DO JGL=IGL,KDGUX + ILAST = JGL + IF(ITOT+KLOENG(JGL) < ICOMP) THEN + ITOT = ITOT+KLOENG(JGL) + ELSEIF(ITOT+KLOENG(JGL) == ICOMP) THEN + IREST = 0 + KLAST(JA) = JGL + KINDIC(JA) = 0 + EXIT + ELSE + IREST = KLOENG(JGL) -(ICOMP-ITOT) + KLAST(JA) = JGL + KINDIC(JA) = JGL + EXIT + ENDIF + ENDDO + ENDDO + KLAST(KPROCA)=KDGL + KINDIC(KPROCA)=0 +ELSE + + KINDIC(:) = 0 + + IMAXI = KMEDIAP-1 + IMAXIOL = HUGE(IMAXIOL) + DO + ILIMIT = IMAXI + IMAXI = 0 + IFRST = KDGUX + ILAST1(:) = 0 + IPP1(:) = 0 + DO JA=KPROCA,1,-1 + IGL = IFRST + LATS:DO JGL=IGL,1,-1 + IF (IPP1(JA) < ILIMIT .OR. JA == 1) THEN + IFRST = JGL-1 + IPP1(JA) = IPP1(JA) + KLOENG(JGL) + IF(ILAST1(JA) == 0) ILAST1(JA) = JGL + ELSE + EXIT LATS + ENDIF + ENDDO LATS + IMAXI = MAX (IMAXI,IPP1(JA)) + ENDDO + IF(IMAXI >= IMAXIOL) EXIT + KLAST(:) = ILAST1(:) + IPP(:) = IPP1(:) + IMAXIOL = IMAXI + ENDDO + +! make the distribution more uniform +! ---------------------------------- + + IFIRST(1) = 0 + IF (KLAST(1) > 0) IFIRST(1) = 1 + DO JA=2,KPROCA + IF (IPP(JA) > 0) THEN + IFIRST(JA) = KLAST(JA-1)+1 + ELSE + IFIRST(JA) = 0 + ENDIF + ENDDO + + LLDONE = .FALSE. + DO WHILE( .NOT.LLDONE ) + LLDONE = .TRUE. + + DO JA=1,KPROCA-1 + IF (IPP(JA) > IPP(JA+1)) THEN + IF (IPP(JA)-IPP(JA+1) > IPP(JA+1) + 2 *& + & KLOENG(KLAST(JA)) -IPP(JA) ) THEN + IPP(JA) = IPP(JA) - KLOENG(KLAST(JA)) + IPP(JA+1) = IPP(JA+1) + KLOENG(KLAST(JA)) + IF (KLAST(JA+1) == 0) KLAST(JA+1) = KLAST(JA) + IFIRST(JA+1) = KLAST(JA) + KLAST(JA) = KLAST(JA) - 1 + IF (KLAST(JA) == 0) IFIRST(JA) = 0 + LLDONE = .FALSE. + ENDIF + ELSE + IF( IFIRST(JA+1) > 0 )THEN + IF (IPP(JA+1)-IPP(JA) >= IPP(JA) + 2 *& + & KLOENG(IFIRST(JA+1)) -IPP(JA+1) ) THEN + IPP(JA) = IPP(JA) + KLOENG(IFIRST(JA+1)) + IPP(JA+1) = IPP(JA+1) - KLOENG(IFIRST(JA+1)) + KLAST(JA) = IFIRST(JA+1) + IF (IFIRST(JA) == 0) IFIRST(JA) = KLAST(JA) + IF (KLAST(JA+1) == KLAST(JA)) THEN + KLAST(JA+1) = 0 + IFIRST(JA+1) = 0 + ELSE + IFIRST(JA+1) = IFIRST(JA+1) + 1 + ENDIF + LLDONE = .FALSE. + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + KLAST(KPROCA)=KDGL +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMPLATB_MOD:SUEMPLATB',1,ZHOOK_HANDLE) +END SUBROUTINE SUEMPLATB +END MODULE SUEMPLATB_MOD diff --git a/src/etrans/cpu/internal/suestaonl_mod.F90 b/src/etrans/cpu/internal/suestaonl_mod.F90 new file mode 100644 index 0000000..7cd384d --- /dev/null +++ b/src/etrans/cpu/internal/suestaonl_mod.F90 @@ -0,0 +1,451 @@ +MODULE SUESTAONL_MOD +CONTAINS +SUBROUTINE SUESTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) + +!**** *SUESTAONL * - Routine to initialize parallel environment, TAL + +! Purpose. +! -------- +! Initialize D%NSTA and D%NONL. +! Calculation of distribution of grid points to processors : +! Splitting of grid in B direction + +!** Interface. +! ---------- +! *CALL* *SUESTAONL * + +! Explicit arguments : +! -------------------- +! KMEDIAP - mean number of grid points per PE +! KRESTM - number of PEs with one extra point +! LDWEIGHTED_DISTR -true if weighted distribution +! PWEIGHT -weight per grid-point if weighted +! distribution +! PMEDIAP -mean weight per PE if weighted +! distribution +! KPROCAGP -number of grid points per A set +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! Modified 98-12-04 C. Fischer: merge with SUESTAONL (Aladin) +! 03-03-03 G. Radnoti: no merge: only difference with +! sustaonl: ezone added to last a-set +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! A.Bogatchev Sep-2010 phasing for AL37 +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS +! R. El Khatib 26-Apr-2018 vectorization +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE ,ONLY : MPL_ALLGATHERV, MPL_RECV, MPL_SEND + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC +USE TPMALD_DIM ,ONLY : RALD +USE SET2PE_MOD ,ONLY : SET2PE +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_NS, N_REGIONS_EW +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KRESTM +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDWEIGHTED_DISTR +REAL(KIND=JPRB),INTENT(IN) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(:) + +INTEGER(KIND=JPIM) :: IXPTLAT(R%NDGL), ILSTPTLAT(R%NDGL) +INTEGER(KIND=JPIM) :: ICHK(R%NDLON,R%NDGL), ICOMBUF(R%NDGL*N_REGIONS_EW*2) + +INTEGER(KIND=JPIM) :: I1, I2, IBUFLEN, IDGLG, IDWIDE, & + & IGL, IGL1, IGL2, IGLOFF, IGPTA, & + & IGPTPRSETS, IGPTS, IGPTSP, ILEN, ILRECV, & + & ILSEND, INPLAT, INXLAT, IPOS, & + & IPROCB, IPTSRE, IRECV, & + & IREST, ISEND, ITAG, JA, JB, JGL, JL, JNPTSRE, & + & ILAT, ILON, ILOEN +INTEGER(KIND=JPIM),ALLOCATABLE :: ICOMBUFG(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZWEIGHT(:,:) +INTEGER(KIND=JPIM) :: JJ, ILENG(NPROC), IOFF(NPROC) + +LOGICAL :: LLABORT +LOGICAL :: LLP1,LLP2 + +REAL(KIND=JPRB) :: ZLAT, ZLAT1(R%NDGL), ZCOMP +REAL(KIND=JPRB) :: ZDIVID(R%NDGL),ZXPTLAT(R%NDGL) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ----------------------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',0,ZHOOK_HANDLE) +IXPTLAT (:)=999999 +ILSTPTLAT(:)=999999 +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IDWIDE = R%NDGL/2 +IBUFLEN = R%NDGL*N_REGIONS_EW*2 +IDGLG = R%NDGL + +I1 = MAX( 1,D%NFRSTLAT(MY_REGION_NS)-D%NFRSTLOFF) +I2 = MIN(IDGLG,D%NLSTLAT (MY_REGION_NS)-D%NFRSTLOFF) + +ILEN = D%NLSTLAT(MY_REGION_NS) - D%NFRSTLAT(MY_REGION_NS)+1 + +IGPTPRSETS = SUM(G%NLOEN(1:D%NFRSTLAT(MY_REGION_NS)-1)) + + +IF (D%LSPLIT) THEN + IF( LEQ_REGIONS )THEN + IGPTA=0 + DO JA=1,MY_REGION_NS-1 + IGPTA = IGPTA + KPROCAGP(JA) + ENDDO + IGPTS = KPROCAGP(MY_REGION_NS) + ELSE + IF (MY_REGION_NS <= KRESTM.OR.KRESTM == 0) THEN + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ELSE + IGPTS = KMEDIAP+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ENDIF + ELSE + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP-1 + IGPTA = KMEDIAP*KRESTM+IGPTS*(MY_REGION_NS-1-KRESTM) + ELSE + IGPTS = KMEDIAP-1+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*KRESTM+(KMEDIAP-1)*(MY_REGION_NS-1-KRESTM) + ENDIF + ENDIF + ENDIF +ELSE + IGPTA = IGPTPRSETS + IGPTS = SUM(G%NLOEN(D%NFRSTLAT(MY_REGION_NS):D%NLSTLAT(MY_REGION_NS))) +ENDIF +IGPTSP = IGPTS/N_REGIONS(MY_REGION_NS) +IREST = IGPTS-N_REGIONS(MY_REGION_NS)*IGPTSP +IXPTLAT(1) = IGPTA-IGPTPRSETS+1 +ZXPTLAT(1) = REAL(IXPTLAT(1)) +ILSTPTLAT(1) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)) +INPLAT = G%NLOEN(D%NFRSTLAT(MY_REGION_NS))-IXPTLAT(1)+1 +DO JGL=2,ILEN + IXPTLAT(JGL) = 1 + ZXPTLAT(JGL) = 1.0_JPRB + ILSTPTLAT(JGL) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) + INPLAT = INPLAT+G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) +ENDDO +ILSTPTLAT(ILEN) = G%NLOEN(D%NLSTLAT(MY_REGION_NS))-INPLAT+IGPTS + +DO JB=1,N_REGIONS_EW + DO JGL=1,R%NDGL+N_REGIONS_NS-1 + D%NSTA(JGL,JB) = 0 + D%NONL(JGL,JB) = 0 + ENDDO +ENDDO + +! grid point decomposition +! --------------------------------------- +DO JGL=1,ILEN + ZDIVID(JGL)=1._JPRB/REAL(G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1),JPRB) +ENDDO +IF( LDWEIGHTED_DISTR )THEN + ALLOCATE(ZWEIGHT(G%NLOEN(R%NDGL/2),R%NDGL)) + IGL=0 + DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IGL=IGL+1 + ZWEIGHT(JL,JGL)=PWEIGHT(IGL) + ENDDO + ENDDO + ZCOMP=0 + IGPTS=0 +ENDIF +DO JB=1,N_REGIONS(MY_REGION_NS) + + IF( .NOT.LDWEIGHTED_DISTR )THEN + + IF (JB <= IREST) THEN + IPTSRE = IGPTSP+1 + ELSE + IPTSRE = IGPTSP + ENDIF + + DO JNPTSRE=1,IPTSRE + ZLAT = 1._JPRB + DO JGL=1,ILEN + ZLAT1(JGL) = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + ENDDO + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + IF (ZLAT1(JGL) < ZLAT) THEN + ZLAT=ZLAT1(JGL) + INXLAT = JGL + ENDIF + ENDIF + ENDDO + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ELSE + DO WHILE ( (JB < N_REGIONS(MY_REGION_NS) .AND. ZCOMP < PMEDIAP) & + & .OR. (JB == N_REGIONS(MY_REGION_NS) .AND. IGPTS < KPROCAGP(MY_REGION_NS)) ) + + IGPTS = IGPTS + 1 + ZLAT = 1._JPRB + DO JGL=1,ILEN + ZLAT1(JGL) = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + ENDDO + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + IF (ZLAT1(JGL) < ZLAT) THEN + ZLAT = ZLAT1(JGL) + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + IF(IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1)THEN + CALL ABORT_TRANS(' SUSTAONL: IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1') + ENDIF + ILON=D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + ILAT=D%NFRSTLAT(MY_REGION_NS)+INXLAT-1 + ILOEN=G%NLOEN(ILAT) + IF(ILON<1.OR.ILON>ILOEN)THEN + CALL ABORT_TRANS(' SUSTAONL: ILON<1.OR.ILON>ILOEN') + ENDIF + ZCOMP = ZCOMP + ZWEIGHT(ILON,ILAT) + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ZCOMP = ZCOMP - PMEDIAP + + ENDIF + +ENDDO + +IF( LDWEIGHTED_DISTR )THEN + DEALLOCATE(ZWEIGHT) +ENDIF +! Exchange local partitioning info to produce global view + +IF( NPROC > 1 )THEN + IF( LEQ_REGIONS )THEN + + ITAG = MTAGPART + IPOS = 0 + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + ILENG(NPRCIDS(IRECV))=ILEN + ENDDO + ENDDO + IOFF(1)=0 + DO JJ=2,NPROC + IOFF(JJ)=IOFF(JJ-1)+ILENG(JJ-1) + ENDDO + ALLOCATE(ICOMBUFG(SUM(ILENG(:)))) + CALL MPL_ALLGATHERV(ICOMBUF(1:ILSEND),ICOMBUFG,ILENG,CDSTRING='SUSTAONL') + DO JA=1,N_REGIONS_NS + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + IPOS = IOFF(NPRCIDS(IRECV)) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUFG(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUFG(IPOS) + ENDDO + ENDIF + ENDDO + ENDDO + DEALLOCATE(ICOMBUFG) + + ELSE + + ITAG = MTAGPART + IPOS = 0 + DO JB=1,N_REGIONS(MY_REGION_NS) + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,JB) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,JB) + ENDDO + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUESTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + CALL SET2PE(ISEND,JA,MY_REGION_EW,0,0) + IF(ISEND /= MYPROC) THEN + CALL MPL_SEND(ICOMBUF(1:ILSEND),KDEST=NPRCIDS(ISEND),KTAG=ITAG, & + & CDSTRING='SUESTAONL:') + ENDIF + ENDDO + DO JA=1,N_REGIONS_NS + CALL SET2PE(IRECV,JA,MY_REGION_EW,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*N_REGIONS(JA)*2 + CALL MPL_RECV(ICOMBUF(1:ILEN),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG, & + & KOUNT=ILRECV,CDSTRING='SUESTAONL:') + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IPOS = 0 + DO JB=1,N_REGIONS(JA) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUF(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUF(IPOS) + ENDDO + ENDDO + ENDIF + ENDDO + + ENDIF +ENDIF + +! Confirm consistency of global partitioning, specifically testing for +! multiple assignments of same grid point and unassigned grid points + +LLABORT = .FALSE. +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + ICHK(JL,JGL) = 1 + ENDDO +ENDDO +DO JA=1,N_REGIONS_NS + IGLOFF = D%NPTRFRSTLAT(JA) + DO JB=1,N_REGIONS(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL = IGLOFF+JGL-IGL1 + DO JL=D%NSTA(IGL,JB),D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + IF( ICHK(JL,JGL) /= 1 )THEN + WRITE(NOUT,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " row=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + WRITE(0,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " ROW=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + LLABORT = .TRUE. + ENDIF + ICHK(JL,JGL) = 2 + ENDDO + ENDDO + ENDDO +ENDDO +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IF( ICHK(JL,JGL) /= 2 )THEN + WRITE(NOUT,'(" SUESTAONL : row=",i4," sta=",i4,& + & " GRID POINT NOT ASSIGNED")') JGL,JL + LLABORT = .TRUE. + ENDIF + ENDDO +ENDDO +IF( LLABORT )THEN + WRITE(NOUT,'(" SUESTAONL : inconsistent partitioning")') + CALL ABORT_TRANS(' SUESTAONL: inconsistent partitioning') +ENDIF + +IF (LLP1) THEN + WRITE(UNIT=NOUT,FMT='('' OUTPUT FROM ROUTINE SUESTAONL '')') + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' PARTITIONING INFORMATION '')') + WRITE(UNIT=NOUT,FMT='('' '')') + IPROCB = MIN(32,N_REGIONS_EW) + WRITE(UNIT=NOUT,FMT='(17X," SETB=",32(1X,I3))') (JB,JB=1,IPROCB) + DO JA=1,N_REGIONS_NS + IPROCB = MIN(32,N_REGIONS(JA)) + WRITE(UNIT=NOUT,FMT='('' '')') + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL=IGLOFF+JGL-IGL1 + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," NSTA=",& + & 32(1X,I3))') JA,JGL,(D%NSTA(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," D%NONL=",& + & 32(1X,I3))') JA,JGL,(D%NONL(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' '')') +ENDIF +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE SUESTAONL +END MODULE SUESTAONL_MOD diff --git a/src/etrans/cpu/internal/tpmald_dim.F90 b/src/etrans/cpu/internal/tpmald_dim.F90 new file mode 100644 index 0000000..7163342 --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_dim.F90 @@ -0,0 +1,23 @@ +MODULE TPMALD_DIM + +! Module for dimensions. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDDIM_TYPE + +! COLLOCATION GRID DIMENSIONS + +INTEGER(KIND=JPIM) :: NDGLSUR ! Number of rows of latitudes+... +INTEGER(KIND=JPIM) :: NMSMAX ! Zonal truncation +INTEGER(KIND=JPIM) :: NDGUX ! Number of rows in zone C+I +END TYPE ALDDIM_TYPE + +TYPE(ALDDIM_TYPE),ALLOCATABLE,TARGET :: ALDDIM_RESOL(:) +TYPE(ALDDIM_TYPE),POINTER :: RALD + +END MODULE TPMALD_DIM diff --git a/src/etrans/cpu/internal/tpmald_distr.F90 b/src/etrans/cpu/internal/tpmald_distr.F90 new file mode 100644 index 0000000..9f358db --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_distr.F90 @@ -0,0 +1,23 @@ +MODULE TPMALD_DISTR + +! Module for distributed memory environment. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDDISTR_TYPE + +INTEGER(KIND=JPIM) ,POINTER :: NESM0(:) ! Address in a spectral array of (m, n=m) +INTEGER(KIND=JPIM) ,POINTER :: NCPL2M(:) ! Number of complex Laplace coefficient for m given +INTEGER(KIND=JPIM) ,POINTER :: NPME(:) ! Address for the Laplace operator and its inverse + +END TYPE ALDDISTR_TYPE + +TYPE(ALDDISTR_TYPE),ALLOCATABLE,TARGET :: ALDDISTR_RESOL(:) +TYPE(ALDDISTR_TYPE),POINTER :: DALD + +END MODULE TPMALD_DISTR + diff --git a/src/etrans/cpu/internal/tpmald_fft.F90 b/src/etrans/cpu/internal/tpmald_fft.F90 new file mode 100644 index 0000000..337dade --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_fft.F90 @@ -0,0 +1,20 @@ +MODULE TPMALD_FFT + +! Module for Fourier transforms. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDFFT_TYPE +REAL(KIND=JPRB) ,POINTER :: TRIGSE(:) ! list of trigonometric function values +INTEGER(KIND=JPIM),POINTER :: NFAXE(:) ! list of factors of truncation +LOGICAL :: LFFT992=.TRUE. +END TYPE ALDFFT_TYPE + +TYPE(ALDFFT_TYPE),ALLOCATABLE,TARGET :: ALDFFT_RESOL(:) +TYPE(ALDFFT_TYPE),POINTER :: TALD + +END MODULE TPMALD_FFT diff --git a/src/etrans/cpu/internal/tpmald_fields.F90 b/src/etrans/cpu/internal/tpmald_fields.F90 new file mode 100644 index 0000000..9dfda6d --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_fields.F90 @@ -0,0 +1,17 @@ +MODULE TPMALD_FIELDS + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDFIELDS_TYPE + +REAL(KIND=JPRB) ,POINTER :: RLEPINM(:) ! eigen-values of the inverse Laplace operator +END TYPE ALDFIELDS_TYPE + +TYPE(ALDFIELDS_TYPE),ALLOCATABLE,TARGET :: ALDFIELDS_RESOL(:) +TYPE(ALDFIELDS_TYPE),POINTER :: FALD + +END MODULE TPMALD_FIELDS diff --git a/src/etrans/cpu/internal/tpmald_geo.F90 b/src/etrans/cpu/internal/tpmald_geo.F90 new file mode 100644 index 0000000..326739a --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_geo.F90 @@ -0,0 +1,22 @@ +MODULE TPMALD_GEO + +! Module containing data describing plane projection grid. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDGEO_TYPE + +! GEOGRAPHY + +REAL(KIND=JPRB) :: EYWN ! Y-reso +REAL(KIND=JPRB) :: EXWN ! X-reso +END TYPE ALDGEO_TYPE + +TYPE(ALDGEO_TYPE),ALLOCATABLE,TARGET :: ALDGEO_RESOL(:) +TYPE(ALDGEO_TYPE),POINTER :: GALD + +END MODULE TPMALD_GEO diff --git a/src/etrans/cpu/internal/tpmald_tcdis.F90 b/src/etrans/cpu/internal/tpmald_tcdis.F90 new file mode 100644 index 0000000..2b57ca5 --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_tcdis.F90 @@ -0,0 +1,13 @@ +MODULE TPMALD_TCDIS + +! useless + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRB) :: TCDIS + +END MODULE TPMALD_TCDIS diff --git a/src/etrans/gpu/CMakeLists.txt b/src/etrans/gpu/CMakeLists.txt new file mode 100644 index 0000000..608b66b --- /dev/null +++ b/src/etrans/gpu/CMakeLists.txt @@ -0,0 +1,113 @@ +# Create library etrans_${prec}.so + + + +if(CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC") + + # Compile setup_trans with pinned memory to improve data movement performance. + ectrans_add_compile_options( + SOURCES external/setup_trans.F90 + #FLAGS "-gpu=pinned,deepcopy,fastmath,nordc") + FLAGS "-gpu=pinned,fastmath") + # TODO: check if it is sufficient to only set "-gpu=pinned" which appends rather than overwrites + +endif() + +# list of source files +ecbuild_list_add_pattern( LIST etrans_src + GLOB + external/*.F90 + internal/*.F90 + aux/*.F90 + QUIET + )# + +#set_source_files_properties( internal/ftinv_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) +#set_source_files_properties( internal/ftdir_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) + + +foreach( prec sp dp ) + if( HAVE_${prec} ) + foreach( gpumethod acc) + if( HAVE_${gpumethod}) + + # We build an object library first. And then use these objects to create a shared and static library. + + ectrans_add_library( + TARGET etrans_gpu_object_${prec}_${gpumethod} + TYPE OBJECT + SOURCES ${etrans_src} + PUBLIC_INCLUDES + $ + $ + $ + $ + $ + $ + + PUBLIC_LIBS + fiat + parkind_${prec} + + PRIVATE_LIBS hip::hipfft + roc::hipblas + roc::rocblas + roc::rocfft + MPI::MPI_Fortran + mpifort + omptarget + #MPI::MPI_C + MPI::MPI_CXX + #nvhpcwrapnvtx + #${LAPACK_LIBRARIES} + ) + + target_link_options ( etrans_gpu_object_${prec}_${gpumethod} INTERFACE "$<$:${${gpumethod}_link_flags}>" ) + target_compile_options( etrans_gpu_object_${prec}_${gpumethod} PUBLIC $<$:${${gpumethod}_flags}>) + + ectrans_target_fortran_module_directory( + TARGET etrans_gpu_object_${prec}_${gpumethod} + #MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/etrans_gpu_${prec}_${gpumethod} + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/etrans_gpu_${prec}_${gpumethod} + INSTALL_DIRECTORY module/etrans_gpu_${prec}_${gpumethod} + ) + + if( HAVE_OMP ) + target_link_libraries( etrans_gpu_object_${prec}_${gpumethod} PRIVATE OpenMP::OpenMP_Fortran ) + endif() + + if( prec STREQUAL sp ) + target_compile_definitions( etrans_gpu_object_${prec}_${gpumethod} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE ) + endif() + + if( gpumethod STREQUAL acc ) + target_compile_definitions( etrans_gpu_object_${prec}_${gpumethod} PRIVATE ACCGPU ) + target_link_libraries( etrans_gpu_object_${prec}_${gpumethod} PRIVATE OpenACC::OpenACC_Fortran ) + endif() + + if( gpumethod STREQUAL omp ) + target_compile_definitions( etrans_gpu_object_${prec}_${gpumethod} PRIVATE OMPGPU ) + endif() + + if( HAVE_GPU_AWARE_MPI ) + target_compile_definitions( etrans_gpu_object_${prec}_${gpumethod} PRIVATE USE_CUDA_AWARE_MPI_FT ) + endif() + + ectrans_add_library( + TARGET etrans_gpu_shared_${prec}_${gpumethod} + OUTPUT_NAME etrans_gpu_${prec}_${gpumethod} + TYPE SHARED + LINKER_LANGUAGE Fortran + PUBLIC_LIBS etrans_gpu_object_${prec}_${gpumethod} + ) + + ectrans_add_library( + TARGET etrans_gpu_${prec}_${gpumethod} + TYPE STATIC + LINKER_LANGUAGE Fortran + PUBLIC_LIBS etrans_gpu_object_${prec}_${gpumethod} + ) + endif() + endforeach() + endif() +endforeach() diff --git a/src/etrans/gpu/aux/ellips.F90 b/src/etrans/gpu/aux/ellips.F90 new file mode 100644 index 0000000..e3af473 --- /dev/null +++ b/src/etrans/gpu/aux/ellips.F90 @@ -0,0 +1,8 @@ +! Oct-2012 P. Marguinaud 64b LFI + +#undef JLIK +#undef _ELLIPS_ +#define JLIK JPIM +#define _ELLIPS_ ELLIPS +#include "ellips.h" + diff --git a/src/etrans/gpu/aux/ellips.h b/src/etrans/gpu/aux/ellips.h new file mode 100644 index 0000000..b196ebf --- /dev/null +++ b/src/etrans/gpu/aux/ellips.h @@ -0,0 +1,87 @@ +! Jan-2011 P. Marguinaud Interface to thread-safe FA +SUBROUTINE _ELLIPS_ (KSMAX,KMSMAX,KNTMP,KMTMP) +USE PARKIND1, ONLY : JPRB, JPIM, JPIB, JPRD +USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +!USE LFI_PRECISION +IMPLICIT NONE +! +! ***ELLIPS*** - General routine for computing elliptic truncation +! +! Purpose. +! -------- +! Computation of zonal and meridional limit wavenumbers within the ellipse +! Interface: +! ---------- +! *CALL* *ELLIPS * +! +! Explicit arguments : +! -------------------- +! +! Implicit arguments : +! -------------------- +! +! +! Method. +! ------- +! See documentation +! +! Externals. NONE. +! ---------- +! +! Reference. +! ---------- +! ARPEGE/ALADIN documentation +! +! Author. +! ------- +! G. Radnoti LACE 97/04/04 +! +! Modifications. +!------------------------------------------------------------- +! J.Vivoda, 99/05/19 treating NSMAX=0 and NMSMAX=0 +! +! +INTEGER (KIND=JLIK) KSMAX, KMSMAX +INTEGER (KIND=JLIK) KNTMP(0:KMSMAX),KMTMP(0:KSMAX) +! +INTEGER (KIND=JLIK) JM, JN +! +REAL (KIND=JPRD) ZEPS, ZKN, ZKM, ZAUXIL +! +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +IF (LHOOK) CALL DR_HOOK('ELLIPS',0,ZHOOK_HANDLE) +ZEPS=1.E-10 +ZAUXIL=0. +! +! 1. Computing meridional limit wavenumbers along zonal wavenumbers +! +DO JM=1,KMSMAX-1 +ZKN = REAL(KSMAX,JPRD)/REAL(KMSMAX,JPRD)* & +& SQRT(MAX(ZAUXIL,REAL(KMSMAX**2-JM**2,JPRD))) + KNTMP(JM)=INT(ZKN+ZEPS, JLIK) +ENDDO + +IF( KMSMAX.EQ.0 )THEN + KNTMP(0)=KSMAX +ELSE + KNTMP(0)=KSMAX + KNTMP(KMSMAX)=0 +ENDIF +! +! 2. Computing zonal limit wavenumbers along meridional wavenumbers +! +DO JN=1,KSMAX-1 + ZKM = REAL(KMSMAX,JPRD)/REAL(KSMAX,JPRD)* & + & SQRT(MAX(ZAUXIL,REAL(KSMAX**2-JN**2,JPRD))) + KMTMP(JN)=INT(ZKM+ZEPS, JLIK) +ENDDO + +IF( KSMAX.EQ.0 )THEN + KMTMP(0)=KMSMAX +ELSE + KMTMP(0)=KMSMAX + KMTMP(KSMAX)=0 +ENDIF +! +IF (LHOOK) CALL DR_HOOK('ELLIPS',1,ZHOOK_HANDLE) +END diff --git a/src/etrans/gpu/aux/ellips64.F90 b/src/etrans/gpu/aux/ellips64.F90 new file mode 100644 index 0000000..0839382 --- /dev/null +++ b/src/etrans/gpu/aux/ellips64.F90 @@ -0,0 +1,8 @@ +! Oct-2012 P. Marguinaud 64b LFI + +#undef JLIK +#undef _ELLIPS_ +#define JLIK JPIB +#define _ELLIPS_ ELLIPS64 +#include "ellips.h" + diff --git a/src/etrans/gpu/aux/extper_mod.F90 b/src/etrans/gpu/aux/extper_mod.F90 new file mode 100644 index 0000000..8135d80 --- /dev/null +++ b/src/etrans/gpu/aux/extper_mod.F90 @@ -0,0 +1,144 @@ +MODULE EXTPER_MOD +CONTAINS +SUBROUTINE EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& + & KPOINTERS,KALFA) + +! purpose : +! -------- +! Make spline extension. + +! *CALL* *EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& +! & KPOINTERS,KALFA) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! PWORK : Input: values in C U I area +! : Output: input+(spline extension on the E area) +! KDIM : Dimension of the C U I U E unit of work (one row or one m) +! KPSTA : Position where the unit of work starts +! KPOINTS : Position where the unit of work ends +! KFLDS : number of 2D fields +! KUNITS : Number of units of work +! KPOINTERS : Array of pointers for the units of work +! KALFA : boundary condition of a spline: +! = 0 ... natural spline +! = 1 ... boundary condition computed differentially +! (additional option) +! references : +! ---------- + +! author : +! ------ +! M. Hortal 03-11-2009 +! ----------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN +USE TPM_DISTR + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KDIM +INTEGER(KIND=JPIM),INTENT(IN) :: KPSTA +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTS +INTEGER(KIND=JPIM),INTENT(IN) :: KFLDS +INTEGER(KIND=JPIM),INTENT(IN) :: KUNITS +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTERS(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KALFA + +! arrays : +! -------- +INTEGER(KIND=JPIM) :: IENDX, IENDY, JFL, JLAT, JLON, IA + +REAL(KIND=JPRB) :: ZA, ZB, ZC, ZD, ZEPSA, ZEPSB, ZJ, ZK, ZKP1,& + & ZLAM, ZLAMB, ZM1, ZM2, ZMM, ZNY +REAL(KIND=JPRB) :: ZMAX(KUNITS), ZMIN(KUNITS) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EXTPER',0,ZHOOK_HANDLE) + +!* 0. Security +! -------- + +IF(UBOUND(PWORK,1) < KFLDS) THEN + CALL ABOR1(' EXTPER, PWORK first dimension too small') +ENDIF +IF(UBOUND(PWORK,2) < KDIM+2) THEN + WRITE(NOUT,*) ' UBOUND(PWORK,2)=',UBOUND(PWORK,2),' KDIM=',KDIM,' KUNITS=',& + &KUNITS + CALL ABOR1(' EXTPER, PWORK second dimension too small') +ENDIF +IF(UBOUND(KPOINTERS,1) < KUNITS) THEN + CALL ABOR1(' EXTPER, KPOINTERS too small') +ENDIF +IF(UBOUND(PWORK,2) < KPOINTERS(KUNITS)+KDIM) THEN + WRITE(NERR,*) ' EXTPER, KUNITS=',KUNITS,' KPOINTERS=',KPOINTERS(1:KUNITS),& + &' KDIM=',KDIM,' UBOUND(PWORK,2)=',UBOUND(PWORK,2) + CALL ABOR1(' EXTPER, value of KPOINTERS too large') +ENDIF + +!* 1. Spline Extension. +! ------------------- + +DO JFL = 1, KFLDS + + ZK = REAL(KDIM-KPOINTS+1,JPRB) + ZKP1 = ZK + 1.0_JPRB + ZLAMB = ZK/ZKP1 + ZNY = REAL(KALFA,JPRB)/ZKP1 + + DO JLAT=1,KUNITS + ZEPSA = & + &((PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK -& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1))*6._JPRB/ZKP1 -& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-2)) + + ZEPSB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA) -& + & (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPSTA+2)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ZMM + ZA = PWORK(JFL,KPOINTERS(JLAT)+KPOINTS) + ZB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK-& + & (2.0_JPRB*ZM1 + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + + DO JLON=KPOINTERS(JLAT)+KPOINTS+1,KPOINTERS(JLAT)+KDIM + + ZJ = REAL(JLON - (KPOINTERS(JLAT)+KPOINTS),JPRB) + PWORK(JFL,JLON) = ZA + ZJ * (ZB + ZJ * (ZC + ZD * ZJ)) + ENDDO + ENDDO + + +ENDDO + +IF (LHOOK) CALL DR_HOOK('EXTPER',1,ZHOOK_HANDLE) +END SUBROUTINE EXTPER +END MODULE EXTPER_MOD diff --git a/src/etrans/gpu/external/edir_trans.F90 b/src/etrans/gpu/external/edir_trans.F90 new file mode 100644 index 0000000..46d02fc --- /dev/null +++ b/src/etrans/gpu/external/edir_trans.F90 @@ -0,0 +1,681 @@ +SUBROUTINE EDIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! PMEANU(:),PMEANV(:) - mean wind +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTL - control of Legendre transform +! EFTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: 01-03-13 adaptation to aladin +! P. Smolikova 02-09-30 : AUX_PROC for d4 in NH +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 19-04-2013 Comparison of ubound(pspdiv,1) +! with ubound(pspvor,1) +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIR_TRANS_CTL_MOD ,ONLY : EDIR_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +!USE MPI, ONLY : MPI_BARRIER, MPI_COMM_WORLD + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +INTEGER(KIND=JPIM) :: IERROR +INTEGER, SAVE :: number_of_calls=0 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',0,ZHOOK_HANDLE) + +#ifndef gnarls +write (20,*) __FILE__, __LINE__ +number_of_calls=number_of_calls+1 +write (20,*) 'number of calls = ',number_of_calls +call flush(20) +write (0,*) __FILE__, __LINE__ +write (0,*) 'number of calls = ',number_of_calls +call flush(0) +#endif + +CALL GSTATS(1808,0) +CALL ESET_RESOL(KRESOL) + +! check if args are contiguous +if ( present(PSPVOR) ) THEN + write (20,*) 'shape(PSPVOR) = ',SHAPE(PSPVOR) + if (.not. is_contiguous(PSPVOR) ) call abort_trans('PSPVOR not contiguous') +endif +if ( present(PSPDIV) ) THEN + write (20,*) 'shape(PSPDIV) = ',SHAPE(PSPDIV) + if (.not. is_contiguous(PSPDIV) ) call abort_trans('PSPDIV not contiguous') +endif +if ( present(PSPSCALAR) ) THEN + write (20,*) 'shape(PSPSCALAR) = ',SHAPE(PSPSCALAR) + if (.not. is_contiguous(PSPSCALAR) ) call abort_trans('PSPSCALAR not contiguous') +endif +if ( present(PSPSC3A) ) THEN + write (20,*) 'shape(PSPSC3A) = ',SHAPE(PSPSC3A) + if (.not. is_contiguous(PSPSC3A) ) call abort_trans('PSPSC3A not contiguous') +endif +if ( present(PSPSC3B) ) THEN + write (20,*) 'shape(PSPSC3B) = ',SHAPE(PSPSC3B) + if (.not. is_contiguous(PSPSC3B) ) call abort_trans('PSPSC3B not contiguous') +endif +if ( present(PSPSC2) ) THEN + write (20,*) 'shape(PSPSC2) = ',SHAPE(PSPSC2) + if (.not. is_contiguous(PSPSC2) ) call abort_trans('PSPSC2 not contiguous') +endif +if ( present(PGP) ) THEN + write (20,*) 'shape(PGP) = ',SHAPE(PGP) + if (.not. is_contiguous(PGP) ) call abort_trans('PGP not contiguous') +endif +if ( present(PGPUV) ) THEN + write (20,*) 'shape(PGPUV) = ',SHAPE(PGPUV) + if (.not. is_contiguous(PGPUV) ) call abort_trans('PGPUV not contiguous') +endif +if ( present(PGP3A) ) THEN + write (20,*) 'shape(PGP3A) = ',SHAPE(PGP3A) + if (.not. is_contiguous(PGP3A) ) call abort_trans('PGP3A not contiguous') +endif +if ( present(PGP3B) ) THEN + write (20,*) 'shape(PGP3B) = ',SHAPE(PGP3B) + if (.not. is_contiguous(PGP3B) ) call abort_trans('PGP3B not contiguous') +endif +if ( present(PGP2) ) THEN + write (20,*) 'shape(PGP2) = ',SHAPE(PGP2) + if (.not. is_contiguous(PGP2) ) call abort_trans('PGP2 not contiguous') +endif +if ( present(PMEANU) ) THEN + write (20,*) 'shape(PMEANU) = ',SHAPE(PMEANU) + if (.not. is_contiguous(PMEANU) ) call abort_trans('PMEANU not contiguous') +endif +if ( present(PMEANV) ) THEN + write (20,*) 'shape(PMEANV) = ',SHAPE(PMEANV) + if (.not. is_contiguous(PMEANV) ) call abort_trans('PMEANV not contiguous') +endif +if ( present(KVSETUV) ) THEN + write (20,*) 'KVSETU = ',(KVSETUV) +endif +if ( present(KVSETSC) ) THEN + write (20,*) 'KVSETSC = ',(KVSETSC) +endif +if ( present(KVSETSC3A) ) THEN + write (20,*) 'KVSETSC3A = ',(KVSETSC3A) +endif +if ( present(KVSETSC3B) ) THEN + write (20,*) 'KVSETSC3B = ',(KVSETSC3B) +endif +if ( present(KVSETSC2) ) THEN + write (20,*) 'KVSETSC2 = ',(KVSETSC2) +endif +if ( present(KPROMA) ) THEN + write (20,*) 'KPROMA = ',KPROMA +endif +!call flush(20) + + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +! This is for use in TRGTOL which is shared with adjoint inverse transform +LSCDERS=.FALSE. +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANS : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= UBOUND(PSPVOR,1)) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1808,1) + +! ------------------------------------------------------------------ + + +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) +#ifdef USE_CUDA_AWARE_MPI_FT +!$ACC data copyin (PGP ) if (present (PGP )) +!$ACC data copyin (PGPUV) if (present (PGPUV)) +!$ACC data copyin (PGP3A) if (present (PGP3A)) +!$ACC data copyin (PGP3B) if (present (PGP3B)) +!$ACC data copyin (PGP2 ) if (present (PGP2 )) +#endif +!$ACC data copyout (PSPVOR ) if (present (PSPVOR )) +!$ACC data copyout (PSPDIV ) if (present (PSPDIV )) +!$ACC data copyout (PSPSCALAR) if (present (PSPSCALAR)) +!$ACC data copyout (PSPSC3A ) if (present (PSPSC3A )) +!$ACC data copyout (PSPSC3B ) if (present (PSPSC3B )) +!$ACC data copyout (PSPSC2 ) if (present (PSPSC2 )) +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) +CALL EDIR_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +#ifdef USE_CUDA_AWARE_MPI_FT +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +#endif + + +#ifdef gnarls +if ( number_of_calls > 1 ) then + +write (20,*) __FILE__, __LINE__ + +write (20,*) 'EDIR_TRANS INPUT:' +if ( present(PGP) ) then + write (20,*) 'PGP = '; write (20,'(4E24.13)') PGP(1:4,:,1) +endif +if ( present(PGPUV) ) then + write (20,*) 'PGPUV = '; write (20,'(4E24.13)') PGPUV(1:4,:,:,1) +endif +if ( present(PGP3A) ) then + write (20,*) 'PGP3A = '; write (20,'(4E24.13)') PGP3A(1:4,:,:,1) +endif +if ( present(PGP3B) ) then + write (20,*) 'PGP3B = '; write (20,'(4E24.13)') PGP3B(1:4,:,:,1) +endif +if ( present(PGP2) ) then + write (20,*) 'PGP2 = '; write (20,'(4E24.13)') PGP2(1:4,:,1) +endif + + +write (20,*) 'EDIR_TRANS OUTPUT:' +if ( present(PSPVOR) ) then + write (20,*) 'PSPVOR = '; write (20,'(4E24.13)') PSPVOR(:,1:20:4) +endif +if ( present(PSPDIV) ) then + write (20,*) 'PSPDIV = '; write (20,'(4E24.13)') PSPDIV(:,1:20:4) +endif +if ( present(PSPSCALAR) ) then + write (20,*) 'PSPSCALAR = '; write (20,'(4E24.13)') PSPSCALAR(:,1:20:4) +endif +if ( present(PSPSC3A) ) then + write (20,*) 'PSPSC3A = '; write (20,'(4E24.13)') PSPSC3A(:,1:20:4,:) +endif +if ( present(PSPSC3B) ) then + write (20,*) 'PSPSC3B = '; write (20,'(4E24.13)') PSPSC3B(:,1:20:4,:) +endif +if ( present(PSPSC2) ) then + write (20,*) 'PSPSC2 = '; write (20,'(4E24.13)') PSPSC2(:,1:20:4) +endif +if ( present(PMEANU) ) then + write (20,*) 'PMEANU = '; write (20,'(4E24.13)') PMEANU(:) +endif +if ( present(PMEANV) ) then + write (20,*) 'PMEANV = '; write (20,'(4E24.13)') PMEANV(:) +endif + +endif + + +if ( number_of_calls == -1 ) then + write (0,*) 'aborting at call number ',number_of_calls + call abort_trans('hold it') +endif +#endif + +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE EDIR_TRANS + diff --git a/src/etrans/gpu/external/edir_transad.F90 b/src/etrans/gpu/external/edir_transad.F90 new file mode 100644 index 0000000..beac97c --- /dev/null +++ b/src/etrans/gpu/external/edir_transad.F90 @@ -0,0 +1,493 @@ +SUBROUTINE EDIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EDIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EDIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIR_TRANS_CTLAD - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIR_TRANS_CTLAD_MOD ,ONLY : EDIR_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANV(:) +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',0,ZHOOK_HANDLE) + +CALL GSTATS(1810,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +LSCDERS=.FALSE. ! This is for use in TRLTOG which is shared with inverse transform +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETUV CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1810,1) + +! Perform transform + +CALL EDIR_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE EDIR_TRANSAD + diff --git a/src/etrans/gpu/external/edist_grid.F90 b/src/etrans/gpu/external/edist_grid.F90 new file mode 100644 index 0000000..d66de47 --- /dev/null +++ b/src/etrans/gpu/external/edist_grid.F90 @@ -0,0 +1,136 @@ +SUBROUTINE EDIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *EDIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL EDIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE DIST_GRID_CTL_MOD ,ONLY : DIST_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_GRID: KFROM TOO SHORT!') +ENDIF +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'DIST_GRID:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('DIST_GRID:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'DIST_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'DIST_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('DIST_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'DIST_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('DIST_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('DIST_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (UBOUND (KSORT, 1) /= UBOUND (PGP, 2)) THEN + CALL ABORT_TRANS('EDIST_GRID: DIMENSION MISMATCH KSORT, PGP') + ENDIF +ENDIF + +CALL DIST_GRID_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP,KSORT) +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_GRID + diff --git a/src/etrans/gpu/external/edist_spec.F90 b/src/etrans/gpu/external/edist_spec.F90 new file mode 100644 index 0000000..43d67f2 --- /dev/null +++ b/src/etrans/gpu/external/edist_spec.F90 @@ -0,0 +1,181 @@ +SUBROUTINE EDIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSORT) + +!**** *EDIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL EDIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument (change the order of fields) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIST_SPEC_CONTROL_MOD ,ONLY : EDIST_SPEC_CONTROL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM),INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL,INTENT(IN) :: LDIM1_IS_FLD +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFDISTG) +INTEGER(KIND=JPIM) :: IFSEND,IFRECV,J, IFLD, ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM), ALLOCATABLE :: IDIM0G(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD=.TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD=LDIM1_IS_FLD +IF(LLDIM1_IS_FLD) THEN + IFLD=1 + ICOEFF=2 +ELSE + IFLD=2 + ICOEFF=1 +ENDIF + +ISMAX = RALD%NMSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +ISPEC2 = D%NSPEC2 +ISPEC2_G = R%NSPEC2_G +IPOSSP(:) = D%NPOSSP(:) +IDIM0G(:) = D%NDIM0G(:) + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KFROM TOO SHORT!') +ENDIF + +IFSEND = 0 +IFRECV = 0 + +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'EDIST_SPEC:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('EDIST_SPEC:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EDIST_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFSEND) THEN + WRITE(NERR,*)'EDIST_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFSEND + CALL ABORT_TRANS('EDIST_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*)'EDIST_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFDISTG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EDIST_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EDIST_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFRECV = IFRECV+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFDISTG) +ELSE + IFRECV = KFDISTG + IVSET(:) = MYSETV +ENDIF + +IF(IFRECV > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELDS TO RECEIVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFRECV) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (.NOT. PRESENT (PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: KSORT REQUIRES PSPEC') + ENDIF + IF (UBOUND (KSORT, 1) /= UBOUND (PSPEC, IFLD)) THEN + CALL ABORT_TRANS('EDIST_SPEC: DIMENSION MISMATCH KSORT, PSPEC') + ENDIF +ENDIF + +CALL EDIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,IVSET,PSPEC,LLDIM1_IS_FLD,& + ISMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,KSORT) +DEALLOCATE(IDIM0G) +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_SPEC + diff --git a/src/etrans/gpu/external/egath_grid.F90 b/src/etrans/gpu/external/egath_grid.F90 new file mode 100644 index 0000000..05455b5 --- /dev/null +++ b/src/etrans/gpu/external/egath_grid.F90 @@ -0,0 +1,129 @@ +SUBROUTINE EGATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *EGATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL EGATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GATH_GRID_CTL_MOD ,ONLY : GATH_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM),INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_GRID + diff --git a/src/etrans/gpu/external/egath_spec.F90 b/src/etrans/gpu/external/egath_spec.F90 new file mode 100644 index 0000000..c225eb9 --- /dev/null +++ b/src/etrans/gpu/external/egath_spec.F90 @@ -0,0 +1,194 @@ +SUBROUTINE EGATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,KMSMAX,LDZA0IP) + +!**** *EGATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero imaginary part of first coefficients + +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- EGATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Modified 03-09-30 Y. Seity, bug correction IFSEND=0 +! R. El Khatib 23-Oct-2012 Monkey business +! P.Marguinaud 10-Oct-2013 Add an option to set (or not) first +! coefficients imaginary part to zero +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EGATH_SPEC_CONTROL_MOD ,ONLY : EGATH_SPEC_CONTROL + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFGATHG) +INTEGER(KIND=JPIM) :: IFRECV,IFSEND,J +INTEGER(KIND=JPIM) :: IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, IMSMAX, ISPEC2, ISPEC2_G +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: ICPL2M(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KTO TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IMSMAX = RALD%NMSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +IF(PRESENT(KMSMAX)) IMSMAX = KMSMAX +ALLOCATE(IDIM0G(0:IMSMAX)) +ALLOCATE(ICPL2M(0:IMSMAX)) +IF(IMSMAX /= RALD%NMSMAX .OR. ISMAX /= R%NSMAX) THEN + CALL ABORT_TRANS('EGATH_SPEC:TRUNCATION CHANGE NOT YET CODED') +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) + ICPL2M(:) = DALD%NCPL2M(:) +ENDIF + +IFSEND = 0 +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'EGATH_SPEC:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('EGATH_SPEC:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EGATH_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFRECV) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFRECV + CALL ABORT_TRANS('EGATH_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EGATH_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFGATHG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFSEND = IFSEND+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFGATHG) +ELSEIF(NPRTRV > 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET MISSING, NPRTRV ',NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET MISSING, NPRTRV > 1') +ELSE + IFSEND = KFGATHG + IVSET(:) = 1 +ENDIF + +IF(IFSEND > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELDS TO RECIEVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFSEND) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EGATH_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL EGATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,IVSET,PSPEC,LLDIM1_IS_FLD,& + & IMSMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,ICPL2M,LDZA0IP) +DEALLOCATE(IDIM0G) +DEALLOCATE(ICPL2M) + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC + diff --git a/src/etrans/gpu/external/egpnorm_trans.F90 b/src/etrans/gpu/external/egpnorm_trans.F90 new file mode 100644 index 0000000..2234221 --- /dev/null +++ b/src/etrans/gpu/external/egpnorm_trans.F90 @@ -0,0 +1,445 @@ +SUBROUTINE EGPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *EGPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms using a 2 stage (NPRTRV,NPRTRW) communication rather +! than an approach using a more expensive global gather collective communication + +!** Interface. +! ---------- +! CALL EGPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! A.Bogatchev after gpnorm_trans + +! Modifications. +! -------------- +! Original : 12th Jun 2009 +! R. El Khatib 07-08-2009 Optimisation directive for NEC +! A.Bogatchev 16-09-2010 Intruducing of LGPNORM + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LGPNORM, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW +USE TPM_GEOMETRY ,ONLY : G +!USE TPM_FIELDS +!USE TPMALD_DIM ,ONLY : RALD +!USE TPMALD_DISTR +!USE TPMALD_FIELDS +!USE TPMALD_GEO + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +!USE SET_RESOL_MOD +USE TRGTOL_MOD ,ONLY : TRGTOL +USE SET2PE_MOD ,ONLY : SET2PE +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB),INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +!ifndef INTERFACE + +! Local variables +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +INTEGER(KIND=JPIM) :: IUBOUND(4) +INTEGER(KIND=JPIM) :: IVSET(KFIELDS) +INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETG(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZGTF(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZAVE(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMINGL(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMAXGL(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMIN(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMAX(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZAVEG(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMING(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMAXG(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZSND(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZRCV(:) +INTEGER(KIND=JPIM) :: J,JGL,IGL,JL,JF,IF_GP,IF_SCALARS_G,IF_FS,JSETV,JSETW,IWLATS +INTEGER(KIND=JPIM) :: IPROC,ITAG,ILEN,ILENR,IBEG,IEND,IND + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',0,ZHOOK_HANDLE) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +NPROMA = KPROMA +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +! Consistency checks + +IUBOUND(1:3)=UBOUND(PGP) +IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EGPNORM_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EGPNORM_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFIELDS) THEN + WRITE(NOUT,*)'EGPNORM_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFIELDS + CALL ABORT_TRANS('EGPNORM_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'EGPNORM_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EGPNORM_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF_GP=KFIELDS +IF_SCALARS_G=0 + +IF_FS=0 +DO J=1,KFIELDS + IVSET(J)=MOD(J-1,NPRTRV)+1 + IF(IVSET(J)==MYSETV)THEN + IF_FS=IF_FS+1 + ENDIF +ENDDO + +ALLOCATE(IVSETS(NPRTRV)) +IVSETS(:)=0 +DO J=1,KFIELDS + IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 +ENDDO +ALLOCATE(IVSETG(NPRTRV,MAXVAL(IVSETS(:)))) +IVSETG(:,:)=0 +IVSETS(:)=0 +DO J=1,KFIELDS + IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 + IVSETG(IVSET(J),IVSETS(IVSET(J)))=J +ENDDO + +ALLOCATE(ZGTF(IF_FS,D%NLENGTF)) +LGPNORM=.TRUE. +CALL TRGTOL(ZGTF,IF_FS,IF_GP,IF_SCALARS_G,IVSET,PGP=PGP) +LGPNORM=.FALSE. + +IBEG=1 +IEND=D%NDGL_FS +ALLOCATE(ZAVE(IF_FS,IBEG:IEND)) +ALLOCATE(ZMIN(IF_FS)) +ALLOCATE(ZMAX(IF_FS)) + +IF( IF_FS > 0 )THEN + + ZAVE(:,:)=0.0_JPRB + + IF(.NOT.LDAVE_ONLY)THEN + ALLOCATE(ZMINGL(IF_FS,IBEG:IEND)) + ALLOCATE(ZMAXGL(IF_FS,IBEG:IEND)) + DO JF=1,IF_FS + ZMINGL(JF,:)=ZGTF(JF,D%NSTAGTF(1)+1) + ZMAXGL(JF,:)=ZGTF(JF,D%NSTAGTF(1)+1) + ENDDO + ENDIF + +! FIRST DO SUMS IN EACH FULL LATITUDE + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JGL,IGL,JF,JL) + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 +!CDIR NOLOOPCHG + DO JF=1,IF_FS + DO JL=1,G%NLOEN(IGL) + ZAVE(JF,JGL)=ZAVE(JF,JGL)+ZGTF(JF,D%NSTAGTF(JGL)+JL) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + DO JL=1,G%NLOEN(IGL) + ZMINGL(JF,JGL)=MIN(ZMINGL(JF,JGL),ZGTF(JF,D%NSTAGTF(JGL)+JL)) + ZMAXGL(JF,JGL)=MAX(ZMAXGL(JF,JGL),ZGTF(JF,D%NSTAGTF(JGL)+JL)) + ENDDO + ENDIF + ENDDO + ENDDO +!$OMP END PARALLEL DO + + IF(.NOT.LDAVE_ONLY)THEN + DO JF=1,IF_FS + ZMIN(JF)=MINVAL(ZMINGL(JF,:)) + ZMAX(JF)=MAXVAL(ZMAXGL(JF,:)) + ENDDO + DEALLOCATE(ZMINGL) + DEALLOCATE(ZMAXGL) + ENDIF + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JF=1,IF_FS + ZAVE(JF,JGL)=ZAVE(JF,JGL)/(G%NLOEN(IGL)*G%NLOEN(IGL)) + ENDDO + ENDDO + +ENDIF + +! IT IS IMPORTANT THAT SUMS ARE NOW DONE IN LATITUDE ORDER +ALLOCATE(ZAVEG(R%NDGL,KFIELDS)) +ALLOCATE(ZMING(KFIELDS)) +ALLOCATE(ZMAXG(KFIELDS)) + +ZAVEG(:,:)=0.0_JPRB +DO JF=1,IF_FS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + ZAVEG(IGL,IVSETG(MYSETV,JF))=ZAVEG(IGL,IVSETG(MYSETV,JF))+ZAVE(JF,JGL) + ENDDO +ENDDO + +IF(LDAVE_ONLY)THEN + ZMING(:)=PMIN(:) + ZMAXG(:)=PMAX(:) +ELSE + DO JF=1,IF_FS + ZMING(IVSETG(MYSETV,JF))=ZMIN(JF) + ZMAXG(IVSETG(MYSETV,JF))=ZMAX(JF) + ENDDO +ENDIF + +! RECEIVE ABOVE FROM OTHER NPRTRV SETS FOR SAME LATS BUT DIFFERENT FIELDS +ITAG=123 + + +IF( MYSETV==1 )THEN + + DO JSETV=2,NPRTRV + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*IVSETS(JSETV)+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*IVSETS(JSETV) + ENDIF + IF(ILEN > 0)THEN + ALLOCATE(ZRCV(ILEN)) + CALL SET2PE(IPROC,0,0,MYSETW,JSETV) + CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='EGPNORM_TRANS:V') + IF(ILENR /= ILEN)THEN + CALL ABOR1('EGPNORM_TRANS:ILENR /= ILEN') + ENDIF + IND=0 + DO JF=1,IVSETS(JSETV) + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZAVEG(IGL,IVSETG(JSETV,JF))=ZRCV(IND) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZMING(IVSETG(JSETV,JF))=ZRCV(IND) + IND=IND+1 + ZMAXG(IVSETG(JSETV,JF))=ZRCV(IND) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDDO + ENDIF + DEALLOCATE(ZRCV) + ENDIF + ENDDO + +ELSE + + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*IVSETS(MYSETV)+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*IVSETS(MYSETV) + ENDIF + IF(ILEN > 0)THEN + CALL SET2PE(IPROC,0,0,MYSETW,1) + ALLOCATE(ZSND(ILEN)) + IND=0 + DO JF=1,IF_FS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZSND(IND)=ZAVEG(IGL,IVSETG(MYSETV,JF)) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZSND(IND)=ZMING(IVSETG(MYSETV,JF)) + IND=IND+1 + ZSND(IND)=ZMAXG(IVSETG(MYSETV,JF)) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZSND(IND)=PMIN(JF) + IND=IND+1 + ZSND(IND)=PMAX(JF) + ENDDO + ENDIF + CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='EGPNORM_TRANS:V') + DEALLOCATE(ZSND) + ENDIF + +ENDIF + +! FINALLY RECEIVE CONTRIBUTIONS FROM OTHER NPRTRW SETS + +IF( MYSETV == 1 )THEN + + IF( MYSETW == 1 )THEN + + DO JSETW=2,NPRTRW + IWLATS=D%NULTPP(JSETW) + IBEG=1 + IEND=IWLATS + IF(LDAVE_ONLY)THEN + ILEN=IWLATS*KFIELDS+2*KFIELDS + ELSE + ILEN=(IWLATS+2)*KFIELDS + ENDIF + IF(ILEN > 0 )THEN + ALLOCATE(ZRCV(ILEN)) + CALL SET2PE(IPROC,0,0,JSETW,1) + CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='EGPNORM_TRANS:W') + IF(ILENR /= ILEN)THEN + CALL ABOR1('EGPNORM_TRANS:ILENR /= ILEN') + ENDIF + IND=0 + DO JF=1,KFIELDS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(JSETW) + JGL - 1 + IND=IND+1 + ZAVEG(IGL,JF)=ZRCV(IND) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDDO + ENDIF + DEALLOCATE(ZRCV) + ENDIF + ENDDO + + ELSE + + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*KFIELDS+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*KFIELDS + ENDIF + IF(ILEN > 0)THEN + CALL SET2PE(IPROC,0,0,1,1) + ALLOCATE(ZSND(ILEN)) + IND=0 + DO JF=1,KFIELDS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZSND(IND)=ZAVEG(IGL,JF) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZSND(IND)=ZMING(JF) + IND=IND+1 + ZSND(IND)=ZMAXG(JF) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZSND(IND)=ZMING(JF) + IND=IND+1 + ZSND(IND)=ZMAXG(JF) + ENDDO + ENDIF + CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='EGPNORM_TRANS:V') + DEALLOCATE(ZSND) + ENDIF + + ENDIF + +ENDIF + + +IF( MYSETW == 1 .AND. MYSETV == 1 )THEN + + PAVE(:)=0.0_JPRB + DO JGL=1,R%NDGL + PAVE(:)=PAVE(:)+ZAVEG(JGL,:) + ENDDO + + PMIN(:)=ZMING(:) + PMAX(:)=ZMAXG(:) + +ENDIF + +DEALLOCATE(ZGTF) +DEALLOCATE(ZAVE) +DEALLOCATE(ZMIN) +DEALLOCATE(ZMAX) +DEALLOCATE(ZAVEG) +DEALLOCATE(ZMING) +DEALLOCATE(ZMAXG) +DEALLOCATE(IVSETS) +DEALLOCATE(IVSETG) + +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + + +END SUBROUTINE EGPNORM_TRANS diff --git a/src/etrans/gpu/external/einv_trans.F90 b/src/etrans/gpu/external/einv_trans.F90 new file mode 100644 index 0000000..47b245b --- /dev/null +++ b/src/etrans/gpu/external/einv_trans.F90 @@ -0,0 +1,815 @@ +SUBROUTINE EINV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EINV_TRANS_CTL_MOD ,ONLY : EINV_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +!USE MPI, ONLY : MPI_COMM_WORLD, MPI_BARRIER + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3, IERROR +INTEGER, SAVE :: number_of_calls=0 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',0,ZHOOK_HANDLE) + +#ifndef gnarls + +write (20,*) __FILE__, __LINE__ +number_of_calls=number_of_calls+1 +write (20,*) 'number of calls = ',number_of_calls +call flush(20) +write (0,*) __FILE__, __LINE__ +write (0,*) 'number of calls = ',number_of_calls +call flush(0) + +#endif + +! check if args are contiguous +if ( present(PSPVOR) ) THEN + write (20,*) 'shape(PSPVOR) = ',shape(PSPVOR) + if (.not. is_contiguous(PSPVOR) ) call abort_trans('PSPVOR not contiguous') +endif +if ( present(PSPDIV) ) THEN + write (20,*) 'shape(PSPDIV) = ',shape(PSPDIV) + if (.not. is_contiguous(PSPDIV) ) call abort_trans('PSPDIV not contiguous') +endif +if ( present(PSPSCALAR) ) THEN + write (20,*) 'shape(PSPSCALAR) = ',shape(PSPSCALAR) + if (.not. is_contiguous(PSPSCALAR) ) call abort_trans('PSPSCALAR not contiguous') +endif +if ( present(PSPSC3A) ) THEN + write (20,*) 'shape(PSPSC3A) = ',shape(PSPSC3A) + if (.not. is_contiguous(PSPSC3A) ) call abort_trans('PSPSC3A not contiguous') +endif +if ( present(PSPSC3B) ) THEN + write (20,*) 'shape(PSPSC3B) = ',shape(PSPSC3B) + if (.not. is_contiguous(PSPSC3B) ) call abort_trans('PSPSC3B not contiguous') +endif +if ( present(PSPSC2) ) THEN + write (20,*) 'shape(PSPSC2) = ',shape(PSPSC2) + if (.not. is_contiguous(PSPSC2) ) call abort_trans('PSPSC2 not contiguous') +endif +if ( present(PGP) ) THEN + write (20,*) 'shape(PGP) = ',shape(PGP) + if (.not. is_contiguous(PGP) ) call abort_trans('PGP not contiguous') +endif +if ( present(PGPUV) ) THEN + write (20,*) 'shape(PGPUV) = ',shape(PGPUV) + if (.not. is_contiguous(PGPUV) ) call abort_trans('PGPUV not contiguous') +endif +if ( present(PGP3A) ) THEN + write (20,*) 'shape(PGP3A) = ',shape(PGP3A) + if (.not. is_contiguous(PGP3A) ) call abort_trans('PGP3A not contiguous') +endif +if ( present(PGP3B) ) THEN + write (20,*) 'shape(PGP3B) = ',shape(PGP3B) + if (.not. is_contiguous(PGP3B) ) call abort_trans('PGP3B not contiguous') +endif +if ( present(PGP2) ) THEN + write (20,*) 'shape(PGP2) = ',shape(PGP2) + if (.not. is_contiguous(PGP2) ) call abort_trans('PGP2 not contiguous') +endif +if ( present(PMEANU) ) THEN + write (20,*) 'shape(PMEANU) = ',shape(PMEANU) + if (.not. is_contiguous(PMEANU) ) call abort_trans('PMEANU not contiguous') +endif +if ( present(PMEANV) ) THEN + write (20,*) 'shape(PMEANV) = ',shape(PMEANV) + if (.not. is_contiguous(PMEANV) ) call abort_trans('PMEANV not contiguous') +endif + +if ( present(KVSETUV) ) THEN + write (20,*) 'KVSETU = ',(KVSETUV) +endif +if ( present(KVSETSC) ) THEN + write (20,*) 'KVSETSC = ',(KVSETSC) +endif +if ( present(KVSETSC3A) ) THEN + write (20,*) 'KVSETSC3A = ',(KVSETSC3A) +endif +if ( present(KVSETSC3B) ) THEN + write (20,*) 'KVSETSC3B = ',(KVSETSC3B) +endif +if ( present(KVSETSC2) ) THEN + write (20,*) 'KVSETSC2 = ',(KVSETSC2) +endif +if ( present(KPROMA) ) THEN + write (20,*) 'KPROMA = ',KPROMA +endif +if ( present(LDSCDERS) ) THEN + write (20,*) 'LDSCDERS = ',LDSCDERS +endif +if ( present(LDVORGP) ) THEN + write (20,*) 'LDVORGP = ',LDVORGP +endif +if ( present(LDDIVGP) ) THEN + write (20,*) 'LDDIVGP = ',LDDIVGP +endif +if ( present(LDUVDER) ) THEN + write (20,*) 'LDUVDER = ',LDUVDER +endif +!call flush(20) + +#ifndef gnarls + +if ( number_of_calls == -1 ) then + write (0,*) 'aborting at call number ',number_of_calls + call abort_trans('hold it') +endif + +#endif + + +CALL GSTATS(1807,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF(.NOT. PRESENT(PSPSCALAR) ) THEN + CALL ABORT_TRANS('INV_TRANS : KVSETSC PRESENT BUT PSPSCALAR MISSING') + ENDIF + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS_G > 0 ) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPDIV TOO SHORT') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANS:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1807,1) + +! ------------------------------------------------------------------ + +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + +!$ACC data copyin (PSPVOR ) if (present (PSPVOR )) +!$ACC data copyin (PSPDIV ) if (present (PSPDIV )) +!$ACC data copyin (PSPSCALAR) if (present (PSPSCALAR)) +!$ACC data copyin (PSPSC3A ) if (present (PSPSC3A )) +!$ACC data copyin (PSPSC3B ) if (present (PSPSC3B )) +!$ACC data copyin (PSPSC2 ) if (present (PSPSC2 )) +!$ACC data copyin (PMEANU ) if (present (PMEANU )) +!$ACC data copyin (PMEANV ) if (present (PMEANV )) +#ifdef USE_CUDA_AWARE_MPI_FT +!$ACC data copyout (PGP ) if (present (PGP )) +!$ACC data copyout (PGPUV) if (present (PGPUV)) +!$ACC data copyout (PGP3A) if (present (PGP3A)) +!$ACC data copyout (PGP3B) if (present (PGP3B)) +!$ACC data copyout (PGP2 ) if (present (PGP2 )) +#endif +! Perform transform +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) +CALL EINV_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV ) + +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) +#ifdef USE_CUDA_AWARE_MPI_FT +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +#endif +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data +!$ACC end data + +!call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + + +#ifdef gnarls + +if ( number_of_calls > 1 ) then + +write (20,*) __FILE__, __LINE__ + +write (20,*) 'EINV_TRANS INPUT:' +if ( present(PSPVOR) ) then + write (20,*) 'PSPVOR = '; write (20,'(4E24.13)') (PSPVOR(:,1:20:4)) +endif +if ( present(PSPDIV) ) then + write (20,*) 'PSPDIV = '; write (20,'(4E24.13)') (PSPDIV(:,1:20:4)) +endif +if ( present(PSPSCALAR) ) then + write (20,*) 'PSPSCALAR = '; write (20,'(4E24.13)') (PSPSCALAR(:,1:20:4)) +endif +if ( present(PSPSC3A) ) then + write (20,*) 'PSPSC3A = '; write (20,'(4E24.13)') (PSPSC3A(:,1:20:4,:)) +endif +if ( present(PSPSC3B) ) then + write (20,*) 'PSPSC3B = '; write (20,'(4E24.13)') (PSPSC3B(:,1:20:4,:)) +endif +if ( present(PSPSC2) ) then + write (20,*) 'PSPSC2 = '; write (20,'(4E24.13)') (PSPSC2(:,1:20:4)) +endif +if ( present(PMEANU) ) then + write (20,*) 'PMEANU = '; write (20,'(4E24.13)') (PMEANU(:)) +endif +if ( present(PMEANV) ) then + write (20,*) 'PMEANV = '; write (20,'(4E24.13)') (PMEANV(:)) +endif + +write (20,*) 'EINV_TRANS OUTPUT:' +if ( present(PGP) ) then + write (20,*) 'PGP = '; write (20,'(4E24.13)') (PGP(1:4,:,1)) +endif +if ( present(PGPUV) ) then + write (20,*) 'PGPUV = '; write (20,'(4E24.13)') (PGPUV(1:4,:,:,1)) +endif +if ( present(PGP3A) ) then + write (20,*) 'PGP3A = '; write (20,'(4E24.13)') (PGP3A(1:4,:,:,1)) +endif +if ( present(PGP3B) ) then + write (20,*) 'PGP3B = '; write (20,'(4E24.13)') (PGP3B(1:4,:,:,1)) +endif +if ( present(PGP2) ) then + write (20,*) 'PGP2 = '; write (20,'(4E24.13)') (PGP2(1:4,:,1)) +endif + +endif + +if ( number_of_calls == -1 ) then + write (0,*) 'aborting at call number ',number_of_calls + call abort_trans('hold it') +endif + +#endif + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANS + diff --git a/src/etrans/gpu/external/einv_transad.F90 b/src/etrans/gpu/external/einv_transad.F90 new file mode 100644 index 0000000..0f38dd3 --- /dev/null +++ b/src/etrans/gpu/external/einv_transad.F90 @@ -0,0 +1,609 @@ +SUBROUTINE EINV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EINV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: like in direct code: IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EINV_TRANS_CTLAD_MOD ,ONLY : EINV_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',0,ZHOOK_HANDLE) +CALL GSTATS(1809,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING") + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'EINV_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPVOR TOO SHORT") + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPDIV MISSING") + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPDIV TOO SHORT") + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'EINV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('EINV_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('EINV_TRANSAD:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1809,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL EINV_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANSAD + diff --git a/src/etrans/gpu/external/esetup_trans.F90 b/src/etrans/gpu/external/esetup_trans.F90 new file mode 100644 index 0000000..523c432 --- /dev/null +++ b/src/etrans/gpu/external/esetup_trans.F90 @@ -0,0 +1,430 @@ +SUBROUTINE ESETUP_TRANS(KMSMAX,KSMAX,KDGL,KDGUX,KLOEN,LDSPLIT,& + & KTMAX,KRESOL,PEXWN,PEYWN,PWEIGHT,LDGRIDONLY,KNOEXTZL,KNOEXTZG, & + & LDUSEFFTW) +!**** *ESETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL ESETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space +! LDGRIDONLY - true if only grid space is required + + +! LDSPLIT describe the distribution among processors of +! grid-point data and has no relevance if you are using a single processor + +! LDUSEFFTW - Use FFTW for FFTs + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESETUP_DIMS - setup distribution independent dimensions +! SUEMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! ESETUP_GEOM - Compute arrays related to grid-point geometry +! SUEMP_TRANS - Second part of setup of distributed environment +! SUEFFT - setup for FFT + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 02-04-11 A. Bogatchev: Passing of TCDIS +! 02-11-14 C. Fischer: soften test on KDGL +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Aug-2012 %LAM in GEOM_TYPE +! R. El Khatib 14-Jun-2013 LENABLED +! R. El Khatib 01-Sep-2015 Support for FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPRINTLEV, MSETUP0, & + & NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL, R_NSMAX,R_NTMAX, R_NDGNH, R_NDGL, R_NNOEXTZL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL,NPROC,D_NUMP,D_MYMS,D_NSTAGT0B,D_NSTAGT1B,& + & D_NPROCL,D_NPNTGTB1,D_NASM0,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,& + & D_NPROCM,D_NPTRLS, MYPROC +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX +USE TPM_FIELDS ,ONLY : FIELDS_RESOL, F +USE TPM_FFT ,ONLY : T, FFT_RESOL !, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +!USE TPM_FFTC ,ONLY : TC, FFTC_RESOL +USE TPM_FFTH ,ONLY : TC, FFTH_RESOL + +USE TPM_FLT ,ONLY : FLT_RESOL + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : ALDDISTR_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +USE TPMALD_FIELDS ,ONLY : ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESETUP_DIMS_MOD ,ONLY : ESETUP_DIMS +USE SUEMP_TRANS_MOD ,ONLY : SUEMP_TRANS +USE SUEMP_TRANS_PRELEG_MOD ,ONLY : SUEMP_TRANS_PRELEG +!USE SULEG_MOD +USE ESETUP_GEOM_MOD ,ONLY : ESETUP_GEOM +USE SUEFFT_MOD ,ONLY : SUEFFT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +#ifdef _OPENACC +use openacc +#endif + +!endif INTERFACE + +IMPLICIT NONE + +! Dummy arguments +INTEGER(KIND=JPIM),INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +LOGICAL ,OPTIONAL,INTENT(IN) :: LDGRIDONLY +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEXWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEYWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZG +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUSEFFTW + +!ifndef INTERFACE + +! Local variables +LOGICAL :: LLP1,LLP2 +INTEGER(KIND=JPIM) :: I, J +INTEGER(ACC_DEVICE_KIND) :: IDEVTYPE +INTEGER :: INUMDEVS, IUNIT, ISTAT, IDEV, MYGPU + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',0,ZHOOK_HANDLE) + + +#ifdef _OPENACC +!!IDEVTYPE=ACC_DEVICE_NVIDIA +IDEVTYPE=ACC_GET_DEVICE_TYPE() +INUMDEVS = ACC_GET_NUM_DEVICES(IDEVTYPE) +MYGPU = MOD(MYPROC-1,INUMDEVS) +CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) +MYGPU = ACC_GET_DEVICE_NUM(IDEVTYPE) +!ISTAT = CUDA_GETDEVICE(IDEV) +#endif + +IF(MSETUP0 == 0) THEN + CALL ABORT_TRANS('ESETUP_TRANS: SETUP_TRANS0 HAS TO BE CALLED BEFORE ESETUP_TRANS') +ENDIF +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE ESETUP_TRANS ===' + +! Allocate resolution dependent structures common to global and LAM +IF(.NOT. ALLOCATED(DIM_RESOL)) THEN + NDEF_RESOL = 1 + ALLOCATE(DIM_RESOL(NMAX_RESOL)) + ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(GEOM_RESOL(NMAX_RESOL)) + ALLOCATE(DISTR_RESOL(NMAX_RESOL)) + ALLOCATE(FFT_RESOL(NMAX_RESOL)) + !ALLOCATE(FFTB_RESOL(NMAX_RESOL)) +#ifdef WITH_FFTW + ALLOCATE(FFTW_RESOL(NMAX_RESOL)) +#endif + !ALLOCATE(FFTC_RESOL(NMAX_RESOL)) + ALLOCATE(FFTH_RESOL(NMAX_RESOL)) + ALLOCATE(FLT_RESOL(NMAX_RESOL)) + GEOM_RESOL(:)%LAM=.FALSE. + ALLOCATE(LENABLED(NMAX_RESOL)) + LENABLED(:)=.FALSE. +ELSE + NDEF_RESOL = NDEF_RESOL+1 + IF(NDEF_RESOL > NMAX_RESOL) THEN + CALL ABORT_TRANS('ESETUP_TRANS:NDEF_RESOL > NMAX_RESOL') + ENDIF +ENDIF +! Allocate LAM-specific resolution dependent structures +IF(.NOT. ALLOCATED(ALDDIM_RESOL)) THEN + ALLOCATE(ALDDIM_RESOL(NMAX_RESOL)) + ALLOCATE(ALDFIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(ALDGEO_RESOL(NMAX_RESOL)) + ALLOCATE(ALDDISTR_RESOL(NMAX_RESOL)) + ALLOCATE(ALDFFT_RESOL(NMAX_RESOL)) +ENDIF + + +IF (PRESENT(KRESOL)) THEN + KRESOL=NDEF_RESOL +ENDIF + +! Point at structures due to be initialized +CALL ESET_RESOL(NDEF_RESOL) +IF(LLP1) WRITE(NOUT,*) '=== DEFINING RESOLUTION ',NCUR_RESOL + +! Defaults for optional arguments + +G%LREDUCED_GRID = .FALSE. +D%LGRIDONLY = .FALSE. +D%LSPLIT = .FALSE. +TALD%LFFT992=.TRUE. ! Use FFT992 interface for FFTs +#ifdef WITH_FFTW +TW%LFFTW=.FALSE. ! Use FFTW interface for FFTs +#endif + +! NON-OPTIONAL ARGUMENTS +R%NSMAX = KSMAX +RALD%NMSMAX=KMSMAX +RALD%NDGUX=KDGUX +R%NDGL = KDGL +RALD%NDGLSUR=KDGL+2 +R%NDLON =KLOEN(1) + +! IMPLICIT argument : +G%LAM = .TRUE. + +IF (KDGL <= 0) THEN + CALL ABORT_TRANS ('ESETUP_TRANS: KDGL IS NOT A POSITIVE NUMBER') +ENDIF + +! Optional arguments + +ALLOCATE(G%NLOEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'NLOEN ',SIZE(G%NLOEN ),SHAPE(G%NLOEN ) + +IF (G%LREDUCED_GRID) THEN + G%NLOEN(:) = KLOEN(1:R%NDGL) +ELSE + G%NLOEN(:) = R%NDLON +ENDIF + +IF(PRESENT(LDSPLIT)) THEN + D%LSPLIT = LDSPLIT +ENDIF + +IF(PRESENT(KTMAX)) THEN + R%NTMAX = KTMAX +ELSE + R%NTMAX = R%NSMAX +ENDIF +IF(R%NTMAX /= R%NSMAX) THEN + !This SHOULD work but I don't know how to test it /MH + WRITE(NERR,*) 'R%NTMAX /= R%NSMAX',R%NTMAX,R%NSMAX + CALL ABORT_TRANS('ESETUP_TRANS:R%NTMAX /= R%NSMAX HAS NOT BEEN VALIDATED') +ENDIF + +IF(PRESENT(PWEIGHT)) THEN + D%LWEIGHTED_DISTR = .TRUE. + IF( D%LWEIGHTED_DISTR .AND. .NOT.D%LSPLIT )THEN + CALL ABORT_TRANS('SETUP_TRANS: LWEIGHTED_DISTR=T AND LSPLIT=F NOT SUPPORTED') + ENDIF + IF(SIZE(PWEIGHT) /= SUM(G%NLOEN(:)) )THEN + CALL ABORT_TRANS('SETUP_TRANS:SIZE(PWEIGHT) /= SUM(G%NLOEN(:))') + ENDIF + ALLOCATE(D%RWEIGHT(SIZE(PWEIGHT))) + D%RWEIGHT(:)=PWEIGHT(:) +ELSE + D%LWEIGHTED_DISTR = .FALSE. +ENDIF + +IF(PRESENT(LDGRIDONLY)) THEN + D%LGRIDONLY=LDGRIDONLY +ENDIF + +IF (PRESENT(KNOEXTZL)) THEN + R%NNOEXTZL=KNOEXTZL +ELSE + R%NNOEXTZL=0 +ENDIF + +IF (PRESENT(KNOEXTZG)) THEN + R%NNOEXTZG=KNOEXTZG +ELSE + R%NNOEXTZG=0 +ENDIF + +#ifdef WITH_FFTW +IF(PRESENT(LDUSEFFTW)) THEN + TW%LFFTW=LDUSEFFTW +ENDIF +#endif + +IF(PRESENT(LDUSEFFTW)) THEN + TALD%LFFT992=.NOT.LDUSEFFTW +ELSE + TALD%LFFT992=.TRUE. +ENDIF + +! Setup resolution dependent structures +! ------------------------------------- + +! Setup distribution independent dimensions +CALL ESETUP_DIMS +IF (PRESENT(PEXWN)) GALD%EXWN=PEXWN +IF (PRESENT(PEYWN)) GALD%EYWN=PEYWN + +! First part of setup of distributed environment +CALL SUEMP_TRANS_PRELEG + +CALL GSTATS(1802,0) +! Compute arrays related to grid-point geometry +CALL ESETUP_GEOM +! Second part of setup of distributed environment +CALL SUEMP_TRANS +! Initialize Fast Fourier Transform package +CALL SUEFFT +CALL GSTATS(1802,1) + +! Signal the current resolution is active +LENABLED(NDEF_RESOL)=.TRUE. + +IF( .NOT.D%LGRIDONLY ) THEN + +WRITE(NOUT,*) '===now going to allocate GPU arrays' + + +!$acc enter data & +!$acc& copyin(F,G,R,D) + +R_NSMAX=R%NSMAX +R_NTMAX=R%NTMAX +R_NDGNH=R%NDGNH +R_NDGL=R%NDGL +R_NNOEXTZL=R%NNOEXTZL + + +ALLOCATE(D_NSTAGT0B(SIZE(D%NSTAGT0B))) +ALLOCATE(D_NSTAGT1B(SIZE(D%NSTAGT1B))) +ALLOCATE(D_NPNTGTB0(0:SIZE(D%NPNTGTB0,1)-1,SIZE(D%NPNTGTB0,2))) +ALLOCATE(D_NPNTGTB1(SIZE(D%NPNTGTB1,1),SIZE(D%NPNTGTB1,2))) +ALLOCATE(D_MYMS(SIZE(D%MYMS))) +ALLOCATE(D_NPROCL(SIZE(D%NPROCL))) +ALLOCATE(D_NASM0(0:SIZE(D%NASM0)-1)) +ALLOCATE(D_NSTAGTF(SIZE(D%NSTAGTF))) +ALLOCATE(D_MSTABF(SIZE(D%MSTABF))) +ALLOCATE(D_NPROCM(0:SIZE(D%NPROCM)-1)) +ALLOCATE(D_NPTRLS(SIZE(D%NPTRLS))) + +ALLOCATE(G_NDGLU(0:SIZE(G%NDGLU)-1)) +ALLOCATE(G_NMEN(SIZE(G%NMEN))) +ALLOCATE(G_NLOEN(SIZE(G%NLOEN))) + +DO I=0,SIZE(G%NDGLU)-1 + G_NDGLU(I)=G%NDGLU(I) +end DO + +G_NMEN_MAX=0 +DO I=1,SIZE(G%NMEN) + G_NMEN(I)=G%NMEN(I) + if (G_NMEN(I) .gt. G_NMEN_MAX) G_NMEN_MAX=G_NMEN(I) +end DO + +G_NLOEN_MAX=0 +DO I=1,SIZE(G%NLOEN) + G_NLOEN(I)=G%NLOEN(I) + if (G_NLOEN(I) .gt. G_NLOEN_MAX) G_NLOEN_MAX=G_NLOEN(I) +end DO + +DO I=1,SIZE(D%NSTAGT0B) + D_NSTAGT0B(I)=D%NSTAGT0B(I) +END DO + +DO I=1,SIZE(D%NSTAGT1B) + D_NSTAGT1B(I)=D%NSTAGT1B(I) +END DO + +DO I=1,SIZE(D%NPROCL) + D_NPROCL(I)=D%NPROCL(I) +END DO + +DO I=0,SIZE(D%NASM0)-1 + D_NASM0(I)=D%NASM0(I) +END DO + +DO I=1,SIZE(D%NSTAGTF) + D_NSTAGTF(I)=D%NSTAGTF(I) +END DO + +DO I=1,SIZE(D%MSTABF) + D_MSTABF(I)=D%MSTABF(I) +END DO + +DO I=0,SIZE(D%NPROCM)-1 + D_NPROCM(I)=D%NPROCM(I) +END DO + +DO I=1,SIZE(D%NPTRLS) + D_NPTRLS(I)=D%NPTRLS(I) +END DO + +DO I=1,SIZE(D%NPNTGTB0,2) + DO J=0,SIZE(D%NPNTGTB0,1)-1 + D_NPNTGTB0(J,I)=D%NPNTGTB0(J,I) + end DO +END DO + +DO I=1,SIZE(D%NPNTGTB1,2) + DO J=1,SIZE(D%NPNTGTB1,1) + D_NPNTGTB1(J,I)=D%NPNTGTB1(J,I) + end DO +END DO + +D_NUMP=D%NUMP + +DO I=1,SIZE(D%MYMS) + D_MYMS(I)=D%MYMS(I) +end DO + +!$ACC enter data create(R_NSMAX,R_NTMAX,R_NDGL,R_NNOEXTZL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,D_NPNTGTB1,D_NPROCL,D_NUMP,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX) + +!$ACC update device(R_NSMAX,R_NTMAX,R_NDGL,R_NNOEXTZL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,D_NPNTGTB1,D_NPROCL,D_NUMP,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX) + +WRITE(NOUT,*) '===GPU arrays successfully allocated' +!endif INTERFACE + +ENDIF + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +!endif INTERFACE + +END SUBROUTINE ESETUP_TRANS + diff --git a/src/etrans/gpu/external/especnorm.F90 b/src/etrans/gpu/external/especnorm.F90 new file mode 100644 index 0000000..f816ee4 --- /dev/null +++ b/src/etrans/gpu/external/especnorm.F90 @@ -0,0 +1,136 @@ +SUBROUTINE ESPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *ESPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL ESPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESPNORM_CTL_MOD ,ONLY : ESPNORM_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KMASTER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PNORM(:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IMASTER,IFLD,IFLD_G,J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ESPECNORM',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +! Set defaults +IMASTER = 1 +IFLD = 0 + +IF(PRESENT(KMASTER)) THEN + IMASTER = KMASTER +ENDIF + +IF(PRESENT(KVSET)) THEN + IFLD_G = UBOUND(KVSET,1) + DO J=1,IFLD_G + IF(KVSET(J) > NPRTRV) THEN + WRITE(NERR,*) 'ESPECNORM:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('ESPECNORM:KVSET TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFLD = IFLD+1 + ENDIF + ENDDO +ELSE + IF(PRESENT(PSPEC)) THEN + IFLD = UBOUND(PSPEC,1) + ENDIF + IFLD_G = IFLD +ENDIF + +IF(NPRTRV >1) THEN + IF(IFLD > 0 .AND. .NOT. PRESENT(KVSET)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IFLD > 0 AND NOT PRESENT(KVSET)',& + & NPRTRV,IFLD + CALL ABORT_TRANS('ESPECNORM: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF +IF(MYPROC == IMASTER) THEN + IF(.NOT. PRESENT(PNORM)) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM NOT PRESENT') + ENDIF + IF(UBOUND(PNORM,1) < IFLD_G) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM TOO SMALL') + ENDIF +ENDIF +IF(IFLD > 0 ) THEN + IF(.NOT. PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('ESPECNORM: PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,1) < IFLD) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,2) < D%NSPEC2) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL ESPNORM_CTL(PSPEC,IFLD,IFLD_G,KVSET,IMASTER,PMET,PNORM) +IF (LHOOK) CALL DR_HOOK('ESPECNORM',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPECNORM diff --git a/src/etrans/gpu/external/etrans_end.F90 b/src/etrans/gpu/external/etrans_end.F90 new file mode 100644 index 0000000..f09ffa2 --- /dev/null +++ b/src/etrans/gpu/external/etrans_end.F90 @@ -0,0 +1,153 @@ +SUBROUTINE ETRANS_END(CDMODE) + +!**** *ETRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL ETRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 after G.Radnoti +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Jul-2013 LENABLED +! R. El Khatib 01-Set-2015 Support for FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : MSETUP0, NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL, NPRCIDS +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL !, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +USE TPM_FLT ,ONLY : S, FLT_RESOL +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL + +IMPLICIT NONE + +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE +! Local variables +CHARACTER*5 :: CLMODE +INTEGER(KIND=JPIM) :: JRES +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ETRANS_END',0,ZHOOK_HANDLE) +CLMODE='FINAL' +IF (PRESENT(CDMODE)) CLMODE=CDMODE +IF (CLMODE == 'FINAL') THEN + DO JRES=1,NDEF_RESOL + CALL EDEALLOC_RESOL(JRES) + ENDDO + NULLIFY(R) + IF (ALLOCATED(DIM_RESOL)) DEALLOCATE(DIM_RESOL) + NULLIFY(RALD) + IF (ALLOCATED(ALDDIM_RESOL)) DEALLOCATE(ALDDIM_RESOL) +!EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF +!TPM_DISTR + NULLIFY(D) + IF (ALLOCATED(DISTR_RESOL)) DEALLOCATE(DISTR_RESOL) + NULLIFY(DALD) + IF (ALLOCATED(ALDDISTR_RESOL)) DEALLOCATE(ALDDISTR_RESOL) +!TPM_FFT + NULLIFY(T) + IF (ALLOCATED(FFT_RESOL)) DEALLOCATE(FFT_RESOL) + !NULLIFY(TB) + !IF( ALLOCATED(FFTB_RESOL) ) DEALLOCATE(FFTB_RESOL) +#ifdef WITH_FFTW + !TPM_FFTW + NULLIFY(TW) + DEALLOCATE(FFTW_RESOL) +#endif +!TPM_FLT + NULLIFY(S) + IF (ALLOCATED(FLT_RESOL)) DEALLOCATE(FLT_RESOL) + NULLIFY(TALD) + IF (ALLOCATED(ALDFFT_RESOL)) DEALLOCATE(ALDFFT_RESOL) + +!TPM_FIELDS + NULLIFY(F) + IF (ALLOCATED(FIELDS_RESOL)) DEALLOCATE(FIELDS_RESOL) + NULLIFY(FALD) + IF (ALLOCATED(ALDFIELDS_RESOL)) DEALLOCATE(ALDFIELDS_RESOL) + +!TPM_GEOMETRY + NULLIFY(G) + IF(ALLOCATED(GEOM_RESOL)) DEALLOCATE(GEOM_RESOL) + NULLIFY(GALD) + IF(ALLOCATED(ALDGEO_RESOL)) DEALLOCATE(ALDGEO_RESOL) +!TPM_TRANS + IF(ALLOCATED(FOUBUF_IN)) THEN +!$acc exit data delete (FOUBUF_IN) + DEALLOCATE(FOUBUF_IN) + ENDIF + IF(ALLOCATED(FOUBUF)) THEN +!$acc exit data delete (FOUBUF) + DEALLOCATE(FOUBUF) + ENDIF + + IF (ALLOCATED(LENABLED)) DEALLOCATE(LENABLED) + MSETUP0 = 0 + NMAX_RESOL = 0 + NCUR_RESOL = 0 + NDEF_RESOL = 0 +ENDIF + +IF (CLMODE == 'FINAL' .OR. CLMODE == 'INTER') THEN + !EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF + !TPM_DISTR + IF (ALLOCATED(NPRCIDS)) DEALLOCATE(NPRCIDS) +ENDIF +IF (LHOOK) CALL DR_HOOK('ETRANS_END',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_END + diff --git a/src/etrans/gpu/external/etrans_inq.F90 b/src/etrans/gpu/external/etrans_inq.F90 new file mode 100644 index 0000000..1d580d6 --- /dev/null +++ b/src/etrans/gpu/external/etrans_inq.F90 @@ -0,0 +1,539 @@ +SUBROUTINE ETRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + & KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + & KMYMS,KESM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + & KULTPP,KPTRLS,& + & KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + & LDSPLITLAT,LDLINEAR_GRID,& + & KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,KDEF_RESOL,LDLAM,& + & PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KCPL2M,KCPL4M ,KPROCM) + +!**** *ETRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL ETRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resolution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension +! N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KESM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation - n direction +! KMSMAX - spectral truncation - m direction +! KNVALUE - n value for each KSPEC2 spectral coeffient +! KMVALUE - m value for each KSPEC2 spectral coeffient +! LDLINEAR_GRID : .TRUE. if the grid is linear + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLEPINM - Eigen-values of the inverse Laplace operator + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing with TFL 36R4 +! R. El Khatib 08-Aug-2012 KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,LDLAM,KDEF_RESOL,LDLINEAR_GRID +! T. Dalkilic 28-Aug-2012 KCPL4M +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NDEF_RESOL +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPRTRW, MYSETV, MYSETW +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_DISTR ,ONLY : DALD +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPMALD_FIELDS + +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS_EW, N_REGIONS_NS +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2G +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KNUMP +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOT +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTG +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTMX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KMYMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KESM0(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KUMPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPOSSP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KALLMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSTA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KONL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KULTPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL,INTENT(INOUT) :: LDSPLITLAT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL :: PGW(:) ! Argument NOT used +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLEI3 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPOLEGL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPMS(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL2M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL4M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPROCM(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMVALUE(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PLEPINM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLINEAR_GRID +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1,IU2 +INTEGER(KIND=JPIM) :: IC, JN, JMLOC, IM, JJ, JM +INTEGER(KIND=JPIM) :: ISMAX(0:R%NSMAX),ISNAX(0:RALD%NMSMAX),ICPLM(0:RALD%NMSMAX) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IF(PRESENT(KSPEC)) KSPEC = D%NSPEC +IF(PRESENT(KSPEC2)) KSPEC2 = D%NSPEC2 +IF(PRESENT(KSPEC2G)) KSPEC2G = R%NSPEC2_G +IF(PRESENT(KSPEC2MX)) KSPEC2MX = D%NSPEC2MX +IF(PRESENT(KNUMP)) KNUMP = D%NUMP +IF(PRESENT(KGPTOT)) KGPTOT = D%NGPTOT +IF(PRESENT(KGPTOTG)) KGPTOTG = D%NGPTOTG +IF(PRESENT(KGPTOTMX)) KGPTOTMX = D%NGPTOTMX +IF(PRESENT(KFRSTLOFF)) KFRSTLOFF = D%NFRSTLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPRTRW)) KPRTRW = NPRTRW +IF(PRESENT(KMYSETW)) KMYSETW = MYSETW +IF(PRESENT(KMYSETV)) KMYSETV = MYSETV +IF(PRESENT(KMY_REGION_NS)) KMY_REGION_NS = MY_REGION_NS +IF(PRESENT(KMY_REGION_EW)) KMY_REGION_EW = MY_REGION_EW +IF(PRESENT(LDLAM)) LDLAM = G%LAM +IF(PRESENT(KDEF_RESOL)) KDEF_RESOL = NDEF_RESOL + +IF(PRESENT(KGPTOTL)) THEN + IF(UBOUND(KGPTOTL,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KGPTOTL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 2 TOO SMALL') + ELSE + KGPTOTL(1:N_REGIONS_NS,1:N_REGIONS_EW) = D%NGPTOTL(:,:) + ENDIF +ENDIF + +IF(PRESENT(KMYMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KMYMS,1) < D%NUMP) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS TOO SMALL') + ELSE + KMYMS(1:D%NUMP) = D%MYMS(:) + ENDIF +ENDIF + +IF(PRESENT(KESM0)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KESM0,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 TOO SMALL') + ELSE + KESM0(0:RALD%NMSMAX) = DALD%NESM0(:) + ENDIF +ENDIF + +IF(PRESENT(KCPL2M)) THEN + IF(UBOUND(KCPL2M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL2M TOO SMALL') + ELSE + KCPL2M(0:RALD%NMSMAX) = DALD%NCPL2M(:) + ENDIF +ENDIF +IF(PRESENT(KPROCM)) THEN + IF(UBOUND(KPROCM,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPROCM TOO SMALL') + ELSE + KPROCM(0:RALD%NMSMAX) = D%NPROCM(:) + ENDIF +ENDIF + +IF(PRESENT(KUMPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KUMPP,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP TOO SMALL') + ELSE + KUMPP(1:NPRTRW) = D%NUMPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPOSSP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPOSSP,1) < NPRTRW+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP TOO SMALL') + ELSE + KPOSSP(1:NPRTRW+1) = D%NPOSSP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRMS,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS TOO SMALL') + ELSE + KPTRMS(1:NPRTRW) = D%NPTRMS(:) + ENDIF +ENDIF + +IF(PRESENT(KALLMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KALLMS,1) < RALD%NMSMAX+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS TOO SMALL') + ELSE + KALLMS(1:RALD%NMSMAX+1) = D%NALLMS(:) + ENDIF +ENDIF + +IF(PRESENT(KDIM0G)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KDIM0G,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G TOO SMALL') + ELSE + KDIM0G(0:RALD%NMSMAX) = D%NDIM0G(0:RALD%NMSMAX) + ENDIF +ENDIF + +IF(PRESENT(KFRSTLAT)) THEN + IF(UBOUND(KFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KFRSTLAT TOO SMALL') + ELSE + KFRSTLAT(1:N_REGIONS_NS) = D%NFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KLSTLAT)) THEN + IF(UBOUND(KLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KLSTLAT TOO SMALL') + ELSE + KLSTLAT(1:N_REGIONS_NS) = D%NLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLAT)) THEN + IF(UBOUND(KPTRLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLAT TOO SMALL') + ELSE + KPTRLAT(1:R%NDGL) = D%NPTRLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRFRSTLAT)) THEN + IF(UBOUND(KPTRFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRFRSTLAT TOO SMALL') + ELSE + KPTRFRSTLAT(1:N_REGIONS_NS) = D%NPTRFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLSTLAT)) THEN + IF(UBOUND(KPTRLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLSTLAT TOO SMALL') + ELSE + KPTRLSTLAT(1:N_REGIONS_NS) = D%NPTRLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KSTA)) THEN + IF(UBOUND(KSTA,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 1 TOO SMALL') + ELSEIF(UBOUND(KSTA,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 2 TOO SMALL') + ELSE + KSTA(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NSTA(:,:) + ENDIF +ENDIF + +IF(PRESENT(KONL)) THEN + IF(UBOUND(KONL,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KONL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 2 TOO SMALL') + ELSE + KONL(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NONL(:,:) + ENDIF +ENDIF + +IF(PRESENT(LDSPLITLAT)) THEN + IF(UBOUND(LDSPLITLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: LDSPLITLAT TOO SMALL') + ELSE + LDSPLITLAT(1:R%NDGL) = D%LSPLITLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KULTPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KULTPP,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP TOO SMALL') + ELSE + KULTPP(1:NPRTRNS) = D%NULTPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRLS,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS TOO SMALL') + ELSE + KPTRLS(1:NPRTRNS) = D%NPTRLS(:) + ENDIF +ENDIF + +IF(PRESENT(PMU)) THEN + IF(UBOUND(PMU,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: PMU TOO SMALL') + ELSE + PMU(1:R%NDGL) = F%RMU + ENDIF +ENDIF + +IF(PRESENT(PRPNM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF + IU1 = UBOUND(PRPNM,1) + IU2 = UBOUND(PRPNM,2) + IF(IU1 < R%NDGNH) THEN + CALL ABORT_TRANS('ETRANS_INQ:FIRST DIM. OF PRNM TOO SMALL') + ELSE + IU1 = MIN(IU1,R%NLEI3) + IU2 = MIN(IU2,D%NSPOLEGL) + PRPNM(1:IU1,1:IU2) = F%RPNM(1:IU1,1:IU2) + ENDIF +ENDIF +IF(PRESENT(KLEI3)) THEN + KLEI3=R%NLEI3 +ENDIF +IF(PRESENT(KSPOLEGL)) THEN + KSPOLEGL=D%NSPOLEGL +ENDIF +IF(PRESENT(KPMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPMS,1) < R%NSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS TOO SMALL') + ELSE + KPMS(0:R%NSMAX) = D%NPMS(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KSMAX)) KSMAX = R%NSMAX +IF(PRESENT(KMSMAX)) KMSMAX = RALD%NMSMAX +IF(PRESENT(PLEPINM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(PLEPINM,1) < R%NSPEC_G/2) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM TOO SMALL') + ELSEIF (LBOUND(PLEPINM,1) /= -1) THEN + CALL ABORT_TRANS('ETRANS_INQ: LOWER BOUND OF PLEPINM SHOULD BE -1') + ELSE + PLEPINM(:) = FALD%RLEPINM(:) + ENDIF +ENDIF +IF(PRESENT(KNVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KNVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KNVALUE(IC+JJ)=JN + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KMVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KMVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KMVALUE(IC+JJ)=IM + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KCPL4M)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KCPL4M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + KCPL4M(JM) = 4*(ISNAX(JM)+1) + ENDDO + ENDIF +ENDIF + + +IF(PRESENT(LDLINEAR_GRID)) THEN + LDLINEAR_GRID = R%NSMAX > (R%NDGL -1)/3 .OR. RALD%NMSMAX > (R%NDLON -1)/3 +ENDIF + + +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_INQ diff --git a/src/etrans/gpu/external/etrans_release.F90 b/src/etrans/gpu/external/etrans_release.F90 new file mode 100644 index 0000000..ea4f5a8 --- /dev/null +++ b/src/etrans/gpu/external/etrans_release.F90 @@ -0,0 +1,51 @@ +SUBROUTINE ETRANS_RELEASE(KRESOL) + +!**** *ETRANS_RELEASE* - release a spectral resolution + +! Purpose. +! -------- +! Release all arrays related to a given resolution tag + +!** Interface. +! ---------- +! CALL ETRANS_RELEASE + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL + +!endif INTERFACE + +! ------------------------------------------------------------------ + +CALL EDEALLOC_RESOL(KRESOL) + +! ------------------------------------------------------------------ + +END SUBROUTINE ETRANS_RELEASE diff --git a/src/etrans/gpu/internal/cpl_int_mod.F90 b/src/etrans/gpu/internal/cpl_int_mod.F90 new file mode 100644 index 0000000..2b55a5b --- /dev/null +++ b/src/etrans/gpu/internal/cpl_int_mod.F90 @@ -0,0 +1,33 @@ +MODULE CPL_INT_MOD +CONTAINS +SUBROUTINE CPL_INT(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,CPL_PROC,KPTRGP) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE +INTEGER(KIND=JPIM), INTENT(IN) :: KENDROWL +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KFFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEN +INTEGER(KIND=JPIM), INTENT(IN) :: KSTA(KENDROWL) +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB), INTENT(INOUT) :: PGTF(KFIELDS,KLEN) +EXTERNAL CPL_PROC + +INTEGER(KIND=JPIM) :: IPTRGP(KFIELDS) +INTEGER(KIND=JPIM) :: J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +!-------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',0,ZHOOK_HANDLE) +IF(PRESENT(KPTRGP)) THEN + IPTRGP(:)=KPTRGP(1:KFIELDS) +ELSE + DO J=1,KFIELDS + IPTRGP(J)=J + ENDDO +ENDIF +CALL CPL_PROC(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,IPTRGP) +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',1,ZHOOK_HANDLE) +END SUBROUTINE CPL_INT +END MODULE CPL_INT_MOD diff --git a/src/etrans/gpu/internal/easre1ad_mod.F90 b/src/etrans/gpu/internal/easre1ad_mod.F90 new file mode 100644 index 0000000..b382d78 --- /dev/null +++ b/src/etrans/gpu/internal/easre1ad_mod.F90 @@ -0,0 +1,80 @@ +MODULE EASRE1AD_MOD +CONTAINS +SUBROUTINE EASRE1AD(KM,KMLOC,KF_OUT_LT,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_TRANS +USE EASRE1BAD_MOD ,ONLY : EASRE1BAD + +!**** *EASRE1AD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *EASRE1AD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. EASRE1BAD - basic recombination routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1AD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_OUT_LT + +REAL(KIND=JPRB) , INTENT(OUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: IFLDS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EASRE1AD_MOD:EASRE1AD',0,ZHOOK_HANDLE) +IFLDS = KF_OUT_LT + +CALL EASRE1BAD(IFLDS,KM,KMLOC,PIA) +IF (LHOOK) CALL DR_HOOK('EASRE1AD_MOD:EASRE1AD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1AD +END MODULE EASRE1AD_MOD diff --git a/src/etrans/gpu/internal/easre1b_mod.F90 b/src/etrans/gpu/internal/easre1b_mod.F90 new file mode 100644 index 0000000..6f574e1 --- /dev/null +++ b/src/etrans/gpu/internal/easre1b_mod.F90 @@ -0,0 +1,107 @@ +MODULE EASRE1B_MOD +CONTAINS +SUBROUTINE EASRE1B(KFIELD,PFFT) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R, R_NDGL +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D, D_NUMP, D_NSTAGT0B, D_NPNTGTB1, D_NPROCL + +!**** *ASRE1B* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PFFT(:,:,:) + +INTEGER(KIND=JPIM) :: JFLD, JGL ,IPROC +INTEGER(KIND=JPIM) :: IISTAN +INTEGER(KIND=JPIM) :: JM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + + +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',0,ZHOOK_HANDLE) + + +!$acc parallel loop collapse(3) private (JM, JGL, JFLD, IPROC, IISTAN) & +!$acc& present (FOUBUF_IN, PFFT, D_NSTAGT0B, D_NPNTGTB1, D_NPROCL, D_NUMP, R_NDGL) & +!$acc& copyin(KFIELD) default(none) +DO JM = 1, D_NUMP !100 + DO JGL=1,R_NDGL !400 + DO JFLD =1,2*KFIELD !500 + IPROC=D_NPROCL(JGL) + IISTAN=(D_NSTAGT0B(IPROC) + D_NPNTGTB1(JM,JGL))*2*KFIELD + FOUBUF_IN(IISTAN+JFLD)=PFFT(JGL,JM,JFLD) + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + + + + + +#ifdef gnarls +!$acc data present(FOUBUF_IN) +!$acc update host(FOUBUF_IN) +write (20,*) __FILE__,__LINE__ +write (20,*) 'FOUBUF_IN = ' +write (20,'(6E18.8)') FOUBUF_IN +call flush(20) +!$acc end data +#endif + +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1B +END MODULE EASRE1B_MOD diff --git a/src/etrans/gpu/internal/easre1bad_mod.F90 b/src/etrans/gpu/internal/easre1bad_mod.F90 new file mode 100644 index 0000000..717ba71 --- /dev/null +++ b/src/etrans/gpu/internal/easre1bad_mod.F90 @@ -0,0 +1,86 @@ +MODULE EASRE1BAD_MOD +CONTAINS +SUBROUTINE EASRE1BAD(KFIELD,KM,KMLOC,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +!**** *EASRE1BAD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *EASRE1BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1BAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD,KM,KMLOC + +REAL(KIND=JPRB), INTENT(OUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: JFLD, JGL ,IPROC +INTEGER(KIND=JPIM) :: IISTAN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EASRE1BAD_MOD:EASRE1BAD',0,ZHOOK_HANDLE) +DO JGL=1,R%NDGL + IPROC=D%NPROCL(JGL) + DO JFLD =1,2*KFIELD + IISTAN=(D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*2*KFIELD + PIA(JGL,JFLD)=FOUBUF_IN(IISTAN+JFLD) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EASRE1BAD_MOD:EASRE1BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1BAD +END MODULE EASRE1BAD_MOD + diff --git a/src/etrans/gpu/internal/edealloc_resol_mod.F90 b/src/etrans/gpu/internal/edealloc_resol_mod.F90 new file mode 100644 index 0000000..5d341b9 --- /dev/null +++ b/src/etrans/gpu/internal/edealloc_resol_mod.F90 @@ -0,0 +1,102 @@ +MODULE EDEALLOC_RESOL_MOD +CONTAINS +SUBROUTINE EDEALLOC_RESOL(KRESOL) + +!**** *EDEALLOC_RESOL_MOD* - Deallocations of a resolution + +! Purpose. +! -------- +! Release allocated arrays for a given resolution + +!** Interface. +! ---------- +! CALL EDEALLOC_RESOL_MOD + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 from etrans_end +! B. Bochenek (Apr 2015): Phasing: update +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LENABLED, NOUT +USE TPM_DISTR ,ONLY : D +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FFT ,ONLY : T +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW,DESTROY_PLANS_FFTW +#endif +USE TPM_FLT ,ONLY : S + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KRESOL + +! ------------------------------------------------------------------ + +IF (.NOT.LENABLED(KRESOL)) THEN + + WRITE(UNIT=NOUT,FMT='('' EDEALLOC_RESOL WARNING: KRESOL = '',I3,'' ALREADY DISABLED '')') KRESOL + +ELSE + + CALL ESET_RESOL(KRESOL) + + !TPM_DISTR + DEALLOCATE(D%NFRSTLAT,D%NLSTLAT,D%NPTRLAT,D%NPTRFRSTLAT,D%NPTRLSTLAT) + DEALLOCATE(D%LSPLITLAT,D%NSTA,D%NONL,D%NGPTOTL,D%NPROCA_GP) + + IF(D%LWEIGHTED_DISTR) THEN + DEALLOCATE(D%RWEIGHT) + ENDIF + + IF(.NOT.D%LGRIDONLY) THEN + + DEALLOCATE(D%MYMS,D%NUMPP,D%NPOSSP,D%NPROCM,D%NDIM0G,D%NASM0,D%NATM0) + DEALLOCATE(D%NLATLS,D%NLATLE,D%NPMT,D%NPMS,D%NPMG,D%NULTPP,D%NPROCL) + DEALLOCATE(D%NPTRLS,D%NALLMS,D%NPTRMS,D%NSTAGT0B,D%NSTAGT1B,D%NPNTGTB0) + DEALLOCATE(D%NPNTGTB1,D%NLTSFTB,D%NLTSGTB,D%MSTABF) + DEALLOCATE(D%NSTAGTF) + + !TPM_FFT + DEALLOCATE(T%TRIGS,T%NFAX) +#ifdef WITH_FFTW + !TPM_FFTW + IF( TW%LFFTW )THEN + CALL DESTROY_PLANS_FFTW + ENDIF +#endif + !TPM_GEOMETRY + DEALLOCATE(G%NMEN,G%NDGLU) + + ELSE + + DEALLOCATE(G%NLOEN) + + ENDIF + + LENABLED(KRESOL)=.FALSE. + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EDEALLOC_RESOL +END MODULE EDEALLOC_RESOL_MOD diff --git a/src/etrans/gpu/internal/edir_trans_ctl_mod.F90 b/src/etrans/gpu/internal/edir_trans_ctl_mod.F90 new file mode 100644 index 0000000..34c6db0 --- /dev/null +++ b/src/etrans/gpu/internal/edir_trans_ctl_mod.F90 @@ -0,0 +1,202 @@ +MODULE EDIR_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EDIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS_CTL* - Control routine for direct spectral transform. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PMEANU,PMEANV - mean winds +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! G. Radnoti 01-03-13 adaptation to aladin +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 02-09-30 : P. Smolikova AUX_PROC for d4 in NH +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTDIR_CTL_MOD ,ONLY : ELTDIR_CTL +USE EFTDIR_CTL_MOD ,ONLY : EFTDIR_CTL + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP,& + & AUX_PROC=AUX_PROC) + ENDIF + CALL ELTDIR_CTL(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV,AUX_PROC=AUX_PROC) + ENDDO +ELSE + + ! No splitting of fields, transform done in one go + + CALL EFTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,IF_GPB,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2,& + & AUX_PROC=AUX_PROC) + + CALL ELTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV,& + & AUX_PROC=AUX_PROC) + +ENDIF +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIR_TRANS_CTL +END MODULE EDIR_TRANS_CTL_MOD diff --git a/src/etrans/gpu/internal/edir_trans_ctlad_mod.F90 b/src/etrans/gpu/internal/edir_trans_ctlad_mod.F90 new file mode 100644 index 0000000..34de8ee --- /dev/null +++ b/src/etrans/gpu/internal/edir_trans_ctlad_mod.F90 @@ -0,0 +1,194 @@ +MODULE EDIR_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE EDIR_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +!**** *EDIR_TRANS_CTLAD* - Control routine for direct spectral transform-adj. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTDIR_CTLAD_MOD ,ONLY : ELTDIR_CTLAD +USE EFTDIR_CTLAD_MOD ,ONLY : EFTDIR_CTLAD + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTLAD_MOD:EDIR_TRANS_CTLAD',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL ELTDIR_CTLAD(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL ELTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + + CALL EFTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,IF_GPB,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) +ENDIF +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTLAD_MOD:EDIR_TRANS_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIR_TRANS_CTLAD +END MODULE EDIR_TRANS_CTLAD_MOD diff --git a/src/etrans/gpu/internal/edist_spec_control_mod.F90 b/src/etrans/gpu/internal/edist_spec_control_mod.F90 new file mode 100644 index 0000000..738364f --- /dev/null +++ b/src/etrans/gpu/internal/edist_spec_control_mod.F90 @@ -0,0 +1,202 @@ +MODULE EDIST_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE EDIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,KVSET,PSPEC,LDIM1_IS_FLD,& + & KSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,KSORT) + +!**** *EDIST_SPEC_CONTROL* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Routine for distributing spectral array + +!** Interface. +! ---------- +! CALL EDIST_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array + +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. Hamdi Feb. 2006 Phasing CY31 +! P. Marguinaud Oct. 2014 Add KSORT argument (change output fields order) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_NON_BLOCKING_STANDARD +!USE TPM_GEN +!USE TPM_DIM +USE TPM_DISTR ,ONLY : NPRCIDS, NPRTRW, MTAGDISTSP, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR ,ONLY : DALD +!USE TPMALD_DIM +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN), TARGET :: KSORT (:) + +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G) +REAL(KIND=JPRB) :: ZFLD(KSPEC2_G),ZBUF(KSPEC2_G) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,JNM,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ISTA,ISTP,ILENR,ISENDREQ(NPRTRW) +INTEGER(KIND=JPIM), POINTER :: ISORT (:) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Compute help array for distribution +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC_CONTROL_MOD:EDIST_SPEC_CONTROL',0,ZHOOK_HANDLE) + +IF (PRESENT (KSORT)) THEN + ISORT => KSORT +ELSE + ALLOCATE (ISORT (KFDISTG)) + DO JFLD = 1, KFDISTG + ISORT (JFLD) = JFLD + ENDDO +ENDIF + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JM=1,KSPEC2_G + DO JFLD=1,KFDISTG + PSPEC(ISORT (JFLD),JM) = PSPECG(JFLD,JM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JFLD=1,KFDISTG + DO JM=1,KSPEC2_G + PSPEC(JM,ISORT (JFLD)) = PSPECG(JM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KSMAX + DO JN=0,DALD%NCPL2M(JM)/2-1 + IDIST(II+1)= KDIM0G(JM)+4*JN + IDIST(II+2)= KDIM0G(JM)+4*JN+1 + IDIST(II+3)= KDIM0G(JM)+4*JN+2 + IDIST(II+4)= KDIM0G(JM)+4*JN+3 + II = II+4 + ENDDO + ENDDO + CALL GSTATS(1804,1) +!Distribute spectral array + + IFLDR = 0 + IFLDS = 0 + + CALL GSTATS(812,0) + DO JFLD=1,KFDISTG + IBSET = KVSET(JFLD) + ITAG = MTAGDISTSP+JFLD + + ! Send + IF(KFROM(JFLD) == MYPROC) THEN + IFLDS = IFLDS+1 + IF (LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + ZBUF(IDIST(JNM)) = PSPECG(IFLDS,JNM) + ENDDO + ELSE + DO JNM=1,KSPEC2_G + ZBUF(IDIST(JNM)) = PSPECG(JNM,IFLDS) + ENDDO + ENDIF + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(ISND,0,0,JA,IBSET) + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + CALL MPL_SEND(ZBUF(ISTA:ISTP),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA),& + & CDSTRING='EDIST_SPEC_CONTROL:') + ENDIF + ENDDO + ENDIF + + !Recieve + IF( IBSET == MYSETV )THEN + + IF( KSPEC2 > 0 )THEN + IRCV = KFROM(JFLD) + CALL MPL_RECV(ZFLD(1:KSPEC2),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + & KOUNT=ILENR,CDSTRING='EDIST_SPEC_CONTROL:') + IF( ILENR /= KSPEC2 )THEN + CALL ABORT_TRANS(' EDIST_SPEC_CONTROL: INVALID RECEIVE MESSAGE LENGTH') + ENDIF + IFLDR = IFLDR+1 + IF (LDIM1_IS_FLD) THEN + PSPEC(ISORT (IFLDR),:) = ZFLD(1:KSPEC2) + ELSE + PSPEC(:,ISORT (IFLDR)) = ZFLD(1:KSPEC2) + ENDIF + ENDIF + ENDIF + IF (KFROM(JFLD) == MYPROC) THEN + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF (ILEN > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JA),& + & CDSTRING='DIST_SPEC_CTL: WAIT') + ENDIF + ENDDO + ENDIF + ENDDO + CALL GSTATS(812,1) + !Synchronize processors +CALL GSTATS(787,0) + IF( NPROC > 1 )THEN + CALL MPL_BARRIER(CDSTRING='EDIST_SPEC_CONTROL:') + ENDIF +CALL GSTATS(787,1) +ENDIF + +IF (.NOT. PRESENT (KSORT)) THEN + DEALLOCATE (ISORT) +ENDIF + +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC_CONTROL_MOD:EDIST_SPEC_CONTROL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_SPEC_CONTROL +END MODULE EDIST_SPEC_CONTROL_MOD diff --git a/src/etrans/gpu/internal/efourier_in_mod.F90 b/src/etrans/gpu/internal/efourier_in_mod.F90 new file mode 100644 index 0000000..55a4fa2 --- /dev/null +++ b/src/etrans/gpu/internal/efourier_in_mod.F90 @@ -0,0 +1,81 @@ +MODULE EFOURIER_IN_MOD +CONTAINS +SUBROUTINE EFOURIER_IN(PREEL,KFIELDS) + +!**** *FOURIER_IN* - Copy fourier data from buffer to local array + +! Purpose. +! -------- +! Routine for copying fourier data from buffer to local array + +!** Interface. +! ---------- +! CALL FOURIER_IN(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM +USE PARKIND_ECTRANS, ONLY : JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, & + & D_NSTAGTF, D_MSTABF, D_NSTAGT0B, D_NPNTGTB0, D_NPROCM, D_NPTRLS +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G, G_NMEN,G_NMEN_MAX +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +REAL(KIND=JPRBT), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: JM, JF, IGLG, IPROC, IR, II, ISTA +INTEGER(KIND=JPIM) :: IOFF, JGL +INTEGER(KIND=JPIM) :: NDGL_FS + +! ------------------------------------------------------------------ + +NDGL_FS=D%NDGL_FS + +!$acc data & +!$acc& copyin(D_NPTRLS,D_NSTAGTF,D_MSTABF,D_NSTAGT0B,D_NPNTGTB0,G_NMEN,G_NMEN_MAX,D_NPROCM,NDGL_FS,KFIELDS,MYSETW) & +!$acc& present(PREEL,FOUBUF) + +!$acc kernels default(none) +PREEL(:,:)=0._JPRBT +!$acc end kernels + +!$acc parallel loop collapse(3) private(IGLG,IPROC,ISTA,IOFF) default (none) +DO JGL = 1, NDGL_FS + DO JM=0,G_NMEN_MAX + DO JF=1,KFIELDS + IGLG = D_NPTRLS(MYSETW)+JGL-1 + IF ( JM .LE. G_NMEN(IGLG)) THEN + IPROC = D_NPROCM(JM) + ISTA = (D_NSTAGT0B(D_MSTABF(IPROC))+D_NPNTGTB0(JM,JGL))*2*KFIELDS + IOFF = 1+D_NSTAGTF(JGL) + PREEL(IOFF+2*JM+0,JF) = FOUBUF(ISTA+2*JF-1) + PREEL(IOFF+2*JM+1,JF) = FOUBUF(ISTA+2*JF ) + END IF + ENDDO + ENDDO +END DO + +!$acc end data + +END SUBROUTINE EFOURIER_IN +END MODULE EFOURIER_IN_MOD + diff --git a/src/etrans/gpu/internal/efourier_out_mod.F90 b/src/etrans/gpu/internal/efourier_out_mod.F90 new file mode 100644 index 0000000..1c496e6 --- /dev/null +++ b/src/etrans/gpu/internal/efourier_out_mod.F90 @@ -0,0 +1,76 @@ +MODULE EFOURIER_OUT_MOD +CONTAINS +SUBROUTINE EFOURIER_OUT(PREEL,KFIELDS) + +!**** *FOURIER_OUT* - Copy fourier data from local array to buffer + +! Purpose. +! -------- +! Routine for copying fourier data from local array to buffer + +!** Interface. +! ---------- +! CALL FOURIER_OUT(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM +USE PARKIND_ECTRANS, ONLY : JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, & + & D_NPTRLS, D_NSTAGTF, D_MSTABF, D_NSTAGT1B, & + & D_NPNTGTB0, D_NPROCM, D_NPROCL +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G, G_NMEN,G_NMEN_MAX +! + +IMPLICIT NONE + +REAL(KIND=JPRBT), INTENT(IN) :: PREEL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS + +INTEGER(KIND=JPIM) :: JGL +INTEGER(KIND=JPIM) :: JM, JF, IGLG, IPROC, IR, II, ISTA, JMMAX +INTEGER(KIND=JPIM) :: IOFF + +! ------------------------------------------------------------------ + +!$acc data & +!$acc& copy(D_NPTRLS,D_NSTAGTF,D_MSTABF,D_NSTAGT1B,D_NPNTGTB0,G_NMEN,G_NMEN_MAX,D_NPROCM) & +!$acc& present(PREEL,FOUBUF_IN) + +!$acc parallel loop collapse(3) private(IGLG,JMMAX,IPROC,ISTA,IOFF) +DO JGL = 1, D%NDGL_FS + DO JM=0,G_NMEN_MAX + DO JF=1,KFIELDS + IGLG = D_NPTRLS(MYSETW)+JGL-1 + JMMAX = G_NMEN(IGLG) + IF (JM .LE. JMMAX) THEN + IPROC = D_NPROCM(JM) + ISTA = (D_NSTAGT1B(D_MSTABF(IPROC))+D_NPNTGTB0(JM,JGL))*2*KFIELDS + IOFF = 1+D_NSTAGTF(JGL) + FOUBUF_IN(ISTA+2*JF-1) = PREEL(IOFF+2*JM+0,JF) + FOUBUF_IN(ISTA+2*JF ) = PREEL(IOFF+2*JM+1,JF) + END IF + ENDDO + ENDDO +END DO +!$acc end data + +END SUBROUTINE EFOURIER_OUT +END MODULE EFOURIER_OUT_MOD + diff --git a/src/etrans/gpu/internal/efsc_mod.F90 b/src/etrans/gpu/internal/efsc_mod.F90 new file mode 100644 index 0000000..e035a63 --- /dev/null +++ b/src/etrans/gpu/internal/efsc_mod.F90 @@ -0,0 +1,121 @@ +MODULE EFSC_MOD +CONTAINS +SUBROUTINE EFSC(KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *FSC - Division by a*cos(theta), east-west derivatives + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSC(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW, D_NPTRLS, D_NSTAGTF +USE TPM_GEOMETRY ,ONLY : G, G_NMEN +USE TPMALD_GEO ,ONLY : GALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(IN ) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + +INTEGER(KIND=JPIM) :: JF,IGLG,II,IR,JM,JGL +REAL(KIND=JPRB) :: ZIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',0,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!* EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN +!$acc parallel loop collapse (2) private (JF, JGL, IGLG, IMEN, ISTAGTF, JM, ZIM, IR, II) & +!$acc & present (D_NPTRLS, G_NMEN, D_NSTAGTF, PUVDERS, PUV) + DO JF=1,2*KF_UV + DO JGL = 1, D%NDGL_FS + IGLG = D_NPTRLS(MYSETW)+JGL-1 + IMEN = G_NMEN(IGLG) + ISTAGTF = D_NSTAGTF(JGL) + DO JM=0,IMEN + ZIM=REAL(JM,JPRB)*GALD%EXWN + IR = ISTAGTF+2*JM+1 + II = IR+1 + PUVDERS(IR,JF) = -PUV(II,JF)*ZIM + PUVDERS(II,JF) = PUV(IR,JF)*ZIM + ENDDO + ENDDO + ENDDO +!$acc end parallel loop +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN +!$acc parallel loop collapse (2) private (JF, JGL, IGLG, IMEN, ISTAGTF, JM, ZIM, IR, II) & +!$acc & present (D_NPTRLS, G_NMEN, D_NSTAGTF, PEWDERS, PSCALAR) + DO JF=1,KF_SCALARS + DO JGL = 1, D%NDGL_FS + IGLG = D_NPTRLS(MYSETW)+JGL-1 + IMEN = G_NMEN(IGLG) + ISTAGTF = D_NSTAGTF(JGL) + DO JM=0,IMEN + ZIM=REAL(JM,JPRB)*GALD%EXWN + IR = ISTAGTF+2*JM+1 + II = IR+1 + PEWDERS(IR,JF) = -PSCALAR(II,JF)*ZIM + PEWDERS(II,JF) = PSCALAR(IR,JF)*ZIM + ENDDO + ENDDO + ENDDO +!$acc end parallel loop +ENDIF + +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFSC +END MODULE EFSC_MOD diff --git a/src/etrans/gpu/internal/efscad_mod.F90 b/src/etrans/gpu/internal/efscad_mod.F90 new file mode 100644 index 0000000..4b335f4 --- /dev/null +++ b/src/etrans/gpu/internal/efscad_mod.F90 @@ -0,0 +1,121 @@ +MODULE EFSCAD_MOD +CONTAINS +SUBROUTINE EFSCAD(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *EFSCAD - Division by a*cos(theta), east-west derivatives - adjoint + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL EFSCAD(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G + +USE TPMALD_GEO ,ONLY : GALD + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + +INTEGER(KIND=JPIM) :: JF,IGLG,II,IR,JM + +REAL(KIND=JPRB) :: ZIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFSCAD_MOD:EFSCAD',0,ZHOOK_HANDLE) +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + + ZIM=REAL(JM,JPRB)*GALD%EXWN + + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,2*KF_UV + + PUV(JF,II) = PUV(JF,II) - ZIM*PUVDERS(JF,IR) + PUV(JF,IR) = PUV(JF,IR) + ZIM*PUVDERS(JF,II) + + PUVDERS(JF,IR) = 0.0_JPRB + PUVDERS(JF,II) = 0.0_JPRB + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + + ZIM=REAL(JM,JPRB)*GALD%EXWN + + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,KF_SCALARS + + PSCALAR(JF,II) = PSCALAR(JF,II) - ZIM* PEWDERS(JF,IR) + PSCALAR(JF,IR) = PSCALAR(JF,IR) + ZIM* PEWDERS(JF,II) + + PEWDERS(JF,IR) = 0.0_JPRB + PEWDERS(JF,II) = 0.0_JPRB + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EFSCAD_MOD:EFSCAD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EFSCAD +END MODULE EFSCAD_MOD diff --git a/src/etrans/gpu/internal/eftdata_mod.F90 b/src/etrans/gpu/internal/eftdata_mod.F90 new file mode 100644 index 0000000..2e12340 --- /dev/null +++ b/src/etrans/gpu/internal/eftdata_mod.F90 @@ -0,0 +1,8 @@ +MODULE EFTDATA_MOD + +USE PARKIND1, ONLY : JPRB + +IMPLICIT NONE + +REAL(KIND=JPRB), ALLOCATABLE, SAVE, TARGET :: ZGTF_PERM (:,:) +END MODULE diff --git a/src/etrans/gpu/internal/eftdir_ctl_mod.F90 b/src/etrans/gpu/internal/eftdir_ctl_mod.F90 new file mode 100644 index 0000000..af0862e --- /dev/null +++ b/src/etrans/gpu/internal/eftdir_ctl_mod.F90 @@ -0,0 +1,229 @@ +MODULE EFTDIR_CTL_MOD +CONTAINS +SUBROUTINE EFTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,AUX_PROC) + +!**** *EFTDIR_CTL - Direct Fourier transform control + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTL(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_GPB - total global number of output gridpoint fields +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-03-13 adaptation to aladin (coupling) +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 19-11-01 : G. Radnoti bug corection by introducing cpl_int interface +! 02-09-30 : P. Smolikova AUX_PROC for d4 in NH +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D +!USE TPM_GEN ,ONLY : LALLOPERM2 +USE EFTDATA_MOD ,ONLY : ZGTF_PERM + +USE TRGTOL_MOD ,ONLY : TRGTOL, TRGTOL_CUDAAWARE +USE EFOURIER_OUT_MOD ,ONLY : EFOURIER_OUT +USE EFTDIR_MOD ,ONLY : EFTDIR +USE EXTPER_MOD ,ONLY : EXTPER +!use cudafor + +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +! Local variables +REAL(KIND=JPRB) :: ZDUM +REAL(KIND=JPRB), POINTER :: ZGTF (:,:) +INTEGER(KIND=JPIM) :: IST,INUL,JGL,IGL,IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTL_MOD:EFTDIR_CTL',0,ZHOOK_HANDLE) + +IF (ALLOCATED (ZGTF_PERM)) THEN + IF ((UBOUND (ZGTF_PERM, 1) /= D%NLENGTF) .OR. (UBOUND (ZGTF_PERM, 2) < KF_FS)) THEN + !$acc exit data delete (ZGTF_PERM) + DEALLOCATE (ZGTF_PERM) + ENDIF +ENDIF + +IF (.NOT. ALLOCATED (ZGTF_PERM)) THEN + ALLOCATE (ZGTF_PERM (D%NLENGTF,KF_FS)) + !$acc enter data create (ZGTF_PERM) +ENDIF + +ZGTF => ZGTF_PERM (:,1:KF_FS) + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +! Transposition + +CALL GSTATS(158,0) + +#ifdef USE_CUDA_AWARE_MPI_FT +CALL TRGTOL_CUDAAWARE(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED=.TRUE.) +#else +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED=.TRUE.) +!$acc update device(ZGTF) +#endif + +CALL GSTATS(158,1) +CALL GSTATS(106,0) + +! Periodization of auxiliary fields in x direction +IF(R%NNOEXTZL>0) THEN + CALL ABOR1 ('EFTDIR_CTL: BIPERIODICIZATION NOT SUPPORTED') +ELSE + IF (PRESENT(AUX_PROC)) THEN + CALL ABOR1 ('EFTDIR_CTL: AUX_PROC not supported yet; check ZGTF dimensions') + CALL AUX_PROC(ZGTF,ZDUM,KF_FS,D%NLENGTF,1,D%NDGL_FS,0,.TRUE.,& + & D%NSTAGTF,INUL,INUL,INUL) + ENDIF +ENDIF + + +! Fourier transform + +IBLEN=D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN +!$acc exit data delete (FOUBUF_IN) + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +!$acc enter data create (FOUBUF_IN) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +!$acc enter data create (FOUBUF_IN) +ENDIF + +CALL GSTATS(1640,0) + +IF(KF_FS>0) THEN + CALL EFTDIR (ZGTF, KF_FS) +ENDIF + +! Save Fourier data in FOUBUF_IN + +CALL EFOURIER_OUT (ZGTF, KF_FS) + +CALL GSTATS(1640,1) +CALL GSTATS(106,1) + +!IF (.NOT. LALLOPERM2) THEN + !$acc exit data delete (ZGTF_PERM) + DEALLOCATE (ZGTF_PERM) +!ENDIF + +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTL_MOD:EFTDIR_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIR_CTL +END MODULE EFTDIR_CTL_MOD diff --git a/src/etrans/gpu/internal/eftdir_ctlad_mod.F90 b/src/etrans/gpu/internal/eftdir_ctlad_mod.F90 new file mode 100644 index 0000000..b4acf90 --- /dev/null +++ b/src/etrans/gpu/internal/eftdir_ctlad_mod.F90 @@ -0,0 +1,192 @@ +MODULE EFTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE EFTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTDIR_CTLAD - Direct Fourier transform control - adjoint + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL EFTDIR_CTLAD(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! EFTDIRAD - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 19-11-01 G. Radnoti bug correction by introducing CPL_INT interface +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 05-03-15 remove HLOMP +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D + +USE TRLTOG_MOD ,ONLY : TRLTOG +USE FOURIER_OUTAD_MOD ,ONLY : FOURIER_OUTAD +USE EFTDIRAD_MOD ,ONLY : EFTDIRAD +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP2(:,:,:) + +! Local variables +REAL(KIND=JPRB) :: ZGTF(KF_FS,D%NLENGTF) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: JGL,IGL,J1,J2 +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTLAD_MOD:EFTDIR_CTLAD',0,ZHOOK_HANDLE) +CALL GSTATS(133,0) +ZGTF(:,:)=0._JPRB + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IVSETSC(:) = -1 +ENDIF +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +CALL GSTATS(1642,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + CALL FOURIER_OUTAD(ZGTF,KF_FS,IGL) + +! Fourier transform + + IF(KF_FS>0) THEN + CALL EFTDIRAD(ZGTF,KF_FS,IGL) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1642,1) +CALL GSTATS(133,1) + +! Transposition + +CALL GSTATS(183,0) +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +CALL GSTATS(183,1) +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTLAD_MOD:EFTDIR_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIR_CTLAD +END MODULE EFTDIR_CTLAD_MOD diff --git a/src/etrans/gpu/internal/eftdir_mod.F90 b/src/etrans/gpu/internal/eftdir_mod.F90 new file mode 100644 index 0000000..125ff8a --- /dev/null +++ b/src/etrans/gpu/internal/eftdir_mod.F90 @@ -0,0 +1,95 @@ +MODULE EFTDIR_MOD +CONTAINS +SUBROUTINE EFTDIR(PREEL, KFIELDS) + + +!**** *FTDIR - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPIB, JPRB +USE PARKIND_ECTRANS, ONLY : JPRBT + +USE TPM_DISTR ,ONLY : D +USE TPM_DIM ,ONLY : R +#ifdef HAVE_CUFFT +USE TPM_FFTC ,ONLY : CREATE_PLAN_FFT +USE CUDA_DEVICE_MOD +use cudafor +#endif + +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, EXECUTE_PLAN_FFT +USE ISO_C_BINDING + +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IRLEN,ICLEN +!INTEGER(KIND=JPIM) :: IPLAN_R2C +TYPE(C_PTR) :: IPLAN_R2C +REAL(KIND=JPRBT) :: ZSCAL +!REAL(KIND=JPRBT), ALLOCATABLE :: ZGTF2(:,:), ZGTF3(:,:) +!INTEGER(KIND=JPIM) :: IDIM1, IDIM2, LOT,NX + +integer :: istat + +! ------------------------------------------------------------------ + +IRLEN=R%NDLON+R%NNOEXTZG +ICLEN=D%NLENGTF/D%NDGL_FS + +CALL CREATE_PLAN_FFT(IPLAN_R2C,-1,IRLEN,KFIELDS*D%NDGL_FS,LDNONSTRIDED=.TRUE.) + +CALL EXECUTE_PLAN_FFT(-1,IRLEN,PREEL(1,1),PREEL(1,1),IPLAN_R2C) + +#ifdef HAVE_CUFFT +CALL CREATE_PLAN_FFT (IPLAN_R2C, -1, KN=IRLEN, KLOT=KFIELDS*D%NDGL_FS, & + & KISTRIDE=1, KIDIST=ICLEN, KOSTRIDE=1, KODIST=ICLEN/2) +!$acc host_data use_device(PREEL) +CALL EXECUTE_PLAN_FFTC_INPLACE (IPLAN_R2C, -1, PREEL (1, 1)) +!$acc end host_data +#endif + +ZSCAL = 1._JPRB / REAL (R%NDLON, JPRB) + +!$acc kernels present (PREEL) copyin (ZSCAL) +PREEL = ZSCAL * PREEL +!$acc end kernels + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIR +END MODULE EFTDIR_MOD diff --git a/src/etrans/gpu/internal/eftdirad_mod.F90 b/src/etrans/gpu/internal/eftdirad_mod.F90 new file mode 100644 index 0000000..45c7eaf --- /dev/null +++ b/src/etrans/gpu/internal/eftdirad_mod.F90 @@ -0,0 +1,120 @@ +MODULE EFTDIRAD_MOD +CONTAINS +SUBROUTINE EFTDIRAD(PREEL,KFIELDS,KGL) + +!**** *EFTDIRAD - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform - adjoint +! -------- + +!** Interface. +! ---------- +! CALL EFTDIRAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_TRANS +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T !, TB +!USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE TPM_DIM ,ONLY : R +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFTDIRAD_MOD:EFTDIRAD',0,ZHOOK_HANDLE) + +ITYPE = 1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG) +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO +DO JJ=1,1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = 2.0_JPRB * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +#ifdef WITH_FFTW +IF( .NOT. TW%LFFTW )THEN +#endif + + !IF( T%LUSEFFT992(KGL) )THEN + + ! CALL FFT992(PREEL(1,IOFF),T%TRIGS(1,KGL),& + ! &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + + !ELSE + + !CALL BLUESTEIN_FFT(TB,ILOEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + + !ENDIF + +#ifdef WITH_FFTW +ELSE + + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + +ENDIF +#endif + + + ! Change of metric (not in forward routine) +ZNORM=1.0_JPRB/(2.0_JPRB*REAL(ILOEN,JPRB)) +DO JJ=1,ILOEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = ZNORM * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EFTDIRAD_MOD:EFTDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIRAD +END MODULE EFTDIRAD_MOD diff --git a/src/etrans/gpu/internal/eftinv_ctl_mod.F90 b/src/etrans/gpu/internal/eftinv_ctl_mod.F90 new file mode 100644 index 0000000..0d2ca7b --- /dev/null +++ b/src/etrans/gpu/internal/eftinv_ctl_mod.F90 @@ -0,0 +1,305 @@ +MODULE EFTINV_CTL_MOD +CONTAINS +SUBROUTINE EFTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTINV_CTL - Inverse Fourier transform control + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINV_CTL(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Hello : 03-10-14 old way of calling +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NERR, LALLOPERM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, FOUBUF +USE TPM_DISTR ,ONLY : D +!USE TPM_GEN ,ONLY : LALLOPERM2 +USE EFTDATA_MOD ,ONLY : ZGTF_PERM + + +USE EFOURIER_IN_MOD ,ONLY : EFOURIER_IN +USE EFSC_MOD ,ONLY : EFSC +USE EFTINV_MOD ,ONLY : EFTINV +USE TRLTOG_MOD ,ONLY : TRLTOG, TRLTOG_CUDAAWARE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) + +REAL(KIND=JPRB),TARGET :: ZDUM(D%NLENGTF,1) +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) +REAL(KIND=JPRB), POINTER :: ZGTF (:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +INTEGER(KIND=JPIM) :: JF_FS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE_BAR + + +! ------------------------------------------------------------------ + +! 1. Copy Fourier data to local array + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD:EFTINV_CTL',0,ZHOOK_HANDLE) + + +IF (ALLOCATED (ZGTF_PERM)) THEN + IF ((UBOUND (ZGTF_PERM, 1) /= D%NLENGTF) .OR. (UBOUND (ZGTF_PERM, 2) < KF_FS)) THEN + !$acc exit data delete (ZGTF_PERM) + DEALLOCATE (ZGTF_PERM) + ENDIF +ENDIF + +IF (.NOT. ALLOCATED (ZGTF_PERM)) THEN + ALLOCATE (ZGTF_PERM (D%NLENGTF,KF_FS)) + !$acc enter data create (ZGTF_PERM) +ENDIF + +ZGTF => ZGTF_PERM (:,1:KF_FS) + +CALL GSTATS(107,0) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + ZUV => ZGTF(:,IST:IST+2*KF_UV-1) + IST = IST+2*KF_UV + ZSCALAR => ZGTF(:,IST:IST+KF_SCALARS-1) + IST = IST+KF_SCALARS + ZNSDERS => ZGTF(:,IST:IST+KF_SCDERS-1) + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(:,IST:IST+2*KF_UV-1) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(:,1:1) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(:,IST:IST+KF_SCDERS-1) + ELSE + ZEWDERS => ZDUM(:,1:1) + ENDIF +ENDIF + +CALL GSTATS(1639,0) + +CALL EFOURIER_IN(ZGTF,KF_OUT_LT) ! COPIES DATA FROM FOUBUF + +IF (.NOT.LALLOPERM) THEN +!$acc exit data delete (FOUBUF) + DEALLOCATE (FOUBUF) +ENDIF + +! 2. Fourier space computations + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL EFSC(KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) +ENDIF + +! 3. Fourier transform +IF(KF_FS > 0) THEN + CALL EFTINV (ZGTF, KF_FS) +ENDIF + +CALL GSTATS(1639,1) + +!THOMAS +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD_BAR',0,ZHOOK_HANDLE_BAR) + + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF +CALL GSTATS(107,1) + +!THOMAS +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD_BAR',1,ZHOOK_HANDLE_BAR) + + +! 4. Transposition + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSEIF(PRESENT(KVSETSC2).OR.PRESENT(KVSETSC3A)& + & .OR.PRESENT(KVSETSC3B)) THEN + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTL_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(157,0) +JF_FS=KF_FS-D%IADJUST_I + +#ifdef USE_CUDA_AWARE_MPI_FT +CALL TRLTOG_CUDAAWARE(ZGTF,JF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED=.TRUE.) +#else + +!$acc update host (ZGTF) +CALL TRLTOG(ZGTF,JF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED=.TRUE.) + +#endif + +CALL GSTATS(157,1) + +!IF (.NOT. LALLOPERM2) THEN + !$acc exit data delete (ZGTF_PERM) + DEALLOCATE (ZGTF_PERM) +!ENDIF + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD:EFTINV_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINV_CTL +END MODULE EFTINV_CTL_MOD diff --git a/src/etrans/gpu/internal/eftinv_ctlad_mod.F90 b/src/etrans/gpu/internal/eftinv_ctlad_mod.F90 new file mode 100644 index 0000000..1ac50e1 --- /dev/null +++ b/src/etrans/gpu/internal/eftinv_ctlad_mod.F90 @@ -0,0 +1,284 @@ +MODULE EFTINV_CTLAD_MOD +CONTAINS +SUBROUTINE EFTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTINV_CTLAD - Inverse Fourier transform control - adjoint + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINV_CTLAD(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP +USE TPM_DISTR ,ONLY : D + +USE FOURIER_INAD_MOD ,ONLY : FOURIER_INAD +USE EFSCAD_MOD ,ONLY : EFSCAD +USE EFTINVAD_MOD ,ONLY : EFTINVAD +USE TRGTOL_MOD ,ONLY : TRGTOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE EXTPER_MOD ,ONLY : EXTPER +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! ------------------------------------------------------------------ + +REAL(KIND=JPRB),TARGET :: ZGTF(KF_FS,D%NLENGTF) +REAL(KIND=JPRB),TARGET :: ZDUM(1,D%NLENGTF) +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IST, IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! 4. Transposition + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTLAD_MOD:EFTINV_CTLAD',0,ZHOOK_HANDLE) + +ZGTF(:,:)=0._JPRB + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF + +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV_CTLAD:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTLAD_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(182,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(182,1) + +! Periodization of auxiliary fields in x direction +IF(R%NNOEXTZL>0) THEN + CALL EXTPER(ZGTF,R%NDLON+R%NNOEXTZL,1,R%NDLON,KF_FS,D%NDGL_FS,D%NSTAGTF,0) +ENDIF + + +! 3. Fourier transform + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + IST = IST+KF_SCALARS + ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(1:1,:) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZEWDERS => ZDUM(1:1,:) + ENDIF +ENDIF + +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF + +CALL GSTATS(132,0) + +CALL GSTATS(1641,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + IF(KF_FS > 0) THEN + CALL EFTINVAD(ZGTF,KF_FS,IGL) + ENDIF + +! 2. Fourier space computations + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL EFSCAD(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 1. Copy Fourier data to local array + + CALL FOURIER_INAD(ZGTF,KF_OUT_LT,IGL) + +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1641,1) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF + +CALL GSTATS(132,1) +IF (LHOOK) CALL DR_HOOK('EFTINV_CTLAD_MOD:EFTINV_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINV_CTLAD +END MODULE EFTINV_CTLAD_MOD diff --git a/src/etrans/gpu/internal/eftinv_mod.F90 b/src/etrans/gpu/internal/eftinv_mod.F90 new file mode 100644 index 0000000..dc3cc50 --- /dev/null +++ b/src/etrans/gpu/internal/eftinv_mod.F90 @@ -0,0 +1,119 @@ +MODULE EFTINV_MOD +CONTAINS +SUBROUTINE EFTINV(PREEL,KFIELDS) + +!**** *FTINV - Inverse Fourier transform + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 : 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE PARKIND_ECTRANS, ONLY : JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC +USE TPM_GEOMETRY ,ONLY : G +use tpm_gen, only: nout +USE TPM_FFT ,ONLY : T !, TB +!USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +#ifdef HAVE_CUFFT +USE TPM_FFTC ,ONLY : CREATE_PLAN_FFT, destroy_plan_fft +USE CUDA_DEVICE_MOD +#endif +USE TPM_DIM ,ONLY : R +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, EXECUTE_PLAN_FFT +USE ISO_C_BINDING + +IMPLICIT NONE + +INTEGER (KIND=JPIM), INTENT(IN) :: KFIELDS +REAL (KIND=JPRBT), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN +!INTEGER(KIND=JPIM) :: IPLAN_C2R +TYPE(C_PTR) :: IPLAN_C2R +integer :: istat +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFTINV_MOD:EFTINV',0,ZHOOK_HANDLE) + +IRLEN=R%NDLON+R%NNOEXTZG +ICLEN=D%NLENGTF/D%NDGL_FS + +CALL CREATE_PLAN_FFT(IPLAN_C2R,1,IRLEN,KFIELDS*D%NDGL_FS,LDNONSTRIDED=.TRUE.) + + + +#ifdef gnarls +!$acc data present(PREEL) +!$acc update host(PREEL) +!$acc end data +write (*,*) __FILE__, __LINE__ +write (20,*) 'shape(PREEL) = ',shape(PREEL) +write (20,*) 'FFTH INPUT:' +write (20,'(6E18.8)') PREEL +call flush(20) +#endif + + +CALL EXECUTE_PLAN_FFT(1,IRLEN,PREEL(1,1),PREEL(1,1),IPLAN_C2R) + +#ifdef gnarls +!$acc data present(PREEL) +!$acc update host(PREEL) +!$acc end data +! write (*,*) __FILE__, __LINE__ +write (20,*) 'FFTH OUTPUT:' +write (20,'(6E18.8)') PREEL +call flush(620) +#endif + + + + +#ifdef HAVE_CUFFT +CALL CREATE_PLAN_FFT (IPLAN_C2R, +1, KN=IRLEN, KLOT=KFIELDS*D%NDGL_FS, & + & KISTRIDE=1, KIDIST=ICLEN/2, KOSTRIDE=1, KODIST=ICLEN) +!$acc host_data use_device(PREEL) +CALL EXECUTE_PLAN_FFTC_INPLACE (IPLAN_C2R, +1, PREEL (1, 1)) +!$acc end host_data +istat = cuda_Synchronize() +#endif + +IF (LHOOK) CALL DR_HOOK('EFTINV_MOD:EFTINV',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINV +END MODULE EFTINV_MOD diff --git a/src/etrans/gpu/internal/eftinvad_mod.F90 b/src/etrans/gpu/internal/eftinvad_mod.F90 new file mode 100644 index 0000000..93581c3 --- /dev/null +++ b/src/etrans/gpu/internal/eftinvad_mod.F90 @@ -0,0 +1,129 @@ +MODULE EFTINVAD_MOD +CONTAINS +SUBROUTINE EFTINVAD(PREEL,KFIELDS,KGL) + +!**** *EFTINVAD - Inverse Fourier transform - adjoint + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINVAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T !, TB +!USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFTINVAD_MOD:EFTINVAD',0,ZHOOK_HANDLE) + +ITYPE =-1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG)+R%NNOEXTZL +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 + +! ! Change of metric (not in forward routine) + +#ifdef WITH_FFTW +IF( .NOT. TW%LFFTW )THEN +#endif + + !IF( T%LUSEFFT992(KGL) )THEN + + ! CALL FFT992(PREEL(1,IOFF),T%TRIGS(1,KGL),& + ! &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + + !ELSE + + !CALL BLUESTEIN_FFT(TB,ILOEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + ! DO JJ=1,ICLEN + ! DO JF=1,KFIELDS + ! PREEL(JF,IOFF-1+JJ)=PREEL(JF,IOFF-1+JJ)/REAL(ILOEN,JPRB) + ! ENDDO + ! ENDDO + + !ENDIF + +#ifdef WITH_FFTW +ELSE + + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + +ENDIF +#endif + +ZNORM=2.0_JPRB*REAL(ILOEN,JPRB) +DO JJ=1,1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = (ZNORM/2.0_JPRB) * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +DO JJ=3,ILOEN+1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = ZNORM * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EFTINVAD_MOD:EFTINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINVAD +END MODULE EFTINVAD_MOD diff --git a/src/etrans/gpu/internal/egath_spec_control_mod.F90 b/src/etrans/gpu/internal/egath_spec_control_mod.F90 new file mode 100644 index 0000000..f9ec729 --- /dev/null +++ b/src/etrans/gpu/internal/egath_spec_control_mod.F90 @@ -0,0 +1,205 @@ +MODULE EGATH_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE EGATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& + & KMSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,KCPL2M,LDZA0IP) + +!**** *EGATH_SPEC_CONTROL* - Gather global spectral array from processors + +! Purpose. +! -------- +! Routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be distributed +! KTO(:) - Processor responsible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero unused spectral coefficients + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_NON_BLOCKING_STANDARD, JP_BLOCKING_STANDARD + +!USE TPM_GEN +!USE TPM_DIM +USE TPM_DISTR ,ONLY : MTAGDISTSP, NPRCIDS, NPRTRW, MYSETV, MYPROC, NPROC + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SET2PE_MOD ,ONLY : SET2PE +USE SUWAVEDI_MOD + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KCPL2M(0:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +REAL(KIND=JPRB) :: ZFLD(KSPEC2,KFGATHG) +REAL(KIND=JPRB),ALLOCATABLE :: ZRECV(:,:) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ISP,ILENR,ISTA,ISTP,ISENDREQ(KFGATHG),IPOS0,JNM +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G),IMYFIELDS + +! ------------------------------------------------------------------ + + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JM=1,KSPEC2_G + DO JFLD=1,KFGATHG + PSPECG(JFLD,JM) =PSPEC(JFLD,JM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JFLD=1,KFGATHG + DO JM=1,KSPEC2_G + PSPECG(JM,JFLD) =PSPEC(JM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + IF(IMYFIELDS>0) THEN + ALLOCATE(ZRECV(KSPEC2_G,IMYFIELDS)) + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KMSMAX + DO JN=0,KCPL2M(JM)/2-1 + IDIST(II+1) = KDIM0G(JM)+4*JN + IDIST(II+2) = KDIM0G(JM)+4*JN+1 + IDIST(II+3) = KDIM0G(JM)+4*JN+2 + IDIST(II+4) = KDIM0G(JM)+4*JN+3 + II = II+4 + ENDDO + ENDDO + CALL GSTATS(1804,1) + ENDIF + + CALL GSTATS_BARRIER(788) + + !Send + CALL GSTATS(810,0) + IFLDS = 0 + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + + IFLDS = IFLDS+1 + ISND = KTO(JFLD) + ITAG = MTAGDISTSP+JFLD+17 + IF(LDIM1_IS_FLD) THEN + ZFLD(1:KSPEC2,IFLDS)=PSPEC(IFLDS,1:KSPEC2) + CALL MPL_SEND(ZFLD(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='EGATH_SPEC_CONTROL') + ELSE + CALL MPL_SEND(PSPEC(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='EGATH_SPEC_CONTROL') + ENDIF + ENDIF + ENDDO + ENDIF + + ! Recieve + IFLDR = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IBSET = KVSET(JFLD) + IFLDR = IFLDR+1 + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(IRCV,0,0,JA,IBSET) + ITAG = MTAGDISTSP+JFLD+17 + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + CALL MPL_RECV(ZRECV(ISTA:ISTP,IFLDR),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & + &CDSTRING='EGATH_SPEC_CONTROL') + IF( ILENR /= ILEN )THEN + WRITE(0,'("EGATH_SPEC_CONTROL: JFLD=",I4," JA=",I4," ILEN=",I10," ILENR=",I10)')& + &JFLD,JA,ILEN,ILENR + CALL ABORT_TRANS('EGATH_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + + ! Check for completion of sends + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JFLD), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ENDIF + ENDDO + ENDIF + CALL GSTATS(810,1) + CALL GSTATS_BARRIER2(788) + + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JNM,II,JN,ISP) + DO JFLD=1,IMYFIELDS + IF(LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + PSPECG(JFLD,JNM) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ELSE + DO JNM=1,KSPEC2_G + PSPECG(JNM,JFLD) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + IF(ALLOCATED(ZRECV)) DEALLOCATE(ZRECV) + + !Synchronize processors + CALL GSTATS(785,0) + CALL MPL_BARRIER(CDSTRING='EGATH_SPEC_CONTROL:') + CALL GSTATS(785,1) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC_CONTROL +END MODULE EGATH_SPEC_CONTROL_MOD + + diff --git a/src/etrans/gpu/internal/einv_trans_ctl_mod.F90 b/src/etrans/gpu/internal/einv_trans_ctl_mod.F90 new file mode 100644 index 0000000..8271e9f --- /dev/null +++ b/src/etrans/gpu/internal/einv_trans_ctl_mod.F90 @@ -0,0 +1,303 @@ +MODULE EINV_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EINV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PSPMEANU,PSPMEANV) + + +!**** *EINV_TRANS_CTL* - Control routine for inverse spectral transform. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTINV_CTL_MOD ,ONLY : ELTINV_CTL +USE EFTINV_CTL_MOD ,ONLY : EFTINV_CTL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G + +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL ELTINV_CTL(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PSPMEANU,PSPMEANV=PSPMEANV,FSPGL_PROC=FSPGL_PROC) + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + + ELSEIF(IF_UV_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),KPTRGP=IPTRGP,& + & PGP=PGP) + + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + + CALL ELTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PSPMEANU,PSPMEANV=PSPMEANV,FSPGL_PROC=FSPGL_PROC) + + CALL EFTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +ENDIF +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EINV_TRANS_CTL +END MODULE EINV_TRANS_CTL_MOD diff --git a/src/etrans/gpu/internal/einv_trans_ctlad_mod.F90 b/src/etrans/gpu/internal/einv_trans_ctlad_mod.F90 new file mode 100644 index 0000000..aa00708 --- /dev/null +++ b/src/etrans/gpu/internal/einv_trans_ctlad_mod.F90 @@ -0,0 +1,292 @@ +MODULE EINV_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE EINV_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +!**** *EINV_TRANS_CTLAD* - Control routine for inverse spectral transform adj. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTLAD - control of Legendre transform +! FTINV_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTINV_CTLAD_MOD ,ONLY : ELTINV_CTLAD +USE EFTINV_CTLAD_MOD ,ONLY : EFTINV_CTLAD +! + +IMPLICIT NONE + +! Declaration of arguments +! +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTLAD_MOD:EINV_TRANS_CTLAD',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + CALL ELTINV_CTLAD(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL EFTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + + CALL ELTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV ) +ENDIF +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTLAD_MOD:EINV_TRANS_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EINV_TRANS_CTLAD +END MODULE EINV_TRANS_CTLAD_MOD diff --git a/src/etrans/gpu/internal/eledir_mod.F90 b/src/etrans/gpu/internal/eledir_mod.F90 new file mode 100644 index 0000000..133ae9c --- /dev/null +++ b/src/etrans/gpu/internal/eledir_mod.F90 @@ -0,0 +1,111 @@ +MODULE ELEDIR_MOD +CONTAINS +SUBROUTINE ELEDIR(KFC,KLED2,PFFT) + +!**** *ELEDIR* - Direct meridional transform. + +! Purpose. +! -------- +! Direct meridional tranform of state variables. + +!** Interface. +! ---------- +! CALL ELEDIR(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Reference. +! ---------- + +! Author. +! ------- + +! Modifications. +! -------------- +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, D_NUMP +USE TPM_DIM ,ONLY : R +USE TPMALD_FFT ,ONLY : TALD +USE TPMALD_DIM ,ONLY : RALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +#ifdef HAVE_CUFFT +USE TPM_FFTC ,ONLY : CREATE_PLAN_FFT +USE CUDA_DEVICE_MOD +#endif +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, EXECUTE_PLAN_FFT +USE ISO_C_BINDING + +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFC,KLED2 +REAL(KIND=JPRB) , INTENT(INOUT) :: PFFT(:,:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, JLOT +!INTEGER(KIND=JPIM) :: IPLAN_R2C +TYPE(C_PTR) :: IPLAN_R2C +INTEGER(KIND=JPIM) :: JM, JF, JJ +REAL (KIND=JPRB) :: ZSCAL + +integer :: istat + +! ------------------------------------------------------------------ + +!* 1. PERFORM FOURIER TRANFORM. +! -------------------------- + +IRLEN=R%NDGL+R%NNOEXTZG +ICLEN=RALD%NDGLSUR+R%NNOEXTZG +JLOT=UBOUND(PFFT,2)*UBOUND (PFFT,3) + + + + +CALL CREATE_PLAN_FFT(IPLAN_R2C,-1,IRLEN,JLOT,LDNONSTRIDED=.TRUE.) + +CALL EXECUTE_PLAN_FFT(-1,IRLEN,PFFT(1,1,1),PFFT(1,1,1),IPLAN_R2C) + +#ifdef HAVE_CUFFT +CALL CREATE_PLAN_FFT (IPLAN_R2C, -1, KN=IRLEN, KLOT=UBOUND (PFFT,2)*UBOUND (PFFT, 3), & + & KISTRIDE=1, KIDIST=ICLEN, KOSTRIDE=1, KODIST=ICLEN/2) +!$acc host_data use_device (PFFT) +CALL EXECUTE_PLAN_FFTC_INPLACE(IPLAN_R2C, -1, PFFT (1, 1, 1)) +!$acc end host_data +istat = cuda_Synchronize() +#endif + +ZSCAL = 1._JPRB / REAL (IRLEN, JPRB) + +!$acc parallel loop collapse (3) copyin (D_NUMP, KFC, ICLEN, ZSCAL) present (PFFT) +DO JF = 1, KFC + DO JM = 1, D_NUMP + DO JJ = 1, ICLEN + PFFT (JJ, JM, JF) = PFFT (JJ, JM, JF) * ZSCAL + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEDIR +END MODULE ELEDIR_MOD diff --git a/src/etrans/gpu/internal/eledirad_mod.F90 b/src/etrans/gpu/internal/eledirad_mod.F90 new file mode 100644 index 0000000..60dba52 --- /dev/null +++ b/src/etrans/gpu/internal/eledirad_mod.F90 @@ -0,0 +1,99 @@ +MODULE ELEDIRAD_MOD +CONTAINS +SUBROUTINE ELEDIRAD(KM,KFC,KLED2,PFFT) + +!**** *ELEDIRAD* - Direct Legendre transform. + +! Purpose. +! -------- +! Direct Legendre tranform of state variables. + +!** Interface. +! ---------- +! CALL ELEDIRAD(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - matrix multiply +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-01-28 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - NTMAX instead of NSMAX +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS + +USE TPMALD_FFT ,ONLY : TALD +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 + +REAL(KIND=JPRB), INTENT(INOUT) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: JF, JJ +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELEDIRAD_MOD:ELEDIRAD',0,ZHOOK_HANDLE) +DO JJ=1,1 + DO JF=1,KFC + PFFT(JJ,JF) = 2.0_JPRB * PFFT(JJ,JF) + ENDDO +ENDDO + +CALL FFT992(PFFT,TALD%TRIGSE,TALD%NFAXE,1,RALD%NDGLSUR+R%NNOEXTZG,R%NDGL+R%NNOEXTZG,KFC,1) + +ZNORM=1.0_JPRB/(2.0_JPRB*REAL(R%NDGL+R%NNOEXTZG,JPRB)) +DO JJ=1,R%NDGL+R%NNOEXTZG + DO JF=1,KFC + PFFT(JJ,JF) = ZNORM * PFFT(JJ,JF) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('ELEDIRAD_MOD:ELEDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEDIRAD +END MODULE ELEDIRAD_MOD diff --git a/src/etrans/gpu/internal/eleinv_mod.F90 b/src/etrans/gpu/internal/eleinv_mod.F90 new file mode 100644 index 0000000..ae314c9 --- /dev/null +++ b/src/etrans/gpu/internal/eleinv_mod.F90 @@ -0,0 +1,141 @@ +MODULE ELEINV_MOD +CONTAINS +SUBROUTINE ELEINV(KFC,KF_OUT_LT,PFFT) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, D_NUMP +USE TPM_DIM ,ONLY : R +#ifdef HAVE_CUFFT +USE TPM_FFTC ,ONLY : CREATE_PLAN_FFT +USE CUDA_DEVICE_MOD +#endif +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FFT ,ONLY : TALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, EXECUTE_PLAN_FFT +USE ISO_C_BINDING + +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(INOUT) :: PFFT(:,:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, JLOT +!INTEGER(KIND=JPIM) :: IPLAN_C2R +TYPE(C_PTR) :: IPLAN_C2R +REAL (KIND=JPRB) :: ZSCAL + +integer :: istat + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',0,ZHOOK_HANDLE) + +IRLEN=R%NDGL+R%NNOEXTZG +ICLEN=RALD%NDGLSUR+R%NNOEXTZG +JLOT=UBOUND(PFFT,2)*UBOUND (PFFT,3) + +IF (JLOT==0) THEN + write (*,*) __FILE__, __LINE__ + write (*,*) 'N = ',IRLEN + write (*,*) 'lot = ',JLOT + IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',1,ZHOOK_HANDLE) + RETURN +ENDIF + +CALL CREATE_PLAN_FFT(IPLAN_C2R,1,IRLEN,JLOT,LDNONSTRIDED=.TRUE.) + +! !$acc data present(PFFT) +! !$acc update host(PFFT) +! !$acc end data +! write (*,*) __FILE__, __LINE__ +! write (*,*) 'N = ',IRLEN +! write (*,*) 'lot = ',JLOT +! write (*,*) 'shape(PFFT) = ',shape(PFFT) +! write (*,*) 'FFTH INPUT:' +! write (*,*) PFFT +! call flush(6) + +CALL EXECUTE_PLAN_FFT(1,IRLEN,PFFT(1,1,1),PFFT(1,1,1),IPLAN_C2R) + +! !$acc data present(PFFT) +! !$acc update host(PFFT) +! !$acc end data +! write (*,*) __FILE__, __LINE__ +! write (*,*) 'FFTH OUTPUT:' +! write (*,*) PFFT +! call flush(6) + + +#ifdef HAVE_CUFFT +CALL CREATE_PLAN_FFT (IPLAN_C2R, +1, KN=IRLEN, KLOT=UBOUND (PFFT,2)*UBOUND (PFFT, 3), & + & KISTRIDE=1, KIDIST=ICLEN/2, KOSTRIDE=1, KODIST=ICLEN) + +!$acc host_data use_device (PFFT) +CALL EXECUTE_PLAN_FFTC_INPLACE(IPLAN_C2R, +1, PFFT (1, 1, 1)) +!$acc end host_data +istat = cuda_Synchronize() +#endif + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',1,ZHOOK_HANDLE) + +END SUBROUTINE ELEINV +END MODULE ELEINV_MOD diff --git a/src/etrans/gpu/internal/eleinvad_mod.F90 b/src/etrans/gpu/internal/eleinvad_mod.F90 new file mode 100644 index 0000000..635f711 --- /dev/null +++ b/src/etrans/gpu/internal/eleinvad_mod.F90 @@ -0,0 +1,95 @@ +MODULE ELEINVAD_MOD +CONTAINS +SUBROUTINE ELEINVAD(KM,KFC,KF_OUT_LT,PIA) + +!**** *ELEINVAD* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL ELEINVAD(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINVAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS + +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FFT ,ONLY : TALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(OUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: JJ, JF +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELEINVAD_MOD:ELEINVAD',0,ZHOOK_HANDLE) +CALL FFT992(PIA,TALD%TRIGSE,TALD%NFAXE,1,RALD%NDGLSUR+R%NNOEXTZG,R%NDGL+R%NNOEXTZG,KFC,-1) + +ZNORM=2.0_JPRB*REAL(R%NDGL+R%NNOEXTZG,JPRB) +DO JJ=1,1 + DO JF=1,KFC + PIA(JJ,JF) = (ZNORM/2.0_JPRB) * PIA(JJ,JF) + ENDDO +ENDDO + +DO JJ=3,R%NDGL+R%NNOEXTZG+1 + DO JF=1,KFC + PIA(JJ,JF) = ZNORM * PIA(JJ,JF) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('ELEINVAD_MOD:ELEINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEINVAD +END MODULE ELEINVAD_MOD diff --git a/src/etrans/gpu/internal/eltdata_mod.F90 b/src/etrans/gpu/internal/eltdata_mod.F90 new file mode 100644 index 0000000..e30e368 --- /dev/null +++ b/src/etrans/gpu/internal/eltdata_mod.F90 @@ -0,0 +1,10 @@ +MODULE ELTDATA_MOD + +USE PARKIND1, ONLY : JPRB + +IMPLICIT NONE + +REAL(KIND=JPRB), ALLOCATABLE, SAVE, TARGET :: ZVODI_PERM (:,:,:) +REAL(KIND=JPRB), ALLOCATABLE, SAVE, TARGET :: ZFFT_PERM (:,:,:) +END MODULE + diff --git a/src/etrans/gpu/internal/eltdir_ctl_mod.F90 b/src/etrans/gpu/internal/eltdir_ctl_mod.F90 new file mode 100644 index 0000000..bcb7691 --- /dev/null +++ b/src/etrans/gpu/internal/eltdir_ctl_mod.F90 @@ -0,0 +1,128 @@ +MODULE ELTDIR_CTL_MOD +CONTAINS +SUBROUTINE ELTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV,AUX_PROC) + +!**** *ELTDIR_CTL* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL ELTDIR_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_FS - number of fields in Fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! KFLDPTRUV(:) - field pointer for vorticity and divergence (input) +! KFLDPTRSC(:) - field pointer for scalarvalued fields (input) +! PSPMEANU(:),PSPMEANV(:) - mean winds +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTDIR_MOD ,ONLY : ELTDIR +USE TRLTOM_MOD ,ONLY : TRLTOM, TRLTOM_CUDAAWARE +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2,INUL +REAL(KIND=JPRB) :: ZDUM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTL_MOD:ELTDIR_CTL',0,ZHOOK_HANDLE) + +IBLEN = D%NLENGT0B*2*KF_FS + +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN +!$acc exit data delete (FOUBUF) + DEALLOCATE(FOUBUF) + ALLOCATE (FOUBUF (MAX (1,IBLEN))) +!$acc enter data create (FOUBUF) + ENDIF +ELSE + ALLOCATE (FOUBUF (MAX (1,IBLEN))) +!$acc enter data create (FOUBUF) +ENDIF + +CALL GSTATS(153,0) +! daand: GPU-aware MPI doesn't work here on Lumi +#ifdef USE_CUDA_AWARE_MPI_FT +CALL TRLTOM_CUDAAWARE(FOUBUF_IN,FOUBUF,2*KF_FS) +#else +!$acc update host(FOUBUF_IN) +CALL TRLTOM(FOUBUF_IN,FOUBUF,2*KF_FS) +!$acc update device(FOUBUF) +#endif +CALL GSTATS(153,1) + +IF (.NOT.LALLOPERM) THEN +!$acc exit data delete (FOUBUF_IN) + DEALLOCATE (FOUBUF_IN) +ENDIF + +! Periodization of auxiliary fields in y direction + +IF (PRESENT(AUX_PROC)) THEN + CALL AUX_PROC(ZDUM,FOUBUF,2*KF_FS,1,IBLEN,0,D%NUMP,.FALSE.,& + & INUL,D%NPROCL,D%NSTAGT0B,D%NPNTGTB1) +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1645,0) +IF(KF_FS>0) THEN + CALL ELTDIR(KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) +ENDIF +CALL GSTATS(1645,1) + +IF (.NOT.LALLOPERM) THEN +!$acc exit data delete (FOUBUF) + DEALLOCATE (FOUBUF) +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTL_MOD:ELTDIR_CTL',1,ZHOOK_HANDLE) + +! ----------------------------------------------------------------- + +END SUBROUTINE ELTDIR_CTL +END MODULE ELTDIR_CTL_MOD + + diff --git a/src/etrans/gpu/internal/eltdir_ctlad_mod.F90 b/src/etrans/gpu/internal/eltdir_ctlad_mod.F90 new file mode 100644 index 0000000..fa69f05 --- /dev/null +++ b/src/etrans/gpu/internal/eltdir_ctlad_mod.F90 @@ -0,0 +1,104 @@ +MODULE ELTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE ELTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +!**** *ELTDIR_CTLAD* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL LTDIR_CTLAD(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTDIRAD_MOD ,ONLY : ELTDIRAD +USE TRMTOL_MOD ,ONLY : TRMTOL + + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTLAD_MOD:ELTDIR_CTLAD',0,ZHOOK_HANDLE) +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +ENDIF +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1646,0) +IF(KF_FS > 0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL ELTDIRAD(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC, PSPMEANU,PSPMEANV) + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1646,1) + +CALL GSTATS(181,0) +CALL TRMTOL(FOUBUF,FOUBUF_IN,2*KF_FS) +CALL GSTATS(181,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTLAD_MOD:ELTDIR_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIR_CTLAD +END MODULE ELTDIR_CTLAD_MOD diff --git a/src/etrans/gpu/internal/eltdir_mod.F90 b/src/etrans/gpu/internal/eltdir_mod.F90 new file mode 100644 index 0000000..7dceda2 --- /dev/null +++ b/src/etrans/gpu/internal/eltdir_mod.F90 @@ -0,0 +1,225 @@ +MODULE ELTDIR_MOD +CONTAINS +SUBROUTINE ELTDIR(KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD +!USE TPM_GEN ,ONLY : LALLOPERM2 +USE ELTDATA_MOD ,ONLY : ZFFT_PERM, ZVODI_PERM + + + +USE EPRFI2_MOD ,ONLY : EPRFI2 +USE ELEDIR_MOD ,ONLY : ELEDIR +USE EUVTVD_MOD +USE EUPDSP_MOD ,ONLY : EUPDSP +USE EUVTVD_COMM_MOD +USE EXTPER_MOD ,ONLY : EXTPER + +USE TPM_DISTR ,ONLY : D_NUMP +USE TPM_DIM ,ONLY : R_NDGL + +! +!**** *ELTDIR* - Control of Direct Legendre transform step + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *ELTDIR(...)* + +! Explicit arguments : +! -------------------- IM - zonal wavenumber +! JM - local zonal wavenumber + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! EPRFI2 - prepares the Fourier work arrays for model variables +! ELEDIR - direct Legendre transform +! EUVTVD - +! EUPDSP - updating of spectral arrays (fields) +! EUVTVD_COMM - +! EXTPER - + + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! 01-03-14 G. Radnoti aladin version +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPMEANV(:) + +REAL(KIND=JPRB), POINTER :: ZFFT(:,:,:) +INTEGER(KIND=JPIM) :: IINDEX(2*KF_FS), JF, JDIM +INTEGER(KIND=JPIM) :: IM +INTEGER(KIND=JPIM) :: JM +INTEGER(KIND=JPIM) :: IUS,IUE,IVS,IVE,IVORS,IVORE,IDIVS,IDIVE,IFC +REAL(KIND=JPRB), POINTER :: ZVODI(:,:,:) +INTEGER(KIND=JPIM) :: JGL, IJR, IJI + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',0,ZHOOK_HANDLE) + + +!* 1. PREPARE WORK ARRAYS. +! -------------------- + +IF (ALLOCATED (ZFFT_PERM)) THEN + IF ((UBOUND (ZFFT_PERM, 1) /= RALD%NDGLSUR+R%NNOEXTZG) & +& .OR. (UBOUND (ZFFT_PERM, 2) /= D%NUMP) & +& .OR. (UBOUND (ZFFT_PERM, 3) < KLED2)) THEN + !$acc exit data delete (ZFFT_PERM) + DEALLOCATE (ZFFT_PERM) + ENDIF +ENDIF + +IF (.NOT. ALLOCATED (ZFFT_PERM)) THEN + ALLOCATE (ZFFT_PERM (RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,KLED2)) + !$acc enter data create (ZFFT_PERM) +ENDIF + + +IF (ALLOCATED (ZVODI_PERM)) THEN + IF ((UBOUND (ZVODI_PERM, 1) /= RALD%NDGLSUR+R%NNOEXTZG) & +& .OR. (UBOUND (ZVODI_PERM, 2) /= D%NUMP) & +& .OR. (UBOUND (ZVODI_PERM, 3) < MAX(4*KF_UV,1))) THEN + !$acc exit data delete (ZVODI_PERM) + DEALLOCATE (ZVODI_PERM) + ENDIF +ENDIF + +IF (.NOT. ALLOCATED (ZVODI_PERM)) THEN + ALLOCATE (ZVODI_PERM (RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,MAX(4*KF_UV,1))) + !$acc enter data create (ZVODI_PERM) +ENDIF + +ZFFT => ZFFT_PERM (:,:,1:KLED2) +ZVODI => ZVODI_PERM (:,:,1:MAX(4*KF_UV,1)) + +!$acc kernels present (ZVODI, ZFFT) +ZVODI = 0._JPRB +ZFFT = 0._JPRB +!$acc end kernels + + +IFC = 2 * KF_FS + +CALL EPRFI2(KF_FS,ZFFT) + +!* 2. PERIODICIZATION IN Y DIRECTION +! ------------------------------ + +IF(R%NNOEXTZG>0) THEN + CALL ABOR1 ('ELTDIR: BIPERIODICIZATION NOT SUPPORTED') +ENDIF + +!* 3. DIRECT LEGENDRE TRANSFORM. +! -------------------------- + +CALL ELEDIR(IFC,KLED2,ZFFT) + + +!* 4. COMPUTE VORTICITY AND DIVERGENCE. +! --------------------------------- +IF( KF_UV > 0 ) THEN + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + + CALL EUVTVD(KF_UV,ZFFT(:,:,IUS:IUE),ZFFT(:,:,IVS:IVE),& + & ZVODI(:,:,IVORS:IVORE),ZVODI(:,:,IDIVS:IDIVE)) + +!* 5. COMMUNICATION OF MEAN WIND +! -------------------------- + + + DO JM=1,D%NUMP + IM = D%MYMS(JM) + + CALL EUVTVD_COMM(IM,JM,KF_UV,KFLDPTRUV,ZFFT(:,:,IUS:IUE), & + & ZFFT(:,:,IVS:IVE), & + & PSPMEANU,PSPMEANV) + + ENDDO + +ENDIF + + +!* 6. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL EUPDSP(KF_UV,KF_SCALARS,ZFFT,ZVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,KFLDPTRUV,KFLDPTRSC) + +!IF (.NOT. LALLOPERM2) THEN + !$acc exit data delete (ZFFT_PERM, ZVODI_PERM) + DEALLOCATE (ZFFT_PERM) + DEALLOCATE (ZVODI_PERM) +!ENDIF + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIR +END MODULE ELTDIR_MOD diff --git a/src/etrans/gpu/internal/eltdirad_mod.F90 b/src/etrans/gpu/internal/eltdirad_mod.F90 new file mode 100644 index 0000000..fd11df0 --- /dev/null +++ b/src/etrans/gpu/internal/eltdirad_mod.F90 @@ -0,0 +1,166 @@ +MODULE ELTDIRAD_MOD +CONTAINS +SUBROUTINE ELTDIRAD(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD + +USE EPRFI2AD_MOD ,ONLY : EPRFI2AD +USE ELEDIRAD_MOD ,ONLY : ELEDIRAD +USE EUVTVDAD_MOD +USE EUPDSPAD_MOD ,ONLY : EUPDSPAD + + +!**** *ELTDIRAD* - Control of Direct Legendre transform step - adjoint + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *ELTDIRAD(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! EPRFI2AD - prepares the Fourier work arrays for model variables. +! ELEDIRAD - direct Legendre transform +! EUVTVDAD - +! EUPDSPAD - updating of spectral arrays (fields) + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ +! +IMPLICIT NONE +! +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: IFC +INTEGER(KIND=JPIM) :: IUS,IUE,IVS,IVE,IVORS,IVORE,IDIVS,IDIVE + +REAL(KIND=JPRB) :: ZFFT(RALD%NDGLSUR+R%NNOEXTZG,KLED2) +REAL(KIND=JPRB) :: ZVODI(RALD%NDGLSUR+R%NNOEXTZG,MAX(4*KF_UV,1)) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PREPARE LEGENDRE POLONOMIALS AND EPSNM +! -------------------------------------- + +IF (LHOOK) CALL DR_HOOK('ELTDIRAD_MOD:ELTDIRAD',0,ZHOOK_HANDLE) +ZFFT=0.0_JPRB +ZVODI=0.0_JPRB + +! ------------------------------------------------------------------ + +!* 6. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL EUPDSPAD(KM,KF_UV,KF_SCALARS,ZFFT,ZVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +! ------------------------------------------------------------------ + +!* 5. COMPUTE VORTICITY AND DIVERGENCE. +! --------------------------------- +IF( KF_UV > 0 ) THEN + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV +! SET PART OF ZFFT CONTAINING U AND V TO 0. + ZFFT(:,IUS:IVE) = 0.0_JPRB + CALL EUVTVDAD(KM,KMLOC,KF_UV,KFLDPTRUV,ZFFT(:,IUS:IUE),ZFFT(:,IVS:IVE),& + & ZVODI(:,IVORS:IVORE),ZVODI(:,IDIVS:IDIVE),PSPMEANU,PSPMEANV) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. DIRECT LEGENDRE TRANSFORM. +! -------------------------- +IFC = 2*KF_FS +CALL ELEDIRAD(KM,IFC,KLED2,ZFFT) + +! ------------------------------------------------------------------ + +!* 3. FOURIER SPACE COMPUTATIONS. +! --------------------------- + +! ------------------------------------------------------------------ + +!* 2. PREPARE WORK ARRAYS. +! -------------------- + +CALL EPRFI2AD(KM,KMLOC,KF_FS,ZFFT) +IF (LHOOK) CALL DR_HOOK('ELTDIRAD_MOD:ELTDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIRAD +END MODULE ELTDIRAD_MOD + diff --git a/src/etrans/gpu/internal/eltinv_ctl_mod.F90 b/src/etrans/gpu/internal/eltinv_ctl_mod.F90 new file mode 100644 index 0000000..74999f8 --- /dev/null +++ b/src/etrans/gpu/internal/eltinv_ctl_mod.F90 @@ -0,0 +1,147 @@ +MODULE ELTINV_CTL_MOD +CONTAINS +SUBROUTINE ELTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV,FSPGL_PROC) + +!**** *ELTINV_CTL* - Control routine for inverse Legandre transform. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTINV_MOD ,ONLY : ELTINV +USE TRMTOL_MOD ,ONLY : TRMTOL, TRMTOL_CUDAAWARE +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINV_CTL_MOD:ELTINV_CTL',0,ZHOOK_HANDLE) +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IDIM1 = 2*KF_OUT_LT +IBLEN = D%NLENGT0B*2*KF_OUT_LT + +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN +!$acc exit data delete (FOUBUF_IN) + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +!$acc enter data create (FOUBUF_IN) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +!$acc enter data create (FOUBUF_IN) +ENDIF + +#ifndef gnarls +! daand: initialize for debugging +FOUBUF_IN(:)=-1. +!$acc update device(FOUBUF_IN) +#endif + +IF(KF_OUT_LT > 0) THEN +CALL GSTATS(1647,0) +CALL ELTINV(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) +CALL GSTATS(1647,1) +ENDIF + +IF (ALLOCATED (FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN +!$acc exit data delete (FOUBUF) + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +!$acc enter data create (FOUBUF) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +!$acc enter data create (FOUBUF) +ENDIF + +CALL GSTATS(152,0) + +! daand: GPU-aware MPI doesn't work here on Lumi +#ifdef USE_CUDA_AWARE_MPI_FT +CALL TRMTOL_CUDAAWARE(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) +#else +!$acc update host(FOUBUF_IN) +CALL TRMTOL(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) +!$acc update device(FOUBUF) +#endif + +CALL GSTATS(152,1) + +IF (.NOT.LALLOPERM) THEN +!$acc exit data delete (FOUBUF_IN) + DEALLOCATE (FOUBUF_IN) +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELTINV_CTL_MOD:ELTINV_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV_CTL +END MODULE ELTINV_CTL_MOD diff --git a/src/etrans/gpu/internal/eltinv_ctlad_mod.F90 b/src/etrans/gpu/internal/eltinv_ctlad_mod.F90 new file mode 100644 index 0000000..f42c1e1 --- /dev/null +++ b/src/etrans/gpu/internal/eltinv_ctlad_mod.F90 @@ -0,0 +1,113 @@ +MODULE ELTINV_CTLAD_MOD +CONTAINS +SUBROUTINE ELTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +!**** *ELTINV_CTLAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D +USE ELTINVAD_MOD ,ONLY : ELTINVAD +USE TRLTOM_MOD ,ONLY : TRLTOM +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: IBLEN, ILEI2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINV_CTLAD_MOD:ELTINV_CTLAD',0,ZHOOK_HANDLE) + +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +ENDIF +CALL GSTATS(180,0) +CALL TRLTOM(FOUBUF,FOUBUF_IN,2*KF_OUT_LT) +CALL GSTATS(180,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) + +CALL GSTATS(1648,0) +IF(KF_OUT_LT > 0) THEN + CALL ELTINVAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) +ENDIF +CALL GSTATS(1648,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +IF (LHOOK) CALL DR_HOOK('ELTINV_CTLAD_MOD:ELTINV_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV_CTLAD +END MODULE ELTINV_CTLAD_MOD diff --git a/src/etrans/gpu/internal/eltinv_mod.F90 b/src/etrans/gpu/internal/eltinv_mod.F90 new file mode 100644 index 0000000..b403549 --- /dev/null +++ b/src/etrans/gpu/internal/eltinv_mod.F90 @@ -0,0 +1,245 @@ +MODULE ELTINV_MOD +CONTAINS +SUBROUTINE ELTINV(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPMALD_DIM ,ONLY : RALD +USE EPRFI1B_MOD ,ONLY : EPRFI1B +USE EVDTUV_MOD ,ONLY : EVDTUV +USE ESPNSDE_MOD ,ONLY : ESPNSDE +USE ELEINV_MOD ,ONLY : ELEINV +USE EASRE1B_MOD ,ONLY : EASRE1B +USE FSPGL_INT_MOD ,ONLY : FSPGL_INT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +!USE TPM_GEN ,ONLY : LALLOPERM2 +USE ELTDATA_MOD ,ONLY : ZFFT_PERM + +!**** *LTINV* - Inverse Legendre transform + +! Purpose. +! -------- +! Tranform from Laplace space to Fourier space, compute U and V +! and north/south derivatives of state variables. + +!** Interface. +! ---------- +! *CALL* *LTINV(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : The Laplace arrays of the model. +! -------------------- The values of the Legendre polynomials +! The grid point arrays of the model +! Method. +! ------- + +! Externals. +! ---------- + +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1B - prepares the spectral fields +! VDTUV - compute u and v from vorticity and divergence +! SPNSDE - compute north-south derivatives +! LEINV - Inverse Legendre transform +! ASRE1 - recombination of symmetric/antisymmetric part + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 +INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +!REAL(KIND=JPRB) :: ZFFT(RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,KLEI2) +REAL(KIND=JPRB), POINTER :: ZFFT(:,:,:) + + + +INTEGER(KIND=JPIM) :: IFC, ISTA +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: IFIRST, ILAST,IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + + +! ------------------------------------------------------------------ + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV + +IF (ALLOCATED (ZFFT_PERM)) THEN + IF ((UBOUND (ZFFT_PERM, 1) /= RALD%NDGLSUR+R%NNOEXTZG) & +& .OR. (UBOUND (ZFFT_PERM, 2) /= D%NUMP) & +& .OR. (UBOUND (ZFFT_PERM, 3) < KLEI2)) THEN + !$acc exit data delete (ZFFT_PERM) + DEALLOCATE (ZFFT_PERM) + ENDIF +ENDIF + +IF (.NOT. ALLOCATED (ZFFT_PERM)) THEN + ALLOCATE (ZFFT_PERM (RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,KLEI2)) + !$acc enter data create (ZFFT_PERM) +ENDIF + +ZFFT => ZFFT_PERM (:,:,1:KLEI2) + +!$acc kernels present (ZFFT) +ZFFT = 0.0_JPRB +!$acc end kernels + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL EPRFI1B(ZFFT(:,:,IVORL:IVORU),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1B(ZFFT(:,:,IDIVL:IDIVU),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV + CALL EVDTUV(KF_UV,KFLDPTRUV,ZFFT(:,:,IVORL:IVORU),ZFFT(:,:,IDIVL:IDIVU),& + & ZFFT(:,:,IUL:IUU),ZFFT(:,:,IVL:IVU),PSPMEANU,PSPMEANV) +ENDIF + +IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + IF(ILAST /= 8*KF_UV+2*KF_SCALARS) THEN + WRITE(0,*) 'LTINV:KF_UV,KF_SCALARS,ILAST ',KF_UV,KF_SCALARS,ILAST + CALL ABORT_TRANS('LTINV_MOD:ILAST /= 8*KF_UV+2*KF_SCALARS') + ENDIF +ENDIF + +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL ESPNSDE(KF_SCALARS,ZFFT(:,:,ISL:ISU),ZFFT(:,:,IDL:IDU)) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF + +CALL ELEINV(IFC,KF_OUT_LT,ZFFT) + +! ------------------------------------------------------------------ + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + +CALL EASRE1B(KF_OUT_LT,ZFFT(:,:,ISTA:ISTA+IFC-1)) + +! ------------------------------------------------------------------ + +! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + +IF(PRESENT(FSPGL_PROC)) THEN + CALL FSPGL_INT(KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,FSPGL_PROC,& + & KFLDPTRUV,KFLDPTRSC) +ENDIF + + +!IF (.NOT. LALLOPERM2) THEN + !$acc exit data delete (ZFFT_PERM) + DEALLOCATE (ZFFT_PERM) +!ENDIF + +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + + + +END SUBROUTINE ELTINV +END MODULE ELTINV_MOD + diff --git a/src/etrans/gpu/internal/eltinvad_mod.F90 b/src/etrans/gpu/internal/eltinvad_mod.F90 new file mode 100644 index 0000000..843db44 --- /dev/null +++ b/src/etrans/gpu/internal/eltinvad_mod.F90 @@ -0,0 +1,252 @@ +MODULE ELTINVAD_MOD +CONTAINS +SUBROUTINE ELTINVAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +!**** *ELTINVAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL ELTINVAD(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINVAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn add KMLOC to EVDTUVAD call +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR + +USE EASRE1BAD_MOD ,ONLY : EASRE1BAD +USE ELEINVAD_MOD ,ONLY : ELEINVAD +USE EPRFI1BAD_MOD ,ONLY : EPRFI1BAD +USE ESPNSDEAD_MOD ,ONLY : ESPNSDEAD +USE EVDTUVAD_MOD ,ONLY : EVDTUVAD +USE EVDTUVAD_COMM_MOD +USE EXTPER_MOD ,ONLY : EXTPER + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(RALD%NDGLSUR+R%NNOEXTZG,KLEI2,D%NUMP) +REAL(KIND=JPRB) :: ZIA2(KLEI2,RALD%NDGLSUR+R%NNOEXTZG) + +INTEGER(KIND=JPIM) :: IFC, ISTA, IINDEX(2*KF_OUT_LT), JF, JDIM, IM, JM +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: ILAST,IFIRST,IDIM1,IDIM3,J3 + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINVAD_MOD:ELTINVAD',0,ZHOOK_HANDLE) + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV +ENDIF +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 +ENDIF + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM,JF,JDIM,IINDEX,ZIA2) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + +! 7. OPTIONAL COMPUTATIONS IN FOURIER SPACE +! -------------------------------------- + +!commented IF(PRESENT(FSPGL_PROC)) THEN +!commented CALL FSPGL_INT(IM,JM,FSPGL_PROC) +!commented ENDIF + + +!* 6. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + + ZIA(:,:,JM)=0.0_JPRB + CALL EASRE1BAD(KF_OUT_LT,IM,JM,ZIA(:,ISTA:ISTA+IFC-1,JM)) + + +!* 5. PERIODICIZATION IN Y DIRECTION +! ------------------------------ + + IF(R%NNOEXTZG>0) THEN + DO JF = 1,IFC + DO JDIM = 1,R%NDGL + ZIA2(JF,JDIM)=ZIA(JDIM,JF,JM) + ENDDO + ENDDO + IINDEX(1)=0 + CALL EXTPER(ZIA2(:,:),R%NDGL+R%NNOEXTZG,1,R%NDGL,IFC,1,IINDEX,0) + DO JF = 1,IFC + DO JDIM = 1,R%NDGL+R%NNOEXTZG + ZIA(JDIM,JF,JM) = ZIA2(JF,JDIM) + ENDDO + ENDDO + ENDIF + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + + CALL ELEINVAD(IM,IFC,KF_OUT_LT,ZIA(:,ISTA:ISTA+IFC-1,JM)) + + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + + ZIA(:,1:ISTA-1,JM) = 0.0_JPRB + + IF (KF_UV > 0) THEN + CALL EVDTUVAD(IM,JM,KF_UV,KFLDPTRUV,ZIA(:,IVORL:IVORU,JM),ZIA(:,IDIVL:IDIVU,JM),& + & ZIA(:,IUL:IUU,JM),ZIA(:,IVL:IVU,JM),PSPMEANU,PSPMEANV) + ENDIF + + +ENDDO +!$OMP END PARALLEL DO + +!* 2. COMMUNICATION OF MEAN WIND +! -------------------------- + +IF (KF_UV > 0) THEN + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL EVDTUVAD_COMM(IM,JM,KF_UV,KFLDPTRUV,PSPMEANU,PSPMEANV) + ENDDO +ENDIF + +!* 2. PREPARE SPECTRAL FIELDS +! ----------------------- + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM,IFIRST,ILAST,IDIM1,IDIM3) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + + IFIRST = 1 + ILAST = 4*KF_UV + IF (KF_UV > 0) THEN + CALL EPRFI1BAD(IM,ZIA(:,IVORL:IVORU,JM),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1BAD(IM,ZIA(:,IDIVL:IDIVU,JM),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV + ENDIF + + IF (KF_SCDERS > 0) THEN + CALL ESPNSDEAD(IM,KF_SCALARS,ZIA(:,ISL:ISU,JM),ZIA(:,IDL:IDU,JM)) + ENDIF + + IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + ENDIF + +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('ELTINVAD_MOD:ELTINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINVAD +END MODULE ELTINVAD_MOD diff --git a/src/etrans/gpu/internal/eprfi1_mod.F90 b/src/etrans/gpu/internal/eprfi1_mod.F90 new file mode 100644 index 0000000..167f086 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi1_mod.F90 @@ -0,0 +1,3 @@ +MODULE EPRFI1_MOD +END MODULE EPRFI1_MOD + diff --git a/src/etrans/gpu/internal/eprfi1ad_mod.F90 b/src/etrans/gpu/internal/eprfi1ad_mod.F90 new file mode 100644 index 0000000..ad7cd17 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi1ad_mod.F90 @@ -0,0 +1,103 @@ +MODULE EPRFI1AD_MOD +CONTAINS +SUBROUTINE EPRFI1AD(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DISTR +!USE TPM_TRANS + +USE EPRFI1BAD_MOD ,ONLY : EPRFI1BAD + +!**** *EPRFI1AD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1AD(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1AD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ +! +IMPLICIT NONE +! +! +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI1AD_MOD:EPRFI1AD',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL EPRFI1BAD(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1BAD(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1BAD(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1AD_MOD:EPRFI1AD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1AD +END MODULE EPRFI1AD_MOD diff --git a/src/etrans/gpu/internal/eprfi1b_mod.F90 b/src/etrans/gpu/internal/eprfi1b_mod.F90 new file mode 100644 index 0000000..f319184 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi1b_mod.F90 @@ -0,0 +1,131 @@ +MODULE EPRFI1B_MOD +CONTAINS +SUBROUTINE EPRFI1B(PFFT,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +USE TPM_DISTR +USE TPMALD_DISTR ,ONLY : DALD, DALD_NESM0, DALD_NCPL2M +! +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1B(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PFFT(:,:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF,IFLD +INTEGER(KIND=JPIM) :: IM, JM, MAX_NCPL2M +INTEGER(KIND=JPIM) :: JFLDPTR(KFIELDS) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',0,ZHOOK_HANDLE) + +IF (PRESENT(KFLDPTR)) THEN + JFLDPTR=KFLDPTR +ELSE + DO JFLD=1,KFIELDS + JFLDPTR(JFLD)=JFLD + ENDDO +ENDIF + +!$acc data present (PFFT, PSPEC) + +!$acc kernels default(none) +PFFT = 0._JPRB +!$acc end kernels + +MAX_NCPL2M = MAXVAL (DALD_NCPL2M) + +!$ACC parallel loop collapse(3) & +!$ACC& present(D_MYMS,DALD_NCPL2M,DALD_NESM0,D_NUMP) & +!$ACC& present(PFFT,PSPEC) & +!$ACC& copyin(KFIELDS,MAX_NCPL2M,JFLDPTR) & +!$ACC& private(IR,II,IM,ILCM,IOFF,INM,JFLD) default(none) +DO JM = 1, D_NUMP + DO JFLD=1,KFIELDS + DO J=1,MAX_NCPL2M,2 + IR = 2*(JFLD-1)+1 + II = IR+1 + IM = D_MYMS(JM) + ILCM = DALD_NCPL2M(IM) + if (J .LE. ILCM) then + IOFF = DALD_NESM0(IM) + INM = IOFF+(J-1)*2 + PFFT(J ,JM,IR) = PSPEC(JFLDPTR(JFLD),INM ) + PFFT(J+1,JM,IR) = PSPEC(JFLDPTR(JFLD),INM+1) + PFFT(J ,JM,II) = PSPEC(JFLDPTR(JFLD),INM+2) + PFFT(J+1,JM,II) = PSPEC(JFLDPTR(JFLD),INM+3) + endif + ENDDO + ENDDO +ENDDO + + + +#ifdef gnarls +!$acc update host(PFFT) +write (20,*) __FILE__,__LINE__ +write (20,*) 'PFFT = ' +write (20,'(6E18.8)') PFFT +call flush(20) +#endif + +!$acc end data + +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1B +END MODULE EPRFI1B_MOD diff --git a/src/etrans/gpu/internal/eprfi1bad_mod.F90 b/src/etrans/gpu/internal/eprfi1bad_mod.F90 new file mode 100644 index 0000000..81a31ea --- /dev/null +++ b/src/etrans/gpu/internal/eprfi1bad_mod.F90 @@ -0,0 +1,110 @@ +MODULE EPRFI1BAD_MOD +CONTAINS +SUBROUTINE EPRFI1BAD(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPMALD_DISTR ,ONLY : DALD + +!**** *EPRFI1BAD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1BAD(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1BAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF, IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EPRFI1BAD_MOD:EPRFI1BAD',0,ZHOOK_HANDLE) +ILCM=DALD%NCPL2M(KM) + +IOFF = DALD%NESM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 + + PSPEC(IFLD,INM ) = PSPEC(IFLD,INM ) + PIA(J ,IR) + PSPEC(IFLD,INM+1) = PSPEC(IFLD,INM+1) + PIA(J+1,IR) + PSPEC(IFLD,INM+2) = PSPEC(IFLD,INM+2) + PIA(J ,II) + PSPEC(IFLD,INM+3) = PSPEC(IFLD,INM+3) + PIA(J+1,II) + + ENDDO + ENDDO +ELSE + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + + PSPEC(JFLD,INM ) = PSPEC(JFLD,INM ) + PIA(J ,IR) + PSPEC(JFLD,INM+1) = PSPEC(JFLD,INM+1) + PIA(J+1,IR) + PSPEC(JFLD,INM+2) = PSPEC(JFLD,INM+2) + PIA(J ,II) + PSPEC(JFLD,INM+3) = PSPEC(JFLD,INM+3) + PIA(J+1,II) + + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1BAD_MOD:EPRFI1BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1BAD +END MODULE EPRFI1BAD_MOD diff --git a/src/etrans/gpu/internal/eprfi2_mod.F90 b/src/etrans/gpu/internal/eprfi2_mod.F90 new file mode 100644 index 0000000..dc0da35 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi2_mod.F90 @@ -0,0 +1,83 @@ +MODULE EPRFI2_MOD +CONTAINS +SUBROUTINE EPRFI2(KF_FS,PFFT) + +!**** *EPRFI2* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. PRFI2B - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM +USE PARKIND_ECTRANS, ONLY : JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_TRANS + +USE EPRFI2B_MOD ,ONLY : EPRFI2B +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS +REAL(KIND=JPRBT) , INTENT(OUT) :: PFFT(:,:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL EPRFI2B(KF_FS,PFFT) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2 +END MODULE EPRFI2_MOD diff --git a/src/etrans/gpu/internal/eprfi2ad_mod.F90 b/src/etrans/gpu/internal/eprfi2ad_mod.F90 new file mode 100644 index 0000000..186dc29 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi2ad_mod.F90 @@ -0,0 +1,82 @@ +MODULE EPRFI2AD_MOD +CONTAINS +SUBROUTINE EPRFI2AD(KM,KMLOC,KF_FS,PFFT) + +!**** *EPRFI2AD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2AD(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. EPRFI2BAD - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE EPRFI2BAD_MOD ,ONLY : EPRFI2BAD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL EPRFI2BAD(KF_FS,KM,KMLOC,PFFT) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2AD +END MODULE EPRFI2AD_MOD diff --git a/src/etrans/gpu/internal/eprfi2b_mod.F90 b/src/etrans/gpu/internal/eprfi2b_mod.F90 new file mode 100644 index 0000000..902de02 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi2b_mod.F90 @@ -0,0 +1,105 @@ +MODULE EPRFI2B_MOD +CONTAINS +SUBROUTINE EPRFI2B(KFIELD,PFFT) + +!**** *EPRFI2B* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND_ECTRANS, ONLY : JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R, R_NDGNH, R_NDGL +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G, G_NDGLU +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1,MYPROC +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD +REAL(KIND=JPRBT) , INTENT(OUT) :: PFFT(:,:,:) + +INTEGER(KIND=JPIM) :: IM, JM +INTEGER(KIND=JPIM) :: ISTAN, JF, JGL +INTEGER(KIND=JPIM) :: IJR, IJI +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',0,ZHOOK_HANDLE) + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ + +!$acc data & +!$acc& present(PFFT) & +!$acc& present(FOUBUF) & +!$acc& copyin(R_NDGL,D_NSTAGT1B,D_NPNTGTB1,D_NPROCL,D_NUMP,D_MYMS,G_NDGLU) + +!loop over wavenumber +!$acc parallel loop collapse(3) private(ISTAN,IM,IJR,IJI,JM) +DO JM = 1, D_NUMP + DO JF =1,KFIELD + DO JGL=1,R_NDGL + IM = D_MYMS(JM) + IJR = 2*(JF-1)+1 + IJI = IJR+1 + ISTAN = (D_NSTAGT1B(D_NPROCL(JGL) )+D_NPNTGTB1(JM,JGL))*2*KFIELD + PFFT(JGL,JM,IJR) = FOUBUF(ISTAN+IJR) + PFFT(JGL,JM,IJI) = FOUBUF(ISTAN+IJI) + ENDDO + ENDDO +ENDDO + +!$acc end data + +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2B +END MODULE EPRFI2B_MOD diff --git a/src/etrans/gpu/internal/eprfi2bad_mod.F90 b/src/etrans/gpu/internal/eprfi2bad_mod.F90 new file mode 100644 index 0000000..4086566 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi2bad_mod.F90 @@ -0,0 +1,90 @@ +MODULE EPRFI2BAD_MOD +CONTAINS +SUBROUTINE EPRFI2BAD(KFIELD,KM,KMLOC,PFFT) + +!**** *EPRFI2BAD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : FOUBUF +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: ISTAN, JF, JGL + +INTEGER(KIND=JPIM) :: IJR,IJI +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('EPRFI2BAD_MOD:EPRFI2BAD',0,ZHOOK_HANDLE) +DO JGL=1,R%NDGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + DO JF =1,KFIELD + IJR = 2*(JF-1)+1 + IJI = IJR+1 + FOUBUF(ISTAN+IJR) = PFFT(JGL,IJR) + FOUBUF(ISTAN+IJI) = PFFT(JGL,IJI) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EPRFI2BAD_MOD:EPRFI2BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2BAD +END MODULE EPRFI2BAD_MOD diff --git a/src/etrans/gpu/internal/eset_resol_mod.F90 b/src/etrans/gpu/internal/eset_resol_mod.F90 new file mode 100644 index 0000000..77c5a08 --- /dev/null +++ b/src/etrans/gpu/internal/eset_resol_mod.F90 @@ -0,0 +1,78 @@ +MODULE ESET_RESOL_MOD +CONTAINS +SUBROUTINE ESET_RESOL(KRESOL) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL +USE TPM_DIM ,ONLY : R, DIM_RESOL +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL !, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +#ifdef HAVE_CUFFT +USE TPM_FFTC ,ONLY : TC, FFTC_RESOL +#endif +USE TPM_FFTH ,ONLY : TC, FFTH_RESOL + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +! Local varaibles +INTEGER(KIND=JPIM) :: IRESOL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',0,ZHOOK_HANDLE) +IF(MSETUP0 == 0) CALL ABORT_TRANS('ESET_RESOL:TRANS NOT SETUP') +IRESOL = 1 +IF(PRESENT(KRESOL)) THEN + IRESOL = KRESOL + IF(KRESOL < 1 .OR. KRESOL > NMAX_RESOL) THEN + WRITE(NOUT,*)'ESET_RESOL: UNKNOWN RESOLUTION ',KRESOL,NMAX_RESOL + CALL ABORT_TRANS('ESET_RESOL:KRESOL < 1 .OR. KRESOL > NMAX_RESOL') + ENDIF +ENDIF +IF(IRESOL /= NCUR_RESOL) THEN + NCUR_RESOL = IRESOL + R => DIM_RESOL(NCUR_RESOL) + F => FIELDS_RESOL(NCUR_RESOL) + G => GEOM_RESOL(NCUR_RESOL) + D => DISTR_RESOL(NCUR_RESOL) + T => FFT_RESOL(NCUR_RESOL) + !TB => FFTB_RESOL(NCUR_RESOL) +#ifdef WITH_FFTW + TW => FFTW_RESOL(NCUR_RESOL) +#endif +#ifdef HAVE_CUFFT + TC => FFTC_RESOL(NCUR_RESOL) +#endif + TC => FFTH_RESOL(NCUR_RESOL) + RALD => ALDDIM_RESOL(NCUR_RESOL) + DALD => ALDDISTR_RESOL(NCUR_RESOL) + TALD => ALDFFT_RESOL(NCUR_RESOL) + FALD => ALDFIELDS_RESOL(NCUR_RESOL) + GALD => ALDGEO_RESOL(NCUR_RESOL) + +ENDIF +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',1,ZHOOK_HANDLE) + +END SUBROUTINE ESET_RESOL +END MODULE ESET_RESOL_MOD diff --git a/src/etrans/gpu/internal/esetup_dims_mod.F90 b/src/etrans/gpu/internal/esetup_dims_mod.F90 new file mode 100644 index 0000000..077f274 --- /dev/null +++ b/src/etrans/gpu/internal/esetup_dims_mod.F90 @@ -0,0 +1,46 @@ +MODULE ESETUP_DIMS_MOD +CONTAINS +SUBROUTINE ESETUP_DIMS + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JN,ISPOLEG +INTEGER(KIND=JPIM) :: ISMAX(0:R%NSMAX),ISNAX(0:RALD%NMSMAX) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_DIMS_MOD:ESETUP_DIMS',0,ZHOOK_HANDLE) +ISPOLEG = 0 +DO JM=0,R%NSMAX + DO JN=JM,R%NTMAX+1 + ISPOLEG = ISPOLEG+1 + ENDDO +ENDDO +R%NSPOLEG = ISPOLEG +CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) +R%NSPEC_G=0 +DO JM=0,RALD%NMSMAX + R%NSPEC_G=R%NSPEC_G+2*(ISNAX(JM)+1) +ENDDO +R%NSPEC2_G = R%NSPEC_G*2 + +R%NDGNH = (R%NDGL+1)/2 + +R%NLEI1 = R%NSMAX+4+MOD(R%NSMAX+4+1,2) +R%NLEI3 = R%NDGNH+MOD(R%NDGNH+2,2) + +R%NLED3 = R%NTMAX+2+MOD(R%NTMAX+3,2) +R%NLED4 = R%NTMAX+3+MOD(R%NTMAX+4,2) +IF (LHOOK) CALL DR_HOOK('ESETUP_DIMS_MOD:ESETUP_DIMS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESETUP_DIMS +END MODULE ESETUP_DIMS_MOD diff --git a/src/etrans/gpu/internal/esetup_geom_mod.F90 b/src/etrans/gpu/internal/esetup_geom_mod.F90 new file mode 100644 index 0000000..a93c67d --- /dev/null +++ b/src/etrans/gpu/internal/esetup_geom_mod.F90 @@ -0,0 +1,66 @@ +MODULE ESETUP_GEOM_MOD +CONTAINS +SUBROUTINE ESETUP_GEOM + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: IDGLU(0:RALD%NMSMAX,R%NDGNH) +INTEGER(KIND=JPIM) :: JGL,JM + +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_GEOM_MOD:ESETUP_GEOM',0,ZHOOK_HANDLE) +IF(.NOT.D%LGRIDONLY) THEN +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_GEOM ===' + +ALLOCATE (G%NMEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'G%NMEN ',SIZE(G%NMEN ),SHAPE(G%NMEN ) +G%NMEN(:)=RALD%NMSMAX +IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JGL,G%NLOEN,G%NMEN) '')') + WRITE(NOUT,FMT='(8(1X,''('',I4,I4,I4,'')''))')& + & (JGL,G%NLOEN(JGL),G%NMEN(JGL),JGL=1,R%NDGL) +ENDIF +ALLOCATE(G%NDGLU(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'G%NDGLU ',SIZE(G%NDGLU ),SHAPE(G%NDGLU ) +IDGLU(:,:) = 0 +G%NDGLU(:) = 0 +DO JGL=1,R%NDGNH + DO JM=0,G%NMEN(JGL) + IDGLU(JM,JGL) = 1 + ENDDO +ENDDO +DO JM=0,RALD%NMSMAX + DO JGL=1,R%NDGNH + G%NDGLU(JM) = G%NDGLU(JM)+IDGLU(JM,JGL) + ENDDO +ENDDO +IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JM,G%NDGLU) '')') + WRITE(NOUT,FMT='(10(1X,''('',I4,I4,'')''))')& + & (JM,G%NDGLU(JM),JM=0,RALD%NMSMAX) +ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('ESETUP_GEOM_MOD:ESETUP_GEOM',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE ESETUP_GEOM +END MODULE ESETUP_GEOM_MOD diff --git a/src/etrans/gpu/internal/espnorm_ctl_mod.F90 b/src/etrans/gpu/internal/espnorm_ctl_mod.F90 new file mode 100644 index 0000000..27732c4 --- /dev/null +++ b/src/etrans/gpu/internal/espnorm_ctl_mod.F90 @@ -0,0 +1,61 @@ +MODULE ESPNORM_CTL_MOD +CONTAINS +SUBROUTINE ESPNORM_CTL(PSPEC,KFLD,KFLD_G,KVSET,KMASTER,PMET,PNORM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MYSETV, MYPROC + +USE ESPNORMD_MOD ,ONLY : ESPNORMD +USE ESPNORMC_MOD ,ONLY : ESPNORMC + +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFLD,KFLD_G +INTEGER(KIND=JPIM) :: IVSET(KFLD_G) + +REAL(KIND=JPRB) :: ZMET(0:R%NSPEC_G) + +REAL(KIND=JPRB) :: ZSM(KFLD,D%NUMP) + +REAL(KIND=JPRB) :: ZGM(KFLD_G,0:RALD%NMSMAX) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',0,ZHOOK_HANDLE) +IF(PRESENT(KVSET)) THEN + IVSET(:) = KVSET(:) +ELSE + IVSET(:) = MYSETV +ENDIF + +IF(PRESENT(PMET)) THEN + ZMET(:) = PMET(:) +ELSE + ZMET(:) = 1.0_JPRB +ENDIF + +CALL ESPNORMD(PSPEC,KFLD,ZMET,ZSM) + +CALL ESPNORMC(ZSM,KFLD_G,IVSET,KMASTER,ZGM) + +IF(MYPROC == KMASTER) THEN + PNORM(1:KFLD_G) = SUM(ZGM,DIM=2) + PNORM(1:KFLD_G) = SQRT(PNORM(1:KFLD_G)) +ENDIF +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORM_CTL +END MODULE ESPNORM_CTL_MOD diff --git a/src/etrans/gpu/internal/espnormc_mod.F90 b/src/etrans/gpu/internal/espnormc_mod.F90 new file mode 100644 index 0000000..1555155 --- /dev/null +++ b/src/etrans/gpu/internal/espnormc_mod.F90 @@ -0,0 +1,85 @@ +MODULE ESPNORMC_MOD +CONTAINS +SUBROUTINE ESPNORMC(PSM,KFLD_G,KVSET,KMASTER,PGM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, MYPROC, NPROC + +USE PE2SET_MOD ,ONLY : PE2SET + +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSM(:,:) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,INTENT(OUT) :: PGM(:,0:) + +REAL(KIND=JPRB) :: ZRECVBUF(KFLD_G*(RALD%NMSMAX+1)) + +INTEGER(KIND=JPIM) :: IFLDR(NPRTRV) + +INTEGER(KIND=JPIM) :: ISTOTAL,JFLD,ITAG,JROC,IMSGLEN,IRECVID +INTEGER(KIND=JPIM) :: IRECVNUMP,IRECVFLD,IFLD,JMLOC,IM,IBUFLENR,IA,IB +INTEGER(KIND=JPIM) :: IRECVSETA,IRECVSETB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORMC_MOD:ESPNORMC',0,ZHOOK_HANDLE) +ISTOTAL = SIZE(PSM) +IBUFLENR =SIZE(ZRECVBUF) + +IFLDR(:) = 0 +DO JFLD=1,KFLD_G + IFLDR(KVSET(JFLD)) = IFLDR(KVSET(JFLD))+1 +ENDDO +ITAG = 100 + +IF (NPROC > 1.AND.MYPROC /= KMASTER) THEN + CALL MPL_SEND(PSM(:,:),KDEST=NPRCIDS(KMASTER),KTAG=ITAG,& + & CDSTRING='ESPNORMC:') +ENDIF + +IF (MYPROC == KMASTER) THEN + DO JROC=1,NPROC + IF (JROC == KMASTER) THEN + ZRECVBUF(1:ISTOTAL) = RESHAPE(PSM,SHAPE(ZRECVBUF(1:ISTOTAL))) + IRECVID = MYPROC + IMSGLEN = ISTOTAL + ELSE + CALL MPL_RECV(ZRECVBUF(1:IBUFLENR),KTAG=ITAG,& + & KFROM=IRECVID,CDSTRING='ESPNORMC :') + ENDIF + CALL PE2SET(IRECVID,IA,IB,IRECVSETA,IRECVSETB) + IRECVNUMP = D%NUMPP(IRECVSETA) + IRECVFLD = IFLDR(IRECVSETB) + IFLD = 0 + DO JFLD=1,KFLD_G + IF(KVSET(JFLD) == IRECVSETB) THEN + IFLD=IFLD+1 + DO JMLOC=1,IRECVNUMP + IM = D%NALLMS(D%NPTRMS(IRECVSETA)-1+JMLOC) + PGM(JFLD,IM) = ZRECVBUF((JMLOC-1)*IRECVFLD+IFLD) + ENDDO + ENDIF + ENDDO + ENDDO +ENDIF + +! Perform barrier synchronisation to guarantee all processors have +! completed communication + +IF( NPROC > 1 )THEN + CALL MPL_BARRIER(CDSTRING='ESPNORMC') +ENDIF +IF (LHOOK) CALL DR_HOOK('ESPNORMC_MOD:ESPNORMC',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORMC +END MODULE ESPNORMC_MOD diff --git a/src/etrans/gpu/internal/espnormd_mod.F90 b/src/etrans/gpu/internal/espnormd_mod.F90 new file mode 100644 index 0000000..75e245a --- /dev/null +++ b/src/etrans/gpu/internal/espnormd_mod.F90 @@ -0,0 +1,55 @@ +MODULE ESPNORMD_MOD +CONTAINS +SUBROUTINE ESPNORMD(PSPEC,KFLD,PMET,PSM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PMET(0:R%NSPEC_G) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD +REAL(KIND=JPRB) ,INTENT(OUT) :: PSM(:,:) +INTEGER(KIND=JPIM) :: JM ,JFLD ,JN ,IM ,ISP +INTEGER(KIND=JPIM) :: IN,ISPE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',0,ZHOOK_HANDLE) + +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JM,IM,JN,ISP,JFLD,IN,ISPE) +DO JM=1,D%NUMP + PSM(:,JM) = 0.0_JPRB + IM = D%MYMS(JM) + + IN=DALD%NCPL2M(IM)/2 - 1 + DO JN=0,IN + ISP=DALD%NESM0(IM) + (JN)*4 + ISPE=DALD%NPME (IM) + JN + DO JFLD=1,KFLD + PSM(JFLD,JM) =PSM(JFLD,JM)& + & + PMET(ISPE) *& + & ( PSPEC(JFLD,ISP )**2 + PSPEC(JFLD,ISP+1)**2 +& + & PSPEC(JFLD,ISP+2)**2 + PSPEC(JFLD,ISP+3)**2 ) + + ENDDO + ENDDO + +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORMD +END MODULE ESPNORMD_MOD + diff --git a/src/etrans/gpu/internal/espnsde_mod.F90 b/src/etrans/gpu/internal/espnsde_mod.F90 new file mode 100644 index 0000000..5138539 --- /dev/null +++ b/src/etrans/gpu/internal/espnsde_mod.F90 @@ -0,0 +1,109 @@ +MODULE ESPNSDE_MOD +CONTAINS +SUBROUTINE ESPNSDE(KF_SCALARS,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, D_NUMP, D_MYMS +USE TPMALD_DISTR ,ONLY : DALD, DALD_NCPL2M +USE TPMALD_GEO ,ONLY : GALD + + +!**** *SPNSDE* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDE(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDE in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(IN) :: PF(:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PNSD(:,:,:) + +INTEGER(KIND=JPIM) :: J, JN,IN, JM, IM, JNMAX +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',0,ZHOOK_HANDLE) + +JNMAX = MAXVAL (DALD%NCPL2M) + +!$acc parallel loop collapse (3) private (JM, J, JN, IM, IN, ZIN) & +!$acc & present (D_NUMP, D_MYMS, DALD_NCPL2M, PNSD, PF) +DO J=1,2*KF_SCALARS + DO JM = 1, D_NUMP + DO JN=1,JNMAX,2 + IM = D_MYMS(JM) + IF (JN <= DALD_NCPL2M(IM)) THEN + IN =(JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PNSD(JN ,JM,J) = -ZIN*PF(JN+1,JM,J) + PNSD(JN+1,JM,J) = ZIN*PF(JN ,JM,J) + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNSDE +END MODULE ESPNSDE_MOD diff --git a/src/etrans/gpu/internal/espnsdead_mod.F90 b/src/etrans/gpu/internal/espnsdead_mod.F90 new file mode 100644 index 0000000..3ca9ded --- /dev/null +++ b/src/etrans/gpu/internal/espnsdead_mod.F90 @@ -0,0 +1,112 @@ +MODULE ESPNSDEAD_MOD +CONTAINS +SUBROUTINE ESPNSDEAD(KM,KF_SCALARS,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_TRANS + +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_GEO ,ONLY : GALD + + +!**** *ESPNSDEAD* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL ESPNSDEAD(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDEAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(INOUT) :: PF(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PNSD(:,:) +INTEGER(KIND=JPIM) :: ISKIP, J, JN +INTEGER(KIND=JPIM) :: IN +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +IF (LHOOK) CALL DR_HOOK('ESPNSDEAD_MOD:ESPNSDEAD',0,ZHOOK_HANDLE) +IF(KM == 0) THEN + ISKIP = 1 +ELSE + ISKIP = 1 +ENDIF + +DO JN=1,DALD%NCPL2M(KM),2 + + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + + DO J=1,2*KF_SCALARS,ISKIP + + PF(JN+1,J) = PF(JN+1,J)-ZIN*PNSD(JN ,J) + PF(JN ,J) = PF(JN ,J)+ZIN*PNSD(JN+1,J) + + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('ESPNSDEAD_MOD:ESPNSDEAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNSDEAD +END MODULE ESPNSDEAD_MOD diff --git a/src/etrans/gpu/internal/eupdsp_mod.F90 b/src/etrans/gpu/internal/eupdsp_mod.F90 new file mode 100644 index 0000000..a98918c --- /dev/null +++ b/src/etrans/gpu/internal/eupdsp_mod.F90 @@ -0,0 +1,142 @@ +MODULE EUPDSP_MOD +CONTAINS +SUBROUTINE EUPDSP(KF_UV,KF_SCALARS,PFFA,PVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *EUPDSP* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL EUPDSP(...) + +! Explicit arguments : +! -------------------- +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE EUPDSPB_MOD ,ONLY : EUPDSPB +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV,KF_SCALARS +REAL(KIND=JPRB) , INTENT(IN) :: PFFA(:,:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PVODI(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND,IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',0,ZHOOK_HANDLE) + +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUPDSPB(KF_UV,PVODI(:,:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL EUPDSPB(KF_UV,PVODI(:,:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL EUPDSPB(KF_SCALARS,PFFA(:,:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(IDIM1,PFFA(:,:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(IDIM1,PFFA(:,:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(IDIM1,PFFA(:,:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSP +END MODULE EUPDSP_MOD diff --git a/src/etrans/gpu/internal/eupdspad_mod.F90 b/src/etrans/gpu/internal/eupdspad_mod.F90 new file mode 100644 index 0000000..8f1699a --- /dev/null +++ b/src/etrans/gpu/internal/eupdspad_mod.F90 @@ -0,0 +1,145 @@ +MODULE EUPDSPAD_MOD +CONTAINS +SUBROUTINE EUPDSPAD(KM,KF_UV,KF_SCALARS,PFFT,PVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *EUPDSPAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL EUPDSPAD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPADB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPADB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE EUPDSPBAD_MOD ,ONLY : EUPDSPBAD +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS + +REAL(KIND=JPRB) , INTENT(OUT) :: PFFT(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PVODI(:,:) + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND +INTEGER(KIND=JPIM) :: IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF (LHOOK) CALL DR_HOOK('EUPDSPAD_MOD:EUPDSPAD',0,ZHOOK_HANDLE) +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUPDSPBAD(KM,KF_UV,PVODI(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL EUPDSPBAD(KM,KF_UV,PVODI(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL EUPDSPBAD(KM,KF_SCALARS,PFFT(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPAD_MOD:EUPDSPAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSPAD +END MODULE EUPDSPAD_MOD diff --git a/src/etrans/gpu/internal/eupdspb_mod.F90 b/src/etrans/gpu/internal/eupdspb_mod.F90 new file mode 100644 index 0000000..dff4d46 --- /dev/null +++ b/src/etrans/gpu/internal/eupdspb_mod.F90 @@ -0,0 +1,113 @@ +MODULE EUPDSPB_MOD +CONTAINS +SUBROUTINE EUPDSPB(KFIELD,POA,PSPEC,KFLDPTR) + +!**** *EUPDSPB* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL EUPDSPB(....) + +! Explicit arguments : +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPMALD_DISTR ,ONLY : DALD, DALD_NESM0, DALD_NCPL2M +USE TPM_DISTR ,ONLY : D, D_MYMS, D_NUMP +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD +REAL(KIND=JPRB) ,INTENT(IN) :: POA(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN,IFLD, JM, IM +INTEGER(KIND=JPIM) :: JFLDPTR(KFIELD) +INTEGER(KINd=JPIM) :: MAX_NCPL2M +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + +! ------------------------------------------------------------------ + +!* 1. UPDATE SPECTRAL FIELDS. +! ----------------------- +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',0,ZHOOK_HANDLE) + +!$ACC data present (POA, PSPEC) + +IF(PRESENT(KFLDPTR)) THEN + JFLDPTR=KFLDPTR +ELSE + DO JFLD=1,KFIELD + JFLDPTR(JFLD)=JFLD + ENDDO +ENDIF + +MAX_NCPL2M = MAXVAL (DALD_NCPL2M) + +!$ACC parallel loop collapse(3) & +!$acc& copyin(MAX_NCPL2M,KFIELD,JFLDPTR) & +!$acc& present(D_NUMP,D_MYMS,DALD_NESM0,DALD_NCPL2M) & +!$acc& private(JM,JN,JFLD,IM,INM,IR,II ) +DO JN=1,MAX_NCPL2M,2 + DO JM = 1, D_NUMP + DO JFLD=1,KFIELD + IM = D_MYMS(JM) + INM=DALD_NESM0(IM)+(JN-1)*2 + if ( JN .LE. DALD_NCPL2M(IM) ) then + IR= 2*JFLD-1 + II=IR+1 + PSPEC(JFLDPTR(JFLD),INM) =POA(JN ,JM,IR) + PSPEC(JFLDPTR(JFLD),INM+1) =POA(JN+1,JM,IR) + PSPEC(JFLDPTR(JFLD),INM+2) =POA(JN ,JM,II) + PSPEC(JFLDPTR(JFLD),INM+3) =POA(JN+1,JM,II) + endif + ENDDO + ENDDO + + ENDDO + +!$ACC end data + +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',1,ZHOOK_HANDLE) + +END SUBROUTINE EUPDSPB +END MODULE EUPDSPB_MOD diff --git a/src/etrans/gpu/internal/eupdspbad_mod.F90 b/src/etrans/gpu/internal/eupdspbad_mod.F90 new file mode 100644 index 0000000..894f002 --- /dev/null +++ b/src/etrans/gpu/internal/eupdspbad_mod.F90 @@ -0,0 +1,133 @@ +MODULE EUPDSPBAD_MOD +CONTAINS +SUBROUTINE EUPDSPBAD(KM,KFIELD,POA,PSPEC,KFLDPTR) + +!**** *EUPDSPBAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL EUPDSPBAD(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_DISTR + +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB) ,INTENT(OUT) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN,IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 0. NOTE. +! ----- + +! The following transfer reads : +! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) +! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) +! with n from m to NSMAX +! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. +! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) +! nn is the loop index. + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUPDSPBAD_MOD:EUPDSPBAD',0,ZHOOK_HANDLE) +POA(:,:) = 0.0_JPRB + +IF(PRESENT(KFLDPTR)) THEN + + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + IFLD = KFLDPTR(JFLD) +!DIR$ IVDEP +!OCL NOVREC + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 + POA(JN,IR) = PSPEC(IFLD,INM) + POA(JN+1,IR) = PSPEC(IFLD,INM+1) + POA(JN,II) = PSPEC(IFLD,INM+2) + POA(JN+1,II) = PSPEC(IFLD,INM+3) + PSPEC(IFLD,INM )= 0.0_JPRB + PSPEC(IFLD,INM+1)= 0.0_JPRB + PSPEC(IFLD,INM+2)= 0.0_JPRB + PSPEC(IFLD,INM+3)= 0.0_JPRB + ENDDO + ENDDO + +ELSE + + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + POA(JN,IR) = PSPEC(JFLD,INM) + POA(JN+1,IR) = PSPEC(JFLD,INM+1) + POA(JN,II) = PSPEC(JFLD,INM+2) + POA(JN+1,II) = PSPEC(JFLD,INM+3) + PSPEC(JFLD,INM )= 0.0_JPRB + PSPEC(JFLD,INM+1)= 0.0_JPRB + PSPEC(JFLD,INM+2)= 0.0_JPRB + PSPEC(JFLD,INM+3)= 0.0_JPRB + ENDDO + ENDDO + +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPBAD_MOD:EUPDSPBAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSPBAD +END MODULE EUPDSPBAD_MOD diff --git a/src/etrans/gpu/internal/euvtvd_comm_mod.F90 b/src/etrans/gpu/internal/euvtvd_comm_mod.F90 new file mode 100644 index 0000000..831a9a4 --- /dev/null +++ b/src/etrans/gpu/internal/euvtvd_comm_mod.F90 @@ -0,0 +1,173 @@ +MODULE EUVTVD_COMM_MOD +CONTAINS +SUBROUTINE EUVTVD_COMM(KM,KMLOC,KFIELD,KFLDPTR,PU,PV,PSPMEANU,PSPMEANV) + +!**** *EUVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX - communication part. + +!** Interface. +! ---------- +! CALL EUVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM +USE TPM_FIELDS +USE TPM_DISTR +USE TPMALD_GEO +USE TPMALD_DISTR +USE MPL_MODULE +USE SET2PE_MOD +USE ABORT_TRANS_MOD +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD, KM, KMLOC +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:,:),PV (:,:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN + +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRW) + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +REAL(KIND=JPRB) :: ZIN +INTEGER(KIND=JPIM) :: JA,ITAG,ILEN,IFLD,ISND +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',0,ZHOOK_HANDLE) + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +IF (KM == 0) THEN + +!$acc data present(PU,PV) +!$acc data copyout (PSPMEANU, PSPMEANV, KMLOC) +!$acc data copyin (KFLDPTR) if(present (KFLDPTR)) + + IF (PRESENT(KFLDPTR)) THEN +!$acc parallel loop private(ir,ifld) + DO J = 1, KFIELD + IR = 2*J-1 + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=PU(1,KMLOC,IR) + PSPMEANV(IFLD)=PV(1,KMLOC,IR) + ENDDO +!$acc end parallel loop + ELSE +!$acc parallel loop private(j,ir) + DO J = 1, KFIELD + IR = 2*J-1 + PSPMEANU(J)=PU(1,KMLOC,IR) + PSPMEANV(J)=PV(1,KMLOC,IR) + ENDDO +!$acc end parallel loop + ENDIF + +!$acc end data +!$acc end data +!$acc end data +ENDIF + +IF (NPRTRW > 1 .AND. KFIELD > 0) THEN + IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + ZSPU(J)=PSPMEANU(IFLD) + ZSPU(KFIELD+J)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + ZSPU(J)=PSPMEANU(J) + ZSPU(KFIELD+J)=PSPMEANV(J) + ENDDO + ENDIF + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL SET2PE(ISND,0,0,JA,MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+ISND + CALL MPL_SEND(ZSPU(1:2*KFIELD),KDEST=ISND,KTAG=ITAG, & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVD_COMM:') + ENDIF + ENDDO + ELSE + IF (KMLOC == 1) THEN + IF (D%NPROCM(0) /= MYSETW) THEN + CALL SET2PE(ISND,0,0,D%NPROCM(0),MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+MYPROC + + CALL MPL_RECV(ZSPU(1:2*KFIELD),KSOURCE=ISND,KTAG=ITAG,KOUNT=ILEN, & + & CDSTRING='EUVTVD_COMM:') + IF (ILEN /= 2*KFIELD) CALL ABORT_TRANS('EUVTVD_COMM: RECV INVALID RECEIVE MESSAGE LENGHT') + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=ZSPU(J) + PSPMEANV(IFLD)=ZSPU(KFIELD+J) + ENDDO + ELSE + DO J=1,KFIELD + PSPMEANU(J)=ZSPU(J) + PSPMEANV(J)=ZSPU(KFIELD+J) + ENDDO + ENDIF + ENDIF + ENDIF + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD_COMM +END MODULE EUVTVD_COMM_MOD diff --git a/src/etrans/gpu/internal/euvtvd_mod.F90 b/src/etrans/gpu/internal/euvtvd_mod.F90 new file mode 100644 index 0000000..1fce758 --- /dev/null +++ b/src/etrans/gpu/internal/euvtvd_mod.F90 @@ -0,0 +1,127 @@ +MODULE EUVTVD_MOD +CONTAINS +SUBROUTINE EUVTVD(KFIELD,PU,PV,PVOR,PDIV) + +!**** *EUVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX - calculation part. + +!** Interface. +! ---------- +! CALL EUVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC +USE TPM_DISTR ,ONLY : D_NUMP,D_MYMS +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:,:),PV (:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PVOR(:,:,:),PDIV(:,:,:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN +INTEGER(KIND=JPIM) :: IM, JM, JNMAX + +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +INTEGER(KIND=JPIM) :: JA,ITAG,ILEN,IFLD,ISND +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',0,ZHOOK_HANDLE) + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + + +!$acc parallel loop collapse(3) private(J,JM,JN,IR,II,IM,ZKM) present (PVOR, PDIV, PU, PV) +DO J=1,KFIELD + DO JM=1,D_NUMP + DO JN=1,R%NDGL+R%NNOEXTZG + IM = D_MYMS(JM) + ZKM=REAL(IM,JPRB)*GALD%EXWN + IR=2*J-1 + II=IR+1 + PDIV(JN,JM,IR)=-ZKM*PU(JN,JM,II) + PDIV(JN,JM,II)= ZKM*PU(JN,JM,IR) + PVOR(JN,JM,IR)=-ZKM*PV(JN,JM,II) + PVOR(JN,JM,II)= ZKM*PV(JN,JM,IR) + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +JNMAX = MAXVAL(DALD%NCPL2M) + +!$acc parallel loop collapse(3) private(J,JM,JN,IM,ZIN,IN) copyin (JNMAX) present (PVOR, PDIV, PU, PV) +DO J=1,2*KFIELD + DO JM=1,D_NUMP + DO JN=1,JNMAX,2 + IM = D_MYMS(JM) + IN=(JN-1)/2 + ZIN=REAL(IN,JPRB)*GALD%EYWN + PVOR(JN ,JM,J)=PVOR(JN ,JM,J)+ZIN*PU(JN+1,JM,J) + PVOR(JN+1,JM,J)=PVOR(JN+1,JM,J)-ZIN*PU(JN ,JM,J) + PDIV(JN ,JM,J)=PDIV(JN ,JM,J)-ZIN*PV(JN+1,JM,J) + PDIV(JN+1,JM,J)=PDIV(JN+1,JM,J)+ZIN*PV(JN ,JM,J) + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD +END MODULE EUVTVD_MOD diff --git a/src/etrans/gpu/internal/euvtvdad_mod.F90 b/src/etrans/gpu/internal/euvtvdad_mod.F90 new file mode 100644 index 0000000..8b72f99 --- /dev/null +++ b/src/etrans/gpu/internal/euvtvdad_mod.F90 @@ -0,0 +1,128 @@ +MODULE EUVTVDAD_MOD +CONTAINS +SUBROUTINE EUVTVDAD(KM,KMLOC,KFIELD,KFLDPTR,PU,PV,PVOR,PDIV,PSPMEANU,PSPMEANV) + +!**** *EUVTVDAD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX. + +!** Interface. +! ---------- +! CALL EUVTVDAD() + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 G. Radnoti: b-level conform mean wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn removed erasing of mean wind +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_FIELDS + +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD, KM, KMLOC +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(INOUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, IFLD + +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVDAD_MOD:EUVTVDAD',0,ZHOOK_HANDLE) + +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IR=2*J-1 + IFLD=KFLDPTR(J) + PU(1,IR)=PSPMEANU(IFLD) + PV(1,IR)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + IR=2*J-1 + PU(1,IR)=PSPMEANU(J) + PV(1,IR)=PSPMEANV(J) + ENDDO + ENDIF +ENDIF + +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN=(JN-1)/2 + ZIN=REAL(IN,JPRB)*GALD%EYWN + PU(JN+1,J) = PU(JN+1,J) + ZIN * PVOR(JN ,J) + PU(JN ,J) = PU(JN ,J) - ZIN * PVOR(JN+1,J) + PV(JN+1,J) = PV(JN+1,J) - ZIN * PDIV(JN ,J) + PV(JN ,J) = PV(JN ,J) + ZIN * PDIV(JN+1,J) + ENDDO +ENDDO + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR=2*J-1 + II=IR+1 + DO JN=1,R%NDGL+R%NNOEXTZG + PU(JN,II) = PU(JN,II) - ZKM * PDIV(JN,IR) + PU(JN,IR) = PU(JN,IR) + ZKM * PDIV(JN,II) + PV(JN,II) = PV(JN,II) - ZKM * PVOR(JN,IR) + PV(JN,IR) = PV(JN,IR) + ZKM * PVOR(JN,II) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EUVTVDAD_MOD:EUVTVDAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUVTVDAD +END MODULE EUVTVDAD_MOD diff --git a/src/etrans/gpu/internal/evdtuv_mod.F90 b/src/etrans/gpu/internal/evdtuv_mod.F90 new file mode 100644 index 0000000..0a19f1c --- /dev/null +++ b/src/etrans/gpu/internal/evdtuv_mod.F90 @@ -0,0 +1,188 @@ +MODULE EVDTUV_MOD +CONTAINS +SUBROUTINE EVDTUV(KFIELD,KFLDPTR,PVOR,PDIV,PU,PV,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +USE TPMALD_FIELDS ,ONLY : FALD, FALD_RLEPINM +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD, DALD_NCPL2M, DALD_NPME +USE TPM_DISTR ,ONLY : D, D_NUMP, D_MYMS +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!**** *VDTUV* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUV(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUV in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:,:),PDIV(:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PU (:,:,:),PV (:,:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(IN) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IN, IFLD +INTEGER(KIND=JPIM) :: JM, IM +INTEGER(KIND=JPIM) :: JNMAX + +REAL(KIND=JPRB) :: ZLEPINM +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',0,ZHOOK_HANDLE) + +JNMAX = MAXVAL (DALD%NCPL2M) + +! check if args are contiguous +#ifndef gnarls +write (20,*) 'shape(PVOR) = ', shape(PVOR) +if (.not. is_contiguous(PVOR) ) call abort_trans('PVOR not contiguous') +write (20,*) 'shape(PDIV) = ', shape(PDIV) +if (.not. is_contiguous(PDIV) ) call abort_trans('PDIV not contiguous') +write (20,*) 'shape(PU) = ', shape(PU) +if (.not. is_contiguous(PU) ) call abort_trans('PU not contiguous') +write (20,*) 'shape(PV) = ', shape(PV) +if (.not. is_contiguous(PV) ) call abort_trans('PV not contiguous') +if ( present(PSPMEANU) ) THEN + write (20,*) 'shape(PSPMEANU) = ', shape(PSPMEANU) + if (.not. is_contiguous(PSPMEANU) ) call abort_trans('PSPMEANU not contiguous') +endif +if ( present(PSPMEANV) ) THEN + write (20,*) 'shape(PSPMEANU) = ', shape(PSPMEANU) + if (.not. is_contiguous(PSPMEANV) ) call abort_trans('PSPMEANV not contiguous') +endif +#endif + +!$acc parallel loop collapse (3) private (JM, J, JN, IM, IN, ZIN) & +!$acc & present (D_NUMP, D_MYMS, DALD_NCPL2M, PU, PV, PVOR, PDIV) +DO J=1,2*KFIELD + DO JM = 1, D_NUMP + DO JN=1,JNMAX,2 + IM = D_MYMS (JM) + IF (JN <= DALD_NCPL2M(IM)) THEN + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PU(JN ,JM,J) = -ZIN*PVOR(JN+1,JM,J) + PU(JN+1,JM,J) = ZIN*PVOR(JN ,JM,J) + PV(JN ,JM,J) = -ZIN*PDIV(JN+1,JM,J) + PV(JN+1,JM,J) = ZIN*PDIV(JN ,JM,J) + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +!$acc parallel loop collapse (3) private (JM, J, JN, IM, ZKM, IR, II, IJ, ZLEPINM) & +!$acc & present (D_NUMP, D_MYMS, DALD_NCPL2M, FALD_RLEPINM, PU, PV, PDIV, PVOR) +DO J=1,KFIELD + DO JM = 1, D_NUMP + DO JN=1,JNMAX + IM = D_MYMS (JM) + ZKM=REAL(IM,JPRB)*GALD%EXWN + IR = 2*J-1 + II = IR+1 + IF (JN <= DALD_NCPL2M(IM)) THEN + IJ=(JN-1)/2 + ZLEPINM = FALD_RLEPINM(DALD_NPME(IM)+IJ) + PU(JN,JM,IR)= ZLEPINM*(-ZKM*PDIV(JN,JM,II)-PU(JN,JM,IR)) + PU(JN,JM,II)= ZLEPINM*( ZKM*PDIV(JN,JM,IR)-PU(JN,JM,II)) + PV(JN,JM,IR)= ZLEPINM*(-ZKM*PVOR(JN,JM,II)+PV(JN,JM,IR)) + PV(JN,JM,II)= ZLEPINM*( ZKM*PVOR(JN,JM,IR)+PV(JN,JM,II)) + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +IF (PRESENT(KFLDPTR)) THEN +!$acc parallel loop collapse (2) private (J, JM, IM, IR, IFLD) & +!$acc & present (D_NUMP, D_MYMS, PU, PV) copyin (PSPMEANU, PSPMEANV, KFLDPTR) + DO J = 1, KFIELD + DO JM = 1, D_NUMP + IM = D_MYMS (JM) + IF (IM == 0) THEN + IR = 2*J-1 + IFLD=KFLDPTR(J) + PU(1,JM,IR)=PSPMEANU(IFLD) + PV(1,JM,IR)=PSPMEANV(IFLD) + ENDIF + ENDDO + ENDDO +!$acc end parallel loop +ELSE +!$acc parallel loop collapse (2) private (J, JM, IM, IR) & +!$acc & present (D_NUMP, D_MYMS, PU, PV) copyin (PSPMEANU, PSPMEANV) + DO J = 1, KFIELD + DO JM = 1, D_NUMP + IM = D_MYMS (JM) + IF (IM == 0) THEN + IR = 2*J-1 + PU(1,JM,IR)=PSPMEANU(J) + PV(1,JM,IR)=PSPMEANV(J) + ENDIF + ENDDO + ENDDO +!$acc end parallel loop +ENDIF + +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',1,ZHOOK_HANDLE) + +END SUBROUTINE EVDTUV +END MODULE EVDTUV_MOD diff --git a/src/etrans/gpu/internal/evdtuvad_comm_mod.F90 b/src/etrans/gpu/internal/evdtuvad_comm_mod.F90 new file mode 100644 index 0000000..eff2ddd --- /dev/null +++ b/src/etrans/gpu/internal/evdtuvad_comm_mod.F90 @@ -0,0 +1,154 @@ +MODULE EVDTUVAD_COMM_MOD +CONTAINS +SUBROUTINE EVDTUVAD_COMM(KM,KMLOC,KFIELD,KFLDPTR,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM +USE TPM_FIELDS +USE TPM_DISTR + +USE TPMALD_FIELDS +USE TPMALD_GEO +USE TPMALD_DISTR + +USE MPL_MODULE +USE ABORT_TRANS_MOD +USE SET2PE_MOD + + +!**** *EVDTUVAD_COMM* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space communicate the mean winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL EVDTUVAD_COMM(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn Fix mean wind for NPRTRW > 1 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD, KMLOC + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IFLD + +INTEGER(KIND=JPIM) :: IN +INTEGER(KIND=JPIM) :: ISND, JA, ITAG, ILEN + +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRW) + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_COMM_MOD:EVDTUVAD_COMM',0,ZHOOK_HANDLE) + +IF (NPRTRW > 1 .AND. KFIELD > 0) THEN + IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + ZSPU(J)=PSPMEANU(IFLD) + ZSPU(KFIELD+J)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + ZSPU(J)=PSPMEANU(J) + ZSPU(KFIELD+J)=PSPMEANV(J) + ENDDO + ENDIF + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL SET2PE(ISND,0,0,JA,MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+ISND + CALL MPL_SEND(ZSPU(1:2*KFIELD),KDEST=ISND,KTAG=ITAG, & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA), & + & CDSTRING='EVDTUVAD_COMM:') + ENDIF + ENDDO + ELSE + IF (KMLOC == 1) THEN + IF (D%NPROCM(0) /= MYSETW) THEN + CALL SET2PE(ISND,0,0,D%NPROCM(0),MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+MYPROC + CALL MPL_RECV(ZSPU(1:2*KFIELD),KSOURCE=ISND,KTAG=ITAG,KOUNT=ILEN,CDSTRING='EVDTUVAD_COMM:') + IF (ILEN /= 2*KFIELD) THEN + CALL ABORT_TRANS('EVDTUVAD_COMM: RECV INVALID RECEIVE MESSAGE LENGTH') + ENDIF + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=ZSPU(J) + PSPMEANV(IFLD)=ZSPU(KFIELD+J) + ENDDO + ELSE + DO J=1,KFIELD + PSPMEANU(J)=ZSPU(J) + PSPMEANV(J)=ZSPU(KFIELD+J) + ENDDO + ENDIF + ENDIF + ENDIF + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_COMM_MOD:EVDTUVAD_COMM',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EVDTUVAD_COMM +END MODULE EVDTUVAD_COMM_MOD diff --git a/src/etrans/gpu/internal/evdtuvad_mod.F90 b/src/etrans/gpu/internal/evdtuvad_mod.F90 new file mode 100644 index 0000000..a34135f --- /dev/null +++ b/src/etrans/gpu/internal/evdtuvad_mod.F90 @@ -0,0 +1,151 @@ +MODULE EVDTUVAD_MOD +CONTAINS +SUBROUTINE EVDTUVAD(KM,KMLOC,KFIELD,KFLDPTR,PVOR,PDIV,PU,PV,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC + +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD + +!**** *EVDTUVAD* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL EVDTUVAD(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn Fix mean wind for NPRTRW > 1 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD, KMLOC +REAL(KIND=JPRB), INTENT(INOUT) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IFLD + +INTEGER(KIND=JPIM) :: IN +INTEGER(KIND=JPIM) :: ISND, JA, ITAG, ILEN + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_MOD:EVDTUVAD',0,ZHOOK_HANDLE) + +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J = 1, KFIELD + IR = 2*J-1 + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=PU(1,IR) + PSPMEANV(IFLD)=PV(1,IR) + ENDDO + ELSE + DO J = 1, KFIELD + IR = 2*J-1 + PSPMEANU(J)=PU(1,IR) + PSPMEANV(J)=PV(1,IR) + ENDDO + ENDIF +ENDIF + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JN=1,DALD%NCPL2M(KM) + IJ=(JN-1)/2 + PDIV(JN,II)=PDIV(JN,II)-ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,IR) + PU(JN,IR)=-FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,IR) + + PDIV(JN,IR)=PDIV(JN,IR)+ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,II) + PU(JN,II)=-FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,II) + + PVOR(JN,II)=PVOR(JN,II)-ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,IR) + PV(JN,IR)=FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,IR) + + PVOR(JN,IR)=PVOR(JN,IR)+ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,II) + PV(JN,II)=FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,II) + + ENDDO +ENDDO + +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PVOR(JN+1,J) = PVOR(JN+1,J)-ZIN*PU(JN ,J) + PVOR(JN ,J) = PVOR(JN ,J)+ZIN*PU(JN+1,J) + PDIV(JN+1,J) = PDIV(JN+1,J)-ZIN*PV(JN ,J) + PDIV(JN ,J) = PDIV(JN ,J)+ZIN*PV(JN+1,J) + ENDDO +ENDDO + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_MOD:EVDTUVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EVDTUVAD +END MODULE EVDTUVAD_MOD diff --git a/src/etrans/gpu/internal/suefft_mod.F90 b/src/etrans/gpu/internal/suefft_mod.F90 new file mode 100644 index 0000000..6be8bf0 --- /dev/null +++ b/src/etrans/gpu/internal/suefft_mod.F90 @@ -0,0 +1,68 @@ +MODULE SUEFFT_MOD +CONTAINS +SUBROUTINE SUEFFT + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T!, TB +!USE TPM_FFTC ,ONLY : TC, INIT_PLANS_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, INIT_PLANS_FFTW +#endif +USE TPM_FFTH ,ONLY : TC, INIT_PLANS_FFT + +!USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_INIT, FFTB_TYPE +! + +USE TPMALD_FFT ,ONLY : TALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JGL,IGLG, ILATS +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEFFT_MOD:SUEFFT',0,ZHOOK_HANDLE) + +IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEFFT ===' + +#ifdef WITH_FFTW + IF(TW%LFFTW)THEN + CALL INIT_PLANS_FFTW(MAX(R%NDLON+R%NNOEXTZL,R%NDGL+R%NNOEXTZG)) + ELSE + NULLIFY(TW%FFTW_PLANS) + ENDIF +#endif + + CALL INIT_PLANS_FFT(MAX(R%NDLON+R%NNOEXTZL,R%NDGL+R%NNOEXTZG)) + + !IF(TALD%LFFT992)THEN + ! ALLOCATE(TALD%TRIGSE(R%NDGL+R%NNOEXTZG)) + ! IF(LLP2)WRITE(NOUT,9) 'TALD%TRIGSE ',SIZE(TALD%TRIGSE),SHAPE(TALD%TRIGSE) + ! ALLOCATE(TALD%NFAXE(19)) + ! IF(LLP2)WRITE(NOUT,9) 'TALD%NFAXE ',SIZE(TALD%NFAXE),SHAPE(TALD%NFAXE) + ! CALL SET99(TALD%TRIGSE,TALD%NFAXE,R%NDGL+R%NNOEXTZG) + !ENDIF + +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEFFT_MOD:SUEFFT',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEFFT +END MODULE SUEFFT_MOD diff --git a/src/etrans/gpu/internal/suemp_trans_mod.F90 b/src/etrans/gpu/internal/suemp_trans_mod.F90 new file mode 100644 index 0000000..83a4e25 --- /dev/null +++ b/src/etrans/gpu/internal/suemp_trans_mod.F90 @@ -0,0 +1,267 @@ +MODULE SUEMP_TRANS_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS + +! Set up distributed environment for the transform package (part 2) +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, NPRTRNS, NPRTRV, NPRTRW, MYSETW, NPROC, MYPROC +USE TPMALD_DIM ,ONLY : RALD +!USE TPMALD_DISTR +!USE SUWAVEDI_MOD +!USE PE2SET_MOD +USE SUMPLATF_MOD ,ONLY : SUMPLATF +USE SUEMPLAT_MOD ,ONLY : SUEMPLAT +USE SUESTAONL_MOD ,ONLY : SUESTAONL +USE MYSENDSET_MOD ,ONLY : MYSENDSET +USE MYRECVSET_MOD ,ONLY : MYRECVSET +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JMLOC +INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM +INTEGER(KIND=JPIM) :: I1,I2,I3,IAUX0,IAUX1,JA1 +INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF +INTEGER(KIND=JPIM), ALLOCATABLE :: IGPTOTL(:,:) + +REAL(KIND=JPRB) :: ZMEDIAP + +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPRB),ALLOCATABLE :: ZDUM(:) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',0,ZHOOK_HANDLE) +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS ===' + +IF(.NOT.D%LGRIDONLY) THEN + +ALLOCATE(D%NULTPP(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NULTPP ',SIZE(D%NULTPP ),SHAPE(D%NULTPP ) +ALLOCATE(D%NPTRLS(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLS ',SIZE(D%NPTRLS ),SHAPE(D%NPTRLS ) +ALLOCATE(D%NPROCL(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCL ',SIZE(D%NPROCL ),SHAPE(D%NPROCL ) + +CALL SUMPLATF(R%NDGL,NPRTRNS,MYSETW,D%NULTPP,D%NPROCL,D%NPTRLS) +D%NDGL_FS = D%NULTPP(MYSETW) + +! Help arrays for spectral to fourier space transposition +ALLOCATE(D%NLTSGTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSGTB ',SIZE(D%NLTSGTB),SHAPE(D%NLTSGTB) +ALLOCATE(D%NLTSFTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSFTB ',SIZE(D%NLTSFTB),SHAPE(D%NLTSFTB) +ALLOCATE(D%NSTAGT0B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT0B ',SIZE(D%NSTAGT0B),SHAPE(D%NSTAGT0B) +ALLOCATE(D%NSTAGT1B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT1B ',SIZE(D%NSTAGT1B),SHAPE(D%NSTAGT1B) +ALLOCATE(D%MSTABF (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%MSTABF ',SIZE(D%MSTABF),SHAPE(D%MSTABF) + +D%NLTSGTB(:) = 0 +DO JGL=1,D%NDGL_FS + IGL = D%NPTRLS(MYSETW)+JGL-1 + DO JM=0,G%NMEN(IGL) + D%NLTSGTB(D%NPROCM(JM)) = D%NLTSGTB(D%NPROCM(JM))+1 + ENDDO +ENDDO +DO JA=1,NPRTRW + IPLAT = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA)+JGL-1 + DO JM=1,D%NUMP + IF(IGL > R%NDGNH-G%NDGLU(D%MYMS(JM)) .AND. IGL <= R%NDGNH+G%NDGLU(D%MYMS(JM))) THEN + IPLAT = IPLAT + 1 + ENDIF + ENDDO + ENDDO + D%NLTSFTB(JA) = IPLAT +ENDDO + +DO JA=1,NPRTRW-1 + ISENDSET = MYSENDSET(NPRTRW,MYSETW,JA) + IRECVSET = MYRECVSET(NPRTRW,MYSETW,JA) + D%MSTABF(IRECVSET) = ISENDSET +ENDDO +D%MSTABF(MYSETW) = MYSETW + +ALLOCATE(D%NPNTGTB0(0:RALD%NMSMAX,D%NDGL_FS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB0 ',SIZE(D%NPNTGTB0 ),SHAPE(D%NPNTGTB0 ) +ALLOCATE(D%NPNTGTB1(D%NUMP,R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB1 ',SIZE(D%NPNTGTB1 ),SHAPE(D%NPNTGTB1 ) + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(MYSETW) + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JML=D%NPTRMS(JA),D%NPTRMS(JA)+D%NUMPP(JA)-1 + IM = D%NALLMS(JML) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB0(IM,JGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB0(IM,JGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA) + JGL - 1 + DO JM=1,D%NUMP + IM = D%MYMS(JM) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB1(JM,IGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB1(JM,IGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO + +IAUX0 = 0 +IAUX1 = 0 +DO JA=1,NPRTRNS-1 + I1 = MYSENDSET(NPRTRNS,MYSETW,JA) + I2 = MYRECVSET(NPRTRNS,MYSETW,JA) + DO JA1=1,NPRTRNS-1 + IF(MYSENDSET(NPRTRNS,MYSETW,JA1) == I2) I3 =MYRECVSET(NPRTRNS,MYSETW,JA1) + ENDDO + IAUX0 = MAX(D%NLTSFTB(I1),D%NLTSGTB(I2),IAUX0) + IAUX1 = MAX(D%NLTSGTB(I2),D%NLTSFTB(I3),IAUX1) +ENDDO +IAUX0 = MAX(D%NLTSGTB(MYSETW),IAUX0) +IAUX1 = MAX(D%NLTSGTB(MYSETW),IAUX1) +DO JA=1,NPRTRNS+1 + D%NSTAGT0B(JA) = (JA-1)*IAUX0 + D%NSTAGT1B(JA) = (JA-1)*IAUX1 +ENDDO +D%NLENGT0B = IAUX0*NPRTRNS +D%NLENGT1B = IAUX1*NPRTRNS + +ENDIF + +! GRIDPOINT SPACE + +ALLOCATE(D%NFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NFRSTLAT ',SIZE(D%NFRSTLAT ),SHAPE(D%NFRSTLAT ) +ALLOCATE(D%NLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NLSTLAT ',SIZE(D%NLSTLAT ),SHAPE(D%NLSTLAT ) +ALLOCATE(D%NPTRLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLAT ',SIZE(D%NPTRLAT ),SHAPE(D%NPTRLAT ) +ALLOCATE(D%NPTRFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRFRSTLAT',SIZE(D%NPTRFRSTLAT),SHAPE(D%NPTRFRSTLAT) +ALLOCATE(D%NPTRLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9)'D%NPTRLSTLAT',SIZE(D%NPTRLSTLAT),SHAPE(D%NPTRLSTLAT) +ALLOCATE(D%LSPLITLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%LSPLITLAT',SIZE(D%LSPLITLAT),SHAPE(D%LSPLITLAT) +ALLOCATE(D%NPROCA_GP(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCA_GP',SIZE(D%NPROCA_GP),SHAPE(D%NPROCA_GP) + + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + ALLOCATE(ZDUM(1)) + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & ZDUM,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ELSE + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & D%RWEIGHT,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ENDIF +D%NDGL_GP = D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF + +IF (LLP1) THEN + IF(.NOT.D%LGRIDONLY) THEN + WRITE(NOUT,FMT='(/'' OUTPUT FROM ROUTINE SUEMPLAT: ''/)') + WRITE(NOUT,FMT='('' D%NULTPP '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NULTPP(1:NPRTRNS) + WRITE(NOUT,FMT='('' D%NPROCL '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPROCL(1:R%NDGL) + ENDIF + WRITE(NOUT,FMT='('' D%NFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NFRSTLOFF D%NPTRFLOFF '')') + WRITE(NOUT,FMT='(2(1X,I6))') D%NFRSTLOFF, D%NPTRFLOFF + WRITE(NOUT,FMT='('' D%NPTRLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%LSPLITLAT '')') + WRITE(NOUT,FMT='(50(1X,L1))') D%LSPLITLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%NPTRFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NPTRLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='(/)') +ENDIF +ALLOCATE(D%NSTA(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTA ',SIZE(D%NSTA ),SHAPE(D%NSTA ) +ALLOCATE(D%NONL(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NONL ',SIZE(D%NONL ),SHAPE(D%NONL ) + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,ZDUM,ZMEDIAP,D%NPROCA_GP) +ELSE + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,D%RWEIGHT,ZMEDIAP,D%NPROCA_GP) +ENDIF +! IGPTOTL is the number of grid points in each individual processor +ALLOCATE(IGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IGPTOTL(:,:)=0 +DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + IGPTOT = 0 + DO JGL=D%NPTRFRSTLAT(JA),D%NPTRLSTLAT(JA) + IGPTOT = IGPTOT+D%NONL(JGL,JB) + ENDDO + IGPTOTL(JA,JB) = IGPTOT + ENDDO +ENDDO +D%NGPTOT = IGPTOTL(MY_REGION_NS,MY_REGION_EW) +D%NGPTOTMX = MAXVAL(IGPTOTL) +D%NGPTOTG = SUM(IGPTOTL) +ALLOCATE(D%NGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NGPTOTL ',SIZE(D%NGPTOTL ),SHAPE(D%NGPTOTL ) +D%NGPTOTL(:,:) = IGPTOTL(:,:) + +IF(.NOT.D%LGRIDONLY) THEN +ALLOCATE(D%NSTAGTF(D%NDGL_FS)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGTF ',SIZE(D%NSTAGTF ),SHAPE(D%NSTAGTF ) +IOFF = 0 +DO JGL=1,D%NDGL_FS + D%NSTAGTF(JGL) = IOFF + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IOFF = IOFF + G%NLOEN(IGL)+2+R%NNOEXTZL +ENDDO +D%NLENGTF = IOFF +ENDIF + +IF(ALLOCATED(ZDUM)) DEALLOCATE(ZDUM) +DEALLOCATE(IGPTOTL) +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS +END MODULE SUEMP_TRANS_MOD + diff --git a/src/etrans/gpu/internal/suemp_trans_preleg_mod.F90 b/src/etrans/gpu/internal/suemp_trans_preleg_mod.F90 new file mode 100644 index 0000000..55f9a5f --- /dev/null +++ b/src/etrans/gpu/internal/suemp_trans_preleg_mod.F90 @@ -0,0 +1,255 @@ +MODULE SUEMP_TRANS_PRELEG_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS_PRELEG + +! Set up distributed environment for the transform package (part 1) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRW, NPRTRV, MYSETW + +USE TPMALD_DISTR ,ONLY : DALD, DALD_NCPL2M, DALD_NPME, DALD_NESM0 +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FIELDS ,ONLY : FALD, FALD_RLEPINM +USE TPMALD_GEO ,ONLY : GALD + +!USE SUWAVEDI_MOD +!USE ABORT_TRANS_MOD + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JA,JM,JMLOC,JW,JV,ILATPP,IRESTL,IMLOC,IDT,INM,JN,IM,ILAST + +LOGICAL :: LLP1,LLP2 + +INTEGER(KIND=JPIM) :: ISPEC(NPRTRW),IMYMS(RALD%NMSMAX+1),IKNTMP(0:RALD%NMSMAX) +INTEGER(KIND=JPIM) :: IKMTMP(0:R%NSMAX),ISPEC2P +INTEGER(KIND=JPIM) :: IC(NPRTRW) +INTEGER(KIND=JPIM) :: IMDIM,IL,IND,IK,IPOS,IKM +REAL(KIND=JPRB) :: ZLEPDIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',0,ZHOOK_HANDLE) + +IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS_PRELEG ===' + + !* 1. Initialize partitioning of wave numbers to PEs ! + ! ---------------------------------------------- + + ALLOCATE(D%NASM0(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NASM0 ',SIZE(D%NASM0 ),SHAPE(D%NASM0 ) + + ALLOCATE(DALD%NESM0(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NESM0 ',SIZE(DALD%NESM0 ),SHAPE(DALD%NESM0 ) + + ALLOCATE(D%NATM0(0:R%NTMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NATM0 ',SIZE(D%NATM0 ),SHAPE(D%NATM0 ) + ALLOCATE(D%NUMPP(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NUMPP ',SIZE(D%NUMPP ),SHAPE(D%NUMPP ) + ALLOCATE(D%NPOSSP(NPRTRW+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NPOSSP',SIZE(D%NPOSSP ),SHAPE(D%NPOSSP ) + + ALLOCATE(D%NPROCM(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPROCM',SIZE(D%NPROCM ),SHAPE(D%NPROCM ) + + ALLOCATE(DALD%NPME(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NPME',SIZE(DALD%NPME),SHAPE(DALD%NPME) + ALLOCATE(DALD%NCPL2M(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NCPL2M',SIZE(DALD%NCPL2M),SHAPE(DALD%NCPL2M) + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,IKNTMP,IKMTMP) + DALD%NPME(0)=1 + DO JM=1,RALD%NMSMAX + DALD%NPME(JM)=DALD%NPME(JM-1)+IKNTMP(JM-1)+1 + ENDDO + ALLOCATE (DALD_NPME (0:RALD%NMSMAX)) + DALD_NPME = DALD%NPME + !$acc enter data create (DALD_NPME) + !$acc update device (DALD_NPME) + DO JM=0,RALD%NMSMAX + DALD%NCPL2M(JM) = 2*(IKNTMP(JM)+1) + ENDDO + ALLOCATE (DALD_NCPL2M (0:RALD%NMSMAX)) + DALD_NCPL2M = DALD%NCPL2M + !$acc enter data create (DALD_NCPL2M) + !$acc update device (DALD_NCPL2M) + ALLOCATE(FALD%RLEPINM(R%NSPEC_G/2)) + IF(LLP2)WRITE(NOUT,9) 'FALD%RLEPINM',SIZE(FALD%RLEPINM),SHAPE(FALD%RLEPINM) + DO JM=0,RALD%NMSMAX + DO JN=1,IKNTMP(JM) + ZLEPDIM=-((REAL(JM,JPRB)**2)*(GALD%EXWN**2)+& + & (REAL(JN,JPRB)**2)*(GALD%EYWN**2)) + FALD%RLEPINM(DALD%NPME(JM)+JN)=1./ZLEPDIM + ENDDO + ENDDO + DO JM=1,RALD%NMSMAX + ZLEPDIM=-(REAL(JM,JPRB)**2)*(GALD%EXWN**2) + FALD%RLEPINM(DALD%NPME(JM))=1./ZLEPDIM + ENDDO + FALD%RLEPINM(DALD%NPME(0))=0. + ALLOCATE (FALD_RLEPINM (R%NSPEC_G/2)) + FALD_RLEPINM = FALD%RLEPINM + !$acc enter data create (FALD_RLEPINM) + !$acc update device (FALD_RLEPINM) + D%NUMPP(:) = 0 + ISPEC(:) = 0 + DALD%NESM0(:)=-99 + + IMDIM = 0 + IL = 1 + IND = 1 + IK = 0 + IPOS = 1 + DO JM=0,RALD%NMSMAX + IK = IK + IND + IF (IK > NPRTRW) THEN + IK = NPRTRW + IND = -1 + ELSEIF (IK < 1) THEN + IK = 1 + IND = 1 + ENDIF + + IKM =DALD%NCPL2M(JM)/2 -1 + D%NPROCM(JM) = IK + ISPEC(IK) = ISPEC(IK)+IKM+1 + D%NUMPP(IK) = D%NUMPP(IK)+1 + IF (IK == MYSETW) THEN + IMDIM = IMDIM + IKM+1 + IMYMS(IL) = JM + DALD%NESM0(JM) = IPOS + IPOS = IPOS+(IKM+1)*4 + IL = IL+1 + ENDIF + ENDDO + ALLOCATE (DALD_NESM0(0:RALD%NMSMAX)) + DALD_NESM0 = DALD%NESM0 + !$acc enter data copyin (DALD_NESM0) + D%NPOSSP(1) = 1 + ISPEC2P = 4*ISPEC(1) + D%NSPEC2MX = ISPEC2P + DO JA=2,NPRTRW + D%NPOSSP(JA) = D%NPOSSP(JA-1)+ISPEC2P + ISPEC2P = 4*ISPEC(JA) + D%NSPEC2MX=MAX(D%NSPEC2MX,ISPEC2P) + ENDDO + D%NPOSSP(NPRTRW+1) = D%NPOSSP(NPRTRW)+ISPEC2P + + D%NSPEC2 = 4*IMDIM + D%NSPEC=D%NSPEC2 + + D%NUMP = D%NUMPP (MYSETW) + ALLOCATE(D%MYMS(D%NUMP)) + IF(LLP2)WRITE(NOUT,9) 'D%MYMS ',SIZE(D%MYMS ),SHAPE(D%MYMS ) + D%MYMS(:) = IMYMS(1:D%NUMP) + D%NUMTP = D%NUMP + + ! pointer to the first wave number of a given wave-set in NALLMS array + ALLOCATE(D%NPTRMS(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NPTRMS ',SIZE(D%NPTRMS ),SHAPE(D%NPTRMS ) + D%NPTRMS(:) = 1 + DO JA=2,NPRTRW + D%NPTRMS(JA) = D%NPTRMS(JA-1)+D%NUMPP(JA-1) + ENDDO + ! D%NALLMS : wave numbers for all wave-set concatenated together to give all + ! wave numbers in wave-set order. + ALLOCATE(D%NALLMS(RALD%NMSMAX+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NALLMS ',SIZE(D%NALLMS ),SHAPE(D%NALLMS ) + IC(:) = 0 + DO JM=0,RALD%NMSMAX + D%NALLMS(IC(D%NPROCM(JM))+D%NPTRMS(D%NPROCM(JM))) = JM + IC(D%NPROCM(JM)) = IC(D%NPROCM(JM))+1 + ENDDO + ALLOCATE(D%NDIM0G(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NDIM0G ',SIZE(D%NDIM0G ),SHAPE(D%NDIM0G ) + IPOS = 1 + DO JA=1,NPRTRW + DO JMLOC=1,D%NUMPP(JA) + IM = D%NALLMS(D%NPTRMS(JA)+JMLOC-1) + D%NDIM0G(IM) = IPOS + IPOS = IPOS+2*DALD%NCPL2M(IM) + ENDDO + ENDDO + + ALLOCATE(D%NLATLS(NPRTRW,NPRTRV)) + IF(LLP2)WRITE(NOUT,9) 'D%NLATLS',SIZE(D%NLATLS ),SHAPE(D%NLATLS ) + ALLOCATE(D%NLATLE(NPRTRW,NPRTRV)) + IF(LLP2)WRITE(NOUT,9) 'D%NLATLE',SIZE(D%NLATLE ),SHAPE(D%NLATLE ) + + D%NLATLS(:,:) = 9999 + D%NLATLE(:,:) = -1 + + ILATPP = R%NDGL/NPRTRW + IRESTL = R%NDGL-NPRTRW*ILATPP + DO JW=1,NPRTRW + IF (JW > IRESTL) THEN + D%NLATLS(JW,1) = IRESTL*(ILATPP+1)+(JA-IRESTL-1)*ILATPP+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP-1 + ELSE + D%NLATLS(JW,1) = (JA-1)*(ILATPP+1)+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP + ENDIF + ENDDO + ILAST=0 + DO JW=1,NPRTRW + ILATPP = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)/NPRTRV + IRESTL = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)-NPRTRV*ILATPP + DO JV=1,NPRTRV + IF (JV > IRESTL) THEN + D%NLATLS(JW,JV) = IRESTL*(ILATPP+1)+(JV-IRESTL-1)*ILATPP+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP-1 + ELSE + D%NLATLS(JW,JV) = (JV-1)*(ILATPP+1)+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP + ENDIF + ENDDO + ILAST=D%NLATLE(JW,NPRTRV) + ENDDO + IF (LLP1) THEN + DO JW=1,NPRTRW + DO JV=1,NPRTRV + WRITE(NOUT,'(" JW=",I6," JV=",I6," D%NLATLS=",I6," D%NLATLE=",I6)')& + & JW,JV,D%NLATLS(JW,JV),D%NLATLE(JW,JV) + ENDDO + ENDDO + ENDIF + + ALLOCATE(D%NPMT(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMT ',SIZE(D%NPMT ),SHAPE(D%NPMT ) + ALLOCATE(D%NPMS(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMS ',SIZE(D%NPMS ),SHAPE(D%NPMS ) + ALLOCATE(D%NPMG(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMG ',SIZE(D%NPMG ),SHAPE(D%NPMG ) + IDT = R%NTMAX-R%NSMAX + INM = 0 + DO JMLOC=1,D%NUMP + IMLOC = D%MYMS(JMLOC) + + INM = INM+R%NTMAX+2-IMLOC + ENDDO + INM = 0 + DO JM=0,R%NSMAX + + INM = INM+R%NTMAX+2-JM + ENDDO + + D%NLEI3D = (R%NLEI3-1)/NPRTRW+1 + +ENDIF + + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS_PRELEG +END MODULE SUEMP_TRANS_PRELEG_MOD diff --git a/src/etrans/gpu/internal/suemplat_mod.F90 b/src/etrans/gpu/internal/suemplat_mod.F90 new file mode 100644 index 0000000..c06f316 --- /dev/null +++ b/src/etrans/gpu/internal/suemplat_mod.F90 @@ -0,0 +1,252 @@ +MODULE SUEMPLAT_MOD +CONTAINS +SUBROUTINE SUEMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,LDSPLITLAT,KMYPROC,KLOEN,KDGUX) + +!**** *SUEMPLAT * - Initialize gridpoint distrbution in N-S direction + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *SUEMPLAT * + +! Explicit arguments - input : +! -------------------- +! KDGL -last latitude +! KPROC -total number of processors +! KPROCA -number of processors in A direction +! KMYSETA -process number in A direction +! LDSPLIT -true for latitudes shared between sets +! PWEIGHT -weight per grid-point if weighted +! distribution +! LDEQ_REGIONS -true if eq_regions partitioning +! LDWEIGHTED_DISTR -true if weighted distribution + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KRESTM -number of PEs with one extra point +! KFRSTLAT -first latitude row on processor +! KLSTLAT -last latitude row on processor +! KFRSTLOFF -offset for first latitude in set +! KPROCAGP -number of grid points per A set +! KPTRLAT -pointer to start of latitude +! KPTRFRSTLAT-pointer to first latitude +! KPTRLSTLAT -pointer to last latitude +! KPTRFLOFF -offset for pointer to first latitude +! LDSPLITLAT -true for latitudes which are split +! PMEDIAP -mean weight per PE if weighted +! distribution +! + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. SUMPLATB and SUEMPLATB. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! David Dent:97-06-02 parameters KFRSTLAT etc added +! JF. Estrade:97-11-13 Adaptation to ALADIN case +! J.Boutahar: 98-07-06 phasing with CY19 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings +! (correct computation of extrapolar latitudes for KPROCL). +! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. +! - merge old sumplat.F and suemplat.F +! - gather 'lelam' code and 'not lelam' code. +! - clean (useless duplication of variables, non doctor features). +! - remodularise according to lelam/not lelam +! -> lelam features in new routine suemplatb.F, +! not lelam features in new routine sumplatb.F +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 20-Sep-2010 Phasing cy37 +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV + +USE SUEMPLATB_MOD ,ONLY : SUEMPLATB +USE SUMPLATBEQ_MOD ,ONLY : SUMPLATBEQ +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) +REAL(KIND=JPRB),INTENT(OUT) :: PMEDIAP +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) + +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL,INTENT(OUT) :: LDSPLITLAT(:) +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(IN) :: KMYPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(KDGL) +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX + +! === END OF INTERFACE BLOCK === +INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) + +INTEGER(KIND=JPIM) :: IPTRLATITUDE, JA, JGL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +LOGICAL :: LLFOURIER +LOGICAL :: LLDEBUG=.FALSE. + +! ----------------------------------------------------------------- + +!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF +! KMEDIAP, KRESTM, INDIC, ILAST. +! ----------------------------------------- + +IF (LHOOK) CALL DR_HOOK('SUEMPLAT_MOD:SUEMPLAT',0,ZHOOK_HANDLE) + +INDIC(:)=0 +ILAST(:)=0 + +IF(LDWEIGHTED_DISTR.AND..NOT.LDEQ_REGIONS)THEN + CALL ABORT_TRANS ('SUEMPLAT: LDWEIGHTED_DISTR=T AND LDEQ_REGIONS=F NOT SUPPORTED') +ENDIF + +IF( LDEQ_REGIONS )THEN + CALL SUMPLATBEQ(1,KDGL,KPROC,KPROCA,KLOEN,LDSPLIT,LDEQ_REGIONS,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,INDIC,ILAST) +ELSE + LLFOURIER=.FALSE. +!REK commented out for now ... monkey business to be done again, should lead to the use of sumplatb +!REK CALL SUMPLATB(1,KDGL,KPROCA,G%NLOEN,LDSPLIT,LLFOURIER,& +!REK &KMEDIAP,KRESTM,INDIC,ILAST) + CALL SUEMPLATB(1,KDGL,KPROCA,KLOEN,LDSPLIT,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,INDIC,ILAST,KDGUX) +ENDIF + +! ----------------------------------------------------------------- + +!* 2. CODE NOT DEPENDING ON 'LELAM': COMPUTATION OF +! KFRSTLAT TO LDSPLITLAT. +! --------------------------------------------- + +! * Computation of first and last latitude of processor sets +! ----------- in grid-point-space ----------------------- +IF(KMYPROC==1.AND.LLDEBUG)THEN + WRITE(0,'("")') + WRITE(0,'("SUEMPLAT_MOD:LDWEIGHTED_DISTR=",L1)')LDWEIGHTED_DISTR + WRITE(0,'("")') + DO JA=1,KPROCA + WRITE(0,'("SUEMPLAT_MOD: JA=",I3," ILAST=",I3," INDIC=",I3)')& + &JA,ILAST(JA),INDIC(JA) + ENDDO + WRITE(0,'("")') + IF( LDEQ_REGIONS .AND. LDSPLIT )THEN + DO JA=1,KPROCA + WRITE(0,'("SUEMPLAT_MOD: JA=",I3," KPROCAGP=",I8)')& + &JA,KPROCAGP(JA) + ENDDO + WRITE(0,'("")') + ENDIF +ENDIF +KFRSTLAT(1) = 1 +KLSTLAT(KPROCA) = KDGL +DO JA=1,KPROCA-1 + IF(KMYPROC==1 .AND. NPRINTLEV > 1)THEN + WRITE(NOUT,'("SUEMPLAT_MOD: JA=",I3," ILAST=",I3," INDIC=",I3)')& + &JA,ILAST(JA),INDIC(JA) + ENDIF + IF ((.NOT. LDSPLIT) .OR. INDIC(JA) == 0) THEN + KFRSTLAT(JA+1) = ILAST(JA) + 1 + KLSTLAT(JA) = ILAST(JA) + ELSE + KFRSTLAT(JA+1) = INDIC(JA) + KLSTLAT(JA) = INDIC(JA) + ENDIF +ENDDO +KFRSTLOFF=KFRSTLAT(KMYSETA)-1 + +! * Initialise following data structures:- +! NPTRLAT (pointer to the start of each latitude) +! LSPLITLAT (TRUE if latitude is split over two A sets) +! NPTRFRSTLAT (pointer to the first latitude of each A set) +! NPTRLSTLAT (pointer to the last latitude of each A set) + +DO JGL=1,KDGL + KPTRLAT (JGL)=-999 + LDSPLITLAT(JGL)=.FALSE. +ENDDO +IPTRLATITUDE=0 +DO JA=1,KPROCA + DO JGL=KFRSTLAT(JA),KLSTLAT(JA) + IPTRLATITUDE=IPTRLATITUDE+1 + LDSPLITLAT(JGL)=.TRUE. + IF( KPTRLAT(JGL) == -999 )THEN + KPTRLAT(JGL)=IPTRLATITUDE + LDSPLITLAT(JGL)=.FALSE. + ENDIF + ENDDO +ENDDO +DO JA=1,KPROCA + IF( LDSPLITLAT(KFRSTLAT(JA)) .AND. JA /= 1 )THEN + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA))+1 + ELSE + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA)) + ENDIF + IF( LDSPLITLAT(KLSTLAT(JA)) .AND. JA == KPROCA)THEN + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA))+1 + ELSE + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA)) + ENDIF +ENDDO +KPTRFLOFF=KPTRFRSTLAT(KMYSETA)-1 +IF(KMYPROC==1 .AND. NPRINTLEV > 1)THEN + DO JGL=1,KDGL + WRITE(NOUT,'("SUEMPLAT_MOD: JGL=",I3," KPTRLAT=",I3," LDSPLITLAT=",L4)')& + & JGL,KPTRLAT(JGL),LDSPLITLAT(JGL) + ENDDO + DO JA=1,KPROCA + WRITE(NOUT,'("SUEMPLAT_MOD: JA=",I3," KFRSTLAT=",I3," KLSTLAT=",I3,& + & " KPTRFRSTLAT=",I3," KPTRLSTLAT=",I3)')& + & JA,KFRSTLAT(JA),KLSTLAT(JA),KPTRFRSTLAT(JA),KPTRLSTLAT(JA) + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMPLAT_MOD:SUEMPLAT',1,ZHOOK_HANDLE) +END SUBROUTINE SUEMPLAT +END MODULE SUEMPLAT_MOD + diff --git a/src/etrans/gpu/internal/suemplatb_mod.F90 b/src/etrans/gpu/internal/suemplatb_mod.F90 new file mode 100644 index 0000000..a736177 --- /dev/null +++ b/src/etrans/gpu/internal/suemplatb_mod.F90 @@ -0,0 +1,236 @@ +MODULE SUEMPLATB_MOD +CONTAINS +SUBROUTINE SUEMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,KINDIC,KLAST,KDGUX) + +!**** *SUMPLATB * - Routine to initialize parallel environment + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *SUMPLATB * + +! Explicit arguments - input : +! -------------------- +! KDGSA -first latitude (grid-space) +! (may be different from NDGSAG) +! KDGL -last latitude +! KPROCA -number of processors in A direction +! KLOENG -actual number of longitudes per latitude. +! LDSPLIT -true for latitudes shared between sets +! KDGUX -last latitude for meaningful computations +! (suggested to pass NDGUX in gp-space, NDGL in Fourier space +! for having a good load-balance) +! PWEIGHT -weight per grid-point if weighted distribution +! LDWEIGHTED_DISTR -true if weighted distribution` + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KPROCAGP -number of grid points per A set +! KRESTM -number of PEs with one extra point +! KINDIC -intermediate quantity for 'sumplat' +! KLAST -intermediate quantity for 'sumplat' +! PMEDIAP -mean weight per PE if weighted distribution + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! K. YESSAD (after old version of sumplat.F). + +! Modifications. +! -------------- +! Original : 98-12-07 +! G. Radnoti: 03-03-03: Semi-merge with sumplatb, only difference: +! NS-partitioning according to NDGUX +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 21-Sep-2010 phasing CY37 +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(KPROCA) +REAL(KIND=JPRB),INTENT(IN) :: PMEDIAP + +INTEGER(KIND=JPIM) :: IPP1(KPROCA),ILAST1(KPROCA) +INTEGER(KIND=JPIM) :: IPP(KPROCA) +INTEGER(KIND=JPIM) :: IFIRST(KPROCA) + +INTEGER(KIND=JPIM) :: ICOMP, IGL, IMAXI, IMAXIOL, IMEDIA, ITOT, JA, JGL,& + & ILAST,IREST,ILIMIT,IFRST +LOGICAL :: LLDONE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ----------------------------------------------------------------- + +!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. +! ---------------------------------------------- + +! * Computation of KMEDIAP and KRESTM. + +IF (LHOOK) CALL DR_HOOK('SUEMPLATB_MOD:SUEMPLATB',0,ZHOOK_HANDLE) +IF (LDWEIGHTED_DISTR) THEN + CALL ABORT_TRANS ('SUMPLATBEQ: ALADIN CODE IS NOT PREPARED FOR WEIGHTED DISTRIBUTION') +ENDIF +IMEDIA = SUM(KLOENG(KDGSA:KDGUX)) +KMEDIAP = IMEDIA / KPROCA +IF (KMEDIAP < KLOENG(KDGL/2)) THEN + CALL ABORT_TRANS ('SUMPLATB: KPROCA TOO BIG FOR THIS RESOLUTION') +ENDIF +KRESTM = IMEDIA - KMEDIAP * KPROCA +IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 + +! * Computation of intermediate quantities KINDIC and KLAST + +IF (LDSPLIT) THEN + + IREST = 0 + ILAST =0 + DO JA=1,KPROCA + IF (JA <= KRESTM .OR. KRESTM == 0) THEN + ICOMP = KMEDIAP + ELSE + ICOMP = KMEDIAP - 1 + ENDIF + ITOT = IREST + IGL = ILAST+1 + DO JGL=IGL,KDGUX + ILAST = JGL + IF(ITOT+KLOENG(JGL) < ICOMP) THEN + ITOT = ITOT+KLOENG(JGL) + ELSEIF(ITOT+KLOENG(JGL) == ICOMP) THEN + IREST = 0 + KLAST(JA) = JGL + KINDIC(JA) = 0 + EXIT + ELSE + IREST = KLOENG(JGL) -(ICOMP-ITOT) + KLAST(JA) = JGL + KINDIC(JA) = JGL + EXIT + ENDIF + ENDDO + ENDDO + KLAST(KPROCA)=KDGL + KINDIC(KPROCA)=0 +ELSE + + KINDIC(:) = 0 + + IMAXI = KMEDIAP-1 + IMAXIOL = HUGE(IMAXIOL) + DO + ILIMIT = IMAXI + IMAXI = 0 + IFRST = KDGUX + ILAST1(:) = 0 + IPP1(:) = 0 + DO JA=KPROCA,1,-1 + IGL = IFRST + LATS:DO JGL=IGL,1,-1 + IF (IPP1(JA) < ILIMIT .OR. JA == 1) THEN + IFRST = JGL-1 + IPP1(JA) = IPP1(JA) + KLOENG(JGL) + IF(ILAST1(JA) == 0) ILAST1(JA) = JGL + ELSE + EXIT LATS + ENDIF + ENDDO LATS + IMAXI = MAX (IMAXI,IPP1(JA)) + ENDDO + IF(IMAXI >= IMAXIOL) EXIT + KLAST(:) = ILAST1(:) + IPP(:) = IPP1(:) + IMAXIOL = IMAXI + ENDDO + +! make the distribution more uniform +! ---------------------------------- + + IFIRST(1) = 0 + IF (KLAST(1) > 0) IFIRST(1) = 1 + DO JA=2,KPROCA + IF (IPP(JA) > 0) THEN + IFIRST(JA) = KLAST(JA-1)+1 + ELSE + IFIRST(JA) = 0 + ENDIF + ENDDO + + LLDONE = .FALSE. + DO WHILE( .NOT.LLDONE ) + LLDONE = .TRUE. + + DO JA=1,KPROCA-1 + IF (IPP(JA) > IPP(JA+1)) THEN + IF (IPP(JA)-IPP(JA+1) > IPP(JA+1) + 2 *& + & KLOENG(KLAST(JA)) -IPP(JA) ) THEN + IPP(JA) = IPP(JA) - KLOENG(KLAST(JA)) + IPP(JA+1) = IPP(JA+1) + KLOENG(KLAST(JA)) + IF (KLAST(JA+1) == 0) KLAST(JA+1) = KLAST(JA) + IFIRST(JA+1) = KLAST(JA) + KLAST(JA) = KLAST(JA) - 1 + IF (KLAST(JA) == 0) IFIRST(JA) = 0 + LLDONE = .FALSE. + ENDIF + ELSE + IF( IFIRST(JA+1) > 0 )THEN + IF (IPP(JA+1)-IPP(JA) >= IPP(JA) + 2 *& + & KLOENG(IFIRST(JA+1)) -IPP(JA+1) ) THEN + IPP(JA) = IPP(JA) + KLOENG(IFIRST(JA+1)) + IPP(JA+1) = IPP(JA+1) - KLOENG(IFIRST(JA+1)) + KLAST(JA) = IFIRST(JA+1) + IF (IFIRST(JA) == 0) IFIRST(JA) = KLAST(JA) + IF (KLAST(JA+1) == KLAST(JA)) THEN + KLAST(JA+1) = 0 + IFIRST(JA+1) = 0 + ELSE + IFIRST(JA+1) = IFIRST(JA+1) + 1 + ENDIF + LLDONE = .FALSE. + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + KLAST(KPROCA)=KDGL +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMPLATB_MOD:SUEMPLATB',1,ZHOOK_HANDLE) +END SUBROUTINE SUEMPLATB +END MODULE SUEMPLATB_MOD diff --git a/src/etrans/gpu/internal/suestaonl_mod.F90 b/src/etrans/gpu/internal/suestaonl_mod.F90 new file mode 100644 index 0000000..c715e84 --- /dev/null +++ b/src/etrans/gpu/internal/suestaonl_mod.F90 @@ -0,0 +1,462 @@ +MODULE SUESTAONL_MOD +CONTAINS +SUBROUTINE SUESTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) + +!**** *SUESTAONL * - Routine to initialize parallel environment, TAL + +! Purpose. +! -------- +! Initialize D%NSTA and D%NONL. +! Calculation of distribution of grid points to processors : +! Splitting of grid in B direction + +!** Interface. +! ---------- +! *CALL* *SUESTAONL * + +! Explicit arguments : +! -------------------- +! KMEDIAP - mean number of grid points per PE +! KRESTM - number of PEs with one extra point +! LDWEIGHTED_DISTR -true if weighted distribution +! PWEIGHT -weight per grid-point if weighted +! distribution +! PMEDIAP -mean weight per PE if weighted +! distribution +! KPROCAGP -number of grid points per A set +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! Modified 98-12-04 C. Fischer: merge with SUESTAONL (Aladin) +! 03-03-03 G. Radnoti: no merge: only difference with +! sustaonl: ezone added to last a-set +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! A.Bogatchev Sep-2010 phasing for AL37 +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE ,ONLY : MPL_ALLGATHERV, MPL_RECV, MPL_SEND + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC +USE TPMALD_DIM ,ONLY : RALD +USE SET2PE_MOD ,ONLY : SET2PE +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_NS, N_REGIONS_EW +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KRESTM +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDWEIGHTED_DISTR +REAL(KIND=JPRB),INTENT(IN) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(:) + +INTEGER(KIND=JPIM) :: IXPTLAT(R%NDGL), ILSTPTLAT(R%NDGL) +INTEGER(KIND=JPIM) :: ICHK(R%NDLON,R%NDGL), ICOMBUF(R%NDGL*N_REGIONS_EW*2) + +INTEGER(KIND=JPIM) :: I1, I2, IBUFLEN, IDGLG, IDWIDE, & + & IGL, IGL1, IGL2, IGLOFF, IGPTA, & + & IGPTPRSETS, IGPTS, IGPTSP, ILEN, ILRECV, & + & ILSEND, INPLAT, INXLAT, IPOS, & + & IPROCB, IPTSRE, IRECV, & + & IREST, ISEND, ITAG, JA, JB, JGL, JL, JNPTSRE, & + & ILAT, ILON, ILOEN +INTEGER(KIND=JPIM),ALLOCATABLE :: ICOMBUFG(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZWEIGHT(:,:) +INTEGER(KIND=JPIM) :: JJ, ILENG(NPROC), IOFF(NPROC) + +LOGICAL :: LLABORT +LOGICAL :: LLP1,LLP2 + +REAL(KIND=JPRB) :: ZLAT, ZLAT1, ZCOMP +REAL(KIND=JPRB) :: ZDIVID(R%NDGL),ZXPTLAT(R%NDGL) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ----------------------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',0,ZHOOK_HANDLE) + + + +IXPTLAT (:)=999999 +ILSTPTLAT(:)=999999 +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IDWIDE = R%NDGL/2 +IBUFLEN = R%NDGL*N_REGIONS_EW*2 +IDGLG = R%NDGL + +I1 = MAX( 1,D%NFRSTLAT(MY_REGION_NS)-D%NFRSTLOFF) +I2 = MIN(IDGLG,D%NLSTLAT (MY_REGION_NS)-D%NFRSTLOFF) + +ILEN = D%NLSTLAT(MY_REGION_NS) - D%NFRSTLAT(MY_REGION_NS)+1 + +IGPTPRSETS = SUM(G%NLOEN(1:D%NFRSTLAT(MY_REGION_NS)-1)) + + + +IF (D%LSPLIT) THEN + IF( LEQ_REGIONS )THEN + IGPTA=0 + DO JA=1,MY_REGION_NS-1 + IGPTA = IGPTA + KPROCAGP(JA) + ENDDO + IGPTS = KPROCAGP(MY_REGION_NS) + ELSE + IF (MY_REGION_NS <= KRESTM.OR.KRESTM == 0) THEN + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ELSE + IGPTS = KMEDIAP+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ENDIF + ELSE + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP-1 + IGPTA = KMEDIAP*KRESTM+IGPTS*(MY_REGION_NS-1-KRESTM) + ELSE + IGPTS = KMEDIAP-1+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*KRESTM+(KMEDIAP-1)*(MY_REGION_NS-1-KRESTM) + ENDIF + ENDIF + ENDIF +ELSE + IGPTA = IGPTPRSETS + IGPTS = SUM(G%NLOEN(D%NFRSTLAT(MY_REGION_NS):D%NLSTLAT(MY_REGION_NS))) +ENDIF + +IGPTSP = IGPTS/N_REGIONS(MY_REGION_NS) +IREST = IGPTS-N_REGIONS(MY_REGION_NS)*IGPTSP +IXPTLAT(1) = IGPTA-IGPTPRSETS+1 +ZXPTLAT(1) = REAL(IXPTLAT(1)) +ILSTPTLAT(1) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)) +INPLAT = G%NLOEN(D%NFRSTLAT(MY_REGION_NS))-IXPTLAT(1)+1 +DO JGL=2,ILEN + IXPTLAT(JGL) = 1 + ZXPTLAT(JGL) = 1.0_JPRB + ILSTPTLAT(JGL) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) + INPLAT = INPLAT+G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) +ENDDO +ILSTPTLAT(ILEN) = G%NLOEN(D%NLSTLAT(MY_REGION_NS))-INPLAT+IGPTS + +DO JB=1,N_REGIONS_EW + DO JGL=1,R%NDGL+N_REGIONS_NS-1 + D%NSTA(JGL,JB) = 0 + D%NONL(JGL,JB) = 0 + ENDDO +ENDDO + +! grid point decomposition +! --------------------------------------- +DO JGL=1,ILEN + ZDIVID(JGL)=1._JPRB/REAL(G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1),JPRB) +ENDDO +IF( LDWEIGHTED_DISTR )THEN + ALLOCATE(ZWEIGHT(G%NLOEN(R%NDGL/2),R%NDGL)) + IGL=0 + DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IGL=IGL+1 + ZWEIGHT(JL,JGL)=PWEIGHT(IGL) + ENDDO + ENDDO + ZCOMP=0 + IGPTS=0 +ENDIF +DO JB=1,N_REGIONS(MY_REGION_NS) + + IF( .NOT.LDWEIGHTED_DISTR )THEN + + IF (JB <= IREST) THEN + IPTSRE = IGPTSP+1 + ELSE + IPTSRE = IGPTSP + ENDIF + + DO JNPTSRE=1,IPTSRE + ZLAT = 1._JPRB + ZLAT1 = 1._JPRB + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + ZLAT1 = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + IF (ZLAT1 < ZLAT) THEN + ZLAT=ZLAT1 + INXLAT = JGL + ENDIF + ENDIF + ENDDO + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ELSE + DO WHILE ( (JB < N_REGIONS(MY_REGION_NS) .AND. ZCOMP < PMEDIAP) & + & .OR. (JB == N_REGIONS(MY_REGION_NS) .AND. IGPTS < KPROCAGP(MY_REGION_NS)) ) + + IGPTS = IGPTS + 1 + ZLAT = 1._JPRB + ZLAT1 = 1._JPRB + + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + ZLAT1 = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + IF (ZLAT1 < ZLAT) THEN + ZLAT = ZLAT1 + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + IF(IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1)THEN + CALL ABORT_TRANS(' SUSTAONL: IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1') + ENDIF + ILON=D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + ILAT=D%NFRSTLAT(MY_REGION_NS)+INXLAT-1 + ILOEN=G%NLOEN(ILAT) + IF(ILON<1.OR.ILON>ILOEN)THEN + CALL ABORT_TRANS(' SUSTAONL: ILON<1.OR.ILON>ILOEN') + ENDIF + ZCOMP = ZCOMP + ZWEIGHT(ILON,ILAT) + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ZCOMP = ZCOMP - PMEDIAP + + ENDIF + +ENDDO + + +IF( LDWEIGHTED_DISTR )THEN + DEALLOCATE(ZWEIGHT) +ENDIF +! Exchange local partitioning info to produce global view + + +IF( NPROC > 1 )THEN + IF( LEQ_REGIONS )THEN + + ITAG = MTAGPART + IPOS = 0 + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + ILENG(NPRCIDS(IRECV))=ILEN + ENDDO + ENDDO + IOFF(1)=0 + DO JJ=2,NPROC + IOFF(JJ)=IOFF(JJ-1)+ILENG(JJ-1) + ENDDO + ALLOCATE(ICOMBUFG(SUM(ILENG(:)))) + CALL MPL_ALLGATHERV(ICOMBUF(1:ILSEND),ICOMBUFG,ILENG,CDSTRING='SUSTAONL') + DO JA=1,N_REGIONS_NS + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + IPOS = IOFF(NPRCIDS(IRECV)) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUFG(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUFG(IPOS) + ENDDO + ENDIF + ENDDO + ENDDO + DEALLOCATE(ICOMBUFG) + + ELSE + + ITAG = MTAGPART + IPOS = 0 + DO JB=1,N_REGIONS(MY_REGION_NS) + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,JB) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,JB) + ENDDO + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUESTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + CALL SET2PE(ISEND,JA,MY_REGION_EW,0,0) + IF(ISEND /= MYPROC) THEN + CALL MPL_SEND(ICOMBUF(1:ILSEND),KDEST=NPRCIDS(ISEND),KTAG=ITAG, & + & CDSTRING='SUESTAONL:') + ENDIF + ENDDO + DO JA=1,N_REGIONS_NS + CALL SET2PE(IRECV,JA,MY_REGION_EW,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*N_REGIONS(JA)*2 + CALL MPL_RECV(ICOMBUF(1:ILEN),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG, & + & KOUNT=ILRECV,CDSTRING='SUESTAONL:') + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IPOS = 0 + DO JB=1,N_REGIONS(JA) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUF(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUF(IPOS) + ENDDO + ENDDO + ENDIF + ENDDO + + ENDIF +ENDIF + +! Confirm consistency of global partitioning, specifically testing for +! multiple assignments of same grid point and unassigned grid points + + +LLABORT = .FALSE. +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + ICHK(JL,JGL) = 1 + ENDDO +ENDDO +DO JA=1,N_REGIONS_NS + IGLOFF = D%NPTRFRSTLAT(JA) + DO JB=1,N_REGIONS(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL = IGLOFF+JGL-IGL1 + DO JL=D%NSTA(IGL,JB),D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + IF( ICHK(JL,JGL) /= 1 )THEN + WRITE(NOUT,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " row=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + WRITE(0,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " ROW=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + LLABORT = .TRUE. + ENDIF + ICHK(JL,JGL) = 2 + ENDDO + ENDDO + ENDDO +ENDDO + +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IF( ICHK(JL,JGL) /= 2 )THEN + WRITE(NOUT,'(" SUESTAONL : row=",i4," sta=",i4,& + & " GRID POINT NOT ASSIGNED")') JGL,JL + LLABORT = .TRUE. + ENDIF + ENDDO +ENDDO +IF( LLABORT )THEN + WRITE(NOUT,'(" SUESTAONL : inconsistent partitioning")') + CALL ABORT_TRANS(' SUESTAONL: inconsistent partitioning') +ENDIF + + +IF (LLP1) THEN + WRITE(UNIT=NOUT,FMT='('' OUTPUT FROM ROUTINE SUESTAONL '')') + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' PARTITIONING INFORMATION '')') + WRITE(UNIT=NOUT,FMT='('' '')') + IPROCB = MIN(32,N_REGIONS_EW) + WRITE(UNIT=NOUT,FMT='(17X," SETB=",32(1X,I3))') (JB,JB=1,IPROCB) + DO JA=1,N_REGIONS_NS + IPROCB = MIN(32,N_REGIONS(JA)) + WRITE(UNIT=NOUT,FMT='('' '')') + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL=IGLOFF+JGL-IGL1 + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," NSTA=",& + & 32(1X,I3))') JA,JGL,(D%NSTA(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," D%NONL=",& + & 32(1X,I3))') JA,JGL,(D%NONL(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' '')') +ENDIF + + + +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE SUESTAONL +END MODULE SUESTAONL_MOD diff --git a/src/etrans/gpu/internal/tpmald_dim.F90 b/src/etrans/gpu/internal/tpmald_dim.F90 new file mode 100644 index 0000000..7163342 --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_dim.F90 @@ -0,0 +1,23 @@ +MODULE TPMALD_DIM + +! Module for dimensions. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDDIM_TYPE + +! COLLOCATION GRID DIMENSIONS + +INTEGER(KIND=JPIM) :: NDGLSUR ! Number of rows of latitudes+... +INTEGER(KIND=JPIM) :: NMSMAX ! Zonal truncation +INTEGER(KIND=JPIM) :: NDGUX ! Number of rows in zone C+I +END TYPE ALDDIM_TYPE + +TYPE(ALDDIM_TYPE),ALLOCATABLE,TARGET :: ALDDIM_RESOL(:) +TYPE(ALDDIM_TYPE),POINTER :: RALD + +END MODULE TPMALD_DIM diff --git a/src/etrans/gpu/internal/tpmald_distr.F90 b/src/etrans/gpu/internal/tpmald_distr.F90 new file mode 100644 index 0000000..91c1c48 --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_distr.F90 @@ -0,0 +1,27 @@ +MODULE TPMALD_DISTR + +! Module for distributed memory environment. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDDISTR_TYPE + +INTEGER(KIND=JPIM) ,POINTER :: NESM0(:) ! Address in a spectral array of (m, n=m) +INTEGER(KIND=JPIM) ,POINTER :: NCPL2M(:) ! Number of complex Laplace coefficient for m given +INTEGER(KIND=JPIM) ,POINTER :: NPME(:) ! Address for the Laplace operator and its inverse + +END TYPE ALDDISTR_TYPE + +TYPE(ALDDISTR_TYPE),ALLOCATABLE,TARGET :: ALDDISTR_RESOL(:) +TYPE(ALDDISTR_TYPE),POINTER :: DALD + +INTEGER(KIND=JPIM), ALLOCATABLE :: DALD_NESM0 (:) +INTEGER(KIND=JPIM), ALLOCATABLE :: DALD_NCPL2M (:) +INTEGER(KIND=JPIM), ALLOCATABLE :: DALD_NPME (:) + +END MODULE TPMALD_DISTR + diff --git a/src/etrans/gpu/internal/tpmald_fft.F90 b/src/etrans/gpu/internal/tpmald_fft.F90 new file mode 100644 index 0000000..337dade --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_fft.F90 @@ -0,0 +1,20 @@ +MODULE TPMALD_FFT + +! Module for Fourier transforms. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDFFT_TYPE +REAL(KIND=JPRB) ,POINTER :: TRIGSE(:) ! list of trigonometric function values +INTEGER(KIND=JPIM),POINTER :: NFAXE(:) ! list of factors of truncation +LOGICAL :: LFFT992=.TRUE. +END TYPE ALDFFT_TYPE + +TYPE(ALDFFT_TYPE),ALLOCATABLE,TARGET :: ALDFFT_RESOL(:) +TYPE(ALDFFT_TYPE),POINTER :: TALD + +END MODULE TPMALD_FFT diff --git a/src/etrans/gpu/internal/tpmald_fields.F90 b/src/etrans/gpu/internal/tpmald_fields.F90 new file mode 100644 index 0000000..bb5c3a8 --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_fields.F90 @@ -0,0 +1,20 @@ +MODULE TPMALD_FIELDS + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND_ECTRANS, ONLY : JPRBT + +IMPLICIT NONE + +SAVE + +TYPE ALDFIELDS_TYPE + +REAL(KIND=JPRB) ,POINTER :: RLEPINM(:) ! eigen-values of the inverse Laplace operator +END TYPE ALDFIELDS_TYPE + +TYPE(ALDFIELDS_TYPE),ALLOCATABLE,TARGET :: ALDFIELDS_RESOL(:) +TYPE(ALDFIELDS_TYPE),POINTER :: FALD + +REAL (KIND=JPRB), ALLOCATABLE :: FALD_RLEPINM (:) + +END MODULE TPMALD_FIELDS diff --git a/src/etrans/gpu/internal/tpmald_geo.F90 b/src/etrans/gpu/internal/tpmald_geo.F90 new file mode 100644 index 0000000..326739a --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_geo.F90 @@ -0,0 +1,22 @@ +MODULE TPMALD_GEO + +! Module containing data describing plane projection grid. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDGEO_TYPE + +! GEOGRAPHY + +REAL(KIND=JPRB) :: EYWN ! Y-reso +REAL(KIND=JPRB) :: EXWN ! X-reso +END TYPE ALDGEO_TYPE + +TYPE(ALDGEO_TYPE),ALLOCATABLE,TARGET :: ALDGEO_RESOL(:) +TYPE(ALDGEO_TYPE),POINTER :: GALD + +END MODULE TPMALD_GEO diff --git a/src/etrans/gpu/internal/tpmald_tcdis.F90 b/src/etrans/gpu/internal/tpmald_tcdis.F90 new file mode 100644 index 0000000..2b57ca5 --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_tcdis.F90 @@ -0,0 +1,13 @@ +MODULE TPMALD_TCDIS + +! useless + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRB) :: TCDIS + +END MODULE TPMALD_TCDIS diff --git a/src/etrans/include/etrans/edir_trans.h b/src/etrans/include/etrans/edir_trans.h new file mode 100644 index 0000000..6f97217 --- /dev/null +++ b/src/etrans/include/etrans/edir_trans.h @@ -0,0 +1,135 @@ +INTERFACE +SUBROUTINE EDIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV,AUX_PROC) + + +!**** *EDIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! PMEANU(:),PMEANV(:) - mean wind +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + + +END SUBROUTINE EDIR_TRANS + +END INTERFACE diff --git a/src/etrans/include/etrans/edir_transad.h b/src/etrans/include/etrans/edir_transad.h new file mode 100644 index 0000000..7dc6fa0 --- /dev/null +++ b/src/etrans/include/etrans/edir_transad.h @@ -0,0 +1,131 @@ +INTERFACE +SUBROUTINE EDIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + + +!**** *EDIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EDIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split +! +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIR_TRANS_CTLAD - control routine +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANV(:) + + +END SUBROUTINE EDIR_TRANSAD + + +END INTERFACE diff --git a/src/etrans/include/etrans/edist_grid.h b/src/etrans/include/etrans/edist_grid.h new file mode 100644 index 0000000..92e93ae --- /dev/null +++ b/src/etrans/include/etrans/edist_grid.h @@ -0,0 +1,57 @@ +INTERFACE +SUBROUTINE EDIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *EDIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL EDIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_GRID +END INTERFACE diff --git a/src/etrans/include/etrans/edist_spec.h b/src/etrans/include/etrans/edist_spec.h new file mode 100644 index 0000000..43b9b4b --- /dev/null +++ b/src/etrans/include/etrans/edist_spec.h @@ -0,0 +1,59 @@ +INTERFACE +SUBROUTINE EDIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSORT) + +!**** *EDIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL EDIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_SPEC +END INTERFACE diff --git a/src/etrans/include/etrans/egath_grid.h b/src/etrans/include/etrans/egath_grid.h new file mode 100644 index 0000000..a9742c3 --- /dev/null +++ b/src/etrans/include/etrans/egath_grid.h @@ -0,0 +1,56 @@ +INTERFACE +SUBROUTINE EGATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *EGATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL EGATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(IN) :: PGP(:,:,:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_GRID +END INTERFACE diff --git a/src/etrans/include/etrans/egath_spec.h b/src/etrans/include/etrans/egath_spec.h new file mode 100644 index 0000000..5a2842d --- /dev/null +++ b/src/etrans/include/etrans/egath_spec.h @@ -0,0 +1,64 @@ +INTERFACE +SUBROUTINE EGATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,KMSMAX,LDZA0IP) + +!**** *EGATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDIM1_IS_FLD - If TRUE first dimension of PSCPEC and PSPECG is the field dimension [.T.] +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- EGATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC + +END INTERFACE diff --git a/src/etrans/include/etrans/egpnorm_trans.h b/src/etrans/include/etrans/egpnorm_trans.h new file mode 100644 index 0000000..8c7fc4e --- /dev/null +++ b/src/etrans/include/etrans/egpnorm_trans.h @@ -0,0 +1,59 @@ +INTERFACE +SUBROUTINE EGPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *EGPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms using a 2 stage (NPRTRV,NPRTRW) communication rather +! than an approach using a more expensive global gather collective communication + +!** Interface. +! ---------- +! CALL EGPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! A.Bogatchev after gpnorm_trans + +! Modifications. +! -------------- +! Original : 12th Jun 2009 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB),INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +END SUBROUTINE EGPNORM_TRANS +END INTERFACE diff --git a/src/etrans/include/etrans/einv_trans.h b/src/etrans/include/etrans/einv_trans.h new file mode 100644 index 0000000..143d883 --- /dev/null +++ b/src/etrans/include/etrans/einv_trans.h @@ -0,0 +1,151 @@ +INTERFACE +SUBROUTINE EINV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTINV_CTL - control of Legendre transform +! EFTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANV(:) + + +END SUBROUTINE EINV_TRANS + +END INTERFACE diff --git a/src/etrans/include/etrans/einv_transad.h b/src/etrans/include/etrans/einv_transad.h new file mode 100644 index 0000000..9238649 --- /dev/null +++ b/src/etrans/include/etrans/einv_transad.h @@ -0,0 +1,150 @@ +INTERFACE +SUBROUTINE EINV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EINV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. +! +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANV(:) + + +END SUBROUTINE EINV_TRANSAD + +END INTERFACE diff --git a/src/etrans/include/etrans/esetup_trans.h b/src/etrans/include/etrans/esetup_trans.h new file mode 100644 index 0000000..d29d9bb --- /dev/null +++ b/src/etrans/include/etrans/esetup_trans.h @@ -0,0 +1,86 @@ +INTERFACE +SUBROUTINE ESETUP_TRANS(KMSMAX,KSMAX,KDGL,KDGUX,KLOEN,LDSPLIT,& + & KTMAX,KRESOL,PEXWN,PEYWN,PWEIGHT,LDGRIDONLY,KNOEXTZL,KNOEXTZG,& + & LDUSEFFTW) +!**** *ESETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL ESETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space +! LDGRIDONLY - true if only grid space is required + + +! LDSPLIT describe the distribution among processors of +! grid-point data and has no relevance if you are using a single processor + +! LDUSEFFTW - Use FFTW for FFTs + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESETUP_DIMS - setup distribution independent dimensions +! SUEMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! ESETUP_GEOM - Compute arrays related to grid-point geometry +! SUEMP_TRANS - Second part of setup of distributed environment +! SUEFFT - setup for FFT + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 02-04-11 A. Bogatchev: Passing of TCDIS +! 02-11-14 C. Fischer: soften test on KDGL +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Dummy arguments +INTEGER(KIND=JPIM),INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +LOGICAL ,OPTIONAL,INTENT(IN) :: LDGRIDONLY +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEXWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEYWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZG +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUSEFFTW + +END SUBROUTINE ESETUP_TRANS +END INTERFACE diff --git a/src/etrans/include/etrans/especnorm.h b/src/etrans/include/etrans/especnorm.h new file mode 100644 index 0000000..7edf5d7 --- /dev/null +++ b/src/etrans/include/etrans/especnorm.h @@ -0,0 +1,56 @@ +INTERFACE +SUBROUTINE ESPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *ESPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL ESPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPECNORM +END INTERFACE diff --git a/src/etrans/include/etrans/etrans_end.h b/src/etrans/include/etrans/etrans_end.h new file mode 100644 index 0000000..fb1090f --- /dev/null +++ b/src/etrans/include/etrans/etrans_end.h @@ -0,0 +1,41 @@ +INTERFACE +SUBROUTINE ETRANS_END(CDMODE) + +!**** *ETRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL ETRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 after G.Radnoti + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE + +END SUBROUTINE ETRANS_END +END INTERFACE diff --git a/src/etrans/include/etrans/etrans_inq.h b/src/etrans/include/etrans/etrans_inq.h new file mode 100644 index 0000000..04f2e56 --- /dev/null +++ b/src/etrans/include/etrans/etrans_inq.h @@ -0,0 +1,172 @@ +INTERFACE +SUBROUTINE ETRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + & KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + & KMYMS,KESM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + & KULTPP,KPTRLS,& + & KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + & LDSPLITLAT,LDLINEAR_GRID,& + & KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,KDEF_RESOL,LDLAM,& + & PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KCPL2M,KCPL4M,KPROCM) + +!**** *ETRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL ETRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KESM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation - n direction +! KMSMAX - spectral truncation - m direction +! KNVALUE - n value for each KSPEC2 spectral coeffient +! KMVALUE - m value for each KSPEC2 spectral coeffient +! LDLINEAR_GRID : .TRUE. if the grid is linear + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLEPINM - Eigen-values of the inverse Laplace operator + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing with TFL 36R4 +! R. El Khatib 08-Aug-2012 KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,LDLAM,KDEF_RESOL,LDLINEAR_GRID + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2G +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KNUMP +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOT +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTG +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTMX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KMYMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KESM0(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KUMPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPOSSP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KALLMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSTA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KONL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KULTPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL,INTENT(INOUT) :: LDSPLITLAT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL :: PGW(:) ! Argument NOT used +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLEI3 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPOLEGL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPMS(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL2M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL4M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPROCM(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMVALUE(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PLEPINM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLINEAR_GRID + +END SUBROUTINE ETRANS_INQ +END INTERFACE diff --git a/src/etrans/include/etrans/etrans_release.h b/src/etrans/include/etrans/etrans_release.h new file mode 100644 index 0000000..846424c --- /dev/null +++ b/src/etrans/include/etrans/etrans_release.h @@ -0,0 +1,6 @@ +INTERFACE +SUBROUTINE ETRANS_RELEASE(KRESOL) +USE PARKIND1 ,ONLY : JPIM +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL +END SUBROUTINE ETRANS_RELEASE +END INTERFACE diff --git a/src/etrans/programs/CMakeLists.txt b/src/etrans/programs/CMakeLists.txt new file mode 100644 index 0000000..303f898 --- /dev/null +++ b/src/etrans/programs/CMakeLists.txt @@ -0,0 +1,62 @@ +foreach( prec sp dp ) + +if ( HAVE_CPU ) + + if( HAVE_${prec} ) + + ecbuild_add_executable(TARGET aatestprog_${prec} + SOURCES aatestprog.F90 + LIBS + etrans_${prec} + trans_${prec} + fiat + parkind_${prec} + LINKER_LANGUAGE Fortran + ) + target_link_libraries( aatestprog_${prec} ${OpenMP_Fortran_FLAGS} OpenMP::OpenMP_Fortran ) + + endif() + +endif() # HAVE_CPU + +if ( HAVE_GPU ) + if ( HAVE_${prec} ) + foreach( gpumethod acc ) + if( HAVE_${gpumethod} ) + if( gpumethod STREQUAL acc ) + ecbuild_add_executable(TARGET aatestprog-gpu-${prec}-${gpumethod} + SOURCES aatestprog.F90 + LINKER_LANGUAGE Fortran + LIBS + etrans_gpu_${prec}_${gpumethod} + trans_gpu_${prec}_${gpumethod} + fiat + parkind_${prec} + OpenACC::OpenACC_Fortran + ) + target_compile_definitions( aatestprog-gpu-${prec}-${gpumethod} PRIVATE ACCGPU ) + endif() + + if( gpumethod STREQUAL omp ) + + ecbuild_add_executable(TARGET aatestprog-gpu-${prec}-${gpumethod} + SOURCES aatestprog.F90 + LINKER_LANGUAGE Fortran + LIBS + etrans_gpu_${prec}_${gpumethod} + trans_gpu_${prec}_${gpumethod} + fiat + parkind_${prec} + ) + target_compile_definitions( aatestprog-gpu-${prec}-${gpumethod} PRIVATE OMPGPU ) + endif() + + target_link_options ( aatestprog-gpu-${prec}-${gpumethod} INTERFACE "$<$:${${gpumethod}_link_flags}>" ) + target_compile_options( aatestprog-gpu-${prec}-${gpumethod} PUBLIC $<$:${${gpumethod}_flags}>) + + endif() + endforeach() + endif() # HAVE_${prec} +endif() # HAVE_GPU + +endforeach() # prec diff --git a/src/etrans/programs/aatestprog.F90 b/src/etrans/programs/aatestprog.F90 new file mode 100644 index 0000000..e9b322a --- /dev/null +++ b/src/etrans/programs/aatestprog.F90 @@ -0,0 +1,566 @@ +PROGRAM TEST + +! Author : ?? +! Modified : +! D. Paradis & R. El Khatib : 04-07-22 GRIBEX + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_INIT, MPL_END, MPL_BARRIER, & + & MPL_BUFFER_METHOD, MPL_MYRANK, MPL_NPROC +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: I_NSMAX,I_NDGL,NPROC,NPRGPNS,NPRGPEW,NPRTRW,NPRTRV +INTEGER(KIND=JPIM) :: I_NDLON,I_NMSMAX +INTEGER(KIND=JPIM) :: I_NOUT,I_MYPROC,I_NSPECG,I_NSPEC2G,IFGATHG +INTEGER(KIND=JPIM) :: I_NFLEV +INTEGER(KIND=JPIM) :: JLEV,II,J,I_NFLEVG,JJ,ITAG,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: I_NSPEC2,I_NGPTOTG,I_NGPTOT,I_NPROMA,I_NGPBLKS,I_MYSETV +INTEGER(KIND=JPIM) ,ALLOCATABLE :: I_NLOEN(:),ITO(:) +!rg +INTEGER(KIND=JPIM) ,ALLOCATABLE :: ISNAX(:),ISMAX(:),IILEV(:) +INTEGER(KIND=JPIM) :: I_NDGUX +REAL(KIND=JPRB) :: Z_EXWN,Z_EYWN +!rg +REAL(KIND=JPRB) , ALLOCATABLE :: ZSPEC(:,:),ZVOR(:,:),ZDIV(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZSPECG(:,:),ZSPEC2(:,:),ZNORM(:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZNORM1T(:),ZNORM2T(:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZNORM1D(:),ZNORM2D(:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZNORM1V(:),ZNORM2V(:),ZSEND(:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZG(:,:,:),ZGG(:,:),ZVORG(:,:),ZDIVG(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZMEANU(:),ZMEANV(:),ZMEANUG(:),ZMEANVG(:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZZMEANUVG(:,:),ZZMEANUV(:) +INTEGER(KIND=JPIM) :: IFROM(1000),IVSET(1000),IBSET + +CHARACTER (LEN = 6) :: CLNAME +CHARACTER (LEN = 10) :: CLNAME1 + +CHARACTER(100):: CL_CFD +INTEGER(KIND=JPIM) :: I_NUFD,I_NBI,IREP,I,IOFF +LOGICAL :: LSPLIT , LMPOFF +INTEGER(KIND=JPIM),PARAMETER :: I_NBRART=3 +INTEGER(KIND=JPIM),PARAMETER :: I_DIM1=512,I_DIM2=512 +INTEGER(KIND=JPIM),PARAMETER :: DIM=I_DIM1*I_DIM2 +CHARACTER(16) :: CL_CNMCA +CHARACTER(6),DIMENSION(1:I_NBRART) :: CL_CPREF +INTEGER(KIND=JPIM) :: ILEV +CHARACTER(16),DIMENSION(1:I_NBRART) :: CL_CVARFA +INTEGER(KIND=JPIM) :: ITYPTR,ITRUNC +REAL(KIND=JPRB) :: ZSLAPO,ZCLOPO,ZSLOPO,ZCODIL,Z_VP00 +INTEGER(KIND=JPIM),ALLOCATABLE :: INLOPA(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: INZOPA(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: I_ZGEOM(:) +REAL(KIND=JPRB),ALLOCATABLE ::Z_VALH(:),Z_VBH(:) +REAL(KIND=JPRB),ALLOCATABLE :: Z_DATA(:,:,:) +REAL(KIND=JPRB),ALLOCATABLE :: Z_DATAG(:) +INTEGER(KIND=JPIM) :: JC +LOGICAL :: LL_LDGARD +INTEGER(KIND=JPIM) :: JM,JN,ISP,IFTM,ISP0,ISP1,ISP2,ISP3 +REAL(KIND=JPRB),ALLOCATABLE :: Z_PSPFILE(:,:),Z_PSPBUF(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZSPU(:,:),ZSPV(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZSPDIV(:,:),ZSPVOR(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: I_NDIM0G1(:) +REAL(KIND=JPRB) :: ZN,ZM +!REAL_B,ALLOCATABLE :: zbuf_data(:,:,:) +!REAL_B,ALLOCATABLE :: zbuf_data(:) +!REAL_B,ALLOCATABLE :: zbufr_data(:) +!REAL_B,ALLOCATABLE :: zbufr_data(:) +INTEGER(KIND=JPIM) :: JPROC +INTEGER(KIND=JPIM) ,ALLOCATABLE :: I_NPRCIDS(:) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +INTEGER(KIND=JPIM) :: INBITS, ISTRON, IPUILA +INTEGER(KIND=JPIM), ALLOCATABLE :: INGRIB(:,:) +LOGICAL :: LLEXIST +LOGICAL :: LLCOSP=.TRUE. +!********************************************************************* + +!NAMELIST/NAMTEST/NPRGPNS,NPRTRW,NPROC,NPRGPEW,NPRTRV,LSPLIT,LMPOFF + +#include "setup_trans0.h" +#include "esetup_trans.h" +#include "einv_trans.h" +#include "edir_trans.h" +#include "edist_spec.h" +#include "egath_spec.h" +#include "egath_grid.h" +#include "etrans_inq.h" +#include "especnorm.h" + +!*********************************************************************** + +LMPOFF=.FALSE. + +IF( LMPOFF ) THEN + NPROC=1 + NPRGPNS=1 + NPRGPEW=1 + NPRTRW=1 + NPRTRV=1 + LSPLIT=.FALSE. + I_NOUT = 6 + WRITE(I_NOUT,*)'voila le fichier de sortie' + I_MYPROC=1 +ELSE + + !READ(4,NAMTEST) + + CALL MPL_INIT + + NPROC=MPL_NPROC() + NPRGPNS=NPROC + NPRGPEW=1 + NPRTRW=NPROC + NPRTRV=1 + LSPLIT=.FALSE. + + I_MYPROC = MPL_MYRANK() + WRITE(*,*) "i am on proc no ",I_MYPROC + I_NOUT = 20+I_MYPROC + WRITE(CLNAME,'(a,i2.2)') 'out.',I_MYPROC + OPEN(I_NOUT,FILE=CLNAME) + ALLOCATE(I_NPRCIDS(NPROC)) + DO JPROC=1,NPROC + I_NPRCIDS(JPROC)=JPROC + ENDDO + + CALL MPL_BUFFER_METHOD(KMP_TYPE=2,KMBX_SIZE=800000,KPROCIDS=I_NPRCIDS) + +ENDIF +I_MYSETV=MOD(I_MYPROC-1,NPRTRV)+1 +WRITE(I_NOUT,*) 'namelist finished' +call flush(I_NOUT) +!*********************************************************************** +IF (LHOOK) CALL DR_HOOK('TEST',0,ZHOOK_HANDLE) +!********************************** +I_NPROMA=8 +ITAG=1 +CL_CFD='inputfile' +I_NUFD=97 +CL_CNMCA='CADRE.STANDARD0 ' + +ALLOCATE(INLOPA(I_DIM1)) +ALLOCATE(INZOPA(I_DIM2)) +ALLOCATE(I_ZGEOM(I_DIM1)) +ALLOCATE(Z_VALH(I_DIM1)) +ALLOCATE(Z_VBH(I_DIM1)) + +! manual settings +ITRUNC=2 ! NSMAX +ITYPTR=3 ! NMSMAX +INLOPA(6)=6 ! NDGUX==NLAT +I_NDGL=6 ! NLAT +I_NDLON=8 ! NLON + +I_NFLEVG=5 +I_NDGUX=INLOPA(6) +CL_CPREF(1)='S' +CL_CPREF(2)='S' +CL_CPREF(3)='S' +CL_CVARFA(1)='WIND.U.PHYS ' +CL_CVARFA(2)='WIND.V.PHYS ' +CL_CVARFA(3)='TEMPERATURE ' +ALLOCATE(Z_DATA(DIM,I_NFLEVG,I_NBRART)) +ALLOCATE(INGRIB(I_NFLEVG,I_NBRART)) +Z_DATA(:,:,:)=123.456 +call flush(I_NOUT) +DEALLOCATE(INLOPA) +DEALLOCATE(INZOPA) +DEALLOCATE(I_ZGEOM) +DEALLOCATE(Z_VALH) +DEALLOCATE(Z_VBH) +!********************************** +I_NSMAX=ITRUNC +I_NMSMAX=ABS(ITYPTR) +WRITE(I_NOUT,*) 'I_NSMAX=',I_NSMAX +WRITE(I_NOUT,*) 'I_NMSMAX=',I_NMSMAX +ALLOCATE(ISNAX(0:I_NMSMAX)) +ALLOCATE(ISMAX(0:I_NSMAX)) +CALL ELLIPS(I_NSMAX,I_NMSMAX,ISNAX,ISMAX) +WRITE(I_NOUT,*) 'definition of ellips:' +WRITE(I_NOUT,*) 'isnax:',ISNAX +WRITE(I_NOUT,*) 'ismax:',ISMAX +call flush(I_NOUT) +!********************************** +I_NSPECG=0 +DO J=0,I_NMSMAX + I_NSPECG=I_NSPECG+2*(ISNAX(J)+1) +ENDDO +WRITE(I_NOUT,*) 'I_NSPECG=',I_NSPECG +I_NSPEC2G=I_NSPECG*2 +WRITE(I_NOUT,*) 'I_NSPEC2G=',I_NSPEC2G +call flush(I_NOUT) +!************************************* +ALLOCATE(I_NLOEN(I_NDGL)) +I_NLOEN(:) = I_NDLON +CALL SETUP_TRANS0(KPROMATR=4,KOUT=I_NOUT,KERR=0,KPRINTLEV=2,KMAX_RESOL=2,& + & KPRGPNS=NPRGPNS,KPRGPEW=NPRGPEW,KPRTRW=NPRTRW) +WRITE(I_NOUT,*) I_MYPROC,'after setup_trans0' + +Z_EXWN=0.3926377E-05 +Z_EYWN=0.5453301E-05 +CALL ESETUP_TRANS(KMSMAX=I_NMSMAX,KSMAX=I_NSMAX,KDGL=I_NDGL,KDGUX=I_NDGUX, & + & KLOEN=I_NLOEN,LDSPLIT=LSPLIT,PEXWN=Z_EXWN,PEYWN=Z_EYWN) +WRITE(0,*) I_MYPROC,'after esetup_trans' +CALL ETRANS_INQ(KSPEC2=I_NSPEC2,KGPTOT=I_NGPTOT,KGPTOTG=I_NGPTOTG) +WRITE(0,*) 'I_NGPTOTG=',I_NGPTOTG +I_NGPBLKS= (I_NGPTOT-1)/I_NPROMA+1 +WRITE(I_NOUT,*) 'I_NGPBLKS=',I_NGPBLKS +WRITE(I_NOUT,*) 'I_NSPEC2=',I_NSPEC2,' I_NGPTOT=',I_NGPTOT +WRITE(0,*) 'I_NSPEC2=',I_NSPEC2,' I_NGPTOT=',I_NGPTOT,'on proc ',I_MYPROC +WRITE(I_NOUT,*) 'setup finished' +CALL FLUSH(I_NOUT) +!******************************************** + +!**************************************** +ALLOCATE(ZSPU(I_NFLEVG,I_NSPEC2G)) +ALLOCATE(ZSPV(I_NFLEVG,I_NSPEC2G)) +ALLOCATE(ZSPDIV(I_NFLEVG,I_NSPEC2G)) +ALLOCATE(ZSPVOR(I_NFLEVG,I_NSPEC2G)) +DO JLEV=1,I_NFLEVG + DO I=1,I_NSPEC2G + ZSPU(JLEV,I)=Z_DATA(I,JLEV,1) + ZSPV(JLEV,I)=Z_DATA(I,JLEV,2) + ENDDO +ENDDO +DO JLEV=1,I_NFLEVG + II=0 + DO JN=0,I_NSMAX + DO JM=0,ISMAX(JN) + II=II+1 + ISP0=II + ISP1=II+1 + ISP2=II+2 + ISP3=II+3 + ZN=REAL(JN) + ZM=REAL(JM) + ZSPDIV(JLEV,II)=-ZN*Z_EYWN*ZSPV(JLEV,ISP1)-ZM*Z_EXWN*ZSPU(JLEV,ISP2) + ZSPVOR(JLEV,II)=ZN*Z_EYWN*ZSPU(JLEV,ISP1)-ZM*Z_EXWN*ZSPV(JLEV,ISP2) + II=II+1 + ZSPDIV(JLEV,II)=ZN*Z_EYWN*ZSPV(JLEV,ISP0)-ZM*Z_EXWN*ZSPU(JLEV,ISP3) + ZSPVOR(JLEV,II)=-ZN*Z_EYWN*ZSPU(JLEV,ISP0)-ZM*Z_EXWN*ZSPV(JLEV,ISP3) + II=II+1 + ZSPDIV(JLEV,II)=-ZN*Z_EYWN*ZSPV(JLEV,ISP3)+ZM*Z_EXWN*ZSPU(JLEV,ISP0) + ZSPVOR(JLEV,II)=ZN*Z_EYWN*ZSPU(JLEV,ISP3)+ZM*Z_EXWN*ZSPV(JLEV,ISP0) + II=II+1 + ZSPDIV(JLEV,II)=ZN*Z_EYWN*ZSPV(JLEV,ISP2)+ZM*Z_EXWN*ZSPU(JLEV,ISP1) + ZSPVOR(JLEV,II)=-ZN*Z_EYWN*ZSPU(JLEV,ISP2)+ZM*Z_EXWN*ZSPV(JLEV,ISP1) + ENDDO + ENDDO +ENDDO +WRITE(I_NOUT,*) 'Computing of divergence and vorticity finished' +DO JLEV=1,I_NFLEVG + DO I=1,I_NSPEC2G + Z_DATA(I,JLEV,1)=ZSPVOR(JLEV,I) + Z_DATA(I,JLEV,2)=ZSPDIV(JLEV,I) + ENDDO +ENDDO +!************************************** +ALLOCATE(ZMEANUG(I_NFLEVG)) +ALLOCATE(ZMEANVG(I_NFLEVG)) +ZMEANUG(1:I_NFLEVG)=ZSPU(1:I_NFLEVG,1) +ZMEANVG(1:I_NFLEVG)=ZSPV(1:I_NFLEVG,1) +!************************************** + +!******************************************* +ALLOCATE(I_NDIM0G1(0:I_NMSMAX)) +CALL ETRANS_INQ(KDIM0G=I_NDIM0G1) +WRITE(I_NOUT,*) 'I_NDIM0G1: ',I_NDIM0G1 +! Rearrange spectral data structure (see espareord) +ALLOCATE(Z_PSPFILE(I_NFLEVG,I_NSPEC2G)) +ALLOCATE(Z_PSPBUF(I_NFLEVG,I_NSPEC2G)) +! IF (I_MYPROC == 1) THEN + ! OPEN(UNIT=17,FILE='avant.glob.nonreorg') + ! DO J=1,I_NSPEC2G + ! WRITE(17,*) Z_DATA(J,1,3) + ! ENDDO + ! CLOSE(17) +! ENDIF +DO JC=1,3 + DO JLEV=1,I_NFLEVG + IF (.NOT.(INGRIB(JLEV,JC)==-1 .OR. INGRIB(JLEV,JC)==3)) THEN + II=0 + DO JN=0,I_NSMAX + IOFF=0 + DO JM=0,ISMAX(JN) + DO IFTM=1,4 + ISP=IOFF+4*JN+IFTM + II=II+1 + Z_PSPFILE(JLEV,II)=Z_DATA(II,JLEV,JC) + Z_PSPBUF(JLEV,ISP)=Z_PSPFILE(JLEV,II) + ENDDO + IOFF=IOFF+4*(ISNAX(JM)+1) + ENDDO + ENDDO + Z_DATA(1:I_NSPEC2G,JLEV,JC)=Z_PSPBUF(JLEV,1:I_NSPEC2G) + ENDIF + ENDDO +ENDDO + +! IF (I_MYPROC == 1) THEN + ! OPEN(UNIT=17,FILE='avant.glob') + ! DO J=1,I_NSPEC2G + ! WRITE(17,*) Z_DATA(J,1,3) + ! ENDDO + ! CLOSE(17) +! ENDIF +DEALLOCATE(Z_PSPFILE) +DEALLOCATE(Z_PSPBUF) +WRITE(I_NOUT,*) 'reorganisation finished' +CALL FLUSH(I_NOUT) +!******************************************* +ALLOCATE(ZSPECG(I_NFLEVG,I_NSPEC2G)) +ALLOCATE(ZVORG(I_NFLEVG,I_NSPEC2G)) +ALLOCATE(ZDIVG(I_NFLEVG,I_NSPEC2G)) +DO J=1,I_NFLEVG + DO I=1,I_NSPEC2G + ZVORG(J,I)=Z_DATA(I,J,1) + ZDIVG(J,I)=Z_DATA(I,J,2) + ZSPECG(J,I)=Z_DATA(I,J,3) + ENDDO +ENDDO +!************************************* +I_NFLEV=0 +DO J=1,I_NFLEVG + IVSET(J)=MOD(J,NPRTRV)+1 + IF(IVSET(J) == I_MYSETV) I_NFLEV=I_NFLEV+1 +ENDDO +WRITE(0,*) 'I_NFLEV on ',I_MYPROC,I_NFLEV +WRITE(I_NOUT,*)' I_NFLEV=',I_NFLEV +ALLOCATE(ZSPEC(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZSPEC2(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZVOR(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZDIV(I_NFLEV,I_NSPEC2)) +IFROM(1:I_NFLEVG)=1 +ZSPEC=300.0 +WRITE(0,*) I_MYPROC,' before edist_spec' +CALL EDIST_SPEC(PSPECG=ZSPECG,KFDISTG=I_NFLEVG,KFROM=IFROM,PSPEC=ZSPEC,KVSET=IVSET) +WRITE(0,*) I_MYPROC,' after edist_spec 1' +CALL EDIST_SPEC(PSPECG=ZVORG,KFDISTG=I_NFLEVG,KFROM=IFROM,PSPEC=ZVOR,KVSET=IVSET) +WRITE(0,*) I_MYPROC,' after edist_spec 2' +CALL EDIST_SPEC(PSPECG=ZDIVG,KFDISTG=I_NFLEVG,KFROM=IFROM,PSPEC=ZDIV,KVSET=IVSET) +WRITE(I_NOUT,*) 'distr finished' +! distribution of mean wind along vertical +ALLOCATE(ZMEANU(I_NFLEV)) +ALLOCATE(ZMEANV(I_NFLEV)) +IF (NPRTRV>1) THEN + ALLOCATE(ZZMEANUVG(NPROC,2*I_NFLEVG)) + ALLOCATE(ZZMEANUV(2*I_NFLEV)) + ALLOCATE(IILEV(NPROC)) + IILEV(1:NPROC)=0 + DO J=1,I_NFLEVG + IBSET=IVSET(J) + DO JA=1,NPRTRW + CALL SET2PE(ISND,0,0,JA,IBSET) + ISND=I_NPRCIDS(ISND) + IILEV(ISND)=IILEV(ISND)+1 + ZZMEANUVG(ISND,2*(IILEV(ISND)-1)+1)=ZMEANUG(J) + ZZMEANUVG(ISND,2*IILEV(ISND))=ZMEANVG(J) + ENDDO + ENDDO + IF (I_MYPROC == 1) THEN + DO JPROC=1,NPROC + DO JLEV=1,I_NFLEVG + ENDDO + ENDDO + ENDIF + IF (I_MYPROC == 1) THEN +! plain copy + IF (I_NFLEV /= IILEV(1)) CALL ABORT_TRANS('lengths are not the same on 1') + DO J=1,I_NFLEV + ZMEANU(J)=ZZMEANUVG(1,2*(J-1)+1) + ZMEANV(J)=ZZMEANUVG(1,2*J) + ENDDO + ALLOCATE(ZSEND(2*I_NFLEVG)) + DO JPROC=2,NPROC + ZSEND(1:2*IILEV(JPROC))=ZZMEANUVG(JPROC,1:2*IILEV(JPROC)) + ITAG=300000+I_NFLEVG*NPROC+JPROC + CALL MPL_SEND(ZSEND(1:2*IILEV(JPROC)),KDEST=JPROC,KTAG=ITAG,CDSTRING='main:') + ENDDO + ELSE + ITAG=300000+I_NFLEVG*NPROC+I_MYPROC + CALL MPL_RECV(ZZMEANUV(1:2*I_NFLEV),KSOURCE=1,KTAG=ITAG,KOUNT=ILEN,CDSTRING='main:') + IF (ILEN /= 2*I_NFLEV) CALL ABORT_TRANS('main: RECV INVALID RECEIVE MESSAGE LENGTH') + DO J=1,I_NFLEV + ZMEANU(J)=ZZMEANUV(2*(J-1)+1) + ZMEANV(J)=ZZMEANUV(2*J) + ENDDO + ENDIF + +ELSE + ZMEANU(:)=ZMEANUG(:) + ZMEANV(:)=ZMEANVG(:) +ENDIF +!******************************************* +WRITE(I_NOUT,*) 'ivset(1:I_NFLEVG)=',IVSET(1:I_NFLEVG) +WRITE(0,*) I_MYPROC,' after edist_spec 3' +CALL FLUSH(I_NOUT) +!******************************************* +ALLOCATE(ZNORM(I_NFLEVG)) +ALLOCATE(ZNORM1T(I_NFLEVG)) +ALLOCATE(ZNORM1V(I_NFLEVG)) +ALLOCATE(ZNORM1D(I_NFLEVG)) +ALLOCATE(ZNORM2T(I_NFLEVG)) +ALLOCATE(ZNORM2V(I_NFLEVG)) +ALLOCATE(ZNORM2D(I_NFLEVG)) +WRITE(0,*) I_MYPROC,' after znorm allocation' +CALL ESPECNORM(PSPEC=ZSPEC,KVSET=IVSET(1:I_NFLEVG),PNORM=ZNORM) +WRITE(0,*) I_MYPROC,' after especnorm 1' +WRITE(I_NOUT,*) 'znorm_avant t ' +DO J=1,I_NFLEVG + ZNORM1T(J)=ZNORM(J) + IF (I_MYPROC == 1) WRITE(I_NOUT,*) ' ',J,' ',ZNORM(J) +ENDDO +!*********************************************** +CALL ESPECNORM(PSPEC=ZVOR,KVSET=IVSET(1:I_NFLEVG),PNORM=ZNORM) +! WRITE(0,*) I_MYPROC,' after especnorm 2' +! WRITE(I_NOUT,*) 'znorm_avant vor ' +! DO J=1,I_NFLEVG + ! ZNORM1V(J)=ZNORM(J) + ! IF (I_MYPROC == 1) WRITE(I_NOUT,*) ' ',J,' ',ZNORM(J) +! ENDDO +!*********************************************** +CALL ESPECNORM(PSPEC=ZDIV,KVSET=IVSET(1:I_NFLEVG),PNORM=ZNORM) +! WRITE(0,*) I_MYPROC,' after especnorm 3' +! WRITE(I_NOUT,*) 'znorm_avant div ' +! DO J=1,I_NFLEVG + ! ZNORM1D(J)=ZNORM(J) + ! IF (I_MYPROC == 1) WRITE(I_NOUT,*) ' ',J,' ',ZNORM(J) +! ENDDO +!*********************************************** +CALL FLUSH(I_NOUT) +ALLOCATE(ZG(I_NPROMA,3*I_NFLEVG,I_NGPBLKS)) +ALLOCATE(ZGG(I_NGPTOTG,3*I_NFLEVG)) +ALLOCATE(ITO(3*I_NFLEVG)) +ZG=0.0 +!write(nout,*) 'temperature avant dirtr' +!write(nout,*) zspec(1,1:10) +!write(nout,*) 'divergence avant dirtr' +!write(nout,*) zdiv(1,1:10) +!***************************************************** +!************************************************************** +! IF ( NPROC>1 ) THEN + ! WRITE(CLNAME1,'(a,i2.2)') 'avant.',I_MYPROC +! ELSE + ! CLNAME1='avant.1' +! ENDIF +! OPEN(UNIT=97,FILE=CLNAME1,FORM='formatted') +! DO J=1,I_NSPEC2 + ! WRITE(97,FMT='(3(E20.8,2x))')ZSPEC(1,J),ZVOR(1,J),ZDIV(1,J) +! ENDDO +! CLOSE(97) +!************************************************************** + +CALL EINV_TRANS(PSPVOR=ZVOR,PSPDIV=ZDIV,PSPSCALAR=ZSPEC,PGP=ZG,PMEANU=ZMEANU,PMEANV=ZMEANV,& + & KPROMA=I_NPROMA,KVSETSC=IVSET(1:I_NFLEVG), KVSETUV=IVSET(1:I_NFLEVG)) + +ITO(:)=1 +IFGATHG=3*I_NFLEVG +CALL EGATH_GRID(PGPG=ZGG,KPROMA=I_NPROMA,KFGATHG=IFGATHG,KTO=ITO,PGP=ZG) + + +!************************************************************** + + +IF (I_MYPROC == 1) THEN + WRITE(0,*) ' after einv_trans' + DO J=1,3*I_NFLEVG + WRITE(I_NOUT,*) (ZGG(JJ,J),JJ=1,I_NGPTOTG,1000) + WRITE(I_NOUT,*) '*************************************' + CALL FLUSH(I_NOUT) + ENDDO + CALL FLUSH(I_NOUT) +ENDIF +!************************************************************ +ZSPEC=0.0 +ZVOR=0.0 +ZDIV=0.0 +CALL EDIR_TRANS(PSPVOR=ZVOR,PSPDIV=ZDIV,PSPSCALAR=ZSPEC,PGP=ZG,PMEANU=ZMEANU,PMEANV=ZMEANV,& + & KPROMA=I_NPROMA,KVSETSC=IVSET(1:I_NFLEVG), KVSETUV=IVSET(1:I_NFLEVG)) +!************************************************************** + +WRITE(I_NOUT,*) 'temperature after dirtr' +WRITE(I_NOUT,*) ZSPEC(1,1:10) + +!************************************************************** +!************************************************************** +! IF ( NPROC>1 ) THEN + ! WRITE(CLNAME1,'(a,i2.2)') 'apres.',I_MYPROC +! ELSE + ! CLNAME1='apres.1' +! ENDIF +! OPEN(UNIT=97,FILE=CLNAME1,FORM='formatted') +! DO J=1,I_NSPEC2 + ! WRITE(97,FMT='(3(E20.8,2x))')ZSPEC(1,J),ZVOR(1,J),ZDIV(1,J) +! ENDDO +! CLOSE(97) +!************************************************************** +!************************************************************* +CALL ESPECNORM(PSPEC=ZSPEC,KVSET=IVSET(1:I_NFLEVG),PNORM=ZNORM) +! WRITE(I_NOUT,*) 'znorm_apres t' +! DO J=1,I_NFLEVG + ! ZNORM2T(J)=ZNORM(J) + ! WRITE(I_NOUT,*) ' ',J,' ',ZNORM(J) +! ENDDO +! CALL FLUSH(I_NOUT) +!************************************** +CALL ESPECNORM(PSPEC=ZVOR,KVSET=IVSET(1:I_NFLEVG),PNORM=ZNORM) +! WRITE(I_NOUT,*) 'znorm_apres vor' +! DO J=1,I_NFLEVG + ! ZNORM2V(J)=ZNORM(J) + ! WRITE(I_NOUT,*) ' ',J,' ',ZNORM(J) +! ENDDO +!************************************** +CALL ESPECNORM(PSPEC=ZDIV,KVSET=IVSET(1:I_NFLEVG),PNORM=ZNORM) +! IF(I_MYPROC == 1) THEN + ! WRITE(I_NOUT,*) 'znorm_apres div' + ! DO J=1,I_NFLEVG + ! ZNORM2D(J)=ZNORM(J) + ! WRITE(I_NOUT,*) ' ',J,' ',ZNORM(J) + ! ENDDO +! ENDIF +DEALLOCATE(ZNORM) +!************************************************************** +IF ( I_MYPROC==1 ) THEN + OPEN(UNIT=97,FILE='fic_normt',FORM='formatted') + DO J=1,I_NFLEVG + WRITE(97,FMT='(2(E24.16)2x)')ZNORM1T(J),ZNORM2T(J) + ENDDO + CLOSE(97) + !************************************************************** + OPEN(UNIT=97,FILE='fic_normd',FORM='formatted') + DO J=1,I_NFLEVG + WRITE(97,FMT='(2(E24.16)2x)')ZNORM1D(J),ZNORM2D(J) + ENDDO + CLOSE(97) + !************************************************************** + OPEN(UNIT=97,FILE='fic_normv',FORM='formatted') + DO J=1,I_NFLEVG + WRITE(97,FMT='(2(E24.16)2x)')ZNORM1V(J),ZNORM2V(J) + ENDDO + CLOSE(97) + !************************************************************** +ENDIF + +DEALLOCATE(ZNORM1T) +DEALLOCATE(ZNORM2T) +DEALLOCATE(ZNORM1D) +DEALLOCATE(ZNORM2D) +DEALLOCATE(ZNORM1V) +DEALLOCATE(ZNORM2V) +DEALLOCATE(Z_DATA) +DEALLOCATE(INGRIB) + +IF (LHOOK) CALL DR_HOOK('TEST',1,ZHOOK_HANDLE) + +!!************************************** +100 CONTINUE +IF(NPROC > 1 .OR. .NOT.LMPOFF ) THEN + CALL MPL_BARRIER() + CALL MPL_END +ENDIF +CALL FLUSH(I_NOUT) +!************************************************************** +END PROGRAM TEST diff --git a/src/etrans/programs/test_adjoint.F90 b/src/etrans/programs/test_adjoint.F90 new file mode 100644 index 0000000..7bbebb9 --- /dev/null +++ b/src/etrans/programs/test_adjoint.F90 @@ -0,0 +1,377 @@ +PROGRAM TEST_ADJOINT +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE MPL_MODULE ,ONLY : MPL_ALLREDUCE, MPL_INIT, MPL_END, MPL_BARRIER, & + & MPL_BUFFER_METHOD, MPL_MYRANK +! This is not really correct usage +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: I_NSMAX,NDGL,I_NPROC,NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NREP +INTEGER(KIND=JPIM) :: I_NMSMAX, I_NDLON, I_NDGUX, J +INTEGER(KIND=JPIM) , ALLOCATABLE :: ISNAX(:),ISMAX(:) +INTEGER(KIND=JPIM) :: I_NOUT,I_MYPROC,I_NSPECG,I_NSPEC2G,I_NGPTOTG +INTEGER(KIND=JPIM) :: I_NFLEV,NFLEVG,NSEED +INTEGER(KIND=JPIM) :: I_NSPEC2,I_NGPTOT,NPROMA,I_NGPBLKS,I_MYSETV,I_NUMP +INTEGER(KIND=JPIM) ,ALLOCATABLE :: I_NLOEN(:),ITO(:),I_MYMS(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: I_NESM0(:),I_NCPL2M(:),I_SEED(:),IVSET(:) +INTEGER(KIND=JPIM) :: JLEV,JREP + +CHARACTER (LEN = 6) :: CLNAME + +LOGICAL :: LMPOFF + +REAL(KIND=JPRB) , ALLOCATABLE :: ZSPECX(:,:),ZSPECY(:,:),ZSPECP(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZVORX(:,:),ZVORY(:,:),ZVORP(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZDIVX(:,:),ZDIVY(:,:),ZDIVP(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZGX(:,:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZSPECYG(:,:),ZSPECXG(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZVORYG(:,:),ZVORXG(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZDIVYG(:,:),ZDIVXG(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZRANDSP(:),ZRANDGP(:,:),ZPERT(:,:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZMEANUX(:), ZMEANVX(:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZMEANUY(:), ZMEANVY(:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZMEANUP(:), ZMEANVP(:) +REAL(KIND=JPRB) :: ZSC, ZSCUV, ZSCT +REAL(KIND=JPRB) :: Z_EXWN, Z_EYWN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +NAMELIST /NAMCTL/ NPROMA,NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NREP,NDGL,NFLEVG,LMPOFF + +#include "setup_trans0.h" +#include "esetup_trans.h" +#include "etrans_inq.h" +#include "edir_trans.h" +#include "einv_trans.h" +#include "edir_transad.h" +#include "einv_transad.h" +#include "edist_grid.h" +#include "edist_spec.h" + +IF (LHOOK) CALL DR_HOOK('TEST_ADJOINT',0,ZHOOK_HANDLE) + +I_NSMAX=47 +NDGL=144 +NPROMA=8 +I_NMSMAX=66 +I_NDLON=200 +I_NDGUX=133 +NFLEVG=31 +LMPOFF=.FALSE. + +ALLOCATE(ISNAX(0:I_NMSMAX)) +ALLOCATE(ISMAX(0:I_NSMAX)) +ALLOCATE(I_NESM0(0:I_NMSMAX)) +ALLOCATE(I_NCPL2M(0:I_NMSMAX)) +CALL ELLIPS(I_NSMAX,I_NMSMAX,ISNAX,ISMAX) +WRITE(I_NOUT,*) ' I_NSMAX= ',I_NSMAX + +I_NSPECG=0 +DO J=0,I_NMSMAX + I_NSPECG=I_NSPECG+2*(ISNAX(J)+1) +ENDDO +WRITE(*,*) 'I_NSPECG:', I_NSPECG +I_NSPEC2G=I_NSPECG*2 + +NPRGPNS = 5 +NPRGPEW = 1 +NPRTRW = 5 +NPRTRV = 1 +NREP=1 + +READ(4,NAMCTL) +I_NPROC = NPRGPNS*NPRGPEW +IF(I_NPROC /= NPRTRW*NPRTRV) THEN + PRINT *,'NPRGPNS,NPRGPEW,NPRTRW,NPRTRV ',NPRGPNS,NPRGPEW,NPRTRW,NPRTRV + CALL ABORT_TRANS('NPRGPNS*NPRGPEW /= NPRTRW*NPRTRV') +ENDIF +WRITE(I_NOUT,*) ' NDGL=',NDGL + +IF(I_NPROC > 1 .OR..NOT.LMPOFF) THEN + CALL MPL_INIT + I_MYPROC = MPL_MYRANK() + I_NOUT = 20 + WRITE(CLNAME,'(A,I2.2)') 'OUT.',I_MYPROC + OPEN(I_NOUT,FILE=CLNAME) + CALL MPL_BUFFER_METHOD(KMP_TYPE=2,KMBX_SIZE=64000000,& + & KPROCIDS=(/ (J, J=1,I_NPROC) /)) +ELSE + I_NOUT = 6 + I_MYPROC = 1 +ENDIF +I_MYSETV = MOD(I_MYPROC-1,NPRTRV)+1 + +! Allocate global arrays. + +ALLOCATE(ZSPECYG(NFLEVG,I_NSPEC2G)) +ALLOCATE(ZSPECXG(NFLEVG,I_NSPEC2G)) +ALLOCATE(ZVORYG(NFLEVG,I_NSPEC2G)) +ALLOCATE(ZVORXG(NFLEVG,I_NSPEC2G)) +ALLOCATE(ZDIVYG(NFLEVG,I_NSPEC2G)) +ALLOCATE(ZDIVXG(NFLEVG,I_NSPEC2G)) +ALLOCATE(ZRANDSP(I_NSPEC2G)) +ALLOCATE(I_NLOEN(NDGL)) +ALLOCATE(ITO(NFLEVG)) +ALLOCATE(IVSET(NFLEVG)) + +I_NFLEV = 0 +DO JLEV=1,NFLEVG + IVSET(JLEV) = MOD(JLEV,NPRTRV)+1 + IF(IVSET(JLEV) == I_MYSETV) I_NFLEV = I_NFLEV+1 +ENDDO +WRITE(I_NOUT,*)' I_NFLEV=',I_NFLEV,' IVSET=',IVSET + +I_NLOEN(:) = I_NDLON +CALL SETUP_TRANS0(KOUT=I_NOUT,KERR=0,KPRINTLEV=2,KMAX_RESOL=1,& + & KPRGPNS=NPRGPNS,KPRGPEW=NPRGPEW,KPRTRW=NPRTRW) + +Z_EXWN=0.3926377E-05 +Z_EYWN=0.5453301E-05 +CALL ESETUP_TRANS(KMSMAX=I_NMSMAX,KSMAX=I_NSMAX,KDGL=NDGL,KDGUX=I_NDGUX, & + & KLOEN=I_NLOEN,LDSPLIT=.FALSE.,PEXWN=Z_EXWN, PEYWN=Z_EYWN) + +CALL ETRANS_INQ(KSPEC2=I_NSPEC2,KGPTOT=I_NGPTOT,KGPTOTG=I_NGPTOTG,KNUMP=I_NUMP) +I_NGPBLKS = (I_NGPTOT-1)/NPROMA+1 +WRITE(I_NOUT,*) 'I_NSPEC2=',I_NSPEC2,' I_NGPTOT=',I_NGPTOT,' I_NGPBLKS=',I_NGPBLKS + +ALLOCATE(I_MYMS(I_NUMP)) +ALLOCATE(ZRANDGP(I_NGPTOTG,1)) + +CALL ETRANS_INQ(KMYMS=I_MYMS,KESM0=I_NESM0,KCPL2M=I_NCPL2M) +WRITE(I_NOUT,*) 'I_MYMS=',I_MYMS,' I_NESM0=',I_NESM0,' I_NCPL2M=',I_NCPL2M + +WRITE(I_NOUT,*) 'SETUP FINISHED' +CALL FLUSH(I_NOUT) + +! Allocate local arrays. + +ALLOCATE(ZSPECX(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZSPECY(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZSPECP(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZVORX(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZVORY(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZVORP(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZDIVX(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZDIVY(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZDIVP(I_NFLEV,I_NSPEC2)) +ALLOCATE(ZMEANUX(I_NFLEV)) +ALLOCATE(ZMEANVX(I_NFLEV)) +ALLOCATE(ZMEANUY(I_NFLEV)) +ALLOCATE(ZMEANVY(I_NFLEV)) +ALLOCATE(ZMEANUP(I_NFLEV)) +ALLOCATE(ZMEANVP(I_NFLEV)) +ALLOCATE(ZGX(NPROMA,3*NFLEVG,I_NGPBLKS)) +ALLOCATE(ZPERT(NPROMA,1,I_NGPBLKS)) + +! Get seed of random number generator + +CALL RANDOM_SEED(SIZE=NSEED) +ALLOCATE(I_SEED(NSEED)) +CALL RANDOM_SEED(GET=I_SEED) + +! Prepare perturbations + +IF(I_MYPROC == 1) THEN + DO JLEV=1,NFLEVG + CALL RANDOM_NUMBER(ZRANDSP) + ZSPECYG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + CALL RANDOM_NUMBER(ZRANDSP) + ZSPECXG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + CALL RANDOM_NUMBER(ZRANDSP) + ZVORYG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + CALL RANDOM_NUMBER(ZRANDSP) + ZVORXG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + CALL RANDOM_NUMBER(ZRANDSP) + ZDIVYG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + CALL RANDOM_NUMBER(ZRANDSP) + ZDIVXG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + ENDDO + CALL RANDOM_NUMBER(ZRANDGP) + ZRANDGP(:,:) = (1.0_JPRB-2.0_JPRB*ZRANDGP(:,:)) +ENDIF + +ITO(:) = 1 +ZSPECX = 0.0_JPRB +ZSPECY = 0.0_JPRB +ZSPECP = 0.0_JPRB +ZVORX = 0.0_JPRB +ZVORY = 0.0_JPRB +ZVORP = 0.0_JPRB +ZDIVX = 0.0_JPRB +ZDIVY = 0.0_JPRB +ZDIVP = 0.0_JPRB +ZMEANUP= 0.0_JPRB +ZMEANVP= 0.0_JPRB +ZGX = 0.0_JPRB + +! Reset the random number generator, so that all threads +! get the same numbers for ZMEAN{U|V}{X|Y}. + +CALL RANDOM_SEED(PUT=I_SEED) + +CALL RANDOM_NUMBER(ZMEANUX) +CALL RANDOM_NUMBER(ZMEANVX) +CALL RANDOM_NUMBER(ZMEANUY) +CALL RANDOM_NUMBER(ZMEANVY) + +! Distribute perturbations + +CALL EDIST_GRID(PGPG=ZRANDGP,KFDISTG=1,KFROM=ITO,PGP=ZPERT,KPROMA=NPROMA) +CALL EDIST_SPEC(PSPECG=ZSPECXG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZSPECX,& + & KVSET=IVSET) +CALL EDIST_SPEC(PSPECG=ZVORXG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZVORX,& + & KVSET=IVSET) +CALL EDIST_SPEC(PSPECG=ZDIVXG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZDIVX,& + & KVSET=IVSET) + +DO JREP=1,NREP + + CALL EDIST_SPEC(PSPECG=ZSPECYG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZSPECY,& + & KVSET=IVSET) + CALL EDIST_SPEC(PSPECG=ZVORYG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZVORY,& + & KVSET=IVSET) + CALL EDIST_SPEC(PSPECG=ZDIVYG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZDIVY,& + & KVSET=IVSET) + +! Direct calculations + + IF (JREP == NREP) THEN + WRITE(I_NOUT,*) 'DIRECT ELOTT ZMEANUX:', ZMEANUX + WRITE(I_NOUT,*) 'DIRECT ELOTT ZMEANVX:', ZMEANVX + ENDIF + + CALL EINV_TRANS(PSPVOR=ZVORX,PSPDIV=ZDIVX,PSPSCALAR=ZSPECX,PGP=ZGX,& + & PMEANU=ZMEANUX,PMEANV=ZMEANVX, KPROMA=NPROMA, KVSETSC=IVSET,& + & KVSETUV=IVSET) + CALL GPC(ZGX) + CALL EDIR_TRANS(PSPSCALAR=ZSPECP,PSPVOR=ZVORP,PSPDIV=ZDIVP,PGP=ZGX,& + & KPROMA=NPROMA,KVSETSC=IVSET, KVSETUV=IVSET,& + & PMEANU=ZMEANUP,PMEANV=ZMEANVP) + + ZSC=SCALPRODSP(ZSPECP,ZSPECY)+SCALPRODSP(ZVORP,ZVORY)+SCALPRODSP(ZDIVP,ZDIVY) + +! Gather ZSC + + IF (I_NPROC > 1) CALL MPL_ALLREDUCE(ZSC,'SUM',.TRUE.) + + ZSCUV=SUM(ZMEANUP*ZMEANUY+ZMEANVP*ZMEANVY) + +! Gather ZSCUV. Note: In case of b-parallellization, the answer +! gathered is based on NPRTRW groups of NPRTRV processors working +! on that calculation, so the answer is NPRTRW times too large. + + IF (I_NPROC > 1) CALL MPL_ALLREDUCE(ZSCUV,'SUM',.TRUE.) + ZSCUV=ZSCUV/NPRTRW + + ZSCT=ZSC+ZSCUV + IF(JREP == NREP) WRITE(I_NOUT,*)'FINAL ',ZSCT + +! Adjoint calculations + + ZSPECP = 0.0_JPRB + ZVORP = 0.0_JPRB + ZDIVP = 0.0_JPRB + ZMEANUP= 0.0_JPRB + ZMEANVP= 0.0_JPRB + ZGX = 0.0_JPRB + CALL EDIR_TRANSAD(PSPSCALAR=ZSPECY,PSPVOR=ZVORY,PSPDIV=ZDIVY,PGP=ZGX,& + & KPROMA=NPROMA, KVSETSC=IVSET, KVSETUV=IVSET,& + & PMEANU=ZMEANUY,PMEANV=ZMEANVY) +! The grid point calculation is self-adjoint +!! CALL GPCAD(ZGX) + CALL GPC(ZGX) + CALL EINV_TRANSAD(PSPSCALAR=ZSPECP,PSPVOR=ZVORP,PSPDIV=ZDIVP,PGP=ZGX,& + & KPROMA=NPROMA,KVSETSC=IVSET,KVSETUV=IVSET,& + & PMEANU=ZMEANUP,PMEANV=ZMEANVP) + + ZSC=SCALPRODSP(ZSPECX,ZSPECP)+SCALPRODSP(ZVORX,ZVORP)+SCALPRODSP(ZDIVX,ZDIVP) + +! Gather ZSC + + IF (I_NPROC > 1) CALL MPL_ALLREDUCE(ZSC,'SUM',.TRUE.) + + ZSCUV=SUM(ZMEANUX*ZMEANUP+ZMEANVX*ZMEANVP) + +! Gather ZSCUV. Note: In case of b-parallellization, the answer +! gathered is based on NPRTRW groups of NPRTRV processors working +! on that calculation, so the answer is NPRTRW times too large. + + IF (I_NPROC > 1) CALL MPL_ALLREDUCE(ZSCUV,'SUM',.TRUE.) + ZSCUV=ZSCUV/NPRTRW + + ZSCT=ZSC+ZSCUV + IF(JREP == NREP) WRITE(I_NOUT,*)'FINAL ',ZSCT + +ENDDO + +IF(I_NPROC > 1 .OR..NOT.LMPOFF) THEN + CALL MPL_BARRIER() + CALL MPL_END +ENDIF + +IF (LHOOK) CALL DR_HOOK('TEST_ADJOINT',1,ZHOOK_HANDLE) + +CONTAINS + +SUBROUTINE GPC(PGP) +! +USE PARKIND1 ,ONLY : JPIM, JPRB + +IMPLICIT NONE +! Grid-point computations + +REAL(KIND=JPRB) :: PGP(:,:,:) + +INTEGER(KIND=JPIM) :: JLEV,JKGLO,JROF,IEND,IBL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +IF (LHOOK) CALL DR_HOOK('GPC',0,ZHOOK_HANDLE) + +DO JLEV=1,3*NFLEVG + DO JKGLO=1,I_NGPTOT,NPROMA + IEND = MIN(NPROMA,I_NGPTOT-JKGLO+1) + IBL = (JKGLO-1)/NPROMA+1 + DO JROF=1,IEND + PGP(JROF,JLEV,IBL) = PGP(JROF,JLEV,IBL)*ZPERT(JROF,1,IBL) + ENDDO + ENDDO +ENDDO + +IF (LHOOK) CALL DR_HOOK('GPC',1,ZHOOK_HANDLE) + +END SUBROUTINE GPC + +REAL(KIND=JPRB) FUNCTION SCALPRODSP(PSP1,PSP2) + +! Scalar product in spectral space +! +USE PARKIND1 ,ONLY : JPIM, JPRB + +IMPLICIT NONE + +REAL(KIND=JPRB) :: PSP1(:,:),PSP2(:,:) + +INTEGER(KIND=JPIM) :: JMLOC,IM,JN,INM,JLEV + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +IF (LHOOK) CALL DR_HOOK('SCALPRODSP',0,ZHOOK_HANDLE) + +SCALPRODSP = 0.0_JPRB +DO JLEV=1,I_NFLEV + DO JMLOC=1,I_NUMP + IM = I_MYMS(JMLOC) + DO JN=0,2*I_NCPL2M(IM)-1 + INM=I_NESM0(IM)+JN + SCALPRODSP = SCALPRODSP + PSP1(JLEV,INM)*PSP2(JLEV,INM) + ENDDO + ENDDO +ENDDO + +IF (LHOOK) CALL DR_HOOK('SCALPRODSP',1,ZHOOK_HANDLE) + +END FUNCTION SCALPRODSP + +END PROGRAM TEST_ADJOINT diff --git a/src/etrans/programs/trinfo.F90 b/src/etrans/programs/trinfo.F90 new file mode 100644 index 0000000..40c9662 --- /dev/null +++ b/src/etrans/programs/trinfo.F90 @@ -0,0 +1,209 @@ +PROGRAM TRINFO + +USE PARKIND1, ONLY : JPIM, JPRB +USE XRD_GETOPTIONS +USE FA_MOD, ONLY : FA => FA_COM_DEFAULT +USE SUEMPLAT_MOD +USE SUMPLAT_MOD +USE EQ_REGIONS_MOD + +IMPLICIT NONE + +CHARACTER (LEN=128) :: CLFA +INTEGER (KIND=JPIM) :: INBARP, INBARI, IREP +CHARACTER (LEN=*), PARAMETER :: CLNOMC = 'cadre' +INTEGER (KIND=JPIM), PARAMETER :: ILUN = 77 + +INTEGER (KIND=JPIM) :: ITYPTR, ITRONC, INLATI, INXLON, INIVER +INTEGER (KIND=JPIM), ALLOCATABLE :: INLOPA (:), INOZPA (:) +REAL (KIND=JPRB) :: ZSLAPO, ZCLOPO, ZSLOPO, ZCODIL, ZREFER +REAL (KIND=JPRB), ALLOCATABLE :: ZSINLA (:), ZAHYBR (:), ZBHYBR (:) +LOGICAL :: LLGARD + +LOGICAL :: LELAM +INTEGER (KIND=JPIM) :: NSMAX, NMSMAX, NFLEVG, NDGLG, NDLON +INTEGER (KIND=JPIM), ALLOCATABLE :: NLOENG (:) + +INTEGER (KIND=JPIM) :: NPRINTLEV +INTEGER (KIND=JPIM) :: NPROC, NPRGPNS, NPRGPEW, NDGUXG +LOGICAL :: LEQ_REGIONS, LSPLIT, LLWEIGHTED_DISTR + +INTEGER (KIND=JPIM), ALLOCATABLE :: NFRSTLAT (:), NLSTLAT (:), NPROCAGP (:), & + & NPTRLAT (:), NPTRFRSTLAT (:), NPTRLSTLAT (:) +LOGICAL, ALLOCATABLE :: LSPLITLAT (:) + +INTEGER (KIND=JPIM) :: IFRSTLOFF, IPTRFLOFF, IMEDIAP, IRESTM +REAL (KIND=JPRB) :: ZWEIGHT (1), ZMEDIAP + +INTEGER (KIND=JPIM) :: JGL + +TYPE (EQ_REGIONS_T) :: YLER + +CALL INITOPTIONS (KOPTMIN=0) + +CALL GETOPTION ('--fa-file', CLFA, MND = .TRUE., USE = "FA file") + +NPRINTLEV = 1 +CALL GETOPTION ('--nprintlev', NPRINTLEV) + +NPRGPNS = 1 +CALL GETOPTION ('--nprgpns', NPRGPNS) + +NPRGPEW = 1 +CALL GETOPTION ('--nprgpew', NPRGPEW) + +NPROC = NPRGPNS * NPRGPEW +CALL GETOPTION ('--nproc', NPROC) + +CALL GETOPTION ('--leq_regions', LEQ_REGIONS) + +CALL GETOPTION ('--lsplit', LSPLIT) + +CALL CHECKOPTIONS () + +CALL FAITOU (IREP, ILUN, .TRUE., CLFA, 'OLD', .TRUE., .FALSE., 0_JPIM, INBARP, INBARI, CLNOMC) + +ALLOCATE (INLOPA (FA%JPXPAH), INOZPA (FA%JPXIND), & + & ZSINLA (FA%JPXGEO), ZAHYBR (0:FA%JPXNIV), ZBHYBR (0:FA%JPXNIV)) + +CALL FACIES (CLNOMC, ITYPTR, ZSLAPO, ZCLOPO, ZSLOPO, & +& ZCODIL, ITRONC, INLATI, INXLON, INLOPA, & +& INOZPA, ZSINLA, INIVER, ZREFER, ZAHYBR, & +& ZBHYBR, LLGARD) + + +CALL FAIRME (IREP, ILUN, 'KEEP') + +LELAM = ITYPTR < 0 +NSMAX = ITRONC +IF (LELAM) THEN + NMSMAX = - ITYPTR + NDGUXG = INLOPA (6) +ELSE + NMSMAX = NSMAX + NDGUXG = 0 +ENDIF + +NFLEVG = INIVER +NDGLG = INLATI +NDLON = INXLON + +WRITE (*, *) " NPROC = ", NPROC +WRITE (*, *) " NPRGPNS = ", NPRGPNS +WRITE (*, *) " NPRGPEW = ", NPRGPEW +WRITE (*, *) " LELAM = ", LELAM +WRITE (*, *) " NDLON = ", NDLON +WRITE (*, *) " NDGLG = ", NDGLG +WRITE (*, *) " NDGUXG = ", NDGUXG +WRITE (*, *) " NFLEVG = ", NFLEVG +WRITE (*, *) " NSMAX = ", NSMAX +WRITE (*, *) " NMSMAX = ", NMSMAX +WRITE (*, *) " LSPLIT = ", LSPLIT +WRITE (*, *) " LEQ_REGIONS = ", LEQ_REGIONS + +ALLOCATE (NLOENG (NDGLG)) + +IF (LELAM) THEN + NLOENG = NDLON +ELSE + DO JGL = 1, (NDGLG+1)/2 + NLOENG (JGL) = INLOPA (JGL) + ENDDO + DO JGL = (NDGLG+1)/2+1, NDGLG + NLOENG (JGL) = INLOPA (NDGLG-JGL+1) + ENDDO +ENDIF + +WRITE (*, *) " NLOENG = ", NLOENG + +CALL EQ_REGIONS_SAVE (YLER) + +IF (LEQ_REGIONS) THEN + ALLOCATE (N_REGIONS (NPROC+2)) + N_REGIONS = 0 + CALL EQ_REGIONS (NPROC) +ELSE + N_REGIONS_NS = NPRGPNS + ALLOCATE (N_REGIONS (N_REGIONS_NS)) + N_REGIONS = NPRGPEW + N_REGIONS_EW = NPRGPEW +ENDIF + +WRITE (*, *) " N_REGIONS = ", N_REGIONS +WRITE (*, *) " N_REGIONS_NS = ", N_REGIONS_NS +WRITE (*, *) " N_REGIONS_EW = ", N_REGIONS_EW + +ALLOCATE (NFRSTLAT (N_REGIONS_NS), NLSTLAT (N_REGIONS_NS), NPROCAGP (N_REGIONS_NS), & + & NPTRLAT (NDGLG), NPTRFRSTLAT (N_REGIONS_NS), NPTRLSTLAT (N_REGIONS_NS), & + & LSPLITLAT (NDGLG)) + + +LLWEIGHTED_DISTR = .FALSE. + +IF (LELAM) THEN + + CALL SUEMPLAT (KDGL = NDGLG , & + & KPROC = NPROC , & + & KPROCA = N_REGIONS_NS , & + & KMYSETA = 1_JPIM , & + & LDSPLIT = LSPLIT , & + & LDEQ_REGIONS = LEQ_REGIONS , & + & KFRSTLAT = NFRSTLAT , & + & KLSTLAT = NLSTLAT , & + & KFRSTLOFF = IFRSTLOFF , & + & KPTRLAT = NPTRLAT , & + & KPTRFRSTLAT = NPTRFRSTLAT , & + & KPTRLSTLAT = NPTRLSTLAT , & + & KPTRFLOFF = IPTRFLOFF , & + & PWEIGHT = ZWEIGHT , & + & LDWEIGHTED_DISTR = LLWEIGHTED_DISTR , & + & PMEDIAP = ZMEDIAP , & + & KPROCAGP = NPROCAGP , & + & KMEDIAP = IMEDIAP , & + & KRESTM = IRESTM , & + & LDSPLITLAT = LSPLITLAT , & + & KMYPROC = 1_JPIM , & + & KLOEN = NLOENG , & + & KDGUX = NDGLG) ! Should be NDGUXG+INT((RDISTR_E*(NDGLG-NDGUXG))) + +ELSE + + CALL SUMPLAT (KDGL = NDGLG , & + & KPROC = NPROC , & + & KPROCA = N_REGIONS_NS , & + & KMYSETA = 1_JPIM , & + & LDSPLIT = LSPLIT , & + & LDEQ_REGIONS = LEQ_REGIONS , & + & KFRSTLAT = NFRSTLAT , & + & KLSTLAT = NLSTLAT , & + & KFRSTLOFF = IFRSTLOFF , & + & KPTRLAT = NPTRLAT , & + & KPTRFRSTLAT = NPTRFRSTLAT , & + & KPTRLSTLAT = NPTRLSTLAT , & + & KPTRFLOFF = IPTRFLOFF , & + & PWEIGHT = ZWEIGHT , & + & LDWEIGHTED_DISTR = LLWEIGHTED_DISTR , & + & PMEDIAP = ZMEDIAP , & + & KPROCAGP = NPROCAGP , & + & KMEDIAP = IMEDIAP , & + & KRESTM = IRESTM , & + & LDSPLITLAT = LSPLITLAT , & + & KMYPROC = 1_JPIM , & + & KLOEN = NLOENG ) + +ENDIF + +WRITE (*, *) " NFRSTLAT = ", NFRSTLAT +WRITE (*, *) " NLSTLAT = ", NLSTLAT +WRITE (*, *) " NPTRLAT = ", NPTRLAT +WRITE (*, *) " NPTRFRSTLAT = ", NPTRFRSTLAT +WRITE (*, *) " NPTRLSTLAT = ", NPTRLSTLAT + +IF (LEQ_REGIONS) THEN + DEALLOCATE (N_REGIONS) + NULLIFY (N_REGIONS) + CALL EQ_REGIONS_LOAD (YLER) +ENDIF + +END PROGRAM TRINFO + diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt new file mode 100644 index 0000000..ea7f386 --- /dev/null +++ b/src/programs/CMakeLists.txt @@ -0,0 +1,154 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +if( HAVE_TOOLS ) + + if( HAVE_DOUBLE_PRECISION ) + set( trans trans_dp ) + else() + set( trans trans_sp ) + endif() + + ecbuild_warn_var( ECTRANS_TOOLS_RTABLE_PATH ) + + file( GLOB ectrans_programs *.F90 ) + foreach( _program IN ITEMS ${ectrans_programs} ) + get_filename_component( _program ${_program} NAME_WE ) + + ecbuild_add_executable(TARGET ${_program} + SOURCES ${_program}.F90 + LIBS ${trans} + LINKER_LANGUAGE Fortran + DEFINITIONS ECTRANS_TOOLS_RTABLE_PATH="${ECTRANS_TOOLS_RTABLE_PATH}" ) + + endforeach() + +endif() + + +foreach( prec sp dp ) + if( HAVE_${prec} ) + if ( HAVE_CPU ) + ecbuild_add_executable(TARGET ectrans-benchmark-${prec} + SOURCES ectrans-benchmark.F90 + LINKER_LANGUAGE Fortran + LIBS + trans_${prec} + fiat + parkind_${prec} + ) + if ( HAVE_ETRANS ) + ecbuild_add_executable(TARGET ectrans-lam-benchmark-${prec} + SOURCES ectrans-lam-benchmark.F90 + LINKER_LANGUAGE Fortran + LIBS + etrans_${prec} + trans_${prec} + fiat + parkind_${prec} + ) + + endif() + endif() + + if( HAVE_GPU ) + foreach( gpumethod acc omp ) + if( HAVE_${gpumethod} ) + if( gpumethod STREQUAL acc ) + ecbuild_add_executable(TARGET ectrans-benchmark-gpu-${prec}-${gpumethod} + SOURCES ectrans-benchmark.F90 + LINKER_LANGUAGE Fortran + LIBS + trans_gpu_${prec}_${gpumethod} + fiat + parkind_${prec} + OpenACC::OpenACC_Fortran + ) + target_compile_definitions( ectrans-benchmark-gpu-${prec}-${gpumethod} PRIVATE ACCGPU ) + endif() + + if( gpumethod STREQUAL omp ) + ecbuild_add_executable(TARGET ectrans-benchmark-gpu-${prec}-${gpumethod} + SOURCES ectrans-benchmark.F90 + LINKER_LANGUAGE Fortran + LIBS + trans_gpu_${prec}_${gpumethod} + fiat + parkind_${prec} + ) + target_compile_definitions( ectrans-benchmark-gpu-${prec}-${gpumethod} PRIVATE OMPGPU ) + endif() + + target_link_options ( ectrans-benchmark-gpu-${prec}-${gpumethod} INTERFACE "$<$:${${gpumethod}_link_flags}>" ) + target_compile_options( ectrans-benchmark-gpu-${prec}-${gpumethod} PUBLIC $<$:${${gpumethod}_flags}>) + + if ( HAVE_ETRANS ) + + if( gpumethod STREQUAL acc ) + ecbuild_add_executable(TARGET ectrans-lam-benchmark-gpu-${prec}-${gpumethod} + SOURCES ectrans-lam-benchmark.F90 + LINKER_LANGUAGE Fortran + LIBS + etrans_gpu_${prec}_${gpumethod} + trans_gpu_${prec}_${gpumethod} + fiat + parkind_${prec} + OpenACC::OpenACC_Fortran + ) + target_compile_definitions( ectrans-lam-benchmark-gpu-${prec}-${gpumethod} PRIVATE ACCGPU ) + endif() + + if( gpumethod STREQUAL omp ) + ecbuild_add_executable(TARGET ectrans-lam-benchmark-gpu-${prec}-${gpumethod} + SOURCES ectrans-lam-benchmark.F90 + LINKER_LANGUAGE Fortran + LIBS + etrans_gpu_${prec}_${gpumethod} + trans_gpu_${prec}_${gpumethod} + fiat + parkind_${prec} + ) + target_compile_definitions( ectrans-lam-benchmark-gpu-${prec}-${gpumethod} PRIVATE OMPGPU ) + endif() + + target_link_options ( ectrans-lam-benchmark-gpu-${prec}-${gpumethod} INTERFACE "$<$:${${gpumethod}_link_flags}>" ) + target_compile_options( ectrans-lam-benchmark-gpu-${prec}-${gpumethod} PUBLIC $<$:${${gpumethod}_flags}>) + + endif() + + endif() + + + endforeach() + endif() + endif() +endforeach() + + +# ectrans information tool + +get_property( langs GLOBAL PROPERTY ENABLED_LANGUAGES ) + +foreach( lang ${langs} ) + set( EC_${lang}_FLAGS "${CMAKE_${lang}_FLAGS} ${CMAKE_${lang}_FLAGS_${CMAKE_BUILD_TYPE_CAPS}}" ) +endforeach() + +configure_file( ectrans.in ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/ectrans @ONLY ) + +file(COPY ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/ectrans + DESTINATION ${CMAKE_BINARY_DIR}/bin + FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ + GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + +install( FILES + ${CMAKE_BINARY_DIR}/bin/ectrans + DESTINATION + ${INSTALL_BIN_DIR} + PERMISSIONS + OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE +) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 new file mode 100644 index 0000000..4351011 --- /dev/null +++ b/src/programs/ectrans-benchmark.F90 @@ -0,0 +1,1370 @@ +! (C) Copyright 2014- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +program transform_test + +! +! Spectral transform test +! +! This test performs spectral to real and real to spectral transforms repeated in +! timed loop. +! +! 1) One "surface" field is always transformed: +! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngbplk) +! +! 2) A Multiple "3d" fields are transformed and can be disabled with "--nfld 0" +! +! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk) +! +! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and +! can be enabled with "--vordiv" +! +! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk) +! +! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2) +! This must be enabled with "--scders" +! +! 5) Optionally uv East-West derivate can be computed from vorticity/divergence. +! This must be enabled with "--vordiv --uvders" +! +! +! Authors : George Mozdzynski +! Willem Deconinck +! Ioan Hadade +! Sam Hatfield +! + +use parkind1, only: jpim, jprb, jprd +use oml_mod ,only : oml_max_threads +use mpl_module +use yomgstats, only: jpmaxstat +use yomhook, only : dr_hook_init + +implicit none + +! Number of points in top/bottom latitudes +integer(kind=jpim), parameter :: min_octa_points = 20 + +integer(kind=jpim) :: istack, getstackusage +real(kind=jprb), dimension(1) :: zmaxerr(5), zerr(5) +real(kind=jprb) :: zmaxerrg + +! Output unit numbers +integer(kind=jpim), parameter :: nerr = 0 ! Unit number for STDERR +integer(kind=jpim), parameter :: nout = 6 ! Unit number for STDOUT +integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output + +! Default parameters +integer(kind=jpim) :: nsmax = 79 ! Spectral truncation +integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test +integer(kind=jpim) :: nfld = 1 ! Number of scalar fields +integer(kind=jpim) :: nlev = 1 ! Number of vertical levels + +integer(kind=jpim) :: nflevg +integer(kind=jpim) :: ndgl ! Number of latitudes +integer(kind=jpim) :: nspec2 +integer(kind=jpim) :: ngptot +integer(kind=jpim) :: ngptotg +integer(kind=jpim) :: ifld +integer(kind=jpim) :: jroc +integer(kind=jpim) :: jb +integer(kind=jpim) :: nspec2g +integer(kind=jpim) :: i +integer(kind=jpim) :: ja +integer(kind=jpim) :: ib +integer(kind=jpim) :: jprtrv + +integer(kind=jpim), allocatable :: nloen(:), nprcids(:) +integer(kind=jpim) :: myproc, jj +integer :: jstep + +real(kind=jprd) :: ztinit, ztloop, timef, ztstepmax, ztstepmin, ztstepavg, ztstepmed +real(kind=jprd) :: ztstepmax1, ztstepmin1, ztstepavg1, ztstepmed1 +real(kind=jprd) :: ztstepmax2, ztstepmin2, ztstepavg2, ztstepmed2 +real(kind=jprd), allocatable :: ztstep(:), ztstep1(:), ztstep2(:) + +real(kind=jprb), allocatable :: znormsp(:), znormsp1(:), znormdiv(:), znormdiv1(:) +real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormt(:), znormt1(:) +real(kind=jprd) :: zaveave(0:jpmaxstat) + +! Grid-point space data structures +real(kind=jprb), allocatable, target :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), allocatable, target :: zgmvs (:,:,:) ! Single level fields at t and t-dt +real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt + +! Spectral space data structures +real(kind=jprb), allocatable, target :: sp3d(:,:,:) +real(kind=jprb), pointer :: zspvor(:,:) => null() +real(kind=jprb), pointer :: zspdiv(:,:) => null() +real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() +real(kind=jprb), allocatable :: zspsc2(:,:) + +logical :: lstack = .false. ! Output stack info +logical :: luserpnm = .false. +logical :: lkeeprpnm = .false. +logical :: luseflt = .false. ! Use fast legendre transforms +logical :: ltrace_stats = .false. +logical :: lstats_omp = .false. +logical :: lstats_comms = .false. +logical :: lstats_mpl = .false. +logical :: lstats = .true. ! gstats statistics +logical :: lbarrier_stats = .false. +logical :: lbarrier_stats2 = .false. +logical :: ldetailed_stats = .false. +logical :: lstats_alloc = .false. +logical :: lsyncstats = .false. +logical :: lstatscpu = .false. +logical :: lstats_mem = .false. +logical :: lxml_stats = .false. +logical :: lfftw = .true. ! Use FFTW for Fourier transforms +logical :: lvordiv = .false. +logical :: lscders = .false. +logical :: luvders = .false. +logical :: lprint_norms = .false. ! Calculate and print spectral norms +logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end + +integer(kind=jpim) :: nstats_mem = 0 +integer(kind=jpim) :: ntrace_stats = 0 +integer(kind=jpim) :: nprnt_stats = 1 + +! The multiplier of the machine epsilon used as a tolerance for correctness checking +! ncheck = 0 (the default) means that correctness checking is disabled +integer(kind=jpim) :: ncheck = 0 + +logical :: lmpoff = .false. ! Message passing switch + +! Verbosity level (0 or 1) +integer :: verbosity = 0 + +real(kind=jprb) :: zra = 6371229._jprb + +integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions +integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib +integer(kind=jpim) :: ncombflen = 1800000 ! Size of comm buffer + +integer(kind=jpim) :: nproc ! Number of procs +integer(kind=jpim) :: nthread +integer(kind=jpim) :: nprgpns ! Grid-point decomp +integer(kind=jpim) :: nprgpew ! Grid-point decomp +integer(kind=jpim) :: nprtrv = 0 ! Spectral decomp +integer(kind=jpim) :: nprtrw = 0 ! Spectral decomp +integer(kind=jpim) :: nspecresmin = 80 ! Minimum spectral resolution, for controlling nprtrw +integer(kind=jpim) :: mysetv +integer(kind=jpim) :: mysetw +integer(kind=jpim) :: mp_type = 2 ! Message passing type +integer(kind=jpim) :: mbx_size = 150000000 ! Mailbox size + +integer(kind=jpim), allocatable :: numll(:), ivset(:) +integer(kind=jpim) :: ivsetsc(1) + +integer(kind=jpim) :: nflevl + +! sumpini +integer(kind=jpim) :: isqr +logical :: lsync_trans = .true. ! Activate barrier sync +logical :: leq_regions = .true. ! Eq regions flag + + +integer(kind=jpim) :: nproma = 0 +integer(kind=jpim) :: ngpblks +! locals +integer(kind=jpim) :: iprtrv +integer(kind=jpim) :: iprtrw +integer(kind=jpim) :: iprused, ilevpp, irest, ilev, jlev + +integer(kind=jpim) :: ndimgmv = 0 ! Third dim. of gmv "(nproma,nflevg,ndimgmv,ngpblks)" +integer(kind=jpim) :: ndimgmvs = 0 ! Second dim. gmvs "(nproma,ndimgmvs,ngpblks)" + +integer(kind=jpim) :: jbegin_uv = 0 +integer(kind=jpim) :: jend_uv = 0 +integer(kind=jpim) :: jbegin_sc = 0 +integer(kind=jpim) :: jend_sc = 0 +integer(kind=jpim) :: jbegin_scder_NS = 0 +integer(kind=jpim) :: jend_scder_NS = 0 +integer(kind=jpim) :: jbegin_scder_EW = 0 +integer(kind=jpim) :: jend_scder_EW = 0 +integer(kind=jpim) :: jbegin_uder_EW = 0 +integer(kind=jpim) :: jend_uder_EW = 0 +integer(kind=jpim) :: jbegin_vder_EW = 0 +integer(kind=jpim) :: jend_vder_EW = 0 + +logical :: ldump_values = .false. + +integer, external :: ec_mpirank +logical :: luse_mpi = .true. + +character(len=16) :: cgrid = '' + +!=================================================================================================== + +#include "setup_trans0.h" +#include "setup_trans.h" +#include "inv_trans.h" +#include "dir_trans.h" +#include "trans_inq.h" +#include "specnorm.h" +#include "abor1.intfb.h" +#include "gstats_setup.intfb.h" +#include "ec_meminfo.intfb.h" + +!=================================================================================================== + +luse_mpi = detect_mpirun() + +! Setup +call get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & + & luseflt, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck) +if (cgrid == '') cgrid = cubic_octahedral_gaussian_grid(nsmax) +call parse_grid(cgrid, ndgl, nloen) +nflevg = nlev + +!=================================================================================================== + +if (luse_mpi) then + call mpl_init(ldinfo=(verbosity>=1)) + nproc = mpl_nproc() + myproc = mpl_myrank() +else + nproc = 1 + myproc = 1 + mpl_comm = -1 +endif +nthread = oml_max_threads() + +call dr_hook_init() + +!=================================================================================================== + +if( lstats ) call gstats(0,0) +ztinit = timef() + +! only output to stdout on pe 1 +if (nproc > 1) then + if (myproc /= 1) then + open(unit=nout, file='/dev/null') + endif +endif + +if (ldetailed_stats) then + lstats_omp = .true. + lstats_comms = .true. + lstats_mpl = .true. + lstatscpu = .true. + nprnt_stats = nproc +! lstats_mem = .true. +! lstats_alloc = .true. +endif + +!=================================================================================================== + +allocate(nprcids(nproc)) +do jj = 1, nproc + nprcids(jj) = jj +enddo + +if (nproc <= 1) then + lmpoff = .true. +endif + +! Compute nprgpns and nprgpew +! This version selects most square-like distribution +! These will change if leq_regions=.true. +if (nproc == 0) nproc = 1 +isqr = int(sqrt(real(nproc,jprb))) +do ja = isqr, nproc + ib = nproc/ja + if (ja*ib == nproc) then + nprgpns = max(ja,ib) + nprgpew = min(ja,ib) + exit + endif +enddo + +! From sumpini, although this should be specified in namelist +if (nspecresmin == 0) nspecresmin = nproc + +! Compute nprtrv and nprtrw if not provided on the command line +if (nprtrv > 0 .or. nprtrw > 0) then + if (nprtrv == 0) nprtrv = nproc/nprtrw + if (nprtrw == 0) nprtrw = nproc/nprtrv + if (nprtrw*nprtrv /= nproc) call abor1('transform_test:nprtrw*nprtrv /= nproc') + if (nprtrw > nspecresmin) call abor1('transform_test:nprtrw > nspecresmin') +else + do jprtrv = 4, nproc + nprtrv = jprtrv + nprtrw = nproc/nprtrv + if (nprtrv*nprtrw /= nproc) cycle + if (nprtrv > nprtrw) exit + if (nprtrw > nspecresmin) cycle + if (nprtrw <= nspecresmin/(2*oml_max_threads())) exit + enddo + ! Go for approx square partition for backup + if (nprtrv*nprtrw /= nproc .or. nprtrw > nspecresmin .or. nprtrv > nprtrw) then + isqr = int(sqrt(real(nproc,jprb))) + do ja = isqr, nproc + ib = nproc/ja + if (ja*ib == nproc) then + nprtrw = max(ja, ib) + nprtrv = min(ja, ib) + if (nprtrw > nspecresmin ) then + call abor1('transform_test:nprtrw (approx square value) > nspecresmin') + endif + exit + endif + enddo + endif +endif + +! Create communicators for mpi groups +if (.not.lmpoff) then + call mpl_groups_create(nprtrw, nprtrv) +endif + +if (lmpoff) then + mysetw = (myproc - 1)/nprtrv + 1 + mysetv = mod(myproc - 1, nprtrv) + 1 +else + call mpl_cart_coords(myproc, mysetw, mysetv) + + ! Just checking for now... + iprtrv = mod(myproc - 1, nprtrv) + 1 + iprtrw = (myproc - 1)/nprtrv + 1 + if (iprtrv /= mysetv .or. iprtrw /= mysetw) then + call abor1('transform_test:inconsistency when computing mysetw and mysetv') + endif +endif + +if (.not. lmpoff) then + call mpl_buffer_method(kmp_type=mp_type, kmbx_size=mbx_size, kprocids=nprcids, ldinfo=(verbosity>=1)) +endif + +! Determine number of local levels for fourier and legendre calculations +! based on the values of nflevg and nprtrv +allocate(numll(nprtrv+1)) + +! Calculate remainder +iprused = min(nflevg+1, nprtrv) +ilevpp = nflevg/nprtrv +irest = nflevg -ilevpp*nprtrv +do jroc = 1, nprtrv + if (jroc <= irest) then + numll(jroc) = ilevpp+1 + else + numll(jroc) = ilevpp + endif +enddo +numll(iprused+1:nprtrv+1) = 0 + +nflevl = numll(mysetv) + +ivsetsc(1) = iprused +ifld = 0 + +!=================================================================================================== +! Setup gstats +!=================================================================================================== + +if (lstats) then + call gstats_setup(nproc, myproc, nprcids, & + & lstats, lstatscpu, lsyncstats, ldetailed_stats, lbarrier_stats, lbarrier_stats2, & + & lstats_omp, lstats_comms, lstats_mem, nstats_mem, lstats_alloc, & + & ltrace_stats, ntrace_stats, nprnt_stats, lxml_stats) + call gstats_psut + + ! Assign labels to GSTATS regions + call gstats_labels +endif + +!=================================================================================================== +! Call ecTrans setup routines +!=================================================================================================== + +if (verbosity >= 1) write(nout,'(a)')'======= Setup ecTrans =======' + +call gstats(1, 0) +call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & + & kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, & + & kprtrw=nprtrw, kcombflen=ncombflen, ldsync_trans=lsync_trans, & + & ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi) +call gstats(1, 1) + +call gstats(2, 0) +call setup_trans(kflev=nflevg, ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., & + & ldusefftw=lfftw, lduserpnm=luserpnm, ldkeeprpnm=lkeeprpnm, & + & lduseflt=luseflt) +call gstats(2, 1) + +call trans_inq(kspec2=nspec2, kspec2g=nspec2g, kgptot=ngptot, kgptotg=ngptotg) + +if (nproma == 0) then ! no blocking (default when not specified) + nproma = ngptot +endif + +! Calculate number of NPROMA blocks +ngpblks = (ngptot - 1)/nproma+1 + +!=================================================================================================== +! Print information before starting +!=================================================================================================== + +! Print configuration details +if (verbosity >= 0) then + write(nout,'(" ")') + write(nout,'(a)')'======= Start of runtime parameters =======' + write(nout,'(" ")') + write(nout,'("nsmax ",i0)') nsmax + write(nout,'("grid ",a)') trim(cgrid) + write(nout,'("ndgl ",i0)') ndgl + write(nout,'("nproc ",i0)') nproc + write(nout,'("nthread ",i0)') nthread + write(nout,'("nprgpns ",i0)') nprgpns + write(nout,'("nprgpew ",i0)') nprgpew + write(nout,'("nprtrw ",i0)') nprtrw + write(nout,'("nprtrv ",i0)') nprtrv + write(nout,'("ngptot ",i0)') ngptot + write(nout,'("ngptotg ",i0)') ngptotg + write(nout,'("nfld ",i0)') nfld + write(nout,'("nlev ",i0)') nlev + write(nout,'("nproma ",i0)') nproma + write(nout,'("ngpblks ",i0)') ngpblks + write(nout,'("nspec2 ",i0)') nspec2 + write(nout,'("nspec2g ",i0)') nspec2g + write(nout,'("luseflt ",l)') luseflt + write(nout,'("lvordiv ",l)') lvordiv + write(nout,'("lscders ",l)') lscders + write(nout,'("luvders ",l)') luvders + write(nout,'(" ")') + write(nout,'(a)') '======= End of runtime parameters =======' + write(nout,'(" ")') +end if + +!=================================================================================================== +! Allocate and Initialize spectral arrays +!=================================================================================================== + +! Allocate spectral arrays +! Try to mimick IFS layout as much as possible +nullify(zspvor) +nullify(zspdiv) +nullify(zspsc3a) +allocate(sp3d(nflevl,nspec2,2+nfld)) +allocate(zspsc2(1,nspec2)) + +call initialize_spectral_arrays(nsmax, zspsc2, sp3d) + +! Point convenience variables to storage variable sp3d +zspvor => sp3d(:,:,1) +zspdiv => sp3d(:,:,2) +zspsc3a => sp3d(:,:,3:3+(nfld-1)) + +!=================================================================================================== +! Allocate gridpoint arrays +!=================================================================================================== + +allocate(ivset(nflevg)) + +! Compute spectral distribution +ilev = 0 +do jb = 1, nprtrv + do jlev=1, numll(jb) + ilev = ilev + 1 + ivset(ilev) = jb + enddo +enddo + +! Allocate grid-point arrays +if (lvordiv) then + jbegin_uv = 1 + jend_uv = 2 +endif +if (luvders) then + jbegin_uder_EW = jend_uv + 1 + jend_uder_EW = jbegin_uder_EW + 1 + jbegin_vder_EW = jend_uder_EW + 1 + jend_vder_EW = jbegin_vder_EW + 1 +else + jbegin_uder_EW = jend_uv + jend_uder_EW = jend_uv + jbegin_vder_EW = jend_uv + jend_vder_EW = jend_uv +endif + +jbegin_sc = jend_vder_EW + 1 +jend_sc = jend_vder_EW + nfld + +if (lscders) then + ndimgmvs = 3 + jbegin_scder_NS = jend_sc + 1 + jend_scder_NS = jend_sc + nfld + jbegin_scder_EW = jend_scder_NS + 1 + jend_scder_EW = jend_scder_NS + nfld +else + ndimgmvs = 1 + jbegin_scder_NS = jend_sc + jend_scder_NS = jend_sc + jbegin_scder_EW = jend_sc + jend_scder_EW = jend_sc +endif + +ndimgmv = jend_scder_EW + +!allocate(zgmv(nproma,nflevg,ndimgmv,ngpblks)) +!allocate(zgmvs(nproma,ndimgmvs,ngpblks)) + +! separate allocation to have contiguous arguments +allocate(zgpuv(nproma,nflevg,jend_vder_EW,ngpblks)) +allocate(zgp3a(nproma,nflevg,jend_scder_EW-jbegin_sc+1,ngpblks)) +allocate(zgp2(nproma,ndimgmvs,ngpblks)) +!zgpuv => zgmv(:,:,1:jend_vder_EW,:) +!zgp3a => zgmv(:,:,jbegin_sc:jend_scder_EW,:) +!zgp2 => zgmvs(:,:,:) + +!=================================================================================================== +! Allocate norm arrays +!=================================================================================================== + +if (lprint_norms .or. ncheck > 0) then + allocate(znormsp(1)) + allocate(znormsp1(1)) + allocate(znormvor(nflevg)) + allocate(znormvor1(nflevg)) + allocate(znormdiv(nflevg)) + allocate(znormdiv1(nflevg)) + allocate(znormt(nflevg)) + allocate(znormt1(nflevg)) + + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor1, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv1, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt1, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp1, kvset=ivsetsc) + + if (verbosity >= 1) then + do ifld = 1, nflevg + write(nout,'("norm zspvor( ",i4,",:) = ",f20.15)') ifld, znormvor1(ifld) + enddo + do ifld = 1, nflevg + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv1(ifld) + enddo + do ifld = 1, nflevg + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt1(ifld) + enddo + do ifld = 1, 1 + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp1(ifld) + enddo + endif +endif + +!=================================================================================================== +! Setup timers +!=================================================================================================== + +ztinit = (timef() - ztinit)/1000.0_jprd + +if (verbosity >= 0) then + write(nout,'(" ")') + write(nout,'(a,i6,a,f9.2,a)') "transform_test initialisation, on",nproc,& + & " tasks, took",ztinit," sec" + write(nout,'(" ")') +endif + +if (iters <= 0) call abor1('transform_test:iters <= 0') + +allocate(ztstep(iters)) +allocate(ztstep1(iters)) +allocate(ztstep2(iters)) + +ztstepavg = 0._jprd +ztstepmax = 0._jprd +ztstepmin = 9999999999999999._jprd +ztstepavg1 = 0._jprd +ztstepmax1 = 0._jprd +ztstepmin1 = 9999999999999999._jprd +ztstepavg2 = 0._jprd +ztstepmax2 = 0._jprd +ztstepmin2 = 9999999999999999._jprd + +write(nout,'(a)') '======= Start of spectral transforms =======' +write(nout,'(" ")') + +ztloop = timef() + +!=================================================================================================== +! Do spectral transform loop +!=================================================================================================== + +do jstep = 1, iters + call gstats(3,0) + ztstep(jstep) = timef() + + !================================================================================================= + ! Do inverse transform + !================================================================================================= + + ztstep1(jstep) = timef() + call gstats(4,0) + if (lvordiv) then + call inv_trans(kresol=1, kproma=nproma, & + & pspsc2=zspsc2, & ! spectral surface pressure + & pspvor=zspvor, & ! spectral vorticity + & pspdiv=zspdiv, & ! spectral divergence + & pspsc3a=zspsc3a, & ! spectral scalars + & ldscders=lscders, & + & ldvorgp=.false., & ! no gridpoint vorticity + & lddivgp=.false., & ! no gridpoint divergence + & lduvder=luvders, & + & kvsetuv=ivset, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pgp2=zgp2, & + & pgpuv=zgpuv, & + & pgp3a=zgp3a) + else + call inv_trans(kresol=1, kproma=nproma, & + & pspsc2=zspsc2, & ! spectral surface pressure + & pspsc3a=zspsc3a, & ! spectral scalars + & ldscders=lscders, & ! scalar derivatives + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pgp2=zgp2, & + & pgp3a=zgp3a) + endif + call gstats(4,1) + + ztstep1(jstep) = (timef() - ztstep1(jstep))/1000.0_jprd + + !================================================================================================= + ! While in grid point space, dump the values to disk, for debugging only + !================================================================================================= + + if (ldump_values) then + ! dump a field to a binary file + call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgp2(:,1,:), 'S', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgpuv(:,nflevg,1,:), 'U', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgpuv(:,nflevg,2,:), 'V', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgp3a(:,nflevg,1,:), 'T', noutdump) + endif + + !================================================================================================= + ! Do direct transform + !================================================================================================= + + ztstep2(jstep) = timef() + + call gstats(5,0) + if (lvordiv) then + call dir_trans(kresol=1, kproma=nproma, & + & pgp2=zgp2(:,1:1,:), & + & pgpuv=zgpuv(:,:,1:2,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), & + & pspvor=zspvor, & + & pspdiv=zspdiv, & + & pspsc2=zspsc2, & + & pspsc3a=zspsc3a, & + & kvsetuv=ivset, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset) + else + call dir_trans(kresol=1, kproma=nproma, & + & pgp2=zgp2(:,1:1,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), & + & pspsc2=zspsc2, & + & pspsc3a=zspsc3a, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset) + endif + call gstats(5,1) + ztstep2(jstep) = (timef() - ztstep2(jstep))/1000.0_jprd + + !================================================================================================= + ! Calculate timings + !================================================================================================= + + ztstep(jstep) = (timef() - ztstep(jstep))/1000.0_jprd + + ztstepavg = ztstepavg + ztstep(jstep) + ztstepmin = min(ztstep(jstep), ztstepmin) + ztstepmax = max(ztstep(jstep), ztstepmax) + + ztstepavg1 = ztstepavg1 + ztstep1(jstep) + ztstepmin1 = min(ztstep1(jstep), ztstepmin1) + ztstepmax1 = max(ztstep1(jstep), ztstepmax1) + + ztstepavg2 = ztstepavg2 + ztstep2(jstep) + ztstepmin2 = min(ztstep2(jstep), ztstepmin2) + ztstepmax2 = max(ztstep2(jstep), ztstepmax2) + + !================================================================================================= + ! Print norms + !================================================================================================= + + if (lprint_norms) then + call gstats(6,0) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc(1:1)) + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) + + ! Surface pressure + zmaxerr(:) = -999.0 + do ifld = 1, 1 + zerr(1) = abs(znormsp1(ifld)/znormsp(ifld) - 1.0_jprb) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + enddo + ! Divergence + do ifld = 1, nflevg + zerr(2) = abs(znormdiv1(ifld)/znormdiv(ifld) - 1.0_jprb) + zmaxerr(2) = max(zmaxerr(2), zerr(2)) + enddo + ! Vorticity + do ifld = 1, nflevg + zerr(3) = abs(znormvor1(ifld)/znormvor(ifld) - 1.0_jprb) + zmaxerr(3) = max(zmaxerr(3),zerr(3)) + enddo + ! Temperature + do ifld = 1, nflevg + zerr(4) = abs(znormt1(ifld)/znormt(ifld) - 1.0_jprb) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + enddo + write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& + & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(4), zmaxerr(1) + call gstats(6,1) + else + write(nout,'("Time step ",i6," took", f8.4)') jstep, ztstep(jstep) + endif + call gstats(3,1) +enddo + +!=================================================================================================== + +ztloop = (timef() - ztloop)/1000.0_jprd + +write(nout,'(" ")') +write(nout,'(a)') '======= End of spectral transforms =======' +write(nout,'(" ")') + +if (lprint_norms .or. ncheck > 0) then + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) + call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc) + + zmaxerr(:) = -999.0 + do ifld = 1, nflevg + zerr(3) = abs(real(znormvor1(ifld),kind=jprd)/real(znormvor(ifld),kind=jprd) - 1.0_jprd) + zmaxerr(3) = max(zmaxerr(3), zerr(3)) + if (verbosity >= 1) then + write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor1(ifld), zerr(3) + endif + enddo + do ifld = 1, nflevg + zerr(2) = abs(real(znormdiv1(ifld),kind=jprd)/real(znormdiv(ifld),kind=jprd) - 1.0d0) + zmaxerr(2) = max(zmaxerr(2),zerr(2)) + if (verbosity >= 1) then + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv1(ifld), zerr(2) + endif + enddo + do ifld = 1, nflevg + zerr(4) = abs(real(znormt1(ifld),kind=jprd)/real(znormt(ifld),kind=jprd) - 1.0d0) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + if (verbosity >= 1) then + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt1(ifld), zerr(4) + endif + enddo + do ifld = 1, 1 + zerr(1) = abs(real(znormsp1(ifld),kind=jprd)/real(znormsp(ifld),kind=jprd) - 1.0d0) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + if (verbosity >= 1) then + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp1(ifld), zerr(1) + endif + enddo + + ! maximum error across all fields + zmaxerrg = max(max(zmaxerr(1),zmaxerr(2)), max(zmaxerr(2), zmaxerr(3))) + + if (verbosity >= 1) write(nout,*) + write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) + write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) + write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) + write(nout,*) + write(nout,'("max error combined = = ",e10.3)') zmaxerrg + write(nout,*) + + if (ncheck > 0 .and. myproc == 1) then + ! If the maximum spectral norm error across all fields is greater than 100 times the machine + ! epsilon, fail the test + if (zmaxerrg > real(ncheck, jprb) * epsilon(1.0_jprb)) then + write(nout, '(a)') '*******************************' + write(nout, '(a)') 'Correctness test failed' + write(nout, '(a,1e7.2)') 'Maximum spectral norm error = ', zmaxerrg + write(nout, '(a,1e7.2)') 'Error tolerance = ', real(ncheck, jprb) * epsilon(1.0_jprb) + write(nout, '(a)') '*******************************' + error stop + endif + endif +endif + +if (luse_mpi) then + call mpl_allreduce(ztloop, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstep, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin, 'min', ldreprod=.false.) + + call mpl_allreduce(ztstep1, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg1, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax1, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin1, 'min', ldreprod=.false.) + + call mpl_allreduce(ztstep2, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg2, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax2, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin2, 'min', ldreprod=.false.) +endif + +ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters,jprd) +ztloop = ztloop/real(nproc,jprd) +ztstep(:) = ztstep(:)/real(nproc,jprd) + +call sort(ztstep,iters) +ztstepmed = ztstep(iters/2) + +ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters,jprd) +ztstep1(:) = ztstep1(:)/real(nproc,jprd) + +call sort(ztstep1, iters) +ztstepmed1 = ztstep1(iters/2) + +ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters,jprd) +ztstep2(:) = ztstep2(:)/real(nproc,jprd) + +call sort(ztstep2,iters) +ztstepmed2 = ztstep2(iters/2) + +write(nout,'(a)') '======= Start of time step stats =======' +write(nout,'(" ")') +write(nout,'("Inverse transforms")') +write(nout,'("------------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg1 +write(nout,'("min (s): ",f8.4)') ztstepmin1 +write(nout,'("max (s): ",f8.4)') ztstepmax1 +write(nout,'("med (s): ",f8.4)') ztstepmed1 +write(nout,'(" ")') +write(nout,'("Direct transforms")') +write(nout,'("-----------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg2 +write(nout,'("min (s): ",f8.4)') ztstepmin2 +write(nout,'("max (s): ",f8.4)') ztstepmax2 +write(nout,'("med (s): ",f8.4)') ztstepmed2 +write(nout,'(" ")') +write(nout,'("Inverse-direct transforms")') +write(nout,'("-------------------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg +write(nout,'("min (s): ",f8.4)') ztstepmin +write(nout,'("max (s): ",f8.4)') ztstepmax +write(nout,'("med (s): ",f8.4)') ztstepmed +write(nout,'("loop (s): ",f8.4)') ztloop +write(nout,'(" ")') +write(nout,'(a)') '======= End of time step stats =======' +write(nout,'(" ")') + +if (lstack) then + ! Gather stack usage statistics + istack = getstackusage() + if (myproc == 1) then + print 9000, istack + 9000 format("Stack utilisation information",/,& + &"=============================",//,& + &"Task size(bytes)",/,& + &"==== ===========",//,& + &" 1",11x,i10) + + do i = 2, nproc + call mpl_recv(istack, ksource=nprcids(i), ktag=i, cdstring='transform_test:') + print '(i4,11x,i10)', i, istack + enddo + else + call mpl_send(istack, kdest=nprcids(1), ktag=myproc, cdstring='transform_test:') + endif +endif + + +!=================================================================================================== +! Cleanup +!=================================================================================================== + +!deallocate(zgmv) +!deallocate(zgmvs) + +!=================================================================================================== + +if (lstats) then + call gstats(0,1) + call gstats_print(nout, zaveave, jpmaxstat) +endif + +if (lmeminfo) then + write(nout,*) + call ec_meminfo(nout, "", mpl_comm, kbarr=1, kiotask=-1, & + & kcall=1) +endif + +!=================================================================================================== +! Finalize MPI +!=================================================================================================== + +if (luse_mpi) then + call mpl_end(ldmeminfo=.false.) +endif + +!=================================================================================================== +! Close file +!=================================================================================================== + +if (nproc > 1) then + if (myproc /= 1) then + close(unit=nout) + endif +endif + +!=================================================================================================== + +contains + +!=================================================================================================== + +subroutine parse_grid(cgrid,ndgl,nloen) + + character(len=*) :: cgrid + integer, intent(inout) :: ndgl + integer, intent(inout), allocatable :: nloen(:) + integer :: ios + integer :: gaussian_number + read(cgrid(2:len_trim(cgrid)),*,IOSTAT=ios) gaussian_number + if (ios==0) then + ndgl = 2 * gaussian_number + allocate(nloen(ndgl)) + if (cgrid(1:1) == 'F') then ! Regular Gaussian grid + nloen(:) = gaussian_number * 4 + return + endif + if (cgrid(1:1) == 'O') then ! Octahedral Gaussian grid + do i = 1, ndgl / 2 + nloen(i) = 20 + 4 * (i - 1) + nloen(ndgl - i + 1) = nloen(i) + end do + return + endif + endif + call parsing_failed("ERROR: Unsupported grid specified: "// trim(cgrid)) + +end subroutine + +!=================================================================================================== + +function get_int_value(cname, iarg) result(value) + + integer :: value + character(len=*), intent(in) :: cname + integer, intent(inout) :: iarg + character(len=128) :: carg + integer :: stat + + carg = get_str_value(cname, iarg) + call str2int(carg, value, stat) + + if (stat /= 0) then + call parsing_failed("Invalid argument for " // trim(cname) // ": " // trim(carg)) + end if + +end function + +!=================================================================================================== + +function get_str_value(cname, iarg) result(value) + + character(len=128) :: value + character(len=*), intent(in) :: cname + integer, intent(inout) :: iarg + + iarg = iarg + 1 + call get_command_argument(iarg, value) + + if (value == "") then + call parsing_failed("Invalid argument for " // trim(cname) // ": no value provided") + end if + +end function + +!=================================================================================================== + +subroutine parsing_failed(message) + + character(len=*), intent(in) :: message + if (luse_mpi) call mpl_init(ldinfo=.false.) + if (ec_mpirank() == 0) then + write(nerr,"(a)") trim(message) + call print_help(unit=nerr) + endif + if (luse_mpi) call mpl_end(ldmeminfo=.false.) + stop + +end subroutine + +!=================================================================================================== + +subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & + & luseflt, nproma, verbosity, ldump_values, lprint_norms, & + & lmeminfo, nprtrv, nprtrw, ncheck) + + integer, intent(inout) :: nsmax ! Spectral truncation + character(len=16), intent(inout) :: cgrid ! Spectral truncation + integer, intent(inout) :: iters ! Number of iterations for transform test + integer, intent(inout) :: nfld ! Number of scalar fields + integer, intent(inout) :: nlev ! Number of vertical levels + logical, intent(inout) :: lvordiv ! Also transform vorticity/divergence + logical, intent(inout) :: lscders ! Compute scalar derivatives + logical, intent(inout) :: luvders ! Compute uv East-West derivatives + logical, intent(inout) :: luseflt ! Use fast Legendre transforms + integer, intent(inout) :: nproma ! NPROMA + integer, intent(inout) :: verbosity ! Level of verbosity + logical, intent(inout) :: ldump_values ! Dump values of grid point fields for debugging + logical, intent(inout) :: lprint_norms ! Calculate and print spectral norms of fields + logical, intent(inout) :: lmeminfo ! Show information from FIAT ec_meminfo routine at the + ! end + integer, intent(inout) :: nprtrv ! Size of V set (spectral decomposition) + integer, intent(inout) :: nprtrw ! Size of W set (spectral decomposition) + integer, intent(inout) :: ncheck ! The multiplier of the machine epsilon used as a + ! tolerance for correctness checking + + character(len=128) :: carg ! Storage variable for command line arguments + integer :: iarg = 1 ! Argument index + integer :: stat ! For storing success status of string->integer conversion + integer :: myproc + + do while (iarg <= command_argument_count()) + call get_command_argument(iarg, carg) + + select case(carg) + ! Parse help argument + case('-h', '--help') + if (luse_mpi) call mpl_init(ldinfo=.false.) + if (ec_mpirank()==0) call print_help() + if (luse_mpi) call mpl_end(ldmeminfo=.false.) + stop + ! Parse verbosity argument + case('-v') + verbosity = 1 + ! Parse number of iterations argument + case('-n', '--niter') + iters = get_int_value('-n', iarg) + if (iters < 1) then + call parsing_failed("Invalid argument for -n: must be > 0") + end if + ! Parse spectral truncation argument + case('-t', '--truncation') + nsmax = get_int_value('-t', iarg) + if (nsmax < 1) then + call parsing_failed("Invalid argument for -t: must be > 0") + end if + case('-g', '--grid'); cgrid = get_str_value('-g', iarg) + case('-f', '--nfld'); nfld = get_int_value('-f', iarg) + case('-l', '--nlev'); nlev = get_int_value('-l', iarg) + case('--vordiv'); lvordiv = .True. + case('--scders'); lscders = .True. + case('--uvders'); luvders = .True. + case('--flt'); luseflt = .True. + case('--nproma'); nproma = get_int_value('--nproma', iarg) + case('--dump-values'); ldump_values = .true. + case('--norms'); lprint_norms = .true. + case('--meminfo'); lmeminfo = .true. + case('--nprtrv'); nprtrv = get_int_value('--nprtrv', iarg) + case('--nprtrw'); nprtrw = get_int_value('--nprtrw', iarg) + case('-c', '--check'); ncheck = get_int_value('-c', iarg) + case default + call parsing_failed("Unrecognised argument: " // trim(carg)) + + end select + iarg = iarg + 1 + end do + + if (.not. lvordiv) then + luvders = .false. + endif + +end subroutine get_command_line_arguments + +!=================================================================================================== + +function cubic_octahedral_gaussian_grid(nsmax) result(cgrid) + + character(len=16) :: cgrid + integer, intent(in) :: nsmax + write(cgrid,'(a,i0)') 'O',nsmax+1 + +end function + +!=================================================================================================== + +subroutine str2int(str, int, stat) + + character(len=*), intent(in) :: str + integer, intent(out) :: int + integer, intent(out) :: stat + read(str, *, iostat=stat) int + +end subroutine str2int + +!=================================================================================================== + +subroutine sort(a, n) + + real(kind=jprd), intent(inout) :: a(n) + integer(kind=jpim), intent(in) :: n + + real(kind=jprd) :: x + + integer :: i, j + + do i = 2, n + x = a(i) + j = i - 1 + do while (j >= 1) + if (a(j) <= x) exit + a(j + 1) = a(j) + j = j - 1 + end do + a(j + 1) = x + end do + +end subroutine sort + +!=================================================================================================== + +subroutine print_help(unit) + + integer, optional :: unit + integer :: nout = 6 + if (present(unit)) then + nout = unit + endif + + write(nout, "(a)") "" + + if (jprb == jprd) then + write(nout, "(a)") "NAME ectrans-benchmark-dp" + else + write(nout, "(a)") "NAME ectrans-benchmark-sp" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "DESCRIPTION" + write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& + & between spectral " + if (jprb == jprd) then + write(nout, "(a)") " space and grid-point space (double-precision version)" + else + write(nout, "(a)") " space and grid-point space (single-precision version)" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "USAGE" + if (jprb == jprd) then + write(nout, "(a)") " ectrans-benchmark-dp [options]" + else + write(nout, "(a)") " ectrans-benchmark-sp [options]" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "OPTIONS" + write(nout, "(a)") " -h, --help Print this message" + write(nout, "(a)") " -v Run with verbose output" + write(nout, "(a)") " -t, --truncation T Run with this triangular spectral truncation& + & (default = 79)" + write(nout, "(a)") " -g, --grid GRID Run with this grid. Possible values: O, F" + write(nout, "(a)") " If not specified, O is used with N=truncation+1& + & (cubic relation)" + write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& + & iterations (default = 10)" + write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" + write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" + write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" + write(nout, "(a)") " --scders Compute scalar derivatives (default off)" + write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& + & when also --vordiv is given" + write(nout, "(a)") " --flt Run with fast Legendre transforms (default off)" + write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" + write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& + & fields" + write(nout, "(a)") " The computation of spectral norms will skew overall& + & timings" + write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& + & subroutine on memory usage, thread-binding etc." + write(nout, "(a)") " --nprtrv Size of V set in spectral decomposition" + write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" + write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& + & tolerance for correctness checking" + write(nout, "(a)") "" + write(nout, "(a)") "DEBUGGING" + write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" + write(nout, "(a)") "" + +end subroutine print_help + +!=================================================================================================== + +subroutine initialize_spectral_arrays(nsmax, zsp, sp3d) + + integer, intent(in) :: nsmax ! Spectral truncation + real(kind=jprb), intent(inout) :: zsp(:,:) ! Surface pressure + real(kind=jprb), intent(inout) :: sp3d(:,:,:) ! 3D fields + + integer(kind=jpim) :: nflevl + integer(kind=jpim) :: nfield + + integer :: i, j + + nflevl = size(sp3d, 1) + nfield = size(sp3d, 3) + + ! First initialize surface pressure + call initialize_2d_spectral_field(nsmax, zsp(1,:)) + + ! Then initialize all of the 3D fields + do i = 1, nflevl + do j = 1, nfield + call initialize_2d_spectral_field(nsmax, sp3d(i,:,j)) + end do + end do + +end subroutine initialize_spectral_arrays + +!=================================================================================================== + +subroutine initialize_2d_spectral_field(nsmax, field) + + integer, intent(in) :: nsmax ! Spectral truncation + real(kind=jprb), intent(inout) :: field(:) ! Field to initialize + + integer :: i, index, num_my_zon_wns + integer, allocatable :: my_zon_wns(:), nasm0(:) + + ! Choose a spherical harmonic to initialize arrays + integer :: m_num = 4 ! Zonal wavenumber + integer :: l_num = 19 ! Total wavenumber + + ! First initialise all spectral coefficients to zero + field(:) = 0.0 + + ! Get zonal wavenumbers this rank is responsible for + call trans_inq(knump=num_my_zon_wns) + allocate(my_zon_wns(num_my_zon_wns)) + call trans_inq(kmyms=my_zon_wns) + + ! If rank is responsible for the chosen zonal wavenumber... + if (any(my_zon_wns == m_num) ) then + ! Get array of spectral array addresses (this maps (m, n=m) to array index) + allocate(nasm0(0:nsmax)) + call trans_inq(kasm0=nasm0) + + ! Find out local array index of chosen spherical harmonic + index = nasm0(m_num) + 2 * (l_num - m_num) + 1 + + ! Set just that element to a constant value + field(index) = 1.0 + else + return + end if + +end subroutine initialize_2d_spectral_field + +!=================================================================================================== + +subroutine dump_gridpoint_field(jstep, myproc, nproma, ngpblks, fld, fldchar, noutdump) + + ! Dump a 2d field to a binary file. + + integer(kind=jpim), intent(in) :: jstep ! Time step, used for naming file + integer(kind=jpim), intent(in) :: myproc ! MPI rank, used for naming file + integer(kind=jpim), intent(in) :: nproma ! Size of nproma + integer(kind=jpim), intent(in) :: ngpblks ! Number of nproma blocks + real(kind=jprb) , intent(in) :: fld(nproma,ngpblks) ! 2D field + character , intent(in) :: fldchar ! Single character field identifier + integer(kind=jpim), intent(in) :: noutdump ! Tnit number for output file + + character(len=14) :: filename = "x.xxx.xxxx.dat" + + write(filename(1:1),'(a1)') fldchar + write(filename(3:5),'(i3.3)') jstep + write(filename(7:10),'(i4.4)') myproc + + open(noutdump, file=filename, form="unformatted") + write(noutdump) reshape(fld, (/ nproma*ngpblks /)) + close(noutdump) + +end subroutine dump_gridpoint_field + +!=================================================================================================== + +function detect_mpirun() result(lmpi_required) + logical :: lmpi_required + integer :: ilen + integer, parameter :: nvars = 5 + character(len=32), dimension(nvars) :: cmpirun_detect + character(len=4) :: clenv_dr_hook_assert_mpi_initialized + integer :: ivar + + ! Environment variables that are set when mpirun, srun, aprun, ... are used + cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi + cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe + cmpirun_detect(3) = 'PMI_SIZE' ! intel + cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm + cmpirun_detect(5) = 'ECTRANS_USE_MPI' ! forced + + lmpi_required = .false. + do ivar = 1, nvars + call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) + if (ilen > 0) then + lmpi_required = .true. + exit ! break + endif + enddo +end function + +!=================================================================================================== + +! Assign GSTATS labels to the main regions of ecTrans +subroutine gstats_labels + + call gstats_label(0, ' ', 'PROGRAM - Total') + call gstats_label(1, ' ', 'SETUP_TRANS0 - Setup ecTrans') + call gstats_label(2, ' ', 'SETUP_TRANS - Setup ecTrans handle') + call gstats_label(3, ' ', 'TIME STEP - Time step') + call gstats_label(4, ' ', 'INV_TRANS - Inverse transform') + call gstats_label(5, ' ', 'DIR_TRANS - Direct transform') + call gstats_label(6, ' ', 'NORMS - Norm comp. (optional)') + call gstats_label(102, ' ', 'LTINV_CTL - Inv. Legendre transform') + call gstats_label(103, ' ', 'LTDIR_CTL - Dir. Legendre transform') + call gstats_label(106, ' ', 'FTDIR_CTL - Dir. Fourier transform') + call gstats_label(107, ' ', 'FTINV_CTL - Inv. Fourier transform') + call gstats_label(140, ' ', 'SULEG - Comp. of Leg. poly.') + call gstats_label(152, ' ', 'LTINV_CTL - M to L transposition') + call gstats_label(153, ' ', 'LTDIR_CTL - L to M transposition') + call gstats_label(157, ' ', 'FTINV_CTL - L to G transposition') + call gstats_label(158, ' ', 'FTDIR_CTL - G to L transposition') + call gstats_label(400, ' ', 'GSTATS - GSTATS itself') + +end subroutine gstats_labels + +end program transform_test + +!=================================================================================================== diff --git a/src/programs/ectrans-lam-benchmark.F90 b/src/programs/ectrans-lam-benchmark.F90 new file mode 100644 index 0000000..810813b --- /dev/null +++ b/src/programs/ectrans-lam-benchmark.F90 @@ -0,0 +1,1493 @@ +! (C) Copyright 2014- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +program ectrans_lam_benchmark + +! +! Spectral transform test +! +! This test performs spectral to real and real to spectral transforms repeated in +! timed loop. +! +! 1) One "surface" field is always transformed: +! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngbplk) +! +! 2) A Multiple "3d" fields are transformed and can be disabled with "--nfld 0" +! +! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk) +! +! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and +! can be enabled with "--vordiv" +! +! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk) +! +! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2) +! This must be enabled with "--scders" +! +! 5) Optionally uv East-West derivate can be computed from vorticity/divergence. +! This must be enabled with "--vordiv --uvders" +! +! +! Authors : George Mozdzynski +! Willem Deconinck +! Ioan Hadade +! Sam Hatfield +! Daan Degrauwe + +use parkind1, only: jpim, jprb, jprd +use oml_mod ,only : oml_max_threads +use omp_lib, only: omp_get_wtime +use mpl_module +use yomgstats, only: jpmaxstat +use yomhook, only : dr_hook_init + +implicit none + +integer(kind=jpim) :: istack, getstackusage +real(kind=jprb), dimension(1) :: zmaxerr(5), zerr(5) +real(kind=jprb) :: zmaxerrg + +! Output unit numbers +integer(kind=jpim), parameter :: nerr = 0 ! Unit number for STDERR +integer(kind=jpim), parameter :: nout = 6 ! Unit number for STDOUT +integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output + +! Default parameters +integer(kind=jpim) :: nlon = 128 ! Zonal dimension +integer(kind=jpim) :: nlat = 128 ! Meridional dimension +integer(kind=jpim) :: nsmax = 0 ! Spectral meridional truncation +integer(kind=jpim) :: nmsmax = 0 ! Spectral zonal truncation +integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test +integer(kind=jpim) :: nfld = 1 ! Number of scalar fields +integer(kind=jpim) :: nlev = 1 ! Number of vertical levels + +integer(kind=jpim) :: nloen(1) ! only one value needed for LAM +integer(kind=jpim) :: nflevg +integer(kind=jpim) :: nspec2 +integer(kind=jpim) :: ngptot +integer(kind=jpim) :: ngptotg +integer(kind=jpim) :: ifld +integer(kind=jpim) :: jroc +integer(kind=jpim) :: jb +integer(kind=jpim) :: nspec2g +integer(kind=jpim) :: i +integer(kind=jpim) :: ja +integer(kind=jpim) :: ib +integer(kind=jpim) :: jprtrv + +integer(kind=jpim), allocatable :: nprcids(:) +integer(kind=jpim) :: myproc, jj +integer :: jstep + +real(kind=jprd) :: ztinit, ztloop, ztstepmax, ztstepmin, ztstepavg, ztstepmed +real(kind=jprd) :: ztstepmax1, ztstepmin1, ztstepavg1, ztstepmed1 +real(kind=jprd) :: ztstepmax2, ztstepmin2, ztstepavg2, ztstepmed2 +real(kind=jprd), allocatable :: ztstep(:), ztstep1(:), ztstep2(:) + +real(kind=jprb), allocatable :: znormsp(:), znormsp0(:), znormdiv(:), znormdiv0(:) +real(kind=jprb), allocatable :: znormvor(:), znormvor0(:), znormt(:), znormt0(:) +real(kind=jprd) :: zaveave(0:jpmaxstat) + +! Grid-point space data structures +real(kind=jprb), allocatable, target :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), allocatable, target :: zgmvs (:,:,:) ! Single level fields at t and t-dt +real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt + +! Spectral space data structures +real(kind=jprb), allocatable, target :: sp3d(:,:,:) +real(kind=jprb), pointer :: zspvor(:,:) => null() +real(kind=jprb), pointer :: zspdiv(:,:) => null() +real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() +real(kind=jprb), allocatable :: zspsc2(:,:) +real(kind=jprb), allocatable :: zmeanu(:), zmeanv(:) + +logical :: lstack = .false. ! Output stack info +logical :: luserpnm = .false. +logical :: lkeeprpnm = .false. +logical :: ltrace_stats = .false. +logical :: lstats_omp = .false. +logical :: lstats_comms = .false. +logical :: lstats_mpl = .false. +logical :: lstats = .false. ! gstats statistics +logical :: lbarrier_stats = .false. +logical :: lbarrier_stats2 = .false. +logical :: ldetailed_stats = .false. +logical :: lstats_alloc = .false. +logical :: lsyncstats = .false. +logical :: lstatscpu = .false. +logical :: lstats_mem = .false. +logical :: lxml_stats = .false. +logical :: lfftw = .false. ! Use FFTW for Fourier transforms +logical :: lvordiv = .false. +logical :: lscders = .false. +logical :: luvders = .false. +logical :: lprint_norms = .false. ! Calculate and print spectral norms +logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end + +integer(kind=jpim) :: nstats_mem = 0 +integer(kind=jpim) :: ntrace_stats = 0 +integer(kind=jpim) :: nprnt_stats = 1 + +! The multiplier of the machine epsilon used as a tolerance for correctness checking +! ncheck = 0 (the default) means that correctness checking is disabled +integer(kind=jpim) :: ncheck = 0 + +logical :: lmpoff = .false. ! Message passing switch + +! Verbosity level (0 or 1) +integer :: verbosity = 0 + +integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions +integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib +integer(kind=jpim) :: ncombflen = 1800000 ! Size of comm buffer + +integer(kind=jpim) :: nproc ! Number of procs +integer(kind=jpim) :: nthread +integer(kind=jpim) :: nprgpns = 0 ! Grid-point decomp +integer(kind=jpim) :: nprgpew = 0 ! Grid-point decomp +integer(kind=jpim) :: nprtrv = 0 ! Spectral decomp +integer(kind=jpim) :: nprtrw = 0 ! Spectral decomp +integer(kind=jpim) :: nspecresmin = 80 ! Minimum spectral resolution, for controlling nprtrw +integer(kind=jpim) :: mysetv +integer(kind=jpim) :: mysetw +integer(kind=jpim) :: mp_type = 2 ! Message passing type +integer(kind=jpim) :: mbx_size = 150000000 ! Mailbox size + +integer(kind=jpim), allocatable :: numll(:), ivset(:) +integer(kind=jpim) :: ivsetsc(1) + +integer(kind=jpim) :: nflevl + +! sumpini +integer(kind=jpim) :: isqr +logical :: lsync_trans = .false. ! Activate barrier sync + + +integer(kind=jpim) :: nproma = 0 +integer(kind=jpim) :: ngpblks +! locals +integer(kind=jpim) :: iprtrv +integer(kind=jpim) :: iprtrw +integer(kind=jpim) :: iprused, ilevpp, irest, ilev, jlev + +integer(kind=jpim) :: ndimgmv = 0 ! Third dim. of gmv "(nproma,nflevg,ndimgmv,ngpblks)" +integer(kind=jpim) :: ndimgmvs = 0 ! Second dim. gmvs "(nproma,ndimgmvs,ngpblks)" + +integer(kind=jpim) :: jbegin_uv = 0 +integer(kind=jpim) :: jend_uv = 0 +integer(kind=jpim) :: jbegin_sc = 0 +integer(kind=jpim) :: jend_sc = 0 +integer(kind=jpim) :: jbegin_scder_NS = 0 +integer(kind=jpim) :: jend_scder_NS = 0 +integer(kind=jpim) :: jbegin_scder_EW = 0 +integer(kind=jpim) :: jend_scder_EW = 0 +integer(kind=jpim) :: jbegin_uder_EW = 0 +integer(kind=jpim) :: jend_uder_EW = 0 +integer(kind=jpim) :: jbegin_vder_EW = 0 +integer(kind=jpim) :: jend_vder_EW = 0 + +logical :: ldump_values = .false. + +integer, external :: ec_mpirank +logical :: luse_mpi = .true. + +real(kind=jprb) :: zexwn, zeywn + +!=================================================================================================== + +#include "setup_trans0.h" +#include "esetup_trans.h" +#include "einv_trans.h" +#include "edir_trans.h" +#include "etrans_inq.h" +#include "especnorm.h" +#include "abor1.intfb.h" +#include "gstats_setup.intfb.h" +#include "ec_meminfo.intfb.h" + +!=================================================================================================== + +luse_mpi = detect_mpirun() + +! Setup +call get_command_line_arguments(nlon, nlat, nsmax, nmsmax, iters, nfld, nlev, lvordiv, lscders, luvders, & + & nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprgpns, nprgpew, nprtrv, nprtrw, ncheck) +! derived defaults +if ( nsmax == 0 ) nsmax = nlat/2-1 +if ( nmsmax == 0 ) nmsmax = nlon/2-1 +nflevg = nlev + +!=================================================================================================== + +if (luse_mpi) then + call mpl_init(ldinfo=(verbosity>=1)) + nproc = mpl_nproc() + myproc = mpl_myrank() +else + nproc = 1 + myproc = 1 + mpl_comm = -1 +endif +nthread = oml_max_threads() + +call dr_hook_init() + +!=================================================================================================== + +if( lstats ) call gstats(0,0) +ztinit = omp_get_wtime() + +! only output to stdout on pe 1 +!if (nproc > 1) then + !if (myproc /= 1) then + !open(unit=nout, file='output_'//char(myproc/10+48)//char(myproc+48)//'.dat') + !endif +!endif + +if (ldetailed_stats) then + lstats_omp = .true. + lstats_comms = .true. + lstats_mpl = .true. + lstatscpu = .true. + nprnt_stats = nproc +! lstats_mem = .true. +! lstats_alloc = .true. +endif + +!=================================================================================================== + +allocate(nprcids(nproc)) +do jj = 1, nproc + nprcids(jj) = jj +enddo + +if (nproc <= 1) then + lmpoff = .true. +endif + +! Compute nprgpns and nprgpew +! This version selects most square-like distribution +if (nproc == 0) nproc = 1 +if ( nprgpew == 0 .and. nprgpns == 0 ) then + isqr = int(sqrt(real(nproc,jprb))) + do ja = isqr, nproc + ib = nproc/ja + if (ja*ib == nproc) then + nprgpns = max(ja,ib) + nprgpew = min(ja,ib) + exit + endif + enddo +elseif (nprgpns == 0 ) then + nprgpns=nproc/nprgpew +elseif (nprgpew == 0 ) then + nprgpew=nproc/nprgpns +endif +if (nprgpns*nprgpew /= nproc) call abor1('transform_test:nprgpns*nprgpew /= nproc') + +! From sumpini, although this should be specified in namelist +if (nspecresmin == 0) nspecresmin = nproc + +! Compute nprtrv and nprtrw if not provided on the command line +if (nprtrv ==0 .and. nprtrw == 0 ) then + nprtrv=nprgpew + nprtrw=nprgpns +elseif (nprtrv == 0 ) then + nprtrv=nproc/nprtrw +elseif (nprtrw == 0 ) then + nprtrw=nproc/nprtrv +endif +if (nprtrv*nprtrw /= nproc) call abor1('transform_test:nprtrv*nprtrw /= nproc') + +mysetv=mod(myproc-1,nprtrv)+1 + +! Determine number of local levels for zonal and meridional fourier calculations +! based on the values of nflevg and nprtrv +allocate(numll(nprtrv)) +numll=nflevg/nprtrv +numll(1:modulo(nflevg,nprtrv))=numll(1:modulo(nflevg,nprtrv))+1 +ivsetsc(1)=min(nflevg+1, nprtrv) +nflevl = numll(mysetv) + +!=================================================================================================== +! Setup gstats +!=================================================================================================== + +if (lstats) then + call gstats_setup(nproc, myproc, nprcids, & + & lstats, lstatscpu, lsyncstats, ldetailed_stats, lbarrier_stats, lbarrier_stats2, & + & lstats_omp, lstats_comms, lstats_mem, nstats_mem, lstats_alloc, & + & ltrace_stats, ntrace_stats, nprnt_stats, lxml_stats) + call gstats_psut + + ! Assign labels to GSTATS regions + call gstats_labels +endif + +!=================================================================================================== +! Call ecTrans setup routines +!=================================================================================================== + +if (verbosity >= 1) write(nout,'(a)')'======= Setup ecTrans =======' + +if( lstats ) call gstats(1, 0) +call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & + & kmax_resol=nmax_resol, kpromatr=0, kprgpns=nprgpns, kprgpew=nprgpew, & + & kprtrw=nprtrw, kcombflen=ncombflen, ldsync_trans=lsync_trans, & + & ldalloperm=.true., ldmpoff=.not.luse_mpi) + if( lstats ) call gstats(1, 1) + + if( lstats ) call gstats(2, 0) +zexwn=1._jprb ! 2*pi/(nx*dx): spectral resolution +zeywn=1._jprb ! 2*pi/(ny*dy) +nloen=nlon +call esetup_trans(ksmax=nsmax, kmsmax=nmsmax, kdgl=nlat, kdgux=nlat, kloen=nloen, ldsplit=.true., & + & ldusefftw=lfftw,pexwn=zexwn,peywn=zeywn) + + if( lstats ) call gstats(2, 1) + +call etrans_inq(kspec2=nspec2, kspec2g=nspec2g, kgptot=ngptot, kgptotg=ngptotg) + +if (nproma == 0) then ! no blocking (default when not specified) + nproma = ngptot +endif + +! Calculate number of NPROMA blocks +ngpblks = (ngptot - 1)/nproma+1 + +!=================================================================================================== +! Print information before starting +!=================================================================================================== + +! Print configuration details +if (verbosity >= 0) then + write(nout,'(" ")') + write(nout,'(a)')'======= Start of runtime parameters =======' + write(nout,'(" ")') + write(nout,'("nlon ",i0)') nlon + write(nout,'("nlat ",i0)') nlat + write(nout,'("nsmax ",i0)') nsmax + write(nout,'("nmsmax ",i0)') nmsmax + write(nout,'("nproc ",i0)') nproc + write(nout,'("nthread ",i0)') nthread + write(nout,'("nprgpns ",i0)') nprgpns + write(nout,'("nprgpew ",i0)') nprgpew + write(nout,'("nprtrw ",i0)') nprtrw + write(nout,'("nprtrv ",i0)') nprtrv + write(nout,'("ngptot ",i0)') ngptot + write(nout,'("ngptotg ",i0)') ngptotg + write(nout,'("nfld ",i0)') nfld + write(nout,'("nlev ",i0)') nlev + write(nout,'("nflevl ",i0)') nflevl + write(nout,'("nproma ",i0)') nproma + write(nout,'("ngpblks ",i0)') ngpblks + write(nout,'("nspec2 ",i0)') nspec2 + write(nout,'("nspec2g ",i0)') nspec2g + write(nout,'("lvordiv ",l)') lvordiv + write(nout,'("lscders ",l)') lscders + write(nout,'("luvders ",l)') luvders + write(nout,'(" ")') + write(nout,'(a)') '======= End of runtime parameters =======' + write(nout,'(" ")') +end if + +!=================================================================================================== +! Allocate and Initialize spectral arrays +!=================================================================================================== + +! Allocate spectral arrays +! Try to mimick IFS layout as much as possible +nullify(zspvor) +nullify(zspdiv) +nullify(zspsc3a) +allocate(sp3d(nflevl,nspec2,2+nfld)) +allocate(zspsc2(1,nspec2)) +allocate(zmeanu(nflevl),zmeanv(nflevl)) +zmeanu(:)=0._jprb +zmeanv(:)=0._jprb + +call initialize_spectral_arrays(nsmax, nmsmax, zspsc2, sp3d) + +! Point convenience variables to storage variable sp3d +zspvor => sp3d(:,:,1) +zspdiv => sp3d(:,:,2) +zspsc3a => sp3d(:,:,3:3+(nfld-1)) + +!=================================================================================================== +! Allocate gridpoint arrays +!=================================================================================================== + +allocate(ivset(nflevg)) + +! Compute spectral distribution +ilev = 0 +do jb = 1, nprtrv + do jlev=1, numll(jb) + ilev = ilev + 1 + ivset(ilev) = jb + enddo +enddo + +! Allocate grid-point arrays +if (lvordiv) then + jbegin_uv = 1 + jend_uv = 2 +endif +if (luvders) then + jbegin_uder_EW = jend_uv + 1 + jend_uder_EW = jbegin_uder_EW + 1 + jbegin_vder_EW = jend_uder_EW + 1 + jend_vder_EW = jbegin_vder_EW + 1 +else + jbegin_uder_EW = jend_uv + jend_uder_EW = jend_uv + jbegin_vder_EW = jend_uv + jend_vder_EW = jend_uv +endif + +jbegin_sc = jend_vder_EW + 1 +jend_sc = jend_vder_EW + nfld + +if (lscders) then + ndimgmvs = 3 + jbegin_scder_NS = jend_sc + 1 + jend_scder_NS = jend_sc + nfld + jbegin_scder_EW = jend_scder_NS + 1 + jend_scder_EW = jend_scder_NS + nfld +else + ndimgmvs = 1 + jbegin_scder_NS = jend_sc + jend_scder_NS = jend_sc + jbegin_scder_EW = jend_sc + jend_scder_EW = jend_sc +endif + +ndimgmv = jend_scder_EW + +!allocate(zgmv(nproma,nflevg,ndimgmv,ngpblks)) +!allocate(zgmvs(nproma,ndimgmvs,ngpblks)) +!zgpuv => zgmv(:,:,1:jend_vder_EW,:) +!zgp3a => zgmv(:,:,jbegin_sc:jend_scder_EW,:) +!zgp2 => zgmvs(:,:,:) + +! allocate separately since non-contiguous host-device transfers are not supported. +allocate(zgpuv(nproma,nflevg,jend_vder_EW,ngpblks)) +allocate(zgp3a(nproma,nflevg,jend_scder_EW-jbegin_sc+1,ngpblks)) +allocate(zgp2(nproma,ndimgmvs,ngpblks)) + +zgp2=0. +zgp3a=0. +zgpuv=0. + +!=================================================================================================== +! Allocate norm arrays +!=================================================================================================== + +if (lprint_norms .or. ncheck > 0) then + allocate(znormsp(1)) + allocate(znormsp0(1)) + allocate(znormvor(nflevg)) + allocate(znormvor0(nflevg)) + allocate(znormdiv(nflevg)) + allocate(znormdiv0(nflevg)) + allocate(znormt(nflevg)) + allocate(znormt0(nflevg)) + + call especnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor0, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv0, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt0, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspsc2(1:1,:), pnorm=znormsp0, kvset=ivsetsc) + + if (verbosity >= 1 .and. myproc == 1) then + do ifld = 1, nflevg + write(nout,'("norm zspvor( ",i4,",:) = ",f20.15)') ifld, znormvor0(ifld) + enddo + do ifld = 1, nflevg + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv0(ifld) + enddo + do ifld = 1, nflevg + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt0(ifld) + enddo + do ifld = 1, 1 + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp0(ifld) + enddo + endif +endif + +!=================================================================================================== +! Setup timers +!=================================================================================================== + +ztinit = (omp_get_wtime() - ztinit) + +if (verbosity >= 0) then + write(nout,'(" ")') + write(nout,'(a,i6,a,f9.2,a)') "transform_test initialisation, on",nproc,& + & " tasks, took",ztinit," sec" + write(nout,'(" ")') +endif + +if (iters <= 0) call abor1('transform_test:iters <= 0') + +allocate(ztstep(iters)) +allocate(ztstep1(iters)) +allocate(ztstep2(iters)) + +ztstepavg = 0._jprd +ztstepmax = 0._jprd +ztstepmin = 9999999999999999._jprd +ztstepavg1 = 0._jprd +ztstepmax1 = 0._jprd +ztstepmin1 = 9999999999999999._jprd +ztstepavg2 = 0._jprd +ztstepmax2 = 0._jprd +ztstepmin2 = 9999999999999999._jprd + +!================================================================================================= +! Dump the values to disk, for debugging only +!================================================================================================= + +if (ldump_values) then + ! dump a field to a binary file + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc2(1,:),ivsetsc(1:1), 'S', noutdump) + if (lvordiv) then + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspdiv(1,:),ivset(1:1), 'D', noutdump) + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspvor(1,:),ivset(1:1), 'V', noutdump) + endif + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc3a(1,:,1),ivset(1:1), 'T', noutdump) +endif + +write(nout,'(a)') '======= Start of spectral transforms =======' +write(nout,'(" ")') + +ztloop = omp_get_wtime() + +!=================================================================================================== +! Do spectral transform loop +!=================================================================================================== + +do jstep = 1, iters + if( lstats ) call gstats(3,0) + ztstep(jstep) = omp_get_wtime() + + !================================================================================================= + ! Do inverse transform + !================================================================================================= + + ztstep1(jstep) = omp_get_wtime() + if( lstats ) call gstats(4,0) + if (lvordiv) then + + call einv_trans(kresol=1, kproma=nproma, & + & pspsc2=zspsc2, & ! spectral surface pressure + & pspvor=zspvor, & ! spectral vorticity + & pspdiv=zspdiv, & ! spectral divergence + & pspsc3a=zspsc3a, & ! spectral scalars + & ldscders=lscders, & + & ldvorgp=.false., & ! no gridpoint vorticity + & lddivgp=.false., & ! no gridpoint divergence + & lduvder=luvders, & + & kvsetuv=ivset, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pgp2=zgp2, & + & pgpuv=zgpuv, & + & pgp3a=zgp3a, & + & pmeanu=zmeanu, & + & pmeanv=zmeanv) + + else + + call einv_trans(kresol=1, kproma=nproma, & + & pspsc2=zspsc2, & ! spectral surface pressure + & pspsc3a=zspsc3a, & ! spectral scalars + & ldscders=lscders, & ! scalar derivatives + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pgp2=zgp2, & + & pgp3a=zgp3a) + + endif + + if( lstats ) call gstats(4,1) + + ztstep1(jstep) = (omp_get_wtime() - ztstep1(jstep)) + + !================================================================================================= + ! While in grid point space, dump the values to disk, for debugging only + !================================================================================================= + + if (ldump_values) then + ! dump a field to a binary file + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgp2(:,1,:), 'S', noutdump) + if (lvordiv) then + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgpuv(:,nflevg,1,:), 'U', noutdump) + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgpuv(:,nflevg,2,:), 'V', noutdump) + endif + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgp3a(:,nflevg,1,:), 'T', noutdump) + endif + + !================================================================================================= + ! Do direct transform + !================================================================================================= + + ztstep2(jstep) = omp_get_wtime() + + if( lstats ) call gstats(5,0) + + + if (lvordiv) then + call edir_trans(kresol=1, kproma=nproma, & + & pgp2=zgp2(:,1:1,:), & + & pgpuv=zgpuv(:,:,1:2,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), & + & pspvor=zspvor, & + & pspdiv=zspdiv, & + & pspsc2=zspsc2, & + & pspsc3a=zspsc3a, & + & kvsetuv=ivset, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pmeanu=zmeanu, & + & pmeanv=zmeanv) + else + + call edir_trans(kresol=1, kproma=nproma, & + & pgp2=zgp2(:,1:1,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), & + & pspsc2=zspsc2, & + & pspsc3a=zspsc3a, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset) + endif + if( lstats ) call gstats(5,1) + ztstep2(jstep) = (omp_get_wtime() - ztstep2(jstep)) + + !================================================================================================= + ! Dump the values to disk, for debugging only + !================================================================================================= + + if (ldump_values) then + ! dump a field to a binary file + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc2(1,:),ivsetsc(1:1), 'S', noutdump) + if (lvordiv) then + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspdiv(1,:),ivset(1), 'D', noutdump) + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspvor(1,:),ivset(1:1), 'V', noutdump) + endif + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc3a(1,:,1),ivset(1:1), 'T', noutdump) + endif + + !================================================================================================= + ! Calculate timings + !================================================================================================= + + ztstep(jstep) = (omp_get_wtime() - ztstep(jstep)) + + ztstepavg = ztstepavg + ztstep(jstep) + ztstepmin = min(ztstep(jstep), ztstepmin) + ztstepmax = max(ztstep(jstep), ztstepmax) + + ztstepavg1 = ztstepavg1 + ztstep1(jstep) + ztstepmin1 = min(ztstep1(jstep), ztstepmin1) + ztstepmax1 = max(ztstep1(jstep), ztstepmax1) + + ztstepavg2 = ztstepavg2 + ztstep2(jstep) + ztstepmin2 = min(ztstep2(jstep), ztstepmin2) + ztstepmax2 = max(ztstep2(jstep), ztstepmax2) + + !================================================================================================= + ! Print norms + !================================================================================================= + + if (lprint_norms) then + if( lstats ) call gstats(6,0) + call especnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc(1:1)) + call especnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) + + if ( myproc == 1 ) then + + ! Surface pressure + zmaxerr(:) = -999.0 + do ifld = 1, 1 + zerr(1) = abs(znormsp(ifld)/znormsp0(ifld) - 1.0_jprb) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + enddo + ! Divergence + do ifld = 1, nflevg + zerr(2) = abs(znormdiv(ifld)/znormdiv0(ifld) - 1.0_jprb) + zmaxerr(2) = max(zmaxerr(2), zerr(2)) + enddo + ! Vorticity + do ifld = 1, nflevg + zerr(3) = abs(znormvor(ifld)/znormvor0(ifld) - 1.0_jprb) + zmaxerr(3) = max(zmaxerr(3),zerr(3)) + enddo + ! Temperature + do ifld = 1, nflevg + zerr(4) = abs(znormt(ifld)/znormt0(ifld) - 1.0_jprb) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + enddo + write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& + & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(4), zmaxerr(1) + if( lstats )call gstats(6,1) + else + write(nout,'("Time step ",i6," took", f8.4)') jstep, ztstep(jstep) + endif + + endif + + if( lstats ) call gstats(3,1) + +enddo + +!=================================================================================================== + +ztloop = (omp_get_wtime() - ztloop) + +write(nout,'(" ")') +write(nout,'(a)') '======= End of spectral transforms =======' +write(nout,'(" ")') + +if (lprint_norms .or. ncheck > 0) then + call especnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) + call especnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) + call especnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) + call especnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc) + + if ( myproc == 1 ) then + + zmaxerr(:) = -999.0 + do ifld = 1, nflevg + zerr(3) = abs(real(znormvor(ifld),kind=jprd)/real(znormvor0(ifld),kind=jprd) - 1.0_jprd) + zmaxerr(3) = max(zmaxerr(3), zerr(3)) + if (verbosity >= 1) then + write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor0(ifld), zerr(3) + endif + enddo + do ifld = 1, nflevg + zerr(2) = abs(real(znormdiv(ifld),kind=jprd)/real(znormdiv0(ifld),kind=jprd) - 1.0d0) + zmaxerr(2) = max(zmaxerr(2),zerr(2)) + if (verbosity >= 1) then + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv0(ifld), zerr(2) + endif + enddo + do ifld = 1, nflevg + zerr(4) = abs(real(znormt(ifld),kind=jprd)/real(znormt0(ifld),kind=jprd) - 1.0d0) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + if (verbosity >= 1) then + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt0(ifld), zerr(4) + endif + enddo + do ifld = 1, 1 + zerr(1) = abs(real(znormsp(ifld),kind=jprd)/real(znormsp0(ifld),kind=jprd) - 1.0d0) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + if (verbosity >= 1) then + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp0(ifld), zerr(1) + endif + enddo + + ! maximum error across all fields + zmaxerrg = max(max(zmaxerr(1),zmaxerr(2)), max(zmaxerr(2), zmaxerr(3))) + + if (verbosity >= 1) write(nout,*) + write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) + write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) + write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) + write(nout,*) + write(nout,'("max error combined = = ",e10.3)') zmaxerrg + write(nout,*) + + if (ncheck > 0) then + ! If the maximum spectral norm error across all fields is greater than 100 times the machine + ! epsilon, fail the test + if (zmaxerrg > real(ncheck, jprb) * epsilon(1.0_jprb)) then + write(nout, '(a)') '*******************************' + write(nout, '(a)') 'Correctness test failed' + write(nout, '(a,1e7.2)') 'Maximum spectral norm error = ', zmaxerrg + write(nout, '(a,1e7.2)') 'Error tolerance = ', real(ncheck, jprb) * epsilon(1.0_jprb) + write(nout, '(a)') '*******************************' + error stop + endif + endif + endif +endif + +if (luse_mpi) then + call mpl_allreduce(ztloop, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstep, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin, 'min', ldreprod=.false.) + + call mpl_allreduce(ztstep1, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg1, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax1, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin1, 'min', ldreprod=.false.) + + call mpl_allreduce(ztstep2, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg2, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax2, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin2, 'min', ldreprod=.false.) +endif + +ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters,jprd) +ztloop = ztloop/real(nproc,jprd) +ztstep(:) = ztstep(:)/real(nproc,jprd) + +call sort(ztstep,iters) +ztstepmed = ztstep(iters/2) + +ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters,jprd) +ztstep1(:) = ztstep1(:)/real(nproc,jprd) + +call sort(ztstep1, iters) +ztstepmed1 = ztstep1(iters/2) + +ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters,jprd) +ztstep2(:) = ztstep2(:)/real(nproc,jprd) + +call sort(ztstep2,iters) +ztstepmed2 = ztstep2(iters/2) + + +write(nout,'(a)') '======= Start of time step stats =======' +write(nout,'(" ")') +write(nout,'("Inverse transforms")') +write(nout,'("------------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg1 +write(nout,'("min (s): ",f8.4)') ztstepmin1 +write(nout,'("max (s): ",f8.4)') ztstepmax1 +write(nout,'("med (s): ",f8.4)') ztstepmed1 +write(nout,'(" ")') +write(nout,'("Direct transforms")') +write(nout,'("-----------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg2 +write(nout,'("min (s): ",f8.4)') ztstepmin2 +write(nout,'("max (s): ",f8.4)') ztstepmax2 +write(nout,'("med (s): ",f8.4)') ztstepmed2 +write(nout,'(" ")') +write(nout,'("Inverse-direct transforms")') +write(nout,'("-------------------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg +write(nout,'("min (s): ",f8.4)') ztstepmin +write(nout,'("max (s): ",f8.4)') ztstepmax +write(nout,'("med (s): ",f8.4)') ztstepmed +write(nout,'("loop (s): ",f8.4)') ztloop +write(nout,'(" ")') +write(nout,'(a)') '======= End of time step stats =======' +write(nout,'(" ")') + +if (lstack) then + ! Gather stack usage statistics + istack = getstackusage() + if (myproc == 1) then + print 9000, istack + 9000 format("Stack utilisation information",/,& + &"=============================",//,& + &"Task size(bytes)",/,& + &"==== ===========",//,& + &" 1",11x,i10) + + do i = 2, nproc + call mpl_recv(istack, ksource=nprcids(i), ktag=i, cdstring='transform_test:') + print '(i4,11x,i10)', i, istack + enddo + else + call mpl_send(istack, kdest=nprcids(1), ktag=myproc, cdstring='transform_test:') + endif +endif + +!=================================================================================================== +! Cleanup +!=================================================================================================== + +! TODO: many more arrays to deallocate + +!=================================================================================================== + +if (lstats) then + call gstats(0,1) + call gstats_print(nout, zaveave, jpmaxstat) +endif + +if (lmeminfo) then + write(nout,*) + call ec_meminfo(nout, "", mpl_comm, kbarr=1, kiotask=-1, & + & kcall=1) +endif + +!=================================================================================================== +! Finalize MPI +!=================================================================================================== + +if (luse_mpi) then + call mpl_end(ldmeminfo=.false.) +endif + +!=================================================================================================== +! Close file +!=================================================================================================== + +if (nproc > 1) then + if (myproc /= 1) then + close(unit=nout) + endif +endif + +!=================================================================================================== + +contains + +!=================================================================================================== + +function get_int_value(cname, iarg) result(value) + + integer :: value + character(len=*), intent(in) :: cname + integer, intent(inout) :: iarg + character(len=128) :: carg + integer :: stat + + carg = get_str_value(cname, iarg) + call str2int(carg, value, stat) + + if (stat /= 0) then + call parsing_failed("Invalid argument for " // trim(cname) // ": " // trim(carg)) + end if + +end function + +!=================================================================================================== + +function get_str_value(cname, iarg) result(value) + + character(len=128) :: value + character(len=*), intent(in) :: cname + integer, intent(inout) :: iarg + + iarg = iarg + 1 + call get_command_argument(iarg, value) + + if (value == "") then + call parsing_failed("Invalid argument for " // trim(cname) // ": no value provided") + end if + +end function + +!=================================================================================================== + +subroutine parsing_failed(message) + + character(len=*), intent(in) :: message + if (luse_mpi) call mpl_init(ldinfo=.false.) + if (ec_mpirank() == 0) then + write(nerr,"(a)") trim(message) + call print_help(unit=nerr) + endif + if (luse_mpi) call mpl_end(ldmeminfo=.false.) + stop + +end subroutine + +!=================================================================================================== + +subroutine get_command_line_arguments(nlon, nlat, nsmax, nmsmax, & + & iters, nfld, nlev, lvordiv, lscders, luvders, & + & nproma, verbosity, ldump_values, lprint_norms, & + & lmeminfo, nprgpns, nprgpew, nprtrv, nprtrw, ncheck) + + integer, intent(inout) :: nlon ! Zonal dimension + integer, intent(inout) :: nlat ! Meridional dimension + integer, intent(inout) :: nsmax ! Meridional truncation + integer, intent(inout) :: nmsmax ! Zonal trunciation + integer, intent(inout) :: iters ! Number of iterations for transform test + integer, intent(inout) :: nfld ! Number of scalar fields + integer, intent(inout) :: nlev ! Number of vertical levels + logical, intent(inout) :: lvordiv ! Also transform vorticity/divergence + logical, intent(inout) :: lscders ! Compute scalar derivatives + logical, intent(inout) :: luvders ! Compute uv East-West derivatives + integer, intent(inout) :: nproma ! NPROMA + integer, intent(inout) :: verbosity ! Level of verbosity + logical, intent(inout) :: ldump_values ! Dump values of grid point fields for debugging + logical, intent(inout) :: lprint_norms ! Calculate and print spectral norms of fields + logical, intent(inout) :: lmeminfo ! Show information from FIAT ec_meminfo routine at the + ! end + integer, intent(inout) :: nprgpns ! Size of NS set (gridpoint decomposition) + integer, intent(inout) :: nprgpew ! Size of EW set (gridpoint decomposition) + integer, intent(inout) :: nprtrv ! Size of V set (spectral decomposition) + integer, intent(inout) :: nprtrw ! Size of W set (spectral decomposition) + integer, intent(inout) :: ncheck ! The multiplier of the machine epsilon used as a + ! tolerance for correctness checking + + character(len=128) :: carg ! Storage variable for command line arguments + integer :: iarg = 1 ! Argument index + integer :: stat ! For storing success status of string->integer conversion + integer :: myproc + + do while (iarg <= command_argument_count()) + call get_command_argument(iarg, carg) + + select case(carg) + ! Parse help argument + case('-h', '--help') + if (luse_mpi) call mpl_init(ldinfo=.false.) + if (ec_mpirank()==0) call print_help() + if (luse_mpi) call mpl_end(ldmeminfo=.false.) + stop + ! Parse verbosity argument + case('-v') + verbosity = 1 + ! Parse number of iterations argument + case('-n', '--niter') + iters = get_int_value('-n', iarg) + if (iters < 1) then + call parsing_failed("Invalid argument for -n: must be > 0") + end if + ! Parse spectral truncation argument + case('--nlon'); nlon = get_int_value('--nlon', iarg) + case('--nlat'); nlat = get_int_value('--nlat', iarg) + case('--nsmax'); nsmax = get_int_value('--nsmax', iarg) + case('--nmsmax'); nmsmax = get_int_value('--nmsmax', iarg) + case('-f', '--nfld'); nfld = get_int_value('-f', iarg) + case('-l', '--nlev'); nlev = get_int_value('-l', iarg) + case('--vordiv'); lvordiv = .True. + case('--scders'); lscders = .True. + case('--uvders'); luvders = .True. + case('--nproma'); nproma = get_int_value('--nproma', iarg) + case('--dump-values'); ldump_values = .true. + case('--norms'); lprint_norms = .true. + case('--meminfo'); lmeminfo = .true. + case('--nprgpns'); nprgpns = get_int_value('--nprgpns', iarg) + case('--nprgpew'); nprgpew = get_int_value('--nprgpew', iarg) + case('--nprtrv'); nprtrv = get_int_value('--nprtrv', iarg) + case('--nprtrw'); nprtrw = get_int_value('--nprtrw', iarg) + case('-c', '--check'); ncheck = get_int_value('-c', iarg) + case default + call parsing_failed("Unrecognised argument: " // trim(carg)) + + end select + iarg = iarg + 1 + end do + + if (.not. lvordiv) then + luvders = .false. + endif + +end subroutine get_command_line_arguments + +!=================================================================================================== + +subroutine str2int(str, int, stat) + + character(len=*), intent(in) :: str + integer, intent(out) :: int + integer, intent(out) :: stat + read(str, *, iostat=stat) int + +end subroutine str2int + +!=================================================================================================== + +subroutine sort(a, n) + + real(kind=jprd), intent(inout) :: a(n) + integer(kind=jpim), intent(in) :: n + + real(kind=jprd) :: x + + integer :: i, j + + do i = 2, n + x = a(i) + j = i - 1 + do while (j >= 1) + if (a(j) <= x) exit + a(j + 1) = a(j) + j = j - 1 + end do + a(j + 1) = x + end do + +end subroutine sort + +!=================================================================================================== + +subroutine print_help(unit) + + integer, optional :: unit + integer :: nout = 6 + if (present(unit)) then + nout = unit + endif + + write(nout, "(a)") "" + + if (jprb == jprd) then + write(nout, "(a)") "NAME ectrans-lam-benchmark-dp" + else + write(nout, "(a)") "NAME ectrans-lam-benchmark-sp" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "DESCRIPTION" + write(nout, "(a)") " This program tests ecTrans-lam by transforming fields back and forth& + & between spectral " + if (jprb == jprd) then + write(nout, "(a)") " space and grid-point space (double-precision version)" + else + write(nout, "(a)") " space and grid-point space (single-precision version)" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "USAGE" + if (jprb == jprd) then + write(nout, "(a)") " ectrans-lam-benchmark-dp [options]" + else + write(nout, "(a)") " ectrans-lam-benchmark-sp [options]" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "OPTIONS" + write(nout, "(a)") " -h, --help Print this message" + write(nout, "(a)") " -v Run with verbose output" + write(nout, "(a)") " --nlon NLON Number of gridpoints in zonal direction (default = 128)" + write(nout, "(a)") " --nlat NLAT Number of gridpoints in meridional direction (default = 128)" + write(nout, "(a)") " --nsmax NSMAX Spectral truncation in meridional direction (default = NLAT/2-1)" + write(nout, "(a)") " --nmsmax NMSMAX Spectral truncation in zonal direction (default = NLON/2-1)" + write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& + & iterations (default = 10)" + write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" + write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" + write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" + write(nout, "(a)") " --scders Compute scalar derivatives (default off)" + write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& + & when also --vordiv is given" + write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" + write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& + & fields" + write(nout, "(a)") " The computation of spectral norms will skew overall& + & timings" + write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& + & subroutine on memory usage, thread-binding etc." + write(nout, "(a)") " --nprgpew Size of East-West set in gridpoint decomposition" + write(nout, "(a)") " --nprgpns Size of North-South set in gridpoint decomposition" + write(nout, "(a)") " --nprtrv Size of Vertical set in spectral decomposition" + write(nout, "(a)") " --nprtrw Size of Wave set in spectral decomposition" + write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& + & tolerance for correctness checking" + write(nout, "(a)") "" + write(nout, "(a)") "DEBUGGING" + write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" + write(nout, "(a)") "" + +end subroutine print_help + +!=================================================================================================== + +subroutine initialize_spectral_arrays(nsmax, nmsmax, zsp, sp3d) + + integer, intent(in) :: nsmax ! Spectral truncation in meridional direction + integer, intent(in) :: nmsmax ! Spectral truncation in zonal direction + real(kind=jprb), intent(inout) :: zsp(:,:) ! Surface pressure + real(kind=jprb), intent(inout) :: sp3d(:,:,:) ! 3D fields + + integer(kind=jpim) :: nflevl + integer(kind=jpim) :: nfield + + integer :: i, j + + nflevl = size(sp3d, 1) + nfield = size(sp3d, 3) + + ! First initialize surface pressure + call initialize_2d_spectral_field(nsmax, nmsmax, zsp(1,:)) + + ! Then initialize all of the 3D fields + do i = 1, nflevl + do j = 1, nfield + call initialize_2d_spectral_field(nsmax, nmsmax, sp3d(i,:,j)) + end do + end do + +end subroutine initialize_spectral_arrays + +!=================================================================================================== + +subroutine initialize_2d_spectral_field(nsmax, nmsmax, field) + + integer, intent(in) :: nsmax ! Spectral truncation in meridional direction + integer, intent(in) :: nmsmax ! Spectral truncation in zonal direction + real(kind=jprb), intent(inout) :: field(:) ! Field to initialize + + integer :: ispec, kspec2 + integer, allocatable :: my_km(:), my_kn(:) + real(kind=jprb) :: zz(1) ! random amplitude + + ! Choose a spherical harmonic to initialize arrays + integer :: m_num = 4 ! Zonal wavenumber + integer :: n_num = 12 ! Meridional wavenumber + logical, save :: llfirst=.true. + integer :: seed(20) + integer(kind=jpim) :: jcmp + seed=20220428 + + if (llfirst) call random_seed(put=seed) + call random_number(zz) + jcmp=int(4*zz(1)) + call random_number(zz) + + llfirst=.false. + + ! First initialise all spectral coefficients to zero + field(:) = 0.0 + + ! make sure wavenumbers are within truncation + if ( m_num>nmsmax .or. n_num > nsmax .or. & + & ( nsmax>0 .and. nmsmax>0 .and. ( (m_num/real(nmsmax))**2+(n_num/real(nsmax))**2 ) > 1.) ) then + write (nerr,*) + write (nerr,*) 'WARNING: INITIAL WAVENUMBERS OUTSIDE OF TRUNCATION! ' + write (nerr,*) 'm_num = ',m_num,'; nmsmax = ',nmsmax,'; n_num = ',n_num,'; nsmax = ',nsmax,& + & '; ellips check: ',(m_num/real(nmsmax))**2+(n_num/real(nsmax))**2 + write (nerr,*) 'USING (kx=',NMSMAX/2,', ky=', NSMAX/2,') instead' + write (nerr,*) + m_num=nmsmax/2 + n_num=nsmax/2 + endif + + ! Get wavenumbers this rank is responsible for + call etrans_inq(kspec2=kspec2) + allocate(my_kn(kspec2),my_km(kspec2)) + call etrans_inq(knvalue=my_kn,kmvalue=my_km) + + ! If rank is responsible for the chosen zonal wavenumber... + do ispec=1,nspec2,4 + if ( my_kn(ispec)== n_num .and. my_km(ispec) == m_num ) then + field(ispec)=1.0 ! cos*cos + !field(ispec+1)=1.0 ! cos*sin + !field(ispec+2)=1.0 ! sin*cos + !field(ispec+3)=1.0 ! sin*sin + !field(ispec+jcmp)=zz(1) ! random component + end if + enddo + + ! random power spectrum + call random_number(field) + field=2*field-1. + ! set some components to zero because they are unphysical + do ispec=1,nspec2,4 + if ( my_kn(ispec)== 0 .and. my_km(ispec) == 0 ) field(ispec:ispec+3)=0. ! remove mean value for vorticity and divergence + if ( my_kn(ispec)== 0 ) field(ispec+1:ispec+3:2)=0. ! remove sine component on zero-wavenumber + if ( my_kn(ispec)== nmsmax ) field(ispec+1:ispec+3:2)=0. ! remove sine component on last-wavenumber + if ( my_km(ispec)== 0 ) field(ispec+2:ispec+3)=0. ! remove sine component on zero-wavenumber + if ( my_km(ispec)== nsmax ) field(ispec+2:ispec+3)=0. ! remove sine component on last-wavenumber + enddo + ! scale according to wavenumber**2 + do ispec=1,nspec2 + field(ispec)=field(ispec)/(0.01+(my_kn(ispec)/real(nsmax))**2+(my_km(ispec)/real(nmsmax))**2) + enddo + +end subroutine initialize_2d_spectral_field + +!=================================================================================================== + +subroutine dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, fld, fldchar, noutdump) + + ! Dump a 2d gridpoint field to screen or a binary file. + + integer(kind=jpim), intent(in) :: jstep ! Time step, used for naming file + integer(kind=jpim), intent(in) :: myproc ! MPI rank, used for naming file + integer(kind=jpim), intent(in) :: nlat ! Number of latitudes + integer(kind=jpim), intent(in) :: nproma ! Size of nproma + integer(kind=jpim), intent(in) :: ngpblks ! Number of nproma blocks + real(kind=jprb) , intent(in) :: fld(nproma,1,ngpblks) ! 2D field + character , intent(in) :: fldchar ! Single character field identifier + integer(kind=jpim), intent(in) :: noutdump ! Unit number for output file + + integer(kind=jpim) :: kgptotg ! global number of gridpoints + real(kind=jprb), allocatable :: fldg(:,:) ! global field + integer(kind=jpim) :: kfgathg=1 ! number of fields to gather + integer(kind=jpim) :: kto(1)=(/1/) ! processor where to gather + character(len=14) :: filename = "x.xxx.xxx.grid" + character(len=13) :: frmt='(4X,xxxxF8.2)' + +#include "etrans_inq.h" +#include "egath_grid.h" + + call etrans_inq(kgptotg=kgptotg) + + if ( myproc == 1 ) allocate(fldg(kgptotg,1)) + + call egath_grid(pgpg=fldg,kproma=nproma,kfgathg=kfgathg,kto=kto,pgp=fld) + + if ( myproc == 1 ) then + + ! write to file + write(filename(1:1),'(a1)') fldchar + write(filename(3:5),'(i3.3)') jstep +#ifdef ACCGPU + write(filename(7:9),'(a3)') 'gpu' +#else + write(filename(7:9),'(a3)') 'cpu' +#endif + open(noutdump, file=filename, form="unformatted", access="stream") + write(noutdump) kgptotg/nlat,nlat ! dimensions + write(noutdump) fldg ! data + close(noutdump) + + ! write to screen + write(frmt(5:8),'(i4.4)') kgptotg/nlat + write (*,*) fldchar,' at iteration ',jstep,':' + write (*,frmt) fldg + call flush(6) + + deallocate(fldg) + + endif + + +end subroutine dump_gridpoint_field + +!=================================================================================================== + +subroutine dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, fld, kvset, fldchar, noutdump) + + ! Dump a 2d spectral field to screen or a binary file. + + integer(kind=jpim), intent(in) :: jstep ! Time step, used for naming file + integer(kind=jpim), intent(in) :: myproc ! MPI rank, used for naming file + integer(kind=jpim), intent(in) :: nspec2 ! Size of nspec2 (number of waves on this proc in M-space) + integer(kind=jpim), intent(in) :: nsmax + integer(kind=jpim), intent(in) :: nmsmax + real(kind=jprb) , intent(in) :: fld(1,nspec2) ! 2D field + integer(kind=jpim), intent(in) :: kvset(1) ! B-set on which the field resides + character , intent(in) :: fldchar ! Single character field identifier + integer(kind=jpim), intent(in) :: noutdump ! Unit number for output file + + integer(kind=jpim) :: nspec2g ! global number of gridpoints + real(kind=jprb), allocatable :: fldg(:,:) ! global field (nspec2g) + integer(kind=jpim) :: kfgathg=1 ! number of fields to gather + integer(kind=jpim) :: kto(1)=(/1/) ! processor where to gather + character(len=14) :: filename = "x.xxx.xxx.spec" + character(len=13) :: frmt='(4X,xxxxF8.2)' ! for printing to screen + integer(kind=jpim) :: knse(0:nmsmax),kmse(0:nsmax) ! elliptic truncation + real(kind=jprb) :: fld2g(0:2*nmsmax+1,0:2*nsmax+1) ! 2D representation of spectral field + integer(kind=jpim) :: jj, jms, jns + +#include "etrans_inq.h" +#include "egath_spec.h" + + if ( myproc == 1 ) then + call etrans_inq(kspec2g=nspec2g) + allocate(fldg(1,nspec2g)) + call ellips(nsmax,nmsmax,knse,kmse) + endif + + call egath_spec(PSPECG=fldg,kfgathg=kfgathg,kto=kto,kvset=kvset,PSPEC=fld) + + if ( myproc == 1 ) then + + fld2g=0. + jj=1 + do jms=0,nmsmax + do jns=0,knse(jms) + fld2g(2*jms+0,2*jns+0)=fldg(1,jj) + fld2g(2*jms+0,2*jns+1)=fldg(1,jj+1) + fld2g(2*jms+1,2*jns+0)=fldg(1,jj+2) + fld2g(2*jms+1,2*jns+1)=fldg(1,jj+3) + jj=jj+4 + enddo + enddo + + ! write to binary file + write(filename(1:1),'(a1)') fldchar + write(filename(3:5),'(i3.3)') jstep +#ifdef ACCGPU + write(filename(7:9),'(a3)') 'gpu' +#else + write(filename(7:9),'(a3)') 'cpu' +#endif + open(noutdump, file=filename, form="unformatted", access="stream") + write(noutdump) 2*nmsmax+2,2*nsmax+2 ! dimensions + write(noutdump) fld2g ! data + close(noutdump) + + ! write to screen + write(frmt(5:8),'(i4.4)') 2*(nmsmax+1) + write (*,*) fldchar,' at iteration ',jstep,':' + write (*,frmt) fld2g + call flush(6) + + deallocate(fldg) + + endif + + +end subroutine dump_spectral_field + +!=================================================================================================== + +function detect_mpirun() result(lmpi_required) + logical :: lmpi_required + integer :: ilen + integer, parameter :: nvars = 5 + character(len=32), dimension(nvars) :: cmpirun_detect + character(len=4) :: clenv_dr_hook_assert_mpi_initialized + integer :: ivar + + ! Environment variables that are set when mpirun, srun, aprun, ... are used + cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi + cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe + cmpirun_detect(3) = 'PMI_SIZE' ! intel + cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm + cmpirun_detect(5) = 'ECTRANS_USE_MPI' ! forced + + lmpi_required = .false. + do ivar = 1, nvars + call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) + if (ilen > 0) then + lmpi_required = .true. + exit ! break + endif + enddo +end function + +!=================================================================================================== + +! Assign GSTATS labels to the main regions of ecTrans +subroutine gstats_labels + + call gstats_label(0, ' ', 'PROGRAM - Total') + call gstats_label(1, ' ', 'SETUP_TRANS0 - Setup ecTrans') + call gstats_label(2, ' ', 'SETUP_TRANS - Setup ecTrans handle') + call gstats_label(3, ' ', 'TIME STEP - Time step') + call gstats_label(4, ' ', 'INV_TRANS - Inverse transform') + call gstats_label(5, ' ', 'DIR_TRANS - Direct transform') + call gstats_label(6, ' ', 'NORMS - Norm comp. (optional)') + call gstats_label(102, ' ', 'LTINV_CTL - Inv. Legendre transform') + call gstats_label(103, ' ', 'LTDIR_CTL - Dir. Legendre transform') + call gstats_label(106, ' ', 'FTDIR_CTL - Dir. Fourier transform') + call gstats_label(107, ' ', 'FTINV_CTL - Inv. Fourier transform') + call gstats_label(140, ' ', 'SULEG - Comp. of Leg. poly.') + call gstats_label(152, ' ', 'LTINV_CTL - M to L transposition') + call gstats_label(153, ' ', 'LTDIR_CTL - L to M transposition') + call gstats_label(157, ' ', 'FTINV_CTL - L to G transposition') + call gstats_label(158, ' ', 'FTDIR_CTL - G to L transposition') + call gstats_label(400, ' ', 'GSTATS - GSTATS itself') + +end subroutine gstats_labels + +end program ectrans_lam_benchmark + +!=================================================================================================== diff --git a/src/programs/ectrans.in b/src/programs/ectrans.in new file mode 100755 index 0000000..a80893d --- /dev/null +++ b/src/programs/ectrans.in @@ -0,0 +1,119 @@ +#!/usr/bin/env bash + +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +ECTRANS_VERSION_STR="@ectrans_VERSION_STR@" +ECTRANS_VERSION="@ectrans_VERSION@" +ECTRANS_GIT_SHA1="@ectrans_GIT_SHA1@" + +################################################################# +# Commands +################################################################# + +usage() +{ + echo "Usage: ectrans [--version] [--info] [--git]" + exit $1 +} + +version() +{ + echo "${ECTRANS_VERSION_STR}" +} + +append_git() +{ + if (( $# > b )); then + git="$@" + echo ", git-sha1 ${git::${#git}-33}" + fi +} + +info() +{ + echo "ectrans version (${ECTRANS_VERSION_STR})$(append_git ${ECTRANS_GIT_SHA1})" + echo "" + echo "Build:" + echo " build type : @CMAKE_BUILD_TYPE@" + echo " timestamp : @EC_BUILD_TIMESTAMP@" + echo " op. system : @CMAKE_SYSTEM@ (@EC_OS_NAME@.@EC_OS_BITS@)" + echo " processor : @CMAKE_SYSTEM_PROCESSOR@" + echo " c compiler : @CMAKE_C_COMPILER_ID@ @CMAKE_C_COMPILER_VERSION@" + echo " flags : @EC_C_FLAGS@" + echo " fortran compiler: @CMAKE_Fortran_COMPILER_ID@ @CMAKE_Fortran_COMPILER_VERSION@" + echo " flags : @EC_Fortran_FLAGS@" + echo "" + echo "Features:" + echo " MPI : @ectrans_HAVE_MPI@" + echo " OMP : @ectrans_HAVE_OMP@" + echo " MKL : @ectrans_HAVE_MKL@" + echo " FFTW : @ectrans_HAVE_FFTW@" + echo " TRANSI : @ectrans_HAVE_TRANSI@" + echo "" + echo "Dependencies: " + echo " fiat version (@fiat_VERSION_STR@)$(append_git @fiat_GIT_SHA1@)" + +} + +gitsha1() +{ + echo "${ECTRANS_GIT_SHA1}" +} + + +################################################################# +# Parse command-line +################################################################# + +if test $# -eq 0; then + usage 1 +fi + +while test $# -gt 0; do + + # Split --option=value in $opt="--option" and $val="value" + + opt="" + val="" + + case "$1" in + --*=*) + opt=`echo "$1" | sed 's/=.*//'` + val=`echo "$1" | sed 's/--[_a-zA-Z0-9]*=//'` + ;; + --*) + opt=$1 + ;; + *) + break + ;; + esac + + # Parse options + case "$opt" in + --version) + version + ;; + --git) + gitsha1 + ;; + --info) + info + ;; + --) + shift + break + ;; + *) + echo "unknown option: $opt" + usage 1 + ;; + esac + shift +done diff --git a/src/trans/CMakeLists.txt b/src/trans/CMakeLists.txt new file mode 100644 index 0000000..28097ed --- /dev/null +++ b/src/trans/CMakeLists.txt @@ -0,0 +1,23 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +if( HAVE_CPU) + add_subdirectory( cpu ) +endif() +if( HAVE_GPU ) + message("Entering trans/gpu") + add_subdirectory( gpu ) +endif() + +## Install trans interface + +file( GLOB trans_interface include/ectrans/* ) +install( + FILES ${trans_interface} + DESTINATION include/ectrans +) + diff --git a/src/trans/cpu/CMakeLists.txt b/src/trans/cpu/CMakeLists.txt new file mode 100644 index 0000000..cc11c83 --- /dev/null +++ b/src/trans/cpu/CMakeLists.txt @@ -0,0 +1,73 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +## Apply workarounds for some known compilers + +if(CMAKE_Fortran_COMPILER_ID MATCHES "Cray") + if( CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER_EQUAL 8.7 ) + + # Fix for IFS "CONGRAD: SPTSV/DPTSV returned non-zero info with crayftn 8.7.7 (cdt/18.12) + ectrans_add_compile_options( + SOURCES internal/ftinv_ctlad_mod.F90 + FLAGS "-O0,fp1,omp") + + endif() +endif() + +## Assemble sources + +ecbuild_list_add_pattern( LIST trans_src + GLOB + sharedmem/* + algor/* + internal/* + external/* + QUIET + ) + +if( NOT HAVE_FFTW ) + ecbuild_list_exclude_pattern( LIST trans_src REGEX tpm_fftw.F90 ) +endif() + +foreach( prec sp dp ) + if( HAVE_${prec} ) + + ecbuild_add_library( + TARGET trans_${prec} + LINKER_LANGUAGE Fortran + SOURCES ${trans_src} + PUBLIC_INCLUDES $ + $ + $ + $ + PUBLIC_LIBS fiat parkind_${prec} + ) + ectrans_target_fortran_module_directory( + TARGET trans_${prec} + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/trans_${prec} + INSTALL_DIRECTORY module/trans_${prec} + ) + target_link_libraries( trans_${prec} PUBLIC fiat parkind_${prec} ) + if( HAVE_FFTW ) + set( FFTW_LINK PRIVATE ) + if( LAPACK_${prec} MATCHES "mkl" AND NOT FFTW_LIBRARIES MATCHES "mkl" ) + ecbuild_warn( "Danger: Both MKL and FFTW are linked in trans_${prec}. " + "No guarantees on link order can be made for the final executable.") + set( FFTW_LINK PUBLIC ) # Attempt anyway to give FFTW precedence + endif() + target_link_libraries( trans_${prec} ${FFTW_LINK} ${FFTW_LIBRARIES} ) + target_include_directories( trans_${prec} PRIVATE ${FFTW_INCLUDE_DIRS} ) + target_compile_definitions( trans_${prec} PRIVATE WITH_FFTW ) + endif() + target_link_libraries( trans_${prec} ${LAPACK_LINK} ${LAPACK_${prec}} ) + if( HAVE_OMP ) + target_link_libraries( trans_${prec} PRIVATE OpenMP::OpenMP_Fortran ) + endif() + + endif() +endforeach() diff --git a/src/trans/cpu/algor/bluestein_mod.F90 b/src/trans/cpu/algor/bluestein_mod.F90 new file mode 100644 index 0000000..13aafc0 --- /dev/null +++ b/src/trans/cpu/algor/bluestein_mod.F90 @@ -0,0 +1,387 @@ +! (C) Copyright 2015- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE BLUESTEIN_MOD + +! Implementation of the Bluestein FFT algorithm as described in a paper titled +! "Bluestein's FFT for Arbitrary N on the Hypercube", Paul N. Swarztrauber et al., +! Parallel Computing, 17 (1991), pp. 607-617. +! +! George Mozdzynski and Nils Wedi, June 2015 +! +! The naming convention follows the algorithm description in the above paper. +! +USE PARKIND1, ONLY : JPIM, JPRB + +IMPLICIT NONE + +PRIVATE +PUBLIC BLUESTEIN_FFT, BLUESTEIN_INIT, BLUESTEIN_TERM, FFTB_TYPE + +TYPE FFTB_PLAN + INTEGER(KIND=JPIM) :: NSIZE ! latitude length security check + REAL(KIND=JPRB),ALLOCATABLE :: HS(:,:,:) + REAL(KIND=JPRB),ALLOCATABLE :: H2xT(:,:,:) +END TYPE FFTB_PLAN + +TYPE FFTB_TYPE + INTEGER(KIND=JPIM) :: NDLON ! maximum number of points on a latitude + REAL(KIND=JPRB) ,ALLOCATABLE :: TRIGS(:,:) ! list of trigonometric function values (PO2) + INTEGER(KIND=JPIM),ALLOCATABLE :: NFAX(:,:) ! list of factors of truncation (PO2) + INTEGER(KIND=JPIM) :: NLAT_COUNT ! number of lats requiring bluestein FFT + INTEGER(KIND=JPIM),ALLOCATABLE :: NLATS(:) ! the latitude lengths of these latitudes + TYPE(FFTB_PLAN),ALLOCATABLE :: FFTB(:) +END TYPE FFTB_TYPE + +CONTAINS +!----------------------------------------------------------------------------- +SUBROUTINE BLUESTEIN_FFT(TB,N,KSIGN,KLOT,PDAT) +! N : FFT LENGTH +! KSIGN : FFT DIRECTION +! -1 DIRECT (R2C) +! 1 INVERSE (C2R) +IMPLICIT NONE +TYPE(FFTB_TYPE),INTENT(INOUT) :: TB +INTEGER,INTENT(IN) :: N,KSIGN,KLOT +REAL(KIND=JPRB),INTENT(INOUT) :: PDAT (:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZDATAR(:,:), ZDATAI(:,:),ZY(:,:) +REAL(KIND=JPRB) :: ZR(KLOT),ZI(KLOT),ZX0(KLOT) +REAL(KIND=JPRB) :: ZWR,ZWI +INTEGER(KIND=JPIM) :: I,K,M,JLOT,NN,II,IR,IPO2 +INTEGER(KIND=JPIM) :: IJUMP,ILOT,IINC,ISIGN,IFFTSIGN + +!WRITE(*,'("BLUESTEIN_FFT: N=",I6," KSIGN=",I2," KLOT=",I4)')& +! & N,KSIGN,KLOT + +IF( KSIGN/=-1 .AND. KSIGN/=1 )THEN + CALL ABOR1('BLUESTEIN_FFT: INVALID KSIGN') +ENDIF + +NN=N/2+1 + +IF( TB%FFTB(N)%NSIZE /= N )THEN + WRITE(0,'("BLUESTEIN_FFT: UNEXPECTED PLAN LATITUDE LENGTH, N=",I6," TB%FFTB(N)%NSIZE=",I6)')& + & N,TB%FFTB(N)%NSIZE + CALL ABOR1('BLUESTEIN_FFT: UNEXPECTED PLAN LATITUDE LENGTH') +ENDIF + +IF( KSIGN==-1 )THEN + ISIGN=1 +ELSE + ISIGN=2 +ENDIF + +! input data preparation + +ALLOCATE(ZDATAR(KLOT,0:2*NN-1)) +ALLOCATE(ZDATAI(KLOT,0:2*NN-1)) +ZDATAR(:,:)=0.0D0 +ZDATAI(:,:)=0.0D0 +IF( KSIGN==-1 )THEN + DO K=0,N-1 + DO JLOT=1,KLOT + ZDATAR(JLOT,K)=PDAT(JLOT,K+1) + ENDDO + ENDDO +ELSEIF( KSIGN==1 )THEN + DO JLOT=1,KLOT + DO K=0,NN-1 + ZDATAR(JLOT,K)=PDAT(JLOT,K*2+1) + ZDATAR(JLOT,N-K)=PDAT(JLOT,K*2+1) + ZDATAI(JLOT,K)=PDAT(JLOT,K*2+2) + ZDATAI(JLOT,N-K) = -PDAT(JLOT,K*2+2) + ENDDO + ZDATAI(JLOT,0)=0._JPRB + ENDDO + +ENDIF + +! +! Compute M as the smallest power of two that is greater than or equal to 2N-2 +! and compute the M vector H2 from equations (2.16) +! + +M=1 +IPO2=0 +DO WHILE( M<=2*N-2) + M=M*2 + IPO2=IPO2+1 +ENDDO + +ALLOCATE(ZY(2*KLOT,0:(M/2+1)*2)) + +! create Y by mult with bluestein n**2 + +ZX0(1:KLOT) = ZDATAR(1:KLOT,0) +DO I=0,N-1 + + ZR=ZDATAR(1:KLOT,I) + ZI=ZDATAI(1:KLOT,I) + + ZWR=TB%FFTB(N)%HS(1,I,ISIGN) + ZWI=TB%FFTB(N)%HS(2,I,ISIGN) + + DO K=1,KLOT + ZY((K-1)*2+1,I) = ZR(K)*ZWR + ZI(K)*ZWI + ZY((K-1)*2+2,I) = ZI(K)*ZWR - ZR(K)*ZWI + ENDDO + +ENDDO + +! zero padding of Y + +DO I=N,(M/2+1)*2 + ZY(:,I) = 0._JPRB +ENDDO + +! FFT of Y + +ILOT=2*KLOT +IINC=ILOT +IJUMP=1 +IFFTSIGN=-1 ! R->C + +CALL FFT992(ZY,TB%TRIGS(1,IPO2),TB%NFAX(1,IPO2),IINC,IJUMP,M,ILOT,IFFTSIGN) +CALL FFT992_CC(ZY, IINC, IJUMP, M, ILOT, IFFTSIGN) + +! convert real FFT output, pointwise multiplication with h_hat(n-k) and real/imag +! swap in preparation for inverse FFT + +DO I=0,M-1 + DO K=1,KLOT + ZR(K)=ZY((K-1)*2+1,I) + ZI(K)=ZY((K-1)*2+2,I) + ENDDO + + ZWR=TB%FFTB(N)%H2xT(1,I,ISIGN) + ZWI=TB%FFTB(N)%H2xT(2,I,ISIGN) + +! swap + DO K=1,KLOT + ZY((K-1)*2+1,I) = ZI(K)*ZWR + ZR(K)*ZWI + ZY((K-1)*2+2,I) = ZR(K)*ZWR - ZI(K)*ZWI + ENDDO +ENDDO + +! IFFT as a FFT with swapped input and swapped output + +CALL FFT992(ZY,TB%TRIGS(1,IPO2),TB%NFAX(1,IPO2),IINC,IJUMP,M,ILOT,IFFTSIGN) +CALL FFT992_CC (ZY, IINC, IJUMP, M, ILOT, IFFTSIGN) + +! create final by mult with another bluestein n**2 and swap output of prev FFT +! postprocessing + +IF( KSIGN==-1) THEN + + DO I=0,N/2 + DO K=1,KLOT + ZI(K)=ZY((K-1)*2+1,I) + ZR(K)=ZY((K-1)*2+2,I) + ENDDO + + ZWR=TB%FFTB(N)%HS(1,I,ISIGN) + ZWI=TB%FFTB(N)%HS(2,I,ISIGN) + IR=I*2+1 + II=I*2+2 + DO K=1,KLOT + PDAT(K,IR) = ZR(K)*ZWR + ZI(K)*ZWI + PDAT(K,II) = ZI(K)*ZWR - ZR(K)*ZWI + ENDDO + ENDDO + +ELSE + + DO I=0,N-1 + + DO K=1,KLOT + ZI(K)=ZY((K-1)*2+1,I) + ZR(K)=ZY((K-1)*2+2,I) + ENDDO + + ZWR=TB%FFTB(N)%HS(1,I,ISIGN) + ZWI=TB%FFTB(N)%HS(2,I,ISIGN) + + DO K=1,KLOT + PDAT(K,I+1) = ZR(K)*ZWR + ZI(K)*ZWI + ENDDO + ENDDO + DO K=1,KLOT + PDAT(K,N) =PDAT(K,N) + ZX0(K) + ENDDO + +ENDIF + +DEALLOCATE(ZY) +DEALLOCATE(ZDATAR) +DEALLOCATE(ZDATAI) + +RETURN +END SUBROUTINE BLUESTEIN_FFT + + +!============================================================================= +SUBROUTINE BLUESTEIN_INIT(TB) +! +! Initialize data structures required by Bluestein FFT +! +! +TYPE(FFTB_TYPE),INTENT(INOUT) :: TB +INTEGER(KIND=JPIM) :: N,M,IPO2,JLAT,J,K,ISIGN,KSIGN +INTEGER(KIND=JPIM) :: ICURR,IPREV +INTEGER(KIND=JPIM) :: IJUMP,ILOT,IINC,IFFTSIGN + +LOGICAL :: LLUSEFFT992 +REAL(KIND=JPRB) :: DEL,ANGLE,ZSIGN + +! determine number of PO2 FFT sizes needed by Bluestein FFTs +M=1 +IPO2=0 +DO WHILE( M<=2*TB%NDLON-2) + M=M*2 + IPO2=IPO2+1 +ENDDO + +!WRITE(*,'("BLUESTEIN_INIT: 2*KLON-2=",I5," M=",I5," IPO2=",I2)')2*TB%NDLON-2,M,IPO2 + +! now go and generate the trigs for the above number of PO2 FFT sizes +ALLOCATE(TB%TRIGS(M,IPO2)) +ALLOCATE(TB%NFAX(19,IPO2)) +TB%TRIGS(:,:)=0.0D0 +TB%NFAX (:,:)=0.0D0 + +M=1 +IPO2=0 +DO WHILE( M<=2*TB%NDLON-2) + M=M*2 + IPO2=IPO2+1 + CALL SET99B(TB%TRIGS(1,IPO2),TB%NFAX(1,IPO2),M,LLUSEFFT992) + IF( .NOT.LLUSEFFT992 )THEN + CALL ABOR1("BLUESTEIN_INIT: UNEXPECTED LLUSEFFT992=F") + ENDIF +ENDDO + +ALLOCATE(TB%FFTB(TB%NDLON)) +DO J=1,TB%NDLON + TB%FFTB(J)%NSIZE=-1 +ENDDO + +DO JLAT=1,TB%NLAT_COUNT + + N=TB%NLATS(JLAT) + + IF( TB%FFTB(N)%NSIZE==N )THEN + ! we have already initialised this latitude length + ! WRITE(0,'("BLUESTEIN_INIT: WARNING - LATITUDE LENGTH ",I6," ALREADY INITIALIZED")')N + CYCLE + ENDIF + + IF( N > TB%NDLON )THEN + CALL ABOR1("BLUESTEIN_INIT: N > TB%NDLON UNEXPECTED") + ENDIF + + ! now set the specific PO2 (i.e. M and IPO2) for the N length of + ! this latitude being initialized + M=1 + IPO2=0 + DO WHILE( M<=2*N-2) + M=M*2 + IPO2=IPO2+1 + ENDDO + + TB%FFTB(N)%NSIZE=N + + + DEL=2.0D0*ASIN(1.0D0)/REAL(N,JPRB) + + ALLOCATE(TB%FFTB(N)%HS(2,0:N-1,2)) + ALLOCATE(TB%FFTB(N)%H2xT(2,0:(M/2+1)*2,2)) + + DO ISIGN=1,2 + + IF( ISIGN==1 )THEN + KSIGN=-1 + ELSE + KSIGN= 1 + ENDIF + + ZSIGN=-KSIGN + + ! conjugate bluestein sequence + + DO K=0,N-1 + ANGLE=REAL(K*K,JPRB)*DEL + TB%FFTB(N)%HS(1,K,ISIGN)=COS(ANGLE) + TB%FFTB(N)%HS(2,K,ISIGN)=ZSIGN*SIN(ANGLE) + ENDDO + + DO K=0,(M/2+1)*2 + TB%FFTB(N)%H2xT(1,K,ISIGN) = 0._JPRB + TB%FFTB(N)%H2xT(2,K,ISIGN) = 0._JPRB + ENDDO + TB%FFTB(N)%H2xT(1,0,ISIGN) = TB%FFTB(N)%HS(1,0,ISIGN) + TB%FFTB(N)%H2xT(2,0,ISIGN) = TB%FFTB(N)%HS(2,0,ISIGN) + + DO K=1,N-1 + TB%FFTB(N)%H2xT(1,K,ISIGN) = TB%FFTB(N)%HS(1,K,ISIGN) + TB%FFTB(N)%H2xT(1,M-K,ISIGN) = TB%FFTB(N)%HS(1,K,ISIGN) + TB%FFTB(N)%H2xT(2,K,ISIGN) = TB%FFTB(N)%HS(2,K,ISIGN) + TB%FFTB(N)%H2xT(2,M-K,ISIGN) = TB%FFTB(N)%HS(2,K,ISIGN) + ENDDO + IF( M > 2*N-2 ) THEN + DO K=N,M-N+1 + TB%FFTB(N)%H2xT(1,K,ISIGN) = 0._JPRB + TB%FFTB(N)%H2xT(2,K,ISIGN) = 0._JPRB + ENDDO + ENDIF + + + ! + ! Compute an unnormalized discrete Fourier transform of H2 -> H_hat + ! + ILOT=2 + IINC=ILOT + IJUMP=1 + IFFTSIGN=1 ! C->R + CALL FFT992_CC(TB%FFTB(N)%H2xT(:,:,ISIGN),IINC,IJUMP,M,ILOT,IFFTSIGN) + CALL FFT992(TB%FFTB(N)%H2xT(:,:,ISIGN),TB%TRIGS(1,IPO2),TB%NFAX(1,IPO2),IINC,IJUMP,M,ILOT,IFFTSIGN) + + ENDDO ! ISIGN + +ENDDO ! JLAT + +RETURN +END SUBROUTINE BLUESTEIN_INIT + + +!============================================================================= +SUBROUTINE BLUESTEIN_TERM(TB) +! +! Remove data structures used by Bluestein FFT +! +! +TYPE(FFTB_TYPE),INTENT(INOUT) :: TB +INTEGER(KIND=JPIM) :: N,JLAT + +IF( ALLOCATED(TB%TRIGS) ) DEALLOCATE(TB%TRIGS) +IF( ALLOCATED(TB%NFAX) ) DEALLOCATE(TB%NFAX) +DO JLAT=1,TB%NLAT_COUNT + N=TB%NLATS(JLAT) + IF( ALLOCATED(TB%FFTB(N)%HS) ) DEALLOCATE(TB%FFTB(N)%HS) + IF( ALLOCATED(TB%FFTB(N)%H2xT) ) DEALLOCATE(TB%FFTB(N)%H2xT) +ENDDO +IF( ALLOCATED(TB%NLATS) ) DEALLOCATE(TB%NLATS) +IF( ALLOCATED(TB%FFTB) ) DEALLOCATE(TB%FFTB) + +RETURN +END SUBROUTINE BLUESTEIN_TERM + + +!============================================================================= + +END MODULE BLUESTEIN_MOD diff --git a/src/trans/cpu/algor/butterfly_alg_mod.F90 b/src/trans/cpu/algor/butterfly_alg_mod.F90 new file mode 100644 index 0000000..1cb785d --- /dev/null +++ b/src/trans/cpu/algor/butterfly_alg_mod.F90 @@ -0,0 +1,1222 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE BUTTERFLY_ALG_MOD +USE PARKIND1, ONLY : JPRD, JPIM, JPRB, JPIB +USE INTERPOL_DECOMP_MOD +USE SHAREDMEM_MOD + +use, intrinsic :: ieee_exceptions + +IMPLICIT NONE + +PRIVATE +PUBLIC NODE_TYPE,LEV_STRUCT,BUTTERFLY_STRUCT,CONSTRUCT_BUTTERFLY,MULT_BUTV,MULT_BUTM,CLONE,& + & PACK_BUTTERFLY_STRUCT,UNPACK_BUTTERFLY_STRUCT + +! Butterfly package. + +! Butterfly algorithm for matrix multiplication +! Coded from: "An algorithm for the rapid evaluation of special function transform" by +! Michael O'Neill, Franco Woolfe and Vladimir Rohklin, Appl.Comput.Harmon.Anal. 2009? +! referred to in the following as ONWR + +TYPE NODE_TYPE +INTEGER(KIND=JPIM) :: ILEV =0 ! Level of this node +INTEGER(KIND=JPIM) :: IFCOL =0 ! First column +INTEGER(KIND=JPIM) :: ILCOL =0 ! Last column +INTEGER(KIND=JPIM) :: IFROW =0 ! first row +INTEGER(KIND=JPIM) :: ILROW =0 ! Last row +INTEGER(KIND=JPIM) :: ICOLS =0 ! Number of columns +INTEGER(KIND=JPIM) :: IROWS =0 ! Number of rows +INTEGER(KIND=JPIM) :: IRANK =0 ! Rank of interpolative decomposition +INTEGER(KIND=JPIM) :: IOFFBETA=0 ! Offset in "beta" work space +INTEGER(KIND=JPIM),POINTER :: ICLIST(:) => NULL() ! List of columns in B (column skeleton matrix) +REAL(KIND=JPRB),POINTER :: PNONIM(:) => NULL() ! Non-identety part of interpolation matrix +REAL(KIND=JPRB),POINTER :: B(:,:) => NULL() ! Column skeleton matrix +REAL(KIND=JPRD),POINTER :: DB(:,:) => NULL() ! Column skeleton matrix, as part of pre-computations only +END TYPE NODE_TYPE + +TYPE LEV_STRUCT +INTEGER(KIND=JPIM) :: IJ =0 ! Number of row boxes at this level +INTEGER(KIND=JPIM) :: IK =0 ! Number of column boxes at this level +INTEGER(KIND=JPIM) :: IBETALEN=0 ! Workspace needed at this level of interim results "beta" +TYPE(NODE_TYPE),POINTER :: NODE(:,:) => NULL() ! Box info +END TYPE LEV_STRUCT + +TYPE BUTTERFLY_STRUCT +INTEGER(KIND=JPIM) :: M_ORDER =0 ! M of original matrix +INTEGER(KIND=JPIM) :: N_ORDER =0 ! N of original matrix +INTEGER(KIND=JPIM) :: N_CMAX =0 ! Max number of columns in each submatrix at level 0 +INTEGER(KIND=JPIM) :: N_LEVELS =0 ! Max level in dyadic hierarchy +INTEGER(KIND=JPIM) :: IBETALEN_MAX=0 ! Max workspace for one level of interim results "beta" +TYPE(LEV_STRUCT),POINTER :: SLEV(:) => NULL() ! Level structure (dimensioned 0:n_levels) +END TYPE BUTTERFLY_STRUCT + +TYPE CLONE +REAL(KIND=JPRB) , ALLOCATABLE :: COMMSBUF(:) ! for communicating packed bufferfly_structs +END TYPE CLONE ! between MPI tasks + +#ifdef WITH_IEEE_HALT +LOGICAL, PARAMETER :: LL_IEEE_HALT = .TRUE. +#else +LOGICAL, PARAMETER :: LL_IEEE_HALT = .FALSE. +#endif + +LOGICAL, PARAMETER :: LLDOUBLE = (JPRB == JPRD) + +CONTAINS +!================================================================================ +SUBROUTINE CONSTRUCT_BUTTERFLY(PEPS,KCMAX,KM,KN,PMAT,YD_STRUCT) +IMPLICIT NONE + +! Constuct butterfly + +REAL(KIND=JPRD),INTENT(IN) :: PEPS ! Precision +INTEGER(KIND=JPIM),INTENT(IN) :: KCMAX ! Max number of columns in each submatrix at level 0 +INTEGER(KIND=JPIM),INTENT(IN) :: KM ! Number of rows in matrix pmat +INTEGER(KIND=JPIM),INTENT(IN) :: KN ! Number of columns in matrix pmat +REAL(KIND=JPRD),INTENT(IN) :: PMAT(:,:) ! original matrix +TYPE(BUTTERFLY_STRUCT),INTENT(INOUT) :: YD_STRUCT ! Structure needed to apply butterfly + +REAL(KIND=JPRD),ALLOCATABLE :: ZSUB(:,:),ZBCOMB(:,:) +INTEGER(KIND=JPIM) :: ILEVELS,JL,JJ,JK,IM,II,JR,IJ,IK +INTEGER(KIND=JPIM) :: IROWS,ICOLS,IRANK,ICLIST(KN) +INTEGER(KIND=JPIM) :: ILM1,IJL,IKL,IJR,IKR,IRANKL,IRANKR,IOFFROW,IBLEV,IBLEVM1 +INTEGER(KIND=JPIM) :: IFR,ILR,IFC,IROFF,IRSTRIDE,IOFFBETA +INTEGER(KIND=JPIM) :: ILEN,I,J,J1,J2,JIJ,JIK +REAL(KIND=JPRD) :: ZNORMS(KN) +TYPE(NODE_TYPE),POINTER :: YNODEL,YNODER,YNODE +TYPE(NODE_TYPE),POINTER :: YBNODEL,YBNODER,YBNODE +TYPE(LEV_STRUCT) :: YTEMPB(0:1) + +!-------------------------------------------------------------------------------- + + +! ONWR 5.4.1 +YD_STRUCT%M_ORDER = KM +YD_STRUCT%N_ORDER = KN +YD_STRUCT%N_CMAX = KCMAX + +!Find number of levels +ILEVELS = 0 +DO + IF(2**ILEVELS >= (YD_STRUCT%N_ORDER+YD_STRUCT%N_CMAX-1) /YD_STRUCT%N_CMAX ) EXIT + ILEVELS = ILEVELS+1 +ENDDO +YD_STRUCT%N_LEVELS = ILEVELS +ALLOCATE(YD_STRUCT%SLEV(0:YD_STRUCT%N_LEVELS)) + +! Number of boxes at each level +IJ = 1 +IK = (KN-1)/KCMAX+1 +DO JL=0,YD_STRUCT%N_LEVELS + YD_STRUCT%SLEV(JL)%IJ = IJ + YD_STRUCT%SLEV(JL)%IK = IK + IJ = IJ*2 + IK = MAX((IK+1)/2,1) +ENDDO + +DO JL=0,YD_STRUCT%N_LEVELS + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(YD_STRUCT%SLEV(JL)%IJ,YD_STRUCT%SLEV(JL)%IK)) + CALL GSTATS(1253,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JJ,JK,YNODE,ILM1,IJL,IKL,IJR,IKR,IRSTRIDE) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + YNODE%ILEV = JL + IF(JL == 0) THEN + YNODE%IFCOL = 1+(JK-1)*KCMAX + YNODE%ILCOL = MIN(JK*KCMAX,KN) + YNODE%ICOLS = YNODE%ILCOL - YNODE%IFCOL+1 + YNODE%IFROW = 1 + YNODE%ILROW = KM + ELSE + YNODE%IFCOL = -99 + YNODE%ILCOL = -99 + YNODE%ICOLS = -99 + ILM1 = JL-1 + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRSTRIDE = (YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IROWS+1)/2 + IF(MOD(JJ,2) == 1) THEN + YNODE%IFROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW + YNODE%ILROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW+IRSTRIDE -1 + ELSE + YNODE%IFROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW+IRSTRIDE + YNODE%ILROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%ILROW + ENDIF + ENDIF + YNODE%IROWS = YNODE%ILROW - YNODE%IFROW+1 + YNODE%IROWS = MAX(YNODE%IROWS,0) + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1253,1) +ENDDO + + +! ONWR 5.4.2 + +DO JL=0,YD_STRUCT%N_LEVELS + IBLEV = MOD(JL,2) + IF(JL > 0) THEN + IBLEVM1 = MOD(JL-1,2) + ELSE + IBLEVM1 = -1 + ENDIF + ALLOCATE(YTEMPB(IBLEV)%NODE(YD_STRUCT%SLEV(JL)%IJ,YD_STRUCT%SLEV(JL)%IK)) + CALL GSTATS(1253,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JJ,JK,YNODE,YBNODE,IROWS,ICOLS,& + !$OMP& ZSUB,ILM1,IJL,IKL,IJR,IKR,YNODEL,YBNODEL,IRANKL,YNODER,YBNODER,IRANKR,IOFFROW,ZBCOMB) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + YBNODE => YTEMPB(IBLEV)%NODE(JJ,JK) + IF(JL == 0) THEN + IROWS=YNODE%IROWS + ICOLS=YNODE%ICOLS + ALLOCATE(ZSUB(IROWS,ICOLS)) + CALL EXTRACT_SUB(YNODE,PMAT,ZSUB) + CALL COMPRESS_MAT(YNODE,YBNODE,PEPS,IROWS,ICOLS,ZSUB) + DEALLOCATE(ZSUB) + ELSE + ILM1 = JL-1 + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + YNODEL => YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL) + YBNODEL => YTEMPB(IBLEVM1)%NODE(IJL,IKL) + IRANKL = YNODEL%IRANK + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + YNODER => YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR) + YBNODER => YTEMPB(IBLEVM1)%NODE(IJR,IKR) + IRANKR = YNODER%IRANK + ELSE + YNODER => YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL) + IRANKR = 0 + ENDIF + IROWS = YNODE%IROWS + ICOLS = IRANKL+IRANKR + YNODE%ICOLS=ICOLS + IOFFROW = YNODE%IFROW-& + & YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW + ALLOCATE(ZBCOMB(IROWS,ICOLS)) + CALL COMBINE_B(YBNODEL%DB,IRANKL,& + & YBNODER%DB,IRANKR,& + & IROWS,IOFFROW,ZBCOMB) + CALL COMPRESS_MAT(YNODE,YBNODE,PEPS,IROWS,ICOLS,ZBCOMB) + DEALLOCATE(ZBCOMB) + ENDIF + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1253,1) + IF(IBLEVM1 >= 0) THEN +!Deallocate Bs no longer needed + DO JJ=1,YD_STRUCT%SLEV(JL-1)%IJ + DO JK=1,YD_STRUCT%SLEV(JL-1)%IK + DEALLOCATE(YTEMPB(IBLEVM1)%NODE(JJ,JK)%DB) + ENDDO + ENDDO + DEALLOCATE(YTEMPB(IBLEVM1)%NODE) + ENDIF +! Permanently store B for last level + IF(JL == YD_STRUCT%N_LEVELS) THEN + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + ALLOCATE(YNODE%DB(YNODE%IROWS,YNODE%IRANK)) + YNODE%DB(:,:) = YTEMPB(IBLEV)%NODE(JJ,JK)%DB(:,:) + DEALLOCATE(YTEMPB(IBLEV)%NODE(JJ,JK)%DB) + ENDDO + ENDDO + DEALLOCATE(YTEMPB(IBLEV)%NODE) + ENDIF +ENDDO + +CALL GSTATS(1901,0) +! Compute work space +YD_STRUCT%IBETALEN_MAX = 0 +DO JL=0,YD_STRUCT%N_LEVELS + IOFFBETA = 0 + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IF( ASSOCIATED(YNODE%DB) ) THEN + ALLOCATE(YNODE%B(SIZE(YNODE%DB(:,1)),SIZE(YNODE%DB(1,:)))) + YNODE%B(:,:) = YNODE%DB(:,:) + DEALLOCATE(YNODE%DB) + ENDIF + YNODE%IOFFBETA = IOFFBETA + IOFFBETA = IOFFBETA+YNODE%IRANK + ENDDO + ENDDO + YD_STRUCT%SLEV(JL)%IBETALEN = IOFFBETA + YD_STRUCT%IBETALEN_MAX = MAX(YD_STRUCT%IBETALEN_MAX,YD_STRUCT%SLEV(JL)%IBETALEN) +ENDDO + +CALL GSTATS(1901,1) + +RETURN + +END SUBROUTINE CONSTRUCT_BUTTERFLY +!============================================================================= +SUBROUTINE PACK_BUTTERFLY_STRUCT(YD_STRUCT,YD_CLONE) +IMPLICIT NONE +! Pack butterfly struct into array +TYPE(BUTTERFLY_STRUCT),INTENT(IN) :: YD_STRUCT ! Structure needed to apply butterfly +TYPE(CLONE), TARGET, INTENT(OUT) :: YD_CLONE ! for communicating packed bufferfly_structs + +INTEGER(KIND=JPIM) :: ILEN,I,JL,JIK,JIJ,J,J1,J2 +!-------------------------------------------------------------------------------- + +ILEN=0 +ILEN=ILEN+5 +DO JL=0,YD_STRUCT%N_LEVELS + ILEN=ILEN+3 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE) )THEN + DO JIK=1,YD_STRUCT%SLEV(JL)%IK + DO JIJ=1,YD_STRUCT%SLEV(JL)%IJ + ILEN=ILEN+9 + ILEN=ILEN+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) )THEN + ILEN=ILEN+SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) + ENDIF + ILEN=ILEN+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) )THEN + ILEN=ILEN+SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) + ENDIF + ILEN=ILEN+2 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B) )THEN + ILEN=ILEN+SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B) + ENDIF + ENDDO + ENDDO + ENDIF +ENDDO +ALLOCATE(YD_CLONE%COMMSBUF(ILEN)) +I=0 +YD_CLONE%COMMSBUF(I+1)=YD_STRUCT%M_ORDER +YD_CLONE%COMMSBUF(I+2)=YD_STRUCT%N_ORDER +YD_CLONE%COMMSBUF(I+3)=YD_STRUCT%N_CMAX +YD_CLONE%COMMSBUF(I+4)=YD_STRUCT%N_LEVELS +YD_CLONE%COMMSBUF(I+5)=YD_STRUCT%IBETALEN_MAX +I=I+5 +DO JL=0,YD_STRUCT%N_LEVELS + YD_CLONE%COMMSBUF(I+1)=YD_STRUCT%SLEV(JL)%IJ + YD_CLONE%COMMSBUF(I+2)=YD_STRUCT%SLEV(JL)%IK + YD_CLONE%COMMSBUF(I+3)=YD_STRUCT%SLEV(JL)%IBETALEN + I=I+3 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE) )THEN + DO JIK=1,YD_STRUCT%SLEV(JL)%IK + DO JIJ=1,YD_STRUCT%SLEV(JL)%IJ + YD_CLONE%COMMSBUF(I+1)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILEV + YD_CLONE%COMMSBUF(I+2)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFCOL + YD_CLONE%COMMSBUF(I+3)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILCOL + YD_CLONE%COMMSBUF(I+4)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFROW + YD_CLONE%COMMSBUF(I+5)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILROW + YD_CLONE%COMMSBUF(I+6)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICOLS + YD_CLONE%COMMSBUF(I+7)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IROWS + YD_CLONE%COMMSBUF(I+8)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IRANK + YD_CLONE%COMMSBUF(I+9)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IOFFBETA + I=I+9 + YD_CLONE%COMMSBUF(I+1)=0 + I=I+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) )THEN + J=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) + YD_CLONE%COMMSBUF(I)=J + YD_CLONE%COMMSBUF(I+1:I+J)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST(:) + I=I+J + ENDIF + YD_CLONE%COMMSBUF(I+1)=0 + I=I+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) )THEN + J=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) + YD_CLONE%COMMSBUF(I)=J + YD_CLONE%COMMSBUF(I+1:I+J)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM(:) + I=I+J + ENDIF + YD_CLONE%COMMSBUF(I+1)=0 + YD_CLONE%COMMSBUF(I+2)=0 + I=I+2 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B) )THEN + J1=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B,DIM=1) + J2=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B,DIM=2) + YD_CLONE%COMMSBUF(I-1)=J1 + YD_CLONE%COMMSBUF(I )=J2 + DO J=1,J2 + YD_CLONE%COMMSBUF(I+1:I+J1)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B(:,J) + I=I+J1 + ENDDO + ENDIF + ENDDO + ENDDO + ENDIF +ENDDO +IF( I /= ILEN )THEN + CALL ABOR1('PACK_BUTTERFLY_STRUCT: PACKED LENGTH /= PRECOMPUTED LENGTH') +ENDIF + +END SUBROUTINE PACK_BUTTERFLY_STRUCT +!===================================================================================== +SUBROUTINE UNPACK_BUTTERFLY_STRUCT(YD_STRUCT,YD_CLONE,YDMEMBUF) +IMPLICIT NONE +! Construct butterfly struct from packed array +TYPE(BUTTERFLY_STRUCT),INTENT(OUT) :: YD_STRUCT ! Structure needed to apply butterfly +TYPE(CLONE), TARGET, OPTIONAL,INTENT(IN) :: YD_CLONE ! for communicating packed bufferfly_structs +TYPE(SHAREDMEM),OPTIONAL,INTENT(INOUT) :: YDMEMBUF ! Memory buffer +INTEGER(KIND=JPIM) :: ILEN,I,JL,JIK,JIJ,J,J1,J2,II +REAL(KIND=JPRB),POINTER :: ZBUF(:) +LOGICAL :: LLMEMBUF +!-------------------------------------------------------------------------------- +IF(PRESENT(YDMEMBUF)) THEN + LLMEMBUF = .TRUE. +ELSE + IF(.NOT.PRESENT(YD_CLONE)) CALL ABOR1('UNPACK_BUTTERFLY_STRUCT: YD_CLONE ARGUMENT MISSING') + LLMEMBUF = .FALSE. +ENDIF +I=0 +IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,5,ZBUF,ADVANCE=.TRUE.) +ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+5) +ENDIF +YD_STRUCT%M_ORDER = NINT(ZBUF(1),JPRB) +YD_STRUCT%N_ORDER = NINT(ZBUF(2),JPRB) +YD_STRUCT%N_CMAX = NINT(ZBUF(3),JPRB) +YD_STRUCT%N_LEVELS = NINT(ZBUF(4),JPRB) +YD_STRUCT%IBETALEN_MAX = NINT(ZBUF(5),JPRB) +I=I+5 + +ALLOCATE(YD_STRUCT%SLEV(0:YD_STRUCT%N_LEVELS)) +DO JL=0,YD_STRUCT%N_LEVELS + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,3,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+3) + ENDIF + YD_STRUCT%SLEV(JL)%IJ =NINT(ZBUF(1),JPRB) + YD_STRUCT%SLEV(JL)%IK =NINT(ZBUF(2),JPRB) + YD_STRUCT%SLEV(JL)%IBETALEN=NINT(ZBUF(3),JPRB) + I=I+3 + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(YD_STRUCT%SLEV(JL)%IJ,YD_STRUCT%SLEV(JL)%IK)) + DO JIK=1,YD_STRUCT%SLEV(JL)%IK + DO JIJ=1,YD_STRUCT%SLEV(JL)%IJ + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,10,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+10) + ENDIF + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILEV = NINT(ZBUF(1),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFCOL = NINT(ZBUF(2),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILCOL = NINT(ZBUF(3),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFROW = NINT(ZBUF(4),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILROW = NINT(ZBUF(5),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICOLS = NINT(ZBUF(6),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IROWS = NINT(ZBUF(7),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IRANK = NINT(ZBUF(8),JPRB) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IOFFBETA= NINT(ZBUF(9),JPRB) + J = NINT(ZBUF(10)) + I=I+10 + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST(J)) + IF( J > 0 )THEN + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,J,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+J) + ENDIF + DO II=1,J + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST(II)=NINT(ZBUF(II),JPRB) + END DO + I=I+J + ENDIF + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,1,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+1) + ENDIF + J=NINT(ZBUF(1),JPRB) + I=I+1 + IF( J > 0 )THEN + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,J,YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM,ADVANCE=.TRUE.) + ELSE + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM(J)) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM(:)=YD_CLONE%COMMSBUF(I+1:I+J) + ENDIF + I=I+J + ENDIF + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,2,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+2) + ENDIF + J1=NINT(ZBUF(1),JPRB) + J2=NINT(ZBUF(2),JPRB) + I=I+2 + IF( J1 > 0 .AND. J2 > 0 )THEN + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,J1,J2,YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B,ADVANCE=.TRUE.) + I=I+J1*J2 + ELSE + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B(J1,J2)) + DO J=1,J2 + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B(:,J)=YD_CLONE%COMMSBUF(I+1:I+J1) + I=I+J1 + ENDDO + ENDIF + ENDIF + ENDDO + ENDDO +ENDDO +IF(.NOT.LLMEMBUF) THEN + IF( I /= SIZE(YD_CLONE%COMMSBUF) )THEN + CALL ABOR1('UNPACK_BUTTERFLY_STRUCT: UNPACKED LENGTH /= ALLOCATED LENGTH') + ENDIF +ENDIF +END SUBROUTINE UNPACK_BUTTERFLY_STRUCT +!=========================================================================== +SUBROUTINE EXTRACT_SUB(YDNODE,PMAT,PSUB) +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(IN) :: YDNODE +REAL(KIND=JPRD),INTENT(IN) :: PMAT(:,:) +REAL(KIND=JPRD),INTENT(OUT) :: PSUB(:,:) + +INTEGER(KIND=JPIM) :: ICOL,IROW,JCOL,JROW +!-------------------------------------------------------------------- + +ICOL = 0 +DO JCOL=YDNODE%IFCOL,YDNODE%ILCOL + ICOL = ICOL+1 + IROW = 0 + DO JROW=YDNODE%IFROW,YDNODE%ILROW + IROW = IROW+1 + PSUB(IROW,ICOL) = PMAT(JROW,JCOL) + ENDDO +ENDDO + +END SUBROUTINE EXTRACT_SUB +!=================================================================== +SUBROUTINE COMBINE_B(PBL,KRANKL,PBR,KRANKR,KROWS,KOFFROW,PBCOMB) +IMPLICIT NONE +REAL(KIND=JPRD),INTENT(IN) :: PBL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KRANKL +REAL(KIND=JPRD),INTENT(IN) :: PBR(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KRANKR +INTEGER(KIND=JPIM),INTENT(IN) :: KROWS +INTEGER(KIND=JPIM),INTENT(IN) :: KOFFROW +REAL(KIND=JPRD),INTENT(OUT) :: PBCOMB(:,:) + +INTEGER(KIND=JPIM) :: JCOL,JM +!-------------------------------------------------------------------- +DO JCOL=1,KRANKL + DO JM=1,KROWS + PBCOMB(JM,JCOL) = PBL(KOFFROW+JM,JCOL) + ENDDO +ENDDO +DO JCOL=1,KRANKR + DO JM=1,KROWS + PBCOMB(JM,KRANKL+JCOL) = PBR(KOFFROW+JM,JCOL) + ENDDO +ENDDO + +END SUBROUTINE COMBINE_B +!=================================================================== +SUBROUTINE COMPRESS_MAT(YDNODE,YDBNODE,PEPS,KROWS,KCOLS,PSUB) +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDBNODE +REAL(KIND=JPRD),INTENT(IN) :: PEPS +INTEGER(KIND=JPIM),INTENT(IN) :: KROWS,KCOLS +REAL(KIND=JPRD),INTENT(IN) :: PSUB(:,:) + +INTEGER(KIND=JPIM) :: JR,IRANK,ICLIST(KCOLS),JN,JM,II +REAL(KIND=JPRD) :: ZNORMS(KCOLS) +REAL(KIND=JPRD) :: ZSUB(KROWS,KCOLS),ZPNONIM(KROWS,KCOLS) +REAL(KIND=JPRD),ALLOCATABLE :: ZP(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZB(:,:) +!-------------------------------------------------------------------- + +II = 0 +DO JN=1,KCOLS + DO JM=1,KROWS + II = II+1 + ZSUB(JM,JN) = PSUB(JM,JN) + ENDDO +ENDDO + +CALL COMPUTE_ID(PEPS,KROWS,KCOLS,ZSUB,IRANK,ICLIST,ZPNONIM) +YDNODE%IRANK = IRANK +ALLOCATE(YDNODE%PNONIM(IRANK*(KCOLS-IRANK))) +ALLOCATE(YDNODE%ICLIST(KCOLS)) +ALLOCATE(YDBNODE%DB(KROWS,IRANK)) +YDNODE%ICLIST(:) = ICLIST(1:KCOLS) +II = 0 +DO JN=1,KCOLS-IRANK + DO JM=1,IRANK + II = II+1 + YDNODE%PNONIM(II) = REAL(ZPNONIM(JM,JN), JPRB) + ENDDO +ENDDO +DO JR=1,IRANK + YDBNODE%DB(:,JR) = PSUB(:,ICLIST(JR)) +ENDDO + +END SUBROUTINE COMPRESS_MAT +!==================================================================== +SUBROUTINE MULT_BUTV(CDTRANS,YD_STRUCT,PVECIN,PVECOUT) +IMPLICIT NONE +! Multiply vector by matrix represented by buttervfly + +TYPE(BUTTERFLY_STRUCT),INTENT(IN) :: YD_STRUCT ! Structure from constucT-butterfly +CHARACTER(LEN=1),INTENT(IN) :: CDTRANS ! 'N' normal matmul, 'T' with transpose of matrix +REAL(KIND=JPRB),INTENT(IN) :: PVECIN(:) ! Input vector +REAL(KIND=JPRB),INTENT(OUT) :: PVECOUT(:) ! Output vector + +REAL(KIND=JPRB),ALLOCATABLE :: ZBETA(:,:) +INTEGER(KIND=JPIM) :: JL,JJ,JK,ILEVS,IFR,ILR,IROWS +INTEGER(KIND=JPIM) :: ILM1,IJL,IKL,IJR,IKR,IRANKL,IRANKR +INTEGER(KIND=JPIM) :: IBETALV,IBTST,IBTEN,IBETALVM1,IBTSTL,IBTENL,IBTSTR,IBTENR +REAL(KIND=JPRB) :: ZVECOUT(SIZE(PVECOUT)) +LOGICAL :: LLTRANSPOSE +TYPE(NODE_TYPE),POINTER :: YNODEL,YNODER,YNODE +!---------------------------------------------------------------------------------- +LLTRANSPOSE = (CDTRANS == 'T' .OR. CDTRANS == 't') + +ILEVS = YD_STRUCT%N_LEVELS +ALLOCATE(ZBETA(YD_STRUCT%IBETALEN_MAX,0:1)) ! Work space for "beta" + +! ONWR 5.4.3 +IF(LLTRANSPOSE) THEN + DO JL=ILEVS,0,-1 + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + CALL MULT_P_TR(YNODE,ZBETA(IBTST:IBTEN,IBETALV),PVECOUT(IFR:ILR)) + ELSE + IF(JL == ILEVS) THEN + IFR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IFROW + ILR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%ILROW + IROWS=YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IROWS + IF (LLDOUBLE) THEN + CALL DGEMV('T',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRD,YNODE%B,IROWS,PVECIN(IFR:ILR),1,& + & 0.0_JPRD,ZBETA(IBTST:IBTEN,IBETALV),1) + ELSE + CALL SGEMV('T',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRB,YNODE%B,IROWS,PVECIN(IFR:ILR),1,& + & 0.0_JPRB,ZBETA(IBTST:IBTEN,IBETALV),1) + ENDIF + ENDIF + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + ELSE + IRANKR = 0 + ENDIF + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + CALL MULT_P_TR(YNODE,ZBETA(IBTST:IBTEN,IBETALV),ZVECOUT(1:IRANKL+IRANKR)) + IF(MOD(JJ,2) == 1) THEN + ZBETA(IBTSTL:IBTENL,IBETALVM1)= ZVECOUT(1:IRANKL) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,IBETALVM1)=ZVECOUT(IRANKL+1:IRANKL+IRANKR) + ENDIF + ELSE + ZBETA(IBTSTL:IBTENL,IBETALVM1)=ZBETA(IBTSTL:IBTENL,IBETALVM1)+ & + & ZVECOUT(1:IRANKL) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,IBETALVM1)=ZBETA(IBTSTR:IBTENR,IBETALVM1) + & + & ZVECOUT(IRANKL+1:IRANKL+IRANKR) + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO +ELSE + DO JL=0,ILEVS + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN ! ONWR (115) + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + CALL MULT_P(YNODE,PVECIN(IFR:ILR),ZBETA(IBTST:IBTEN,IBETALV) ) + ELSE ! ONWR (116) + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + ELSE + IRANKR = 0 + ENDIF + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + CALL MULT_P(YNODE,ZBETA(IBTSTL:IBTENR,IBETALVM1),ZBETA(IBTST:IBTEN,IBETALV)) + ENDIF + IF(JL == ILEVS) THEN ! ONWR (117) + IFR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IFROW + ILR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%ILROW + IROWS = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IROWS + IF (LLDOUBLE) THEN + CALL DGEMV('N',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRD,YNODE%B,IROWS,ZBETA(IBTST:IBTEN,IBETALV),1,& + & 0.0_JPRD,PVECOUT(IFR:ILR),1) + ELSE + CALL SGEMV('N',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRB,YNODE%B,IROWS,ZBETA(IBTST:IBTEN,IBETALV),1,& + & 0.0_JPRB,PVECOUT(IFR:ILR),1) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO +ENDIF +END SUBROUTINE MULT_BUTV +!==================================================================== +SUBROUTINE MULT_BUTM(CDTRANS,YD_STRUCT,KF,PVECIN,PVECOUT,KWV) +IMPLICIT NONE + +! Multiply matrix by matrix represented by butterfly + +CHARACTER(LEN=1),INTENT(IN) :: CDTRANS ! 'N' normal matmul, 'T' with transpose of matrix +TYPE(BUTTERFLY_STRUCT),INTENT(IN) :: YD_STRUCT ! Structure from constucT-butterfly +INTEGER(KIND=JPIM),INTENT(IN) :: KF ! Number of fields +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KWV ! zonal wave number m (special_case) +REAL(KIND=JPRB),INTENT(IN) :: PVECIN(:,:) ! Input vector +REAL(KIND=JPRB),INTENT(OUT) :: PVECOUT(:,:) ! Output vector + +INTEGER(KIND=JPIM) :: JL,JJ,JK,ILEVS,IFR,ILR,IROWS,JF +INTEGER(KIND=JPIM) :: ILM1,IJL,IKL,IJR,IKR,IRANKL,IRANKR,IROUT,IRIN +INTEGER(KIND=JPIM) :: IRANK,IM,IN,JN,JM,IDX,IKWV,II +INTEGER(KIND=JPIM) :: IBETALV,IBTST,IBTEN,IBETALVM1,IBTSTL,IBTENL,IBTSTR,IBTENR,ILBETA +REAL(KIND=JPRB) :: ZVECIN(YD_STRUCT%N_ORDER,KF),ZVECOUT(YD_STRUCT%N_ORDER,KF) +REAL(KIND=JPRB),ALLOCATABLE :: ZBETA(:,:,:) +LOGICAL :: LLTRANSPOSE +LOGICAL :: LL_HALT_INVALID + +! IKWV==0 only, LLTRANSPOSE = true only +REAL(KIND=JPRD),ALLOCATABLE :: ZPNONIM_D(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZBETA_D(:,:), ZB_D(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZOUT_D(:,:), ZIN_D(:,:) + +TYPE(NODE_TYPE),POINTER :: YNODEL,YNODER,YNODE + +IKWV=10 +IF( PRESENT(KWV) ) THEN + IKWV=KWV +ENDIF + +!---------------------------------------------------------------------------------- +LLTRANSPOSE = (CDTRANS == 'T' .OR. CDTRANS == 't') +IROUT=SIZE(PVECOUT(:,1)) +IRIN=SIZE(PVECIN(:,1)) + +ILEVS = YD_STRUCT%N_LEVELS +ILBETA = YD_STRUCT%IBETALEN_MAX +ALLOCATE(ZBETA(ILBETA,KF,0:1)) ! Work space for "beta" + +! ONWR 5.4.3 +IF(LLTRANSPOSE) THEN + IF( IKWV == 0 ) THEN + ALLOCATE(ZBETA_D(ILBETA,KF)) + ALLOCATE(ZOUT_D(YD_STRUCT%N_ORDER,KF)) + ALLOCATE(ZIN_D(IRIN,KF)) + ENDIF + + DO JL=ILEVS,0,-1 + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + IN = YNODE%ICOLS-YNODE%IRANK + IM = YNODE%IRANK + IF( IM <=0 ) CALL ABOR1('mult_butm: IM<=0 not allowed') + IF(IN>0) THEN + IF (LLDOUBLE.OR.(IKWV == 0)) THEN + IF(.not.LLDOUBLE) THEN + ALLOCATE(ZPNONIM_D(IM,IN)) + II=0 + DO JN=1,IN + DO JM=1,IM + II = II+1 + ZPNONIM_D(JM,JN) = REAL(YNODE%PNONIM(II),JPRD) + ENDDO + ENDDO + ZBETA_D(1:IM,1:KF)=REAL(ZBETA(IBTST:IBTST+IM-1,1:KF,IBETALV),JPRD) + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & ZPNONIM_D,IM,ZBETA_D,ILBETA,0.0_JPRD,& + & ZOUT_D,YD_STRUCT%N_ORDER) + ZVECOUT(YNODE%IRANK+1:YNODE%IRANK+IN,1:KF) = REAL(ZOUT_D(1:IN,1:KF),JPRB) + DEALLOCATE(ZPNONIM_D) + ELSE + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRD,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + ENDIF + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IN,KF,IM,1.0_JPRB,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRB,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + DO JF=1,KF + DO JN=1,YNODE%IRANK + IDX = YNODE%ICLIST(JN) + PVECOUT(IFR+IDX-1,JF) = ZBETA(IBTST+JN-1,JF,IBETALV) + ENDDO + DO JN=YNODE%IRANK+1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + PVECOUT(IFR+IDX-1,JF) = ZVECOUT(JN,JF) + ENDDO + ENDDO + ELSE + IF(JL == ILEVS) THEN + IFR = YNODE%IFROW + ILR = YNODE%ILROW + IROWS =YNODE%IROWS + IRANK = YNODE%IRANK + IF (LLDOUBLE.OR.(IKWV == 0)) THEN + IF(.not.LLDOUBLE) THEN + ALLOCATE(ZB_D(IROWS,IRANK)) + ZB_D(1:IROWS,1:IRANK) = REAL(YNODE%B(1:IROWS,1:IRANK),JPRD) + ZIN_D(1:ILR-IFR+1,1:KF) = REAL(PVECIN(IFR:ILR,1:KF),JPRD) + + CALL DGEMM('T','N',IRANK,KF,IROWS,1.0_JPRD,& + & ZB_D,IROWS,ZIN_D,IRIN,0.0_JPRD,& + & ZBETA_D,ILBETA) + + ZBETA(IBTST:IBTST+IRANK-1,1:KF,IBETALV)=REAL(ZBETA_D(1:IRANK,1:KF),JPRB) + DEALLOCATE(ZB_D) + + ELSE + CALL DGEMM('T','N',IRANK,KF,IROWS,1.0_JPRD,& + & YNODE%B,IROWS,PVECIN(IFR,1),IRIN,0.0_JPRD,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + END IF + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IRANK,KF,IROWS,1.0_JPRB,& + & YNODE%B,IROWS,PVECIN(IFR,1),IRIN,0.0_JPRB,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + ELSE + IRANKR = 0 + ENDIF + IN = YNODE%ICOLS-YNODE%IRANK + IM = YNODE%IRANK + IF( IM <=0 ) CALL ABOR1('mult_butm: IM<=0 not allowed') + IF(IN>0) THEN + IF (LLDOUBLE.OR.(IKWV == 0)) THEN + IF(.not.LLDOUBLE) THEN + ALLOCATE(ZPNONIM_D(IM,IN)) + II=0 + DO JN=1,IN + DO JM=1,IM + II = II+1 + ZPNONIM_D(JM,JN) = REAL(YNODE%PNONIM(II),JPRD) + ENDDO + ENDDO + ZBETA_D(1:IM,1:KF)=REAL(ZBETA(IBTST:IBTST+IM-1,1:KF,IBETALV),JPRD) + + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & ZPNONIM_D,IM,ZBETA_D,ILBETA,0.0_JPRD,& + & ZOUT_D,YD_STRUCT%N_ORDER) + + ZVECOUT(YNODE%IRANK+1:YNODE%IRANK+IN,1:KF) = REAL(ZOUT_D(1:IN,1:KF),JPRB) + DEALLOCATE(ZPNONIM_D) + ELSE + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRD,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + ENDIF + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IN,KF,IM,1.0_JPRB,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRB,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + DO JF=1,KF + DO JN=1,YNODE%IRANK + IDX = YNODE%ICLIST(JN) + ZVECIN(IDX,JF) = ZBETA(IBTST+JN-1,JF,IBETALV) + ENDDO + DO JN=YNODE%IRANK+1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + ZVECIN(IDX,JF) = ZVECOUT(JN,JF) + ENDDO + ENDDO + + DO JF=1,KF + IF(MOD(JJ,2) == 1) THEN + ZBETA(IBTSTL:IBTENL,JF,IBETALVM1)= ZVECIN(1:IRANKL,JF) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,JF,IBETALVM1)=ZVECIN(IRANKL+1:IRANKL+IRANKR,JF) + ENDIF + ELSE + ZBETA(IBTSTL:IBTENL,JF,IBETALVM1)=ZBETA(IBTSTL:IBTENL,JF,IBETALVM1)+ & + & ZVECIN(1:IRANKL,JF) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,JF,IBETALVM1)=ZBETA(IBTSTR:IBTENR,JF,IBETALVM1) + & + & ZVECIN(IRANKL+1:IRANKL+IRANKR,JF) + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + ENDDO + ENDDO + + IF( IKWV == 0 ) THEN + DEALLOCATE(ZBETA_D) + DEALLOCATE(ZOUT_D) + DEALLOCATE(ZIN_D) + ENDIF + +ELSE + DO JL=0,ILEVS + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + IRANK = YNODE%IRANK + IM = IRANK + IN = YNODE%ICOLS-IRANK + DO JF=1,KF + DO JN=1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZBETA(IBTST+JN-1,JF,IBETALV) = PVECIN(IFR+IDX-1,JF) + ELSE + ZVECIN(JN,JF) = PVECIN(IFR+IDX-1,JF) + ENDIF + ENDDO + ENDDO + IF( IRANK <=0 ) CALL ABOR1('mult_butm: IRANK<=0 not allowed') + IF(YNODE%ICOLS > IRANK) THEN + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',IRANK,KF,IN,1.0_JPRD,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRD,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IRANK,KF,IN,1.0_JPRB,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRB,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ELSE + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + ELSE + IRANKR = 0 + IBTENR = IBTENL + ENDIF + IRANK = YNODE%IRANK + IM = IRANK + IN = YNODE%ICOLS-IRANK + DO JF=1,KF + DO JN=1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZBETA(IBTST+JN-1,JF,IBETALV) = ZBETA(IBTSTL+IDX-1,JF,IBETALVM1) + ELSE + ZVECIN(JN,JF) = ZBETA(IBTSTL+IDX-1,JF,IBETALVM1) + ENDIF + ENDDO + ENDDO + IF( IRANK <=0 ) CALL ABOR1('mult_butm: IRANK<=0 not allowed') + IF(YNODE%ICOLS > IRANK) THEN + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',IRANK,KF,IN,1.0_JPRD,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRD,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IRANK,KF,IN,1.0_JPRB,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRB,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ENDIF + IF( IRANK <=0 ) CALL ABOR1('mult_butm: IRANK<=0 not allowed') + IF(JL == ILEVS) THEN + IFR = YNODE%IFROW + ILR = YNODE%ILROW + IROWS = YNODE%IROWS + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',IROWS,KF,YNODE%IRANK,1.0_JPRD,& + & YNODE%B,IROWS,ZBETA(IBTST,1,IBETALV),YD_STRUCT%IBETALEN_MAX,0.0_JPRD,& + & PVECOUT(IFR,1),IROUT) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IROWS,KF,YNODE%IRANK,1.0_JPRB,& + & YNODE%B,IROWS,ZBETA(IBTST,1,IBETALV),YD_STRUCT%IBETALEN_MAX,0.0_JPRB,& + & PVECOUT(IFR,1),IROUT) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO +ENDIF +DEALLOCATE(ZBETA) +END SUBROUTINE MULT_BUTM +!===================================================================== +SUBROUTINE MULT_P(YDNODE,PVECIN,PVECOUT) +! Multiply vector by projection matrix +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +REAL(KIND=JPRB),INTENT(IN) :: PVECIN(:) +REAL(KIND=JPRB),INTENT(OUT) :: PVECOUT(:) + +REAL(KIND=JPRB) :: ZVECIN(YDNODE%ICOLS), ZVECOUT(SIZE(PVECOUT)) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN +!--------------------------------------------------------- + +IRANK = YDNODE%IRANK +DO JN=1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZVECOUT(JN) = PVECIN(IDX) + ELSE + ZVECIN(JN) = PVECIN(IDX) + ENDIF +ENDDO + +IF(YDNODE%ICOLS > IRANK) THEN + IM = IRANK + IN = YDNODE%ICOLS-IRANK + IF (JPRB == JPRD) THEN + CALL DGEMV('N',IM,IN,1.0_JPRB,YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1),1,1.0_JPRB,ZVECOUT,1) + PVECOUT(:)=ZVECOUT(:) + ELSE + CALL SGEMV('N',IM,IN,1.0_JPRB,YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1),1,1.0_JPRB,PVECOUT,1) + ENDIF +ENDIF + +END SUBROUTINE MULT_P +!===================================================================== +SUBROUTINE MULT_PM(YDNODE,KF,KLBETA,PVECIN,PVECOUT) +IMPLICIT NONE +! Multiply matrix by projection matrix +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +INTEGER(KIND=JPIM),INTENT(IN) :: KF +INTEGER(KIND=JPIM),INTENT(IN) :: KLBETA +REAL(KIND=JPRB),INTENT(IN) :: PVECIN(:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PVECOUT(:,:) + +REAL(KIND=JPRB) :: ZVECIN(YDNODE%ICOLS,KF), ZVECOUT(SIZE(PVECOUT(:,1)),KF) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN,JF + +LOGICAL :: LL_HALT_INVALID +!--------------------------------------------------------- + +IRANK = YDNODE%IRANK +IM = IRANK +IN = YDNODE%ICOLS-IRANK +DO JF=1,KF + DO JN=1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZVECOUT(JN,JF) = PVECIN(IDX,JF) + ELSE + ZVECIN(JN,JF) = PVECIN(IDX,JF) + ENDIF + ENDDO +ENDDO +IF(YDNODE%ICOLS > IRANK) THEN + IF (JPRB == JPRD) THEN + CALL DGEMM('N','N',IRANK,KF,IN,1.0_JPRD,& + & YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YDNODE%ICOLS,1.0_JPRD,& + & PVECOUT,IRANK) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IRANK,KF,IN,1.0_JPRB,& + & YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YDNODE%ICOLS,1.0_JPRB,& + & PVECOUT,IRANK) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF +ENDIF +END SUBROUTINE MULT_PM +!================================================================== +SUBROUTINE MULT_P_TR(YDNODE,PVECIN,PVECOUT) +! Multiply vector by transposed procetion matrix +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +REAL(KIND=JPRB),INTENT(IN) :: PVECIN(:) +REAL(KIND=JPRB),INTENT(OUT) :: PVECOUT(:) + +REAL(KIND=JPRB) :: ZVECOUT(YDNODE%ICOLS), ZVECIN(SIZE(PVECIN)) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN +!--------------------------------------------------------- + +IRANK = YDNODE%IRANK +IN = YDNODE%ICOLS-IRANK +IF(IN>0) THEN + IM = IRANK + IF (JPRB == JPRD) THEN + ZVECIN(:) = PVECIN(:) + CALL DGEMV('T',IM,IN,1.0_JPRD,YDNODE%PNONIM,IRANK,ZVECIN,1,0.0_JPRD,ZVECOUT(IRANK+1),1) + ELSE + CALL SGEMV('T',IM,IN,1.0_JPRB,YDNODE%PNONIM,IRANK,PVECIN,1,0.0_JPRB,ZVECOUT(IRANK+1),1) + ENDIF +ENDIF +DO JK=1,IRANK + IDX = YDNODE%ICLIST(JK) + PVECOUT(IDX) = PVECIN(JK) +ENDDO +DO JN=IRANK+1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + PVECOUT(IDX) = ZVECOUT(JN) +ENDDO + +END SUBROUTINE MULT_P_TR +!================================================================== +SUBROUTINE MULT_P_TRM(YDNODE,KF,PVECIN,PVECOUT) +! Multiply matrix by transposed procetion matrix +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +INTEGER(KIND=JPIM),INTENT(IN) :: KF +REAL(KIND=JPRB),INTENT(IN) :: PVECIN(:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PVECOUT(:,:) + +REAL(KIND=JPRB) :: ZVECOUT(YDNODE%ICOLS,KF), ZVECIN(SIZE(PVECIN(:,1)),KF) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN,JF + +LOGICAL :: LL_HALT_INVALID + +!------------------------------------------------------------------ + +IN = YDNODE%ICOLS-YDNODE%IRANK +IM = YDNODE%IRANK +IF(IN>0) THEN + IF (JPRB == JPRD) THEN + ZVECIN(:,:) = PVECIN(:,:) + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & YDNODE%PNONIM(1),IM,ZVECIN,IM,0.0_JPRD,& + & ZVECOUT(YDNODE%IRANK+1,1),YDNODE%ICOLS) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IN,KF,IM,1.0_JPRB,& + & YDNODE%PNONIM(1),IM,PVECIN,IM,0.0_JPRB,& + & ZVECOUT(YDNODE%IRANK+1,1),YDNODE%ICOLS) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF +ENDIF +DO JF=1,KF + DO JK=1,YDNODE%IRANK + IDX = YDNODE%ICLIST(JK) + PVECOUT(IDX,JF) = PVECIN(JK,JF) + ENDDO + DO JN=YDNODE%IRANK+1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + PVECOUT(IDX,JF) = ZVECOUT(JN,JF) + ENDDO +ENDDO +END SUBROUTINE MULT_P_TRM +!==================================================================== +END MODULE BUTTERFLY_ALG_MOD + diff --git a/src/trans/cpu/algor/fft992.F90 b/src/trans/cpu/algor/fft992.F90 new file mode 100644 index 0000000..efc9afa --- /dev/null +++ b/src/trans/cpu/algor/fft992.F90 @@ -0,0 +1,2327 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +! +! SUBROUTINE 'FFT992' - MULTIPLE FAST REAL PERIODIC TRANSFORM +! +! Author: Clive Temperton, January 1998 +! +! This routine is a modernized and enhanced version of FFT991 +! - Cray directives and ancient Fortran constructs removed +! - "vector chopping" removed +! - WORK array is now dynamically allocated +! - stride in WORK array is now always 1 +! +! REAL TRANSFORM OF LENGTH N PERFORMED BY REMOVING REDUNDANT +! OPERATIONS FROM COMPLEX TRANSFORM OF LENGTH N +! +! A IS THE ARRAY CONTAINING INPUT & OUTPUT DATA +! TRIGS IS A PREVIOUSLY PREPARED LIST OF TRIG FUNCTION VALUES +! IFAX IS A PREVIOUSLY PREPARED LIST OF FACTORS OF N +! INC IS THE INCREMENT WITHIN EACH DATA 'VECTOR' +! (E.G. INC=1 FOR CONSECUTIVELY STORED DATA) +! JUMP IS THE INCREMENT BETWEEN THE START OF EACH DATA VECTOR +! N IS THE LENGTH OF THE DATA VECTORS +! LOT IS THE NUMBER OF DATA VECTORS +! ISIGN = +1 FOR TRANSFORM FROM SPECTRAL TO GRIDPOINT +! = -1 FOR TRANSFORM FROM GRIDPOINT TO SPECTRAL +! +! ORDERING OF COEFFICIENTS: +! A(0),B(0),A(1),B(1),A(2),B(2),...,A(N/2),B(N/2) +! WHERE B(0)=B(N/2)=0; (N+2) LOCATIONS REQUIRED +! +! ORDERING OF DATA: +! X(0),X(1),X(2),...,X(N-1), 0 , 0 ; (N+2) LOCATIONS REQUIRED +! +! VECTORIZATION IS ACHIEVED BY DOING THE TRANSFORMS IN PARALLEL +! +! N MUST BE COMPOSED OF FACTORS 2,3 & 5 BUT DOES NOT HAVE TO BE EVEN +! +! DEFINITION OF TRANSFORMS: +! ------------------------- +! +! ISIGN=+1: X(J)=SUM(K=0,...,N-1)(C(K)*EXP(2*I*J*K*PI/N)) +! WHERE C(K)=A(K)+I*B(K) AND C(N-K)=A(K)-I*B(K) +! +! ISIGN=-1: A(K)=(1/N)*SUM(J=0,...,N-1)(X(J)*COS(2*J*K*PI/N)) +! B(K)=-(1/N)*SUM(J=0,...,N-1)(X(J)*SIN(2*J*K*PI/N)) +! +#ifdef MATHKEISAN +! MathKeisan is a scientific library optimized for NEC (www.mathkeisan.com) + + SUBROUTINE FFT992(A,TRIGS_,IFAX_,INC,JUMP,N,LOT,ISIGN) +!AUTOPROMOTE + USE PARKIND1, ONLY : JPIM, JPRB + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + IMPLICIT NONE + INTEGER(KIND=JPIM) :: N + REAL(KIND=JPRB) :: A(*) + REAL(KIND=JPRB) :: TRIGS_(N) + INTEGER(KIND=JPIM) :: IFAX_(10) + + INTEGER(KIND=JPIM) :: INC + INTEGER(KIND=JPIM) :: JUMP + INTEGER(KIND=JPIM) :: LOT + INTEGER(KIND=JPIM) :: ISIGN + + REAL(KIND=JPRB),ALLOCATABLE,DIMENSION(:),SAVE :: WORK , TRIGS + INTEGER(KIND=JPIM),SAVE :: IFAX (32) + + + INTEGER(KIND=JPIM), SAVE :: N_OLD=-1 + INTEGER(KIND=JPIM), SAVE :: LOT_OLD=-1 + +!$OMP threadprivate(ifax,n_old,lot_old,trigs,work) + + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + IF (LHOOK) CALL DR_HOOK('FFT992',0,ZHOOK_HANDLE) + IF (N .NE. N_OLD) THEN + + IF( ALLOCATED( WORK ) ) DEALLOCATE( WORK ) + IF( ALLOCATED( TRIGS ) ) DEALLOCATE( TRIGS ) + + ALLOCATE(WORK(3*N*LOT)) + ALLOCATE(TRIGS(2*N)) + + CALL DFTFAX ( N, IFAX, TRIGS ) + + N_OLD = N + LOT_OLD = LOT + + ELSE + + IF (LOT .GT. LOT_OLD) THEN + + IF( ALLOCATED( WORK ) ) DEALLOCATE( WORK ) + ALLOCATE(WORK(3*N*LOT)) + LOT_OLD = LOT + + ENDIF + + ENDIF + + CALL DFFTMLT ( A, WORK, TRIGS, IFAX, INC, JUMP, N, LOT, ISIGN ) + + IF (LHOOK) CALL DR_HOOK('FFT992',1,ZHOOK_HANDLE) + RETURN + + END SUBROUTINE FFT992 +#else +! +! SUBROUTINE 'FFT992' - MULTIPLE FAST REAL PERIODIC TRANSFORM +! +! Author: Clive Temperton, January 1998 +! +! This routine is a modernized and enhanced version of FFT991 +! - Cray directives and ancient Fortran constructs removed +! - "vector chopping" removed +! - WORK array is now dynamically allocated +! - stride in WORK array is now always 1 +! +! REAL TRANSFORM OF LENGTH N PERFORMED BY REMOVING REDUNDANT +! OPERATIONS FROM COMPLEX TRANSFORM OF LENGTH N +! +! A IS THE ARRAY CONTAINING INPUT & OUTPUT DATA +! TRIGS IS A PREVIOUSLY PREPARED LIST OF TRIG FUNCTION VALUES +! IFAX IS A PREVIOUSLY PREPARED LIST OF FACTORS OF N +! INC IS THE INCREMENT WITHIN EACH DATA 'VECTOR' +! (E.G. INC=1 FOR CONSECUTIVELY STORED DATA) +! JUMP IS THE INCREMENT BETWEEN THE START OF EACH DATA VECTOR +! N IS THE LENGTH OF THE DATA VECTORS +! LOT IS THE NUMBER OF DATA VECTORS +! ISIGN = +1 FOR TRANSFORM FROM SPECTRAL TO GRIDPOINT +! = -1 FOR TRANSFORM FROM GRIDPOINT TO SPECTRAL +! +! ORDERING OF COEFFICIENTS: +! A(0),B(0),A(1),B(1),A(2),B(2),...,A(N/2),B(N/2) +! WHERE B(0)=B(N/2)=0; (N+2) LOCATIONS REQUIRED +! +! ORDERING OF DATA: +! X(0),X(1),X(2),...,X(N-1), 0 , 0 ; (N+2) LOCATIONS REQUIRED +! +! VECTORIZATION IS ACHIEVED BY DOING THE TRANSFORMS IN PARALLEL +! +! N MUST BE COMPOSED OF FACTORS 2,3 & 5 BUT DOES NOT HAVE TO BE EVEN +! +! DEFINITION OF TRANSFORMS: +! ------------------------- +! +! ISIGN=+1: X(J)=SUM(K=0,...,N-1)(C(K)*EXP(2*I*J*K*PI/N)) +! WHERE C(K)=A(K)+I*B(K) AND C(N-K)=A(K)-I*B(K) +! +! ISIGN=-1: A(K)=(1/N)*SUM(J=0,...,N-1)(X(J)*COS(2*J*K*PI/N)) +! B(K)=-(1/N)*SUM(J=0,...,N-1)(X(J)*SIN(2*J*K*PI/N)) +! + SUBROUTINE FFT992(A,TRIGS,IFAX,INC,JUMP,N,LOT,ISIGN) +!disabled for now. REK.!DEC$ OPTIMIZE:3 +!AUTOPROMOTE + USE PARKIND1, ONLY : JPIM, JPRB + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + IMPLICIT NONE +! + INTEGER(KIND=JPIM) :: NU + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: INC + INTEGER(KIND=JPIM) :: JBASE + INTEGER(KIND=JPIM) :: JUMP + INTEGER(KIND=JPIM) :: J,JJ,JUMPA + INTEGER(KIND=JPIM) :: LOT + INTEGER(KIND=JPIM) :: K,LA,NFAX + INTEGER(KIND=JPIM) :: ISIGN + INTEGER(KIND=JPIM) :: I,IA,IBASE,IERR,IFAC,IGO,II,INCA,IX + + REAL(KIND=JPRB) :: A(*) + REAL(KIND=JPRB) :: TRIGS(N) + INTEGER(KIND=JPIM) :: IFAX(10) +! Dynamically allocated work array: + REAL(KIND=JPRB) :: WORK(N*LOT+1) + LOGICAL :: LIPL +! + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + IF (LHOOK) CALL DR_HOOK('FFT992',0,ZHOOK_HANDLE) + NFAX=IFAX(1) + IF (ISIGN.EQ.+1) THEN +! +! ISIGN=+1, SPECTRAL TO GRIDPOINT TRANSFORM +! ----------------------------------------- +! + I=1 +!OCL NOVREC +!DEC$ IVDEP + DO J=1,LOT + A(I+INC)=0.5_JPRB*A(I) + I=I+JUMP + ENDDO + IF (MOD(N,2).EQ.0) THEN + I=N*INC+1 +!OCL NOVREC +!DEC$ IVDEP + DO J=1,LOT + A(I)=0.5_JPRB*A(I) + I=I+JUMP + ENDDO + ENDIF +! + IA=INC+1 + LA=1 + IGO=+1 +! + DO K=1,NFAX + IFAC=IFAX(K+1) + IERR=-1 + IF (K.EQ.NFAX.AND.NFAX.GT.2.AND.IGO.EQ.+1) THEN + LIPL=.TRUE. + ELSE + LIPL=.FALSE. + ENDIF + IF (INC.EQ.1.AND.JUMP.LT.(2*N).AND. & + & K.GT.1.AND.K.LT.(NFAX-MOD(NFAX,2))) THEN + INCA=LOT + JUMPA=1 + ELSE + INCA=INC + JUMPA=JUMP + ENDIF + IF (IGO.EQ.+1) THEN +!DEC$ FORCEINLINE + CALL RPASSF(A(IA),A(IA+LA*INCA),WORK(1),WORK(IFAC*LA*LOT+1), & + & TRIGS,INCA,LOT,JUMPA,1,LOT,N,IFAC,LA,IERR,LIPL) + ELSE +!DEC$ FORCEINLINE + CALL RPASSF(WORK(1),WORK(LA*LOT+1),A(IA),A(IA+IFAC*LA*INCA), & + & TRIGS,LOT,INCA,1,JUMPA,LOT,N,IFAC,LA,IERR,LIPL) + ENDIF + IF (IERR.NE.0) THEN + IF (IERR.EQ.2) WRITE(6,901) IFAC + IF (IERR.EQ.3) WRITE(6,902) IFAC + IF (LHOOK) CALL DR_HOOK('FFT992',1,ZHOOK_HANDLE) + RETURN + ENDIF + LA=IFAC*LA + IGO=-IGO + IA=1 + ENDDO +! +! IF NECESSARY, COPY RESULTS BACK TO A +! ------------------------------------ + IF (NFAX.EQ.1) THEN + IBASE=1 + JBASE=1 + DO JJ=1,N + I=IBASE + J=JBASE + DO II=1,LOT + A(J)=WORK(I) + I=I+1 + J=J+JUMP + ENDDO + IBASE=IBASE+LOT + JBASE=JBASE+INC + ENDDO + ENDIF +! +! FILL IN ZEROS AT END +! -------------------- + IX=N*INC+1 +!OCL NOVREC +!DEC$ IVDEP + DO J=1,LOT + A(IX)=0.0_JPRB + A(IX+INC)=0.0_JPRB + IX=IX+JUMP + ENDDO +! + ELSEIF (ISIGN.EQ.-1) THEN +! +! ISIGN=-1, GRIDPOINT TO SPECTRAL TRANSFORM +! ----------------------------------------- + IA=1 + LA=N + IGO=+1 +! + DO K=1,NFAX + IFAC=IFAX(NFAX+2-K) + LA=LA/IFAC + IERR=-1 + IF (K.EQ.1.AND.NFAX.GT.2.AND.MOD(NFAX,2).EQ.1) THEN + LIPL=.TRUE. + ELSE + LIPL=.FALSE. + ENDIF + IF (INC.EQ.1.AND.JUMP.LT.(2*N).AND. & + & K.GT.(1+MOD(NFAX,2)).AND.K.LT.NFAX) THEN + INCA=LOT + JUMPA=1 + ELSE + INCA=INC + JUMPA=JUMP + ENDIF + IF (IGO.EQ.+1) THEN +!DEC$ FORCEINLINE + CALL QPASSF(A(IA),A(IA+IFAC*LA*INCA),WORK(1),WORK(LA*LOT+1), & + & TRIGS,INCA,LOT,JUMPA,1,LOT,N,IFAC,LA,IERR,LIPL) + ELSE +!DEC$ FORCEINLINE + CALL QPASSF(WORK(1),WORK(IFAC*LA*LOT+1),A(IA),A(IA+LA*INCA), & + & TRIGS,LOT,INCA,1,JUMPA,LOT,N,IFAC,LA,IERR,LIPL) + ENDIF + IF (IERR.NE.0) THEN + IF (IERR.EQ.2) WRITE(6,901) IFAC + IF (IERR.EQ.3) WRITE(6,902) IFAC + IF (LHOOK) CALL DR_HOOK('FFT992',1,ZHOOK_HANDLE) + RETURN + ENDIF + IF (LIPL) THEN + IA=1 + ELSE + IGO=-IGO + IA=INC+1 + ENDIF + ENDDO +! +! IF NECESSARY, COPY RESULTS BACK TO A +! ------------------------------------ + IF (NFAX.EQ.1) THEN + IBASE=1 + JBASE=INC+1 + DO JJ=1,N + I=IBASE + J=JBASE + DO II=1,LOT + A(J)=WORK(I) + I=I+1 + J=J+JUMP + ENDDO + IBASE=IBASE+LOT + JBASE=JBASE+INC + ENDDO + ENDIF +! +! SHIFT A(0) & FILL IN ZERO IMAG PARTS +! ------------------------------------ + IX=1 +!OCL NOVREC +!DEC$ IVDEP + DO J=1,LOT + A(IX)=A(IX+INC) + A(IX+INC)=0.0_JPRB + IX=IX+JUMP + ENDDO + IF (MOD(N,2).EQ.0) THEN + IX=(N+1)*INC+1 + DO J=1,LOT + A(IX)=0.0_JPRB + IX=IX+JUMP + ENDDO + ENDIF +! + ENDIF +! +! FORMAT STATEMENTS FOR ERROR MESSAGES: + 901 FORMAT(' FACTOR =',I3,' NOT CATERED FOR') + 902 FORMAT(' FACTOR =',I3,' ONLY CATERED FOR IF LA*IFAC=N') +! + IF (LHOOK) CALL DR_HOOK('FFT992',1,ZHOOK_HANDLE) + + CONTAINS +! SUBROUTINE 'RPASSF' - PERFORMS ONE PASS THROUGH DATA AS PART +! OF MULTIPLE REAL FFT (FOURIER SYNTHESIS) ROUTINE +! +! A IS FIRST REAL INPUT VECTOR +! EQUIVALENCE B(1) WITH A (LA*INC1+1) +! C IS FIRST REAL OUTPUT VECTOR +! EQUIVALENCE D(1) WITH C(IFAC*LA*INC2+1) +! TRIGS IS A PRECALCULATED LIST OF SINES & COSINES +! INC1 IS THE ADDRESSING INCREMENT FOR A +! INC2 IS THE ADDRESSING INCREMENT FOR C +! INC3 IS THE INCREMENT BETWEEN INPUT VECTORS A +! INC4 IS THE INCREMENT BETWEEN OUTPUT VECTORS C +! LOT IS THE NUMBER OF VECTORS +! N IS THE LENGTH OF THE VECTORS +! IFAC IS THE CURRENT FACTOR OF N +! LA IS THE PRODUCT OF PREVIOUS FACTORS +! IERR IS AN ERROR INDICATOR: +! 0 - PASS COMPLETED WITHOUT ERROR +! 1 - LOT GREATER THAN 64 +! 2 - IFAC NOT CATERED FOR +! 3 - IFAC ONLY CATERED FOR IF LA=N/IFAC +! LIPL=.T. => RESULTS ARE RETURNED TO INPUT ARRAY +! (ONLY VALID IF LA=N/IFAC, I.E. ON LAST PASS) +! +!----------------------------------------------------------------------- +! + SUBROUTINE RPASSF(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,LOT,N,IFAC, & + & LA,IERR,LIPL) +!AUTOPROMOTE +! + USE PARKIND1, ONLY : JPIM, JPRB + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +! + IMPLICIT NONE +! + INTEGER(KIND=JPIM) :: N + REAL(KIND=JPRB) :: A(*) + REAL(KIND=JPRB) :: B(*) + REAL(KIND=JPRB) :: C(*) + REAL(KIND=JPRB) :: D(*) + REAL(KIND=JPRB) :: TRIGS(N) + REAL(KIND=JPRB) :: A10,A11,A20,A21 + REAL(KIND=JPRB) :: B10,B11,B20,B21 + REAL(KIND=JPRB) :: C1,C2,C3,C4,C5 + REAL(KIND=JPRB) :: S1,S2,S3,S4,S5 + REAL(KIND=JPRB) :: SIN36,SIN45,SIN60,SIN72 + REAL(KIND=JPRB) :: SSIN36,SSIN45,SSIN60,SSIN72 + REAL(KIND=JPRB) :: QRT5,QQRT5 + REAL(KIND=JPRB) :: T1,T2,T3,T4,T5,T6,T7 + INTEGER(KIND=JPIM) :: IERR + INTEGER(KIND=JPIM) :: INC1 + INTEGER(KIND=JPIM) :: INC2 + INTEGER(KIND=JPIM) :: INC3 + INTEGER(KIND=JPIM) :: INC4 + INTEGER(KIND=JPIM) :: LOT + INTEGER(KIND=JPIM) :: IFAC + INTEGER(KIND=JPIM) :: LA + INTEGER(KIND=JPIM) :: INC21,IINK,IJK,ILOT,ILA + INTEGER(KIND=JPIM) :: I,IA,IB,IBAD,IBASE,IC,ID,IE,IF + INTEGER(KIND=JPIM) :: J,JA,JB,JBASE,JC,JD,JE,JF,JG,JH,JINK,JUMP + INTEGER(KIND=JPIM) :: K,KB,KC,KD,KE,KF,KSTOP + INTEGER(KIND=JPIM) :: L,M + LOGICAL :: LIPL +! + DATA SIN36/0.587785252292473_JPRB/,SIN72/0.951056516295154_JPRB/, & + & QRT5/0.559016994374947_JPRB/,SIN60/0.866025403784437_JPRB/ +! + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + M=N/IFAC + IINK=LA*INC1 + JINK=LA*INC2 + JUMP=(IFAC-1)*JINK + KSTOP=(N-IFAC)/(2*IFAC) +! + IBASE=0 + JBASE=0 + IBAD=0 +! +! Increase the vector length by fusing the loops if the +! data layout is appropriate: + IF (INC1.EQ.LOT.AND.INC2.EQ.LOT.AND.INC3.EQ.1.AND.INC4.EQ.1) THEN + ILA=1 + ILOT=LA*LOT + INC21=LA*LOT + ELSE + ILA=LA + ILOT=LOT + INC21=INC2 + ENDIF +! + IF (IFAC.EQ.2) THEN +! +! CODING FOR FACTOR 2 +! ------------------- + 200 CONTINUE + IA=1 + IB=IA+(2*M-LA)*INC1 + JA=1 + JB=JA+JINK +! + IF (LA.NE.M) THEN +! + DO 220 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 210 IJK=1,ILOT + C(JA+J)=A(IA+I)+A(IB+I) + C(JB+J)=A(IA+I)-A(IB+I) + I=I+INC3 + J=J+INC4 + 210 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 220 CONTINUE + IA=IA+IINK + IINK=2*IINK + IB=IB-IINK + IBASE=0 + JBASE=JBASE+JUMP + JUMP=2*JUMP+JINK +! + IF (IA.LT.IB) THEN + DO 250 K=LA,KSTOP,LA + KB=K+K + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + IBASE=0 + DO 240 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 230 IJK=1,ILOT + C(JA+J)=A(IA+I)+A(IB+I) + D(JA+J)=B(IA+I)-B(IB+I) + C(JB+J)=C1*(A(IA+I)-A(IB+I))-S1*(B(IA+I)+B(IB+I)) + D(JB+J)=S1*(A(IA+I)-A(IB+I))+C1*(B(IA+I)+B(IB+I)) + I=I+INC3 + J=J+INC4 + 230 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 240 CONTINUE + IA=IA+IINK + IB=IB-IINK + JBASE=JBASE+JUMP + 250 CONTINUE + ENDIF +! + IF (IA.EQ.IB) THEN + IBASE=0 + DO 280 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 270 IJK=1,ILOT + C(JA+J)=A(IA+I) + C(JB+J)=-B(IA+I) + I=I+INC3 + J=J+INC4 + 270 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 280 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + IF (LIPL) THEN + DO 294 L=1,ILA + I=IBASE +!OCL NOVREC + DO 292 IJK=1,ILOT + T1=2.0*(A(IA+I)-A(IB+I)) + A(IA+I)=2.0_JPRB*(A(IA+I)+A(IB+I)) + A(IB+I)=T1 + I=I+INC3 + 292 CONTINUE + IBASE=IBASE+INC1 + 294 CONTINUE + ELSE + DO 298 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 296 IJK=1,ILOT + C(JA+J)=2.0_JPRB*(A(IA+I)+A(IB+I)) + C(JB+J)=2.0_JPRB*(A(IA+I)-A(IB+I)) + I=I+INC3 + J=J+INC4 + 296 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 298 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.3) THEN +! +! CODING FOR FACTOR 3 +! ------------------- + 300 CONTINUE + IA=1 + IB=IA+(2*M-LA)*INC1 + IC=IB + JA=1 + JB=JA+JINK + JC=JB+JINK +! + IF (LA.NE.M) THEN +! + DO 320 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 310 IJK=1,ILOT + C(JA+J)=A(IA+I)+A(IB+I) + C(JB+J)=(A(IA+I)-0.5_JPRB*A(IB+I))-(SIN60*(B(IB+I))) + C(JC+J)=(A(IA+I)-0.5_JPRB*A(IB+I))+(SIN60*(B(IB+I))) + I=I+INC3 + J=J+INC4 + 310 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 320 CONTINUE + IA=IA+IINK + IINK=2*IINK + IB=IB+IINK + IC=IC-IINK + JBASE=JBASE+JUMP + JUMP=2*JUMP+JINK +! + IF (IA.LT.IC) THEN + DO 350 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + IBASE=0 + DO 340 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 330 IJK=1,ILOT + C(JA+J)=A(IA+I)+(A(IB+I)+A(IC+I)) + D(JA+J)=B(IA+I)+(B(IB+I)-B(IC+I)) + C(JB+J)= & + & C1*((A(IA+I)-0.5_JPRB*(A(IB+I)+A(IC+I)))- & + & (SIN60*(B(IB+I)+B(IC+I)))) & + & -S1*((B(IA+I)-0.5_JPRB*(B(IB+I)-B(IC+I)))+ & + & (SIN60*(A(IB+I)-A(IC+I)))) + D(JB+J)= & + & S1*((A(IA+I)-0.5_JPRB*(A(IB+I)+A(IC+I)))- & + & (SIN60*(B(IB+I)+B(IC+I)))) & + & +C1*((B(IA+I)-0.5_JPRB*(B(IB+I)-B(IC+I)))+ & + & (SIN60*(A(IB+I)-A(IC+I)))) + C(JC+J)= & + & C2*((A(IA+I)-0.5_JPRB*(A(IB+I)+A(IC+I)))+ & + & (SIN60*(B(IB+I)+B(IC+I)))) & + & -S2*((B(IA+I)-0.5_JPRB*(B(IB+I)-B(IC+I)))- & + & (SIN60*(A(IB+I)-A(IC+I)))) + D(JC+J)= & + & S2*((A(IA+I)-0.5_JPRB*(A(IB+I)+A(IC+I)))+ & + & (SIN60*(B(IB+I)+B(IC+I)))) & + & +C2*((B(IA+I)-0.5_JPRB*(B(IB+I)-B(IC+I)))- & + & (SIN60*(A(IB+I)-A(IC+I)))) + I=I+INC3 + J=J+INC4 + 330 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 340 CONTINUE + IA=IA+IINK + IB=IB+IINK + IC=IC-IINK + JBASE=JBASE+JUMP + 350 CONTINUE + ENDIF +! + IF (IA.EQ.IC) THEN + IBASE=0 + DO 380 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 370 IJK=1,ILOT + C(JA+J)=A(IA+I)+A(IB+I) + C(JB+J)=(0.5_JPRB*A(IA+I)-A(IB+I))-(SIN60*B(IA+I)) + C(JC+J)=-(0.5_JPRB*A(IA+I)-A(IB+I))-(SIN60*B(IA+I)) + I=I+INC3 + J=J+INC4 + 370 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 380 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + SSIN60=2.0*SIN60 + IF (LIPL) THEN + DO 394 L=1,ILA + I=IBASE +!OCL NOVREC + DO 392 IJK=1,ILOT + T1=(2.0_JPRB*A(IA+I)-A(IB+I))-(SSIN60*B(IB+I)) + T2=(2.0_JPRB*A(IA+I)-A(IB+I))+(SSIN60*B(IB+I)) + A(IA+I)=2.0_JPRB*(A(IA+I)+A(IB+I)) + A(IB+I)=T1 + B(IB+I)=T2 + I=I+INC3 + 392 CONTINUE + IBASE=IBASE+INC1 + 394 CONTINUE + ELSE + DO 398 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 396 IJK=1,ILOT + C(JA+J)=2.0_JPRB*(A(IA+I)+A(IB+I)) + C(JB+J)=(2.0_JPRB*A(IA+I)-A(IB+I))-(SSIN60*B(IB+I)) + C(JC+J)=(2.0_JPRB*A(IA+I)-A(IB+I))+(SSIN60*B(IB+I)) + I=I+INC3 + J=J+INC4 + 396 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 398 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.4) THEN +! +! CODING FOR FACTOR 4 +! ------------------- + 400 CONTINUE + IA=1 + IB=IA+(2*M-LA)*INC1 + IC=IB+2*M*INC1 + ID=IB + JA=1 + JB=JA+JINK + JC=JB+JINK + JD=JC+JINK +! + IF (LA.NE.M) THEN +! + DO 420 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 410 IJK=1,ILOT + C(JA+J)=(A(IA+I)+A(IC+I))+A(IB+I) + C(JB+J)=(A(IA+I)-A(IC+I))-B(IB+I) + C(JC+J)=(A(IA+I)+A(IC+I))-A(IB+I) + C(JD+J)=(A(IA+I)-A(IC+I))+B(IB+I) + I=I+INC3 + J=J+INC4 + 410 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 420 CONTINUE + IA=IA+IINK + IINK=2*IINK + IB=IB+IINK + IC=IC-IINK + ID=ID-IINK + JBASE=JBASE+JUMP + JUMP=2*JUMP+JINK +! + IF (IB.LT.IC) THEN + DO 450 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + KD=KC+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + C3=TRIGS(KD+1) + S3=TRIGS(KD+2) + IBASE=0 + DO 440 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 430 IJK=1,ILOT + C(JA+J)=(A(IA+I)+A(IC+I))+(A(IB+I)+A(ID+I)) + D(JA+J)=(B(IA+I)-B(IC+I))+(B(IB+I)-B(ID+I)) + C(JC+J)= & + & C2*((A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I))) & + & -S2*((B(IA+I)-B(IC+I))-(B(IB+I)-B(ID+I))) + D(JC+J)= & + & S2*((A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I))) & + & +C2*((B(IA+I)-B(IC+I))-(B(IB+I)-B(ID+I))) + C(JB+J)= & + & C1*((A(IA+I)-A(IC+I))-(B(IB+I)+B(ID+I))) & + & -S1*((B(IA+I)+B(IC+I))+(A(IB+I)-A(ID+I))) + D(JB+J)= & + & S1*((A(IA+I)-A(IC+I))-(B(IB+I)+B(ID+I))) & + & +C1*((B(IA+I)+B(IC+I))+(A(IB+I)-A(ID+I))) + C(JD+J)= & + & C3*((A(IA+I)-A(IC+I))+(B(IB+I)+B(ID+I))) & + & -S3*((B(IA+I)+B(IC+I))-(A(IB+I)-A(ID+I))) + D(JD+J)= & + & S3*((A(IA+I)-A(IC+I))+(B(IB+I)+B(ID+I))) & + & +C3*((B(IA+I)+B(IC+I))-(A(IB+I)-A(ID+I))) + I=I+INC3 + J=J+INC4 + 430 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 440 CONTINUE + IA=IA+IINK + IB=IB+IINK + IC=IC-IINK + ID=ID-IINK + JBASE=JBASE+JUMP + 450 CONTINUE + ENDIF +! + IF (IB.EQ.IC) THEN + IBASE=0 + SIN45=SQRT(0.5_JPRB) + DO 480 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 470 IJK=1,ILOT + C(JA+J)=A(IA+I)+A(IB+I) + C(JB+J)=SIN45*((A(IA+I)-A(IB+I))-(B(IA+I)+B(IB+I))) + C(JC+J)=B(IB+I)-B(IA+I) + C(JD+J)=-SIN45*((A(IA+I)-A(IB+I))+(B(IA+I)+B(IB+I))) + I=I+INC3 + J=J+INC4 + 470 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 480 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + IF (LIPL) THEN + DO 494 L=1,ILA + I=IBASE +!OCL NOVREC + DO 492 IJK=1,ILOT + T1=2.0_JPRB*((A(IA+I)-A(IC+I))-B(IB+I)) + T2=2.0_JPRB*((A(IA+I)+A(IC+I))-A(IB+I)) + T3=2.0_JPRB*((A(IA+I)-A(IC+I))+B(IB+I)) + A(IA+I)=2.0_JPRB*((A(IA+I)+A(IC+I))+A(IB+I)) + A(IB+I)=T1 + B(IB+I)=T2 + A(IC+I)=T3 + I=I+INC3 + 492 CONTINUE + IBASE=IBASE+INC1 + 494 CONTINUE + ELSE + DO 498 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 496 IJK=1,ILOT + C(JA+J)=2.0_JPRB*((A(IA+I)+A(IC+I))+A(IB+I)) + C(JB+J)=2.0_JPRB*((A(IA+I)-A(IC+I))-B(IB+I)) + C(JC+J)=2.0_JPRB*((A(IA+I)+A(IC+I))-A(IB+I)) + C(JD+J)=2.0_JPRB*((A(IA+I)-A(IC+I))+B(IB+I)) + I=I+INC3 + J=J+INC4 + 496 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 498 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.5) THEN +! +! CODING FOR FACTOR 5 +! ------------------- + 500 CONTINUE + IA=1 + IB=IA+(2*M-LA)*INC1 + IC=IB+2*M*INC1 + ID=IC + IE=IB + JA=1 + JB=JA+JINK + JC=JB+JINK + JD=JC+JINK + JE=JD+JINK +! + IF (LA.NE.M) THEN +! + DO 520 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 510 IJK=1,ILOT + C(JA+J)=A(IA+I)+(A(IB+I)+A(IC+I)) + C(JB+J)=((A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I)))+ & + & QRT5*(A(IB+I)-A(IC+I)))-(SIN72*B(IB+I)+SIN36*B(IC+I)) + C(JC+J)=((A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I)))- & + & QRT5*(A(IB+I)-A(IC+I)))-(SIN36*B(IB+I)-SIN72*B(IC+I)) + C(JD+J)=((A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I)))- & + & QRT5*(A(IB+I)-A(IC+I)))+(SIN36*B(IB+I)-SIN72*B(IC+I)) + C(JE+J)=((A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I)))+ & + & QRT5*(A(IB+I)-A(IC+I)))+(SIN72*B(IB+I)+SIN36*B(IC+I)) + I=I+INC3 + J=J+INC4 + 510 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 520 CONTINUE + IA=IA+IINK + IINK=2*IINK + IB=IB+IINK + IC=IC+IINK + ID=ID-IINK + IE=IE-IINK + JBASE=JBASE+JUMP + JUMP=2*JUMP+JINK +! + IF (IB.LT.ID) THEN + DO 550 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + KD=KC+KB + KE=KD+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + C3=TRIGS(KD+1) + S3=TRIGS(KD+2) + C4=TRIGS(KE+1) + S4=TRIGS(KE+2) + IBASE=0 + DO 540 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 530 IJK=1,ILOT +! + A10=(A(IA+I)-0.25_JPRB*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & + & +QRT5*((A(IB+I)+A(IE+I))-(A(IC+I)+A(ID+I))) + A20=(A(IA+I)-0.25_JPRB*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & + & -QRT5*((A(IB+I)+A(IE+I))-(A(IC+I)+A(ID+I))) + B10=(B(IA+I)-0.25_JPRB*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & + & +QRT5*((B(IB+I)-B(IE+I))-(B(IC+I)-B(ID+I))) + B20=(B(IA+I)-0.25_JPRB*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & + & -QRT5*((B(IB+I)-B(IE+I))-(B(IC+I)-B(ID+I))) + A11=SIN72*(B(IB+I)+B(IE+I))+SIN36*(B(IC+I)+B(ID+I)) + A21=SIN36*(B(IB+I)+B(IE+I))-SIN72*(B(IC+I)+B(ID+I)) + B11=SIN72*(A(IB+I)-A(IE+I))+SIN36*(A(IC+I)-A(ID+I)) + B21=SIN36*(A(IB+I)-A(IE+I))-SIN72*(A(IC+I)-A(ID+I)) +! + C(JA+J)=A(IA+I)+((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I))) + D(JA+J)=B(IA+I)+((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I))) + C(JB+J)=C1*(A10-A11)-S1*(B10+B11) + D(JB+J)=S1*(A10-A11)+C1*(B10+B11) + C(JE+J)=C4*(A10+A11)-S4*(B10-B11) + D(JE+J)=S4*(A10+A11)+C4*(B10-B11) + C(JC+J)=C2*(A20-A21)-S2*(B20+B21) + D(JC+J)=S2*(A20-A21)+C2*(B20+B21) + C(JD+J)=C3*(A20+A21)-S3*(B20-B21) + D(JD+J)=S3*(A20+A21)+C3*(B20-B21) +! + I=I+INC3 + J=J+INC4 + 530 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 540 CONTINUE + IA=IA+IINK + IB=IB+IINK + IC=IC+IINK + ID=ID-IINK + IE=IE-IINK + JBASE=JBASE+JUMP + 550 CONTINUE + ENDIF +! + IF (IB.EQ.ID) THEN + IBASE=0 + DO 580 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 570 IJK=1,ILOT + C(JA+J)=(A(IA+I)+A(IB+I))+A(IC+I) + C(JB+J)=(QRT5*(A(IA+I)-A(IB+I))+ & + & (0.25_JPRB*(A(IA+I)+A(IB+I))-A(IC+I))) & + & -(SIN36*B(IA+I)+SIN72*B(IB+I)) + C(JE+J)=-(QRT5*(A(IA+I)-A(IB+I))+ & + & (0.25_JPRB*(A(IA+I)+A(IB+I))-A(IC+I))) & + & -(SIN36*B(IA+I)+SIN72*B(IB+I)) + C(JC+J)=(QRT5*(A(IA+I)-A(IB+I))- & + & (0.25_JPRB*(A(IA+I)+A(IB+I))-A(IC+I))) & + & -(SIN72*B(IA+I)-SIN36*B(IB+I)) + C(JD+J)=-(QRT5*(A(IA+I)-A(IB+I))- & + & (0.25_JPRB*(A(IA+I)+A(IB+I))-A(IC+I))) & + & -(SIN72*B(IA+I)-SIN36*B(IB+I)) + I=I+INC3 + J=J+INC4 + 570 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 580 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + QQRT5=2.0*QRT5 + SSIN36=2.0*SIN36 + SSIN72=2.0*SIN72 + IF (LIPL) THEN + DO 594 L=1,ILA + I=IBASE +!OCL NOVREC + DO 592 IJK=1,ILOT + T1=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & +QQRT5*(A(IB+I)-A(IC+I)))-(SSIN72*B(IB+I)+SSIN36*B(IC+I)) + T2=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & -QQRT5*(A(IB+I)-A(IC+I)))-(SSIN36*B(IB+I)-SSIN72*B(IC+I)) + T3=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & -QQRT5*(A(IB+I)-A(IC+I)))+(SSIN36*B(IB+I)-SSIN72*B(IC+I)) + T4=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & +QQRT5*(A(IB+I)-A(IC+I)))+(SSIN72*B(IB+I)+SSIN36*B(IC+I)) + A(IA+I)=2.0_JPRB*(A(IA+I)+(A(IB+I)+A(IC+I))) + A(IB+I)=T1 + B(IB+I)=T2 + A(IC+I)=T3 + B(IC+I)=T4 + I=I+INC3 + 592 CONTINUE + IBASE=IBASE+INC1 + 594 CONTINUE + ELSE + DO 598 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 596 IJK=1,ILOT + C(JA+J)=2.0_JPRB*(A(IA+I)+(A(IB+I)+A(IC+I))) + C(JB+J)=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & +QQRT5*(A(IB+I)-A(IC+I)))-(SSIN72*B(IB+I)+SSIN36*B(IC+I)) + C(JC+J)=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & -QQRT5*(A(IB+I)-A(IC+I)))-(SSIN36*B(IB+I)-SSIN72*B(IC+I)) + C(JD+J)=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & -QQRT5*(A(IB+I)-A(IC+I)))+(SSIN36*B(IB+I)-SSIN72*B(IC+I)) + C(JE+J)=(2.0_JPRB*(A(IA+I)-0.25_JPRB*(A(IB+I)+A(IC+I))) & + & +QQRT5*(A(IB+I)-A(IC+I)))+(SSIN72*B(IB+I)+SSIN36*B(IC+I)) + I=I+INC3 + J=J+INC4 + 596 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 598 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.6) THEN +! +! CODING FOR FACTOR 6 +! ------------------- + 600 CONTINUE + IA=1 + IB=IA+(2*M-LA)*INC1 + IC=IB+2*M*INC1 + ID=IC+2*M*INC1 + IE=IC + IF=IB + JA=1 + JB=JA+JINK + JC=JB+JINK + JD=JC+JINK + JE=JD+JINK + JF=JE+JINK +! + IF (LA.NE.M) THEN +! + DO 620 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 610 IJK=1,ILOT + C(JA+J)=(A(IA+I)+A(ID+I))+(A(IB+I)+A(IC+I)) + C(JD+J)=(A(IA+I)-A(ID+I))-(A(IB+I)-A(IC+I)) + C(JB+J)=((A(IA+I)-A(ID+I))+0.5_JPRB*(A(IB+I)-A(IC+I))) & + & -(SIN60*(B(IB+I)+B(IC+I))) + C(JF+J)=((A(IA+I)-A(ID+I))+0.5_JPRB*(A(IB+I)-A(IC+I))) & + & +(SIN60*(B(IB+I)+B(IC+I))) + C(JC+J)=((A(IA+I)+A(ID+I))-0.5_JPRB*(A(IB+I)+A(IC+I))) & + & -(SIN60*(B(IB+I)-B(IC+I))) + C(JE+J)=((A(IA+I)+A(ID+I))-0.5_JPRB*(A(IB+I)+A(IC+I))) & + & +(SIN60*(B(IB+I)-B(IC+I))) + I=I+INC3 + J=J+INC4 + 610 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 620 CONTINUE + IA=IA+IINK + IINK=2*IINK + IB=IB+IINK + IC=IC+IINK + ID=ID-IINK + IE=IE-IINK + IF=IF-IINK + JBASE=JBASE+JUMP + JUMP=2*JUMP+JINK +! + IF (IC.LT.ID) THEN + DO 650 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + KD=KC+KB + KE=KD+KB + KF=KE+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + C3=TRIGS(KD+1) + S3=TRIGS(KD+2) + C4=TRIGS(KE+1) + S4=TRIGS(KE+2) + C5=TRIGS(KF+1) + S5=TRIGS(KF+2) + IBASE=0 + DO 640 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 630 IJK=1,ILOT +! + A11= (A(IE+I)+A(IB+I))+(A(IC+I)+A(IF+I)) + A20=(A(IA+I)+A(ID+I))-0.5_JPRB*A11 + A21=SIN60*((A(IE+I)+A(IB+I))-(A(IC+I)+A(IF+I))) + B11= (B(IB+I)-B(IE+I))+(B(IC+I)-B(IF+I)) + B20=(B(IA+I)-B(ID+I))-0.5_JPRB*B11 + B21=SIN60*((B(IB+I)-B(IE+I))-(B(IC+I)-B(IF+I))) +! + C(JA+J)=(A(IA+I)+A(ID+I))+A11 + D(JA+J)=(B(IA+I)-B(ID+I))+B11 + C(JC+J)=C2*(A20-B21)-S2*(B20+A21) + D(JC+J)=S2*(A20-B21)+C2*(B20+A21) + C(JE+J)=C4*(A20+B21)-S4*(B20-A21) + D(JE+J)=S4*(A20+B21)+C4*(B20-A21) +! + A11=(A(IE+I)-A(IB+I))+(A(IC+I)-A(IF+I)) + B11=(B(IE+I)+B(IB+I))-(B(IC+I)+B(IF+I)) + A20=(A(IA+I)-A(ID+I))-0.5_JPRB*A11 + A21=SIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) + B20=(B(IA+I)+B(ID+I))+0.5_JPRB*B11 + B21=SIN60*((B(IE+I)+B(IB+I))+(B(IC+I)+B(IF+I))) +! + C(JD+J)= & + & C3*((A(IA+I)-A(ID+I))+A11)-S3*((B(IA+I)+B(ID+I))-B11) + D(JD+J)= & + & S3*((A(IA+I)-A(ID+I))+A11)+C3*((B(IA+I)+B(ID+I))-B11) + C(JB+J)=C1*(A20-B21)-S1*(B20-A21) + D(JB+J)=S1*(A20-B21)+C1*(B20-A21) + C(JF+J)=C5*(A20+B21)-S5*(B20+A21) + D(JF+J)=S5*(A20+B21)+C5*(B20+A21) +! + I=I+INC3 + J=J+INC4 + 630 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 640 CONTINUE + IA=IA+IINK + IB=IB+IINK + IC=IC+IINK + ID=ID-IINK + IE=IE-IINK + IF=IF-IINK + JBASE=JBASE+JUMP + 650 CONTINUE + ENDIF +! + IF (IC.EQ.ID) THEN + IBASE=0 + DO 680 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 670 IJK=1,ILOT + C(JA+J)=A(IB+I)+(A(IA+I)+A(IC+I)) + C(JD+J)=B(IB+I)-(B(IA+I)+B(IC+I)) + C(JB+J)=(SIN60*(A(IA+I)-A(IC+I)))- & + & (0.5_JPRB*(B(IA+I)+B(IC+I))+B(IB+I)) + C(JF+J)=-(SIN60*(A(IA+I)-A(IC+I)))- & + & (0.5_JPRB*(B(IA+I)+B(IC+I))+B(IB+I)) + C(JC+J)=SIN60*(B(IC+I)-B(IA+I))+ & + & (0.5_JPRB*(A(IA+I)+A(IC+I))-A(IB+I)) + C(JE+J)=SIN60*(B(IC+I)-B(IA+I))- & + & (0.5_JPRB*(A(IA+I)+A(IC+I))-A(IB+I)) + I=I+INC3 + J=J+INC4 + 670 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 680 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + SSIN60=2.0_JPRB*SIN60 + IF (LIPL) THEN + DO 694 L=1,ILA + I=IBASE +!OCL NOVREC + DO 692 IJK=1,ILOT + T1=(2.0_JPRB*(A(IA+I)-A(ID+I))+(A(IB+I)-A(IC+I))) & + & -(SSIN60*(B(IB+I)+B(IC+I))) + T5=(2.0_JPRB*(A(IA+I)-A(ID+I))+(A(IB+I)-A(IC+I))) & + & +(SSIN60*(B(IB+I)+B(IC+I))) + T2=(2.0_JPRB*(A(IA+I)+A(ID+I))-(A(IB+I)+A(IC+I))) & + & -(SSIN60*(B(IB+I)-B(IC+I))) + T4=(2.0_JPRB*(A(IA+I)+A(ID+I))-(A(IB+I)+A(IC+I))) & + & +(SSIN60*(B(IB+I)-B(IC+I))) + T3=(2.0_JPRB*(A(IA+I)-A(ID+I)))-(2.0_JPRB*(A(IB+I)-A(IC+I))) + A(IA+I)=(2.0_JPRB*(A(IA+I)+A(ID+I)))+ & + & (2.0_JPRB*(A(IB+I)+A(IC+I))) + A(IB+I)=T1 + B(IB+I)=T2 + A(IC+I)=T3 + B(IC+I)=T4 + A(ID+I)=T5 + I=I+INC3 + 692 CONTINUE + IBASE=IBASE+INC1 + 694 CONTINUE + ELSE + DO 698 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 696 IJK=1,ILOT + C(JA+J)=(2.0_JPRB*(A(IA+I)+A(ID+I)))+ & + & (2.0_JPRB*(A(IB+I)+A(IC+I))) + C(JD+J)=(2.0_JPRB*(A(IA+I)-A(ID+I)))- & + & (2.0_JPRB*(A(IB+I)-A(IC+I))) + C(JB+J)=(2.0_JPRB*(A(IA+I)-A(ID+I))+(A(IB+I)-A(IC+I))) & + & -(SSIN60*(B(IB+I)+B(IC+I))) + C(JF+J)=(2.0_JPRB*(A(IA+I)-A(ID+I))+(A(IB+I)-A(IC+I))) & + & +(SSIN60*(B(IB+I)+B(IC+I))) + C(JC+J)=(2.0_JPRB*(A(IA+I)+A(ID+I))-(A(IB+I)+A(IC+I))) & + & -(SSIN60*(B(IB+I)-B(IC+I))) + C(JE+J)=(2.0_JPRB*(A(IA+I)+A(ID+I))-(A(IB+I)+A(IC+I))) & + & +(SSIN60*(B(IB+I)-B(IC+I))) + I=I+INC3 + J=J+INC4 + 696 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 698 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.8) THEN +! +! CODING FOR FACTOR 8 +! ------------------- + 800 CONTINUE + IF (LA.NE.M) THEN + IBAD=3 + ELSE + IA=1 + IB=IA+LA*INC1 + IC=IB+2*LA*INC1 + ID=IC+2*LA*INC1 + IE=ID+2*LA*INC1 + JA=1 + JB=JA+JINK + JC=JB+JINK + JD=JC+JINK + JE=JD+JINK + JF=JE+JINK + JG=JF+JINK + JH=JG+JINK + SSIN45=SQRT(2.0_JPRB) +! + IF (LIPL) THEN + DO 820 L=1,ILA + I=IBASE +!OCL NOVREC + DO 810 IJK=1,ILOT + T2=2.0_JPRB*(((A(IA+I)+A(IE+I))-A(IC+I))-(B(IB+I)-B(ID+I))) + T6=2.0_JPRB*(((A(IA+I)+A(IE+I))-A(IC+I))+(B(IB+I)-B(ID+I))) + T1=2.0_JPRB*((A(IA+I)-A(IE+I))-B(IC+I)) & + & +SSIN45*((A(IB+I)-A(ID+I))-(B(IB+I)+B(ID+I))) + T5=2.0_JPRB*((A(IA+I)-A(IE+I))-B(IC+I)) & + & -SSIN45*((A(IB+I)-A(ID+I))-(B(IB+I)+B(ID+I))) + T3=2.0_JPRB*((A(IA+I)-A(IE+I))+B(IC+I)) & + & -SSIN45*((A(IB+I)-A(ID+I))+(B(IB+I)+B(ID+I))) + T7=2.0_JPRB*((A(IA+I)-A(IE+I))+B(IC+I)) & + & +SSIN45*((A(IB+I)-A(ID+I))+(B(IB+I)+B(ID+I))) + T4=2.0_JPRB*(((A(IA+I)+A(IE+I))+A(IC+I))-(A(IB+I)+A(ID+I))) + A(IA+I)=2.0_JPRB*(((A(IA+I)+A(IE+I))+A(IC+I))+(A(IB+I)+A(ID+I))) + A(IB+I)=T1 + B(IB+I)=T2 + A(IC+I)=T3 + B(IC+I)=T4 + A(ID+I)=T5 + B(ID+I)=T6 + A(IE+I)=T7 + I=I+INC3 + 810 CONTINUE + IBASE=IBASE+INC1 + 820 CONTINUE + ELSE + DO 840 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 830 IJK=1,ILOT + C(JA+J)=2.0_JPRB*(((A(IA+I)+A(IE+I))+A(IC+I))+(A(IB+I)+A(ID+I))) + C(JE+J)=2.0_JPRB*(((A(IA+I)+A(IE+I))+A(IC+I))-(A(IB+I)+A(ID+I))) + C(JC+J)=2.0_JPRB*(((A(IA+I)+A(IE+I))-A(IC+I))-(B(IB+I)-B(ID+I))) + C(JG+J)=2.0_JPRB*(((A(IA+I)+A(IE+I))-A(IC+I))+(B(IB+I)-B(ID+I))) + C(JB+J)=2.0_JPRB*((A(IA+I)-A(IE+I))-B(IC+I)) & + & +SSIN45*((A(IB+I)-A(ID+I))-(B(IB+I)+B(ID+I))) + C(JF+J)=2.0_JPRB*((A(IA+I)-A(IE+I))-B(IC+I)) & + & -SSIN45*((A(IB+I)-A(ID+I))-(B(IB+I)+B(ID+I))) + C(JD+J)=2.0_JPRB*((A(IA+I)-A(IE+I))+B(IC+I)) & + & -SSIN45*((A(IB+I)-A(ID+I))+(B(IB+I)+B(ID+I))) + C(JH+J)=2.0_JPRB*((A(IA+I)-A(IE+I))+B(IC+I)) & + & +SSIN45*((A(IB+I)-A(ID+I))+(B(IB+I)+B(ID+I))) + I=I+INC3 + J=J+INC4 + 830 CONTINUE + IBASE=IBASE+INC1 + JBASE=JBASE+INC21 + 840 CONTINUE + ENDIF +! + ENDIF +! + ELSE +! + IBAD=2 !!! Illegal factor +! + ENDIF +! +! RETURN +! ------ + 900 CONTINUE + IERR=IBAD + ENDSUBROUTINE RPASSF + +! SUBROUTINE 'QPASSF' - PERFORMS ONE PASS THROUGH DATA AS PART +! OF MULTIPLE REAL FFT (FOURIER ANALYSIS) ROUTINE +! +! A IS FIRST REAL INPUT VECTOR +! EQUIVALENCE B(1) WITH A(IFAC*LA*INC1+1) +! C IS FIRST REAL OUTPUT VECTOR +! EQUIVALENCE D(1) WITH C(LA*INC2+1) +! TRIGS IS A PRECALCULATED LIST OF SINES & COSINES +! INC1 IS THE ADDRESSING INCREMENT FOR A +! INC2 IS THE ADDRESSING INCREMENT FOR C +! INC3 IS THE INCREMENT BETWEEN INPUT VECTORS A +! INC4 IS THE INCREMENT BETWEEN OUTPUT VECTORS C +! LOT IS THE NUMBER OF VECTORS +! N IS THE LENGTH OF THE VECTORS +! IFAC IS THE CURRENT FACTOR OF N +! LA = N/(PRODUCT OF FACTORS USED SO FAR) +! IERR IS AN ERROR INDICATOR: +! 0 - PASS COMPLETED WITHOUT ERROR +! 1 - LOT GREATER THAN 64 +! 2 - IFAC NOT CATERED FOR +! 3 - IFAC ONLY CATERED FOR IF LA=N/IFAC +! LIPL=.T. => RESULTS ARE RETURNED TO INPUT ARRAY +! (ONLY VALID IF LA=N/IFAC, I.E. ON FIRST PASS) +! +!----------------------------------------------------------------------- +! + SUBROUTINE QPASSF(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,LOT,N,IFAC, & + & LA,IERR,LIPL) +!AUTOPROMOTE + USE PARKIND1, ONLY : JPIM, JPRB + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +! + IMPLICIT NONE +! + INTEGER(KIND=JPIM) :: N + REAL(KIND=JPRB) :: A(*) + REAL(KIND=JPRB) :: B(*) + REAL(KIND=JPRB) :: C(*) + REAL(KIND=JPRB) :: D(*) + REAL(KIND=JPRB) :: TRIGS(N) + REAL(KIND=JPRB) :: A0,A1,A2,A3,A4,A5,A6,A10,A11,A20,A21 + REAL(KIND=JPRB) :: B0,B1,B2,B3,B4,B5,B6,B10,B11,B20,B21 + REAL(KIND=JPRB) :: C1,C2,C3,C4,C5 + REAL(KIND=JPRB) :: S1,S2,S3,S4,S5 + REAL(KIND=JPRB) :: T1,T2,T3,T4,T5,T6,T7 + REAL(KIND=JPRB) :: Z + REAL(KIND=JPRB) :: QRT5,SIN36,SIN45,SIN60,SIN72 + REAL(KIND=JPRB) :: ZQRT5,ZSIN36,ZSIN45,ZSIN60,ZSIN72 + INTEGER(KIND=JPIM) :: IERR + INTEGER(KIND=JPIM) :: INC1 + INTEGER(KIND=JPIM) :: INC2 + INTEGER(KIND=JPIM) :: INC3 + INTEGER(KIND=JPIM) :: INC4 + INTEGER(KIND=JPIM) :: LOT + INTEGER(KIND=JPIM) :: IFAC + INTEGER(KIND=JPIM) :: LA + INTEGER(KIND=JPIM) :: IINK,IJK,ILOT + INTEGER(KIND=JPIM) :: I,IA,IB,IBAD,IBASE,IC,ID,IE,IF,IG,IH + INTEGER(KIND=JPIM) :: IJUMP,ILA,INC11 + INTEGER(KIND=JPIM) :: J,JA,JB,JC,JD,JE,JBASE,JF,JINK + INTEGER(KIND=JPIM) :: K,KB,KC,KD,KE,KF,KSTOP + INTEGER(KIND=JPIM) :: L,M + LOGICAL :: LIPL +! + DATA SIN36/0.587785252292473_JPRB/,SIN72/0.951056516295154_JPRB/, & + & QRT5/0.559016994374947_JPRB/,SIN60/0.866025403784437_JPRB/ +! + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + M=N/IFAC + IINK=LA*INC1 + JINK=LA*INC2 + IJUMP=(IFAC-1)*IINK + KSTOP=(N-IFAC)/(2*IFAC) +! + IBASE=0 + JBASE=0 + IBAD=0 +! +! Increase the vector length by fusing the loops if the +! data layout is appropriate: + IF (INC1.EQ.LOT.AND.INC2.EQ.LOT.AND.INC3.EQ.1.AND.INC4.EQ.1) THEN + ILA=1 + ILOT=LA*LOT + INC11=LA*LOT + ELSE + ILA=LA + ILOT=LOT + INC11=INC1 + ENDIF + +! + IF (IFAC.EQ.2) THEN +! +! CODING FOR FACTOR 2 +! ------------------- + 200 CONTINUE + IA=1 + IB=IA+IINK + JA=1 + JB=JA+(2*M-LA)*INC2 +! + IF (LA.NE.M) THEN +! + DO 220 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 210 IJK=1,ILOT + C(JA+J)=A(IA+I)+A(IB+I) + C(JB+J)=A(IA+I)-A(IB+I) + I=I+INC3 + J=J+INC4 + 210 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 220 CONTINUE + JA=JA+JINK + JINK=2*JINK + JB=JB-JINK + IBASE=IBASE+IJUMP + IJUMP=2*IJUMP+IINK +! + IF (JA.LT.JB) THEN + DO 250 K=LA,KSTOP,LA + KB=K+K + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + JBASE=0 + DO 240 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 230 IJK=1,ILOT + C(JA+J)=A(IA+I)+(C1*A(IB+I)+S1*B(IB+I)) + C(JB+J)=A(IA+I)-(C1*A(IB+I)+S1*B(IB+I)) + D(JA+J)=(C1*B(IB+I)-S1*A(IB+I))+B(IA+I) + D(JB+J)=(C1*B(IB+I)-S1*A(IB+I))-B(IA+I) + I=I+INC3 + J=J+INC4 + 230 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 240 CONTINUE + IBASE=IBASE+IJUMP + JA=JA+JINK + JB=JB-JINK + 250 CONTINUE + ENDIF +! + IF (JA.EQ.JB) THEN + JBASE=0 + DO 280 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 270 IJK=1,ILOT + C(JA+J)=A(IA+I) + D(JA+J)=-A(IB+I) + I=I+INC3 + J=J+INC4 + 270 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 280 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + Z=1.0_JPRB/REAL(N,KIND=JPRB) + IF (LIPL) THEN + DO 294 L=1,ILA + I=IBASE +!OCL NOVREC +!DEC$ IVDEP + DO 292 IJK=1,ILOT + T1=Z*(A(IA+I)-A(IB+I)) + A(IA+I)=Z*(A(IA+I)+A(IB+I)) + A(IB+I)=T1 + I=I+INC3 + 292 CONTINUE + IBASE=IBASE+INC11 + 294 CONTINUE + ELSE + DO 298 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 296 IJK=1,ILOT + C(JA+J)=Z*(A(IA+I)+A(IB+I)) + C(JB+J)=Z*(A(IA+I)-A(IB+I)) + I=I+INC3 + J=J+INC4 + 296 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 298 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.3) THEN +! +! CODING FOR FACTOR 3 +! ------------------- + 300 CONTINUE + IA=1 + IB=IA+IINK + IC=IB+IINK + JA=1 + JB=JA+(2*M-LA)*INC2 + JC=JB +! + IF (LA.NE.M) THEN +! + DO 320 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 310 IJK=1,ILOT + C(JA+J)=A(IA+I)+(A(IB+I)+A(IC+I)) + C(JB+J)=A(IA+I)-0.5_JPRB*(A(IB+I)+A(IC+I)) + D(JB+J)=SIN60*(A(IC+I)-A(IB+I)) + I=I+INC3 + J=J+INC4 + 310 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 320 CONTINUE + JA=JA+JINK + JINK=2*JINK + JB=JB+JINK + JC=JC-JINK + IBASE=IBASE+IJUMP + IJUMP=2*IJUMP+IINK +! + IF (JA.LT.JC) THEN + DO 350 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + JBASE=0 + DO 340 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 330 IJK=1,ILOT + A1=(C1*A(IB+I)+S1*B(IB+I))+(C2*A(IC+I)+S2*B(IC+I)) + B1=(C1*B(IB+I)-S1*A(IB+I))+(C2*B(IC+I)-S2*A(IC+I)) + A2=A(IA+I)-0.5_JPRB*A1 + B2=B(IA+I)-0.5_JPRB*B1 + A3=SIN60*((C1*A(IB+I)+S1*B(IB+I))-(C2*A(IC+I)+S2*B(IC+I))) + B3=SIN60*((C1*B(IB+I)-S1*A(IB+I))-(C2*B(IC+I)-S2*A(IC+I))) + C(JA+J)=A(IA+I)+A1 + D(JA+J)=B(IA+I)+B1 + C(JB+J)=A2+B3 + D(JB+J)=B2-A3 + C(JC+J)=A2-B3 + D(JC+J)=-(B2+A3) + I=I+INC3 + J=J+INC4 + 330 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 340 CONTINUE + IBASE=IBASE+IJUMP + JA=JA+JINK + JB=JB+JINK + JC=JC-JINK + 350 CONTINUE + ENDIF +! + IF (JA.EQ.JC) THEN + JBASE=0 + DO 380 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 370 IJK=1,ILOT + C(JA+J)=A(IA+I)+0.5_JPRB*(A(IB+I)-A(IC+I)) + D(JA+J)=-SIN60*(A(IB+I)+A(IC+I)) + C(JB+J)=A(IA+I)-(A(IB+I)-A(IC+I)) + I=I+INC3 + J=J+INC4 + 370 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 380 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + Z=1.0_JPRB/REAL(N,KIND=JPRB) + ZSIN60=Z*SIN60 + IF (LIPL) THEN + DO 394 L=1,ILA + I=IBASE +!OCL NOVREC +!DEC$ IVDEP + DO 392 IJK=1,ILOT + T1=Z*(A(IA+I)-0.5_JPRB*(A(IB+I)+A(IC+I))) + T2=ZSIN60*(A(IC+I)-A(IB+I)) + A(IA+I)=Z*(A(IA+I)+(A(IB+I)+A(IC+I))) + A(IB+I)=T1 + A(IC+I)=T2 + I=I+INC3 + 392 CONTINUE + IBASE=IBASE+INC11 + 394 CONTINUE + ELSE + DO 398 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 396 IJK=1,ILOT + C(JA+J)=Z*(A(IA+I)+(A(IB+I)+A(IC+I))) + C(JB+J)=Z*(A(IA+I)-0.5_JPRB*(A(IB+I)+A(IC+I))) + D(JB+J)=ZSIN60*(A(IC+I)-A(IB+I)) + I=I+INC3 + J=J+INC4 + 396 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 398 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.4) THEN +! +! CODING FOR FACTOR 4 +! ------------------- + 400 CONTINUE + IA=1 + IB=IA+IINK + IC=IB+IINK + ID=IC+IINK + JA=1 + JB=JA+(2*M-LA)*INC2 + JC=JB+2*M*INC2 + JD=JB +! + IF (LA.NE.M) THEN +! + DO 420 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 410 IJK=1,ILOT + C(JA+J)=(A(IA+I)+A(IC+I))+(A(IB+I)+A(ID+I)) + C(JC+J)=(A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I)) + C(JB+J)=A(IA+I)-A(IC+I) + D(JB+J)=A(ID+I)-A(IB+I) + I=I+INC3 + J=J+INC4 + 410 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 420 CONTINUE + JA=JA+JINK + JINK=2*JINK + JB=JB+JINK + JC=JC-JINK + JD=JD-JINK + IBASE=IBASE+IJUMP + IJUMP=2*IJUMP+IINK +! + IF (JB.LT.JC) THEN + DO 450 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + KD=KC+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + C3=TRIGS(KD+1) + S3=TRIGS(KD+2) + JBASE=0 + DO 440 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 430 IJK=1,ILOT + A0=A(IA+I)+(C2*A(IC+I)+S2*B(IC+I)) + A2=A(IA+I)-(C2*A(IC+I)+S2*B(IC+I)) + A1=(C1*A(IB+I)+S1*B(IB+I))+(C3*A(ID+I)+S3*B(ID+I)) + A3=(C1*A(IB+I)+S1*B(IB+I))-(C3*A(ID+I)+S3*B(ID+I)) + B0=B(IA+I)+(C2*B(IC+I)-S2*A(IC+I)) + B2=B(IA+I)-(C2*B(IC+I)-S2*A(IC+I)) + B1=(C1*B(IB+I)-S1*A(IB+I))+(C3*B(ID+I)-S3*A(ID+I)) + B3=(C1*B(IB+I)-S1*A(IB+I))-(C3*B(ID+I)-S3*A(ID+I)) + C(JA+J)=A0+A1 + C(JC+J)=A0-A1 + D(JA+J)=B0+B1 + D(JC+J)=B1-B0 + C(JB+J)=A2+B3 + C(JD+J)=A2-B3 + D(JB+J)=B2-A3 + D(JD+J)=-(B2+A3) + I=I+INC3 + J=J+INC4 + 430 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 440 CONTINUE + IBASE=IBASE+IJUMP + JA=JA+JINK + JB=JB+JINK + JC=JC-JINK + JD=JD-JINK + 450 CONTINUE + ENDIF +! + IF (JB.EQ.JC) THEN + SIN45=SQRT(0.5_JPRB) + JBASE=0 + DO 480 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 470 IJK=1,ILOT + C(JA+J)=A(IA+I)+SIN45*(A(IB+I)-A(ID+I)) + C(JB+J)=A(IA+I)-SIN45*(A(IB+I)-A(ID+I)) + D(JA+J)=-A(IC+I)-SIN45*(A(IB+I)+A(ID+I)) + D(JB+J)=A(IC+I)-SIN45*(A(IB+I)+A(ID+I)) + I=I+INC3 + J=J+INC4 + 470 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 480 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + Z=1.0_JPRB/REAL(N,KIND=JPRB) + IF (LIPL) THEN + DO 494 L=1,ILA + I=IBASE +!OCL NOVREC +!DEC$ IVDEP + DO 492 IJK=1,ILOT + T1=Z*(A(IA+I)-A(IC+I)) + T3=Z*(A(ID+I)-A(IB+I)) + T2=Z*((A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I))) + A(IA+I)=Z*((A(IA+I)+A(IC+I))+(A(IB+I)+A(ID+I))) + A(IB+I)=T1 + A(IC+I)=T2 + A(ID+I)=T3 + I=I+INC3 + 492 CONTINUE + IBASE=IBASE+INC11 + 494 CONTINUE + ELSE + DO 498 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 496 IJK=1,ILOT + C(JA+J)=Z*((A(IA+I)+A(IC+I))+(A(IB+I)+A(ID+I))) + C(JC+J)=Z*((A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I))) + C(JB+J)=Z*(A(IA+I)-A(IC+I)) + D(JB+J)=Z*(A(ID+I)-A(IB+I)) + I=I+INC3 + J=J+INC4 + 496 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 498 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.5) THEN +! +! CODING FOR FACTOR 5 +! ------------------- + 500 CONTINUE + IA=1 + IB=IA+IINK + IC=IB+IINK + ID=IC+IINK + IE=ID+IINK + JA=1 + JB=JA+(2*M-LA)*INC2 + JC=JB+2*M*INC2 + JD=JC + JE=JB +! + IF (LA.NE.M) THEN +! + DO 520 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 510 IJK=1,ILOT + A1=A(IB+I)+A(IE+I) + A3=A(IB+I)-A(IE+I) + A2=A(IC+I)+A(ID+I) + A4=A(IC+I)-A(ID+I) + A5=A(IA+I)-0.25_JPRB*(A1+A2) + A6=QRT5*(A1-A2) + C(JA+J)=A(IA+I)+(A1+A2) + C(JB+J)=A5+A6 + C(JC+J)=A5-A6 + D(JB+J)=-SIN72*A3-SIN36*A4 + D(JC+J)=-SIN36*A3+SIN72*A4 + I=I+INC3 + J=J+INC4 + 510 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 520 CONTINUE + JA=JA+JINK + JINK=2*JINK + JB=JB+JINK + JC=JC+JINK + JD=JD-JINK + JE=JE-JINK + IBASE=IBASE+IJUMP + IJUMP=2*IJUMP+IINK +! + IF (JB.LT.JD) THEN + DO 550 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + KD=KC+KB + KE=KD+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + C3=TRIGS(KD+1) + S3=TRIGS(KD+2) + C4=TRIGS(KE+1) + S4=TRIGS(KE+2) + JBASE=0 + DO 540 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 530 IJK=1,ILOT + A1=(C1*A(IB+I)+S1*B(IB+I))+(C4*A(IE+I)+S4*B(IE+I)) + A3=(C1*A(IB+I)+S1*B(IB+I))-(C4*A(IE+I)+S4*B(IE+I)) + A2=(C2*A(IC+I)+S2*B(IC+I))+(C3*A(ID+I)+S3*B(ID+I)) + A4=(C2*A(IC+I)+S2*B(IC+I))-(C3*A(ID+I)+S3*B(ID+I)) + B1=(C1*B(IB+I)-S1*A(IB+I))+(C4*B(IE+I)-S4*A(IE+I)) + B3=(C1*B(IB+I)-S1*A(IB+I))-(C4*B(IE+I)-S4*A(IE+I)) + B2=(C2*B(IC+I)-S2*A(IC+I))+(C3*B(ID+I)-S3*A(ID+I)) + B4=(C2*B(IC+I)-S2*A(IC+I))-(C3*B(ID+I)-S3*A(ID+I)) + A5=A(IA+I)-0.25_JPRB*(A1+A2) + A6=QRT5*(A1-A2) + B5=B(IA+I)-0.25_JPRB*(B1+B2) + B6=QRT5*(B1-B2) + A10=A5+A6 + A20=A5-A6 + B10=B5+B6 + B20=B5-B6 + A11=SIN72*B3+SIN36*B4 + A21=SIN36*B3-SIN72*B4 + B11=SIN72*A3+SIN36*A4 + B21=SIN36*A3-SIN72*A4 + C(JA+J)=A(IA+I)+(A1+A2) + C(JB+J)=A10+A11 + C(JE+J)=A10-A11 + C(JC+J)=A20+A21 + C(JD+J)=A20-A21 + D(JA+J)=B(IA+I)+(B1+B2) + D(JB+J)=B10-B11 + D(JE+J)=-(B10+B11) + D(JC+J)=B20-B21 + D(JD+J)=-(B20+B21) + I=I+INC3 + J=J+INC4 + 530 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 540 CONTINUE + IBASE=IBASE+IJUMP + JA=JA+JINK + JB=JB+JINK + JC=JC+JINK + JD=JD-JINK + JE=JE-JINK + 550 CONTINUE + ENDIF +! + IF (JB.EQ.JD) THEN + JBASE=0 + DO 580 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 570 IJK=1,ILOT + A1=A(IB+I)+A(IE+I) + A3=A(IB+I)-A(IE+I) + A2=A(IC+I)+A(ID+I) + A4=A(IC+I)-A(ID+I) + A5=A(IA+I)+0.25_JPRB*(A3-A4) + A6=QRT5*(A3+A4) + C(JA+J)=A5+A6 + C(JB+J)=A5-A6 + C(JC+J)=A(IA+I)-(A3-A4) + D(JA+J)=-SIN36*A1-SIN72*A2 + D(JB+J)=-SIN72*A1+SIN36*A2 + I=I+INC3 + J=J+INC4 + 570 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 580 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + Z=1.0_JPRB/REAL(N,KIND=JPRB) + ZQRT5=Z*QRT5 + ZSIN36=Z*SIN36 + ZSIN72=Z*SIN72 + IF (LIPL) THEN + DO 594 L=1,ILA + I=IBASE +!OCL NOVREC +!DEC$ IVDEP + DO 592 IJK=1,ILOT + A1=A(IB+I)+A(IE+I) + A3=A(IB+I)-A(IE+I) + A2=A(IC+I)+A(ID+I) + A4=A(IC+I)-A(ID+I) + A5=Z*(A(IA+I)-0.25_JPRB*(A1+A2)) + A6=ZQRT5*(A1-A2) + A(IA+I)=Z*(A(IA+I)+(A1+A2)) + A(IB+I)=A5+A6 + A(ID+I)=A5-A6 + A(IC+I)=-ZSIN72*A3-ZSIN36*A4 + A(IE+I)=-ZSIN36*A3+ZSIN72*A4 + I=I+INC3 + 592 CONTINUE + IBASE=IBASE+INC11 + 594 CONTINUE + ELSE + DO 598 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 596 IJK=1,ILOT + A1=A(IB+I)+A(IE+I) + A3=A(IB+I)-A(IE+I) + A2=A(IC+I)+A(ID+I) + A4=A(IC+I)-A(ID+I) + A5=Z*(A(IA+I)-0.25_JPRB*(A1+A2)) + A6=ZQRT5*(A1-A2) + C(JA+J)=Z*(A(IA+I)+(A1+A2)) + C(JB+J)=A5+A6 + C(JC+J)=A5-A6 + D(JB+J)=-ZSIN72*A3-ZSIN36*A4 + D(JC+J)=-ZSIN36*A3+ZSIN72*A4 + I=I+INC3 + J=J+INC4 + 596 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 598 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.6) THEN +! +! CODING FOR FACTOR 6 +! ------------------- + 600 CONTINUE + IA=1 + IB=IA+IINK + IC=IB+IINK + ID=IC+IINK + IE=ID+IINK + IF=IE+IINK + JA=1 + JB=JA+(2*M-LA)*INC2 + JC=JB+2*M*INC2 + JD=JC+2*M*INC2 + JE=JC + JF=JB +! + IF (LA.NE.M) THEN +! + DO 620 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 610 IJK=1,ILOT + A11=(A(IC+I)+A(IF+I))+(A(IB+I)+A(IE+I)) + C(JA+J)=(A(IA+I)+A(ID+I))+A11 + C(JC+J)=(A(IA+I)+A(ID+I)-0.5_JPRB*A11) + D(JC+J)=SIN60*((A(IC+I)+A(IF+I))-(A(IB+I)+A(IE+I))) + A11=(A(IC+I)-A(IF+I))+(A(IE+I)-A(IB+I)) + C(JB+J)=(A(IA+I)-A(ID+I))-0.5_JPRB*A11 + D(JB+J)=SIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) + C(JD+J)=(A(IA+I)-A(ID+I))+A11 + I=I+INC3 + J=J+INC4 + 610 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 620 CONTINUE + JA=JA+JINK + JINK=2*JINK + JB=JB+JINK + JC=JC+JINK + JD=JD-JINK + JE=JE-JINK + JF=JF-JINK + IBASE=IBASE+IJUMP + IJUMP=2*IJUMP+IINK +! + IF (JC.LT.JD) THEN + DO 650 K=LA,KSTOP,LA + KB=K+K + KC=KB+KB + KD=KC+KB + KE=KD+KB + KF=KE+KB + C1=TRIGS(KB+1) + S1=TRIGS(KB+2) + C2=TRIGS(KC+1) + S2=TRIGS(KC+2) + C3=TRIGS(KD+1) + S3=TRIGS(KD+2) + C4=TRIGS(KE+1) + S4=TRIGS(KE+2) + C5=TRIGS(KF+1) + S5=TRIGS(KF+2) + JBASE=0 + DO 640 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 630 IJK=1,ILOT + A1=C1*A(IB+I)+S1*B(IB+I) + B1=C1*B(IB+I)-S1*A(IB+I) + A2=C2*A(IC+I)+S2*B(IC+I) + B2=C2*B(IC+I)-S2*A(IC+I) + A3=C3*A(ID+I)+S3*B(ID+I) + B3=C3*B(ID+I)-S3*A(ID+I) + A4=C4*A(IE+I)+S4*B(IE+I) + B4=C4*B(IE+I)-S4*A(IE+I) + A5=C5*A(IF+I)+S5*B(IF+I) + B5=C5*B(IF+I)-S5*A(IF+I) + A11=(A2+A5)+(A1+A4) + A20=(A(IA+I)+A3)-0.5_JPRB*A11 + A21=SIN60*((A2+A5)-(A1+A4)) + B11=(B2+B5)+(B1+B4) + B20=(B(IA+I)+B3)-0.5_JPRB*B11 + B21=SIN60*((B2+B5)-(B1+B4)) + C(JA+J)=(A(IA+I)+A3)+A11 + D(JA+J)=(B(IA+I)+B3)+B11 + C(JC+J)=A20-B21 + D(JC+J)=A21+B20 + C(JE+J)=A20+B21 + D(JE+J)=A21-B20 + A11=(A2-A5)+(A4-A1) + A20=(A(IA+I)-A3)-0.5_JPRB*A11 + A21=SIN60*((A4-A1)-(A2-A5)) + B11=(B5-B2)-(B4-B1) + B20=(B3-B(IA+I))-0.5_JPRB*B11 + B21=SIN60*((B5-B2)+(B4-B1)) + C(JB+J)=A20-B21 + D(JB+J)=A21-B20 + C(JD+J)=A11+(A(IA+I)-A3) + D(JD+J)=B11+(B3-B(IA+I)) + C(JF+J)=A20+B21 + D(JF+J)=A21+B20 + I=I+INC3 + J=J+INC4 + 630 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 640 CONTINUE + IBASE=IBASE+IJUMP + JA=JA+JINK + JB=JB+JINK + JC=JC+JINK + JD=JD-JINK + JE=JE-JINK + JF=JF-JINK + 650 CONTINUE + ENDIF +! + IF (JC.EQ.JD) THEN + JBASE=0 + DO 680 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 670 IJK=1,ILOT + C(JA+J)=(A(IA+I)+0.5_JPRB*(A(IC+I)-A(IE+I)))+ & + & SIN60*(A(IB+I)-A(IF+I)) + D(JA+J)=-(A(ID+I)+0.5_JPRB*(A(IB+I)+A(IF+I)))- & + & SIN60*(A(IC+I)+A(IE+I)) + C(JB+J)=A(IA+I)-(A(IC+I)-A(IE+I)) + D(JB+J)=A(ID+I)-(A(IB+I)+A(IF+I)) + C(JC+J)=(A(IA+I)+0.5_JPRB*(A(IC+I)-A(IE+I)))- & + & SIN60*(A(IB+I)-A(IF+I)) + D(JC+J)=-(A(ID+I)+0.5_JPRB*(A(IB+I)+ & + & A(IF+I)))+SIN60*(A(IC+I)+A(IE+I)) + I=I+INC3 + J=J+INC4 + 670 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 680 CONTINUE + ENDIF +! + ELSE !!! Case LA=M + Z=1.0_JPRB/REAL(N,KIND=JPRB) + ZSIN60=Z*SIN60 + IF (LIPL) THEN + DO 694 L=1,ILA + I=IBASE +!OCL NOVREC +!DEC$ IVDEP + DO 692 IJK=1,ILOT + A11=(A(IC+I)-A(IF+I))+(A(IE+I)-A(IB+I)) + T1=Z*((A(IA+I)-A(ID+I))-0.5_JPRB*A11) + T5=Z*((A(IA+I)-A(ID+I))+A11) + T2=ZSIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) + T4=ZSIN60*((A(IC+I)+A(IF+I))-(A(IB+I)+A(IE+I))) + A11=(A(IC+I)+A(IF+I))+(A(IB+I)+A(IE+I)) + T3=Z*((A(IA+I)+A(ID+I))-0.5_JPRB*A11) + A(IA+I)=Z*((A(IA+I)+A(ID+I))+A11) + A(IB+I)=T1 + A(IC+I)=T2 + A(ID+I)=T3 + A(IE+I)=T4 + A(IF+I)=T5 + I=I+INC3 + 692 CONTINUE + IBASE=IBASE+INC11 + 694 CONTINUE + ELSE + DO 698 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 696 IJK=1,ILOT + A11=(A(IC+I)+A(IF+I))+(A(IB+I)+A(IE+I)) + C(JA+J)=Z*((A(IA+I)+A(ID+I))+A11) + C(JC+J)=Z*((A(IA+I)+A(ID+I))-0.5_JPRB*A11) + D(JC+J)=ZSIN60*((A(IC+I)+A(IF+I))-(A(IB+I)+A(IE+I))) + A11=(A(IC+I)-A(IF+I))+(A(IE+I)-A(IB+I)) + C(JB+J)=Z*((A(IA+I)-A(ID+I))-0.5_JPRB*A11) + D(JB+J)=ZSIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) + C(JD+J)=Z*((A(IA+I)-A(ID+I))+A11) + I=I+INC3 + J=J+INC4 + 696 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 698 CONTINUE + ENDIF + ENDIF +! + ELSEIF (IFAC.EQ.8) THEN +! +! CODING FOR FACTOR 8 +! ------------------- + 800 CONTINUE + IF (LA.NE.M) THEN + IBAD=3 + ELSE + IA=1 + IB=IA+IINK + IC=IB+IINK + ID=IC+IINK + IE=ID+IINK + IF=IE+IINK + IG=IF+IINK + IH=IG+IINK + JA=1 + JB=JA+LA*INC2 + JC=JB+2*M*INC2 + JD=JC+2*M*INC2 + JE=JD+2*M*INC2 + Z=1.0_JPRB/REAL(N,KIND=JPRB) + ZSIN45=Z*SQRT(0.5_JPRB) +! + IF (LIPL) THEN + DO 820 L=1,ILA + I=IBASE +!OCL NOVREC +!DEC$ IVDEP + DO 810 IJK=1,ILOT + T3=Z*((A(IA+I)+A(IE+I))-(A(IC+I)+A(IG+I))) + T4=Z*((A(ID+I)+A(IH+I))-(A(IB+I)+A(IF+I))) + T1=Z*(A(IA+I)-A(IE+I)) & + & +ZSIN45*((A(IH+I)-A(ID+I))-(A(IF+I)-A(IB+I))) + T5=Z*(A(IA+I)-A(IE+I)) & + & -ZSIN45*((A(IH+I)-A(ID+I))-(A(IF+I)-A(IB+I))) + T2=ZSIN45*((A(IH+I)-A(ID+I))+(A(IF+I)-A(IB+I))) & + & +Z*(A(IG+I)-A(IC+I)) + T6=ZSIN45*((A(IH+I)-A(ID+I))+(A(IF+I)-A(IB+I))) & + & -Z*(A(IG+I)-A(IC+I)) + T7=Z*(((A(IA+I)+A(IE+I))+(A(IC+I)+A(IG+I)))- & + & ((A(ID+I)+A(IH+I))+(A(IB+I)+A(IF+I)))) + A(IA+I)=Z*(((A(IA+I)+A(IE+I))+(A(IC+I)+A(IG+I)))+ & + & ((A(ID+I)+A(IH+I))+(A(IB+I)+A(IF+I)))) + A(IB+I)=T1 + A(IC+I)=T2 + A(ID+I)=T3 + A(IE+I)=T4 + A(IF+I)=T5 + A(IG+I)=T6 + A(IH+I)=T7 + I=I+INC3 + 810 CONTINUE + IBASE=IBASE+INC11 + 820 CONTINUE + ELSE + DO 840 L=1,ILA + I=IBASE + J=JBASE +!OCL NOVREC +!DEC$ IVDEP + DO 830 IJK=1,ILOT + C(JA+J)=Z*(((A(IA+I)+A(IE+I))+(A(IC+I)+A(IG+I)))+ & + & ((A(ID+I)+A(IH+I))+(A(IB+I)+A(IF+I)))) + C(JE+J)=Z*(((A(IA+I)+A(IE+I))+(A(IC+I)+A(IG+I)))- & + & ((A(ID+I)+A(IH+I))+(A(IB+I)+A(IF+I)))) + C(JC+J)=Z*((A(IA+I)+A(IE+I))-(A(IC+I)+A(IG+I))) + D(JC+J)=Z*((A(ID+I)+A(IH+I))-(A(IB+I)+A(IF+I))) + C(JB+J)=Z*(A(IA+I)-A(IE+I)) & + & +ZSIN45*((A(IH+I)-A(ID+I))-(A(IF+I)-A(IB+I))) + C(JD+J)=Z*(A(IA+I)-A(IE+I)) & + & -ZSIN45*((A(IH+I)-A(ID+I))-(A(IF+I)-A(IB+I))) + D(JB+J)=ZSIN45*((A(IH+I)-A(ID+I))+(A(IF+I)-A(IB+I))) & + & +Z*(A(IG+I)-A(IC+I)) + D(JD+J)=ZSIN45*((A(IH+I)-A(ID+I))+(A(IF+I)-A(IB+I))) & + & -Z*(A(IG+I)-A(IC+I)) + I=I+INC3 + J=J+INC4 + 830 CONTINUE + IBASE=IBASE+INC11 + JBASE=JBASE+INC2 + 840 CONTINUE + ENDIF +! + ENDIF +! + ELSE +! + IBAD=2 !!! Illegal factor +! + ENDIF +! +! RETURN +! ------ + 900 CONTINUE + IERR=IBAD + ENDSUBROUTINE QPASSF + + ENDSUBROUTINE FFT992 +#endif diff --git a/src/trans/cpu/algor/fft992_cc.F90 b/src/trans/cpu/algor/fft992_cc.F90 new file mode 100644 index 0000000..85d213b --- /dev/null +++ b/src/trans/cpu/algor/fft992_cc.F90 @@ -0,0 +1,139 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE FFT992_CC (A, KINC, KJUMP, KN, KLOT, KSIGN) +! +! Perform complex transforms with FFT992 like interface +! +! For KSIGN=-1 (Real -> Complex) call after of FFT992 +! For KSIGN=1 (Complex -> Real) call before FFT992 +! +USE PARKIND1, ONLY : JPIM, JPRB +IMPLICIT NONE +REAL(KIND=JPRB),INTENT(INOUT):: A(*) +INTEGER(KIND=JPIM),INTENT(IN):: KINC,KJUMP,KN,KLOT,KSIGN +REAL(KIND=JPRB),ALLOCATABLE :: ZWORK(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZWORK1(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZWORK2(:,:) +REAL(KIND=JPRB) :: ZN +INTEGER(KIND=JPIM) :: NH, NLOTH, I1, INCD, JLOT, I2, I1P, I2P, J, I1N, I2N + +NH=KN/2 +NLOTH=KLOT/2 +I1=1 +INCD=KINC*2 +ZN=SQRT(REAL(KN,JPRB)) + + +IF( KSIGN==-1)THEN + + IF( KJUMP /= 1 )THEN + ALLOCATE(ZWORK(1:2*KLOT,0:2*KN)) + DO JLOT=1,NLOTH + I2=I1+1 + I1P=I1+KINC + I2P=I2+KINC + DO J=0,NH + ZWORK(1,J)=A(I1+INCD*J)-A(I2P+INCD*J) + ZWORK(2,J)=A(I2+INCD*J)+A(I1P+INCD*J) + ZWORK(1,KN-J)=A(I1+INCD*J)+A(I2P+INCD*J) + ZWORK(2,KN-J)=A(I2+INCD*J)-A(I1P+INCD*J) + ENDDO + ! M normalization requires sqrt(M) ?? + DO J=0,KN-1 + A(I1+KINC*J)=ZWORK(1,J)*ZN + A(I2+KINC*J)=ZWORK(2,J)*ZN + ENDDO + I1=I1+KJUMP*2 + ENDDO + DEALLOCATE(ZWORK) + ELSE + ALLOCATE(ZWORK1(1:NLOTH,0:KN)) + ALLOCATE(ZWORK2(1:NLOTH,0:KN)) + DO J=0,NH + DO JLOT=1,NLOTH + ZWORK1(JLOT, J)=A((JLOT-1)*2+1+INCD*J)-A((JLOT-1)*2+2+KINC+INCD*J) + ZWORK2(JLOT, J)=A((JLOT-1)*2+2+INCD*J)+A((JLOT-1)*2+1+KINC+INCD*J) + ZWORK1(JLOT,KN-J)=A((JLOT-1)*2+1+INCD*J)+A((JLOT-1)*2+2+KINC+INCD*J) + ZWORK2(JLOT,KN-J)=A((JLOT-1)*2+2+INCD*J)-A((JLOT-1)*2+1+KINC+INCD*J) + ENDDO + ENDDO + DO J=0,KN-1 + DO JLOT=1,NLOTH + A((JLOT-1)*2+1+J*KINC)=ZWORK1(JLOT,J)*ZN + ENDDO + DO JLOT=1,NLOTH + A((JLOT-1)*2+2+J*KINC)=ZWORK2(JLOT,J)*ZN + ENDDO + ENDDO + DEALLOCATE(ZWORK1,ZWORK2) + ENDIF + +ELSE + + IF( KJUMP /= 1 )THEN + ALLOCATE(ZWORK(1:2*KLOT,0:2*KN)) + DO JLOT=1,NLOTH + I2=I1+1 + I1N=I1+KN*KINC + I2N=I2+KN*KINC + DO J=1,NH-1 + ZWORK(1,2*J)=0.5D0*(A(I1+KINC*J)+A(I1N-KINC*J)) + ZWORK(2,2*J)=0.5D0*(A(I2+KINC*J)+A(I2N-KINC*J)) + ZWORK(1,2*J+1)=0.5D0*(A(I2+KINC*J)-A(I2N-KINC*J)) + ZWORK(2,2*J+1)=0.5D0*(A(I1N-KINC*J)-A(I1+KINC*J)) + ENDDO + ZWORK(1,0)=A(I1) + ZWORK(2,0)=A(I2) + ZWORK(1,1)=0.0D0 + ZWORK(2,1)=0.0D0 + ZWORK(1,KN)=A(I1N) + ZWORK(2,KN)=A(I2N) + ZWORK(1,KN+1)=0.0D0 + ZWORK(2,KN+1)=0.0D0 + DO J=0,KN+1 + A(I1+KINC*J)=ZWORK(1,J) + A(I2+KINC*J)=ZWORK(2,J) + ENDDO + I1=I1+KJUMP*2 + ENDDO + DEALLOCATE(ZWORK) + ELSE + ALLOCATE(ZWORK1(1:NLOTH,0:KN+1)) + ALLOCATE(ZWORK2(1:NLOTH,0:KN+1)) + DO J=1,NH-1 + DO JLOT=1,NLOTH + ZWORK1(JLOT,2*J )=0.5D0*(A((JLOT-1)*2+1+KINC*J)+A((JLOT-1)*2+1+(KN-J)*KINC)) + ZWORK2(JLOT,2*J )=0.5D0*(A((JLOT-1)*2+2+KINC*J)+A((JLOT-1)*2+2+(KN-J)*KINC)) + ZWORK1(JLOT,2*J+1)=0.5D0*(A((JLOT-1)*2+2+KINC*J)-A((JLOT-1)*2+2+(KN-J)*KINC)) + ZWORK2(JLOT,2*J+1)=0.5D0*(A((JLOT-1)*2+1+(KN-J)*KINC)-A((JLOT-1)*2+1+KINC*J)) + ZWORK1(JLOT,0 )=A((JLOT-1)*2+1) + ZWORK2(JLOT,0 )=A((JLOT-1)*2+2) + ZWORK1(JLOT,1 )=0.0D0 + ZWORK2(JLOT,1 )=0.0D0 + ZWORK1(JLOT,KN )=A((JLOT-1)*2+1+KN*KINC) + ZWORK2(JLOT,KN )=A((JLOT-1)*2+2+KN*KINC) + ZWORK1(JLOT,KN+1 )=0.0D0 + ZWORK2(JLOT,KN+1 )=0.0D0 + ENDDO + ENDDO + DO J=0,KN+1 + DO JLOT=1,NLOTH + A((JLOT-1)*2+1+KINC*J)=ZWORK1(JLOT,J) + A((JLOT-1)*2+2+KINC*J)=ZWORK2(JLOT,J) + ENDDO + ENDDO + DEALLOCATE(ZWORK1,ZWORK2) + ENDIF + +ENDIF + +RETURN +END SUBROUTINE FFT992_CC diff --git a/src/trans/cpu/algor/interpol_decomp_mod.F90 b/src/trans/cpu/algor/interpol_decomp_mod.F90 new file mode 100644 index 0000000..b057678 --- /dev/null +++ b/src/trans/cpu/algor/interpol_decomp_mod.F90 @@ -0,0 +1,246 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INTERPOL_DECOMP_MOD + +! Compute Interpolative Decomposions (ID) + +! See Cheng,H., Gimbutas,Z., Martinsson,P.G. and Rokhlin,V. (2005) +! "On the compression of low rank matrices", SIAM.J.Sci.Comput., +! Vol. 26, No. 4, pp1389-1404 + +! Also lecture notes "Mulilevel compression of Linear Operators: +! Descendents of Fast Multiple Methods and Calderon-Zygmund Theory" +! P.G.Martinsson and Mark Tygert, 2011. Chapter 7. + +! Author: Mats Hamrud + + +USE PARKIND1, ONLY : JPRB, JPIM, JPRD, JPIB +IMPLICIT NONE +CONTAINS +!=========================================================================== +SUBROUTINE COMPUTE_ID(PEPS,KM,KN,PMAT,KRANK,KBCLIST,PNONIM) +IMPLICIT NONE + +! Compute ID + +REAL(KIND=JPRD),INTENT(IN) :: PEPS ! Precision for computation + ! of numerical rank +INTEGER(KIND=JPIM),INTENT(IN) :: KM ! Number of rows in matrix pmat +INTEGER(KIND=JPIM),INTENT(IN) :: KN ! Number of columns in matrix pmat +REAL(KIND=JPRD) ,INTENT(IN) :: PMAT(:,:) ! Original matrix +INTEGER(KIND=JPIM),INTENT(OUT) :: KRANK ! Numerical rank +INTEGER(KIND=JPIM),INTENT(OUT) :: KBCLIST(:) ! List of columns +REAL(KIND=JPRD) ,INTENT(OUT) :: PNONIM(:,:) ! Non-identity part of projection + ! matrix + +INTEGER(KIND=JPIM) :: JM,JN +REAL(KIND=JPRD) :: ZR(KM,KN) +REAL(KIND=JPRD),ALLOCATABLE :: ZS(:,:),ZT(:,:) +!---------------------------------------------------------------------------- +!Avoid destroying input matrix +ZR(:,:) = PMAT(1:KM,1:KN) +! Householder QR +CALL ALG541(PEPS,KM,KN,ZR,KRANK,KBCLIST) + +DO JN=1,KN + DO JM=JN+1,KM + ZR(JM,JN) = 0.0_JPRD + ENDDO +ENDDO +! S leftmost kxk block of R +ALLOCATE(ZS(KRANK,KRANK)) +DO JN=1,KRANK + DO JM=1,KRANK + IF(JM <= KM ) THEN + ZS(JM,JN) = ZR(JM,JN) + ELSE + ZS(JM,JN) = 0.0_JPRD + ENDIF + ENDDO +ENDDO +! T Rightmost kx(k-n) block of R +ALLOCATE(ZT(KRANK,KN-KRANK)) +DO JN=1,KN-KRANK + DO JM=1,KRANK + IF(JM <= KM ) THEN + ZT(JM,JN) = ZR(JM,JN+KRANK) + ELSE + ZT(JM,JN) = 0.0_JPRD + ENDIF + ENDDO +ENDDO +!Solve linear equation (BLAS level 3 routine) +IF( KRANK <= 0 ) THEN + write(0,*) 'warning: KRANK DTRSM ', KRANK, KM, KN + CALL ABOR1('DTRSM : KRANK <=0 not allowed') +ENDIF + +! IF (JPRB == JPRD) THEN +CALL DTRSM('Left','Upper','No transpose','Non-unit',KRANK,KN-KRANK,1.0_JPRD, & + & ZS,KRANK,ZT,KRANK) +! ELSE +! CALL STRSM('Left','Upper','No transpose','Non-unit',KRANK,KN-KRANK,1.0_JPRD, & +! & ZS,KRANK,ZT,KRANK) +! ENDIF + +DO JM=1,KRANK + DO JN=1,KN-KRANK + PNONIM(JM,JN) = ZT(JM,JN) + ENDDO +ENDDO +DEALLOCATE(ZS,ZT) +!!$IF(KRANK < KN) THEN +!!$ PRINT *,'MAXVAL PNONIM ',KM,KM,KRANK,MAXVAL( PNONIM(1:KRANK,1:KN-KRANK)) +!!$ENDIF +END SUBROUTINE COMPUTE_ID + +!============================================================================== +SUBROUTINE ALG541(PEPS,KM,KN,PA,KRANK,KLIST) +IMPLICIT NONE + +! Householder QR with Column Pivoting +! Algorithm 5.4.1 from Matrix Computations, G.H.Golub & C.F van Loen, third ed. + +! Algorithm modified to terminate at numerical precision "peps" + +REAL(KIND=JPRD),INTENT(IN) :: PEPS ! Precision +INTEGER(KIND=JPIM),INTENT(IN) :: KM ! Number of rows in matrix pa +INTEGER(KIND=JPIM),INTENT(IN) :: KN ! Number of columns in matrix pa +REAL(KIND=JPRD),INTENT(INOUT) :: PA(:,:) ! On input : original matrix + ! on output : R in upper triangle etc + ! see Golub&Van Loen +INTEGER(KIND=JPIM),INTENT(OUT) :: KRANK ! Numerical rank of matrix +INTEGER(KIND=JPIM),INTENT(OUT) :: KLIST(:) ! List of columns (pivots) + +INTEGER(KIND=JPIM) :: JM,JN,ISWAP,JK,IK,IIK,IM,IN,IMIN,ILIST(KN) +REAL(KIND=JPRD) :: ZC(KN),ZTAU,ZSWAPA(KM),ZSWAP,ZV(KM),ZBETA,ZWORK(KN),ZTAU_IN +REAL(KIND=JPRD) :: ZTAU_REC,ZEPS +!------------------------------------------------------------------------------- +ZEPS = 10000.0_JPRD*EPSILON(ZEPS) +IMIN=MIN(KM,KN) +! Compute initial column norms,its max and the first column where c=tau +IK = 0 +ZTAU = 0._JPRD +DO JN=1,KN + ZC(JN) = DOT_PRODUCT(PA(1:KM,JN),PA(1:KM,JN)) + IF(ZC(JN) > ZTAU) THEN + IK = JN + ZTAU = ZC(JN) + ENDIF +ENDDO +ZTAU_IN = ZTAU +ZTAU_REC= ZTAU +KRANK = 0 +DO WHILE (ZTAU > PEPS**2*ZTAU_IN) + KRANK = KRANK+1 + IF( KRANK <= IMIN ) THEN + ILIST(KRANK) = IK + ! Column swap KRANK with IK + ZSWAPA(:) = PA(:,KRANK) + PA(:,KRANK) = PA(:,IK) + PA(:,IK) = ZSWAPA(:) + ZSWAP = ZC(KRANK) + ZC(KRANK) = ZC(IK) + ZC(IK) = ZSWAP + ! Compute Householder vector + ZBETA=0.0_JPRD + IF( KM-KRANK >= 0 ) THEN + CALL ALG511(ZEPS,KM-KRANK+1,PA(KRANK:KM,KRANK),ZV,ZBETA) + ENDIF + ! Apply Householder matrix + IM = KM-KRANK+1 + IN = KN-KRANK+1 + ! LAPACK + CALL DLARF('Left',IM,IN,ZV,1,ZBETA,PA(KRANK,KRANK),KM,ZWORK) + ENDIF + +! Update column norms + ZTAU = 0.0_JPRD + IF(KRANK < IMIN) THEN + PA(KRANK+1:KM,KRANK) = ZV(2:IM) + DO JN=KRANK+1,KN + ZC(JN) = ZC(JN)-PA(KRANK,JN)**2 + IF(ZC(JN) > ZTAU) THEN + IK = JN + ZTAU = ZC(JN) + ENDIF + ENDDO +! Re-compute column norms due to round-off error + IF(ZTAU < ZEPS*ZTAU_REC .OR. ZTAU < 0._JPRD .or. (KN-KRANK) > 100 ) THEN + DO JN=KRANK+1,KN + ZC(JN) = DOT_PRODUCT(PA(KRANK+1:,JN),PA(KRANK+1:,JN)) + IF(ZC(JN) > ZTAU) THEN + IK = JN + ZTAU = ZC(JN) + ENDIF + ENDDO + !write(0,*) 'RECOMPUTE TAU ',KRANK,ZTAU_REC,ZTAU + ZTAU_REC = ZTAU + ENDIF + ENDIF +ENDDO +! Make sure klist is filled also beyond krank +DO JN=1,KN + KLIST(JN) = JN +ENDDO +DO JN=1,KRANK + ISWAP = KLIST(JN) + KLIST(JN) = KLIST(ILIST(JN)) + KLIST(ILIST(JN)) = ISWAP +ENDDO + +END SUBROUTINE ALG541 + +!============================================================================== +SUBROUTINE ALG511(PEPS,KSIZE,PX,PV,PBETA) +IMPLICIT NONE +! Compute Householder vector +! Algorithm 5.1.1 from Matrix Computations, G.H.Golub & C.F van Loen, third ed. +REAL(KIND=JPRD),INTENT(IN) :: PEPS ! Precision +REAL(KIND=JPRD),INTENT(IN) :: PX(:) +INTEGER(KIND=JPIM), INTENT(IN) :: KSIZE +REAL(KIND=JPRD),INTENT(OUT) :: PV(:) +REAL(KIND=JPRD),INTENT(OUT) :: PBETA +INTEGER(KIND=JPIM) :: IL +REAL(KIND=JPRD) :: ZSIGMA,ZMU, ZNORM +REAL(KIND=JPRD) :: ZX(KSIZE) +!------------------------------------------------------------------------------- +! normalize +ZNORM=0._JPRD +DO IL=1,KSIZE + ZNORM = ZNORM + PX(IL)*PX(IL) +ENDDO +ZNORM=SQRT(ZNORM) +ZX(:)=PX(1:KSIZE) +IF( ZNORM > PEPS ) ZX(:)=PX(1:KSIZE)/ZNORM + +ZSIGMA=0._JPRD +IF( KSIZE > 1 ) ZSIGMA = DOT_PRODUCT(ZX(2:KSIZE),ZX(2:KSIZE)) +PV(1) = 1.0_JPRD +IF( KSIZE > 1 ) PV(2:KSIZE) = ZX(2:KSIZE) +IF(ABS(ZSIGMA) < PEPS**2) THEN + PBETA = 0.0_JPRD +ELSE + ZMU = SQRT(ZX(1)**2+ZSIGMA) + IF(ZX(1) <= 0.0_JPRD) THEN + PV(1) = ZX(1)-ZMU + ELSE + PV(1) = -ZSIGMA/(ZX(1)+ZMU) + ENDIF + PBETA = 2.0_JPRD*PV(1)**2/(ZSIGMA+PV(1)**2) + PV(:) = PV(:)/(PV(1)) +ENDIF + +END SUBROUTINE ALG511 +!================================================================================ + +END MODULE INTERPOL_DECOMP_MOD diff --git a/src/trans/cpu/algor/seefmm_mix.F90 b/src/trans/cpu/algor/seefmm_mix.F90 new file mode 100644 index 0000000..c2ec327 --- /dev/null +++ b/src/trans/cpu/algor/seefmm_mix.F90 @@ -0,0 +1,553 @@ +! (C) Copyright 2009- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +module seefmm_mix +!**** *SEEFMM_MIX* - Implementation of Simple Exponential Expansion FMM + +! Purpose. +! -------- +! Implementation of Simple Exponential Expansion FMM + +!** Interface. +! ---------- + +! Method. +! ------- +! Based on Algorithm described in Section 4 of the article +! "An improved fast multipole algorithm for potential fields on the line " + + +! Reference. +! ---------- +! "An improved fast multipole algorithm for potential fields on the line " +! by Norman Yarvin and Vladimir Rohklin, SIAM J. Numer. Anal. Vol. 36,No. 2,629-666. [1] +! +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2009-06-04 +! ------------------------------------------------------------------ + + +use parkind1,only : jpim ,jprb, jprd +use ecsort_mix, only : keysort +use wts500_mod, only: wts500 + +private + +integer(kind=jpim) :: nfmm_lim=200 ! Appr. break-even limit for FMM +integer(kind=jpim),parameter :: nquadEm14=28 ! Quadrature size for eps~=1.e-14 +integer(kind=jpim),parameter :: nquadEm10=20! Quadrature size for eps~=1.e-10 +integer(kind=jpim),parameter :: nquadEm07=14! Quadrature size for eps~=1.e-07 + +type fmm_type +integer(kind=jpim) :: nxy ! Total number of point "nx+ny" +integer(kind=jpim) :: nx ! Number of 'x' points +integer(kind=jpim) :: nquad ! Quadrature N +integer(kind=jpim) :: ncik ! Number of elem. in cik +real(kind=jprb) :: rw(56) ! Quadrature weights +real(kind=jprb) , pointer :: rdexp(:,:) ! exp(xy(i)-xy(i-1)) +integer(kind=jpim), pointer :: index(:) ! index for sorted xy +integer(kind=jpim), pointer :: nclose(:) ! No of "close" points +real(kind=jprb) , pointer :: cik(:) ! Correction term (142 in [1]) + +end type fmm_type + +public :: fmm_type, setup_seefmm, free_seefmm, seefmm_mulm + +contains +recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) + +implicit none + +!**** *SETUP_SEEFMM* - Setup seefmm + +! Purpose - Pre-computations for applying SEEFMM + +! Explicit arguments : +! -------------------- +! kx - Number of x points +! px - x points +! ky - Number of y points +! py - y points +! ydfmm - result of pre-computations +! pdiff - difference matrix (optional) + +integer(kind=jpim),intent(in) :: kx +real(kind=jprd) ,intent(in) :: px(:) +integer(kind=jpim),intent(in) :: ky +real(kind=jprb) ,intent(in) :: py(:) +type(fmm_type) ,intent(out) :: ydfmm +real(kind=jprb),optional,intent(in) :: pdiff(:,:) + +real(kind=jprb) :: zxy(kx+ky),zrt(56),zcik((kx+ky)*(kx+ky)) +real(kind=jprb) :: zr +integer(kind=jpim) :: ixy +!--------------------------------------------------------------------------- +ydfmm%nx=kx +ixy=kx+ky +ydfmm%nxy=ixy +allocate(ydfmm%index(ixy)) +!ydfmm%nquad=nquadEm14 !Set precicion to 1.E-14 +ydfmm%nquad=nquadEm07 !Set precicion to 1.E-07 +! Combine px and py to form xxy, compute ascending index for xxy +call comb_xy(kx,px,ky,py,ixy,zxy,ydfmm%index) +! Setup quadrature, scale (see 3.1.1 in [1]) +call suquad(ixy,zxy(ydfmm%index(1))-zxy(ydfmm%index(ixy)),& + & ydfmm%nquad,ydfmm%rw,zrt,zr) +allocate(ydfmm%rdexp(ydfmm%nquad,ixy)) +allocate(ydfmm%nclose(ixy)) +! Main pre-computation +call prepotf(kx,ixy,ydfmm%nquad,ydfmm%rw,zrt,zr,zxy,ydfmm%index,& + & ydfmm%rdexp,ydfmm%nclose,zcik,ydfmm%ncik,pdiff) +! Needed as size of cik unknown beforehand +allocate(ydfmm%cik(ydfmm%ncik)) +ydfmm%cik(:)=zcik(1:ydfmm%ncik) + +end subroutine setup_seefmm +!========================================================================== + +subroutine free_seefmm(ydfmm) +implicit none + +!**** *FREE_SEEFMM* - Release memory + +! Purpose - Release memory used by ydfmm + +! Explicit arguments : +! -------------------- +! ydfmm - result of pre-computations +type(fmm_type) ,intent(inout) :: ydfmm + +deallocate(ydfmm%index) +deallocate(ydfmm%rdexp) +deallocate(ydfmm%nclose) +deallocate(ydfmm%cik) + +end subroutine free_seefmm + +!========================================================================== +recursive subroutine potf(kn,kx,ldxout,kquad,prw,pq,prdexp,kindex,kclosel,kcik,pcik,ptheta) +implicit none + +integer(kind=jpim),intent(in) :: kn +integer(kind=jpim),intent(in) :: kx +logical ,intent(in) :: ldxout +integer(kind=jpim),intent(in) :: kquad +real(kind=jprb) ,intent(in) :: prw(:) +real(kind=jprb) ,intent(in) :: pq(:) +real(kind=jprb) ,intent(in) :: prdexp(:,:) +integer(kind=jpim),intent(in) :: kindex(:) +integer(kind=jpim),intent(in) :: kclosel(:) +integer(kind=jpim),intent(in) :: kcik +real(kind=jprb) ,intent(in) :: pcik(:) +real(kind=jprb) ,intent(out) :: ptheta(:) + +real(kind=jprb) :: zalpha(kquad),zq(kn),ztheta(kn) +integer(kind=jpim) :: j1,j2,jm,inumc,idist,iquad +integer(kind=jpim) :: iout,iq,i1,i1p1,i1pd,ik1,ix,iy +logical :: lxy,llxy(kn) + +lxy(ik1) = (ik1 <= kx .eqv. ldxout) +!------------------------------------------------------------------------- + +ztheta(:)=0.0_JPRB +if(ldxout) then + ix=0 + iy=-kx +else + ix=-kx + iy=0 +endif + +do j1=1,kn + i1=kindex(j1) + llxy(j1)=lxy(i1) + if(llxy(j1)) then + zq(j1)=pq(kindex(j1)+ix) + else + zq(j1)=0.0_jprb + endif +enddo + +zalpha(:)=zq(1) +do j1=2,kn + if(llxy(j1)) then + do jm=1,kquad + zalpha(jm)=zalpha(jm)*prdexp(jm,j1)+zq(j1) + enddo + else + do jm=1,kquad + zalpha(jm)=zalpha(jm)*prdexp(jm,j1) + ztheta(j1)=ztheta(j1)+prw(jm)*zalpha(jm) + enddo + endif +enddo + +zalpha(1:kquad)=zq(kn) +do j1=kn-1,1,-1 + if(llxy(j1)) then + do jm=1,kquad + zalpha(jm)=zalpha(jm)*prdexp(jm,j1+1)+zq(j1) + enddo + else + do jm=1,kquad + zalpha(jm)=zalpha(jm)*prdexp(jm,j1+1) + ztheta(j1)=ztheta(j1)-prw(jm)*zalpha(jm) + enddo + endif +enddo + + +IF(kcik > 0) then + inumc=0 + do j1=1,kn-1 + do j2=1,kclosel(j1) + idist=j2 + if(.not.llxy(j1) .and. llxy(j1+idist)) then + inumc=inumc+1 + ztheta(j1)=ztheta(j1)-pcik(inumc)*zq(j1+idist) + elseif(llxy(j1) .and. .not.llxy(j1+idist)) then + inumc=inumc+1 + ztheta(j1+idist)=ztheta(j1+idist)+pcik(inumc)*zq(j1) + endif + enddo + enddo +endif + +do j1=1,kn + if(.not. llxy(j1)) then + i1=kindex(j1) + ptheta(i1+iy)=ztheta(j1) + endif +enddo + +end subroutine potf +!========================================================================== +recursive subroutine seefmm_mulv(ydfmm,ldxout,pq,ptheta) +implicit none + +type(fmm_type) ,intent(in) :: ydfmm +logical ,intent(in) :: ldxout +real(kind=jprb) ,intent(in) :: pq(:) +real(kind=jprb) ,intent(out) :: ptheta(:) + +!------------------------------------------------------------------------- +call potf(ydfmm%nxy,ydfmm%nx,ldxout,ydfmm%nquad,& + & ydfmm%rw,pq,ydfmm%rdexp,ydfmm%index,& + & ydfmm%nclose,ydfmm%ncik,ydfmm%cik,ptheta) + +end subroutine seefmm_mulv +!========================================================================== +recursive subroutine seefmm_mulm(ydfmm,km,kskip,ldxout,pq,ptheta) +implicit none + +type(fmm_type) ,intent(in) :: ydfmm +integer(kind=jpim),intent(in) :: km +integer(kind=jpim),intent(in) :: kskip +logical ,intent(in) :: ldxout +real(kind=jprb) ,intent(in) :: pq(:,:) +real(kind=jprb) ,intent(out) :: ptheta(:,:) + +!------------------------------------------------------------------------- +call potfm(ydfmm%nxy,km,kskip,ydfmm%nx,ldxout,ydfmm%nquad,& + & ydfmm%rw,pq,ydfmm%rdexp,ydfmm%index,& + & ydfmm%nclose,ydfmm%ncik,ydfmm%cik,ptheta) +end subroutine seefmm_mulm +!========================================================================== + +recursive subroutine potfm(kn,km,kskip,kx,ldxout,kquad,prw,pq,prdexp,kindex,kclosel,kcik,pcik,ptheta) +implicit none + +integer(kind=jpim),intent(in) :: kn +integer(kind=jpim),intent(in) :: km +integer(kind=jpim),intent(in) :: kskip +integer(kind=jpim),intent(in) :: kx +logical ,intent(in) :: ldxout +integer(kind=jpim),intent(in) :: kquad +real(kind=jprb) ,intent(in) :: prw(:) +real(kind=jprb) ,intent(in) :: pq(:,:) +real(kind=jprb) ,intent(in) :: prdexp(:,:) +integer(kind=jpim),intent(in) :: kindex(:) +integer(kind=jpim),intent(in) :: kclosel(:) +integer(kind=jpim),intent(in) :: kcik +real(kind=jprb) ,intent(in) :: pcik(:) +real(kind=jprb) ,intent(out) :: ptheta(:,:) + +real(kind=jprb) :: zalpha(kquad,km) +integer(kind=jpim) :: j1,j2,jm,jq,inumc,idist,iquad +integer(kind=jpim) :: iout,iq,i1,i1p1,i1pd,ik1,ix,iy +logical :: lxy,llxy(kn) + +lxy(ik1) = (ik1 <= kx .eqv. ldxout) +!------------------------------------------------------------------------- + +!CALL GSTATS(209,0) +ptheta(:,:)=0.0_JPRB +if(ldxout) then + ix=0 + iy=-kx +else + ix=-kx + iy=0 +endif +do j1=1,kn + i1=kindex(j1) + llxy(j1)=lxy(i1) +enddo + +if(llxy(1)) then + do jm=1,km,kskip + zalpha(:,jm)=pq(jm,kindex(1)+ix) + enddo +else + zalpha(:,:)=0.0_jprb +endif +!CALL GSTATS(209,1) +!CALL GSTATS(210,0) +do j1=2,kn + i1=kindex(j1) + if(llxy(j1) ) then + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + endif + else + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)+prw(jq)*zalpha(jq,jm) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)+prw(jq)*zalpha(jq,jm) + enddo + enddo + endif + endif +enddo +!CALL GSTATS(210,1) + +!CALL GSTATS(211,0) +if(llxy(kn)) then + do jm=1,km,kskip + zalpha(:,jm)=pq(jm,kindex(kn)+ix) + enddo +else + zalpha(:,:)=0.0 +endif +!CALL GSTATS(211,1) +!CALL GSTATS(212,0) +do j1=kn-1,1,-1 + i1=kindex(j1) + i1p1=kindex(j1+1) + if(llxy(j1)) then + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + endif + else + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)-prw(jq)*zalpha(jq,jm) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)-prw(jq)*zalpha(jq,jm) + enddo + enddo + endif + endif +enddo +!CALL GSTATS(212,1) + + +IF(kcik > 0) then +! CALL GSTATS(213,0) + inumc=0 + do j1=1,kn-1 + do j2=1,kclosel(j1) + idist=j2 + i1=kindex(j1) + i1pd=kindex(j1+idist) + if(.not.llxy(j1) .and. llxy(j1+idist)) then + inumc=inumc+1 + do jm=1,km,kskip + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)-pcik(inumc)*pq(jm,i1pd+ix) + enddo + elseif(llxy(j1) .and. .not.llxy(j1+idist)) then + inumc=inumc+1 + do jm=1,km,kskip + ptheta(jm,i1pd+iy)=ptheta(jm,i1pd+iy)+pcik(inumc)*pq(jm,i1+ix) + enddo + endif + enddo + enddo +! CALL GSTATS(213,1) +endif + +end subroutine potfm +!========================================================================= +recursive subroutine suquad(kn,prange,kquad,prw,prt,pr) +implicit none + +integer(kind=jpim) ,intent(in) :: kn +real(kind=jprb),intent(in) :: prange +integer(kind=jpim) ,intent(in) :: kquad +real(kind=jprb),intent(out) :: prw(:) +real(kind=jprb),intent(out) :: prt(:) +real(kind=jprb),intent(out) :: pr + +real(kind=jprb) :: za,zb,zs +integer(kind=jpim) :: jm +!------------------------------------------------------------------------- + +za=1.0 +zb=500.0 +zs=zb/prange +pr=za/zs +call wts500(prt,prw,kquad) +do jm=1,kquad + prw(jm)=prw(jm)*zs + prt(jm)=prt(jm)*zs +enddo +end subroutine suquad +!========================================================================== + +recursive subroutine comb_xy(kx,px,ky,py,kxy,pxy,kindex) + +implicit none + +integer(kind=jpim), intent(in) :: kx,ky +real(kind=jprd), intent(in) :: px(:) +real(kind=jprb), intent(in) :: py(:) +integer(kind=jpim), intent(in) :: kxy +real(kind=jprb), intent(out) :: pxy(:) +integer(kind=jpim), intent(out) :: kindex(:) +integer(kind=jpim) :: jxy,ix,iy,iret + +!------------------------------------------------------------------------- + +pxy(1:kx)=px(1:kx) +pxy(kx+1:kx+ky)=py(1:ky) +!call m01daf(pxy,1,kxy,'D',irank,ifail) +call keysort(iret,pxy,kxy,descending=.true.,index=kindex,init=.true.) +!!$do jxy=1,kxy +!!$ kindex(irank(jxy))=jxy +!!$enddo + +end subroutine comb_xy +!========================================================================== +recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& + & kclosel,pcik,knocik,pdiff) + +implicit none + +integer(kind=jpim), intent(in) :: kx +integer(kind=jpim), intent(in) :: kxy +integer(kind=jpim), intent(in) :: kquad +real(kind=jprb), intent(in) :: pxy(:) +real(kind=jprb), intent(in) :: prw(:) +real(kind=jprb), intent(in) :: pr +real(kind=jprb), intent(in) :: prt(:) +integer(kind=jpim), intent(in) :: kindex(:) +real(kind=jprb), intent(out) :: prdexp(:,:) +integer(kind=jpim), intent(out) :: kclosel(:) +real(kind=jprb), intent(out) :: pcik(:) +integer(kind=jpim), intent(out) :: knocik +real(kind=jprb),optional, intent(in) :: pdiff(:,:) + +real(kind=jprb) :: zdx +real(kind=jprb) :: zsum +real(kind=jprb) :: zdiff(kxy,kxy) +integer(kind=jpim) :: jxy,jq,isize,jdist,ixy,ixym1,i1,i1pd,j1,j2 +logical :: llexit +!------------------------------------------------------------------------- +if(present(pdiff)) then + zdiff(:,:)=pdiff(:,:) +else + do j1=1,kxy + do j2=1,kxy + zdiff(j1,j2)=pxy(j1)-pxy(j2) + enddo + enddo +endif +do jxy=2,kxy + ixy=kindex(jxy) + ixym1=kindex(jxy-1) + do jq=1,kquad + prdexp(jq,jxy)=exp(zdiff(ixy,ixym1)*prt(jq)) + enddo +enddo +kclosel(:)=0 +knocik=0 +isize=size(pcik) +llexit=.true. +do jxy=1,kxy-1 + do jdist=1,kxy-jxy + i1=kindex(jxy) + i1pd=kindex(jxy+jdist) + zdx=zdiff(i1,i1pd) + if(zdx < pr) then + llexit=.false. + kclosel(jxy)=kclosel(jxy)+1 + if((i1 > kx .and. i1pd <= kx) .or. (i1pd > kx .and. i1 <= kx)) then + knocik=knocik+1 + zsum=0.0_jprb + do jq=1,kquad + zsum=zsum+prw(jq)*exp(-zdx*prt(jq)) + enddo + pcik(knocik)=1.0_jprb/zdx-zsum + endif + else + exit + endif + enddo + if(knocik > isize) stop ' precompfint : pcik tto small' +enddo + +end subroutine prepotf +!========================================================================== + +end module seefmm_mix diff --git a/src/trans/cpu/algor/set99.F90 b/src/trans/cpu/algor/set99.F90 new file mode 100644 index 0000000..0b25af5 --- /dev/null +++ b/src/trans/cpu/algor/set99.F90 @@ -0,0 +1,82 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + SUBROUTINE SET99(TRIGS,IFAX,N) +!AUTOPROMOTE + USE PARKIND1, ONLY : JPIM, JPRB + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +! + IMPLICIT NONE +! + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: I,IFAC,IL,IXXX,K,BFAX,NHL,NIL,NFAX,NU + REAL(KIND=JPRB) :: ANGLE,DEL + REAL(KIND=JPRB) :: TRIGS(N) + INTEGER(KIND=JPIM) :: IFAX(*) + INTEGER(KIND=JPIM) :: JFAX(10),NLFAX(7) +! +! SUBROUTINE 'SET99' - COMPUTES FACTORS OF N & TRIGONOMETRIC +! FUNCTIONS REQUIRED BY FFT99 & FFT991 +! + SAVE NLFAX +! + DATA NLFAX/6,8,5,4,3,2,1/ +! + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + IF (LHOOK) CALL DR_HOOK('SET99',0,ZHOOK_HANDLE) + IXXX=1 +! + DEL=4.0E0_JPRB*ASIN(1.0E0_JPRB)/REAL(N,KIND=JPRB) + NIL=0 + NHL=(N/2)-1 + DO 10 K=NIL,NHL + ANGLE=REAL(K,KIND=JPRB)*DEL + TRIGS(2*K+1)=COS(ANGLE) + TRIGS(2*K+2)=SIN(ANGLE) + 10 CONTINUE +! +! FIND FACTORS OF N (8,6,5,4,3,2; ONLY ONE 8 ALLOWED) +! LOOK FOR SIXES FIRST, STORE FACTORS IN DESCENDING ORDER + NU=N + IFAC=6 + K=0 + IL=1 + 20 CONTINUE + IF (MOD(NU,IFAC).NE.0) GO TO 30 + K=K+1 + JFAX(K)=IFAC + IF (IFAC.NE.8) GO TO 25 + IF (K.EQ.1) GO TO 25 + JFAX(1)=8 + JFAX(K)=6 + 25 CONTINUE + NU=NU/IFAC + IF (NU.EQ.1) GO TO 50 + IF (IFAC.NE.8) GO TO 20 + 30 CONTINUE + IL=IL+1 + IFAC=NLFAX(IL) + IF (IFAC.GT.1) GO TO 20 +! + WRITE(6,40) N + 40 FORMAT(4H1N =,I4,27H - CONTAINS ILLEGAL FACTORS) + IF (LHOOK) CALL DR_HOOK('SET99',1,ZHOOK_HANDLE) + RETURN +! +! NOW REVERSE ORDER OF FACTORS + 50 CONTINUE + NFAX=K + IFAX(1)=NFAX + DO 60 I=1,NFAX + IFAX(NFAX+2-I)=JFAX(I) + 60 CONTINUE + IFAX(10)=N + IF (LHOOK) CALL DR_HOOK('SET99',1,ZHOOK_HANDLE) + ENDSUBROUTINE SET99 diff --git a/src/trans/cpu/algor/set99b.F90 b/src/trans/cpu/algor/set99b.F90 new file mode 100644 index 0000000..a91ab3d --- /dev/null +++ b/src/trans/cpu/algor/set99b.F90 @@ -0,0 +1,81 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + SUBROUTINE SET99B(TRIGS,IFAX,N,LDUSEFFT992) +!AUTOPROMOTE + USE PARKIND1, ONLY : JPIM, JPRB +! + IMPLICIT NONE +! + INTEGER(KIND=JPIM),INTENT(IN) :: N + REAL(KIND=JPRB),INTENT(OUT) :: TRIGS(N) + INTEGER(KIND=JPIM),INTENT(OUT) :: IFAX(*) + LOGICAL,INTENT(OUT) :: LDUSEFFT992 + + INTEGER(KIND=JPIM) :: I,IFAC,IL,IXXX,K,BFAX,NHL,NIL,NFAX,NU + REAL(KIND=JPRB) :: ANGLE,DEL + INTEGER(KIND=JPIM) :: JFAX(10),NLFAX(7) +! +! SUBROUTINE 'SET99B' - COMPUTES FACTORS OF N & TRIGONOMETRIC +! FUNCTIONS REQUIRED BY FFT992. +! BASED ON SET99, SET99B ALSO RETURNS VIA LUSEFFT992 WHETHER +! FACTORS HAVE BEEN FOUND THAT CAN PERMIT (OR NOT) FFT992 TO BE USED. +! + SAVE NLFAX +! + DATA NLFAX/6,8,5,4,3,2,1/ +! + IXXX=1 +! + DEL=4.0E0_JPRB*ASIN(1.0E0_JPRB)/REAL(N,KIND=JPRB) + NIL=0 + NHL=(N/2)-1 + DO 10 K=NIL,NHL + ANGLE=REAL(K,KIND=JPRB)*DEL + TRIGS(2*K+1)=COS(ANGLE) + TRIGS(2*K+2)=SIN(ANGLE) + 10 CONTINUE +! +! FIND FACTORS OF N (8,6,5,4,3,2; ONLY ONE 8 ALLOWED) +! LOOK FOR SIXES FIRST, STORE FACTORS IN DESCENDING ORDER + NU=N + IFAC=6 + K=0 + IL=1 + 20 CONTINUE + IF (MOD(NU,IFAC).NE.0) GO TO 30 + K=K+1 + JFAX(K)=IFAC + IF (IFAC.NE.8) GO TO 25 + IF (K.EQ.1) GO TO 25 + JFAX(1)=8 + JFAX(K)=6 + 25 CONTINUE + NU=NU/IFAC + IF (NU.EQ.1) GO TO 50 + IF (IFAC.NE.8) GO TO 20 + 30 CONTINUE + IL=IL+1 + IFAC=NLFAX(IL) + IF (IFAC.GT.1) GO TO 20 +! + LDUSEFFT992=.FALSE. + RETURN +! +! NOW REVERSE ORDER OF FACTORS + 50 CONTINUE + NFAX=K + IFAX(1)=NFAX + DO 60 I=1,NFAX + IFAX(NFAX+2-I)=JFAX(I) + 60 CONTINUE + IFAX(10)=N + LDUSEFFT992=.TRUE. + END SUBROUTINE SET99B diff --git a/src/trans/cpu/algor/wts500_mod.F90 b/src/trans/cpu/algor/wts500_mod.F90 new file mode 100644 index 0000000..af9bb7e --- /dev/null +++ b/src/trans/cpu/algor/wts500_mod.F90 @@ -0,0 +1,3765 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE WTS500_MOD +CONTAINS +SUBROUTINE WTS500(PX,PW,KN) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KN +REAL(KIND=JPRB), INTENT(OUT) :: PX(:),PW(:) + +! This routine returns a set of Gaussian nodes and weights for +! integrating the functions exp(lambda*x)dx over the range x=0 to x=infinity. +! They work for lambda in the range [1,501]. The accuracy +! of the quadrature for each n is given in the tables below. + +! Input arguments: +! n - number of weights and nodes in the quadrature. This must +! be an integer in the range [2,56]. +! +! Output arguments: +! w - weights +! x - nodes +! +! +! The following table gives the approximate accuracy of the weights in +! this file, that is to say the experimentally determined maximum +! absolute error for lambda in the range [1,501]. +! +! 2 0.76126E-01 +! 3 0.26903E-01 +! 4 0.88758E-02 +! 5 0.28110E-02 +! 6 0.86785E-03 +! 7 0.26276E-03 +! 8 0.78346E-04 +! 9 0.23066E-04 +! 10 0.67184E-05 +! 11 0.19386E-05 +! 12 0.55482E-06 +! 13 0.15762E-06 +! 14 0.44478E-07 +! 15 0.12474E-07 +! 16 0.34787E-08 +! 17 0.96498E-09 +! 18 0.26636E-09 +! 19 0.73174E-10 +! 20 0.20013E-10 +! 21 0.54503E-11 +! 22 0.14783E-11 +! 23 0.39937E-12 +! 24 0.10749E-12 +! 25 0.28822E-13 +! 26 0.77011E-14 +! 27 0.20993E-14 +! 28 0.59593E-15 + +! (The accuracies beyond this point are +! only available if this routine is converted +! to extended precision.) +! +! 29 0.16665E-15 +! 30 0.45938E-16 +! 31 0.12483E-16 +! 32 0.33436E-17 +! 33 0.88209E-18 +! 34 0.22896E-18 +! 35 0.58363E-19 +! 36 0.15182E-19 +! 37 0.45892E-20 +! 38 0.13452E-20 +! 39 0.38384E-21 +! 40 0.10683E-21 +! 41 0.29025E-22 +! 42 0.76955E-23 +! 43 0.19878E-23 +! 44 0.49867E-24 +! 45 0.12879E-24 +! 46 0.38890E-25 +! 47 0.11493E-25 +! 48 0.32717E-26 +! 49 0.89977E-27 +! 50 0.23916E-27 +! 51 0.66534E-28 +! 52 0.20256E-28 +! 53 0.60754E-29 +! 54 0.17974E-29 +! 55 0.52173E-30 +! 56 0.14656E-30 +! 57 0.39867E-31 +! 58 0.17622E-31 +! 59 0.11941E-31 + + +if(kn < 1 .or. kn > 59) CALL ABOR1('kn out of bounds in wts500') + +if(kn == 1) then + px( 1)= 0.30029234138173323099658823269124393D+00 + pw( 1)= 0.10474544159373900054024730385996879D+01 +endif +if(kn == 2) then + px( 1)= 0.44614645646035084305052271657195780D-01 + px( 2)= 0.69921614559509068409005059560416520D+00 + pw( 1)= 0.15994862626671497398269903651565162D+00 + pw( 2)= 0.15511944041990193294522419186360192D+01 +endif +if(kn == 3) then + px( 1)= 0.11857389353662594950547350532174902D-01 + px( 2)= 0.16764835416208964726306668598724940D+00 + px( 3)= 0.11277491807394385305149473243132366D+01 + pw( 1)= 0.40534466810113107834140226328650886D-01 + pw( 2)= 0.36261372044374320167772351965467234D+00 + pw( 3)= 0.19347454032003660753035080254268649D+01 +endif +if(kn == 4) then + px( 1)= 0.57654208655188821571537226422468374D-02 + px( 2)= 0.62523167781181198280172417136126064D-01 + px( 3)= 0.36533207087496350173593536437526512D+00 + px( 4)= 0.16157524591822212021884702173778156D+01 + pw( 1)= 0.17868545000488806425866630797084082D-01 + pw( 2)= 0.12328668633600752343455555544669733D+00 + pw( 3)= 0.58372673164630844216911861958288950D+00 + pw( 4)= 0.22766170591050845697618451516268360D+01 +endif +if(kn == 5) then + px( 1)= 0.38286655100887720869535305553503767D-02 + px( 2)= 0.32638654131646742439488396483219523D-01 + px( 3)= 0.15979343932440245421190120326583297D+00 + px( 4)= 0.62593598592475461862138219614406608D+00 + px( 5)= 0.21448850159686373839369069493121931D+01 + pw( 1)= 0.11006282598632254556602552524231582D-01 + pw( 2)= 0.57230982733825644334586234468135637D-01 + pw( 3)= 0.23302193704059872350599634911774233D+00 + pw( 4)= 0.80399187922220639155300759119683454D+00 + pw( 5)= 0.25817835060095957471682462945418890D+01 +endif +if(kn == 6) then + px( 1)= 0.29149725976167556773168063131622477D-02 + px( 2)= 0.21103262424016434877304140537976324D-01 + px( 3)= 0.87672665103786085283260733993906787D-01 + px( 4)= 0.30362191434277672991331660483079722D+00 + px( 5)= 0.93772122523975475283401768866415608D+00 + px( 6)= 0.27034204049910086432949276908848690D+01 + pw( 1)= 0.80264082196721958922253358145693741D-02 + pw( 2)= 0.33206285080799577093824435206894388D-01 + pw( 3)= 0.11527314673621704717905199327206330D+00 + pw( 4)= 0.35781407343251622864391524621893261D+00 + pw( 5)= 0.10155006971456147012687855907386742D+01 + pw( 6)= 0.28564602273790248848620772701419901D+01 +endif +if(kn == 7) then + px( 1)= 0.23756434216797693908355583003051663D-02 + px( 2)= 0.15535118461800569190547935165674497D-01 + px( 3)= 0.56551273947723240042834250291153011D-01 + px( 4)= 0.17450996559452745580564075650691432D+00 + px( 5)= 0.49112500667928528178358415821262749D+00 + px( 6)= 0.12919504684055881399873474932878924D+01 + px( 7)= 0.32851412356973227675206901468229149D+01 + pw( 1)= 0.63886696350675163666711135442919764D-02 + pw( 2)= 0.22442242541224196499797804610442189D-01 + pw( 3)= 0.67162457789171790971598253749543852D-01 + pw( 4)= 0.18766490984256656291382113580254403D+00 + pw( 5)= 0.48993342911988760795594155472540210D+00 + pw( 6)= 0.12160105557427987424854903052204337D+01 + pw( 7)= 0.31065668529411483170389922159304207D+01 +endif +if(kn == 8) then + px( 1)= 0.20132180607225834852983700686031058D-02 + px( 2)= 0.12364145427770238507453563351438370D-01 + px( 3)= 0.40779487004322342225816753965428875D-01 + px( 4)= 0.11391148589791378330792648499651137D+00 + px( 5)= 0.29412883145944878287611616309747638D+00 + px( 6)= 0.71837098644914450185415022533227351D+00 + px( 7)= 0.16816853747148161279323645594606599D+01 + px( 8)= 0.38855951611162649511701947653116899D+01 + pw( 1)= 0.53408830450397378295721475271713427D-02 + pw( 2)= 0.16787984180357441921393643943808306D-01 + pw( 3)= 0.44204970292771876400860637563463140D-01 + pw( 4)= 0.11185011190895920238718302559774708D+00 + pw( 5)= 0.27016191944753418682586773667091217D+00 + pw( 6)= 0.62450892495416087531923817566640810D+00 + pw( 7)= 0.14051940584215133597891405916743820D+01 + pw( 8)= 0.33364764999199772405176908767598180D+01 +endif +if(kn == 9) then + px( 1)= 0.17503557878075214519879036524833375D-02 + px( 2)= 0.10330054871723184754037951908660005D-01 + px( 3)= 0.31732206325445549848937295392421049D-01 + px( 4)= 0.81679121803401719155156637120993002D-01 + px( 5)= 0.19535573613157974971806765507983509D+00 + px( 6)= 0.44595223788074039354969352640100852D+00 + px( 7)= 0.98135220945386835905397405725519927D+00 + px( 8)= 0.21015170770020791654126380210416672D+01 + px( 9)= 0.45016099625796459085057556421321067D+01 + pw( 1)= 0.46046107862322611600702922635137510D-02 + pw( 2)= 0.13429769279555420282230890098322098D-01 + pw( 3)= 0.31849272894061216190385847667286999D-01 + pw( 4)= 0.73619249380405585299506449662883408D-01 + pw( 5)= 0.16535947795274881422091783969932427D+00 + pw( 6)= 0.35934417872243713158616629815331565D+00 + pw( 7)= 0.75859660458155596407004967675485659D+00 + pw( 8)= 0.15835611580085742332645675327785911D+01 + pw( 9)= 0.35495334121390922353050388191171761D+01 +endif +if(kn == 10) then + px( 1)= 0.15499542669794147311761132610985078D-02 + px( 2)= 0.89096688398509678030924958952986541D-02 + px( 3)= 0.26016129693043413683698040760210971D-01 + px( 4)= 0.62749208598031754292711006026615926D-01 + px( 5)= 0.14050353407849426047312501552279980D+00 + px( 6)= 0.30181414123622395598472969521213967D+00 + px( 7)= 0.62857185766887058395879604119303521D+00 + px( 8)= 0.12763299399934852402803633611287669D+01 + px( 9)= 0.25471652578226896546390447678170800D+01 + px(10)= 0.51308067782658204682801321291953140D+01 + pw( 1)= 0.40548253986991389304086648382669812D-02 + pw( 2)= 0.11236188731318700188319947717229244D-01 + pw( 3)= 0.24538592970816425402207400802497825D-01 + pw( 4)= 0.52343517158460444258926972420783323D-01 + pw( 5)= 0.10996558531241373841512937168184451D+00 + pw( 6)= 0.22576984249893556435278829920283551D+00 + pw( 7)= 0.45261772769497660240184928082408646D+00 + pw( 8)= 0.89046194530658792052229355508350808D+00 + pw( 9)= 0.17519181692872931385803490555209237D+01 + pw(10)= 0.37483184442175079452559119390314762D+01 +endif +if(kn == 11) then + px( 1)= 0.13916503869954620980214122938112018D-02 + px( 2)= 0.78558050039620362116422912802704097D-02 + px( 3)= 0.22120351963459063625179184464273817D-01 + px( 4)= 0.50713242013627782652925282235350304D-01 + px( 5)= 0.10742319887758113859279057980251631D+00 + px( 6)= 0.21869117548117680828718663672766937D+00 + px( 7)= 0.43336030838054496927092430378951163D+00 + px( 8)= 0.84018288532692476091948497009100848D+00 + px( 9)= 0.15999462842086233335871841089805130D+01 + px(10)= 0.30151891304900922172275251687511795D+01 + px(11)= 0.57713337136325357659044166408988553D+01 + pw( 1)= 0.36266510989460802397873164288690734D-02 + pw( 2)= 0.96950369134995745371692684658412310D-02 + pw( 3)= 0.19868559916434820960232503677049332D-01 + pw( 4)= 0.39531505533000821496522977944503206D-01 + pw( 5)= 0.78139977968766268496958310477317134D-01 + pw( 6)= 0.15225813478813020210433348166511355D+00 + pw( 7)= 0.29137456345793780649576074277273596D+00 + pw( 8)= 0.54807495314077461646456170049418256D+00 + pw( 9)= 0.10191215020827390969142637205874466D+01 + pw(10)= 0.19111370110068844260874568369820828D+01 + pw(11)= 0.39348541670555911675625708842098543D+01 +endif +if(kn == 12) then + px( 1)= 0.12632156319939695579215561432558385D-02 + px( 2)= 0.70387146180879286642550585568639377D-02 + px( 3)= 0.19302659916972801592515201957656444D-01 + px( 4)= 0.42551194105513329489354627472007290D-01 + px( 5)= 0.86085992365439625679560348707191417D-01 + px( 6)= 0.16725813918676372888922850902058409D+00 + px( 7)= 0.31700589361943171070304767229555238D+00 + px( 8)= 0.58951191619791589683106843254408061D+00 + px( 9)= 0.10788413480839543106858840025589018D+01 + px(10)= 0.19492439011651360999619060490611511D+01 + px(11)= 0.35027911449093405858739078699931569D+01 + px(12)= 0.64217203451802643126959421992550844D+01 + pw( 1)= 0.32827550127604954815335522044868924D-02 + pw( 2)= 0.85507540197432347733759971108613029D-02 + pw( 3)= 0.16691067543209908975186757261638663D-01 + pw( 4)= 0.31305443669027793014029811945580099D-01 + pw( 5)= 0.58567896813505682343320309424861089D-01 + pw( 6)= 0.10877780693120356209434163632243069D+00 + pw( 7)= 0.19948368504455875259042629374456601D+00 + pw( 8)= 0.36073967408442558747147434040847228D+00 + pw( 9)= 0.64434179114194185133722660716595021D+00 + pw(10)= 0.11440591869200469793282529105709381D+01 + pw(11)= 0.20620613323447639907885178297356148D+01 + pw(12)= 0.41107547758236817466309181956604953D+01 +endif +if(kn == 13) then + px( 1)= 0.11568070520917003084172899381406952D-02 + px( 2)= 0.63841031288591798815234066736007959D-02 + px( 3)= 0.17167711063873432612672836118321923D-01 + px( 4)= 0.36716019315047555758945584779212588D-01 + px( 5)= 0.71538434902470635056036514447143755D-01 + px( 6)= 0.13352689214829569833793677431883262D+00 + px( 7)= 0.24327491638178311179091055382048232D+00 + px( 8)= 0.43570388054574854249210127119178039D+00 + px( 9)= 0.76944422789933215499607649746035088D+00 + px(10)= 0.13426076062689412695687622546331410D+01 + px(11)= 0.23216397056754496795639041380757885D+01 + px(12)= 0.40076692644610800412009568941378368D+01 + px(13)= 0.70807767026725092855482684561044825D+01 + pw( 1)= 0.29999461201538405335426373656007144D-02 + pw( 2)= 0.76647148893919466896210631066410433D-02 + pw( 3)= 0.14413099397767107036014918126327008D-01 + pw( 4)= 0.25735254465096214146045248201904623D-01 + pw( 5)= 0.45838911095175772187879197848974379D-01 + pw( 6)= 0.81479609103017647524386799211775902D-01 + pw( 7)= 0.14368582127715272775954689518716653D+00 + pw( 8)= 0.25069362938769053221329919008840415D+00 + pw( 9)= 0.43269827884396605348016529885550754D+00 + pw(10)= 0.74044407474304235266031833325121847D+00 + pw(11)= 0.12650477516195139055095418091901788D+01 + pw(12)= 0.22054684388256755415929800672572855D+01 + pw(13)= 0.42773285665753897495906588616832298D+01 +endif +if(kn == 14) then + px( 1)= 0.10671394835726993643401190254501129D-02 + px( 2)= 0.58463219387999205854721615603871185D-02 + px( 3)= 0.15490001691602291418567227600617406D-01 + px( 4)= 0.32358969391919711170669944447127038D-01 + px( 5)= 0.61149820125492142314645657324953302D-01 + px( 6)= 0.11030358671474348741774495105493271D+00 + px( 7)= 0.19410127886489828338777312891458955D+00 + px( 8)= 0.33608975279087339935499749076365196D+00 + px( 9)= 0.57467416136431217184690034730315880D+00 + px(10)= 0.97213677138209888520957230352753264D+00 + px(11)= 0.16296229550658990298708599801796051D+01 + px(12)= 0.27148835813646440924906285656041458D+01 + px(13)= 0.45279078084008377593720415669623160D+01 + px(14)= 0.77475240501693856896302484250295344D+01 + pw( 1)= 0.27629693325443496023249306681806622D-02 + pw( 2)= 0.69561289060164080762372517703735452D-02 + pw( 3)= 0.12707947219219552524391040926686251D-01 + pw( 4)= 0.21789865957161394780641970526254899D-01 + pw( 5)= 0.37165450147146819315861454426090832D-01 + pw( 6)= 0.63467023298815675308212729382023282D-01 + pw( 7)= 0.10796235746711791223910582849233129D+00 + pw( 8)= 0.18227038265561301471910246064953376D+00 + pw( 9)= 0.30504176678107803021400309104091856D+00 + pw(10)= 0.50632019264974725562617387788020652D+00 + pw(11)= 0.83570187185256971962451509332190849D+00 + pw(12)= 0.13820370228408991881296397708221031D+01 + pw(13)= 0.23420578929692185520173441027903024D+01 + pw(14)= 0.44356505634271121964311698817472400D+01 +endif +if(kn == 15) then + px( 1)= 0.99051088742239546911827900108699496D-03 + px( 2)= 0.53956859621998690434579925957171424D-02 + px( 3)= 0.14133099949775560668841655685250135D-01 + px( 4)= 0.28986898223070841544449976160392634D-01 + px( 5)= 0.53435705913102094881195404152789995D-01 + px( 6)= 0.93645206162765297669714244403422362D-01 + px( 7)= 0.15985934779166946137952895657247365D+00 + px( 8)= 0.26857062154409215737173322640809941D+00 + px( 9)= 0.44599642616020670957476031778753703D+00 + px(10)= 0.73354905454082877060570463263769745D+00 + px(11)= 0.11964718339441153959477756602491571D+01 + px(12)= 0.19381471569679916567453627291281352D+01 + px(13)= 0.31270149297417650854875367303912466D+01 + px(14)= 0.50618963332858424622137075227057629D+01 + px(15)= 0.84211464685237105475663901928632362D+01 + pw( 1)= 0.25613355491269301472247025631698555D-02 + pw( 2)= 0.63749573647622919143443050089843640D-02 + pw( 3)= 0.11385424145275303734569902166128714D-01 + pw( 4)= 0.18885541036270664981042580089520502D-01 + pw( 5)= 0.31017529676997219516938356388920347D-01 + pw( 6)= 0.51073868255802230619626900228351940D-01 + pw( 7)= 0.84046732560351404937544837831163072D-01 + pw( 8)= 0.13765675735555740692176543168654955D+00 + pw( 9)= 0.22395807420699105358370420342497012D+00 + pw(10)= 0.36179555862328041144887460703035761D+00 + pw(11)= 0.58087464283264030346041192108355511D+00 + pw(12)= 0.92964960101952292137644303560667260D+00 + pw(13)= 0.14950837580230651438254975578030658D+01 + pw(14)= 0.24724522131384988410164432454754806D+01 + pw(15)= 0.45866150536692880851084073088444439D+01 +endif +if(kn == 16) then + px( 1)= 0.92424556053971804395203579030996377D-03 + px( 2)= 0.50119857035276309167085959684639920D-02 + px( 3)= 0.13010148344461010136325444162584406D-01 + px( 4)= 0.26298847872231801244580127340702382D-01 + px( 5)= 0.47514597563807100058998705367026609D-01 + px( 6)= 0.81270265524596050675960333331275375D-01 + px( 7)= 0.13512212854897371400148043770796492D+00 + px( 8)= 0.22099831669763609789452208029307415D+00 + px( 9)= 0.35743533198723356545740999174337465D+00 + px(10)= 0.57303934564748771037392604303226621D+00 + px(11)= 0.91178803183594107838418321994075593D+00 + px(12)= 0.14412990877209031034459072051939979D+01 + px(13)= 0.22665732295803028877227882175486339D+01 + px(14)= 0.35563219327028564804732957321586739D+01 + px(15)= 0.56082683107347449605789692452752341D+01 + px(16)= 0.91009557053769985645449386780285663D+01 + pw( 1)= 0.23875730074587121522484947794868875D-02 + pw( 2)= 0.58885836354644078094393546205524519D-02 + pw( 3)= 0.10329316404953667658388475423521547D-01 + pw( 4)= 0.16675469828365142363971545189694061D-01 + pw( 5)= 0.26509288186703737427234519675535818D-01 + pw( 6)= 0.42238486164148838677474311842225533D-01 + pw( 7)= 0.67416914217096425505948847545251978D-01 + pw( 8)= 0.10736855475828194921843388490206803D+00 + pw( 9)= 0.17018723045468088538746817095795499D+00 + pw(10)= 0.26821766647062034548841613907117539D+00 + pw(11)= 0.42033354906191972945041277677914052D+00 + pw(12)= 0.65579398374758854246146234684717478D+00 + pw(13)= 0.10219768938841077866955934226960186D+01 + pw(14)= 0.16043073953579345130225854215735434D+01 + pw(15)= 0.25972025694404090887006269510370113D+01 + pw(16)= 0.47309741605753804978373532621330190D+01 +endif +if(kn == 17) then + px( 1)= 0.86635871969021922697368620380719171D-03 + px( 2)= 0.46809392638055264395521402125297454D-02 + px( 3)= 0.12063365169996780691336553708488918D-01 + px( 4)= 0.24103101327380368111361062755741909D-01 + px( 5)= 0.42839783531484774994275135488102184D-01 + px( 6)= 0.71796215026314425758110076991702409D-01 + px( 7)= 0.11667729790867741765510249018437646D+00 + px( 8)= 0.18634666322578944021297170782701070D+00 + px( 9)= 0.29431002802504656436686613469103506D+00 + px(10)= 0.46097947485749393786168585286436753D+00 + px(11)= 0.71707474488613285211211458276745483D+00 + px(12)= 0.11087399814864640384073502346191564D+01 + px(13)= 0.17054769509601451753469079240400202D+01 + px(14)= 0.26134294141755383267430920003219774D+01 + px(15)= 0.40013054555328633047267069424482020D+01 + px(16)= 0.61658542528138849120122933226961818D+01 + px(17)= 0.97863651496368699835621922821544266D+01 + pw( 1)= 0.22362031087732985183223720565899613D-02 + pw( 2)= 0.54748197495181657019575138084577960D-02 + pw( 3)= 0.94654762542541339323585186529928706D-02 + pw( 4)= 0.14944978294023789536885689508849665D-01 + pw( 5)= 0.23104141337215046610391678093559219D-01 + pw( 6)= 0.35743858431643263735692324511510319D-01 + pw( 7)= 0.55471485911701630593769305151799996D-01 + pw( 8)= 0.86081253422553517325473099895533971D-01 + pw( 9)= 0.13319208512009976788628468120231006D+00 + pw(10)= 0.20518383340535621745997474262468218D+00 + pw(11)= 0.31457026902282458163459435604989060D+00 + pw(12)= 0.48013605826872593355323421318271994D+00 + pw(13)= 0.73064164224806746286555643417201840D+00 + pw(14)= 0.11124853152432080885402590631214998D+01 + pw(15)= 0.17098620915289443753971369096340138D+01 + pw(16)= 0.27167962114974631513284929060677873D+01 + pw(17)= 0.48693666605809874327613580765223186D+01 +endif +if(kn == 18) then + px( 1)= 0.81534546944719247652148851160350705D-03 + px( 2)= 0.43921383032957290852797835084086745D-02 + px( 3)= 0.11252831287113979876218879126262041D-01 + px( 4)= 0.22272809918423842477230910840641111D-01 + px( 5)= 0.39059190621733757694717358109890744D-01 + px( 6)= 0.64351345855435130240707418934923649D-01 + px( 7)= 0.10254178168021558618169365620644254D+00 + px( 8)= 0.16036912239056014178785088963204099D+00 + px( 9)= 0.24793085437822483127871211432445117D+00 + px(10)= 0.38020606680363380296594904808373420D+00 + px(11)= 0.57931198537370336370768123946111904D+00 + px(12)= 0.87782030094596229067122251201903469D+00 + px(13)= 0.13236886505899049407415999004449793D+01 + px(14)= 0.19878981731926587403713993880950166D+01 + px(15)= 0.29773742063276571130424960439239811D+01 + px(16)= 0.44606479473592453417809733684642453D+01 + px(17)= 0.67336454736569532214104241445939923D+01 + px(18)= 0.10476870213785278025831291196419849D+02 + pw( 1)= 0.21031106134613254745224559686561387D-02 + pw( 2)= 0.51180225478690087298803997939678307D-02 + pw( 3)= 0.87447370935970058836913883371899363D-02 + pw( 4)= 0.13556333730223126136121123231259769D-01 + pw( 5)= 0.20464410216338036789779126385561199D-01 + pw( 6)= 0.30840826466947410241261173555971053D-01 + pw( 7)= 0.46646725563835069845910497771424517D-01 + pw( 8)= 0.70665992632893967524146851106623791D-01 + pw( 9)= 0.10691711075454395961181073477729829D+00 + pw(10)= 0.16126547210903485792084905962898804D+00 + pw(11)= 0.24229509616610255685205188073761953D+00 + pw(12)= 0.36259234617456851620630123413473169D+00 + pw(13)= 0.54077318770758097024862643911836327D+00 + pw(14)= 0.80508517291294611337977692389747849D+00 + pw(15)= 0.12010568605667241321654935386099247D+01 + pw(16)= 0.18119190754724382445935517697030275D+01 + pw(17)= 0.28316640971192016310500153967949394D+01 + pw(18)= 0.50023398513295527439214736503709354D+01 +endif +if(kn == 19) then + px( 1)= 0.77004314038027538186473565824644713D-03 + px( 2)= 0.41377999330050882914315571254404182D-02 + px( 3)= 0.10550059721986121680218103475882631D-01 + px( 4)= 0.20721165151890931602860316370210977D-01 + px( 5)= 0.35938464219963308088025295472146865D-01 + px( 6)= 0.58366846240533488939212086566679739D-01 + px( 7)= 0.91445878837421954880767431790624735D-01 + px( 8)= 0.14039874091074934933241224785817280D+00 + px( 9)= 0.21294392165350590964714225395783962D+00 + px(10)= 0.32034449388214261059174416345147381D+00 + px(11)= 0.47894630556295055579310759106626288D+00 + px(12)= 0.71240272881825772752142518345001522D+00 + px(13)= 0.10548945187200170944433178271823931D+01 + px(14)= 0.15558853016268620673915640887496031D+01 + px(15)= 0.22875048294126069831169385958820912D+01 + px(16)= 0.33571879025245227476638520498261293D+01 + px(17)= 0.49331870217088227943005162552018204D+01 + px(18)= 0.73107657529555946558412435928344152D+01 + px(19)= 0.11172033293322081236666941378477642D+02 + pw( 1)= 0.19851404434292128161572603749012797D-02 + pw( 2)= 0.48068259850224485883558544189622861D-02 + pw( 3)= 0.81333622551403917938013382271885652D-02 + pw( 4)= 0.12418290368147926797125266347472059D-01 + pw( 5)= 0.18370520996399132266929646675963512D-01 + pw( 6)= 0.27051260748772011764068846294046263D-01 + pw( 7)= 0.39965808856363914121506612420685851D-01 + pw( 8)= 0.59209186306872358867005896800202963D-01 + pw( 9)= 0.87733163743117132114765866319084195D-01 + pw(10)= 0.12975590467993236932824761605587432D+00 + pw(11)= 0.19133746801777096221136436300603368D+00 + pw(12)= 0.28119521004597734924657796391935552D+00 + pw(13)= 0.41191451657988898367993353735036478D+00 + pw(14)= 0.60189235614006428503841831846326255D+00 + pw(15)= 0.87887424161089233540150542700936886D+00 + pw(16)= 0.12876310724220064351766502884115249D+01 + pw(17)= 0.19106555786053829247748848866287425D+01 + pw(18)= 0.29421880423760456779000496029582960D+01 + pw(19)= 0.51303663807899157605725316747503964D+01 +endif +if(kn == 20) then + px( 1)= 0.72953870581824875898426590899779881D-03 + px( 2)= 0.39119765013576279090594732249736674D-02 + px( 3)= 0.99341482544177994609830844666447236D-02 + px( 4)= 0.19386948343749964006247704258211447D-01 + px( 5)= 0.33316937960159820123580449168857215D-01 + px( 6)= 0.53460240758464289437871006955420897D-01 + px( 7)= 0.82550373662718413944945968267436420D-01 + px( 8)= 0.12470333365766989270462564119778831D+00 + px( 9)= 0.18593248929460522396945432673813683D+00 + px(10)= 0.27488824477709750979845021236505632D+00 + px(11)= 0.40393075219294011341163041535306531D+00 + px(12)= 0.59066371193107531104946391158049526D+00 + px(13)= 0.86011311520208939992657730367961144D+00 + px(14)= 0.12478475018108315971898483004517581D+01 + px(15)= 0.18045718085352853467882826664457563D+01 + px(16)= 0.26032962838705539076929338309800964D+01 + px(17)= 0.37517626775845063477806691210141684D+01 + px(18)= 0.54178931704948062041809621881307184D+01 + px(19)= 0.78964489442873309920085649476831128D+01 + px(20)= 0.11871472067202549660846380067126100D+02 + pw( 1)= 0.18798307004678227911126701301382902D-02 + pw( 2)= 0.45327579066213798867372241143837770D-02 + pw( 3)= 0.76074915984009747299662992379957293D-02 + pw( 4)= 0.11468617772837279819666929117999041D-01 + pw( 5)= 0.16675567273387276975224875991907841D-01 + pw( 6)= 0.24060338209931569028899923305997740D-01 + pw( 7)= 0.34797589593340192429611675508702300D-01 + pw( 8)= 0.50498483539096564292114787807163509D-01 + pw( 9)= 0.73383972617080340693094946803759227D-01 + pw(10)= 0.10656241858153477344295213679085888D+00 + pw(11)= 0.15442089967699606004539206241769081D+00 + pw(12)= 0.22316517325789412150938929616587900D+00 + pw(13)= 0.32158746804288047814456613183637546D+00 + pw(14)= 0.46221802166864090447194287669501892D+00 + pw(15)= 0.66320650916245908698703462902787156D+00 + pw(16)= 0.95182294118921990028099509733606027D+00 + pw(17)= 0.13721884223731472188837264029436159D+01 + pw(18)= 0.20062479833990429444286070644467212D+01 + pw(19)= 0.30487071357319501782576369389563967D+01 + pw(20)= 0.52538573669990067214113994100342659D+01 +endif +if(kn == 21) then + px( 1)= 0.69310486169589490054767779485652272D-03 + px( 2)= 0.37100374076510691098395748526914731D-02 + px( 3)= 0.93893881986074420164580675137225674D-02 + px( 4)= 0.18225818290917406339248365551994452D-01 + px( 5)= 0.31081498488793808069000194256763116D-01 + px( 6)= 0.49367420729419602783748969736699563D-01 + px( 7)= 0.75284896750519291461000850881359128D-01 + px( 8)= 0.11212409497044480168252045403791321D+00 + px( 9)= 0.16464732212808249809258249926002432D+00 + px(10)= 0.23962225781352520103022311097593359D+00 + px(11)= 0.34658518012264822513642471749928831D+00 + px(12)= 0.49892252217405192191396093341852149D+00 + px(13)= 0.71538674271382417280638826801508549D+00 + px(14)= 0.10222215706608203725632212510312547D+01 + px(15)= 0.14561846864798943048112298539958500D+01 + px(16)= 0.20689966860645877444413948503855558D+01 + px(17)= 0.29343325491322992822962883455667422D+01 + px(18)= 0.41600923418527605289681744860608923D+01 + px(19)= 0.59138510177367345119623187111536027D+01 + px(20)= 0.84900211059998571872908583995965399D+01 + px(21)= 0.12574850278864023350809778583936857D+02 + pw( 1)= 0.17852307235103793959959450796716884D-02 + pw( 2)= 0.42893635684453385047417019848134615D-02 + pw( 3)= 0.71497939239830042802199211798008382D-02 + pw( 4)= 0.10663735418026519276123538929713931D-01 + pw( 5)= 0.15278738558420813530874306732239620D-01 + pw( 6)= 0.21655078725685226823011775486802689D-01 + pw( 7)= 0.30722016510142553890174563226245394D-01 + pw( 8)= 0.43741403514742541630405757195910036D-01 + pw( 9)= 0.62420477800170703110133642765679935D-01 + pw(10)= 0.89101011801730193914410815797468243D-01 + pw(11)= 0.12703103910528559102170033988553577D+00 + pw(12)= 0.18073427743399167030424190804804963D+00 + pw(13)= 0.25651888703698665673377349665161239D+00 + pw(14)= 0.36320523524230891449268691236436560D+00 + pw(15)= 0.51323006313409951821184870857512202D+00 + pw(16)= 0.72448352105440485035350350605592673D+00 + pw(17)= 0.10237957354057707173113819962280747D+01 + pw(18)= 0.14547382360170350806458262214201030D+01 + pw(19)= 0.20988676887781728328899280598296916D+01 + pw(20)= 0.31515233601433750723678379202457850D+01 + pw(21)= 0.53731727475008532237843822389416189D+01 +endif +if(kn == 22) then + px( 1)= 0.66015487793114579672026064530843580D-03 + px( 2)= 0.35283188763257655749934705032913356D-02 + px( 3)= 0.89037268484852140335335192775132156D-02 + px( 4)= 0.17204887313904086178312640698870646D-01 + px( 5)= 0.29150571838447577747778236163957035D-01 + px( 6)= 0.45901615793714000736387715594203021D-01 + px( 7)= 0.69252127065257752538232461849428663D-01 + px( 8)= 0.10186530460793644200891559033979032D+00 + px( 9)= 0.14756646191534939234956638050262194D+00 + px(10)= 0.21173627302990231446602872154763540D+00 + px(11)= 0.30186392834135377102923595739844832D+00 + px(12)= 0.42832456502483048109845734510065518D+00 + px(13)= 0.60545815580026546162531862085612489D+00 + px(14)= 0.85305900599439063193443527117705937D+00 + px(15)= 0.11984444149249300732981072917233551D+01 + px(16)= 0.16793849380767164321266153811733636D+01 + px(17)= 0.23484259421990250059871794075320909D+01 + px(18)= 0.32797346839031417791643463913803933D+01 + px(19)= 0.45812624093642886254220734381797637D+01 + px(20)= 0.64202435545325521457505355440378480D+01 + px(21)= 0.90908861089411744472574137099194608D+01 + px(22)= 0.13281870386631548366752315803495071D+02 + pw( 1)= 0.16997739815079876291270142303147830D-02 + pw( 2)= 0.40716314869569174969368080298995427D-02 + pw( 3)= 0.67473810622389953786984914722964259D-02 + pw( 4)= 0.99723772588960829502246883384624672D-02 + pw( 5)= 0.14109253034463888804804922921072868D-01 + pw( 6)= 0.19687945390172770224722572299963933D-01 + pw( 7)= 0.27452171417575159428955066586690985D-01 + pw( 8)= 0.38405330139190366155267988240486796D-01 + pw( 9)= 0.53884692766803369761362767720533598D-01 + pw(10)= 0.75690346610129349832179626694927492D-01 + pw(11)= 0.10627642236395592176596973365055862D+00 + pw(12)= 0.14901040431853077087285757833822327D+00 + pw(13)= 0.20852197820697925399881980495903384D+00 + pw(14)= 0.29118491216948479495401093995994782D+00 + pw(15)= 0.40581134430662519733362245846705041D+00 + pw(16)= 0.56471875154845478179304492458155087D+00 + pw(17)= 0.78553695828755046999062010778990119D+00 + pw(18)= 0.10946963582465286266450172551190923D+01 + pw(19)= 0.15353099122697920414083249854348604D+01 + pw(20)= 0.21886787280255501097880613119022761D+01 + pw(21)= 0.32509064591655802312953073550183727D+01 + pw(22)= 0.54886295320492726768301302193581765D+01 +endif +if(kn == 23) then + px( 1)= 0.63021006363433841831474149369938629D-03 + px( 2)= 0.33638809698722088589479529444503357D-02 + px( 3)= 0.84677484260852652320615476282127309D-02 + px( 4)= 0.16299243886097943905725295278638059D-01 + px( 5)= 0.27464030423556247673984741054128575D-01 + px( 6)= 0.42927866788160825736932098072471507D-01 + px( 7)= 0.64169139230662564082303838471065529D-01 + px( 8)= 0.93367767657860048911427490092659179D-01 + px( 9)= 0.13363456957733254533882679272072078D+00 + px(10)= 0.18930799757692553449088970134122038D+00 + px(11)= 0.26636229253087365091814709204751323D+00 + px(12)= 0.37297580801326442775998852633706569D+00 + px(13)= 0.52031366792206514283148817023645741D+00 + px(14)= 0.72359600291057382682653618452186879D+00 + px(15)= 0.10035561979187412137041345167147008D+01 + px(16)= 0.13884527722049286720427614547615987D+01 + px(17)= 0.19169141925914051220031027488430615D+01 + px(18)= 0.26421501677657864516298531009004741D+01 + px(19)= 0.36386833380196488331409942198293287D+01 + px(20)= 0.50144408008467284169777642211293427D+01 + px(21)= 0.69363388633219643316723260573935269D+01 + px(22)= 0.96985139442985089392873665294609340D+01 + px(23)= 0.13992267642385099650757505185917464D+02 + pw( 1)= 0.16221873306891233611184498841470861D-02 + pw( 2)= 0.38756062368028647892929037543456805D-02 + pw( 3)= 0.63904681228155448198104664799116312D-02 + pw( 4)= 0.93716130533642383510190970859726025D-02 + pw( 5)= 0.13116359133127231514779092120465823D-01 + pw( 6)= 0.18054564644654175010476006376716577D-01 + pw( 7)= 0.24787545528949442975874855915817576D-01 + pw( 8)= 0.34123347100699979880664417166869481D-01 + pw( 9)= 0.47126644724531756094751076593005203D-01 + pw(10)= 0.65206569654682359265380728357008572D-01 + pw(11)= 0.90252198579742822099518198813744744D-01 + pw(12)= 0.12481812207643272140418893825548652D+00 + pw(13)= 0.17237035227520038668196439463328275D+00 + pw(14)= 0.23761696881618806722148067227743494D+00 + pw(15)= 0.32696691109252817236452507243569941D+00 + pw(16)= 0.44919654054160466696484020754729982D+00 + pw(17)= 0.61648810596906235080169489216474494D+00 + pw(18)= 0.84621818814094735945360350883771170D+00 + pw(19)= 0.11644590806092632658493343351752018D+01 + pw(20)= 0.16139465349437736570093587472680170D+01 + pw(21)= 0.22758365152455412423280162288521126D+01 + pw(22)= 0.33470981208584956190993329351906399D+01 + pw(23)= 0.56005084488693785143169408072395655D+01 +endif +if(kn == 24) then + px( 1)= 0.60287590220675965883655392550708199D-03 + px( 2)= 0.32143351024974124786544026559297415D-02 + px( 3)= 0.80739802149419819753399469823810803D-02 + px( 4)= 0.15489662347070758196056101724102825D-01 + px( 5)= 0.25976667055963881164895151817795199D-01 + px( 6)= 0.40346714011175627204924600258024553D-01 + px( 7)= 0.59830440001259694739569609351980817D-01 + px( 8)= 0.86230308264184025703796753562250373D-01 + px( 9)= 0.12210371973165121163051429982451776D+00 + px(10)= 0.17099203191550984584070639962768156D+00 + px(11)= 0.23772719534691312495205955287685848D+00 + px(12)= 0.32885366959546582684337055007155510D+00 + px(13)= 0.45320556860130100192674333119232671D+00 + px(14)= 0.62268761188094067470211791701931515D+00 + px(15)= 0.85332750333845602641945072572045710D+00 + px(16)= 0.11667004858019451293051164929872329D+01 + px(17)= 0.15918861333740258915421115101398109D+01 + px(18)= 0.21682356132478165458928387387641436D+01 + px(19)= 0.29494889123245918407695084553095216D+01 + px(20)= 0.40104161879498382932676206134824540D+01 + px(21)= 0.54588693247348625992863433295378994D+01 + px(22)= 0.74614789122023638202247166496000371D+01 + px(23)= 0.10312431148373425365575271866282924D+02 + px(24)= 0.14705805275028081231659123827294642D+02 + pw( 1)= 0.15514249847644583248110080072115634D-02 + pw( 2)= 0.36981205611282502501592938773909080D-02 + pw( 3)= 0.60714888966684178001398442739385021D-02 + pw( 4)= 0.88442862964874279166007983229029862D-02 + pw( 5)= 0.12262956109957475661400953934190561D-01 + pw( 6)= 0.16679687959122678642462992762851947D-01 + pw( 7)= 0.22585160392521754760445705422367202D-01 + pw( 8)= 0.30637073979795749450832614038637313D-01 + pw( 9)= 0.41694955770756579068288981525602823D-01 + pw(10)= 0.56880053645721037701497821632682136D-01 + pw(11)= 0.77671508906845583575064408093070968D-01 + pw(12)= 0.10604093207274572585305524523636662D+00 + pw(13)= 0.14462967696642788409146183249874326D+00 + pw(14)= 0.19698231180562617603349065754159954D+00 + pw(15)= 0.26786134570104304270817950330971854D+00 + pw(16)= 0.36368625547483729027913381140943346D+00 + pw(17)= 0.49317740444788268405884668163369343D+00 + pw(18)= 0.66837334895456593354822252636195142D+00 + pw(19)= 0.90640972513509685558875644594827397D+00 + pw(20)= 0.12330418540439459719440490731353612D+01 + pw(21)= 0.16907002256383886184087957665579435D+01 + pw(22)= 0.23604873140910405927871490349730829D+01 + pw(23)= 0.34403155641323731993571442081042100D+01 + pw(24)= 0.57090593461669661084141390492124683D+01 +endif +if(kn == 25) then + px( 1)= 0.57782426626576571901902524785967824D-03 + px( 2)= 0.30777191085612843075800020147563616D-02 + px( 3)= 0.77164086160794063370485387996841129D-02 + px( 4)= 0.14761056704626219657716466804157254D-01 + px( 5)= 0.24653875400014389680962462419049193D-01 + px( 6)= 0.38083523913823039614894844760656273D-01 + px( 7)= 0.56084067240729530165951958613013393D-01 + px( 8)= 0.80159727346580499135438064461924847D-01 + px( 9)= 0.11243339120217788148662085401543468D+00 + px(10)= 0.15582715928119113928223955420169306D+00 + px(11)= 0.21429698577498381337290048538797344D+00 + px(12)= 0.29315054947238081211765761052065623D+00 + px(13)= 0.39947904356246012931942329550682875D+00 + px(14)= 0.54273750890918728697108592749256418D+00 + px(15)= 0.73551917539468211697041027264605642D+00 + px(16)= 0.99458876650351865510403034983347981D+00 + px(17)= 0.13422725867611823095435983972364716D+01 + px(18)= 0.18083631354696402035562158086478313D+01 + px(19)= 0.24328170422327489041615980739248420D+01 + px(20)= 0.32697931199298648820663632485790012D+01 + px(21)= 0.43942247576362161609644700980578340D+01 + px(22)= 0.59138559751958628054368604601307713D+01 + px(23)= 0.79950700685028878658475844781145359D+01 + px(24)= 0.10932212901976999668719864628273880D+02 + px(25)= 0.15422270538187376197239585881075748D+02 + pw( 1)= 0.14866196466384881027031457863846341D-02 + pw( 2)= 0.35366057944120495831689126436344645D-02 + pw( 3)= 0.57844969402891304739165399375745324D-02 + pw( 4)= 0.83773253924994338826352544649181741D-02 + pw( 5)= 0.11521432119169876731207250699755894D-01 + pw( 6)= 0.15508119705558278186014992003540867D-01 + pw( 7)= 0.20741213416907048628693122469214068D-01 + pw( 8)= 0.27760968610727861118254402368893756D-01 + pw( 9)= 0.37269537287121025856202203197461521D-01 + pw(10)= 0.50172098341486540969354039874345299D-01 + pw(11)= 0.67645050740466106776500656782887390D-01 + pw(12)= 0.91234375023821592290664320838692913D-01 + pw(13)= 0.12298569754451901426257783074280505D+00 + pw(14)= 0.16561310646694880188580709432419945D+00 + pw(15)= 0.22272153335996509389159575477724314D+00 + pw(16)= 0.29910756989135904284632341805878551D+00 + pw(17)= 0.40118169825940023204127855629976398D+00 + pw(18)= 0.53759403804464314015326297165302822D+00 + pw(19)= 0.72023661788993173698912869326221741D+00 + pw(20)= 0.96601967120274407325842915516032939D+00 + pw(21)= 0.13004209336797472935438387369012951D+01 + pw(22)= 0.17656287674999046987390122872170472D+01 + pw(23)= 0.24427681629137658222037012630588612D+01 + pw(24)= 0.35307546103133160002458888441211917D+01 + pw(25)= 0.58145056190411423438373377369723833D+01 +endif +if(kn == 26) then + px( 1)= 0.55477997323631075943068059436768679D-03 + px( 2)= 0.29524051251057578595813790584093855D-02 + px( 3)= 0.73901342939831943165981049698675567D-02 + px( 4)= 0.14101412872584234971190262785177463D-01 + px( 5)= 0.23468728331528566192654626405819210D-01 + px( 6)= 0.36081354457771003437927577005080307D-01 + px( 7)= 0.52815777662090440120493976780463113D-01 + px( 8)= 0.74938112557816762151465822385814066D-01 + px( 9)= 0.10422602290395549848995179022541619D+00 + px(10)= 0.14311379843177524483131355370021475D+00 + px(11)= 0.19487526595566569453971371848061316D+00 + px(12)= 0.26386679946717607062454498658707156D+00 + px(13)= 0.35585463477981546153201478673425350D+00 + px(14)= 0.47845228003668789580973663677048048D+00 + px(15)= 0.64169971258466993649179145930192297D+00 + px(16)= 0.85882779202859317392945336605019422D+00 + px(17)= 0.11472706757497270472184438297710363D+01 + px(18)= 0.15300218181930435465993945481075142D+01 + px(19)= 0.20374900555366768284567833572011801D+01 + px(20)= 0.27101363667403762213767958798451042D+01 + px(21)= 0.36024461925895008266595796162422643D+01 + px(22)= 0.47894509506391986224214442346207227D+01 + px(23)= 0.63787680281602106661197309343576824D+01 + px(24)= 0.85365750410497181159653468391328011D+01 + px(25)= 0.11557476465733555694328533876083768D+02 + px(26)= 0.16141471440934226918800792463390524D+02 + pw( 1)= 0.14270457888511795867992646035078330D-02 + pw( 2)= 0.33889549852374609060800978480342579D-02 + pw( 3)= 0.55247507422590039192859514393700253D-02 + pw( 4)= 0.79606058991690688534072920870913751D-02 + pw( 5)= 0.10870893529433384055135393826453360D-01 + pw( 6)= 0.14498723203402237674680094273702632D-01 + pw( 7)= 0.19179116372035461554716799339102955D-01 + pw( 8)= 0.25359449796805531683017831774564858D-01 + pw( 9)= 0.33619093306389351199382063563600456D-01 + pw(10)= 0.44698102947503307879411438578940957D-01 + pw(11)= 0.59545498569788992362177569759542076D-01 + pw(12)= 0.79391390756669405807121619168425508D-01 + pw(13)= 0.10584364527942384492476511067473085D+00 + pw(14)= 0.14101237929374191599558996117239049D+00 + pw(15)= 0.18767097313836850144218059320809785D+00 + pw(16)= 0.24946862930962964921754344345139933D+00 + pw(17)= 0.33121906265157831801486301115689293D+00 + pw(18)= 0.43930860961154203611926221969681638D+00 + pw(19)= 0.58230770087411222173176311911927048D+00 + pw(20)= 0.77196313761963428481446391672957021D+00 + pw(21)= 0.10249770981257475300463739775919616D+01 + pw(22)= 0.13665866627564720110102420582560601D+01 + pw(23)= 0.18387931590238615303343058387625137D+01 + pw(24)= 0.25228070818980592228138174960180006D+01 + pw(25)= 0.36185923153701902774641344711352413D+01 + pw(26)= 0.59170478659334870019465678075549678D+01 +endif +if(kn == 27) then + px( 1)= 0.53351049035404632679792315747760926D-03 + px( 2)= 0.28370305792687798355464055618059207D-02 + px( 3)= 0.70911216690725917919709661777310467D-02 + px( 4)= 0.13501035871356777604646936827660401D-01 + px( 5)= 0.22399961695605187088994736400122990D-01 + px( 6)= 0.34296092152256244946377676950669096D-01 + px( 7)= 0.49938369643043464349412733250120371D-01 + px( 8)= 0.70401007218931052290887962696404315D-01 + px( 9)= 0.97184515439527403402719960641852663D-01 + px(10)= 0.13233419679650342626453438780924062D+00 + px(11)= 0.17858535768221430523992541052027664D+00 + px(12)= 0.23955186967002279067082055689473205D+00 + px(13)= 0.31997738343786115574032202180084953D+00 + px(14)= 0.42606917808870858677237819912563285D+00 + px(15)= 0.56593757336952698275709554352322422D+00 + px(16)= 0.75017087050170008688238981674255895D+00 + px(17)= 0.99258767627203407312800418206037430D+00 + px(18)= 0.13112276119862221456283706350994460D+01 + px(19)= 0.17296743945926671138206419268165977D+01 + px(20)= 0.22788674435500397192690477951071238D+01 + px(21)= 0.29996852862458953100640794763905730D+01 + px(22)= 0.39468640965897806748062604397538661D+01 + px(23)= 0.51954835049739316659023548298272390D+01 + px(24)= 0.68530258860308177417175245026171396D+01 + px(25)= 0.90855060116903171155308252608094055D+01 + px(26)= 0.12187875689542501045614051917426613D+02 + px(27)= 0.16863234023420842677139640506273211D+02 + pw( 1)= 0.13720916834443970883055143999864006D-02 + pw( 2)= 0.32534222826312859680330393960878445D-02 + pw( 3)= 0.52884203958454954233826913886323010D-02 + pw( 4)= 0.75861682815905379946822876423968075D-02 + pw( 5)= 0.10295285639112777079908697606376495D-01 + pw( 6)= 0.13620382459545697869019461721069283D-01 + pw( 7)= 0.17841523884178755603525268045244929D-01 + pw( 8)= 0.23331884086195134219312146516778989D-01 + pw( 9)= 0.30573638051336305998992920739412241D-01 + pw(10)= 0.40178513173563771124515253029295068D-01 + pw(11)= 0.52921980029762362141777841892525060D-01 + pw(12)= 0.69796141123370781915050562748149785D-01 + pw(13)= 0.92082178946617789793924526199756695D-01 + pw(14)= 0.12144364501581543284664137255263233D+00 + pw(15)= 0.16004546398125829773973180676428128D+00 + pw(16)= 0.21070793305215927164839501305159383D+00 + pw(17)= 0.27711057981819535787363684480367458D+00 + pw(18)= 0.36407033983503565406434335329539225D+00 + pw(19)= 0.47793795110360227457939282572737921D+00 + pw(20)= 0.62719851165950099532441318332470642D+00 + pw(21)= 0.82345785449976155240467438227528273D+00 + pw(22)= 0.10832282284259041619866566987699138D+01 + pw(23)= 0.14315401678117573067846471990768551D+01 + pw(24)= 0.19102558501900861570470238950653852D+01 + pw(25)= 0.26007234476536547511161089599236255D+01 + pw(26)= 0.37039892290540711004993184577181103D+01 + pw(27)= 0.60168669305235587630364842700538762D+01 +endif +if(kn == 28) then + px( 1)= 0.51381795837439443363513212680713947D-03 + px( 2)= 0.27304457410704199858650659828620367D-02 + px( 3)= 0.68160137769119765426643490207146340D-02 + px( 4)= 0.12952008943105054557760398423212956D-01 + px( 5)= 0.21430556623995591107026232001066913D-01 + px( 6)= 0.32693077435416129189765100617149847D-01 + px( 7)= 0.47384339361452651219408943088094832D-01 + px( 8)= 0.66422538658869906464895332297645726D-01 + px( 9)= 0.91083609265701992171879031038032271D-01 + px(10)= 0.12309931515680468843361103443044334D+00 + px(11)= 0.16477454361350713713658223760718724D+00 + px(12)= 0.21913585560877712250760133646822826D+00 + px(13)= 0.29012664768975497883815066595179815D+00 + px(14)= 0.38286487674840579443374001038670456D+00 + px(15)= 0.50398058179729621330814443966676445D+00 + px(16)= 0.66205451410726746024107068348273274D+00 + px(17)= 0.86818664253955900521872813145997494D+00 + px(18)= 0.11367350670636899400604869200366967D+01 + px(19)= 0.14862849200104679887431662518488448D+01 + px(20)= 0.19409402276831122951996033127610769D+01 + px(21)= 0.25320952507423009788362491203678215D+01 + px(22)= 0.33009718613493033820591483341014688D+01 + px(23)= 0.43024948141012255149771188379035923D+01 + px(24)= 0.56117545045133971321428621533897157D+01 + px(25)= 0.73360976078470116280524716610880778D+01 + px(26)= 0.96414187592042593009284671180300365D+01 + px(27)= 0.12823096392169689557014539066503465D+02 + px(28)= 0.17587400071042305461725873529323834D+02 + pw( 1)= 0.13212378315760757100801294124815478D-02 + pw( 2)= 0.31285477801160114283501913408130908D-02 + pw( 3)= 0.50723762615025144279692888527822532D-02 + pw( 4)= 0.72476698125814896247817149986171532D-02 + pw( 5)= 0.97820955598317266921336611328577874D-02 + pw( 6)= 0.12849233584715652412922486858932270D-01 + pw( 7)= 0.16684914458309663575212478712400069D-01 + pw( 8)= 0.21602517467719514930691067702478109D-01 + pw( 9)= 0.28006326728639319667000527469785398D-01 + pw(10)= 0.36406794940307839233444412699072219D-01 + pw(11)= 0.47444873633013210207704765440613480D-01 + pw(12)= 0.61930727493054842346818886062879310D-01 + pw(13)= 0.80898273560459582900261577117872215D-01 + pw(14)= 0.10567594748292560400210313567244803D+00 + pw(15)= 0.13797620661979101239766714101470269D+00 + pw(16)= 0.18000947429659496345933986602370689D+00 + pw(17)= 0.23463186153608726981379368239281040D+00 + pw(18)= 0.30554132794139847744947603213040709D+00 + pw(19)= 0.39754682244155206328794798554812257D+00 + pw(20)= 0.51695511158335980560554454196568031D+00 + pw(21)= 0.67216328370835447637864341433166332D+00 + pw(22)= 0.87464250694849374310652866430377750D+00 + pw(23)= 0.11407332853853718344895028360165344D+01 + pw(24)= 0.14952907674277354153396565164371127D+01 + pw(25)= 0.19800794802566744638917545875047599D+01 + pw(26)= 0.26766284602650483548086237399782999D+01 + pw(27)= 0.37870913379223214170502998382833988D+01 + pw(28)= 0.61141264493435815963177300026057897D+01 +endif +if(kn == 29) then + px( 1)= 0.49553294523216557706121043094899937D-03 + px( 2)= 0.26316733478032430241054271443133571D-02 + px( 3)= 0.65619932921719095950945647167842097D-02 + px( 4)= 0.12447798146998465679635850497317547D-01 + px( 5)= 0.20546725218853503579585565380355127D-01 + px( 6)= 0.31244723477287375693064121565353181D-01 + px( 7)= 0.45100744250336127000529420647899915D-01 + px( 8)= 0.62905104337181109718071876082818174D-01 + px( 9)= 0.85750236217882191338049895922209411D-01 + px(10)= 0.11511276616778913843095284495474961D+00 + px(11)= 0.15294975780371902992191410154541288D+00 + px(12)= 0.20181754709239365590998602589064153D+00 + px(13)= 0.26502522465295890912360438565915637D+00 + px(14)= 0.34683572458174845871862351357320832D+00 + px(15)= 0.45272795494425557401215215391241853D+00 + px(16)= 0.58973560996706034541847942850272405D+00 + px(17)= 0.76688297098562182278932098742313991D+00 + px(18)= 0.99574549679776398032257261690761656D+00 + px(19)= 0.12911746195797983348858102746352939D+01 + px(20)= 0.16722452518287396697078251913438203D+01 + px(21)= 0.21635184617418031866723236843209620D+01 + px(22)= 0.27967767483991669703409180904294707D+01 + px(23)= 0.36135221397956750546277975652240522D+01 + px(24)= 0.46688173594398401975825823496477303D+01 + px(25)= 0.60377360285854709978780788792051344D+01 + px(26)= 0.78274940573494599108741305430804683D+01 + px(27)= 0.10203907613754336363839677845176062D+02 + px(28)= 0.13462852450631673420328416214301979D+02 + px(29)= 0.18313825184349771602172383716912527D+02 + pw( 1)= 0.12740401404642070584625642978316674D-02 + pw( 2)= 0.30131005567070877653374960845857871D-02 + pw( 3)= 0.48740340828768558996439695359657849D-02 + pw( 4)= 0.69399937174499322748248258405693356D-02 + pw( 5)= 0.93214421069590292260830707831625273D-02 + pw( 6)= 0.12166736320428934613437189621950636D-01 + pw( 7)= 0.15675841191975760517355581582896129D-01 + pw( 8)= 0.20113588991116143831392394792507242D-01 + pw( 9)= 0.25821196242227502811986031677074846D-01 + pw(10)= 0.33228090412405890048263783552877789D-01 + pw(11)= 0.42869421776728397919368187941044920D-01 + pw(12)= 0.55414351585478714766063918835679369D-01 + pw(13)= 0.71707159949828379722440698604318357D-01 + pw(14)= 0.92821391683596645191185627202531776D-01 + pw(15)= 0.12012815275736363503642593112420207D+00 + pw(16)= 0.15538195214533728663259981412132171D+00 + pw(17)= 0.20083006704520997993532092391882379D+00 + pw(18)= 0.25935464058941676571399243604292685D+00 + pw(19)= 0.33466206352796625147483719389400201D+00 + pw(20)= 0.43154442816926686793937626854331538D+00 + pw(21)= 0.55625869020489676787739935371459343D+00 + pw(22)= 0.71711352998990444136642620781136560D+00 + pw(23)= 0.92545309501307835297306650109719589D+00 + pw(24)= 0.11974638996261713170731294345952752D+01 + pw(25)= 0.15578539402639460936307650070163272D+01 + pw(26)= 0.20483259849497670771611527406588667D+01 + pw(27)= 0.27506256539950821112799212513803514D+01 + pw(28)= 0.38680317406459942528573299847997115D+01 + pw(29)= 0.62089749987608497692655462325987093D+01 +endif +if(kn == 30) then + px( 1)= 0.47850950616663001797979108558563715D-03 + px( 2)= 0.25398771556977649765987433184628366D-02 + px( 3)= 0.63266767276005636636918971079049107D-02 + px( 4)= 0.11982958698830863481679603697851263D-01 + px( 5)= 0.19737172990592845187334346934200923D-01 + px( 6)= 0.29928809539955562795388150514477224D-01 + px( 7)= 0.43045554614365273736449489448924963D-01 + px( 8)= 0.59772102008795894644542468648119854D-01 + px( 9)= 0.81049794509547598102960610576252333D-01 + px(10)= 0.10814588049834038786751975710018494D+00 + px(11)= 0.14273357759635461536908362003919156D+00 + px(12)= 0.18698857852398412902900671067259807D+00 + px(13)= 0.24371127323293364985980734512310093D+00 + px(14)= 0.31648527240822183351218895583590417D+00 + px(15)= 0.40988303161683259239808922937843934D+00 + px(16)= 0.52973043830387724896707606257507982D+00 + px(17)= 0.68344507067026327204085106743008657D+00 + px(18)= 0.88046769972296521320190133680773247D+00 + px(19)= 0.11328140045719997686900182451437385D+01 + px(20)= 0.14557849987988940136735253178131959D+01 + px(21)= 0.18688939211845819607060281336643708D+01 + px(22)= 0.23971019483057447245002052718973892D+01 + px(23)= 0.30725214791534315669485745292475362D+01 + px(24)= 0.39368810885201871521056281051541589D+01 + px(25)= 0.50453405183264296277555982679398449D+01 + px(26)= 0.64729369863961542161639397127927023D+01 + px(27)= 0.83267646024511909496921957165518850D+01 + px(28)= 0.10772601108679545696397581453580679D+02 + px(29)= 0.14106882472397927743465848536293191D+02 + px(30)= 0.19042377139722396576925840568797290D+02 + pw( 1)= 0.12301166672602858132749403305585138D-02 + pw( 2)= 0.29060349388917145213464128010319648D-02 + pw( 3)= 0.46912396989831005493100286944698875D-02 + pw( 4)= 0.66589658633863613568668981891460303D-02 + pw( 5)= 0.89054272701749525193189873717149483D-02 + pw( 6)= 0.11558310934603853176421212815818196D-01 + pw( 7)= 0.14788296604172157875123267577237353D-01 + pw( 8)= 0.18820536178193913710320280585533146D-01 + pw( 9)= 0.23944736371120032144351948458331453D-01 + pw(10)= 0.30524720054213913983988370110313231D-01 + pw(11)= 0.39011278230203159299747340391501401D-01 + pw(12)= 0.49962838489107032100208706917355628D-01 + pw(13)= 0.64076419786867156599743093793566683D-01 + pw(14)= 0.82229278957901320956487642041730955D-01 + pw(15)= 0.10553162871986467114052832148689872D+00 + pw(16)= 0.13539231869402525697097331669289294D+00 + pw(17)= 0.17360130147051396517985693386136263D+00 + pw(18)= 0.22243486135302704942601690670544171D+00 + pw(19)= 0.28479267765730766074850970825016903D+00 + pw(20)= 0.36438127580923131688892141299329716D+00 + pw(21)= 0.46596902265107476258412993277856600D+00 + pw(22)= 0.59575928418635779724375261556025579D+00 + pw(23)= 0.76197365269330802425420319974692784D+00 + pw(24)= 0.97583770654840390135150485526335360D+00 + pw(25)= 0.12534009762825410047987823857212526D+01 + pw(26)= 0.16192497318165861534514297610388291D+01 + pw(27)= 0.21150559758743170143427430064927639D+01 + pw(28)= 0.28228114202692982120171862876336191D+01 + pw(29)= 0.39469320964297990895048746179623006D+01 + pw(30)= 0.63015479148815916165232775496615043D+01 +endif +if(kn == 31) then + px( 1)= 0.46262124155980990900658158435387435D-03 + px( 2)= 0.24543371823976573134043990222488764D-02 + px( 3)= 0.61080328529962740945375030974273550D-02 + px( 4)= 0.11552913696384311289392080456928671D-01 + px( 5)= 0.18992554406021427042667932899509712D-01 + px( 6)= 0.28727239767175649142836330874634688D-01 + px( 7)= 0.41185025043396586435794925588583817D-01 + px( 8)= 0.56962729337650718045473296117050346D-01 + px( 9)= 0.76876408592321896648696565625767740D-01 + px(10)= 0.10202017613750426995368814592543654D+00 + px(11)= 0.13383358226439616664891093380483172D+00 + px(12)= 0.17418111785696519564699242954723688D+00 + px(13)= 0.22545079288954335283945305080525555D+00 + px(14)= 0.29068039334037578871072959727864135D+00 + px(15)= 0.37372028651206644042103206921271373D+00 + px(16)= 0.47944207171169672906363477630137984D+00 + px(17)= 0.61400401892256187595697112035275391D+00 + px(18)= 0.78518739405456531463788153132800130D+00 + px(19)= 0.10028226298243299869022545200635749D+01 + px(20)= 0.12793315955946422764804637028085576D+01 + px(21)= 0.16304237404723823714635864157191701D+01 + px(22)= 0.20760033968995280036753528016386266D+01 + px(23)= 0.26413808369702250796358745676110842D+01 + px(24)= 0.33589474379157213932868619266312477D+01 + px(25)= 0.42706130083047912404753160217494471D+01 + px(26)= 0.54316014237021642862351869609909389D+01 + px(27)= 0.69169001596879784406900118967237735D+01 + px(28)= 0.88334933035422052410960663437339313D+01 + px(29)= 0.11347158219793510037091924273351551D+02 + px(30)= 0.14754946949122040546764367410101515D+02 + px(31)= 0.19772934489331283651619534072783573D+02 + pw( 1)= 0.11891370745192323541501363233610476D-02 + pw( 2)= 0.28064565117632969596616229162402416D-02 + pw( 3)= 0.45221820039177658202628966334564400D-02 + pw( 4)= 0.64011462699419650278998414745094563D-02 + pw( 5)= 0.85276672843451997393681235521893987D-02 + pw( 6)= 0.11012361472143567538937306026897082D-01 + pw( 7)= 0.14001834101185433452944676115710984D-01 + pw( 8)= 0.17688602970920088694526112750101821D-01 + pw( 9)= 0.22319993264776239711478935025076064D-01 + pw(10)= 0.28206159264133299993412221361362783D-01 + pw(11)= 0.35729779624362463483758438125027756D-01 + pw(12)= 0.45361209930192015109636119041934318D-01 + pw(13)= 0.57681745959882977997939097436077625D-01 + pw(14)= 0.73415730766069123013128503531856238D-01 + pw(15)= 0.93471597750154348901340915093546613D-01 + pw(16)= 0.11899278980540690788248491553604027D+00 + pw(17)= 0.15142094037875549556146133150023155D+00 + pw(18)= 0.19257526453331204560446543583285304D+00 + pw(19)= 0.24475404687248924789069417791551804D+00 + pw(20)= 0.31086721331790994056051025981812504D+00 + pw(21)= 0.39461463811187169999440896691515521D+00 + pw(22)= 0.50073578853625257469356342726971100D+00 + pw(23)= 0.63537831862561035023942323431330555D+00 + pw(24)= 0.80667931860754124288487929852367162D+00 + pw(25)= 0.10257546571965496140352844249415068D+01 + pw(26)= 0.13085329420794059702124934248171024D+01 + pw(27)= 0.16795015057251400866456256943483557D+01 + pw(28)= 0.21803283205051475095028864283497279D+01 + pw(29)= 0.28932755231818831561701787327064654D+01 + pw(30)= 0.40239038808605675586573707403263756D+01 + pw(31)= 0.63919688446180182394784276398611383D+01 +endif +if(kn == 32) then + px( 1)= 0.44775812457284018024389459757135253D-03 + px( 2)= 0.23744300256081390660731331947131448D-02 + px( 3)= 0.59043190530701947087959015074527131D-02 + px( 4)= 0.11153785163059738085602585751236982D-01 + px( 5)= 0.18305065328015338438613004118489460D-01 + px( 6)= 0.27625127970368047372371104083117018D-01 + px( 7)= 0.39491775186977265606198670142306198D-01 + px( 8)= 0.54428213199640073895058009136940445D-01 + px( 9)= 0.73145916598392207837439901430911881D-01 + px(10)= 0.96594842317146387626737019235932073D-01 + px(11)= 0.12602067507135613816122919853447211D+00 + px(12)= 0.16303119243744080803051027229475087D+00 + px(13)= 0.20967680096838206807696535069330795D+00 + px(14)= 0.26855214787001775388966483767701730D+00 + px(15)= 0.34292617657317298468348748407414908D+00 + px(16)= 0.43690813207789007326976756359678361D+00 + px(17)= 0.55565789063347173772943038765658058D+00 + px(18)= 0.70565099753808518503172131364185913D+00 + px(19)= 0.89501204411614229330337167600565325D+00 + px(20)= 0.11339348063954267650829490706793417D+01 + px(21)= 0.14352147947875517113501672147412666D+01 + px(22)= 0.18149314500489324404656062560197686D+01 + px(23)= 0.22933370525154514327968223407435420D+01 + px(24)= 0.28960454333191297694038803298227801D+01 + px(25)= 0.36556826421582258456995650363192002D+01 + px(26)= 0.46143015669042314317688786912840126D+01 + px(27)= 0.58271640488452237241101113536108246D+01 + px(28)= 0.73691994618471152327249495515078891D+01 + px(29)= 0.93472955333283431319402613994267131D+01 + px(30)= 0.11927265101386731589772177284607045D+02 + px(31)= 0.15406825810545224143258580731663182D+02 + px(32)= 0.20505385359315843909738804845728816D+02 + pw( 1)= 0.11508141693310289431516002902194819D-02 + pw( 2)= 0.27135954170378174829279299706770743D-02 + pw( 3)= 0.43653263744209382080072687149869796D-02 + pw( 4)= 0.61636735125379730211346708661816263D-02 + pw( 5)= 0.81829488620066496094938528145972129D-02 + pw( 6)= 0.10519566782024470065554644097490174D-01 + pw( 7)= 0.13300211432891686360240172032295580D-01 + pw( 8)= 0.16690405167066082789755182863245534D-01 + pw( 9)= 0.20902378399561394483383583702025306D-01 + pw(10)= 0.26201993681677797113818409392334214D-01 + pw(11)= 0.32916309597556149563466321558626500D-01 + pw(12)= 0.41444783686249708806280965671571355D-01 + pw(13)= 0.52276724501596285458733044616038927D-01 + pw(14)= 0.66016045915514602434595691235181054D-01 + pw(15)= 0.83413404413409668999356206612085646D-01 + pw(16)= 0.10540610055778962043592758135111163D+00 + pw(17)= 0.13316716078519390466674733202657980D+00 + pw(18)= 0.16816620873421840783275153829642565D+00 + pw(19)= 0.21224605690896243222098764241728942D+00 + pw(20)= 0.26772081279710238967904075331055395D+00 + pw(21)= 0.33750446543867324459731453510204593D+00 + pw(22)= 0.42528477444911122706373139673310844D+00 + pw(23)= 0.53576855465657922120503664249148172D+00 + pw(24)= 0.67504694124786279657121038348068956D+00 + pw(25)= 0.85117601343595945001255999545786031D+00 + pw(26)= 0.10751709034800588277387396074134080D+01 + pw(27)= 0.13628543051087082785935353862196062D+01 + pw(28)= 0.17386349660199069072599128814460612D+01 + pw(29)= 0.22441998697757930470131299950344772D+01 + pw(30)= 0.29621015955004271568195551910771395D+01 + pw(31)= 0.40990494779803030065007758020292907D+01 + pw(32)= 0.64803510743879183341918928981358705D+01 +endif +if(kn == 33) then + px( 1)= 0.43382392840086627333673371770410791D-03 + px( 2)= 0.22996130762133902306219348170634844D-02 + px( 3)= 0.57140311482877757226936621231899489D-02 + px( 4)= 0.10782263460575904849778834680881905D-01 + px( 5)= 0.17668133941325186930637700801463696D-01 + px( 6)= 0.26610114005745400437311454869685139D-01 + px( 7)= 0.37943370827578419250767127256762961D-01 + px( 8)= 0.52129041240975080218154665697626616D-01 + px( 9)= 0.69790754616119288011923380403081974D-01 + px(10)= 0.91757677110053287920628073294896820D-01 + px(11)= 0.11911351898045213836878312985804824D+00 + px(12)= 0.15325258383592569039382713986292282D+00 + px(13)= 0.19594640045311118270956256575773479D+00 + px(14)= 0.24942637714448661140410427601352782D+00 + px(15)= 0.31648861015431273270737411206235176D+00 + px(16)= 0.40062704984338027141229486836727772D+00 + px(17)= 0.50620162303052368324000671362943743D+00 + px(18)= 0.63864913663861731335041615079256755D+00 + px(19)= 0.80474696130540033570426941368276095D+00 + px(20)= 0.10129427268888740212312948144945017D+01 + px(21)= 0.12737679814899102853785194013828910D+01 + px(22)= 0.16003609719649203492661497770183358D+01 + px(23)= 0.20091354103290292174609108669156369D+01 + px(24)= 0.25206522780436439397099624663858851D+01 + px(25)= 0.31607884518783084627961967518655955D+01 + px(26)= 0.39623662204110196024719874486085284D+01 + px(27)= 0.49675495548589556752790709024651368D+01 + px(28)= 0.62316176745314993859698590995521575D+01 + px(29)= 0.78294374119411180631665027515007813D+01 + px(30)= 0.98678149766403120157427026982285647D+01 + px(31)= 0.12512632243648644312040507648114218D+02 + px(32)= 0.16062316312799007384705365546800182D+02 + px(33)= 0.21239626413140571003413447643564263D+02 + pw( 1)= 0.11148970595521907012019732812607055D-02 + pw( 2)= 0.26267851653678976569258782421234610D-02 + pw( 3)= 0.42193631422457547168621098688321193D-02 + pw( 4)= 0.59441470797152969047545135707624789D-02 + pw( 5)= 0.78669738899974671911070000143132306D-02 + pw( 6)= 0.10072359597590157564525137294462730D-01 + pw( 7)= 0.12670399361862170180506056379944600D-01 + pw( 8)= 0.15804160264438711138334353671976234D-01 + pw( 9)= 0.19656646222033544724001127769742948D-01 + pw(10)= 0.24456891716604317647752096255377977D-01 + pw(11)= 0.30486078976025291042502816433302855D-01 + pw(12)= 0.38085942822052281180984432235900915D-01 + pw(13)= 0.47671859587250413816889365465789990D-01 + pw(14)= 0.59751905747580346721160295363733596D-01 + pw(15)= 0.74952101551419066714546625345670463D-01 + pw(16)= 0.94047940645329027189027157260871966D-01 + pw(17)= 0.11800297644299036213439491803576584D+00 + pw(18)= 0.14801615944289654934244517014490498D+00 + pw(19)= 0.18558059212548173745682851099099352D+00 + pw(20)= 0.23255756871893273043599614945440298D+00 + pw(21)= 0.29127163202367449192902938057774536D+00 + pw(22)= 0.36463564879320343679994202632934432D+00 + pw(23)= 0.45632094652154984485718193074718634D+00 + pw(24)= 0.57099911532700834474505095568484057D+00 + pw(25)= 0.71470499479041253227766745277939393D+00 + pw(26)= 0.89541776346343118676696712049315123D+00 + pw(27)= 0.11240606917291058823868093321319567D+01 + pw(28)= 0.14163644716761897658439794410119483D+01 + pw(29)= 0.17966773927205272792519385809395869D+01 + pw(30)= 0.23067252939728463992747791116176630D+01 + pw(31)= 0.30293676082841986853392483236331039D+01 + pw(32)= 0.41724631327442166841549815997399232D+01 + pw(33)= 0.65667986737889171343050053820031099D+01 +endif +if(kn == 34) then + px( 1)= 0.42073412472404638893756652613349609D-03 + px( 2)= 0.22294117501223585040813842121338791D-02 + px( 3)= 0.55358634424631592178894736933123541D-02 + px( 4)= 0.10435505221909818911938312253494801D-01 + px( 5)= 0.17076183522530106340715524871890215D-01 + px( 6)= 0.25671846892716699934599581849440412D-01 + px( 7)= 0.36521262393930458152775347026295878D-01 + px( 8)= 0.50032905506982571537052624490236219D-01 + px( 9)= 0.66756179329851771946428938115562858D-01 + px(10)= 0.87418440517600995276435533200107158D-01 + px(11)= 0.11296720871152482793171861979004167D+00 + px(12)= 0.14461798209060507185491668160548863D+00 + px(13)= 0.18391003445626398421131159916752177D+00 + px(14)= 0.23277438392452600064632190536358711D+00 + px(15)= 0.29361900416298607661742971283165405D+00 + px(16)= 0.36943648509469646191303516064166821D+00 + px(17)= 0.46393948817397095233405427748578045D+00 + px(18)= 0.58173003909628716647282591793935082D+00 + px(19)= 0.72851013492593350843955115700236997D+00 + px(20)= 0.91134335818633179015533214138153748D+00 + px(21)= 0.11389803746065945729570613409749246D+01 + px(22)= 0.14222658613929989350760708533653619D+01 + px(23)= 0.17746516415212834951524711400325322D+01 + px(24)= 0.22128526659161258391280177907354742D+01 + px(25)= 0.27577030475922356628315880749135163D+01 + px(26)= 0.34353067716163467499117583277012187D+01 + px(27)= 0.42786491225875840394307069342936437D+01 + px(28)= 0.53299784436903981664272138662385839D+01 + px(29)= 0.66445753694632339412682622877981687D+01 + px(30)= 0.82972428161435530717136911066799313D+01 + px(31)= 0.10394720964339982803669830220514570D+02 + px(32)= 0.13102991988894450504911092547924425D+02 + px(33)= 0.16721231207604090231249152699717947D+02 + px(34)= 0.21975561953387058269749184334695681D+02 + pw( 1)= 0.10811655766145357018010507192110778D-02 + pw( 2)= 0.25454456692504695230374260776656758D-02 + pw( 3)= 0.40831672721796992810291858236827121D-02 + pw( 4)= 0.57405373464649057612542381878716024D-02 + pw( 5)= 0.75761674957710259616551754950037804D-02 + pw( 6)= 0.96645393330211172110854747921485211D-02 + pw( 7)= 0.12101849132180839391980047456808267D-01 + pw( 8)= 0.15012385166203703798856896165367048D-01 + pw( 9)= 0.18554685664439102794960148200939741D-01 + pw(10)= 0.22926965460919156862511787954586100D-01 + pw(11)= 0.28372234446912034618472293586026471D-01 + pw(12)= 0.35184737546237040728179133891634893D-01 + pw(13)= 0.43719795721391424383284929004984363D-01 + pw(14)= 0.54408450285118084483187831793976080D-01 + pw(15)= 0.67777311678116535143958157494852949D-01 + pw(16)= 0.84473623053475603301350103260219498D-01 + pw(17)= 0.10529589998535694380806537413267952D+00 + pw(18)= 0.13123120457466970233581600829200986D+00 + pw(19)= 0.16350086670529523991639926152378893D+00 + pw(20)= 0.20361729924366521691008806803033027D+00 + pw(21)= 0.25345571161549014721855872141678839D+00 + pw(22)= 0.31534642949457072923890964012293452D+00 + pw(23)= 0.39219690177745437575400107500840528D+00 + pw(24)= 0.48765869057533013816866773942301700D+00 + pw(25)= 0.60636656060951114856441189594293935D+00 + pw(26)= 0.75430007270808930870580918261471173D+00 + pw(27)= 0.93936601064002840188234207354112438D+00 + pw(28)= 0.11724044094922051599035401593357573D+01 + pw(29)= 0.14690667743714044794229623979582298D+01 + pw(30)= 0.18536570456604450828475022635489700D+01 + pw(31)= 0.23679569977291335149484204767547713D+01 + pw(32)= 0.30951463106197815052582135293694448D+01 + pw(33)= 0.42442317841513325464351782732665369D+01 + pw(34)= 0.66514074844570206292427905989816795D+01 +endif +if(kn == 35) then + px( 1)= 0.40841415548604021515920964002399699D-03 + px( 2)= 0.21634090823285078411638264192646176D-02 + px( 3)= 0.53686766256969092483856645653707325D-02 + px( 4)= 0.10111052749147679432588829471203234D-01 + px( 5)= 0.16524448303260385083329478751404387D-01 + px( 6)= 0.24801589512429266354562963867459375D-01 + px( 7)= 0.35209981971294629902224557027954734D-01 + px( 8)= 0.48113158246267914322027983645621137D-01 + px( 9)= 0.63997447579595411241665965285069235D-01 + px(10)= 0.83503921106837065850088749934585509D-01 + px(11)= 0.10746492183189159858451002479100191D+00 + px(12)= 0.13694520147247420040072594436271529D+00 + px(13)= 0.17328916996785784591140856733770825D+00 + px(14)= 0.21817740121438829093802932764502942D+00 + px(15)= 0.27369653832797779961249433698659440D+00 + px(16)= 0.34242699906149186851837117836266744D+00 + px(17)= 0.42755291549796611179923030387067599D+00 + px(18)= 0.53299909554182158428637150014155340D+00 + px(19)= 0.66360070964681593124502281725999578D+00 + px(20)= 0.82531292882837242125696121653082133D+00 + px(21)= 0.10254699397214209538105608220109125D+01 + px(22)= 0.12731058972360937198805843671213415D+01 + px(23)= 0.15793550240911338542780101952325349D+01 + px(24)= 0.19579553515472245197868811537799063D+01 + px(25)= 0.24258926480980508600328748991363509D+01 + px(26)= 0.30042420249956940401285542677229005D+01 + px(27)= 0.37193027838966272865717795755051677D+01 + px(28)= 0.46041945352246751335584778757254638D+01 + px(29)= 0.57012278072022800341169042054436900D+01 + px(30)= 0.70656725104389005421933634969655744D+01 + px(31)= 0.87722686454754899579881428713693370D+01 + px(32)= 0.10927706100825575628425432470397431D+02 + px(33)= 0.13698096354347216687650292593862930D+02 + px(34)= 0.17383397148588794631800252981912492D+02 + px(35)= 0.22713103140185533251728138884147593D+02 + pw( 1)= 0.10494256988526777696106600190246748D-02 + pw( 2)= 0.24690695403322712989700976425347600D-02 + pw( 3)= 0.39557664886781607598248979520245308D-02 + pw( 4)= 0.55511159047852988512979064225320046D-02 + pw( 5)= 0.73075320876513631863948214849392405D-02 + pw( 6)= 0.92909810870320629850049703997767543D-02 + pw( 7)= 0.11585945487945439996692114097667348D-01 + pw( 8)= 0.14300927726198362348901143963929085D-01 + pw( 9)= 0.17573886933485282780152536853558582D-01 + pw(10)= 0.21577101857175674214083208317224449D-01 + pw(11)= 0.26521578525384159056539231969282277D-01 + pw(12)= 0.32662117530178998186687978899517756D-01 + pw(13)= 0.40304760583700356024679394121082348D-01 + pw(14)= 0.49818024406493848323885700890345396D-01 + pw(15)= 0.61648498278387927702353594142316029D-01 + pw(16)= 0.76340842519971003579540175209086024D-01 + pw(17)= 0.94562313841812711417919902804637839D-01 + pw(18)= 0.11713243611448668905011510723033361D+00 + pw(19)= 0.14505903302187588429490257418340878D+00 + pw(20)= 0.17958246056420660249030476513346108D+00 + pw(21)= 0.22223064193925674596660827103307295D+00 + pw(22)= 0.27488866495140173131704069241445894D+00 + pw(23)= 0.33988865913831065785773004058455433D+00 + pw(24)= 0.42012914561889293897263514396102618D+00 + pw(25)= 0.51923942630667304009247143973261919D+00 + pw(26)= 0.64181663147268449329914151912099321D+00 + pw(27)= 0.79378665914719288398623606417929212D+00 + pw(28)= 0.98298862630823929161929765759110274D+00 + pw(29)= 0.12201876100804632086249205242578178D+01 + pw(30)= 0.15209676736777719585360185637783971D+01 + pw(31)= 0.19096027011177832508222236236206329D+01 + pw(32)= 0.24279450923678125311674033890233300D+01 + pw(33)= 0.31595056381881229211813965570928908D+01 + pw(34)= 0.43144357961085612515538420099471387D+01 + pw(35)= 0.67342659787038143488625655899472123D+01 +endif +if(kn == 36) then + px( 1)= 0.39679800273035937692456569764529929D-03 + px( 2)= 0.21012371857514574429814634636370868D-02 + px( 3)= 0.52114717733736355471865173697424917D-02 + px( 4)= 0.98067697534794548776488857189272695D-02 + px( 5)= 0.16008829048610610861085787414831567D-01 + px( 6)= 0.23991913025422802197171963117636148D-01 + px( 7)= 0.33996529414545629919769339896568646D-01 + px( 8)= 0.46347640349961743012995653495638967D-01 + px( 9)= 0.61477688475695067453069318914244194D-01 + px(10)= 0.79954234262530625440149978974196142D-01 + px(11)= 0.10251169364078550312390054066216880D+00 + px(12)= 0.13008697511618887542147079025562156D+00 + px(13)= 0.16385989560749921185926347303565033D+00 + px(14)= 0.20530066324396880599546126878370330D+00 + px(15)= 0.25622775706568656239933185280550197D+00 + px(16)= 0.31887992047473434247218676095668498D+00 + px(17)= 0.39600601270766777602081700578327931D+00 + px(18)= 0.49097660869784478263571961512354166D+00 + px(19)= 0.60792179107232897416464078744063313D+00 + px(20)= 0.75190061348713360022081088193051979D+00 + px(21)= 0.92910925563205451252801298079690580D+00 + px(22)= 0.11471370551908310980346755006364606D+01 + px(23)= 0.14152827011474745738425470935393175D+01 + px(24)= 0.17449475260933662620173991491940897D+01 + px(25)= 0.21501302020535220058465237512287759D+01 + px(26)= 0.26480593984665582591226977578025315D+01 + px(27)= 0.32600222787200297912657865580064671D+01 + px(28)= 0.40124854077257132134482639353308378D+01 + px(29)= 0.49386780680939238173283121803851184D+01 + px(30)= 0.60809546519546778854776584514884910D+01 + px(31)= 0.74945653594942201002132544001003708D+01 + px(32)= 0.92541900969385753944715137178141946D+01 + px(33)= 0.11466484149563363201326491294746713D+02 + px(34)= 0.14297715117728357842590639672319382D+02 + px(35)= 0.18048653298710078396919876650411551D+02 + px(36)= 0.23452167308415151157069277254133065D+02 + pw( 1)= 0.10195057714700151361278566695804673D-02 + pw( 2)= 0.23972109360881477362914694357034945D-02 + pw( 3)= 0.38363158447342177123425836110343923D-02 + pw( 4)= 0.53744010839767438647013296257593752D-02 + pw( 5)= 0.70585351522933460613686733697482296D-02 + pw( 6)= 0.89474146486528988019752703037120466D-02 + pw( 7)= 0.11115594177177423398237576162176609D-01 + pw( 8)= 0.13658239363054657454269471936854599D-01 + pw( 9)= 0.16695920500566358906782047930663171D-01 + pw(10)= 0.20378981713905551927970998274789754D-01 + pw(11)= 0.24891420496520210335919294073059272D-01 + pw(12)= 0.30454995468399097263969267647541847D-01 + pw(13)= 0.37334924544188163864246559109559135D-01 + pw(14)= 0.45848499525488242728225859778156080D-01 + pw(15)= 0.56377323577091624057815057252308315D-01 + pw(16)= 0.69383296835632602012769529156119272D-01 + pw(17)= 0.85428366438632838815674340562473751D-01 + pw(18)= 0.10519836473842813187954075539197942D+00 + pw(19)= 0.12953172788969281249628541902884322D+00 + pw(20)= 0.15945437502320699870857044488189859D+00 + pw(21)= 0.19622256830996434214924230362068723D+00 + pw(22)= 0.24137631349243177937384183525770442D+00 + pw(23)= 0.29680703930305420431908018221051720D+00 + pw(24)= 0.36484530976842630509122266207581834D+00 + pw(25)= 0.44837789623740881716704249442275046D+00 + pw(26)= 0.55101005426642996452229825498001978D+00 + pw(27)= 0.67730110868383751051906031936646973D+00 + pw(28)= 0.83312535104283525888024444527691004D+00 + pw(29)= 0.10262590489160128936079715697075387D+01 + pw(30)= 0.12674001847129782122095271537034034D+01 + pw(31)= 0.15720761022006313395632370048802129D+01 + pw(32)= 0.19645432933987013230055443656345311D+01 + pw(33)= 0.24867374093856289898664232230940440D+01 + pw(34)= 0.32225090907915900251361180974071379D+01 + pw(35)= 0.43831496004036045715808964702697293D+01 + pw(36)= 0.68154560080748566632769223367688120D+01 +endif +if(kn == 37) then + px( 1)= 0.38582699810911109314412610582600285D-03 + px( 2)= 0.20425701942290192876130910611009103D-02 + px( 3)= 0.50633691210660737148684629408797998D-02 + px( 4)= 0.95207896708291718422773083291578438D-02 + px( 5)= 0.15525778682735758953134513377993514D-01 + px( 6)= 0.23236458251477143892762148491421503D-01 + px( 7)= 0.32869898315502381365977793756399700D-01 + px( 8)= 0.44717783821182009237592740999277480D-01 + px( 9)= 0.59166282378413474386219652173387790D-01 + px(10)= 0.76720016407611302507577044991006663D-01 + px(11)= 0.98029725568599136751267367485311366D-01 + px(12)= 0.12392331356464118155739304688774820D+00 + px(13)= 0.15544072437387706131982741857227680D+00 + px(14)= 0.19387425549171156055801006554376491D+00 + px(15)= 0.24081693190720646255336253748180444D+00 + px(16)= 0.29822205905402883495704063798959865D+00 + px(17)= 0.36847715057286422135924881742734873D+00 + px(18)= 0.45449546602004465241771763312107545D+00 + px(19)= 0.55982869948207391699702170763669914D+00 + px(20)= 0.68880504649787296744511192196333508D+00 + px(21)= 0.84669796033901533360995646038438838D+00 + px(22)= 0.10399324356558473561934598413040205D+01 + px(23)= 0.12763377882779834661586906782407420D+01 + px(24)= 0.15654589805265212137286807835074586D+01 + px(25)= 0.19189432205583964070987477122905362D+01 + px(26)= 0.23510260307961591423359389441199440D+01 + px(27)= 0.28791534439182687258499291870153821D+01 + px(28)= 0.35247986674670581588018777649051460D+01 + px(29)= 0.43145708343894232186639323097990119D+01 + px(30)= 0.52817877652195029549414854087069499D+01 + px(31)= 0.64688326916686594614365796118970700D+01 + px(32)= 0.79309297085796581254349823893132444D+01 + px(33)= 0.97427028243618144296296506331896705D+01 + px(34)= 0.12010788145502681400420281251059076D+02 + px(35)= 0.14901634128904268951043131468444574D+02 + px(36)= 0.18716850108988162034804184337830585D+02 + px(37)= 0.24192677368936148334878341907713337D+02 + pw( 1)= 0.99125336551824862194889482679760082D-03 + pw( 2)= 0.23294764153889055024996601218914981D-02 + pw( 3)= 0.37240772547834537676742266421935443D-02 + pw( 4)= 0.52091149342528848273366607113774535D-02 + pw( 5)= 0.68270221227859789716885624541247654D-02 + pw( 6)= 0.86302549871569688435939302492406456D-02 + pw( 7)= 0.10684907929606001826647765915296935D-01 + pw( 8)= 0.13074823650284587459124044679067858D-01 + pw( 9)= 0.15905815137012769943635728692865399D-01 + pw(10)= 0.19309592949421631021202995586098476D-01 + pw(11)= 0.23447231955796843102356124526512731D-01 + pw(12)= 0.28512602643555882480364043596258204D-01 + pw(13)= 0.34736803269082359319855891282489532D-01 + pw(14)= 0.42394778196431500823106022890118590D-01 + pw(15)= 0.51814900990726921245206073768999637D-01 + pw(16)= 0.63391754887352977638748380428399567D-01 + pw(17)= 0.77602103176771937423249866012581660D-01 + pw(18)= 0.95024188094564273453247510650970162D-01 + pw(19)= 0.11636084820208068310423973571076075D+00 + pw(20)= 0.14246733764500807361275561785541076D+00 + pw(21)= 0.17438513757282787980358846897741684D+00 + pw(22)= 0.21338354906834882772151573726954513D+00 + pw(23)= 0.26101159566291693588807442987832548D+00 + pw(24)= 0.31916397294414160306456288368801219D+00 + pw(25)= 0.39016685675452558044568607656388734D+00 + pw(26)= 0.47689304458379631259135969323199221D+00 + pw(27)= 0.58292255373275454371013059783773154D+00 + pw(28)= 0.71277724055369180393369777699867214D+00 + pw(29)= 0.87228215826925362283182363826585130D+00 + pw(30)= 0.10691555317433509592820312414251932D+01 + pw(31)= 0.13140356602358313756291999585335256D+01 + pw(32)= 0.16224029261605229346735838359067505D+01 + pw(33)= 0.20185076394260072518838683149008566D+01 + pw(34)= 0.25443795429863924820797324346795088D+01 + pw(35)= 0.32842160798477830990525297742207531D+01 + pw(36)= 0.44504422639266402367212097908451198D+01 + pw(37)= 0.68950534584189288110300715731079937D+01 +endif +if(kn == 38) then + px( 1)= 0.37544882641214739544404487691314902D-03 + px( 2)= 0.19871183957226785505693683210337852D-02 + px( 3)= 0.49235906141676391015802154321946533D-02 + px( 4)= 0.92514737512580193448510992395727495D-02 + px( 5)= 0.15072210889929832914852564552062275D-01 + px( 6)= 0.22529747565558688599709359818783270D-01 + px( 7)= 0.31820706479818463453808764992549706D-01 + px( 8)= 0.43207917804566229579979815937989733D-01 + px( 9)= 0.57037614703055219788340245932475170D-01 + px(10)= 0.73760278305643433875885418326364923D-01 + px(11)= 0.93954814072806105125535625384970094D-01 + px(12)= 0.11835572279751275812340426098616966D+00 + px(13)= 0.14788342387597970850254556771271985D+00 + px(14)= 0.18367881184867197784381084444678116D+00 + px(15)= 0.22714406717510235393336995794234796D+00 + px(16)= 0.27999230770730436478836636945630613D+00 + px(17)= 0.34430881911279975709736095638802572D+00 + px(18)= 0.42262660561797928237112864643574725D+00 + px(19)= 0.51801914793576724661446993655612171D+00 + px(20)= 0.63421369082081852676499438226656058D+00 + px(21)= 0.77572913890901541203968257694714068D+00 + px(22)= 0.94804373045176433040869132064288662D+00 + px(23)= 0.11577991608735723735204737419458983D+01 + px(24)= 0.14130499332620481162978195094240590D+01 + px(25)= 0.17235697910838497297077019443263907D+01 + px(26)= 0.21012318099421305470189826928381899D+01 + px(27)= 0.25604863031155004117350909468147106D+01 + px(28)= 0.31189733700354116534218512680792939D+01 + px(29)= 0.37983289491193925871766329237375059D+01 + px(30)= 0.46252830528399268638347963582117115D+01 + px(31)= 0.56332239825265713634766419255858690D+01 + px(32)= 0.68645515916046830561016745830456758D+01 + px(33)= 0.83744595975704688906750898197385297D+01 + px(34)= 0.10237521325244942798063968606118549D+02 + px(35)= 0.12560368707162995423911721031640974D+02 + px(36)= 0.15509653816608570103011114649053437D+02 + px(37)= 0.19387848243793684608247770401886161D+02 + px(38)= 0.24934561281635323945874914765928781D+02 + pw( 1)= 0.96453265301318427548094362468201990D-03 + pw( 2)= 0.22655173902979609807058505788853476D-02 + pw( 3)= 0.36184028900176044156504296271444603D-02 + pw( 4)= 0.50541489537150279336470105887486457D-02 + pw( 5)= 0.66111480676528721493086153967962652D-02 + pw( 6)= 0.83364709970141383682744167961798317D-02 + pw( 7)= 0.10288965239682486008404941690219778D-01 + pw( 8)= 0.12542814650103402794331337460320134D-01 + pw( 9)= 0.15191255363614986206935175799741358D-01 + pw(10)= 0.18350103409130288787013350611437802D-01 + pw(11)= 0.22160882105318682870151221782836377D-01 + pw(12)= 0.26793767627663720473790893510507021D-01 + pw(13)= 0.32451109787899399883524910095936794D-01 + pw(14)= 0.39372541954009629417292538828294592D-01 + pw(15)= 0.47842474290684012119499234323721647D-01 + pw(16)= 0.58200301955035707564120883548436910D-01 + pw(17)= 0.70853350044334604149559560301899009D-01 + pw(18)= 0.86292591208817884216614254072269241D-01 + pw(19)= 0.10511141477535783724613732416344133D+00 + pw(20)= 0.12802804286358628862797652201096219D+00 + pw(21)= 0.15591251149396720172437659323068423D+00 + pw(22)= 0.18981949259837187914390190669151597D+00 + pw(23)= 0.23102871661757404151219056182072738D+00 + pw(24)= 0.28109550616216987958688311291596089D+00 + pw(25)= 0.34191517456611856887400710976438959D+00 + pw(26)= 0.41580717321823436217712748531850542D+00 + pw(27)= 0.50562861773538920954280122211069833D+00 + pw(28)= 0.61493358954575527626904288123143367D+00 + pw(29)= 0.74820721198453838888752387245859917D+00 + pw(30)= 0.91122787666637256187790465660240598D+00 + pw(31)= 0.11116604876702928314541969577850448D+01 + pw(32)= 0.13600906035248579424433723117814169D+01 + pw(33)= 0.16719605033700192144793651594580592D+01 + pw(34)= 0.20715242290059813187830495943964160D+01 + pw(35)= 0.26009149126553161918961209793496358D+01 + pw(36)= 0.33446822473746498943306001989892373D+01 + pw(37)= 0.45163779904182883931050846768355054D+01 + pw(38)= 0.69731288252045659086112524629580346D+01 +endif +if(kn == 39) then + px( 1)= 0.36561668715257863266695867839020084D-03 + px( 2)= 0.19346233268023844607752680140092771D-02 + px( 3)= 0.47914454654721250429608171020940566D-02 + px( 4)= 0.89973768150331118636464557997751850D-02 + px( 5)= 0.14645426459456932116061991386768235D-01 + px( 6)= 0.21867035291160181873696376563265340D-01 + px( 7)= 0.30840905275941465760959181463130484D-01 + px( 8)= 0.41804727278858172089611260923812570D-01 + px( 9)= 0.55070109596011254633584203260158210D-01 + px(10)= 0.71040748192925559559810356000599258D-01 + px(11)= 0.90233607305000360208735980487986444D-01 + px(12)= 0.11330278644013276387294932973826412D+00 + px(13)= 0.14106605202734226946949558771740988D+00 + px(14)= 0.17453471922456742887333856505611605D+00 + px(15)= 0.21494840120203884103453089817606487D+00 + px(16)= 0.26381673974086082406623827494703215D+00 + px(17)= 0.32297045833724193805138734154727157D+00 + px(18)= 0.39462409178162717260419023658129017D+00 + px(19)= 0.48145279757007806226628785793904093D+00 + px(20)= 0.58668591241992432419881469135301121D+00 + px(21)= 0.71422043441619441875490470925759117D+00 + px(22)= 0.86875839397007365611188934469437619D+00 + px(23)= 0.10559731561016268223241013124184012D+01 + px(24)= 0.12827111749931695832113875967148563D+01 + px(25)= 0.15572378120557119987867239447717751D+01 + px(26)= 0.18895389181496921659373840308737441D+01 + px(27)= 0.22916946565666188923157402572614733D+01 + px(28)= 0.27783497396142931899333530655336223D+01 + px(29)= 0.33673171344416414054075236409384439D+01 + px(30)= 0.40803746579421435360342926127423516D+01 + px(31)= 0.49443541982197303363350679681860647D+01 + px(32)= 0.59926991656565097356612507658187921D+01 + px(33)= 0.72678162022521051390335450639073869D+01 + px(34)= 0.88248661080276432761936204880867089D+01 + px(35)= 0.10738377470289614240120984197486960D+02 + px(36)= 0.13114992524642316318648160324994074D+02 + px(37)= 0.16121587864037157860973324745148775D+02 + px(38)= 0.20061517632007615495343028194490514D+02 + px(39)= 0.25677751590102375173572742100364982D+02 + pw( 1)= 0.93922220165197278758235278705125378D-03 + pw( 2)= 0.22050238560788499833482429614154492D-02 + pw( 3)= 0.35187216063020038804486790499409390D-02 + pw( 4)= 0.49085365506617880092326144941967522D-02 + pw( 5)= 0.64093236535073721253961413565734053D-02 + pw( 6)= 0.80634829478885867255854065621590218D-02 + pw( 7)= 0.99236234747694546900134742610818704D-02 + pw( 8)= 0.12055651790884970603397688654161516D-01 + pw( 9)= 0.14542041549622581450180752510105879D-01 + pw(10)= 0.17484998327571243644029476510788471D-01 + pw(11)= 0.21009295862219636601744654763649681D-01 + pw(12)= 0.25264862509860073296774992740630911D-01 + pw(13)= 0.30429647240913452161840939756263150D-01 + pw(14)= 0.36713599504420717007290949145895236D-01 + pw(15)= 0.44364526109307181291869691370038599D-01 + pw(16)= 0.53676231693459070390920038412533532D-01 + pw(17)= 0.64999020675906355562370375168522980D-01 + pw(18)= 0.78752553989958863994856994484156486D-01 + pw(19)= 0.95441198327713183692637121288756175D-01 + pw(20)= 0.11567225339206538896170499893449828D+00 + pw(21)= 0.14017770592600709076251201422928256D+00 + pw(22)= 0.16984043156916862434884153031547730D+00 + pw(23)= 0.20572609864904223612685592628540649D+00 + pw(24)= 0.24912251113833289072621517399741476D+00 + pw(25)= 0.30158889670929686638710684005186011D+00 + pw(26)= 0.36501892348491283180440889799004221D+00 + pw(27)= 0.44172341189659552954879007161166844D+00 + pw(28)= 0.53454253015480261760607954897116394D+00 + pw(29)= 0.64700413373280623862585575306735597D+00 + pw(30)= 0.78355765539183802574608463599148275D+00 + pw(31)= 0.94993752822514424638014213628449162D+00 + pw(32)= 0.11537599191614455897031555123375337D+01 + pw(33)= 0.14055641164435818777056131176331966D+01 + pw(34)= 0.17207623206557334615184702866032957D+01 + pw(35)= 0.21236210670677286367475649834301224D+01 + pw(36)= 0.26563848390712257212223490954097065D+01 + pw(37)= 0.34039597582687380169009802962894561D+01 + pw(38)= 0.45810165654673262153491823163228067D+01 + pw(39)= 0.70497477205153040846297597338553262D+01 +endif +if(kn == 40) then + px( 1)= 0.35628858567646346686837324510549370D-03 + px( 2)= 0.18848536486770311807591243848705546D-02 + px( 3)= 0.46663181280317856746168443158323968D-02 + px( 4)= 0.87572190745324032833162010260613025D-02 + px( 5)= 0.14243053462025729117427416370397854D-01 + px( 6)= 0.21244187714177671012923575144774521D-01 + px( 7)= 0.29923549070178329860704059006245261D-01 + px( 8)= 0.40496827285887221934804797848083118D-01 + px( 9)= 0.53245474481585595162008164942410512D-01 + px(10)= 0.68532582430157398594384714629729053D-01 + px(11)= 0.86821479567961854490494392488927025D-01 + px(12)= 0.10869675959524067911380119096839804D+00 + px(13)= 0.13488761722649275069059959653024014D+00 + px(14)= 0.16629388885455273807852247897310019D+00 + px(15)= 0.20401590432120076472183920609098846D+00 + px(16)= 0.24938984749465339706889280753165521D+00 + px(17)= 0.30403061355446670063478194165459695D+00 + px(18)= 0.36988419909316149270323787724602738D+00 + px(19)= 0.44929166750312582192671647579960907D+00 + px(20)= 0.54506686870623140996608099899850594D+00 + px(21)= 0.66059043398617261201896956069992349D+00 + px(22)= 0.79992312516554243682533149390065526D+00 + px(23)= 0.96794240207734869040410161024196399D+00 + px(24)= 0.11705071340447720718704866053067197D+01 + px(25)= 0.14146568411163837104501550620112388D+01 + px(26)= 0.17088539364613371655586392377700409D+01 + px(27)= 0.20632805527205428543659205472373927D+01 + px(28)= 0.24902063739619416723013658330633921D+01 + px(29)= 0.30044517126674338175114949672570867D+01 + px(30)= 0.36239831566943951780098581431720407D+01 + px(31)= 0.43707017882279510268089904263627705D+01 + px(32)= 0.52715247584257797079839546521646347D+01 + px(33)= 0.63599375545114314246728093354610643D+01 + px(34)= 0.76783457965158492083638000098773087D+01 + px(35)= 0.92818762327705176821137326934557272D+01 + px(36)= 0.11245019162985129683252871377987811D+02 + px(37)= 0.13674441002824139783929760948169562D+02 + px(38)= 0.16737262031081031207287624813172862D+02 + px(39)= 0.20737736626704553460329127303821744D+02 + px(40)= 0.26422185009410264431343933932848600D+02 + pw( 1)= 0.91521311273862401110414707754929676D-03 + pw( 2)= 0.21477191521928219197974773759587103D-02 + pw( 3)= 0.34245277733745781136392870149979323D-02 + pw( 4)= 0.47714307428687057651870425259278087D-02 + pw( 5)= 0.62201720373839252470536835470530964D-02 + pw( 6)= 0.78090816762799270025355432702422127D-02 + pw( 7)= 0.95853728776285881753692296587104674D-02 + pw( 8)= 0.11607827206228475028893871423142139D-01 + pw( 9)= 0.13949671718334027814092300225470600D-01 + pw(10)= 0.16701414648594482319533882783791014D-01 + pw(11)= 0.19973423842092704666661066907283955D-01 + pw(12)= 0.23898237345016617036372142753573396D-01 + pw(13)= 0.28632957303620028137537791558953675D-01 + pw(14)= 0.34362389756581172586707762233025920D-01 + pw(15)= 0.41303629060426009015015262453667296D-01 + pw(16)= 0.49712538117407657728121783442595623D-01 + pw(17)= 0.59892265779927519547458491064456622D-01 + pw(18)= 0.72203793116717427042782050909270304D-01 + pw(19)= 0.87078559354632307141427375007562753D-01 + pw(20)= 0.10503340154082661838457995134010675D+00 + pw(21)= 0.12668825810872029062869005987170137D+00 + pw(22)= 0.15278730399841792433216511718627756D+00 + pw(23)= 0.18422442714170367120354764302353497D+00 + pw(24)= 0.22207427969059917966672557751489214D+00 + pw(25)= 0.26763062752730913486480650477422812D+00 + pw(26)= 0.32245451049240744596849032983153986D+00 + pw(27)= 0.38843603675318055142995014015287773D+00 + pw(28)= 0.46787586661529213993652263387714350D+00 + pw(29)= 0.56359633220992492533897924842606201D+00 + pw(30)= 0.67909910572915995345517074647986275D+00 + pw(31)= 0.81879920277693732798543935642566763D+00 + pw(32)= 0.98838986255598396167621024931216632D+00 + pw(33)= 0.11954429228293928218210248618287692D+01 + pw(34)= 0.14504574076282353803601676013439121D+01 + pw(35)= 0.17688226967485673903033088821262440D+01 + pw(36)= 0.21748255570140773474399755838047824D+01 + pw(37)= 0.27108286283847583355764140776741630D+01 + pw(38)= 0.34620975677969687591016085409314313D+01 + pw(39)= 0.46444137521824838114376662229079725D+01 + pw(40)= 0.71249713212825268738104553603659849D+01 +endif +if(kn == 41) then + px( 1)= 0.34742673101001169298125786189192247D-03 + px( 2)= 0.18376016626066348589572611512977315D-02 + px( 3)= 0.45476582212664459469467031011454626D-02 + px( 4)= 0.85298627941631364768947297314276555D-02 + px( 5)= 0.13862998302969016172574004035541326D-01 + px( 6)= 0.20657586094570613395846290407493021D-01 + px( 7)= 0.29062610848297752288163047809306784D-01 + px( 8)= 0.39274425358723378021088136325757383D-01 + px( 9)= 0.51548104852787877566528890250659503D-01 + px(10)= 0.66211354354233299700140534259286291D-01 + px(11)= 0.83680871232624778086967339679729237D-01 + px(12)= 0.10448092042565902676173295142297193D+00 + px(13)= 0.12926394759018944327153762885108248D+00 + px(14)= 0.15883342588000497563552833153551216D+00 + px(15)= 0.19416971506631978910646170849840836D+00 + px(16)= 0.23646026989593002896655227863024343D+00 + px(17)= 0.28713586785719640965554266593644607D+00 + px(18)= 0.34791462052391896376462581083932962D+00 + px(19)= 0.42085552853141849349998238918417102D+00 + px(20)= 0.50842339932407666570140739641291868D+00 + px(21)= 0.61356716058210435249023757196485689D+00 + px(22)= 0.73981399451706291428067431492187345D+00 + px(23)= 0.89138229125166542251073985530751701D+00 + px(24)= 0.10733171950376849976883148880289368D+01 + px(25)= 0.12916535619608503598632452641060769D+01 + px(26)= 0.15536125962495445292827177181517718D+01 + px(27)= 0.18678405221272713224762723085314113D+01 + px(28)= 0.22447007897223064544921318025044505D+01 + px(29)= 0.26966362208010185020303083586792707D+01 + px(30)= 0.32386254398637317225899745563640301D+01 + px(31)= 0.38887712166747737454815122744695001D+01 + px(32)= 0.46690813167550880509031095796259491D+01 + px(33)= 0.56065436673995350526181894949125319D+01 + px(34)= 0.67346748355004755553478375342343373D+01 + px(35)= 0.80958733206377174584850755308484861D+01 + px(36)= 0.97452318197102039374768068435419398D+01 + px(37)= 0.11757209117436549173752389088784801D+02 + px(38)= 0.14238509041698890616269777375250039D+02 + px(39)= 0.17356513104267573046562740943972043D+02 + px(40)= 0.21416391258741087275776630427364534D+02 + px(41)= 0.27167802059826190961056594217938860D+02 + pw( 1)= 0.89240744144922233411809208993761819D-03 + pw( 2)= 0.20933555605342582529457970383304286D-02 + pw( 3)= 0.33353720205083480430061442115773668D-02 + pw( 4)= 0.46420859644861606778799308270413399D-02 + pw( 5)= 0.60424942046045364422720752662654441D-02 + pw( 6)= 0.75713643924191847990582966217707736D-02 + pw( 7)= 0.92712216138022670728066109402065568D-02 + pw( 8)= 0.11194687905391631640874958915231129D-01 + pw( 9)= 0.13407015211365284658909716719520612D-01 + pw(10)= 0.15988623214362774067334406180515621D-01 + pw(11)= 0.19037444847571012625771486857897508D-01 + pw(12)= 0.22671015569010270758765332286863717D-01 + pw(13)= 0.27028523256092179219838374149240132D-01 + pw(14)= 0.32273327230549150524506142920429224D-01 + pw(15)= 0.38596560710149393493004578367090095D-01 + pw(16)= 0.46222282500302385970118307137327059D-01 + pw(17)= 0.55414377032731402665476377100171599D-01 + pw(18)= 0.66485217878875067216476264141658569D-01 + pw(19)= 0.79806099857296581997108727302834078D-01 + pw(20)= 0.95819567831341990475832520086841170D-01 + pw(21)= 0.11505394471321825395206987370902480D+00 + pw(22)= 0.13814054039338535506658629671808687D+00 + pw(23)= 0.16583420916823077593541262981490681D+00 + pw(24)= 0.19903814998014368860888042911909173D+00 + pw(25)= 0.23883416675372027779180574899396133D+00 + pw(26)= 0.28652010702264135124796999487042423D+00 + pw(27)= 0.34365700663826115610743525613053815D+00 + pw(28)= 0.41212981100557300459399743393003457D+00 + pw(29)= 0.49422782047790986141383860345034700D+00 + pw(30)= 0.59275496121528084438932784828140258D+00 + pw(31)= 0.71118703347840582157967515903715531D+00 + pw(32)= 0.85390607737164494406520343373727721D+00 + pw(33)= 0.10265669138579149759586965414767668D+01 + pw(34)= 0.12367012590999922651850699786000809D+01 + pw(35)= 0.14947734294405908884100490275594732D+01 + pw(36)= 0.18161565391675246807943861856738552D+01 + pw(37)= 0.22251644165626364386518263812767700D+01 + pw(38)= 0.27642836611922099334373971118073338D+01 + pw(39)= 0.35191416662378097344643779658011469D+01 + pw(40)= 0.47066216438742057965343731115092608D+01 + pw(41)= 0.71988567667853812451766331822159390D+01 +endif +if(kn == 42) then + px( 1)= 0.33899702213207280303556565703741990D-03 + px( 2)= 0.17926803514586918122543179233298713D-02 + px( 3)= 0.44349720472477033588788261295928634D-02 + px( 4)= 0.83142928381370954944600712429031326D-02 + px( 5)= 0.13503405398912139202218387144909328D-01 + px( 6)= 0.20104047687222803992574530787065418D-01 + px( 7)= 0.28252833643724993821423023769509983D-01 + px( 8)= 0.38129051839561461833536104861505433D-01 + px( 9)= 0.49964611823304399745794720191846551D-01 + px(10)= 0.64056255466771758626847757441909748D-01 + px(11)= 0.80779982442382913230902383270370264D-01 + px(12)= 0.10060749467110151157610640414769097D+00 + px(13)= 0.12412446888143911749100232205634547D+00 + px(14)= 0.15205071694865663047292718989423185D+00 + px(15)= 0.18526275842687218439176583557408384D+00 + px(16)= 0.22481983515906206676758114216568014D+00 + px(17)= 0.27199475262711723613680204884879081D+00 + px(18)= 0.32831107191481630404052581830045252D+00 + px(19)= 0.39558818386929789909718409480658300D+00 + px(20)= 0.47599581233231530250697095670252798D+00 + px(21)= 0.57211961616026616366050109797592391D+00 + px(22)= 0.68703982771101430011400322499023268D+00 + px(23)= 0.82442528240215569955771735381318182D+00 + px(24)= 0.98864576667159865631116805089235696D+00 + px(25)= 0.11849063745873302190560203655789150D+01 + px(26)= 0.14194085949505718343903162487063753D+01 + px(27)= 0.16995442942632463982698312503343140D+01 + px(28)= 0.20341308620656637127889186046327503D+01 + px(29)= 0.24336989628881715953740329050451161D+01 + px(30)= 0.29108493179186889815360617341447733D+01 + px(31)= 0.34807029996457758665062194913386789D+01 + px(32)= 0.41614831892896507985708808145127823D+01 + px(33)= 0.49752895914290239340663575251573810D+01 + px(34)= 0.59491683086745559122027634515663744D+01 + px(35)= 0.71166577579837023101788394059159047D+01 + px(36)= 0.85201446660709564225208283238116453D+01 + px(37)= 0.10214688587324610607847909840137625D+02 + px(38)= 0.12274723743508615874901666750507663D+02 + px(39)= 0.14807003938009410302904866873102566D+02 + px(40)= 0.17979187958243548948159702687043315D+02 + px(41)= 0.22097374571880291313408725390611447D+02 + px(42)= 0.27914546740390271130933515005738687D+02 + pw( 1)= 0.87071685062486345289172157206777171D-03 + pw( 2)= 0.20417105878963868357298871515770460D-02 + pw( 3)= 0.32508535230574150323908782582998050D-02 + pw( 4)= 0.45198431212516341141392418533095799D-02 + pw( 5)= 0.58752408889593859079959250826252330D-02 + pw( 6)= 0.73486832934924937540882060804195150D-02 + pw( 7)= 0.89786045784956752332612520710862566D-02 + pw( 8)= 0.10812279756759579335639253148771645D-01 + pw( 9)= 0.12908056239717444457676015564846892D-01 + pw(10)= 0.15337623022218768136530241218842739D-01 + pw(11)= 0.18188143417513304840817866168341234D-01 + pw(12)= 0.21564159053085002516938026776853287D-01 + pw(13)= 0.25589384327944258948303670138994570D-01 + pw(14)= 0.30408768217117848936156421895538494D-01 + pw(15)= 0.36191344708155839281431088511826248D-01 + pw(16)= 0.43134326776658801296020544503200249D-01 + pw(17)= 0.51468687405796338041097105695766697D-01 + pw(18)= 0.61466278505771427877600196882159870D-01 + pw(19)= 0.73448477231342626126309188685807545D-01 + pw(20)= 0.87796417742468560052699023987407758D-01 + pw(21)= 0.10496300171125714320281462758628502D+00 + pw(22)= 0.12548703017795381911118827176515123D+00 + pw(23)= 0.15000994875313221416387070158771740D+00 + pw(24)= 0.17929586510892162331076812701124762D+00 + pw(25)= 0.21425571841544983392913772225490756D+00 + pw(26)= 0.25597680754299265400920520155620068D+00 + pw(27)= 0.30575939808892163354965075699382712D+00 + pw(28)= 0.36516295819168300959340602426394554D+00 + pw(29)= 0.43606594548816172375527529406447798D+00 + pw(30)= 0.52074538635856855968547701789163874D+00 + pw(31)= 0.62198649880275266624161896479324475D+00 + pw(32)= 0.74323973657036909817079545894769618D+00 + pw(33)= 0.88885572273739267168333040860537136D+00 + pw(34)= 0.10644536078580082753759483481056876D+01 + pw(35)= 0.12775289785980491630656102334631630D+01 + pw(36)= 0.15385165702010908217004674883905600D+01 + pw(37)= 0.18627791456676687262611182457669180D+01 + pw(38)= 0.22746636192228746631618234661193503D+01 + pw(39)= 0.28167854835143406837432208820997977D+01 + pw(40)= 0.35751353025579259567249078677450433D+01 + pw(41)= 0.47676889791677097956471776553934610D+01 + pw(42)= 0.72714575121908501417344894592801099D+01 +endif +if(kn == 43) then + px( 1)= 0.33096860786757750154898847120917057D-03 + px( 2)= 0.17499208566123461094898550046151874D-02 + px( 3)= 0.43278154096847331355188904178071447D-02 + px( 4)= 0.81096003646317825364067625754120221D-02 + px( 5)= 0.13162623744349233672062735003036974D-01 + px( 6)= 0.19580760979979784765554480104376286D-01 + px( 7)= 0.27489609951663532195103068673087354D-01 + px( 8)= 0.37053342869799653592283933396820960D-01 + px( 9)= 0.48483444452707627033448163720522761D-01 + px(10)= 0.62049459953072389488156028383179189D-01 + px(11)= 0.78091737934971368286739663026054961D-01 + px(12)= 0.97036017163386886705197231591959380D-01 + px(13)= 0.11940967168918372175774848212559756D+00 + px(14)= 0.14585958712300992040134315459436264D+00 + px(15)= 0.17717200022543900754299287107723618D+00 + px(16)= 0.21429507577429224752942159606670995D+00 + px(17)= 0.25836535107930525415060613221934697D+00 + px(18)= 0.31073935543877479582966549813156744D+00 + px(19)= 0.37303174367650550564362643090028699D+00 + px(20)= 0.44716127946900452807387231860775343D+00 + px(21)= 0.53540606537110705599704958284246183D+00 + px(22)= 0.64046959170961720285075028881968045D+00 + px(23)= 0.76555947611245457114222548768598476D+00 + px(24)= 0.91448119032995475750346963701721081D+00 + px(25)= 0.10917496366295753151128433644125688D+01 + px(26)= 0.13027221869155879323859021531153006D+01 + px(27)= 0.15537578204695570075088633454278321D+01 + px(28)= 0.18524084405877676482171967152697008D+01 + px(29)= 0.22076505684206197863173711190960219D+01 + px(30)= 0.26301688303884504911030778880813732D+01 + px(31)= 0.31327077074207879423269172652003364D+01 + px(32)= 0.37305161917200709143270025763858640D+01 + px(33)= 0.44419236396092482776254163193454757D+01 + px(34)= 0.52891086092067566005114585507151512D+01 + px(35)= 0.62991644484541209348967251319712091D+01 + px(36)= 0.75056437278338836321053009023954332D+01 + px(37)= 0.89509179672327616775374703555124487D+01 + px(38)= 0.10690015208487653327362214867738207D+02 + px(39)= 0.12797352129263264861734948864857745D+02 + px(40)= 0.15379744394428552764554128123427935D+02 + px(41)= 0.18605142714959248273487455172506479D+02 + px(42)= 0.22780586028957730000926904102977113D+02 + px(43)= 0.28662366237232832381924388811320297D+02 + pw( 1)= 0.85006145870848429334757469982573891D-03 + pw( 2)= 0.19925838109657703878469708707415329D-02 + pw( 3)= 0.31706135365278565227115398483457554D-02 + pw( 4)= 0.44041172343279662518790702958553625D-02 + pw( 5)= 0.57174896648731729427179253475841935D-02 + pw( 6)= 0.71396041280184287136596813041547263D-02 + pw( 7)= 0.87053105293648014281590926123488040D-02 + pw( 8)= 0.10457223593582426133616067442291247D-01 + pw( 9)= 0.12447691003362071183889633075575652D-01 + pw(10)= 0.14740821116468595319728157346456377D-01 + pw(11)= 0.17414420402114891406970230872875573D-01 + pw(12)= 0.20561736545282396913149978083028889D-01 + pw(13)= 0.24293057998247351709738591334952020D-01 + pw(14)= 0.28737438822145949516145087089517893D-01 + pw(15)= 0.34044977279791239841839057487094614D-01 + pw(16)= 0.40390072912737209513481740353650117D-01 + pw(17)= 0.47975933504865389303614731289416436D-01 + pw(18)= 0.57040421240269648291641111667509080D-01 + pw(19)= 0.67863232374253637764436196090399013D-01 + pw(20)= 0.80774426563059878523617995686503505D-01 + pw(21)= 0.96164420051293883535914595526514204D-01 + pw(22)= 0.11449568019439733156432157598398321D+00 + pw(23)= 0.13631648279559252800439663886965342D+00 + pw(24)= 0.16227722290947194423527738296713650D+00 + pw(25)= 0.19314992693280818601029240961091576D+00 + pw(26)= 0.22985183401280394837726875217480564D+00 + pw(27)= 0.27347424857196858115127321375763169D+00 + pw(28)= 0.32531839177175415639431272795218179D+00 + pw(29)= 0.38694082912221267186717847996208730D+00 + pw(30)= 0.46021245154573169215949688893911839D+00 + pw(31)= 0.54739734403912358955407316606691874D+00 + pw(32)= 0.65126193729202891760603881973382504D+00 + pw(33)= 0.77523203175189474974013051456241330D+00 + pw(34)= 0.92362846689464044743151918155531626D+00 + pw(35)= 0.11020374135362457925484267398369432D+01 + pw(36)= 0.13179220978816197700516342241847467D+01 + pw(37)= 0.15816923933252917714931130477654841D+01 + pw(38)= 0.19087060424955494066311220499293247D+01 + pw(39)= 0.23233483559535813743753279936284135D+01 + pw(40)= 0.28683678978287494020672145638838434D+01 + pw(41)= 0.36301191889236335565768841054222539D+01 + pw(42)= 0.48276614242022139042477288493084378D+01 + pw(43)= 0.73428236438762783138460481364952627D+01 +endif +if(kn == 44) then + px( 1)= 0.32331350836957546603744084419849788D-03 + px( 2)= 0.17091703169716826666769331280363544D-02 + px( 3)= 0.42257875064315455206561743770986376D-02 + px( 4)= 0.79149690832372160827916911872302865D-02 + px( 5)= 0.12839179023186853233140061142661062D-01 + px( 6)= 0.19085232241568949577899855509474977D-01 + px( 7)= 0.26768883186791699077493305776558926D-01 + px( 8)= 0.36040864557027892650839059295103373D-01 + px( 9)= 0.47094585778667144264845412823636449D-01 + px(10)= 0.60175615761544264401796703227341314D-01 + px(11)= 0.75592961225388113581704029754538741D-01 + px(12)= 0.93732029387412788080736336661325883D-01 + px(13)= 0.11506910593622805003749038841255850D+00 + px(14)= 0.14018727035639085866142640394142696D+00 + px(15)= 0.16979393946354437271616691990993614D+00 + px(16)= 0.20474060430292067802523043906109989D+00 + px(17)= 0.24604566740448100845633933054720796D+00 + px(18)= 0.29492149096685690036119497895105667D+00 + px(19)= 0.35280682708785925696562700896642017D+00 + px(20)= 0.42140579664772257270434756016372204D+00 + px(21)= 0.50273460611520704557595850856713180D+00 + px(22)= 0.59917729885865279633574477547906320D+00 + px(23)= 0.71355204803416102949757204396579206D+00 + px(24)= 0.84918981260514039761248477195520438D+00 + px(25)= 0.10100276016887074982125673033831345D+01 + px(26)= 0.12007191488994777606413935958511916D+01 + px(27)= 0.14267665395522811775994265815554724D+01 + px(28)= 0.16946773518359189852395063278947950D+01 + px(29)= 0.20121533231038550963171865171844820D+01 + px(30)= 0.23883186915697571717337880438736199D+01 + px(31)= 0.28339996248002878664983052972434529D+01 + px(32)= 0.33620712713198530053650295556681017D+01 + px(33)= 0.39878972622760453246577762221089257D+01 + px(34)= 0.47299002992789324698627794306541718D+01 + px(35)= 0.56103262027415485179449762966436919D+01 + px(36)= 0.66563061139504917392538125507749794D+01 + px(37)= 0.79014003881557845126238132373008625D+01 + px(38)= 0.93879629282953731177093056973506411D+01 + px(39)= 0.11170992458953013913700288894894255D+02 + px(40)= 0.13324895111520577881664368628734089D+02 + px(41)= 0.15956559624176004069472090284905753D+02 + px(42)= 0.19234241988629565032886618545011883D+02 + px(43)= 0.23465930980103016989495364982244146D+02 + px(44)= 0.29411210662212999597346209126697707D+02 + pw( 1)= 0.83036884987403513181911166534208663D-03 + pw( 2)= 0.19457941864115460112729277032771097D-02 + pw( 3)= 0.30943299472937825975912487677647078D-02 + pw( 4)= 0.42943871620956718877444401513682917D-02 + pw( 5)= 0.55684261348013693314562032089920088D-02 + pw( 6)= 0.69428725530123470368722128932621244D-02 + pw( 7)= 0.84494234604398957532432243635753483D-02 + pw( 8)= 0.10126616172976027067397699017267216D-01 + pw( 9)= 0.12021566160747622141057949671782224D-01 + pw(10)= 0.14191778414242213809164508501045497D-01 + pw(11)= 0.16706905485358088359523445571590018D-01 + pw(12)= 0.19650346938023694904494456157357191D-01 + pw(13)= 0.23120695042785977245044097938079732D-01 + pw(14)= 0.27233209875082837015975056631389154D-01 + pw(15)= 0.32121665600246232435212867479076854D-01 + pw(16)= 0.37940949845194110884289250317530407D-01 + pw(17)= 0.44870698984096406199701289089460328D-01 + pw(18)= 0.53120093303752446444612781898982047D-01 + pw(19)= 0.62933823516449817737731500047201570D-01 + pw(20)= 0.74599224373678150636698930396457382D-01 + pw(21)= 0.88454634536330462816566048592135618D-01 + pw(22)= 0.10489914083790778708121373941299966D+00 + pw(23)= 0.12440396953470249784020277878699461D+00 + pw(24)= 0.14752589155074737609776847913675882D+00 + pw(25)= 0.17492312579420798336376479913219788D+00 + pw(26)= 0.20737437787760038519638439806427540D+00 + pw(27)= 0.24580187433567244121381237836878940D+00 + pw(28)= 0.29129959389673368311222620480570831D+00 + pw(29)= 0.34516843605203008284533614069195425D+00 + pw(30)= 0.40896093500380134048288051036096437D+00 + pw(31)= 0.48453955284019804235911876681118743D+00 + pw(32)= 0.57415497731524515579134334664918167D+00 + pw(33)= 0.68055495684587130214889340452349758D+00 + pw(34)= 0.80714146055673592921708826105860249D+00 + pw(35)= 0.95820721891454345652447310233062356D+00 + pw(36)= 0.11393080348437704220650007990276423D+01 + pw(37)= 0.13578783180672521841897407574039451D+01 + pw(38)= 0.16243074162684344243952826001273292D+01 + pw(39)= 0.19539528530618256474917072512577991D+01 + pw(40)= 0.23712430126501098428281754997758214D+01 + pw(41)= 0.29190630527561174415810627420409994D+01 + pw(42)= 0.36841316877299013922727992267156054D+01 + pw(43)= 0.48865818259133309055220603437140743D+01 + pw(44)= 0.74130021614058319715789731577046943D+01 +endif +if(kn == 45) then + px( 1)= 0.31600628834973779292559127048190037D-03 + px( 2)= 0.16702900106190273328935895866739893D-02 + px( 3)= 0.41285257114804008864642830706317964D-02 + px( 4)= 0.77296636135218507544800673352591724D-02 + px( 5)= 0.12531750213154611452085965505028540D-01 + px( 6)= 0.18615241131540077290889348326202186D-01 + px( 7)= 0.26087066631843252886061485477515189D-01 + px( 8)= 0.35085969566222178513934460224332887D-01 + px( 9)= 0.45789306577033888813174278261897663D-01 + px(10)= 0.58421434435784388362459047968056087D-01 + px(11)= 0.73263711622882621260808485115770070D-01 + px(12)= 0.90666037430716786669379135384670844D-01 + px(13)= 0.11105978210403381887459852100321712D+00 + px(14)= 0.13497200418547898709664745719407710D+00 + px(15)= 0.16304104546216750230118904547804422D+00 + px(16)= 0.19603390202336562386932779699909816D+00 + px(17)= 0.23486608544820513725469302847128384D+00 + px(18)= 0.28062490631545969684155812688765467D+00 + px(19)= 0.33459720050887095268667406972580638D+00 + px(20)= 0.39830252448023971951147973257838648D+00 + px(21)= 0.47353284765622731594152923198004578D+00 + px(22)= 0.56239982933240531713761978532829046D+00 + px(23)= 0.66739091019498584233201402129369733D+00 + px(24)= 0.79143567909811363215071199148744656D+00 + px(25)= 0.93798429449898007763809445265874043D+00 + px(26)= 0.11111001581385252565298438994097319D+01 + px(27)= 0.13155695863345333806714751926528375D+01 + px(28)= 0.15570319587074322621285835341493748D+01 + px(29)= 0.18421348417902183390620002381054988D+01 + px(30)= 0.21787200444979152172206336828110265D+01 + px(31)= 0.25760487257631498506605264603704116D+01 + px(32)= 0.30450769803228709332081128619189764D+01 + px(33)= 0.35987985256158263799485414495379126D+01 + px(34)= 0.42526795117768400994862312123212388D+01 + px(35)= 0.50252244422171176606674140586086445D+01 + px(36)= 0.59387361521365530542536690567593329D+01 + px(37)= 0.70203754298618509877391417240792276D+01 + px(38)= 0.83037051949719291418317245580593492D+01 + px(39)= 0.98310601809174348734008913125923897D+01 + px(40)= 0.11657412426662143324540788749464127D+02 + px(41)= 0.13857164426266931434200566791289955D+02 + px(42)= 0.16537288540540952756080764303901777D+02 + px(43)= 0.19866358206258941542582357564294186D+02 + px(44)= 0.24153320185396072162240284411839110D+02 + px(45)= 0.30161032818176703094230836042654050D+02 + pw( 1)= 0.81157322026241886094392714295783692D-03 + pw( 2)= 0.19011777475163676304804330211208542D-02 + pw( 3)= 0.30217126568442239903479316417011601D-02 + pw( 4)= 0.41901870013204838998208477401165665D-02 + pw( 5)= 0.54273283828323581210343431353844446D-02 + pw( 6)= 0.67573866388465515363650925512746062D-02 + pw( 7)= 0.82092751225427288897814574011554460D-02 + pw( 8)= 0.98179504955536258107199257981433439D-02 + pw( 9)= 0.11625949429528062575538287604998941D-01 + pw(10)= 0.13685006654390859174410475886206370D-01 + pw(11)= 0.16057648452306267238525538418198228D-01 + pw(12)= 0.18818661412340581662760322741958838D-01 + pw(13)= 0.22056412054981119160700553625749748D-01 + pw(14)= 0.25874134633155885709619608590472202D-01 + pw(15)= 0.30391451859719384524106580753360271D-01 + pw(16)= 0.35746460932048448724214073937351505D-01 + pw(17)= 0.42098664685115079575098628197326142D-01 + pw(18)= 0.49632898864791727296990224145235518D-01 + pw(19)= 0.58564289944594589064829987997677920D-01 + pw(20)= 0.69144234514984419908558444457124034D-01 + pw(21)= 0.81667423637721859042077146566843517D-01 + pw(22)= 0.96480011247988074883978780802066204D-01 + pw(23)= 0.11398911344507944971212279753122991D+00 + pw(24)= 0.13467391267458107409324694436955753D+00 + pw(25)= 0.15909873207292332703499649578337212D+00 + pw(26)= 0.18792855598692739186974920544448162D+00 + pw(27)= 0.22194762553095863671304473597844931D+00 + pw(28)= 0.26208196494751982936477406183051153D+00 + pw(29)= 0.30942704406840347450420829122426871D+00 + pw(30)= 0.36528233311686115136869183803961988D+00 + pw(31)= 0.43119539125683582249214467187083528D+00 + pw(32)= 0.50901957973734783536801637831942790D+00 + pw(33)= 0.60099191357850333533977421143095798D+00 + pw(34)= 0.70984171451926250865508439176773176D+00 + pw(35)= 0.83894803839540918949226578045417203D+00 + pw(36)= 0.99257719538081268693583754948673040D+00 + pw(37)= 0.11762571380368294152931731126534612D+01 + pw(38)= 0.13973967807100820386062004887130792D+01 + pw(39)= 0.16663689232723581345802345799199851D+01 + pw(40)= 0.19985351917633034604206124546990186D+01 + pw(41)= 0.24183711599931949498296065604711404D+01 + pw(42)= 0.29689015304223315699189706042837967D+01 + pw(43)= 0.37372089827137538865937716176457237D+01 + pw(44)= 0.49444904398530770204722137069339350D+01 + pw(45)= 0.74820372303276097896128738491191685D+01 +endif +if(kn == 46) then + px( 1)= 0.30902377399120893788354424898855426D-03 + px( 2)= 0.16331537506671398843215268321368443D-02 + px( 3)= 0.40357010980175320995569579449408174D-02 + px( 4)= 0.75530195763806797812195436379199476D-02 + px( 5)= 0.12239149854724502952182976196758722D-01 + px( 6)= 0.18168803621919060471207612598191755D-01 + px( 7)= 0.25440976363694145320552734001486569D-01 + px( 8)= 0.34183679422904259346408703316308100D-01 + px( 9)= 0.44559964643964573543760075913259475D-01 + px(10)= 0.56775358517699538859278511856645348D-01 + px(11)= 0.71086748757462020933356688085151754D-01 + px(12)= 0.87812673145710441814424785984121246D-01 + px(13)= 0.10734488854150171164189046430233515D+00 + px(14)= 0.13016110718036063404346759877348097D+00 + px(15)= 0.15683892196176944176774935350653314D+00 + px(16)= 0.18807118924767432734960867652772029D+00 + px(17)= 0.22468341948583423108691031411123462D+00 + px(18)= 0.26765394743983501400088905846999460D+00 + px(19)= 0.31813776531045013135926232571664330D+00 + px(20)= 0.37749492399880062738311332663588965D+00 + px(21)= 0.44732440247018853385136813722855067D+00 + px(22)= 0.52950437210775343555603255060398518D+00 + px(23)= 0.62623987488687805361419457851831370D+00 + px(24)= 0.74011910006308993968762196795007188D+00 + px(25)= 0.87417968361959824522374584708126947D+00 + px(26)= 0.10319867719667534605241559659266134D+01 + px(27)= 0.12177250028776661981781648200867157D+01 + px(28)= 0.14363070973204841479279696750353849D+01 + px(29)= 0.16935024864806789035793857077605729D+01 + px(30)= 0.19960904185444808460178685393801009D+01 + px(31)= 0.23520434618090888941520463681053941D+01 + px(32)= 0.27707495103418382337989450786653183D+01 + px(33)= 0.32632837492864188626066742354315931D+01 + px(34)= 0.38427473041855077266339137871017428D+01 + px(35)= 0.45246978009253960226440639549407685D+01 + px(36)= 0.53277111750428588477775437304742452D+01 + px(37)= 0.62741382355089597665586817773799499D+01 + px(38)= 0.73911624234084726194161817156844889D+01 + px(39)= 0.87123449938120379656808053147144537D+01 + px(40)= 0.10280000673796667050592943428615469D+02 + px(41)= 0.12149077777983097408939533602647387D+02 + px(42)= 0.14393981931423795298134131653931530D+02 + px(43)= 0.17121779022116518315203538273624565D+02 + px(44)= 0.20501370994989532793115106347627070D+02 + px(45)= 0.24842669385551363138030331359754804D+02 + px(46)= 0.30911787987864913738727989247192145D+02 + pw( 1)= 0.79361463897360195929686273302248409D-03 + pw( 2)= 0.18585856237700307630029880256700761D-02 + pw( 3)= 0.29524996535416808392477309033103772D-02 + pw( 4)= 0.40910988545579399083185141944050702D-02 + pw( 5)= 0.52935540515794390997929859327326122D-02 + pw( 6)= 0.65821742586378490181117941215141045D-02 + pw( 7)= 0.79834063288880762331149927032946792D-02 + pw( 8)= 0.95290513069482303557158918089126201D-02 + pw( 9)= 0.11257625314697109024742951848074159D-01 + pw(10)= 0.13215805246967286000173950239611213D-01 + pw(11)= 0.15459871723048351392169871195100160D-01 + pw(12)= 0.18057057593147761999943506400696980D-01 + pw(13)= 0.21086760440076034016469007977829453D-01 + pw(14)= 0.24641687279129405797304341158994351D-01 + pw(15)= 0.28829130387470289935818007648098355D-01 + pw(16)= 0.33772655230285352209194232463306431D-01 + pw(17)= 0.39614465986840122572670076622475638D-01 + pw(18)= 0.46518617394519992951670269787487787D-01 + pw(19)= 0.54675131186708511535131483491430821D-01 + pw(20)= 0.64305014007496806948186778346285377D-01 + pw(21)= 0.75666179654741774777744798242374952D-01 + pw(22)= 0.89060332032660676125924796117040007D-01 + pw(23)= 0.10484093752103114356647229133814935D+00 + pw(24)= 0.12342248951027967461470307235875341D+00 + pw(25)= 0.14529134199630383752180561542530020D+00 + pw(26)= 0.17101847250539549716018144547422270D+00 + pw(27)= 0.20127464277089058198410341836023683D+00 + pw(28)= 0.23684858001359149093932878615193101D+00 + pw(29)= 0.27866903348006793962100039913921415D+00 + pw(30)= 0.32783191849208153321924464180174769D+00 + pw(31)= 0.38563432289935890460881756854144066D+00 + pw(32)= 0.45361805217372703197887339399899876D+00 + pw(33)= 0.53362686058844350813392769068823926D+00 + pw(34)= 0.62788396771362463542546047416846800D+00 + pw(35)= 0.73910064578742344795091934329070699D+00 + pw(36)= 0.87063402416829911198578365788259413D+00 + pw(37)= 0.10267256741807495552845728772900802D+01 + pw(38)= 0.12128781106626951287021151112542470D+01 + pw(39)= 0.14364778560404412196974965116370614D+01 + pw(40)= 0.17078848068460459936312054922566995D+01 + pw(41)= 0.20424685786283419841913953641602057D+01 + pw(42)= 0.24647555529206493569854377641616308D+01 + pw(43)= 0.30179124308683067944200720815335092D+01 + pw(44)= 0.37893852356394974569134386395867514D+01 + pw(45)= 0.50014251355936200780723745695031871D+01 + pw(46)= 0.75499704094498894150628414754712103D+01 +endif +if(kn == 47) then + px( 1)= 0.30234480684949870042066556848012510D-03 + px( 2)= 0.15976464953770958135599778441796202D-02 + px( 3)= 0.39470145814685269106194100213451950D-02 + px( 4)= 0.73844351214347704596731706115099534D-02 + px( 5)= 0.11960307325670953208852468939751834D-01 + px( 6)= 0.17744140854528926114461559850667081D-01 + px( 7)= 0.24827775421109866384949286855042231D-01 + px( 8)= 0.33329587337597891764863922994014555D-01 + px( 9)= 0.43399840203304717839217570161011672D-01 + px(10)= 0.55227290263964079065971805634237348D-01 + px(11)= 0.69047097585245923081163450963219921D-01 + px(12)= 0.85150014954361349896311981444336255D-01 + px(13)= 0.10389275615680304450176721539326430D+00 + px(14)= 0.12570943253264044220914087598374078D+00 + px(15)= 0.15112403488297752197534393801499475D+00 + px(16)= 0.18076413027130191024242456354592003D+00 + px(17)= 0.21537618776652269918043941270387835D+00 + px(18)= 0.25584316244558279667676325421037561D+00 + px(19)= 0.30320509530959575930710602679134976D+00 + px(20)= 0.35868352704264273864602902528848421D+00 + px(21)= 0.42371052026058185226325968835995444D+00 + px(22)= 0.49996309228509504682760172037312828D+00 + px(23)= 0.58940391513730639900316144249394630D+00 + px(24)= 0.69432925544472178824662119719828653D+00 + px(25)= 0.81742530558526965352748082134155990D+00 + px(26)= 0.96183429918308909086642705501460506D+00 + px(27)= 0.11312321171355587417637105018144043D+01 + px(28)= 0.13299194954977380580964549469885521D+01 + px(29)= 0.15629294821033111247460192707385455D+01 + px(30)= 0.18361545182421012128782235750936341D+01 + px(31)= 0.21564975473013180685304927966921050D+01 + px(32)= 0.25320530389913426066138968271032273D+01 + px(33)= 0.29723260351325192884401956439321237D+01 + px(34)= 0.34885007189662000039152029330111157D+01 + px(35)= 0.40937753452277820469237089959367401D+01 + px(36)= 0.48037889682999426543864264911219419D+01 + px(37)= 0.56371796552402367761125198516216112D+01 + px(38)= 0.66163382295353580459275111274902267D+01 + px(39)= 0.77684648059929853861219213698259439D+01 + px(40)= 0.91271156012149482363243828361549091D+01 + px(41)= 0.10734585093601096152552856517558275D+02 + px(42)= 0.12645801076306643342528896213826369D+02 + px(43)= 0.14935178894323617316649392464504258D+02 + px(44)= 0.17709887244612807742256277065719222D+02 + px(45)= 0.21139166627448714331281414920298006D+02 + px(46)= 0.25533898913637103231788330664255958D+02 + px(47)= 0.31663433742152343555689779493983534D+02 + pw( 1)= 0.77643840612392469558233568878288438D-03 + pw( 2)= 0.18178823313702654997790171434491737D-02 + pw( 3)= 0.28864536542127938374932216849800024D-02 + pw( 4)= 0.39967467153526294501560789189634526D-02 + pw( 5)= 0.51665295391631236792192347882343893D-02 + pw( 6)= 0.64163743838733321438814922251353746D-02 + pw( 7)= 0.77705352297994246418018494260403994D-02 + pw( 8)= 0.92580225788462023682293420241402079D-02 + pw( 9)= 0.10913810605238600296521875414785753D-01 + pw(10)= 0.12780129454852165115348470540659794D-01 + pw(11)= 0.14907770873253776058864543333587366D-01 + pw(12)= 0.17357325452550337474212111231031977D-01 + pw(13)= 0.20200301191038969448754000899427036D-01 + pw(14)= 0.23520156080274576647610305543693418D-01 + pw(15)= 0.27413389239659681777619137570103249D-01 + pw(16)= 0.31990921824324695951826704036400849D-01 + pw(17)= 0.37380010931288310349162200884914999D-01 + pw(18)= 0.43726873583833196750460928410582440D-01 + pw(19)= 0.51200100443073974036149329120700021D-01 + pw(20)= 0.59994868260574886373334055836155630D-01 + pw(21)= 0.70337945020122803930065727247244319D-01 + pw(22)= 0.82493514750755835638795021587104324D-01 + pw(23)= 0.96769906495425836064670059167481399D-01 + pw(24)= 0.11352737495114584133685628457043583D+00 + pw(25)= 0.13318714247714292913023639085868125D+00 + pw(26)= 0.15624197763113426788311768418626933D+00 + pw(27)= 0.18326866459826594224170442595536027D+00 + pw(28)= 0.21494282570862350034472593398023850D+00 + pw(29)= 0.25205671631617380517032575867290262D+00 + pw(30)= 0.29554084830882541265073842592331588D+00 + pw(31)= 0.34649066398539118260401510223685881D+00 + pw(32)= 0.40620005573936491411597998731411544D+00 + pw(33)= 0.47620444337408185641294547642135994D+00 + pw(34)= 0.55833761203594537949491272468091006D+00 + pw(35)= 0.65480899160066358211037008014529884D+00 + pw(36)= 0.76831227871509708311671525382157320D+00 + pw(37)= 0.90218370924525609794334519875989345D+00 + pw(38)= 0.10606417731219318086869090064772246D+01 + pw(39)= 0.12491658485426170682375819102086492D+01 + pw(40)= 0.14751229591934938816623767074938966D+01 + pw(41)= 0.17488634335620615325269065049873221D+01 + pw(42)= 0.20857683710668100441568568919092486D+01 + pw(43)= 0.25104181373755596577440533667319287D+01 + pw(44)= 0.30661234529278624717819692903450825D+01 + pw(45)= 0.38406927297086597567375854792414220D+01 + pw(46)= 0.50574215821071962594952640986635070D+01 + pw(47)= 0.76168408553950413361259182112809766D+01 +endif +if(kn == 48) then + px( 1)= 0.29595002899720089598048509729240858D-03 + px( 2)= 0.15636631385928177658857961799981344D-02 + px( 3)= 0.38621935810659565095543543710661065D-02 + px( 4)= 0.72233636458828631437195788212396685D-02 + px( 5)= 0.11694254587023519966120947669484770D-01 + px( 6)= 0.17339652834546167382576155326885531D-01 + px( 7)= 0.24244927054495278219738066751504960D-01 + px( 8)= 0.32519777490587042844934209629601974D-01 + px( 9)= 0.42303000131579013083618262933841541D-01 + px(10)= 0.53768369083049019518863347968872785D-01 + px(11)= 0.67131693007024900369022557250738329D-01 + px(12)= 0.82659034800333661590098251027713411D-01 + px(13)= 0.10067601794995718748717592252167950D+00 + px(14)= 0.12157811670283109994038391671204829D+00 + px(15)= 0.14584188025331271469532975454293184D+00 + px(16)= 0.17403718708094337911584686617663455D+00 + px(17)= 0.20684083223970121872293135877750441D+00 + px(18)= 0.24505195226053213810882336066380088D+00 + px(19)= 0.28960993053758356990356801037005909D+00 + px(20)= 0.34161548376800915367159074480126869D+00 + px(21)= 0.40235563464585336049157109682672131D+00 + px(22)= 0.47333327369938541669100296412222937D+00 + px(23)= 0.55630204160312520623134123151678925D+00 + px(24)= 0.65330734108265942786541154430622162D+00 + px(25)= 0.76673441875356785236785315155850152D+00 + px(26)= 0.89936464142274902321479563230619771D+00 + px(27)= 0.10544413316663370284059399235987637D+01 + px(28)= 0.12357468356208411453551987100600846D+01 + px(29)= 0.14476928958143963682846712437006035D+01 + px(30)= 0.16954269342764845689137922115223447D+01 + px(31)= 0.19849575816646326604339901913933094D+01 + px(32)= 0.23233038211583159605827592291675721D+01 + px(33)= 0.27186736189401563011050998256085975D+01 + px(34)= 0.31806801589007531007613766645355501D+01 + px(35)= 0.37206072396727045282926190726802162D+01 + px(36)= 0.43517407927213647930655693816847034D+01 + px(37)= 0.50897921727561576322594218577457258D+01 + px(38)= 0.59534532497729963738598299894774892D+01 + px(39)= 0.69651478712196520721782791126144665D+01 + px(40)= 0.81520877402662049285349974267562675D+01 + px(41)= 0.95478213967711421151781315680800985D+01 + px(42)= 0.11194623319536797531467272595329506D+02 + px(43)= 0.13147404152263011059682758275653972D+02 + px(44)= 0.15480595341123512509584325553694590D+02 + px(45)= 0.18301477075836510534702330167968349D+02 + px(46)= 0.21779637522565706043537218918652790D+02 + px(47)= 0.26226933347488070796043946213342574D+02 + px(48)= 0.32415929770269557720899017070958329D+02 + pw( 1)= 0.75999449282242525498888775227070420D-03 + pw( 2)= 0.17789442907920029044278935841370033D-02 + pw( 3)= 0.28233592185581488393945265516117315D-02 + pw( 4)= 0.39067912705549311303324714399927545D-02 + pw( 5)= 0.50457409167121635114256894882489535D-02 + pw( 6)= 0.62592215195201857161143217251824231D-02 + pw( 7)= 0.75695311456923674293727515403732119D-02 + pw( 8)= 0.90032044940945145010068536558863271D-02 + pw( 9)= 0.10592085506839418968886878624905230D-01 + pw(10)= 0.12374483314193827446279695880913988D-01 + pw(11)= 0.14396352965559528046529852650738501D-01 + pw(12)= 0.16712429540159363311802064676696312D-01 + pw(13)= 0.19387262265658075348794679025267961D-01 + pw(14)= 0.22496156590284673178523069618335189D-01 + pw(15)= 0.26126125032895265676414993536188666D-01 + pw(16)= 0.30377032282566849853328475248140570D-01 + pw(17)= 0.35363150724079551172748521328984452D-01 + pw(18)= 0.41215303456842402904128482278539693D-01 + pw(19)= 0.48083691328284411416338444917462566D-01 + pw(20)= 0.56141427938196717060539102662208221D-01 + pw(21)= 0.65588776105316823321919603829156998D-01 + pw(22)= 0.76658094133956830129535839504567249D-01 + pw(23)= 0.89619543419632664042116217247700679D-01 + pw(24)= 0.10478766192717970201342332439078370D+00 + pw(25)= 0.12252896123742271255427206161487662D+00 + pw(26)= 0.14327075828486752574672706211131092D+00 + pw(27)= 0.16751151313821774235835757278858330D+00 + pw(28)= 0.19583302163349966411912965960570620D+00 + pw(29)= 0.22891492049551029451481007469340092D+00 + pw(30)= 0.26755212283797206211340817029488241D+00 + pw(31)= 0.31267604416833112127254011348613261D+00 + pw(32)= 0.36538085205037423661137648346933768D+00 + pw(33)= 0.42695655668634922335366944118783047D+00 + pw(34)= 0.49893168999297620297105696955480497D+00 + pw(35)= 0.58312983017364951950857474996168896D+00 + pw(36)= 0.68174673032193870559875685439839375D+00 + pw(37)= 0.79745906091752647870294820280336694D+00 + pw(38)= 0.93358322488211662322908533451921218D+00 + pw(39)= 0.10943162513760950011430094815688344D+01 + pw(40)= 0.12851165678797307366335354072388698D+01 + pw(41)= 0.15133343910778260993594764587330245D+01 + pw(42)= 0.17893135308834332452949764559455513D+01 + pw(43)= 0.21284497101212601640922819767089646D+01 + pw(44)= 0.25553800629427160180922388763367066D+01 + pw(45)= 0.31135609716998794233456050246781466D+01 + pw(46)= 0.38911620012948672120134104867773046D+01 + pw(47)= 0.51125134158119019180657428081205730D+01 + pw(48)= 0.76826855076385203411408423144047970D+01 +endif +if(kn == 49) then + px( 1)= 0.28982169661609259925974979296199715D-03 + px( 2)= 0.15311074628945562334530690364685912D-02 + px( 3)= 0.37809891427061498988417968394892015D-02 + px( 4)= 0.70693075551075721251308809528074262D-02 + px( 5)= 0.11440114051043380640580809174873546D-01 + px( 6)= 0.16953896205505568651585337372141638D-01 + px( 7)= 0.23690155524198397880316141824091359D-01 + px( 8)= 0.31750757826730201808237900276821105D-01 + px( 9)= 0.41264185607363735111209847472395680D-01 + px(10)= 0.52390788296171869395123759706096667D-01 + px(11)= 0.65329088415812943007951010158851832D-01 + px(12)= 0.80323145971104464698417160245071838D-01 + px(13)= 0.97670923692398180646763841694521778D-01 + px(14)= 0.11773356193272553739176999855177642D+00 + px(15)= 0.14094549927124676527452589226795674D+00 + px(16)= 0.16782548211787615810300942831732404D+00 + px(17)= 0.19898867709139685594599685847717735D+00 + px(18)= 0.23516028351061490409755316887724481D+00 + px(19)= 0.27719118480638011914316315022611666D+00 + px(20)= 0.32607625000818758221137748238031822D+00 + px(21)= 0.38297591208948031388506643958671483D+00 + px(22)= 0.44924164526126338307000295982995931D+00 + px(23)= 0.52644597452520216232192817378809657D+00 + px(24)= 0.61641769981161764547143482902113440D+00 + px(25)= 0.72128311119138160917726410382682506D+00 + px(26)= 0.84351411110478227605310059636301640D+00 + px(27)= 0.98598434482611919288728229535556820D+00 + px(28)= 0.11520346774611530676609320335187635D+01 + px(29)= 0.13455496592189661370306885171963069D+01 + px(30)= 0.15710470166859281003411527674326210D+01 + px(31)= 0.18337827389082834345983893278310805D+01 + px(32)= 0.21398750605157030327208430120547271D+01 + px(33)= 0.24964516861943417767039670584804599D+01 + px(34)= 0.29118261213763337159423664681275174D+01 + px(35)= 0.33957112485642962686719816570055312D+01 + px(36)= 0.39594817726282308325228740791813504D+01 + px(37)= 0.46165026215025079679688539374253711D+01 + px(38)= 0.53825491684099448048962771604007379D+01 + px(39)= 0.62763596401674826758369077814162276D+01 + px(40)= 0.73203847840971806343130509762279760D+01 + px(41)= 0.85418435921307030922239809237588315D+01 + px(42)= 0.99742749209607261981040189301013931D+01 + px(43)= 0.11659933902343682924575082774292738D+02 + px(44)= 0.13653717512175632633212031175648009D+02 + px(45)= 0.16030079451319704520212844485652403D+02 + px(46)= 0.18896419514536338819621757969794054D+02 + px(47)= 0.22422681781072229358731374027395797D+02 + px(48)= 0.26921701181159762662101522272189023D+02 + px(49)= 0.33169237709310133472812163437610434D+02 + pw( 1)= 0.74423705557468321912970974468971512D-03 + pw( 2)= 0.17416585473194369192981311717828790D-02 + pw( 3)= 0.27630202767273380717880972293549723D-02 + pw( 4)= 0.38209254857123832686825908868042634D-02 + pw( 5)= 0.49307262850810818969143173604709699D-02 + pw( 6)= 0.61100327216620297105479543777143453D-02 + pw( 7)= 0.73793929142489472766538930130422414D-02 + pw( 8)= 0.87631380820224889920133086400922484D-02 + pw( 9)= 0.10290337290113942038466358200627605D-01 + pw(10)= 0.11995832288470777205381700382547014D-01 + pw(11)= 0.13921304955008894938778083468431898D-01 + pw(12)= 0.16116315842092009170407309532189491D-01 + pw(13)= 0.18639261057732707055179140040607199D-01 + pw(14)= 0.21558238862705300435120252889679016D-01 + pw(15)= 0.24951892641527327511221421263371520D-01 + pw(16)= 0.28910375260277562929269397616107752D-01 + pw(17)= 0.33536621862509534402237961505810400D-01 + pw(18)= 0.38948101356497020534071631819477111D-01 + pw(19)= 0.45279154574446597766189875771988559D-01 + pw(20)= 0.52683958194920854977905259654150493D-01 + pw(21)= 0.61340113116439272718820743217394536D-01 + pw(22)= 0.71452855489322219691816819653732876D-01 + pw(23)= 0.83259918269799951973839847297358939D-01 + pw(24)= 0.97037114546374637815371486877102608D-01 + pw(25)= 0.11310475960635753107742085931930780D+00 + pw(26)= 0.13183509368952202468340313538628788D+00 + pw(27)= 0.15366091498274177979540995552638069D+00 + pw(28)= 0.17908568983953526314195097583061572D+00 + pw(29)= 0.20869548441677582027963091462587493D+00 + pw(30)= 0.24317317246706239890716235938537564D+00 + pw(31)= 0.28331553768501980083448486564952605D+00 + pw(32)= 0.33005413638688135108429388605654924D+00 + pw(33)= 0.38448116653084696134266394851745611D+00 + pw(34)= 0.44788218293156059076871251650381853D+00 + pw(35)= 0.52177844169743715495966266074704591D+00 + pw(36)= 0.60798318304777180959943540325649013D+00 + pw(37)= 0.70867868475109945308012661652305988D+00 + pw(38)= 0.82652519802949145510247997374762292D+00 + pw(39)= 0.96482036576439726113478189858839344D+00 + pw(40)= 0.11277413304728328935012105843530012D+01 + pw(41)= 0.13207276384535346696169033036111123D+01 + pw(42)= 0.15511151994361137315814825047961978D+01 + pw(43)= 0.18292440906220512275179521250816934D+01 + pw(44)= 0.21705274773423284009902208179302011D+01 + pw(45)= 0.25996616983900981131162894691162348D+01 + pw(46)= 0.31602501106700710441030906881720498D+01 + pw(47)= 0.39408219589198821554199569326106068D+01 + pw(48)= 0.51667323905056736314902652284285907D+01 + pw(49)= 0.77475392524477876553320371372226975D+01 +endif +if(kn == 50) then + px( 1)= 0.28394351112953297748994559783102511D-03 + px( 2)= 0.14998911954688725774856401484033629D-02 + px( 3)= 0.37031733634109224832474380329849766D-02 + px( 4)= 0.69218127312350228406485064567062889D-02 + px( 5)= 0.11197087943784824700478952929697149D-01 + px( 6)= 0.16585564985245590131106268275448670D-01 + px( 7)= 0.23161412500517665834595664639261402D-01 + px( 8)= 0.31019403043677038055911010779170235D-01 + px( 9)= 0.40278717647655643037391307728658037D-01 + px(10)= 0.51087642190675989042234002017798487D-01 + px(11)= 0.63629213809160599368347959049041399D-01 + px(12)= 0.78127829475675329032296849094039453D-01 + px(13)= 0.94856775739499734452627500076424815D-01 + px(14)= 0.11414660148335876745918230546444594D+00 + px(15)= 0.13639426440178507981483041317230735D+00 + px(16)= 0.16207305783142124363194134470205347D+00 + px(17)= 0.19174346191330080395615598185222217D+00 + px(18)= 0.22606522557261662878988725676839652D+00 + px(19)= 0.26581112483702385355663297234020894D+00 + px(20)= 0.31188292620679003206265405632135533D+00 + px(21)= 0.36533011131301634013423764015010912D+00 + px(22)= 0.42737191711071555589396273204369958D+00 + px(23)= 0.49942324705020639374541286536052638D+00 + px(24)= 0.58312503667951418472684968751768941D+00 + px(25)= 0.68037972242344487980438298325566643D+00 + px(26)= 0.79339256658260940896953648094143840D+00 + px(27)= 0.92471973446804368904394005417532194D+00 + px(28)= 0.10773242035057135225415445929049064D+01 + px(29)= 0.12546408175407001854792111537007726D+01 + px(30)= 0.14606520990918797361653158063467201D+01 + px(31)= 0.16999768257560608388590285583482666D+01 + px(32)= 0.19779739082773843717702150700803880D+01 + px(33)= 0.23008648456607864889580088643288349D+01 + px(34)= 0.26758790850855905290663568936243531D+01 + px(35)= 0.31114281531421848676256035865658428D+01 + px(36)= 0.36173167280945029265548597355573382D+01 + px(37)= 0.42050023489136640359196188798116726D+01 + px(38)= 0.48879209793922610146974780441291671D+01 + px(39)= 0.56819045073984933891246336517741761D+01 + px(40)= 0.66057308705566818707545720531514149D+01 + px(41)= 0.76818723662188017987959388336177329D+01 + px(42)= 0.89375516656298253147474053069187414D+01 + px(43)= 0.10406296477480503239729452647809424D+02 + px(44)= 0.12130343567869783578305872825995901D+02 + px(45)= 0.14164579787764828196349642136898031D+02 + px(46)= 0.16583487005376427804066458589286026D+02 + px(47)= 0.19494592188944424125485472332517264D+02 + px(48)= 0.23068202782732093092557399540639023D+02 + px(49)= 0.27618134557703623179690912010041751D+02 + px(50)= 0.33923321036009768333728618541703920D+02 + pw( 1)= 0.72912399684308635017096115410335056D-03 + pw( 2)= 0.17059216228519449314704147393870548D-02 + pw( 3)= 0.27052579385463033654518158126252911D-02 + pw( 4)= 0.37388707495255014610097719618737042D-02 + pw( 5)= 0.48210691945239681765906911252349642D-02 + pw( 6)= 0.59681965658636724574936823069437344D-02 + pw( 7)= 0.71992306962236770905536136519431856D-02 + pw( 8)= 0.85365357689533034566484478187242343D-02 + pw( 9)= 0.10006713677868850439161417859632759D-01 + pw(10)= 0.11641531351319812896113767691847514D-01 + pw(11)= 0.13478885680498147705267761047018866D-01 + pw(12)= 0.15563753671054168683182913698539199D-01 + pw(13)= 0.17949077884564351780048811341841315D-01 + pw(14)= 0.20696568103487745734831198958981091D-01 + pw(15)= 0.23877459843864173187990416648243479D-01 + pw(16)= 0.27573340091582106763900700105376174D-01 + pw(17)= 0.31877198204074685512995246247087960D-01 + pw(18)= 0.36894860399462428730109186000241340D-01 + pw(19)= 0.42746921785947037355880124311792745D-01 + pw(20)= 0.49571228532988571977201451960147272D-01 + pw(21)= 0.57525917359503163899312028414966321D-01 + pw(22)= 0.66793006913272852902897497204087823D-01 + pw(23)= 0.77582552855288975335680322806095329D-01 + pw(24)= 0.90137412508095941124740779645748160D-01 + pw(25)= 0.10473870396294021438411474581832967D+00 + pw(26)= 0.12171208319009365070220694175921390D+00 + pw(27)= 0.14143500171709279543618029697282644D+00 + pw(28)= 0.16434515151851001863886624876712371D+00 + pw(29)= 0.19095035999365251682074028234920175D+00 + pw(30)= 0.22184027575178191861381490980142531D+00 + pw(31)= 0.25770029856713080526288855613179455D+00 + pw(32)= 0.29932837398793730342885176518781178D+00 + pw(33)= 0.34765552544247195237609202772613096D+00 + pw(34)= 0.40377138372726537143521552364635440D+00 + pw(35)= 0.46895657668751681273934682099459395D+00 + pw(36)= 0.54472479692767961940426388856919793D+00 + pw(37)= 0.63287890678130081055700714208328112D+00 + pw(38)= 0.73558798250487807944749198073740843D+00 + pw(39)= 0.85549650522124124655227075039375323D+00 + pw(40)= 0.99588443073362582872294183483256617D+00 + pw(41)= 0.11609105355219986209989146786182259D+01 + pw(42)= 0.13559974384949635725484334023159422D+01 + pw(43)= 0.15884690610914334611993355370110858D+01 + pw(44)= 0.18686642910659719608475187905997900D+01 + pw(45)= 0.22120162660997500537540215058314098D+01 + pw(46)= 0.26432826564193229543032457728289448D+01 + pw(47)= 0.32062148179068205963629824250927723D+01 + pw(48)= 0.39897000025706165355011208836568943D+01 + pw(49)= 0.52201085267504392582124308457577559D+01 + pw(50)= 0.78114350904888610285435916161990246D+01 +endif +if(kn == 51) then + px( 1)= 0.27830048331694745146626670725003848D-03 + px( 2)= 0.14699332467589495012214296420135614D-02 + px( 3)= 0.36285373076700958382889506232860957D-02 + px( 4)= 0.67804640421568805932750563900953938D-02 + px( 5)= 0.10964449641475153555650317586921338D-01 + px( 6)= 0.16233474841656748209131399424045480D-01 + px( 7)= 0.22656849606742837535759660703189982D-01 + px( 8)= 0.30322907994226545082257492807954794D-01 + px( 9)= 0.39342419929775285730416663052397862D-01 + px(10)= 0.49852801135275356633188650023787799D-01 + px(11)= 0.62023178388698049878991488778075058D-01 + px(12)= 0.76060329272036397821127225875497380D-01 + px(13)= 0.92215469045402426815676149451909456D-01 + px(14)= 0.11079181963288772179472869891025245D+00 + px(15)= 0.13215289192145648970655359123013550D+00 + px(16)= 0.15673146375949390138182262372756365D+00 + px(17)= 0.18503934411172685168403222212810987D+00 + px(18)= 0.21767815425667758980667415659377157D+00 + px(19)= 0.25535148902919646272821292167480645D+00 + px(20)= 0.29887891101338956958304222194317216D+00 + px(21)= 0.34921226930606420176060402127989888D+00 + px(22)= 0.40745483836472093899131085922052085D+00 + px(23)= 0.47488376920818361679977841816170633D+00 + px(24)= 0.55297635865326739663234570169538373D+00 + px(25)= 0.64344068535303042620990947995897496D+00 + px(26)= 0.74825123790612149826635700308884120D+00 + px(27)= 0.86969026995696913003053787424248180D+00 + px(28)= 0.10103957605313364799613553988071921D+01 + px(29)= 0.11734170393027801618266450011528511D+01 + px(30)= 0.13622793663980858168640978903164547D+01 + px(31)= 0.15810590528845442660473438585783950D+01 + px(32)= 0.18344711002533369402909515294518176D+01 + px(33)= 0.21279718700902070387593931590109607D+01 + px(34)= 0.24678800386227544163678584032999521D+01 + px(35)= 0.28615201544104645116093866929518600D+01 + px(36)= 0.33173946748043709269495501699195293D+01 + px(37)= 0.38453926882082167185869634333011899D+01 + px(38)= 0.44570470958112522799784043488271277D+01 + px(39)= 0.51658576054751924442339769813379872D+01 + px(40)= 0.59877058293844537789530450556075720D+01 + px(41)= 0.69414034941300311714983901016833412D+01 + px(42)= 0.80494397879808445218602940428136405D+01 + px(43)= 0.93390380570666107887290025527994856D+01 + px(44)= 0.10843713859839632308899371210210092D+02 + px(45)= 0.12605686843057786008572981971452711D+02 + px(46)= 0.14679837296994134584444426535907765D+02 + px(47)= 0.17140680920657630892046419784075754D+02 + px(48)= 0.20095878907672599896988947474029035D+02 + px(49)= 0.23716108785958653467610019526488283D+02 + px(50)= 0.28316168939449414612178766745269557D+02 + px(51)= 0.34678144812502309409979036489309933D+02 + pw( 1)= 0.71461661125073618577714141168068574D-03 + pw( 2)= 0.16716385878603382275615092392545966D-02 + pw( 3)= 0.26499087137809206936489894716951881D-02 + pw( 4)= 0.36603737280291944817450300201489801D-02 + pw( 5)= 0.47163932617577667464231453856574222D-02 + pw( 6)= 0.58331641163766286245746932185477944D-02 + pw( 7)= 0.70282510906775725331369422805168084D-02 + pw( 8)= 0.83222573157223186581625603590243717D-02 + pw( 9)= 0.97395848045625290059070106665033066D-02 + pw(10)= 0.11309266338983814689829245251610443D-01 + pw(11)= 0.13065837815189701273276681449362080D-01 + pw(12)= 0.15050206824622500427636027066505286D-01 + pw(13)= 0.17310471604431563174843365788224820D-01 + pw(14)= 0.19902665286202140668402395991957144D-01 + pw(15)= 0.22891446750016473901306940204721219D-01 + pw(16)= 0.26350819616702910168573443501795867D-01 + pw(17)= 0.30365009714194435950502154476602598D-01 + pw(18)= 0.35029644330122852911457703936925401D-01 + pw(19)= 0.40453348181743022646044235704229779D-01 + pw(20)= 0.46759819486523129479791042090399526D-01 + pw(21)= 0.54090403179921190709407057449782309D-01 + pw(22)= 0.62607156912964304588823654347725686D-01 + pw(23)= 0.72496411785130069944054386511386143D-01 + pw(24)= 0.83972854815470973907373202544753383D-01 + pw(25)= 0.97284192841846608857881098157165865D-01 + pw(26)= 0.11271649103721755058596361082284142D+00 + pw(27)= 0.13060031219247571052268358514865611D+00 + pw(28)= 0.15131781797811823631871970343701060D+00 + pw(29)= 0.17531103556726408969705872673699634D+00 + pw(30)= 0.20309154904134215752202092778295388D+00 + pw(31)= 0.23525195397891395078569762059044835D+00 + pw(32)= 0.27247952854009146289412622775860388D+00 + pw(33)= 0.31557274490479512420214647476118810D+00 + pw(34)= 0.36546150205090068791751615370418030D+00 + pw(35)= 0.42323235431744446559543345018447882D+00 + pw(36)= 0.49016062156217930068156275755723687D+00 + pw(37)= 0.56775223265564612092706940030068368D+00 + pw(38)= 0.65779970939872780221136201150769308D+00 + pw(39)= 0.76245925865498574135551621597788489D+00 + pw(40)= 0.88436027001142532199927186614862192D+00 + pw(41)= 0.10267660753266581074365019471728486D+01 + pw(42)= 0.11938185471963720076518527335025432D+01 + pw(43)= 0.13909252171844327541246452342529830D+01 + pw(44)= 0.16254001662180292675140925291992103D+01 + pw(45)= 0.19075834129441296076092154965785291D+01 + pw(46)= 0.22529303361525593567401491175421270D+01 + pw(47)= 0.26862617892200971765670039600369766D+01 + pw(48)= 0.32514778994133779406256785815727141D+01 + pw(49)= 0.40378220858095016239167419409137174D+01 + pw(50)= 0.52726701882528895779137444037618021D+01 + pw(51)= 0.78744042074131644922750515876542347D+01 +endif +if(kn == 52) then + px( 1)= 0.27287878963778979318704099764976979D-03 + px( 2)= 0.14411589143135589309910830756029883D-02 + px( 3)= 0.35568888680936775757132750276695479D-02 + px( 4)= 0.66448808369926751816655531142947976D-02 + px( 5)= 0.10741535203421646235306609286779033D-01 + px( 6)= 0.15896548124765745985567116234974430D-01 + px( 7)= 0.22174792928607557748215031029525379D-01 + px( 8)= 0.29658745389012720604963104746455902D-01 + px( 9)= 0.38451550078535438960644609304592703D-01 + px(10)= 0.48680802154773981897502480211717432D-01 + px(11)= 0.60503099784552369897160088385145451D-01 + px(12)= 0.74109391214600302911242894598461603D-01 + px(13)= 0.89731100249233358933686871661989877D-01 + px(14)= 0.10764697753584156876969725074282997D+00 + px(15)= 0.12819061301398605430851761152295284D+00 + px(16)= 0.15175857698395808054368327545045674D+00 + px(17)= 0.17881924018609623916197948354619579D+00 + px(18)= 0.20992244196465392763256291141326856D+00 + px(19)= 0.24571029757551810503161176844891960D+00 + px(20)= 0.28692952833595684927481466342699473D+00 + px(21)= 0.33444574647578471028054731992057917D+00 + px(22)= 0.38926013793139080772351691924522347D+00 + px(23)= 0.45252898276941636881394275842894023D+00 + px(24)= 0.52558645699160000314972991962565110D+00 + px(25)= 0.60997118585865879765197048254341330D+00 + px(26)= 0.70745707342364955349644867464416164D+00 + px(27)= 0.82008901627275511415852311384357046D+00 + px(28)= 0.95022422122473475017786104441461093D+00 + px(29)= 0.11005799888944176702824780847485411D+01 + px(30)= 0.12742890037078181125625630187828027D+01 + px(31)= 0.14749633979156047676958663456164965D+01 + px(32)= 0.17067691516542664863528589999065341D+01 + px(33)= 0.19745127831074488312044849464771686D+01 + px(34)= 0.22837428183447160072986023000468876D+01 + px(35)= 0.26408692804764055817543358579323038D+01 + px(36)= 0.30533055131576702465080896333143573D+01 + px(37)= 0.35296382295469342100204889068765045D+01 + px(38)= 0.40798340368281393790208425276818839D+01 + px(39)= 0.47154942919829600315975357986809138D+01 + px(40)= 0.54501757769677042873771351203731296D+01 + px(41)= 0.62998036929575447450649046764701569D+01 + px(42)= 0.72832182898545812637798931719272845D+01 + px(43)= 0.84229216047310103819935078476256484D+01 + px(44)= 0.97461351851146406033204591955381092D+01 + px(45)= 0.11286361832018459964955857919026558D+02 + px(46)= 0.13085805530780086242892657357654051D+02 + px(47)= 0.15199343565107891341827069072222156D+02 + px(48)= 0.17701530875675334700619075035054618D+02 + px(49)= 0.20700169445006682515691632852528091D+02 + px(50)= 0.24366312928914848142376439425799257D+02 + px(51)= 0.29015743374923417716784066471530138D+02 + px(52)= 0.35433676271462821390664880501380081D+02 + pw( 1)= 0.70067921248512656450750262380470989D-03 + pw( 2)= 0.16387221028734144344416061643064582D-02 + pw( 3)= 0.25968227302892772688118502909161922D-02 + pw( 4)= 0.35852033221618217540588075672036645D-02 + pw( 5)= 0.46163571334057459414651619546435755D-02 + pw( 6)= 0.57044407177542779516540853577376776D-02 + pw( 7)= 0.68657439258466701284209801337979038D-02 + pw( 8)= 0.81192888642852760066695573485214022D-02 + pw( 9)= 0.94875105621027290016976460295608743D-02 + pw(10)= 0.10997004127430955685924352255220372D-01 + pw(11)= 0.12679313613778005716132640549939369D-01 + pw(12)= 0.14571725519850453422967526162847044D-01 + pw(13)= 0.16718025683719221072849622604121732D-01 + pw(14)= 0.19169191632602931509237238888017571D-01 + pw(15)= 0.21984027773632325146538486813698425D-01 + pw(16)= 0.25229801526425832500489421276402268D-01 + pw(17)= 0.28982985579859711073310312330962579D-01 + pw(18)= 0.33330232727989722055601631534296414D-01 + pw(19)= 0.38369695026986305327273292677722862D-01 + pw(20)= 0.44212758195453754037273486501473222D-01 + pw(21)= 0.50986218113528798819802588773272107D-01 + pw(22)= 0.58834899219859082186151202608595314D-01 + pw(23)= 0.67924711805560266218170048520749527D-01 + pw(24)= 0.78446161793212774417972991998090425D-01 + pw(25)= 0.90618353089338093281832862102708959D-01 + pw(26)= 0.10469355154807580861182066828267783D+00 + pw(27)= 0.12096240801949377693281668581524884D+00 + pw(28)= 0.13975996681192581185584791335037507D+00 + pw(29)= 0.16147261854647111561641477797041919D+00 + pw(30)= 0.18654619771836799217214159212739546D+00 + pw(31)= 0.21549548172910307786548214775429476D+00 + pw(32)= 0.24891542853693721010435838752243091D+00 + pw(33)= 0.28749460731095023155833868404490821D+00 + pw(34)= 0.33203145035164386811290281590906319D+00 + pw(35)= 0.38345421620808062938143753495150014D+00 + pw(36)= 0.44284595340052667020006768746408026D+00 + pw(37)= 0.51147637334768443075077758376770258D+00 + pw(38)= 0.59084351705391666304914198628651331D+00 + pw(39)= 0.68272966822652633355509618616923460D+00 + pw(40)= 0.78927854268555272214405511831212374D+00 + pw(41)= 0.91310513570381533776392939171880649D+00 + pw(42)= 0.10574572006171471782817226512789618D+01 + pw(43)= 0.12264611065174215544931924431769419D+01 + pw(44)= 0.14255110268002246494143307711209540D+01 + pw(45)= 0.16619131890762190850734830449377183D+01 + pw(46)= 0.19460108584649183568553387986228161D+01 + pw(47)= 0.22932836878593317486878267058034769D+01 + pw(48)= 0.27286173208169332405445956554845995D+01 + pw(49)= 0.32960612083291505532244371231002347D+01 + pw(50)= 0.40852129572893008083223241182607456D+01 + pw(51)= 0.53244443718708726283429813785502501D+01 + pw(52)= 0.79364763274033460235909669710486787D+01 +endif +if(kn == 53) then + px( 1)= 0.26766570337770727528170071246520492D-03 + px( 2)= 0.14134994888717316196512141359660423D-02 + px( 3)= 0.34880516519107403970758408932728686D-02 + px( 4)= 0.65147144546990128201893001657184119D-02 + px( 5)= 0.10527738385818098548917273507385571D-01 + px( 6)= 0.15573804518451021912970981414950172D-01 + px( 7)= 0.21713726252830234938777297490158659D-01 + px( 8)= 0.29024636763636674603190500526652300D-01 + px( 9)= 0.37602750842526664184644761079770188D-01 + px(10)= 0.47566769031392334409003814047662918D-01 + px(11)= 0.59061976687480610814082596844534784D-01 + px(12)= 0.72265065129161553717681820608077034D-01 + px(13)= 0.87389667514901234333023139285043850D-01 + px(14)= 0.10469256811716735138855696367380459D+00 + px(15)= 0.12448052659100388617653071564391793D+00 + px(16)= 0.14711767656647036665309334227739246D+00 + px(17)= 0.17303351979129595018460716414084922D+00 + px(18)= 0.20273163525968242510069482779745799D+00 + px(19)= 0.23679933260205764383581082765297944D+00 + px(20)= 0.27591857089423386003627193488624766D+00 + px(21)= 0.32087851933017891678519245554349762D+00 + px(22)= 0.37259015544714682612264560035248998D+00 + px(23)= 0.43210329558213462840685308411772863D+00 + px(24)= 0.50062645115169427006331566594320087D+00 + px(25)= 0.57954991863929172590111454477122563D+00 + px(26)= 0.67047254853929904297053555520671948D+00 + px(27)= 0.77523270078396767247020251586833006D+00 + px(28)= 0.89594398099521297070364552635590329D+00 + px(29)= 0.10350364637633983911134434941276054D+01 + px(30)= 0.11953042494029295972874450876803876D+01 + px(31)= 0.13799603766985451495079757379930050D+01 + px(32)= 0.15927003388200029769046949954648636D+01 + px(33)= 0.18377757429440148519213952318668567D+01 + px(34)= 0.21200800469036983170430724963400520D+01 + px(35)= 0.24452488452376621255574183769872919D+01 + px(36)= 0.28197779343346462148849901776278631D+01 + px(37)= 0.32511634739679835132078134711319616D+01 + px(38)= 0.37480701572008120067601493824866792D+01 + px(39)= 0.43205356864545827578744429205984365D+01 + px(40)= 0.49802234963192223418120333779580923D+01 + px(41)= 0.57407413466949074338019856932754499D+01 + px(42)= 0.66180524896462577279982649502155498D+01 + px(43)= 0.76310210235736203134564527928377033D+01 + px(44)= 0.88021583399572758584848067436385530D+01 + px(45)= 0.10158682176971824690390966321630474D+02 + px(46)= 0.11734082305033337839638441179290729D+02 + px(47)= 0.13570548670500422261838718291627263D+02 + px(48)= 0.15722959067717953266995872699981198D+02 + px(49)= 0.18265912835593835999181449267925846D+02 + px(50)= 0.21307358846087134932506642776378880D+02 + px(51)= 0.25018732302848078195180809407087663D+02 + px(52)= 0.29716799310767845023829953456809187D+02 + px(53)= 0.36189883296712405656605145166567620D+02 + pw( 1)= 0.68727895311546056672417632556060663D-03 + pw( 2)= 0.16070919274300689247610198780862261D-02 + pw( 3)= 0.25458627429238886643700982329323737D-02 + pw( 4)= 0.35131488282427671750845547282299960D-02 + pw( 5)= 0.45206512132016346669133873690489887D-02 + pw( 6)= 0.55815803493477684696883612785461534D-02 + pw( 7)= 0.67110727810055987477008256080221629D-02 + pw( 8)= 0.79267274337364857488746447630284210D-02 + pw( 9)= 0.92492158987355566092832344937099689D-02 + pw(10)= 0.10702954335651349887056984538693118D-01 + pw(11)= 0.12316817159979657159477748940914048D-01 + pw(12)= 0.14124861608422353998695630535116428D-01 + pw(13)= 0.16167026688242225669124227980203198D-01 + pw(14)= 0.18489777850309322975434696546792003D-01 + pw(15)= 0.21146695079329887928180449259296579D-01 + pw(16)= 0.24199043898015304232347701717858443D-01 + pw(17)= 0.27716412274949780256699383093693372D-01 + pw(18)= 0.31777522531875989191876025347365721D-01 + pw(19)= 0.36471323604726302437811675512366006D-01 + pw(20)= 0.41898438814168148316223242028468085D-01 + pw(21)= 0.48173004736710585958446082731843080D-01 + pw(22)= 0.55424906910282799399495351801946886D-01 + pw(23)= 0.63802408146079784845031445360740918D-01 + pw(24)= 0.73475174123291445512755683573657914D-01 + pw(25)= 0.84637721415891914195614185274215305D-01 + pw(26)= 0.97513337772431529356043555523731762D-01 + pw(27)= 0.11235854926743224508497153739859653D+00 + pw(28)= 0.12946823338997306958433565807904794D+00 + pw(29)= 0.14918150326264123120255847449750618D+00 + pw(30)= 0.17188851951158054500285261747356087D+00 + pw(31)= 0.19803842749441616169319688491554627D+00 + pw(32)= 0.22814867480320173542334621460640035D+00 + pw(33)= 0.26281604587933027973782593098613157D+00 + pw(34)= 0.30272987012943215567370818850475517D+00 + pw(35)= 0.34868803701893003485156344297085159D+00 + pw(36)= 0.40161671758475143724478782201274920D+00 + pw(37)= 0.46259509691660897469854774936577795D+00 + pw(38)= 0.53288704858998218510998379047232847D+00 + pw(39)= 0.61398266720080789199956346065784397D+00 + pw(40)= 0.70765415497287477021582999321525042D+00 + pw(41)= 0.81603315079286754570846341816085723D+00 + pw(42)= 0.94172096259934976721215747985393842D+00 + pw(43)= 0.10879507869255648374416119995114567D+01 + pw(44)= 0.12588348263164235013060998002290594D+01 + pw(45)= 0.14597555182997943253538598754860232D+01 + pw(46)= 0.16980130743144824162948847512548702D+01 + pw(47)= 0.19839559340047588104729344306844787D+01 + pw(48)= 0.23330898446405588037915551380406672D+01 + pw(49)= 0.27703666290115585782188239169095417D+01 + pw(50)= 0.33399854209196729287026099030910255D+01 + pw(51)= 0.41318959182038353492816091595673578D+01 + pw(52)= 0.53754564214506164841297568531690314D+01 + pw(53)= 0.79976793171584998097855897868836215D+01 +endif +if(kn == 54) then + px( 1)= 0.26264930117348429139290996151281804D-03 + px( 2)= 0.13868906738912660879236605780496720D-02 + px( 3)= 0.34218609389365101881877496771598208D-02 + px( 4)= 0.63896402666404078488978957960735342D-02 + px( 5)= 0.10322496845387460828947292469844905D-01 + px( 6)= 0.15264338738352449380895430335814458D-01 + px( 7)= 0.21272256455056810596723516130341971D-01 + px( 8)= 0.28418500058997028295178690126035519D-01 + px( 9)= 0.36792971921502915811121828397263107D-01 + px(10)= 0.46506297013822420204383294802715589D-01 + px(11)= 0.57693520466708188814704660142883650D-01 + px(12)= 0.70518461365129452425223403139712676D-01 + px(13)= 0.85178722402024364366080809274553805D-01 + px(14)= 0.10191132396313916367597436543864246D+00 + px(15)= 0.12099891148146303511007328280200670D+00 + px(16)= 0.14277649208960370764126858545635037D+00 + px(17)= 0.16763870128060588007507633768412165D+00 + px(18)= 0.19604767989373694212165892917330544D+00 + px(19)= 0.22854173844903711798992185084177464D+00 + px(20)= 0.26574507414867220998786541508194950D+00 + px(21)= 0.30837886575038189865518112878881341D+00 + px(22)= 0.35727409810156122358477142059464743D+00 + px(23)= 0.41338647141242328011007195602712556D+00 + px(24)= 0.47781374735909608618161123128437916D+00 + px(25)= 0.55181589013188334829869817262449587D+00 + px(26)= 0.63683838468354892203676383796234712D+00 + px(27)= 0.73453915990617498625433330235628987D+00 + px(28)= 0.84681961134902351472977037719948889D+00 + px(29)= 0.97586030619425103922072317930864173D+00 + px(30)= 0.11241620640819289778534857523108689D+01 + px(31)= 0.12945932455227004001780815602771556D+01 + px(32)= 0.14904442534576446040956072788546865D+01 + px(33)= 0.17154904765677712078499252639933687D+01 + px(34)= 0.19740651959466252026766816200100743D+01 + px(35)= 0.22711443710983615908928413364407126D+01 + px(36)= 0.26124457644974208978288838251819154D+01 + px(37)= 0.30045456290378285221329470833486270D+01 + px(38)= 0.34550172827009750892780305838794194D+01 + px(39)= 0.39725975078115629937328151923773107D+01 + px(40)= 0.45673891239659119753724152909459628D+01 + px(41)= 0.52511117619579365605498903719507942D+01 + px(42)= 0.60374185967841005869825918871706947D+01 + px(43)= 0.69423059418930191122711841795147301D+01 + px(44)= 0.79846576055279832356713682899398131D+01 + px(45)= 0.91869913294368492995853064476380612D+01 + px(46)= 0.10576519438659101773107722114056823D+02 + px(47)= 0.12186718685254033866362337537527451D+02 + px(48)= 0.14059766727898227283287996455084238D+02 + px(49)= 0.16250545334714196738451761057164415D+02 + px(50)= 0.18833703150856317701270861469808901D+02 + px(51)= 0.21917341606064316865260155485126217D+02 + px(52)= 0.25673282302478359060862571209297198D+02 + px(53)= 0.30419275213506007788817412536110986D+02 + px(54)= 0.36946729449797287732704130194371723D+02 + pw( 1)= 0.67438506760335604670255898997892204D-03 + pw( 2)= 0.15766730795758068292367252096595575D-02 + pw( 3)= 0.24969010052497357618191188751223522D-02 + pw( 4)= 0.34440151585437669876044394575278031D-02 + pw( 5)= 0.44289906416819842278218395665719176D-02 + pw( 6)= 0.54641754472827296169646638159218540D-02 + pw( 7)= 0.65636602656678083773300472655876417D-02 + pw( 8)= 0.77437596216345299454674361542457447D-02 + pw( 9)= 0.90235600454359303552602178356724465D-02 + pw(10)= 0.10425525098028000202217706397525802D-01 + pw(11)= 0.11976141415278903324803398081755125D-01 + pw(12)= 0.13706580129463991816022845593243222D-01 + pw(13)= 0.15653341765432086164765180782288484D-01 + pw(14)= 0.17858856637993859978963097726835278D-01 + pw(15)= 0.20372031993532375434976430093094102D-01 + pw(16)= 0.23248770947110602095264163700609205D-01 + pw(17)= 0.26552527146765384924391807859062013D-01 + pw(18)= 0.30354987338193146683949319082938468D-01 + pw(19)= 0.34736978572221172926784909895879575D-01 + pw(20)= 0.39789676392153673565656352708108709D-01 + pw(21)= 0.45616156632160377923066059864692116D-01 + pw(22)= 0.52333303181858472447283586460111768D-01 + pw(23)= 0.60074068936523280823710913114576385D-01 + pw(24)= 0.68990089360969502777474092821797731D-01 + pw(25)= 0.79254662793149992222714102849062817D-01 + pw(26)= 0.91066132125499050058268572358948574D-01 + pw(27)= 0.10465172413589772775444532528313942D+00 + pw(28)= 0.12027192389960804141840562034167157D+00 + pw(29)= 0.13822548329913942877038198233392259D+00 + pw(30)= 0.15885518710020893698686679081438881D+00 + pw(31)= 0.18255453080015552034691247423767820D+00 + pw(32)= 0.20977550590982571093431208426850228D+00 + pw(33)= 0.24103774649612254567565390373246475D+00 + pw(34)= 0.27693937431815337409878200394680401D+00 + pw(35)= 0.31817000170668241587144774061721975D+00 + pw(36)= 0.36552653149467718606906358503261169D+00 + pw(37)= 0.41993266332639492829188554992634159D+00 + pw(38)= 0.48246342603227571148223509952223773D+00 + pw(39)= 0.55437668888757955166034857478955110D+00 + pw(40)= 0.63715459820483206603034925190427611D+00 + pw(41)= 0.73255947628580611457525194256778522D+00 + pw(42)= 0.84271132620425470830602701627255624D+00 + pw(43)= 0.97019847802255398271823872088809738D+00 + pw(44)= 0.11182405648635278467279403261340421D+01 + pw(45)= 0.12909368955309583178279432436240182D+01 + pw(46)= 0.14936596913948188663729129981816013D+01 + pw(47)= 0.17337048448489904526596452227676677D+01 + pw(48)= 0.20214277264670434357889736680445585D+01 + pw(49)= 0.23723618064324051170579931680799604D+01 + pw(50)= 0.28115262822357354520836275890201874D+01 + pw(51)= 0.33832701615264074051804266553052254D+01 + pw(52)= 0.41778930411553237976334045242367218D+01 + pw(53)= 0.54257303561036728308093456793849096D+01 + pw(54)= 0.80580396891998440461864612470334815D+01 +endif +if(kn == 55) then + px( 1)= 0.25781894164194012434309377930977151D-03 + px( 2)= 0.13612751031594850206775668181403743D-02 + px( 3)= 0.33581698458852455723214904600493989D-02 + px( 4)= 0.62693690413467873882583513720977616D-02 + px( 5)= 0.10125310136665687971355854015542415D-01 + px( 6)= 0.14967346351298576942771412456996075D-01 + px( 7)= 0.20849147888131304033053596047200679D-01 + px( 8)= 0.27838492572990899694403556585128016D-01 + px( 9)= 0.36019520279673976527480271121267550D-01 + px(10)= 0.45495507499439646885138144349015201D-01 + px(11)= 0.56392208694907062019327988961512485D-01 + px(12)= 0.68861794220674272839206633722494427D-01 + px(13)= 0.83087389784370585294971502938757785D-01 + px(14)= 0.99288194535557030324060332733237417D-01 + px(15)= 0.11772513411433556959001042015562167D+00 + px(16)= 0.13870700468575135671370372276360989D+00 + px(17)= 0.16259709487800222734179448321576442D+00 + px(18)= 0.18982033572772041366926851284912359D+00 + px(19)= 0.22087111215348614429940107012361562D+00 + px(20)= 0.25632195206452177680013231303976781D+00 + px(21)= 0.29683337125272468206259039405023979D+00 + px(22)= 0.34316518501958180617486163242847902D+00 + px(23)= 0.39618960592837583486273888758703435D+00 + px(24)= 0.45690644457913117767731277056887464D+00 + px(25)= 0.52646073119585672648955063711186170D+00 + px(26)= 0.60616309005755762712921556869139875D+00 + px(27)= 0.69751323092341496562260118366147217D+00 + px(28)= 0.80222697243582393516449526286850739D+00 + px(29)= 0.92226728168578278287496843309443837D+00 + px(30)= 0.10598799021605760545446954338407357D+01 + px(31)= 0.12176342516911872309482334694729854D+01 + px(32)= 0.13984704081792695653834769855727069D+01 + px(33)= 0.16057531729310064020228982054108760D+01 + px(34)= 0.18433344234846220454903450952844757D+01 + px(35)= 0.21156252611660201772094982169604363D+01 + px(36)= 0.24276798551755632926179359953601148D+01 + px(37)= 0.27852934330452205314765679671405731D+01 + px(38)= 0.31951176405057028717258322130425570D+01 + px(39)= 0.36647976062397870566073557102018905D+01 + px(40)= 0.42031366779907243753664331178612278D+01 + px(41)= 0.48202972331234718143106561639280279D+01 + px(42)= 0.55280496786915488679928146699422870D+01 + px(43)= 0.63400875319084531166198158934082316D+01 + px(44)= 0.72724356766190935880980645516629891D+01 + px(45)= 0.83439939737765991151533907320407321D+01 + px(46)= 0.95772839454414406682247983833388953D+01 + px(47)= 0.10999511253155353900722704602341888D+02 + px(48)= 0.12644139881765944224874013113714956D+02 + px(49)= 0.14553337047736742937007375682160668D+02 + px(50)= 0.16781991883653111960448246164544552D+02 + px(51)= 0.19404807001908978674636447631480543D+02 + px(52)= 0.22530041649957455873120088574563768D+02 + px(53)= 0.26329908156600549662558562174411129D+02 + px(54)= 0.31123139662609753954221361691834122D+02 + px(55)= 0.37704208661211514525413812039019937D+02 + pw( 1)= 0.66197010017150176035001943503791612D-03 + pw( 2)= 0.15473986838973462236228078334406893D-02 + pw( 3)= 0.24498237063923531340555158367628164D-02 + pw( 4)= 0.33776287859319922812719456819175282D-02 + pw( 5)= 0.43411225775038708068974684768107364D-02 + pw( 6)= 0.53518651954260805354105863317050609D-02 + pw( 7)= 0.64229967432330231179415055321225939D-02 + pw( 8)= 0.75696698103592431759756449888418760D-02 + pw( 9)= 0.88095427367590507023412664390713554D-02 + pw(10)= 0.10163325161348951569439668089332658D-01 + pw(11)= 0.11655363240567140813674097334911855D-01 + pw(12)= 0.13314243348515058027067949270881268D-01 + pw(13)= 0.15173386635166955733192695121243376D-01 + pw(14)= 0.17271608028279645041513857207256865D-01 + pw(15)= 0.19653626901657341159725115198646941D-01 + pw(16)= 0.22370543498465587594423534082963911D-01 + pw(17)= 0.25480329057857957803698239091828550D-01 + pw(18)= 0.29048405984491612079844523910580431D-01 + pw(19)= 0.33148404854392620654285014862228099D-01 + pw(20)= 0.37863173170688566168679168381831541D-01 + pw(21)= 0.43286083546221674152033551748574709D-01 + pw(22)= 0.49522660152701866580552473522264330D-01 + pw(23)= 0.56692523854953065740425656620598762D-01 + pw(24)= 0.64931653102898776070657891627893174D-01 + pw(25)= 0.74394966967471873327666467149784361D-01 + pw(26)= 0.85259253137910454276787766290602372D-01 + pw(27)= 0.97726482395605256876361711073457773D-01 + pw(28)= 0.11202756962445724523518796251371951D+00 + pw(29)= 0.12842665975742201914453661139337216D+00 + pw(30)= 0.14722603673926230968313687924515801D+00 + pw(31)= 0.16877177710790063461389831494800560D+00 + pw(32)= 0.19346030041623174522759333491248744D+00 + pw(33)= 0.22174601068744206398277053899636196D+00 + pw(34)= 0.25415028231904386989193251104458640D+00 + pw(35)= 0.29127212894662705701804000206750184D+00 + pw(36)= 0.33380101778748297671825708760982179D+00 + pw(37)= 0.38253247505964083905642476699352139D+00 + pw(38)= 0.43838740189609651989190850364739389D+00 + pw(39)= 0.50243643541811971842045067743697266D+00 + pw(40)= 0.57593132895181935261643585397322053D+00 + pw(41)= 0.66034632680141149455034526996759720D+00 + pw(42)= 0.75743410948623930877746056875550536D+00 + pw(43)= 0.86930350487407763483137649785144665D+00 + pw(44)= 0.99853057127864209173395623916848905D+00 + pw(45)= 0.11483223403291556759761625679375804D+01 + pw(46)= 0.13227664361633706905774477225292859D+01 + pw(47)= 0.15272262858299299746420952422140745D+01 + pw(48)= 0.17689950292216856169402049903686857D+01 + pw(49)= 0.20584365667376688423615789562990280D+01 + pw(50)= 0.24111135465716154769717216299933567D+01 + pw(51)= 0.28521135641444982391089026540840319D+01 + pw(52)= 0.34259355468040848187361378226527105D+01 + pw(53)= 0.42232267265008794416077388243344182D+01 + pw(54)= 0.54752904412621487070917591943555763D+01 + pw(55)= 0.81175842524558588852537391429064074D+01 +endif +if(kn == 56) then + px( 1)= 0.25316296361047986979222704481695825D-03 + px( 2)= 0.13365901075979378376059799633963547D-02 + px( 3)= 0.32968187867088652972746128579161298D-02 + px( 4)= 0.61535889280423947494589016534393146D-02 + px( 5)= 0.99356438145931069871255487366443905D-02 + px( 6)= 0.14681977810167156277573501598213788D-01 + px( 7)= 0.20443111678193472909091734497338465D-01 + px( 8)= 0.27282717458052044439797443722558727D-01 + px( 9)= 0.35279661237909244138393562138044293D-01 + px(10)= 0.44530515113621224893066466054989604D-01 + px(11)= 0.55152581833421451048846493819641517D-01 + px(12)= 0.67287461982589797279652325494947566D-01 + px(13)= 0.81105172698463510127775615113455155D-01 + px(14)= 0.96808802192712951405093425956382214D-01 + px(15)= 0.11463966370426778674305108921559686D+00 + px(16)= 0.13488290707039924587391549901601733D+00 + px(17)= 0.15787356616502069558695735577341045D+00 + px(18)= 0.18400306948311470201893098688372094D+00 + px(19)= 0.21372631172205091315476055948506776D+00 + px(20)= 0.24756945964825417876144186941921348D+00 + px(21)= 0.28613872754179262008712244069824825D+00 + px(22)= 0.33013039508573925156714087895320715D+00 + px(23)= 0.38034235430595768392206349401494352D+00 + px(24)= 0.43768747182717257038476928803614522D+00 + px(25)= 0.50320905100551354990170632643254963D+00 + px(26)= 0.57809868566859133942855647933251305D+00 + px(27)= 0.66371681885395609603081291900385610D+00 + px(28)= 0.76161635782113158520389067408690172D+00 + px(29)= 0.87356975058892174416448481275277481D+00 + px(30)= 0.10015999992219932485646137689436440D+01 + px(31)= 0.11480161722880751230648197166792158D+01 + px(32)= 0.13154540866711925408169656894558134D+01 + px(33)= 0.15069229633419957830339656064577451D+01 + px(34)= 0.17258590324121035302982461790011709D+01 + px(35)= 0.19761872844199233411044622815807802D+01 + px(36)= 0.22623928590392104330658026022941003D+01 + px(37)= 0.25896039617088162412729887555211206D+01 + px(38)= 0.29636887519045083121995707405502705D+01 + px(39)= 0.33913694283389338916129064504517107D+01 + px(40)= 0.38803578612346352983211862453492671D+01 + px(41)= 0.44395187703404766581147350250334258D+01 + px(42)= 0.50790689079212912344545291525845662D+01 + px(43)= 0.58108244499131239038620507188960897D+01 + px(44)= 0.66485146168191270558987216454001789D+01 + px(45)= 0.76081888077726672589265324613407006D+01 + px(46)= 0.87087596930040374091019857045701857D+01 + px(47)= 0.99727503684543062151028368853863376D+01 + px(48)= 0.11427358864753300284154140246256655D+02 + px(49)= 0.13106036822460464759217266858454354D+02 + px(50)= 0.15050942893772414274408031537252686D+02 + px(51)= 0.17316976836533264840463009058352564D+02 + px(52)= 0.19978899582721245916009884455806908D+02 + px(53)= 0.23145132959066649402957025145184967D+02 + px(54)= 0.26988283646585129742188842153188599D+02 + px(55)= 0.31828066291287360025547176837209814D+02 + px(56)= 0.38461992963555384952401394913962473D+02 + pw( 1)= 0.65000398579028612344523107765589882D-03 + pw( 2)= 0.15191959531332206241117313092588598D-02 + pw( 3)= 0.24045082453807306790230685497814259D-02 + pw( 4)= 0.33138053120687689905848031561566419D-02 + pw( 5)= 0.42567825727886822265204218213429885D-02 + pw( 6)= 0.52442786039205566768969009014580993D-02 + pw( 7)= 0.62885672143814305898041140071920990D-02 + pw( 8)= 0.74037469337422375540169840316881048D-02 + pw( 9)= 0.86061856387838724696966695518776445D-02 + pw(10)= 0.99150131408851259539215604893253718D-02 + pw(11)= 0.11352651712245621514495559004367745D-01 + pw(12)= 0.12945367107597103270871461076597819D-01 + pw(13)= 0.14723816350026995714384635716386299D-01 + pw(14)= 0.16723567921281172025119870508991203D-01 + pw(15)= 0.18985579339702223096072943965050960D-01 + pw(16)= 0.21556638092713088509079450535846802D-01 + pw(17)= 0.24489800737865776493594940653820355D-01 + pw(18)= 0.27844892154300886517220254607831059D-01 + pw(19)= 0.31689141187091977270960955143508341D-01 + pw(20)= 0.36098024102012043921594821940947720D-01 + pw(21)= 0.41156366531808391430233301214909888D-01 + pw(22)= 0.46959728529751547723837911521797776D-01 + pw(23)= 0.53616077334483042468992867566071648D-01 + pw(24)= 0.61247744777900280040241146591130434D-01 + pw(25)= 0.69993670734405775857440933512557351D-01 + pw(26)= 0.80011946473114843210018552446024989D-01 + pw(27)= 0.91482687623385083454637087474649108D-01 + pw(28)= 0.10461128275712219242263696233465467D+00 + pw(29)= 0.11963207950030344698937570198730592D+00 + pw(30)= 0.13681258639855890430946376814155933D+00 + pw(31)= 0.15645828725394505928296678986188780D+00 + pw(32)= 0.17891818774495993299205732107037433D+00 + pw(33)= 0.20459124493705478925339465025762632D+00 + pw(34)= 0.23393387292296502681950858384393993D+00 + pw(35)= 0.26746877815837016930288816174815801D+00 + pw(36)= 0.30579546472966641872754674677137451D+00 + pw(37)= 0.34960287592590148622876766322786034D+00 + pw(38)= 0.39968482435244330897498051519252302D+00 + pw(39)= 0.45695914023983362115797429164759424D+00 + pw(40)= 0.52249188739719558018523893289646747D+00 + pw(41)= 0.59752864128661802876559766037133258D+00 + pw(42)= 0.68353583227497725169589493456556878D+00 + pw(43)= 0.78225676673770165676669761907089189D+00 + pw(44)= 0.89578957040518890914540096033762699D+00 + pw(45)= 0.10266987265790718339050961200134187D+01 + pw(46)= 0.11781795916828648269313771182072094D+01 + pw(47)= 0.13543092491251640404184331203718613D+01 + pw(48)= 0.15604436574176829660651085498995850D+01 + pw(49)= 0.18038746446191042006169647129755746D+01 + pw(50)= 0.20949760879837258079028998605170623D+01 + pw(51)= 0.24493410971001376006947638889013500D+01 + pw(52)= 0.28921265037563651103311910556760302D+01 + pw(53)= 0.34679810059374950896183058369361721D+01 + pw(54)= 0.42678969880522956032021408252168488D+01 + pw(55)= 0.55241361350341697933563283440847313D+01 + pw(56)= 0.81763098024027352046055737635386992D+01 +endif +if(kn == 57) then + px( 1)= 0.24868815335455860789772463829278776D-03 + px( 2)= 0.13128708050967756872359979837581267D-02 + px( 3)= 0.32378911411275754242453920014506796D-02 + px( 4)= 0.60424464389102377765134145508034116D-02 + px( 5)= 0.97537140744139523885808398942622953D-02 + px( 6)= 0.14408512776948249945597359956767419D-01 + px( 7)= 0.20054465803924087173957630667856565D-01 + px( 8)= 0.26751478412571506944557766303264093D-01 + px( 9)= 0.34573592603908669936189665703461548D-01 + px(10)= 0.43611265799596486187161513111221049D-01 + px(11)= 0.53974113832253153451922084032312655D-01 + px(12)= 0.65794147604388044446693612420778637D-01 + px(13)= 0.79229515286929499370440341287450519D-01 + px(14)= 0.94468740236461490024906772241244278D-01 + px(15)= 0.11173542497728950196695294189711635D+00 + px(16)= 0.13129338283356472153949413479135588D+00 + px(17)= 0.15345217055234222351399145423334369D+00 + px(18)= 0.17857303240016059517363555848698504D+00 + px(19)= 0.20707532489301037480311227730726369D+00 + px(20)= 0.23944355888568364571611377084388653D+00 + px(21)= 0.27623525598764641501713800314236035D+00 + px(22)= 0.31808985711331926597193028676339348D+00 + px(23)= 0.36573893965362420393418300064924317D+00 + px(24)= 0.42001800250816678509315665005653842D+00 + px(25)= 0.48188007571772045658000477338572683D+00 + px(26)= 0.55241141366947723712332135298933352D+00 + px(27)= 0.63284954422789175567060868091083275D+00 + px(28)= 0.72460397335389008193055638126809504D+00 + px(29)= 0.82927988618893103652768340546531032D+00 + px(30)= 0.94870524097232991512519332944115950D+00 + px(31)= 0.10849617219948828718866932170405878D+01 + px(32)= 0.12404201040143688064542721435857076D+01 + px(33)= 0.14177806871234317057617629235375845D+01 + px(34)= 0.16201195944991234133183611776590787D+01 + px(35)= 0.18509418957597958737177594237457576D+01 + px(36)= 0.21142427406807205051181553190404025D+01 + px(37)= 0.24145779838341929681853920899859986D+01 + px(38)= 0.27571461831926801287738694538112624D+01 + px(39)= 0.31478844142641227801141212544520951D+01 + px(40)= 0.35935811310102033845744816463442230D+01 + px(41)= 0.41020104413877221957804467425939289D+01 + px(42)= 0.46820938302391253474628239056559849D+01 + px(43)= 0.53440978451440606526835982001965101D+01 + px(44)= 0.60998800348958771477889866745643881D+01 + px(45)= 0.69632012890426281929489833882602384D+01 + px(46)= 0.79501320435284778078392913107490947D+01 + px(47)= 0.90795950558462101790938546852706470D+01 + px(48)= 0.10374113225206633587054123726355172D+02 + px(49)= 0.11860876304821553920803819021800250D+02 + px(50)= 0.13573324197835825157192895761042269D+02 + px(51)= 0.15553608911071130591094968547049722D+02 + px(52)= 0.17856644009398398334027651945364308D+02 + px(53)= 0.20557252972621646881571546529989799D+02 + px(54)= 0.23764024975150934121434317734353805D+02 + px(55)= 0.27649965226105594436613117812938989D+02 + px(56)= 0.32535770450738589606101398766605691D+02 + px(57)= 0.39221977092927801982893352173282336D+02 + pw( 1)= 0.63850406831505099738052476325581961D-03 + pw( 2)= 0.14921037986204782494894229786059731D-02 + pw( 3)= 0.23610114029305975624758426308210372D-02 + pw( 4)= 0.32526128541318438561754430100405078D-02 + pw( 5)= 0.41760397770836242676248541207424580D-02 + pw( 6)= 0.51414703399848581702756105177606384D-02 + pw( 7)= 0.61603891216042467763280301070012439D-02 + pw( 8)= 0.72459385496994110627368996655782062D-02 + pw( 9)= 0.84133204261124725154997593044037002D-02 + pw(10)= 0.96802413566072434418561861828501851D-02 + pw(11)= 0.11067394131957919015201993364016383D-01 + pw(12)= 0.12598960733898038415484720369114255D-01 + pw(13)= 0.14303116690973784728070109566032249D-01 + pw(14)= 0.16212514694032192842344075317239116D-01 + pw(15)= 0.18364731480288355645565506966903663D-01 + pw(16)= 0.20802678116001007719911702716244623D-01 + pw(17)= 0.23574997949263392825181094032890825D-01 + pw(18)= 0.26736501608313518430082020410358951D-01 + pw(19)= 0.30348704821972667356360038317766333D-01 + pw(20)= 0.34480535573107543829081934087773551D-01 + pw(21)= 0.39209262449594068155056630028607624D-01 + pw(22)= 0.44621673558809258382775767008026743D-01 + pw(23)= 0.50815515123079612189194336850658426D-01 + pw(24)= 0.57901188015516540741960287519003280D-01 + pw(25)= 0.66003700690181420273674403867109883D-01 + pw(26)= 0.75264885586780504125481386653660311D-01 + pw(27)= 0.85845899110248805481309245561278484D-01 + pw(28)= 0.97930039537017286368934956545788095D-01 + pw(29)= 0.11172593126446497910307506265827343D+00 + pw(30)= 0.12747113783580932628750351291841410D+00 + pw(31)= 0.14543628129357976189998302298577329D+00 + pw(32)= 0.16592976340385132601117576239709225D+00 + pw(33)= 0.18930320741347600799686592812399379D+00 + pw(34)= 0.21595777019635163082254656264589623D+00 + pw(35)= 0.24635151796681218166031807821910603D+00 + pw(36)= 0.28100812005137097745126867432418044D+00 + pw(37)= 0.32052720324236309080623061527301095D+00 + pw(38)= 0.36559683724134186969654741198617812D+00 + pw(39)= 0.41700881002095348086483260374822063D+00 + pw(40)= 0.47567763253365164936264867132129384D+00 + pw(41)= 0.54266463607013432598384544728159736D+00 + pw(42)= 0.61920917583183104118650977432015768D+00 + pw(43)= 0.70676996963437778493319526328106689D+00 + pw(44)= 0.80708121914722438188398403384545040D+00 + pw(45)= 0.92223080476089922545257576125601390D+00 + pw(46)= 0.10547722906765838946076024117162519D+01 + pw(47)= 0.12078902142342952742295563718452734D+01 + pw(48)= 0.13856521611695241681852077225258712D+01 + pw(49)= 0.15934078194682780100598346033527563D+01 + pw(50)= 0.18384490024798631769423787694389017D+01 + pw(51)= 0.21311608735825934018647767200886737D+01 + pw(52)= 0.24871681932345592025950623054739285D+01 + pw(53)= 0.29316978875123326489119117358318389D+01 + pw(54)= 0.35095485536224900059615141531182325D+01 + pw(55)= 0.43120561559297553662108510075135951D+01 + pw(56)= 0.55724340409598520094151224510004002D+01 + pw(57)= 0.82344141971751154156256216589492573D+01 +endif +if(kn == 58) then + px( 1)= 0.24434033394451184831332088274972614D-03 + px( 2)= 0.12898297855758084794990735012824795D-02 + px( 3)= 0.31806717757901164181366233077269143D-02 + px( 4)= 0.59345896357739671411939250656689172D-02 + px( 5)= 0.95773007935647589740131510133326933D-02 + px( 6)= 0.14143599916031750336288464659818307D-01 + px( 7)= 0.19678421471099466334328004427638463D-01 + px( 8)= 0.26238185206866063929609421458766724D-01 + px( 9)= 0.33892482730395244265993385655932016D-01 + px(10)= 0.42726147636423146495787112568669502D-01 + px(11)= 0.52841746547136186264664886791106024D-01 + px(12)= 0.64362519550464307152961734809637400D-01 + px(13)= 0.77435783944099717432445083945677444D-01 + px(14)= 0.92236796334736021064525541096205779D-01 + px(15)= 0.10897304960833762497368263739776640D+00 + px(16)= 0.12788897058127535161532867316020021D+00 + px(17)= 0.14927098966869470910429970787632708D+00 + px(18)= 0.17345298118495822795560109857844673D+00 + px(19)= 0.20082212068477187733619503336787526D+00 + px(20)= 0.23182526492157509727427115110610957D+00 + px(21)= 0.26697601677385238610408934208835716D+00 + px(22)= 0.30686267976111180750915583977482233D+00 + px(23)= 0.35215732947079012938113786105805915D+00 + px(24)= 0.40362623554087235036623549904310772D+00 + px(25)= 0.46214186627722416843651050017300999D+00 + px(26)= 0.52869670779772405928275015906498196D+00 + px(27)= 0.60441913758500942132872353014906915D+00 + px(28)= 0.69059161190173186977923080505129435D+00 + px(29)= 0.78867145852021566336079167775472318D+00 + px(30)= 0.90031461037842010940804732183197484D+00 + px(31)= 0.10274026720013124888351576363117054D+01 + px(32)= 0.11720737797358867139870870772361403D+01 + px(33)= 0.13367578014779413237552856365102921D+01 + px(34)= 0.15242165260123579138727083350791305D+01 + px(35)= 0.17375896232753198202855353912673167D+01 + px(36)= 0.19804473251817493601350778629165516D+01 + px(37)= 0.22568509975250307614430520806868566D+01 + px(38)= 0.25714230695306034625372310347575688D+01 + px(39)= 0.29294281925088078526353597436284922D+01 + px(40)= 0.33368680628505624786084197643705525D+01 + px(41)= 0.38005931431891197468681483744664603D+01 + px(42)= 0.43284356654318541562597981524559705D+01 + px(43)= 0.49293699827199698643746537542247355D+01 + px(44)= 0.56137088447382837752405108085311518D+01 + px(45)= 0.63933479763797142778167198658680473D+01 + px(46)= 0.72820772388702724076103924229296585D+01 + px(47)= 0.82959860217191960273479806666773572D+01 + px(48)= 0.94540058246887833757920831704609262D+01 + px(49)= 0.10778658868884060967669218732823142D+02 + px(50)= 0.12297127123681394967235226888868496D+02 + px(51)= 0.14042840296906789602475373898648457D+02 + px(52)= 0.16057946341424185599637733181299734D+02 + px(53)= 0.18397376144609008660129440553886667D+02 + px(54)= 0.21136019829441359432294268771943368D+02 + px(55)= 0.24382637830465314053970048093781607D+02 + px(56)= 0.28310635883229105824662724848457888D+02 + px(57)= 0.33241688156491882652244516947157465D+02 + px(58)= 0.39979324854730350560380760310991567D+02 + pw( 1)= 0.62733109048409588703595816867276199D-03 + pw( 2)= 0.14657936036557848053358560746138226D-02 + pw( 3)= 0.23188039412803108240913072658971970D-02 + pw( 4)= 0.31933033503497732665912080929284740D-02 + pw( 5)= 0.40979006451946182384357721499095629D-02 + pw( 6)= 0.50421643688026695837303498584333168D-02 + pw( 7)= 0.60368540232012649771110859344923619D-02 + pw( 8)= 0.70942376382161779284891095459836907D-02 + pw( 9)= 0.82284548942576416691892667668446138D-02 + pw(10)= 0.94559203825706345816982076964964040D-02 + pw(11)= 0.10795760712764688238988523701079555D-01 + pw(12)= 0.12270273749594030379848145803872545D-01 + pw(13)= 0.13905392767905254997074541953428596D-01 + pw(14)= 0.15731135789435791324224040917880572D-01 + pw(15)= 0.17782024053941809054144110879593548D-01 + pw(16)= 0.20097465728382493095345225347477278D-01 + pw(17)= 0.22722120795621834323838288686748639D-01 + pw(18)= 0.25706285556264310174964040630928441D-01 + pw(19)= 0.29106352315788903005352971189456581D-01 + pw(20)= 0.32985404610440883148326461811537831D-01 + pw(21)= 0.37413999049710482757623335110546207D-01 + pw(22)= 0.42471166374810272031277917315265662D-01 + pw(23)= 0.48245645122530704885901911499992297D-01 + pw(24)= 0.54837348649234653699497897577026759D-01 + pw(25)= 0.62359063229977042722185054531332040D-01 + pw(26)= 0.70938380290334218998361325437647613D-01 + pw(27)= 0.80719876243549163356586986213036934D-01 + pw(28)= 0.91867565718926819665093909060870324D-01 + pw(29)= 0.10456766632576442199326583277715739D+00 + pw(30)= 0.11903172509910440103985964305294291D+00 + pw(31)= 0.13550016907993941758980848856857065D+00 + pw(32)= 0.15424635639536902570179058036488656D+00 + pw(33)= 0.17558122145590420153365262455970893D+00 + pw(34)= 0.19985863067967063227542616526431877D+00 + pw(35)= 0.22748159641691842637396351126311924D+00 + pw(36)= 0.25890954059680923306019616384762021D+00 + pw(37)= 0.29466686202428234212246962384893466D+00 + pw(38)= 0.33535315105334115324523894344163479D+00 + pw(39)= 0.38165552584862205474730474843490408D+00 + pw(40)= 0.43436375602075320325783351993460999D+00 + pw(41)= 0.49438912403001643077208212506747590D+00 + pw(42)= 0.56278840340808053436660241948913705D+00 + pw(43)= 0.64079498868817746739885701660482042D+00 + pw(44)= 0.72986023375748618090030528388862540D+00 + pw(45)= 0.83170968157393847753518524949795654D+00 + pw(46)= 0.94842152201009860199494152865552426D+00 + pw(47)= 0.10825390752513919957077425880259437D+01 + pw(48)= 0.12372368612987946539691608858506276D+01 + pw(49)= 0.14165738915071239610049524678978730D+01 + pw(50)= 0.16258946038408501735019290717589660D+01 + pw(51)= 0.18724918616166796018089411475069718D+01 + pw(52)= 0.21667631278657422335640585222239716D+01 + pw(53)= 0.25243656135025940850141001255242091D+01 + pw(54)= 0.29705966338517197699939016107637128D+01 + pw(55)= 0.35504038802739003258026682341550974D+01 + pw(56)= 0.43554634619204172726228503521716960D+01 + pw(57)= 0.56199288682781632030749398177962085D+01 + pw(58)= 0.82915991164603216241594990786246181D+01 +endif +if(kn == 59) then + px( 1)= 0.24017044279779292814629839617243450D-03 + px( 2)= 0.12677357311318245204340430050679257D-02 + px( 3)= 0.31258222629100185940076720172210997D-02 + px( 4)= 0.58312499497336295459920293987796347D-02 + px( 5)= 0.94083841957770971773603088185375410D-02 + px( 6)= 0.13890148671899779318807930205580027D-01 + px( 7)= 0.19318997801708323654202530335822085D-01 + px( 8)= 0.25748144263471581820366574897844328D-01 + px( 9)= 0.33243095406506599413902998194831696D-01 + px(10)= 0.41883537206821979634762367168491752D-01 + px(11)= 0.51765600230040784734413248495245234D-01 + px(12)= 0.63004534953194280528857501029401263D-01 + px(13)= 0.75737811688489614503254126944665394D-01 + px(14)= 0.90128644081599828866050645306435530D-01 + px(15)= 0.10636991817245456656640674998882425D+00 + px(16)= 0.12468849734796975917708256298381635D+00 + px(17)= 0.14534987449428730815870543082646967D+00 + px(18)= 0.16866316194781867947963273417414352D+00 + px(19)= 0.19498644807014926777480289949553731D+00 + px(20)= 0.22473260025151679743882657221256944D+00 + px(21)= 0.25837564665252656619516749971741362D+00 + px(22)= 0.29645791158221640514428954276611043D+00 + px(23)= 0.33959810551825596456611237045554791D+00 + px(24)= 0.38850058094912455808801515111809716D+00 + px(25)= 0.44396596557949377138507728624091020D+00 + px(26)= 0.50690338304849533824426898328468191D+00 + px(27)= 0.57834447492359249583873987633512907D+00 + px(28)= 0.65945945037149605268093152551109157D+00 + px(29)= 0.75157541311753698549281538653947915D+00 + px(30)= 0.85619724923675556880411715666490139D+00 + px(31)= 0.97503140385009140556694003085244755D+00 + px(32)= 0.11100129303358947930559321811557219D+01 + px(33)= 0.12633362636366230834317390982154572D+01 + px(34)= 0.14374902524376435821063430688457459D+01 + px(35)= 0.16352980880451859683554892839580029D+01 + px(36)= 0.18599628978969427131632789868222316D+01 + px(37)= 0.21151199396574527695286927569279424D+01 + px(38)= 0.24048965537486799801854034104807364D+01 + px(39)= 0.27339813311614543030519151016136162D+01 + px(40)= 0.31077043634013766751037593199231006D+01 + px(41)= 0.35321310127538594381872851430342547D+01 + px(42)= 0.40141724497033117837380156117688880D+01 + px(43)= 0.45617173671065324231220714157123560D+01 + px(44)= 0.51837909802502262421170872012101186D+01 + px(45)= 0.58907499495757681122761284245660513D+01 + px(46)= 0.66945256944244027786731357744463874D+01 + px(47)= 0.76089345000234897625689957165319158D+01 + px(48)= 0.86500822381865882984046085645261005D+01 + px(49)= 0.98369069052844445818188741380057179D+01 + px(50)= 0.11191928172141132463081500916511436D+02 + px(51)= 0.12742318869317979750127012420299866D+02 + px(52)= 0.14521497812382557030759980660881216D+02 + px(53)= 0.16571609002403235432729979108495388D+02 + px(54)= 0.18947601696583022289134197021834890D+02 + px(55)= 0.21724434819141199485567406823338980D+02 + px(56)= 0.25011043569653595241704228425344735D+02 + px(57)= 0.28981239539754668238717062448027987D+02 + px(58)= 0.33957682861442574052880069059391550D+02 + px(59)= 0.40746915059614258028591599305878563D+02 + pw( 1)= 0.61661580843002818474199774713820435D-03 + pw( 2)= 0.14405704070514471179034636856714158D-02 + pw( 3)= 0.22783669060606481294489644369246025D-02 + pw( 4)= 0.31365358110962922351769249715961996D-02 + pw( 5)= 0.40232040175153947001508790877931979D-02 + pw( 6)= 0.49473803193663214737587320055863858D-02 + pw( 7)= 0.59191612038299517714653169073172258D-02 + pw( 8)= 0.69500182956008661963671339567494311D-02 + pw( 9)= 0.80531272876872073780988243944927837D-02 + pw(10)= 0.92437335233207542919233267694815444D-02 + pw(11)= 0.10539549268603537847804050413706850D-01 + pw(12)= 0.11961173126551657822377551429583253D-01 + pw(13)= 0.13532517058206043238763114996607351D-01 + pw(14)= 0.15281223576152454708908655408040014D-01 + pw(15)= 0.17239057603729693052163344051819092D-01 + pw(16)= 0.19442266458961476989325558184455495D-01 + pw(17)= 0.21931917469546401944382556564398834D-01 + pw(18)= 0.24754242433746641427431268228835038D-01 + pw(19)= 0.27961035103797151420897993567315053D-01 + pw(20)= 0.31610155514745908523448651363558848D-01 + pw(21)= 0.35766190345276654301560678957468317D-01 + pw(22)= 0.40501304167252803397704165063681030D-01 + pw(23)= 0.45896299064297235092911388363815978D-01 + pw(24)= 0.52041886574768681501627601462919698D-01 + pw(25)= 0.59040170127350183777203679746281117D-01 + pw(26)= 0.67006338167425270345951219198202241D-01 + pw(27)= 0.76070575665505304542628473598135760D-01 + pw(28)= 0.86380211831507007368019890251542642D-01 + pw(29)= 0.98102132636666032695755579145966437D-01 + pw(30)= 0.11142549737101106673605436812360518D+00 + pw(31)= 0.12656480912918894407887994146661412D+00 + pw(32)= 0.14376340063800550321586640229070301D+00 + pw(33)= 0.16329741041010875411766070944591531D+00 + pw(34)= 0.18548034139204283612129505343431894D+00 + pw(35)= 0.21066831722133520869183148142544999D+00 + pw(36)= 0.23926618297652114852711313447455029D+00 + pw(37)= 0.27173464220061857023595757633130234D+00 + pw(38)= 0.30859868602838502541227655874922030D+00 + pw(39)= 0.35045766213824028933896241045941680D+00 + pw(40)= 0.39799746405898883695366965337609207D+00 + pw(41)= 0.45200551538659797325171404962194440D+00 + pw(42)= 0.51338951072222608042425293298227461D+00 + pw(43)= 0.58320130681121704145811176462102830D+00 + pw(44)= 0.66266801682533587367291708503855157D+00 + pw(45)= 0.75323338727944523148159051268884834D+00 + pw(46)= 0.85661417022141803759749168951905898D+00 + pw(47)= 0.97487886818484736512715176832767718D+00 + pw(48)= 0.11105607080548591676445817081025167D+01 + pw(49)= 0.12668244926674778382406916781590943D+01 + pw(50)= 0.14477211050240886139248358867709201D+01 + pw(51)= 0.16585903209293941982874152551002654D+01 + pw(52)= 0.19067267819120553533052269878705101D+01 + pw(53)= 0.22025411213234530205031685493395576D+01 + pw(54)= 0.25617240229328119031730477599209231D+01 + pw(55)= 0.30096444005352699081587070335073080D+01 + pw(56)= 0.35914005587611010508966597641663763D+01 + pw(57)= 0.43990110029151586972435889347654638D+01 + pw(58)= 0.56675740145564914236518411943414860D+01 + pw(59)= 0.83489730609712306624049954354151761D+01 +endif +end subroutine wts500 +end MODULE WTS500_MOD diff --git a/src/trans/cpu/external/dir_trans.F90 b/src/trans/cpu/external/dir_trans.F90 new file mode 100644 index 0000000..518e0e7 --- /dev/null +++ b/src/trans/cpu/external/dir_trans.F90 @@ -0,0 +1,508 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& LDLATLON,KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *DIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! LDLATLON - indicating if regular lat-lon is the input data +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, & + & NF_SC2, NF_SC3A, NF_SC3B, & + & NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIR_TRANS_CTL_MOD ,ONLY : DIR_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL, INTENT(IN) :: LDLATLON + +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DIR_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(1808,0) +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +! This is for use in TRGTOL which is shared with adjoint inverse transform +LSCDERS=.FALSE. +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. +LATLON=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDLATLON)) THEN + LATLON = LDLATLON +ENDIF + +! Compute derived variables + + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANS : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1808,1) + +! ------------------------------------------------------------------ + +CALL DIR_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +IF (LHOOK) CALL DR_HOOK('DIR_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE DIR_TRANS + diff --git a/src/trans/cpu/external/dir_transad.F90 b/src/trans/cpu/external/dir_transad.F90 new file mode 100644 index 0000000..6815ca2 --- /dev/null +++ b/src/trans/cpu/external/dir_transad.F90 @@ -0,0 +1,506 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *DIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL DIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split +! +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIR_TRANS_CTLAD - control routine +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NGPBLKS, NF_SC2, NF_SC3A, NF_SC3B, NPROMA +USE TPM_DISTR ,ONLY : D, MYSETV, NPRTRV + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIR_TRANS_CTLAD_MOD ,ONLY : DIR_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DIR_TRANSAD',0,ZHOOK_HANDLE) + +CALL GSTATS(1810,0) +! Set current resolution + +CALL SET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +LSCDERS=.FALSE. ! This is for use in TRLTOG which is shared with inverse transform +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETUV CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1810,1) + +! Perform transform + +CALL DIR_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +IF (LHOOK) CALL DR_HOOK('DIR_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE DIR_TRANSAD + + diff --git a/src/trans/cpu/external/dist_grid.F90 b/src/trans/cpu/external/dist_grid.F90 new file mode 100644 index 0000000..5926e19 --- /dev/null +++ b/src/trans/cpu/external/dist_grid.F90 @@ -0,0 +1,147 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *DIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! KSORT (:) - Re-order fields on output +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! P.Marguinaud : 10-10-14 Add KSORT + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIST_GRID_CTL_MOD ,ONLY : DIST_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('DIST_GRID',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_GRID: KFROM TOO SHORT!') +ENDIF +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'DIST_GRID:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('DIST_GRID:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'DIST_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'DIST_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('DIST_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'DIST_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('DIST_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('DIST_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (UBOUND (KSORT, 1) /= UBOUND (PGP, 2)) THEN + CALL ABORT_TRANS('DIST_GRID: DIMENSION MISMATCH KSORT, PGP') + ENDIF +ENDIF + +CALL DIST_GRID_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP,KSORT) + +IF (LHOOK) CALL DR_HOOK('DIST_GRID',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID + diff --git a/src/trans/cpu/external/dist_grid_32.F90 b/src/trans/cpu/external/dist_grid_32.F90 new file mode 100644 index 0000000..fab750c --- /dev/null +++ b/src/trans/cpu/external/dist_grid_32.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIST_GRID_32(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP) + +!**** *DIST_GRID_32* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID_32(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_GRID_32_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN +USE TPM_DIM +USE TPM_DISTR + +USE SET_RESOL_MOD +USE DIST_GRID_32_CTL_MOD +USE ABORT_TRANS_MOD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRM) , INTENT(OUT) :: PGP(:,:,:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('DIST_GRID_32',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_GRID_32: KFROM TOO SHORT!') +ENDIF + +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'DIST_GRID_32:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('DIST_GRID_32:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'DIST_GRID_32:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('DIST_GRID_32:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'DIST_GRID_32:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('DIST_GRID_32:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'DIST_GRID_32:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('DIST_GRID_32:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('DIST_GRID_32:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('DIST_GRID_32:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('DIST_GRID_32:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + + +CALL DIST_GRID_32_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP) + +IF (LHOOK) CALL DR_HOOK('DIST_GRID_32',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID_32 + diff --git a/src/trans/cpu/external/dist_spec.F90 b/src/trans/cpu/external/dist_spec.F90 new file mode 100644 index 0000000..1ff3610 --- /dev/null +++ b/src/trans/cpu/external/dist_spec.F90 @@ -0,0 +1,214 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSMAX,KSORT) + +!**** *DIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL DIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! KSORT (:) - Re-order fields on output +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! P.Marguinaud : 10-10-14 Add KSORT + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIST_SPEC_CONTROL_MOD ,ONLY : DIST_SPEC_CONTROL +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFDISTG) +INTEGER(KIND=JPIM) :: IFSEND,IFRECV,J,IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G, ISPEC2MX +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) :: IUMPP(NPRTRW) +INTEGER(KIND=JPIM) :: IPTRMS(NPRTRW) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IALLMS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IKN(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('DIST_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_SPEC: KFROM TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +ALLOCATE(IALLMS(ISMAX+1)) +ALLOCATE(IKN(0:ISMAX)) +IF(ISMAX /= R%NSMAX) THEN + CALL SUWAVEDI(ISMAX,ISMAX,NPRTRW,MYSETW,KPOSSP=IPOSSP,KSPEC2=ISPEC2,& + & KDIM0G=IDIM0G,KSPEC2MX=ISPEC2MX,KUMPP=IUMPP,KALLMS=IALLMS,KPTRMS=IPTRMS) + ISPEC2_G = (ISMAX+1)*(ISMAX+2) +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) + ISPEC2MX = D%NSPEC2MX + IUMPP(:) = D%NUMPP(:) + IALLMS(:) = D%NALLMS(:) + IPTRMS(:) = D%NPTRMS(:) +ENDIF +DO J=0,ISMAX + IKN(J)=2*(ISMAX+1-J) +ENDDO +IFSEND = 0 +IFRECV = 0 + +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'DIST_SPEC:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('DIST_SPEC:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('DIST_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFSEND) THEN + WRITE(NERR,*) 'DIST_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFSEND + CALL ABORT_TRANS('DIST_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'DIST_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('DIST_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFDISTG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'DIST_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('DIST_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFRECV = IFRECV+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFDISTG) +ELSE + IFRECV = KFDISTG + IVSET(:) = MYSETV +ENDIF + +IF(IFRECV > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('DIST_SPEC: FIELDS TO RECEIVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFRECV) THEN + CALL ABORT_TRANS('DIST_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('DIST_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (.NOT. PRESENT (PSPEC)) THEN + CALL ABORT_TRANS('DIST_SPEC: KSORT REQUIRES PSPEC') + ENDIF + IF (UBOUND (KSORT, 1) /= UBOUND (PSPEC, IFLD)) THEN + CALL ABORT_TRANS('DIST_SPEC: DIMENSION MISMATCH KSORT, PSPEC') + ENDIF +ENDIF + +CALL DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,IVSET,PSPEC,LLDIM1_IS_FLD,& + & ISMAX,ISPEC2,ISPEC2MX,ISPEC2_G,IPOSSP,IDIM0G,IUMPP,IALLMS,IPTRMS,IKN,KSORT) + +DEALLOCATE(IDIM0G) + +IF (LHOOK) CALL DR_HOOK('DIST_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_SPEC + diff --git a/src/trans/cpu/external/gath_grid.F90 b/src/trans/cpu/external/gath_grid.F90 new file mode 100644 index 0000000..d51564d --- /dev/null +++ b/src/trans/cpu/external/gath_grid.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *GATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL GATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE GATH_GRID_CTL_MOD ,ONLY : GATH_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(IN) :: PGP(:,:,:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('GATH_GRID',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) + +IF (LHOOK) CALL DR_HOOK('GATH_GRID',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID + diff --git a/src/trans/cpu/external/gath_grid_32.F90 b/src/trans/cpu/external/gath_grid_32.F90 new file mode 100644 index 0000000..5f2b232 --- /dev/null +++ b/src/trans/cpu/external/gath_grid_32.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GATH_GRID_32(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *GATH_GRID_32* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL GATH_GRID_32(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_32_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPRM + +!ifndef INTERFACE + +USE TPM_GEN +USE TPM_DIM +USE TPM_DISTR + +USE SET_RESOL_MOD +USE GATH_GRID_32_CTL_MOD +USE ABORT_TRANS_MOD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRM) , INTENT(IN) :: PGP(:,:,:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('GATH_GRID_32',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID_32: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID_32:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID_32:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID_32:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID_32:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID_32:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID_32:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID_32:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID_32:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID_32:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID_32:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID_32:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_32_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) + +IF (LHOOK) CALL DR_HOOK('GATH_GRID_32',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID_32 + diff --git a/src/trans/cpu/external/gath_spec.F90 b/src/trans/cpu/external/gath_spec.F90 new file mode 100644 index 0000000..4f2ff2a --- /dev/null +++ b/src/trans/cpu/external/gath_spec.F90 @@ -0,0 +1,194 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LDZA0IP) + +!**** *GATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL GATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero imaginary part of first coefficients +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Modified 03-09-30 Y. Seity, bug correction IFSEND=0 +! Modified 13-10-10 P. Marguinaud add LDZA0IP option +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE GATH_SPEC_CONTROL_MOD ,ONLY : GATH_SPEC_CONTROL +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFGATHG) +INTEGER(KIND=JPIM) :: IFRECV,IFSEND,J +INTEGER(KIND=JPIM) :: IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('GATH_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_SPEC: KTO TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +IF(ISMAX /= R%NSMAX) THEN + CALL SUWAVEDI(ISMAX,ISMAX,NPRTRW,MYSETW,KPOSSP=IPOSSP,KSPEC2=ISPEC2,& + & KDIM0G=IDIM0G) + ISPEC2_G = (ISMAX+1)*(ISMAX+2) +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) +ENDIF + +IFSEND = 0 +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_SPEC:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_SPEC:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('GATH_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFRECV) THEN + WRITE(NERR,*) 'GATH_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFRECV + CALL ABORT_TRANS('GATH_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'GATH_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('GATH_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFGATHG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'GATH_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('GATH_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFSEND = IFSEND+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFGATHG) +ELSEIF(NPRTRV > 1) THEN + WRITE(NERR,*) 'GATH_SPEC:KVSET MISSING, NPRTRV ',NPRTRV + CALL ABORT_TRANS('GATH_SPEC:KVSET MISSING, NPRTRV > 1') +ELSE + IFSEND = KFGATHG + IVSET(:) = 1 +ENDIF + +IF(IFSEND > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('GATH_SPEC: FIELDS TO RECIEVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFSEND) THEN + CALL ABORT_TRANS('GATH_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('GATH_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,IVSET,PSPEC,LLDIM1_IS_FLD,& + & ISMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,LDZA0IP) +DEALLOCATE(IDIM0G) + +IF (LHOOK) CALL DR_HOOK('GATH_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_SPEC + diff --git a/src/trans/cpu/external/get_current.F90 b/src/trans/cpu/external/get_current.F90 new file mode 100644 index 0000000..85d94b2 --- /dev/null +++ b/src/trans/cpu/external/get_current.F90 @@ -0,0 +1,67 @@ +! (C) Copyright 2012- Meteo-France. +! (C) Copyright 2012- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GET_CURRENT(KRESOL,LDLAM) + +!**** *GET_CURRENT* - Extract current information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting current information from the T.P. + +!** Interface. +! ---------- +! CALL GET_CURRENT(...) + +! Explicit arguments : (all optional) +! -------------------- +! KRESOL - Current resolution +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Ryad El Khatib *Meteo-France* + +! Modifications. +! -------------- +! Original : 24-Aug-2012 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE TPM_GEN, ONLY : NCUR_RESOL +USE TPM_GEOMETRY, ONLY : G + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: KRESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM + +!ifndef INTERFACE + +! Get current resolution +IF (PRESENT(KRESOL)) KRESOL= NCUR_RESOL +IF (PRESENT(LDLAM)) LDLAM = G%LAM + + +!endif INTERFACE + +END SUBROUTINE GET_CURRENT diff --git a/src/trans/cpu/external/gpnorm_trans.F90 b/src/trans/cpu/external/gpnorm_trans.F90 new file mode 100644 index 0000000..7402b47 --- /dev/null +++ b/src/trans/cpu/external/gpnorm_trans.F90 @@ -0,0 +1,99 @@ +! (C) Copyright 2008- ECMWF. +! (C) Copyright 2008- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *GPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms + +!** Interface. +! ---------- +! CALL GPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! George Mozdzynski *ECMWF* + +! Modifications. +! -------------- +! Original : 19th Sept 2008 +! R. El Khatib 07-08-2009 Optimisation directive for NEC +! R. El Khatib 16-Sep-2019 merge with LAM code +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPM_FIELDS ,ONLY : F +USE TPM_DIM ,ONLY : R +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE GPNORM_TRANS_CTL_MOD, ONLY : GPNORM_TRANS_CTL + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL ,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KRESOL + +!ifndef INTERFACE + +! Local variables +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS',0,ZHOOK_HANDLE) + +! Set current resolution +CALL SET_RESOL(KRESOL) + +CALL GPNORM_TRANS_CTL(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,F%RW(1:R%NDGL)) + +IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + + +END SUBROUTINE GPNORM_TRANS diff --git a/src/trans/cpu/external/ini_spec_dist.F90 b/src/trans/cpu/external/ini_spec_dist.F90 new file mode 100644 index 0000000..d6480ea --- /dev/null +++ b/src/trans/cpu/external/ini_spec_dist.F90 @@ -0,0 +1,90 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE INI_SPEC_DIST(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& + &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS) + + +!**** *INI_SPEC_DIST* - Initialize spectral wave distribution + +! Purpose. +! -------- +! Initialize arrays controlling spectral wave distribution + +!** Interface. +! ---------- +! CALL INI_SPEC_DIST(...) + +! Explicit arguments : +! -------------------- +! KSMAX - spectral truncation required +! KTMAX - Overtruncation for KSMAX (input) +! KPRTRW - Number of processors in A-direction (input) +! KMYSETW - A-set for present processor (input) +! KASM0 - Offsets for spectral waves (output) +! KSPOLEGL - Local version of NSPOLEG (output) +! KPROCM - Where a certain spectral wave belongs (output) +! KUMPP - Number of spectral waves on this PE (output) +! KSPEC - Local version on NSPEC (output) +! KSPEC2 - Local version on NSPEC2 (output) +! KSPEC2MX - Maximum KSPEC2 across PEs (output) +! KPOSSP - Global spectral fields partitioning (output) +! KMYMS - This PEs spectral zonal wavenumbers (output) +! Implicit arguments : NONE +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. SUWAVEDI +! ---------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +!ifndef INTERFACE +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KPRTRW +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETW +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPOLEGL + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KASM0(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPROCM(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KUMPP(KPRTRW) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KMYMS(KSMAX+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPOSSP(KPRTRW+1) + +!ifndef INTERFACE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('INI_SPEC_DIST',0,ZHOOK_HANDLE) + +CALL SUWAVEDI(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& + &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS) + +IF (LHOOK) CALL DR_HOOK('INI_SPEC_DIST',1,ZHOOK_HANDLE) + +!endif INTERFACE + +END SUBROUTINE INI_SPEC_DIST diff --git a/src/trans/cpu/external/inv_trans.F90 b/src/trans/cpu/external/inv_trans.F90 new file mode 100644 index 0000000..106cc8d --- /dev/null +++ b/src/trans/cpu/external/inv_trans.F90 @@ -0,0 +1,621 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE INV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,LDLATLON,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! LDLATLON - indicating if regular lat-lon output requested +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE INV_TRANS_CTL_MOD ,ONLY : INV_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +LOGICAL ,OPTIONAL, INTENT(IN) :: LDLATLON +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('INV_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(1807,0) +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +LATLON =.FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF(.NOT. PRESENT(PSPSCALAR) ) THEN + CALL ABORT_TRANS('INV_TRANS : KVSETSC PRESENT BUT PSPSCALAR MISSING') + ENDIF + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G + IF_SC2_G + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + &('INV_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS_G > 0 ) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +IF(PRESENT(LDLATLON)) THEN + LATLON = LDLATLON +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPDIV TOO SHORT') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANS:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1807,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL INV_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +IF (LHOOK) CALL DR_HOOK('INV_TRANS',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE INV_TRANS + diff --git a/src/trans/cpu/external/inv_transad.F90 b/src/trans/cpu/external/inv_transad.F90 new file mode 100644 index 0000000..b552dd0 --- /dev/null +++ b/src/trans/cpu/external/inv_transad.F90 @@ -0,0 +1,619 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE INV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL INV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. +! +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTLAD - control of Legendre transform +! FTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, & + & NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE INV_TRANS_CTLAD_MOD ,ONLY : INV_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('INV_TRANSAD',0,ZHOOK_HANDLE) +CALL GSTATS(1809,0) +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + &('INV_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + + + +! Compute derived variables + + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS("INV_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING") + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS("INV_TRANSAD : PSPVOR TOO SHORT") + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS("INV_TRANSAD : IF_UV > 0 BUT PSPDIV MISSING") + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS("INV_TRANSAD : PSPDIV TOO SHORT") + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANSAD:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1809,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL INV_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +IF (LHOOK) CALL DR_HOOK('INV_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE INV_TRANSAD + diff --git a/src/trans/cpu/external/setup_trans.F90 b/src/trans/cpu/external/setup_trans.F90 new file mode 100644 index 0000000..557ffb0 --- /dev/null +++ b/src/trans/cpu/external/setup_trans.F90 @@ -0,0 +1,430 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& +&KFLEV,KTMAX,KRESOL,PWEIGHT,LDGRIDONLY,LDUSERPNM,LDKEEPRPNM,LDUSEFLT,& +&LDSPSETUPONLY,LDPNMONLY,LDUSEFFTW,& +&LDLL,LDSHIFTLL,CDIO_LEGPOL,CDLEGPOLFNAME,KLEGPOLPTR,KLEGPOLPTR_LEN) + +!**** *SETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL SETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KDLON - number of points on each Gaussian latitude [2*KDGL] +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! PWEIGHT - the weight per grid-point (for a weighted distribution) +! LDGRIDONLY - true if only grid space is required + +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space + +! LDSPLIT describe the distribution among processors of grid-point data and +! has no relevance if you are using a single processor + +! PSTRET - stretching factor - for the case the Legendre polynomials are +! computed on the stretched sphere - works with LSOUTHPNM +! LDUSEFLT - use Fast Legandre Transform (Butterfly algorithm) +! LDUSERPNM - Use Belusov algorithm to compute legendre pol. (else new alg.) +! LDKEEPRPNM - Keep Legendre Polynomials (only applicable when using +! FLT, otherwise always kept) +! LDPNMONLY - Compute the Legendre polynomials only, not the FFTs. +! LDUSEFFTW - Use FFTW for FFTs +! LDLL - Setup second set of input/output latitudes +! the number of input/output latitudes to transform is equal KDGL +! or KDGL+2 in the case that includes poles + equator +! the number of input/output longitudes to transform is 2*KDGL +! LDSHIFTLL - Shift output lon/lat data by 0.5*dx and 0.5*dy +! CDIO_LEGPOL - IO option on Legendre polinomials : N.B. Only works for NPROC=1 +! Options: +! 'READF' - read Leg.Pol. from file CDLEGPOLFNAME +! 'WRITEF' - write Leg.Pol. to file CDLEGPOLFNAME +! 'MEMBUF' - Leg. Pol provided in shared memory segment pointed to by KLEGPOLPTR of +! length KLEGPOLPTR_LEN +! CDLEGPOLFNAME - file name for Leg.Pol. IO +! KLEGPOLPTR - pointer to Legendre polynomials memory segment +! KLEGPOLPTR_LEN - length of Legendre polynomials memory segment + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- SETUP_DIMS - setup distribution independent dimensions +! SUMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! SUMP_TRANS - Second part of setup of distributed environment +! SUFFT - setup for FFT +! SHAREDMEM_CREATE - create memory buffer for Leg.pol. + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Daan Degrauwe : Mar 2012 E'-zone dimensions +! R. El Khatib 09-Aug-2012 %LAM in GEOM_TYPE +! R. El Khatib 14-Jun-2013 PSTRET, LDPNMONLY, LENABLED +! G. Mozdzynski : Oct 2014 Support f +! N. Wedi : Apr 2015 Support dual set of lat/lon +! G. Mozdzynski : Jun 2015 Support alternative FFTs to FFTW +! M.Hamrud/W.Deconinck : July 2015 IO options for Legenndre polynomials +! R. El Khatib 07-Mar-2016 Better flexibility for Legendre polynomials computation in stretched mode +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT,C_ASSOCIATED,C_SIZE_T + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NDEF_RESOL, & + & NMAX_RESOL, NPRINTLEV, LENABLED, NERR +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL,NPROC +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +USE TPM_FLT +USE TPM_CTL + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE SETUP_DIMS_MOD ,ONLY : SETUP_DIMS +USE SUMP_TRANS_MOD ,ONLY : SUMP_TRANS +USE SUMP_TRANS_PRELEG_MOD ,ONLY : SUMP_TRANS_PRELEG +USE SULEG_MOD ,ONLY : SULEG +USE PRE_SULEG_MOD ,ONLY : PRE_SULEG +USE SUFFT_MOD ,ONLY : SUFFT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SHAREDMEM_MOD ,ONLY : SHAREDMEM_CREATE +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KSMAX,KDGL +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KDLON +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT):: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSTRET +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KFLEV +LOGICAL ,OPTIONAL,INTENT(IN):: LDGRIDONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFLT +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSERPNM +LOGICAL ,OPTIONAL,INTENT(IN):: LDKEEPRPNM +LOGICAL ,OPTIONAL,INTENT(IN):: LDSPSETUPONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDPNMONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFFTW +LOGICAL ,OPTIONAL,INTENT(IN):: LDLL +LOGICAL ,OPTIONAL,INTENT(IN):: LDSHIFTLL +CHARACTER(LEN=*),OPTIONAL,INTENT(IN):: CDIO_LEGPOL +CHARACTER(LEN=*),OPTIONAL,INTENT(IN):: CDLEGPOLFNAME +TYPE(C_PTR) ,OPTIONAL,INTENT(IN) :: KLEGPOLPTR +INTEGER(C_SIZE_T) ,OPTIONAL,INTENT(IN) :: KLEGPOLPTR_LEN + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: JGL,JRES,IDEF_RESOL + +LOGICAL :: LLP1,LLP2, LLSPSETUPONLY +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +#include "user_clock.intfb.h" +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SETUP_TRANS',0,ZHOOK_HANDLE) + +IF(MSETUP0 == 0) THEN + CALL ABORT_TRANS('SETUP_TRANS: SETUP_TRANS0 HAS TO BE CALLED BEFORE SETUP_TRANS') +ENDIF +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS ===' + +! Allocate resolution dependent structures +IF(.NOT. ALLOCATED(DIM_RESOL)) THEN + IDEF_RESOL = 1 + ALLOCATE(DIM_RESOL(NMAX_RESOL)) + ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(GEOM_RESOL(NMAX_RESOL)) + ALLOCATE(DISTR_RESOL(NMAX_RESOL)) + ALLOCATE(FFT_RESOL(NMAX_RESOL)) + ALLOCATE(FFTB_RESOL(NMAX_RESOL)) +#ifdef WITH_FFTW + ALLOCATE(FFTW_RESOL(NMAX_RESOL)) +#endif + ALLOCATE(FLT_RESOL(NMAX_RESOL)) + ALLOCATE(CTL_RESOL(NMAX_RESOL)) + GEOM_RESOL(:)%LAM=.FALSE. + ALLOCATE(LENABLED(NMAX_RESOL)) + LENABLED(:)=.FALSE. +ELSE + IDEF_RESOL = NMAX_RESOL+1 + DO JRES=1,NMAX_RESOL + IF(.NOT.LENABLED(JRES)) THEN + IDEF_RESOL = JRES + EXIT + ENDIF + ENDDO + IF(IDEF_RESOL > NMAX_RESOL) THEN + CALL ABORT_TRANS('SETUP_TRANS:IDEF_RESOL > NMAX_RESOL') + ENDIF +ENDIF + +IF (PRESENT(KRESOL)) THEN + KRESOL=IDEF_RESOL +ENDIF + +! Point at structures due to be initialized +CALL SET_RESOL(IDEF_RESOL,LDSETUP=.TRUE.) + +IF(LLP1) WRITE(NOUT,*) '=== DEFINING RESOLUTION ',NCUR_RESOL + + + +! Defaults for optional arguments + + +G%LREDUCED_GRID = .FALSE. +G%RSTRET=1.0_JPRB +D%LGRIDONLY = .FALSE. +D%LSPLIT = .FALSE. +D%LCPNMONLY=.FALSE. +S%LUSE_BELUSOV=.TRUE. ! use Belusov algorithm to compute RPNM array instead of per m +S%LKEEPRPNM=.FALSE. ! Keep Legendre polonomials (RPNM) +S%LUSEFLT=.FALSE. ! Use fast legendre transforms +#ifdef WITH_FFTW +TW%LFFTW=.FALSE. ! Use FFTW interface for FFTs +#endif +LLSPSETUPONLY = .FALSE. ! Only create distributed spectral setup +S%LDLL = .FALSE. ! use mapping to/from second set of latitudes +S%LSHIFTLL = .FALSE. ! shift output lat-lon by 0.5dx, 0.5dy +C%LREAD_LEGPOL = .FALSE. +C%LWRITE_LEGPOL = .FALSE. + + +! NON-OPTIONAL ARGUMENTS +R%NSMAX = KSMAX +R%NDGL = KDGL +! E'-defaults +R%NNOEXTZL=0 +R%NNOEXTZG=0 + +! IMPLICIT argument : +G%LAM = .FALSE. + +IF(PRESENT(KDLON)) THEN + R%NDLON = KDLON +ELSE + R%NDLON = 2*R%NDGL +ENDIF + +IF(PRESENT(LDLL)) THEN + S%LDLL=LDLL + IF( LDLL ) THEN + S%NDLON=R%NDLON + ! account for pole + equator + R%NDGL=R%NDGL+2 + IF(PRESENT(LDSHIFTLL)) THEN + S%LSHIFTLL = LDSHIFTLL + ! geophysical (shifted) lat-lon without pole and equator + IF(S%LSHIFTLL) R%NDGL=R%NDGL-2 + ENDIF + S%NDGL=R%NDGL + ENDIF +ENDIF + +IF (R%NDGL <= 0 .OR. MOD(R%NDGL,2) /= 0) THEN + CALL ABORT_TRANS ('SETUP_TRANS: KDGL IS NOT A POSITIVE, EVEN NUMBER') +ENDIF + +! Optional arguments + +ALLOCATE(G%NLOEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'NLOEN ',SIZE(G%NLOEN ),SHAPE(G%NLOEN ) +IF(PRESENT(KLOEN)) THEN + IF( MINVAL(KLOEN(:)) <= 0 )THEN + CALL ABORT_TRANS ('SETUP_TRANS: KLOEN INVALID (ONE or MORE POINTS <= 0)') + ENDIF + R%NDLON=MAXVAL(KLOEN(:)) + DO JGL=1,R%NDGL + IF(KLOEN(JGL) /= R%NDLON) THEN + G%LREDUCED_GRID = .TRUE. + EXIT + ENDIF + ENDDO +ENDIF + +IF (G%LREDUCED_GRID) THEN + G%NLOEN(:) = KLOEN(1:R%NDGL) +ELSE + G%NLOEN(:) = R%NDLON +ENDIF + +IF(PRESENT(LDSPLIT)) THEN + D%LSPLIT = LDSPLIT +ENDIF + +IF(PRESENT(KTMAX)) THEN + R%NTMAX = KTMAX +ELSE + R%NTMAX = R%NSMAX +ENDIF + +IF(PRESENT(PWEIGHT)) THEN + D%LWEIGHTED_DISTR = .TRUE. + IF( D%LWEIGHTED_DISTR .AND. .NOT.D%LSPLIT )THEN + CALL ABORT_TRANS('SETUP_TRANS: LWEIGHTED_DISTR=T AND LSPLIT=F NOT SUPPORTED') + ENDIF + IF(SIZE(PWEIGHT) /= SUM(G%NLOEN(:)) )THEN + CALL ABORT_TRANS('SETUP_TRANS:SIZE(PWEIGHT) /= SUM(G%NLOEN(:))') + ENDIF + IF( MINVAL(PWEIGHT(:)) < 0.0_JPRB )THEN + CALL ABORT_TRANS('SETUP_TRANS: INVALID WEIGHTS') + ENDIF + ALLOCATE(D%RWEIGHT(SIZE(PWEIGHT))) + D%RWEIGHT(:)=PWEIGHT(:) +ELSE + D%LWEIGHTED_DISTR = .FALSE. +ENDIF + +IF(PRESENT(LDGRIDONLY)) THEN + D%LGRIDONLY=LDGRIDONLY +ENDIF + +IF(PRESENT(LDSPSETUPONLY)) THEN + LLSPSETUPONLY=LDSPSETUPONLY +ENDIF + +IF(PRESENT(LDPNMONLY)) THEN + D%LCPNMONLY=LDPNMONLY +ENDIF + + +#ifdef WITH_FFTW +IF(PRESENT(LDUSEFFTW)) THEN + TW%LFFTW=LDUSEFFTW +ENDIF +IF( LLSPSETUPONLY .OR. D%LGRIDONLY ) THEN + TW%LFFTW = .FALSE. +ENDIF +#endif + +S%LSOUTHPNM=.FALSE. +IF(PRESENT(PSTRET)) THEN + IF (ABS(PSTRET-1.0_JPRB)>100._JPRB*EPSILON(1._JPRB)) THEN + G%RSTRET=PSTRET + S%LSOUTHPNM=.TRUE. + ENDIF +ENDIF + +IF(PRESENT(CDIO_LEGPOL)) THEN + IF(NPROC > 1) CALL ABORT_TRANS('SETUP_TRANS:CDIO_LEGPOL OPTIONS ONLY FOR NPROC=1 ') + IF(R%NSMAX > 511 ) S%LUSEFLT = .TRUE. !To save IO and memory + IF(TRIM(CDIO_LEGPOL) == 'readf' .OR. TRIM(CDIO_LEGPOL) == 'READF' ) THEN + IF(.NOT.PRESENT(CDLEGPOLFNAME)) CALL ABORT_TRANS('SETUP_TRANS: CDLEGPOLFNAME ARGUMENT MISSING') + C%LREAD_LEGPOL = .TRUE. + C%CLEGPOLFNAME = TRIM(CDLEGPOLFNAME) + C%CIO_TYPE='file' + ELSEIF(TRIM(CDIO_LEGPOL) == 'writef' .OR. TRIM(CDIO_LEGPOL) == 'WRITEF') THEN + IF(.NOT.PRESENT(CDLEGPOLFNAME)) CALL ABORT_TRANS('SETUP_TRANS: CDLEGPOLFNAME ARGUMENT MISSING') + C%LWRITE_LEGPOL = .TRUE. + C%CLEGPOLFNAME = TRIM(CDLEGPOLFNAME) + C%CIO_TYPE='file' + ELSEIF(TRIM(CDIO_LEGPOL) == 'membuf' .OR. TRIM(CDIO_LEGPOL) == 'MEMBUF') THEN + IF(.NOT.PRESENT(KLEGPOLPTR)) CALL ABORT_TRANS('SETUP_TRANS: KLEGPOLPTR ARGUMENT MISSING') + IF(.NOT.C_ASSOCIATED(KLEGPOLPTR)) CALL ABORT_TRANS('SETUP_TRANS: KLEGPOLPTR NULL POINTER') + IF(.NOT.PRESENT(KLEGPOLPTR_LEN)) CALL ABORT_TRANS('SETUP_TRANS: KLEGPOLPTR_LEN ARGUMENT MISSING') + C%LREAD_LEGPOL = .TRUE. + C%CIO_TYPE='mbuf' + CALL SHAREDMEM_CREATE( C%STORAGE,KLEGPOLPTR,KLEGPOLPTR_LEN) + ELSE + WRITE(NERR,*) 'CDIO_LEGPOL ', TRIM(CDIO_LEGPOL) + CALL ABORT_TRANS('SETUP_TRANS:CDIO_LEGPOL UNKNOWN METHOD ') + ENDIF +ENDIF + +IF(PRESENT(LDUSEFLT)) THEN + S%LUSEFLT=LDUSEFLT +ENDIF +IF(PRESENT(LDUSERPNM)) THEN + S%LUSE_BELUSOV=LDUSERPNM +ENDIF +IF(PRESENT(LDKEEPRPNM)) THEN + IF(S%LUSEFLT) THEN + IF(LDKEEPRPNM.AND..NOT.LDUSERPNM) THEN + CALL ABORT_TRANS('SETUP_TRANS: LDKEEPRPNM=true with LDUSERPNM=false') + ENDIF + ENDIF + S%LKEEPRPNM=LDKEEPRPNM +ENDIF +! Setup resolution dependent structures +! ------------------------------------- + +! Setup distribution independent dimensions +CALL SETUP_DIMS + +! First part of setup of distributed environment +CALL SUMP_TRANS_PRELEG + +IF( .NOT.LLSPSETUPONLY ) THEN + +! Compute Legendre polonomial and Gaussian Latitudes and Weights + CALL SULEG + +! Second part of setup of distributed environment + CALL SUMP_TRANS + CALL GSTATS(1802,0) + +! Initialize Fast Fourier Transform package + IF (.NOT.D%LCPNMONLY) CALL SUFFT + CALL GSTATS(1802,1) +ELSE + CALL PRE_SULEG +ENDIF + +! Signal the current resolution is active +LENABLED(IDEF_RESOL)=.TRUE. +NDEF_RESOL = COUNT(LENABLED) + +IF (LHOOK) CALL DR_HOOK('SETUP_TRANS',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +!endif INTERFACE + +END SUBROUTINE SETUP_TRANS diff --git a/src/trans/cpu/external/setup_trans0.F90 b/src/trans/cpu/external/setup_trans0.F90 new file mode 100644 index 0000000..1d11a90 --- /dev/null +++ b/src/trans/cpu/external/setup_trans0.F90 @@ -0,0 +1,213 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& +& KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN,& +& LDMPOFF,LDSYNC_TRANS,KTRANS_SYNC_LEVEL,& +& LDEQ_REGIONS,K_REGIONS_NS,K_REGIONS_EW,K_REGIONS,& +& PRAD,LDALLOPERM) + +!**** *SETUP_TRANS0* - General setup routine for transform package + +! Purpose. +! -------- +! Resolution independent part of setup of transform package +! Has to be called BEFORE SETUP_TRANS + +!** Interface. +! ---------- +! CALL SETUP_TRANS0(...) + +! Explicit arguments : All arguments are optional, [..] default value +! ------------------- +! KOUT - Unit number for listing output [6] +! KERR - Unit number for error messages [0] +! KPRINTLEV - level of output to KOUT, 0->no output,1->normal,2->debug [0] +! KMAX_RESOL - maximum number of different resolutions for this run [1] +! KPRGPNS - splitting level in N-S direction in grid-point space [1] +! KPRGPEW - splitting level in E-W direction in grid-point space [1] +! KPRTRW - splitting level in wave direction in spectral space [1] +! KCOMBFLEN - Size of communication buffer [1800000 (*8bytes) ] +! LDMPOFF - switch off message passing [false] +! LDSYNC_TRANS - switch to activate barriers in trmtol trltom [false] +! KTRANS_SYNC_LEVEL - use of synchronization/blocking [0] +! LDEQ_REGIONS - true if new eq_regions partitioning [false] +! K_REGIONS - Number of regions (1D or 2D partitioning) +! K_REGIONS_NS - Maximum number of NS partitions +! K_REGIONS_EW - Maximum number of EW partitions +! PRAD - Radius of the planet +! LDALLOPERM - Allocate certain arrays permanently +! The total number of (MPI)-processors has to be equal to KPRGPNS*KPRGPEW + +! Method. +! ------- + +! Externals. SUMP_TRANS0 - initial setup routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! R. El Khatib 03-01-24 LDMPOFF +! G. Mozdzynski 2006-09-13 LDEQ_REGIONS +! N. Wedi 2009-11-30 add radius + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, LMPOFF, LSYNC_TRANS, NTRANS_SYNC_LEVEL, MSETUP0, & + & NMAX_RESOL, NPRINTLEV, NPROMATR, LALLOPERM +USE TPM_DISTR ,ONLY : LEQ_REGIONS, NCOMBFLEN, NPRGPEW,NPRGPNS, NPRTRW +USE TPM_CONSTANTS ,ONLY : RA + +USE SUMP_TRANS0_MOD ,ONLY : SUMP_TRANS0 +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_EW, N_REGIONS_NS + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN +LOGICAL ,OPTIONAL,INTENT(IN) :: LDMPOFF +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSYNC_TRANS +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KTRANS_SYNC_LEVEL +LOGICAL ,OPTIONAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDALLOPERM +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PRAD +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS(:) +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_NS +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_EW + +!ifndef INTERFACE + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + +IF(MSETUP0 /= 0) THEN +!gr CALL ABORT_TRANS('SETUP_TRANS0: SETUP_TRANS0 MAY ONLY BE CALLED ONCE') +ENDIF + +! Default values + +NOUT = 6 +NERR = 0 +NPRINTLEV = 0 +NMAX_RESOL = 1 +NPRGPNS = 1 +NPRGPEW = 1 +NPRTRW = 1 +N_REGIONS_NS=1 +N_REGIONS_EW=1 +NPROMATR = 0 +NCOMBFLEN = 1800000 +LMPOFF = .FALSE. +LSYNC_TRANS=.FALSE. +NTRANS_SYNC_LEVEL=0 +LEQ_REGIONS=.FALSE. +RA=6371229._JPRB +LALLOPERM=.FALSE. + +! Optional arguments + +IF(PRESENT(KOUT)) THEN + NOUT = KOUT +ENDIF +IF(PRESENT(KERR)) THEN + NERR = KERR +ENDIF +IF(PRESENT(KPRINTLEV)) THEN + NPRINTLEV = KPRINTLEV +ENDIF + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS0 ===' + +IF(PRESENT(KMAX_RESOL))THEN + NMAX_RESOL = KMAX_RESOL +ENDIF +IF(PRESENT(KPROMATR))THEN + IF(MOD(KPROMATR,2) /= 0) THEN + CALL ABORT_TRANS('SETUP_TRANS0: KPROMATR HAS TO BE MULTIPLE OF 2') + ENDIF + NPROMATR = KPROMATR +ENDIF +IF(PRESENT(KPRGPNS)) THEN + NPRGPNS = KPRGPNS +ENDIF +IF(PRESENT(KPRGPEW)) THEN + NPRGPEW = KPRGPEW +ENDIF +IF(PRESENT(KPRTRW)) THEN + NPRTRW = KPRTRW +ENDIF +IF(PRESENT(KCOMBFLEN)) THEN + NCOMBFLEN = KCOMBFLEN +ENDIF +IF(PRESENT(LDMPOFF)) THEN + LMPOFF = LDMPOFF +ENDIF +IF(PRESENT(LDSYNC_TRANS)) THEN + LSYNC_TRANS = LDSYNC_TRANS +ENDIF +IF(PRESENT(KTRANS_SYNC_LEVEL)) THEN + NTRANS_SYNC_LEVEL = KTRANS_SYNC_LEVEL +ENDIF +IF(PRESENT(LDEQ_REGIONS)) THEN + LEQ_REGIONS = LDEQ_REGIONS +ENDIF + +! Initial setup +CALL SUMP_TRANS0 + +IF(PRESENT(K_REGIONS_NS)) THEN + K_REGIONS_NS = N_REGIONS_NS +ENDIF + +IF(PRESENT(K_REGIONS_EW)) THEN + K_REGIONS_EW = N_REGIONS_EW +ENDIF + +IF(PRESENT(K_REGIONS)) THEN + IF(UBOUND(K_REGIONS,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('SETUP_TRANS0: K_REGIONS TOO SMALL') + ELSE + K_REGIONS(1:N_REGIONS_NS)=N_REGIONS(1:N_REGIONS_NS) + ENDIF +ENDIF + +IF(PRESENT(PRAD)) THEN + RA=PRAD +ENDIF + +IF(PRESENT(LDALLOPERM)) THEN + LALLOPERM=LDALLOPERM +ENDIF + +! Setup level 0 complete +MSETUP0 = 1 + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE SETUP_TRANS0 + + diff --git a/src/trans/cpu/external/specnorm.F90 b/src/trans/cpu/external/specnorm.F90 new file mode 100644 index 0000000..50f8a4a --- /dev/null +++ b/src/trans/cpu/external/specnorm.F90 @@ -0,0 +1,144 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *SPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL SPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- SPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE SPNORM_CTL_MOD ,ONLY : SPNORM_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IMASTER,IFLD,IFLD_G,J + +! ------------------------------------------------------------------ + +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults +IMASTER = 1 +IFLD = 0 + + +IF(PRESENT(KMASTER)) THEN + IMASTER = KMASTER +ENDIF + +IF(PRESENT(KVSET)) THEN + IFLD_G = UBOUND(KVSET,1) + DO J=1,IFLD_G + IF(KVSET(J) > NPRTRV) THEN + WRITE(NERR,*) 'SPECNORM:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('SPECNORM:KVSET TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFLD = IFLD+1 + ENDIF + ENDDO +ELSE + IF(PRESENT(PSPEC)) THEN + IFLD = UBOUND(PSPEC,1) + ENDIF + IFLD_G = IFLD +ENDIF + +IF(NPRTRV >1) THEN + IF(IFLD > 0 .AND. .NOT. PRESENT(KVSET)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IFLD > 0 AND NOT PRESENT(KVSET)',& + &NPRTRV,IFLD + CALL ABORT_TRANS('SPECNORM: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF +IF(MYPROC == IMASTER) THEN + IF(.NOT. PRESENT(PNORM)) THEN + CALL ABORT_TRANS('SPECNORM: PNORM NOT PRESENT') + ENDIF + IF(UBOUND(PNORM,1) < IFLD_G) THEN + CALL ABORT_TRANS('SPECNORM: PNORM TOO SMALL') + ENDIF +ENDIF +IF(IFLD > 0 ) THEN + IF(.NOT. PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('SPECNORM: PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,1) < IFLD) THEN + CALL ABORT_TRANS('SPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,2) < D%NSPEC2) THEN + CALL ABORT_TRANS('SPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL SPNORM_CTL(PSPEC,IFLD,IFLD_G,KVSET,IMASTER,PMET,PNORM) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE SPECNORM + diff --git a/src/trans/cpu/external/sugawc.F90 b/src/trans/cpu/external/sugawc.F90 new file mode 100644 index 0000000..7f90637 --- /dev/null +++ b/src/trans/cpu/external/sugawc.F90 @@ -0,0 +1,102 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SUGAWC(KDGLG,PMU,PW) + +!**** *SUGAWC* - Compute Gaussian latitudes and weights + +! Purpose. +! -------- +! Compute Gaussian latitudes and weights. + +!** Interface. +! ---------- +! CALL SUGAWC(...) + +! Explicit arguments : +! -------------------- +! INPUT: +! KDGLG - number of latitudes. + +! OUTPUT: +! PMU - sine of Gaussian latitudes. +! PW - Gaussian weights. + +! Method. +! ------- + +! Externals. SUGAW +! ---------- + +! Author. +! ------- +! K. Yessad, from SUGAWA and SULEG (trans) +! Original : May 2012 + +! Modifications. +! -------------- +! F. Vana 05-Mar-2015 Support for single precision + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +!ifndef INTERFACE + +USE SUGAW_MOD + +!endif INTERFACE + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KDGLG +REAL(KIND=JPRD) ,INTENT(OUT) :: PMU(:) +REAL(KIND=JPRD) ,INTENT(OUT) :: PW(:) + +!ifndef INTERFACE + +REAL(KIND=JPRD) :: ZANM +INTEGER(KIND=JPIM) :: ISTART,IK,IODD,JN,JGL +REAL(KIND=JPRD) :: ZFN(0:KDGLG,0:KDGLG) +REAL(KIND=JPRD) :: ZFNN + +! ------------------------------------------------------------------ + +! * preliminary calculations to compute input quantities ZANM and ZFN +! (k.y.: coded after what I found in tfl/module/suleg_mod.F90). +ISTART=1 +! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) +! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 +ZFN(0,0)=2._JPRD +DO JN=ISTART,KDGLG + ZFNN=ZFN(0,0) + DO JGL=1,JN + ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) + ENDDO + IODD=MOD(JN,2) + ZFN(JN,JN)=ZFNN + DO JGL=2,JN-IODD,2 + ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) + ENDDO +ENDDO + +ZANM=SQRT(REAL(2*KDGLG+1,JPRD)*REAL(KDGLG**2,JPRD)/REAL(2*KDGLG-1,JPRD)) + +! * call to SUGAW (output: PW, PMU): +CALL SUGAW(KDGLG,0,KDGLG,PMU,PW,ZANM,ZFN) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE SUGAWC + diff --git a/src/trans/cpu/external/trans_end.F90 b/src/trans/cpu/external/trans_end.F90 new file mode 100644 index 0000000..41961ae --- /dev/null +++ b/src/trans/cpu/external/trans_end.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_END(CDMODE) + +!**** *TRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL TRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: 19-03-2009: intermediate end of transf to allow to switch to mono-task transforms +! R. El Khatib 09-Jul-2013 LENABLED + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : MSETUP0, NCUR_RESOL, NMAX_RESOL, LENABLED,NDEF_RESOL +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL, NPRCIDS +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL, TB, FFTB_RESOL +USE TPM_CTL ,ONLY : C, CTL_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +USE TPM_FLT +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN + +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DEALLOC_RESOL_MOD ,ONLY : DEALLOC_RESOL +! + +IMPLICIT NONE +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE +! Local variables +INTEGER(KIND=JPIM) :: JRES +CHARACTER*5 :: CLMODE +! ------------------------------------------------------------------ +CLMODE='FINAL' +IF (PRESENT(CDMODE)) CLMODE=CDMODE +IF (CLMODE == 'FINAL') THEN + + IF( ALLOCATED( LENABLED ) ) THEN + DO JRES=1,NMAX_RESOL + IF(LENABLED(JRES)) THEN + CALL DEALLOC_RESOL(JRES) + ENDIF + ENDDO + DEALLOCATE(LENABLED) + ENDIF + + NULLIFY(R) + IF( ALLOCATED(DIM_RESOL) ) DEALLOCATE(DIM_RESOL) + + NULLIFY(D) + IF( ALLOCATED(DISTR_RESOL) ) DEALLOCATE(DISTR_RESOL) + + !TPM_FFT + NULLIFY(T) + IF( ALLOCATED(FFT_RESOL) ) DEALLOCATE(FFT_RESOL) + NULLIFY(TB) + IF( ALLOCATED(FFTB_RESOL) ) DEALLOCATE(FFTB_RESOL) + +#ifdef WITH_FFTW + !TPM_FFTW + NULLIFY(TW) + IF( ALLOCATED(FFTW_RESOL) ) DEALLOCATE(FFTW_RESOL) +#endif + + !TPM_FLT + NULLIFY(S) + IF( ALLOCATED(FLT_RESOL) ) DEALLOCATE(FLT_RESOL) + + !TPM_CTL + NULLIFY(C) + IF( ALLOCATED(CTL_RESOL) ) DEALLOCATE(CTL_RESOL) + + !TPM_FIELDS + NULLIFY(F) + IF( ALLOCATED(FIELDS_RESOL) ) DEALLOCATE(FIELDS_RESOL) + + + !TPM_GEOMETRY + NULLIFY(G) + IF( ALLOCATED(GEOM_RESOL) ) DEALLOCATE(GEOM_RESOL) + + !TPM_TRANS + IF(ALLOCATED(FOUBUF_IN)) DEALLOCATE(FOUBUF_IN) + IF(ALLOCATED(FOUBUF)) DEALLOCATE(FOUBUF) + + MSETUP0 = 0 + NMAX_RESOL = 0 + NCUR_RESOL = 0 + NDEF_RESOL = 0 +ENDIF +IF (CLMODE == 'FINAL' .OR. CLMODE == 'INTER') THEN + !EQ_REGIONS + IF( ASSOCIATED(N_REGIONS) ) DEALLOCATE(N_REGIONS) + !TPM_DISTR + IF( ALLOCATED(NPRCIDS) ) DEALLOCATE(NPRCIDS) +ENDIF + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE TRANS_END diff --git a/src/trans/cpu/external/trans_inq.F90 b/src/trans/cpu/external/trans_inq.F90 new file mode 100644 index 0000000..cc80aad --- /dev/null +++ b/src/trans/cpu/external/trans_inq.F90 @@ -0,0 +1,535 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + &KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + &KMYMS,KASM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + &KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + &KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + &KULTPP,KPTRLS,KNMENG,& + &KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + &LDSPLITLAT,& + &KSMAX,PLAPIN,KNVALUE,KDEF_RESOL,LDLAM,& + &PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KDGLU) + +!**** *TRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL TRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KASM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation +! KNVALUE - n value for each KSPEC2 spectral coeffient + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations +! KNMENG - associated (with NLOENG) cut-off zonal wavenumber + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLAPIN - Eigen-values of the inverse Laplace operator +! KDGLU - Number of active points in an hemisphere for a given wavenumber "m" + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M. Hortal : 2001-03-05 Dimensions of the Legendre polynomials +! R. El Khatib 08-Aug-2012 KSMAX,PLAPIN,KNVALUE,LDLAM,KDEF_RESOL + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NDEF_RESOL +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPRTRW, MYSETV, MYSETW, NPRTRV +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FLT + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS_EW, N_REGIONS_NS + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2G +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNUMP +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTG +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTMX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRFLOFF + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KASM0(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KUMPP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPOSSP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KALLMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSTA(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KONL(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL, INTENT(OUT) :: LDSPLITLAT(:) + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KULTPP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNMENG(:) + +REAL(KIND=JPRD) ,OPTIONAL, INTENT(OUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGW(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLEI3 +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPOLEGL +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPMS(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDGLU(0:) + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PLAPIN(-1:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1,IU2 +INTEGER(KIND=JPIM) :: IC, JN, JMLOC +INTEGER(KIND=JPIM) :: IPRTRV,JSETV,IMLOC,IM,ISL,IA,ILA,IS,ILS,IDGLU,J,I +! ------------------------------------------------------------------ + + +! Set current resolution +CALL SET_RESOL(KRESOL) + +IF(PRESENT(KSPEC)) KSPEC = D%NSPEC +IF(PRESENT(KSPEC2)) KSPEC2 = D%NSPEC2 +IF(PRESENT(KSPEC2G)) KSPEC2G = R%NSPEC2_G +IF(PRESENT(KSPEC2MX)) KSPEC2MX = D%NSPEC2MX +IF(PRESENT(KNUMP)) KNUMP = D%NUMP +IF(PRESENT(KGPTOT)) KGPTOT = D%NGPTOT +IF(PRESENT(KGPTOTG)) KGPTOTG = D%NGPTOTG +IF(PRESENT(KGPTOTMX)) KGPTOTMX = D%NGPTOTMX +IF(PRESENT(KFRSTLOFF)) KFRSTLOFF = D%NFRSTLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPRTRW)) KPRTRW = NPRTRW +IF(PRESENT(KMYSETW)) KMYSETW = MYSETW +IF(PRESENT(KMYSETV)) KMYSETV = MYSETV +IF(PRESENT(KMY_REGION_NS)) KMY_REGION_NS = MY_REGION_NS +IF(PRESENT(KMY_REGION_EW)) KMY_REGION_EW = MY_REGION_EW +IF(PRESENT(LDLAM)) LDLAM = G%LAM +IF(PRESENT(KDEF_RESOL)) KDEF_RESOL = NDEF_RESOL + +IF(PRESENT(KGPTOTL)) THEN + IF(UBOUND(KGPTOTL,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KGPTOTL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KGPTOTL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('TRANS_INQ: KGPTOTL DIM 2 TOO SMALL') + ELSE + KGPTOTL(1:N_REGIONS_NS,1:N_REGIONS_EW) = D%NGPTOTL(:,:) + ENDIF +ENDIF + +IF(PRESENT(KMYMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KMYMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KMYMS,1) < D%NUMP) THEN + CALL ABORT_TRANS('TRANS_INQ: KMYMS TOO SMALL') + ELSE + KMYMS(1:D%NUMP) = D%MYMS(:) + ENDIF +ENDIF + +IF(PRESENT(KASM0)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KASM0 REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KASM0,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KASM0 TOO SMALL') + ELSE + KASM0(0:R%NSMAX) = D%NASM0(:) + ENDIF +ENDIF + +IF(PRESENT(KUMPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KUMPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KUMPP,1) < NPRTRW) THEN + CALL ABORT_TRANS('TRANS_INQ: KUMPP TOO SMALL') + ELSE + KUMPP(1:NPRTRW) = D%NUMPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPOSSP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPOSSP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPOSSP,1) < NPRTRW+1) THEN + CALL ABORT_TRANS('TRANS_INQ: KPOSSP TOO SMALL') + ELSE + KPOSSP(1:NPRTRW+1) = D%NPOSSP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRMS,1) < NPRTRW) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRMS TOO SMALL') + ELSE + KPTRMS(1:NPRTRW) = D%NPTRMS(:) + ENDIF +ENDIF + +IF(PRESENT(KALLMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KALLMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KALLMS,1) < R%NSMAX+1) THEN + CALL ABORT_TRANS('TRANS_INQ: KALLMS TOO SMALL') + ELSE + KALLMS(1:R%NSMAX+1) = D%NALLMS(:) + ENDIF +ENDIF + +IF(PRESENT(KDIM0G)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KDIM0G REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KDIM0G,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KDIM0G TOO SMALL') + ELSE + KDIM0G(0:R%NSMAX) = D%NDIM0G(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KFRSTLAT)) THEN + IF(UBOUND(KFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KFRSTLAT TOO SMALL') + ELSE + KFRSTLAT(1:N_REGIONS_NS) = D%NFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KLSTLAT)) THEN + IF(UBOUND(KLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KLSTLAT TOO SMALL') + ELSE + KLSTLAT(1:N_REGIONS_NS) = D%NLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLAT)) THEN + IF(UBOUND(KPTRLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLAT TOO SMALL') + ELSE + KPTRLAT(1:R%NDGL) = D%NPTRLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRFRSTLAT)) THEN + IF(UBOUND(KPTRFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRFRSTLAT TOO SMALL') + ELSE + KPTRFRSTLAT(1:N_REGIONS_NS) = D%NPTRFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLSTLAT)) THEN + IF(UBOUND(KPTRLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLSTLAT TOO SMALL') + ELSE + KPTRLSTLAT(1:N_REGIONS_NS) = D%NPTRLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KSTA)) THEN + IF(UBOUND(KSTA,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('TRANS_INQ: KSTA DIM 1 TOO SMALL') + ELSEIF(UBOUND(KSTA,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('TRANS_INQ: KSTA DIM 2 TOO SMALL') + ELSE + KSTA(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NSTA(:,:) + ENDIF +ENDIF + +IF(PRESENT(KONL)) THEN + IF(UBOUND(KONL,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('TRANS_INQ: KONL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KONL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('TRANS_INQ: KONL DIM 2 TOO SMALL') + ELSE + KONL(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NONL(:,:) + ENDIF +ENDIF + +IF(PRESENT(LDSPLITLAT)) THEN + IF(UBOUND(LDSPLITLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: LDSPLITLAT TOO SMALL') + ELSE + LDSPLITLAT(1:R%NDGL) = D%LSPLITLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KULTPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KULTPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KULTPP,1) < NPRTRNS) THEN + CALL ABORT_TRANS('TRANS_INQ: KULTPP TOO SMALL') + ELSE + KULTPP(1:NPRTRNS) = D%NULTPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRLS,1) < NPRTRNS) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLS TOO SMALL') + ELSE + KPTRLS(1:NPRTRNS) = D%NPTRLS(:) + ENDIF +ENDIF + +IF(PRESENT(KNMENG)) THEN + IF(UBOUND(KNMENG,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: KNMENG TOO SMALL') + ELSE + KNMENG(1:R%NDGL) = G%NMEN(1:R%NDGL) + ENDIF +ENDIF + +IF(PRESENT(PMU)) THEN + IF(UBOUND(PMU,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: PMU TOO SMALL') + ELSE + PMU(1:R%NDGL) = F%RMU + ENDIF +ENDIF + +IF(PRESENT(PGW)) THEN + IF(UBOUND(PGW,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: PGW TOO SMALL') + ELSE + PGW(1:R%NDGL) = F%RW + ENDIF +ENDIF + +IF(PRESENT(PRPNM)) THEN + + IF( .NOT. S%LKEEPRPNM ) THEN + CALL ABORT_TRANS('TRANS_INQ: PRPNM REQUIRED BUT S%LKEEPRPNM=F') + ENDIF + + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF + IU1 = UBOUND(PRPNM,1) + IU2 = UBOUND(PRPNM,2) + IF(IU1 < R%NDGNH) THEN + CALL ABORT_TRANS('TRANS_INQ:FIRST DIM. OF PRNM TOO SMALL') + ELSE +! IU1 = MIN(IU1,R%NLEI3) +! IU2 = MIN(IU2,D%NSPOLEGL) +! PRPNM(1:IU1,1:IU2) = F%RPNM(1:IU1,1:IU2) + + DO JMLOC=1,D%NUMP,NPRTRV + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + DO J=1,ILA + DO I=1,IDGLU + PRPNM(ISL+I-1,D%NPMS(IM)+IA+(J-1)*2) = S%FA(IMLOC)%RPNMA(I,J) + ENDDO + ENDDO + DO J=1,ILS + DO I=1,IDGLU + PRPNM(ISL+I-1,D%NPMS(IM)+IS+(J-1)*2) = S%FA(IMLOC)%RPNMS(I,J) + ENDDO + ENDDO + ENDDO + ENDDO + ENDIF +ENDIF +IF(PRESENT(KLEI3)) THEN + KLEI3=R%NLEI3 +ENDIF +IF(PRESENT(KSPOLEGL)) THEN + KSPOLEGL=D%NSPOLEGL +ENDIF +IF(PRESENT(KPMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPMS,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KPMS TOO SMALL') + ELSE + KPMS(0:R%NSMAX) = D%NPMS(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KSMAX)) KSMAX = R%NSMAX +IF(PRESENT(PLAPIN)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: PLAPIN REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(PLAPIN,1) < R%NSMAX+2) THEN + CALL ABORT_TRANS('TRANS_INQ: PLAPIN TOO SMALL') + ELSEIF (LBOUND(PLAPIN,1) /= -1) THEN + CALL ABORT_TRANS('TRANS_INQ: LOWER BOUND OF PLAPIN SHOULD BE -1') + ELSE + PLAPIN(-1:R%NSMAX+2) = F%RLAPIN(:) + ENDIF +ENDIF +IF(PRESENT(KNVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KNVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('TRANS_INQ: KNVALUE TOO SMALL') + ELSE + IC=1 + DO JMLOC=1,D%NUMP + DO JN=D%MYMS(JMLOC),R%NSMAX + KNVALUE(IC )=JN + KNVALUE(IC+1)=JN + IC=IC+2 + ENDDO + ENDDO + ENDIF +ENDIF + + +IF(PRESENT(KDGLU)) THEN + IF(UBOUND(KDGLU,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KDGLU TOO SMALL') + ELSE + KDGLU(0:R%NSMAX) = G%NDGLU(0:R%NSMAX) + ENDIF +ENDIF +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE TRANS_INQ + + + + + + diff --git a/src/trans/cpu/external/trans_pnm.F90 b/src/trans/cpu/external/trans_pnm.F90 new file mode 100644 index 0000000..faa8f84 --- /dev/null +++ b/src/trans/cpu/external/trans_pnm.F90 @@ -0,0 +1,199 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_PNM(KRESOL,KM,PRPNM,LDTRANSPOSE,LDCHEAP) + +!**** *TRANS_PNM* - Compute Legendre polynomials for a given wavenember + +! Purpose. +! -------- +! Interface routine for computing Legendre polynomials for a given wavenember + +!** Interface. +! ---------- +! CALL TRANS_PNM(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) +! KM - wave number +! PRPNM - Legendre polynomials +! LDTRANSPOSE - Legendre polynomials array is transposed +! LDCHEAP - cheapest but less accurate computation + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 22-Jan-2016 from G. Mozdzynski's getpnm + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM, JPRB + +!ifndef INTERFACE + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FLT ,ONLY : S + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TPM_POL +USE SUPOLF_MOD + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM) ,INTENT(IN) :: KM +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PRPNM(:,:) +LOGICAL, OPTIONAL, INTENT(IN) :: LDTRANSPOSE +LOGICAL, OPTIONAL, INTENT(IN) :: LDCHEAP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1, IU2, IMAXN, INMAX, ICHEAP_SYM, ICHEAP_ANTISYM +INTEGER(KIND=JPIM) :: IC, JN, JMLOC, JGL, JI +INTEGER(KIND=JPIM) :: IA, IS, IDGLU, ILA, ILS, ISL +REAL(KIND=JPRD), ALLOCATABLE :: ZLPOL(:) +LOGICAL :: LLTRANSPOSE, LLCHEAP +! ------------------------------------------------------------------ + +! Set current resolution +IF (PRESENT(KRESOL)) THEN + CALL SET_RESOL(KRESOL) +ENDIF + +IF (PRESENT(LDTRANSPOSE)) THEN + LLTRANSPOSE=LDTRANSPOSE +ELSE + LLTRANSPOSE=.FALSE. +ENDIF + +IF (PRESENT(LDCHEAP)) THEN + LLCHEAP=LDCHEAP +ELSE + LLCHEAP=.FALSE. +ENDIF +IF (LLCHEAP) THEN + ICHEAP_SYM =2 + ICHEAP_ANTISYM=3 +ELSE + ICHEAP_SYM =1 + ICHEAP_ANTISYM=1 +ENDIF + +IF (PRESENT(PRPNM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_PNM: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF +ENDIF + +IU1 = UBOUND(PRPNM,1) +IU2 = UBOUND(PRPNM,2) + +IF (LLTRANSPOSE) THEN + + IF(IU2 < R%NLEI3) THEN + CALL ABORT_TRANS('TRANS_PNM : FIRST DIM. OF PRPNM TOO SMALL') + ENDIF + IF(IU1 < R%NTMAX-KM+3) THEN + CALL ABORT_TRANS('TRANS_PNM : SECOND DIM. OF PRPNM TOO SMALL') + ENDIF + + IF (IU2 >= R%NLEI3) THEN + PRPNM(:,R%NLEI3) = 0.0_JPRB + ENDIF + +ELSE + + IF(IU1 < R%NLEI3) THEN + CALL ABORT_TRANS('TRANS_PNM : FIRST DIM. OF PRPNM TOO SMALL') + ENDIF + IF(IU2 < R%NTMAX-KM+3) THEN + CALL ABORT_TRANS('TRANS_PNM : SECOND DIM. OF PRPNM TOO SMALL') + ENDIF + + IF (IU1 >= R%NLEI3) THEN + PRPNM(R%NLEI3,:) = 0.0_JPRB + ENDIF + +ENDIF + +ILA = (R%NTMAX-KM+2)/2 +ILS = (R%NTMAX-KM+3)/2 + +CALL INI_POL(R%NTMAX+2,LDFAST=.TRUE.) + +IMAXN=R%NTMAX+1 + +IA = 1+MOD(R%NTMAX-KM+2,2) +IS = 1+MOD(R%NTMAX-KM+1,2) + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IF (S%LSOUTHPNM) THEN + IDGLU = 2*MIN(R%NDGNH,G%NDGLU(KM)) +ELSE + IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) +ENDIF + +IF(MOD(IMAXN-KM,2) == 0) THEN + INMAX=IMAXN+1 +ELSE + INMAX=IMAXN +ENDIF + +ALLOCATE(ZLPOL(0:R%NTMAX+2)) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) +DO JGL=1,IDGLU + CALL SUPOLF(KM,INMAX,REAL (F%RMU(ISL+JGL-1), JPRD),ZLPOL(0:INMAX),KCHEAP=ICHEAP_ANTISYM) + IF (LLTRANSPOSE) THEN + DO JI=1,ILA + PRPNM(IA+(JI-1)*2,ISL+JGL-1) = ZLPOL(KM+2*(ILA-JI)+1) + ENDDO + ELSE + DO JI=1,ILA + PRPNM(ISL+JGL-1,IA+(JI-1)*2) = ZLPOL(KM+2*(ILA-JI)+1) + ENDDO + ENDIF + CALL SUPOLF(KM,INMAX,REAL (F%RMU(ISL+JGL-1), JPRD),ZLPOL(0:INMAX),KCHEAP=ICHEAP_SYM) + IF (LLTRANSPOSE) THEN + DO JI=1,ILS + PRPNM(IS+(JI-1)*2,ISL+JGL-1) = ZLPOL(KM+2*(ILS-JI)) + ENDDO + ELSE + DO JI=1,ILS + PRPNM(ISL+JGL-1,IS+(JI-1)*2) = ZLPOL(KM+2*(ILS-JI)) + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO + +CALL END_POL + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE TRANS_PNM diff --git a/src/trans/cpu/external/trans_release.F90 b/src/trans/cpu/external/trans_release.F90 new file mode 100644 index 0000000..ea97b3c --- /dev/null +++ b/src/trans/cpu/external/trans_release.F90 @@ -0,0 +1,61 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_RELEASE(KRESOL) + +!**** *TRANS_RELEASE* - release a spectral resolution + +! Purpose. +! -------- +! Release all arrays related to a given resolution tag + +!** Interface. +! ---------- +! CALL TRANS_RELEASE + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE DEALLOC_RESOL_MOD ,ONLY : DEALLOC_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL + +!endif INTERFACE + +! ------------------------------------------------------------------ + +CALL DEALLOC_RESOL(KRESOL) + +! ------------------------------------------------------------------ + +END SUBROUTINE TRANS_RELEASE diff --git a/src/trans/cpu/external/vordiv_to_uv.F90 b/src/trans/cpu/external/vordiv_to_uv.F90 new file mode 100644 index 0000000..794a690 --- /dev/null +++ b/src/trans/cpu/external/vordiv_to_uv.F90 @@ -0,0 +1,179 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE VORDIV_TO_UV(PSPVOR,PSPDIV,PSPU,PSPV,KSMAX,KVSETUV) + +!**** *VORDIV_TO_UV* - Convert spectral vorticity and divergence to spectral U (u*cos(theta)) and V (v*cos(theta). + +! Purpose. +! -------- +! Interface routine for Convert spectral vorticity and divergence to spectral U and V + +!** Interface. +! ---------- +! CALL VORDIV_TO_UV(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPU(:,:) - spectral U (u*cos(theta) (output) +! PSPV(:,:) - spectral V (v*cos(theta) (output) +! KSMAX - spectral resolution (input) +! KVSETUV(:) - Optionally indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- VD2UV_CTL - control vordiv to uv + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 15-06-15 + + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT,MSETUP0 +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE VD2UV_CTL_MOD ,ONLY : VD2UV_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB), INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PSPU(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PSPV(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) + +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IRESOL,IDGL +LOGICAL :: LTMP_SETUP0 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +#include "setup_trans0.h" +#include "setup_trans.h" +#include "trans_release.h" +#include "trans_end.h" + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('VORDIV_TO_UV',0,ZHOOK_HANDLE) + +!CALL GSTATS(XXXX,0) + +IF(MSETUP0 == 0) THEN + CALL SETUP_TRANS0() + LTMP_SETUP0 = .TRUE. +ELSE + LTMP_SETUP0 = .FALSE. +ENDIF +IDGL = 2 ! It doesn't matter as long as it's a positive even number +CALL SETUP_TRANS(KSMAX,IDGL,LDSPSETUPONLY=.TRUE.,KRESOL=IRESOL) +CALL SET_RESOL(IRESOL) + + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'VORDIV_TO_UV:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('VORDIV_TO_UV:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSE + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPVOR TOO SHORT') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPDIV TOO SHORT') + ENDIF + IF(UBOUND(PSPU,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPU,1) < IF_UV ',UBOUND(PSPU,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPU TOO SHORT') + ENDIF + IF(UBOUND(PSPV,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPV,1) < IF_UV ',UBOUND(PSPV,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPV TOO SHORT') + ENDIF +ENDIF + + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +!CALL GSTATS(XXXX,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL VD2UV_CTL(IF_UV,PSPVOR,PSPDIV,PSPU,PSPV) + +CALL TRANS_RELEASE(IRESOL) +IF (LTMP_SETUP0) THEN + CALL TRANS_END() +ENDIF + +IF (LHOOK) CALL DR_HOOK('VORDIV_TO_UV',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE VORDIV_TO_UV + diff --git a/src/trans/cpu/internal/abort_trans_mod.F90 b/src/trans/cpu/internal/abort_trans_mod.F90 new file mode 100644 index 0000000..aee35f5 --- /dev/null +++ b/src/trans/cpu/internal/abort_trans_mod.F90 @@ -0,0 +1,39 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ABORT_TRANS_MOD +CONTAINS +SUBROUTINE ABORT_TRANS(CDTEXT) + +USE TPM_GEN , ONLY : NOUT,NERR +USE TPM_DISTR, ONLY : NPROC,MYPROC +USE MPL_MODULE, ONLY : MPL_ABORT +USE SDL_MOD, ONLY : SDL_TRACEBACK, SDL_SRLABORT + +IMPLICIT NONE + + +CHARACTER(LEN=*),INTENT(IN) :: CDTEXT + +WRITE(NOUT,'(1X,A)') 'ABORT_TRANS CALLED' + +WRITE(NOUT,'(1X,A)') CDTEXT +WRITE(NERR,'(1X,A,1X,I3,1X,A)') 'ABORT! ',MYPROC,CDTEXT +CLOSE(NOUT) +IF (NPROC > 1) THEN + CALL MPL_ABORT(CDTEXT) +ELSE + CALL SDL_TRACEBACK + CALL FLUSH(0) + CALL SDL_SRLABORT +ENDIF + +END SUBROUTINE ABORT_TRANS +END MODULE ABORT_TRANS_MOD diff --git a/src/trans/cpu/internal/asre1_mod.F90 b/src/trans/cpu/internal/asre1_mod.F90 new file mode 100644 index 0000000..bf60d1b --- /dev/null +++ b/src/trans/cpu/internal/asre1_mod.F90 @@ -0,0 +1,92 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1_MOD +CONTAINS +SUBROUTINE ASRE1(KM,KMLOC,KF_OUT_LT,PAOA1,PSOA1) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_TRANS + +USE ASRE1B_MOD ,ONLY : ASRE1B + + +!**** *ASRE1* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. ASRE1B - basic recombination routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1 in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_OUT_LT + +REAL(KIND=JPRB) , INTENT(IN) :: PSOA1(:,:), PAOA1(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFLDS + + +! ------------------------------------------------------------------ + +IFLDS = KF_OUT_LT + +CALL ASRE1B(IFLDS,KM,KMLOC,PAOA1,PSOA1) + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1 +END MODULE ASRE1_MOD diff --git a/src/trans/cpu/internal/asre1ad_mod.F90 b/src/trans/cpu/internal/asre1ad_mod.F90 new file mode 100644 index 0000000..7572314 --- /dev/null +++ b/src/trans/cpu/internal/asre1ad_mod.F90 @@ -0,0 +1,92 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1AD_MOD +CONTAINS +SUBROUTINE ASRE1AD(KM,KMLOC,KF_OUT_LT,PAOA1,PSOA1) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_TRANS + +USE ASRE1BAD_MOD ,ONLY : ASRE1BAD + + +!**** *ASRE1AD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1AD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. ASRE1BAD - basic recombination routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1AD in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_OUT_LT + +REAL(KIND=JPRB) , INTENT(OUT) :: PSOA1(:,:), PAOA1(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFLDS + + +! ------------------------------------------------------------------ + +IFLDS = KF_OUT_LT + +CALL ASRE1BAD(IFLDS,KM,KMLOC,PAOA1,PSOA1) + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1AD +END MODULE ASRE1AD_MOD diff --git a/src/trans/cpu/internal/asre1b_mod.F90 b/src/trans/cpu/internal/asre1b_mod.F90 new file mode 100644 index 0000000..b2d67ae --- /dev/null +++ b/src/trans/cpu/internal/asre1b_mod.F90 @@ -0,0 +1,107 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1B_MOD +CONTAINS +SUBROUTINE ASRE1B(KFIELD,KM,KMLOC,PAOA,PSOA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D + + +!**** *ASRE1B* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1B in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB), INTENT(IN) :: PSOA(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PAOA(:,:) + +! LOCAL INTEGERS +INTEGER(KIND=JPIM) :: ISL, IGLS, JFLD, JGL ,IPROC, IPROCS, IDGNH +INTEGER(KIND=JPIM) :: ISTAN(R%NDGNH),ISTAS(R%NDGNH) + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IDGNH = R%NDGNH + +!* 1.2 RECOMBINE + +DO JGL=ISL,IDGNH + IPROC = D%NPROCL(JGL) + ISTAN(JGL) = (D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*2*KFIELD + IGLS = R%NDGL+1-JGL + IPROCS = D%NPROCL(IGLS) + ISTAS(JGL) = (D%NSTAGT0B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*2*KFIELD +ENDDO + +DO JGL=ISL,IDGNH +!OCL NOVREC + DO JFLD=1,2*KFIELD + FOUBUF_IN(ISTAN(JGL)+JFLD) = PAOA(JFLD,JGL)+PSOA(JFLD,JGL) + FOUBUF_IN(ISTAS(JGL)+JFLD) = PSOA(JFLD,JGL)-PAOA(JFLD,JGL) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1B +END MODULE ASRE1B_MOD diff --git a/src/trans/cpu/internal/asre1bad_mod.F90 b/src/trans/cpu/internal/asre1bad_mod.F90 new file mode 100644 index 0000000..52ee7c0 --- /dev/null +++ b/src/trans/cpu/internal/asre1bad_mod.F90 @@ -0,0 +1,108 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1BAD_MOD +CONTAINS +SUBROUTINE ASRE1BAD(KFIELD,KM,KMLOC,PAOA,PSOA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D + + +!**** *ASRE1BAD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1BAD in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB), INTENT(OUT) :: PSOA(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PAOA(:,:) + +! LOCAL INTEGERS +INTEGER(KIND=JPIM) :: ISL, IGLS, JFLD, JGL ,IPROC, IPROCS, IDGNH +INTEGER(KIND=JPIM) :: ISTAN(R%NDGNH),ISTAS(R%NDGNH) + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IDGNH = R%NDGNH + +!* 1.2 RECOMBINE + +DO JGL=ISL,IDGNH + IPROC = D%NPROCL(JGL) + ISTAN(JGL) = (D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*2*KFIELD + IGLS = R%NDGL+1-JGL + IPROCS = D%NPROCL(IGLS) + ISTAS(JGL) = (D%NSTAGT0B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*2*KFIELD +ENDDO + +DO JGL=ISL,IDGNH +!OCL NOVREC + DO JFLD=1,2*KFIELD + PSOA(JFLD,JGL) = FOUBUF_IN(ISTAN(JGL)+JFLD)+FOUBUF_IN(ISTAS(JGL)+JFLD) + PAOA(JFLD,JGL) = FOUBUF_IN(ISTAN(JGL)+JFLD)-FOUBUF_IN(ISTAS(JGL)+JFLD) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1BAD +END MODULE ASRE1BAD_MOD + diff --git a/src/trans/cpu/internal/cdmap_mod.F90 b/src/trans/cpu/internal/cdmap_mod.F90 new file mode 100644 index 0000000..70b3ef8 --- /dev/null +++ b/src/trans/cpu/internal/cdmap_mod.F90 @@ -0,0 +1,178 @@ +! (C) Copyright 2014- ECMWF. +! (C) Copyright 2014- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE CDMAP_MOD +CONTAINS +SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& +& KFIELDS, PCOEFA, PCOEFS) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_FLT +USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D +USE TPM_TRANS ,ONLY : FOUBUF_IN, FOUBUF +USE SEEFMM_MIX + +!**** *CDMAP* - REMAP ROOTS +! +! Purpose. +! -------- +! remap from one set of roots to another using Christoffel-Darboux formula, see Chien + Alpert, 1997. + +!** Interface. +! ---------- +! *CALL* *CDMAP(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! Chien + Alpert, 1997. + +! Author. +! ------- +! Nils Wedi *ECMWF* + +! Modifications. +! -------------- +! Original : 14-05-14 +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KSL +INTEGER(KIND=JPIM), INTENT(IN) :: KSLO +REAL(KIND=JPRB), INTENT(IN) :: PEPSNM +INTEGER(KIND=JPIM), INTENT(IN) :: KDIR ! direction of map +INTEGER(KIND=JPIM), INTENT(IN) :: KDGNH +INTEGER(KIND=JPIM), INTENT(IN) :: KDGNHD +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELDS +REAL(KIND=JPRB), INTENT(INOUT) :: PCOEFA(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PCOEFS(:,:) + +INTEGER(KIND=JPIM) :: JGL, IGL, JF +REAL(KIND=JPRB), ALLOCATABLE :: ZALL(:,:), ZQX(:,:) +REAL(KIND=JPRB), ALLOCATABLE :: ZALL1(:,:), ZQY(:,:) +INTEGER(KIND=JPIM) :: ISTN(KDGNH), ISTS(KDGNH) + +INTEGER(KIND=JPIM) :: IGLS, IPROC, IPROCS, IEND, IENDO + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('CDMAP_MOD',0,ZHOOK_HANDLE) + +IF( KDIR == -1 ) THEN + ! inverse map from internal (gg) roots to post-processing roots + + IENDO = 2*KDGNHD - KSLO + 1 + IEND = 2*KDGNH - KSL + 1 + + !!!!! fourier buffer setup in output latitudes, may not work if different from input !!!! + DO IGL=KSLO, KDGNHD + IPROC = D%NPROCL(IGL) + ISTN(IGL) = (D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,IGL))*KFIELDS + IGLS = 2*KDGNH+1-IGL + IPROCS = D%NPROCL(IGLS) + ISTS(IGL) = (D%NSTAGT0B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*KFIELDS + ENDDO + + ALLOCATE(ZALL(KFIELDS, 2*KDGNHD)) + ALLOCATE(ZALL1(KFIELDS, 2*KDGNHD)) + ALLOCATE(ZQX(KFIELDS, 2*KDGNH)) + ALLOCATE(ZQY(KFIELDS, 2*KDGNH)) + ZQX(:,1:KSL) = 0._JPRB + ZQX(:,IEND:2*KDGNH) = 0._JPRB + ZQY(:,1:KSL) = 0._JPRB + ZQY(:,IEND:2*KDGNH) = 0._JPRB + DO JGL=KSL, IEND + ZQX(1:KFIELDS,JGL)=S%FA(KMLOC)%RPNMWI(JGL-KSL+1,1)*PCOEFA(1:KFIELDS,JGL) + ZQY(1:KFIELDS,JGL)=S%FA(KMLOC)%RPNMWI(JGL-KSL+1,2)*PCOEFA(1:KFIELDS,JGL) + ENDDO + CALL SEEFMM_MULM(S%FMM_INTI,KFIELDS,1_JPIM,.TRUE.,ZQX,ZALL1) + CALL SEEFMM_MULM(S%FMM_INTI,KFIELDS,1_JPIM,.TRUE.,ZQY,ZALL) + DEALLOCATE(ZQX) + DEALLOCATE(ZQY) + ! minus sign comes from FMM ?! + ! fill buffer + DO IGL=KSLO,KDGNHD + IGLS = 2*KDGNHD+1-IGL + DO JF=1,KFIELDS + FOUBUF_IN(ISTN(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,1)*ZALL1(JF,IGL) & + & - S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,2)*ZALL(JF,IGL) + FOUBUF_IN(ISTS(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,1)*ZALL1(JF,IGLS) & + & - S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,2)*ZALL(JF,IGLS) + ENDDO + ENDDO + DEALLOCATE(ZALL1) + DEALLOCATE(ZALL) + +ELSE +! direct map from post-processing/input field roots to internal (gg) roots +! this assumes essentially a nearest neighbour interpolation in latitude +! a more accurate approach may be +! a local gridpoint interpolation of the input field to the target latitudes prior to the transforms + + IENDO = 2*KDGNHD - KSLO + 1 + IEND = 2*KDGNH - KSL + 1 + + !!!!! fourier buffer setup in input data latitudes, may not work if different from output !!!! + DO JGL=KSLO, KDGNHD + IPROC = D%NPROCL(JGL) + ISTN(JGL) = (D%NSTAGT1B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*KFIELDS + IGLS = 2*KDGNHD+1-JGL + IPROCS = D%NPROCL(IGLS) + ISTS(JGL) = (D%NSTAGT1B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*KFIELDS + ENDDO + + ALLOCATE( ZQX( KFIELDS, 2*KDGNHD)) + ZQX(:,1:KSLO) = 0._JPRB + ZQX(:,IENDO:2*KDGNHD) = 0._JPRB + DO JGL=KSLO, KDGNHD + IGLS = 2*KDGNHD+1-JGL + DO JF=1,KFIELDS + ZQX(JF,JGL)=FOUBUF(ISTN(JGL)+JF) + ZQX(JF,IGLS)=FOUBUF(ISTS(JGL)+JF) + ENDDO + ENDDO + + ! split into symmetric / antisymmetric + DO IGL=KSL,KDGNH + IGLS = 2*KDGNH+1-IGL + PCOEFS(1:KFIELDS,IGL) = ZQX(1:KFIELDS,IGL) + ZQX(1:KFIELDS,IGLS) + PCOEFA(1:KFIELDS,IGL) = ZQX(1:KFIELDS,IGL) - ZQX(1:KFIELDS,IGLS) + ENDDO + + DEALLOCATE(ZQX) + +ENDIF + +IF (LHOOK) CALL DR_HOOK('CDMAP_MOD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE CDMAP +END MODULE CDMAP_MOD diff --git a/src/trans/cpu/internal/cpledn_mod.F90 b/src/trans/cpu/internal/cpledn_mod.F90 new file mode 100644 index 0000000..9b60b18 --- /dev/null +++ b/src/trans/cpu/internal/cpledn_mod.F90 @@ -0,0 +1,134 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE CPLEDN_MOD +CONTAINS +SUBROUTINE CPLEDN(KN,KODD,PFN,PX,KFLAG,PW,PXN,PXMOD) + +!**** *CPLEDN* - Routine to perform a single Newton iteration step to find +! the zero of the ordinary Legendre polynomial of degree N + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *CPLEDN(KN,KDBLE,PX,KFLAG,PW,PXN,PXMOD)* + +! Explicit arguments : +! -------------------- +! KN : Degree of the Legendre polynomial (in) +! KODD : odd or even number of latitudes (in) +! PFN : Fourier coefficients of series expansion (in) +! for the ordinary Legendre polynomials +! PX : abcissa where the computations are performed (in) +! KFLAG : When KFLAG.EQ.1 computes the weights (in) +! PW : Weight of the quadrature at PXN (out) +! PXN : new abscissa (Newton iteration) (out) +! PXMOD : PXN-PX (out) + +! Implicit arguments : +! -------------------- +! None + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- +! None + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! Michel Rochas, 90-08-30 (Lobatto+cleaning) +! K. Yessad (Sep 2008): cleaning, improve comments. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KN +INTEGER(KIND=JPIM),INTENT(IN) :: KODD +REAL(KIND=JPRD),INTENT(IN) :: PFN(0:KN/2) +REAL(KIND=JPRD),INTENT(IN) :: PX +INTEGER(KIND=JPIM),INTENT(IN) :: KFLAG +REAL(KIND=JPRD),INTENT(OUT) :: PW +REAL(KIND=JPRD),INTENT(INOUT) :: PXN +REAL(KIND=JPRD),INTENT(OUT) :: PXMOD + +! ------------------------------------------------------------------ + +REAL(KIND=JPRD) :: ZDLX,ZDLK,ZDLLDN,ZDLXN,ZDLMOD + +INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(PX) + +INTEGER(KIND=JPIM) :: JN, IK + +! ----------------------------------------------------------------- + +!* 1. NEWTON ITERATION STEP. +! ---------------------- + +ZDLX = PX + +ZDLK = 0.0_JPRD +IF( KODD==0 ) ZDLK=0.5_JPRD*PFN(0) +ZDLXN = 0.0_JPRD +ZDLLDN = 0.0_JPRD +IK=1 + +IF(KFLAG == 0)THEN + DO JN=2-KODD,KN,2 + ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 + ZDLK = ZDLK + PFN(IK)*COS(REAL(JN,JPKD)*ZDLX) + ! normalised derivative == d/d\theta(\overbar{P_n}^0) + ZDLLDN = ZDLLDN - PFN(IK)*REAL(JN,JPKD)*SIN(REAL(JN,JPKD)*ZDLX) + IK=IK+1 + ENDDO + ! Newton method + ZDLMOD = -ZDLK/ZDLLDN + ZDLXN = ZDLX+ZDLMOD + PXN = ZDLXN + PXMOD = ZDLMOD +ENDIF + +! ------------------------------------------------------------------ + +!* 2. Computes weight. +! ---------------- + +IF(KFLAG == 1)THEN + DO JN=2-KODD,KN,2 + ! normalised derivative + ZDLLDN = ZDLLDN - PFN(IK)*REAL(JN,JPKD)*SIN(REAL(JN,JPKD)*ZDLX) + IK=IK+1 + ENDDO + PW = REAL(2*KN+1,JPKD)/ZDLLDN**2 +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE CPLEDN +END MODULE CPLEDN_MOD diff --git a/src/trans/cpu/internal/dealloc_resol_mod.F90 b/src/trans/cpu/internal/dealloc_resol_mod.F90 new file mode 100644 index 0000000..e247d3c --- /dev/null +++ b/src/trans/cpu/internal/dealloc_resol_mod.F90 @@ -0,0 +1,209 @@ +! (C) Copyright 2013- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DEALLOC_RESOL_MOD +CONTAINS +SUBROUTINE DEALLOC_RESOL(KRESOL) + +!**** *DEALLOC_RESOL* - Deallocations of a resolution + +! Purpose. +! -------- +! Release allocated arrays for a given resolution + +!** Interface. +! ---------- +! CALL DEALLOC_RESOL + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 from trans_end + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_GEN ,ONLY : LENABLED, NOUT,NDEF_RESOL +USE TPM_DISTR ,ONLY : D,NPRTRV +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FFT ,ONLY : T, TB +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_TERM +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW,DESTROY_PLANS_FFTW +#endif +USE TPM_FLT ,ONLY : S +USE TPM_CTL ,ONLY : C +USE SEEFMM_MIX + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM) :: JMLOC,IPRTRV,JSETV,IMLOC,IM,ILA,ILS, JRESOL + +! ------------------------------------------------------------------ + +IF (.NOT.LENABLED(KRESOL)) THEN + + WRITE(UNIT=NOUT,FMT='('' DEALLOC_RESOL WARNING : KRESOL = '',I3,'' ALREADY DISABLED '')') KRESOL + +ELSE + + CALL SET_RESOL(KRESOL) + + !TPM_FLT + IF( ALLOCATED(S%FA) ) THEN + DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IF( S%LUSEFLT .AND. ILA > S%ITHRESHOLD) THEN + ELSE + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMA)) DEALLOCATE(S%FA(IMLOC)%RPNMA) + ENDIF + IF( S%LUSEFLT .AND. ILS > S%ITHRESHOLD) THEN + ELSE + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMS)) DEALLOCATE(S%FA(IMLOC)%RPNMS) + ENDIF + IF(S%LDLL) THEN + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMWI)) DEALLOCATE(S%FA(IMLOC)%RPNMWI) + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMWO)) DEALLOCATE(S%FA(IMLOC)%RPNMWO) + ENDIF + ENDDO + ENDDO + DEALLOCATE(S%FA) + ENDIF + IF(S%LDLL) THEN + CALL FREE_SEEFMM(S%FMM_INTI) + IF(ASSOCIATED(S%FMM_INTI)) DEALLOCATE(S%FMM_INTI) + ENDIF + + !TPM_DISTR + IF(ALLOCATED(D%NFRSTLAT)) DEALLOCATE(D%NFRSTLAT) + IF(ALLOCATED(D%NLSTLAT)) DEALLOCATE(D%NLSTLAT) + IF(ALLOCATED(D%NPTRLAT)) DEALLOCATE(D%NPTRLAT) + IF(ALLOCATED(D%NPTRFRSTLAT)) DEALLOCATE(D%NPTRFRSTLAT) + IF(ALLOCATED(D%NPTRLSTLAT)) DEALLOCATE(D%NPTRLSTLAT) + IF(ALLOCATED(D%LSPLITLAT)) DEALLOCATE(D%LSPLITLAT) + IF(ALLOCATED(D%NSTA)) DEALLOCATE(D%NSTA) + IF(ALLOCATED(D%NONL)) DEALLOCATE(D%NONL) + IF(ALLOCATED(D%NGPTOTL)) DEALLOCATE(D%NGPTOTL) + IF(ALLOCATED(D%NPROCA_GP)) DEALLOCATE(D%NPROCA_GP) + + IF(D%LWEIGHTED_DISTR) THEN + IF(ALLOCATED(D%RWEIGHT)) DEALLOCATE(D%RWEIGHT) + ENDIF + + IF(ALLOCATED(D%MYMS)) DEALLOCATE(D%MYMS) + IF(ALLOCATED(D%NUMPP)) DEALLOCATE(D%NUMPP) + IF(ALLOCATED(D%NPOSSP)) DEALLOCATE(D%NPOSSP) + IF(ALLOCATED(D%NPROCM)) DEALLOCATE(D%NPROCM) + IF(ALLOCATED(D%NDIM0G)) DEALLOCATE(D%NDIM0G) + IF(ALLOCATED(D%NASM0)) DEALLOCATE(D%NASM0) + IF(ALLOCATED(D%NATM0)) DEALLOCATE(D%NATM0) + IF(ALLOCATED(D%NLATLS)) DEALLOCATE(D%NLATLS) + IF(ALLOCATED(D%NLATLE)) DEALLOCATE(D%NLATLE) + IF(ALLOCATED(D%NPMT)) DEALLOCATE(D%NPMT) + IF(ALLOCATED(D%NPMS)) DEALLOCATE(D%NPMS) + IF(ALLOCATED(D%NPMG)) DEALLOCATE(D%NPMG) + IF(ALLOCATED(D%NULTPP)) DEALLOCATE(D%NULTPP) + IF(ALLOCATED(D%NPROCL)) DEALLOCATE(D%NPROCL) + IF(ALLOCATED(D%NPTRLS)) DEALLOCATE(D%NPTRLS) + IF(ALLOCATED(D%NALLMS)) DEALLOCATE(D%NALLMS) + IF(ALLOCATED(D%NPTRMS)) DEALLOCATE(D%NPTRMS) + IF(ALLOCATED(D%NSTAGT0B)) DEALLOCATE(D%NSTAGT0B) + IF(ALLOCATED(D%NSTAGT1B)) DEALLOCATE(D%NSTAGT1B) + IF(ALLOCATED(D%NPNTGTB0)) DEALLOCATE(D%NPNTGTB0) + IF(ALLOCATED(D%NPNTGTB1)) DEALLOCATE(D%NPNTGTB1) + IF(ALLOCATED(D%NLTSFTB)) DEALLOCATE(D%NLTSFTB) + IF(ALLOCATED(D%NLTSGTB)) DEALLOCATE(D%NLTSGTB) + IF(ALLOCATED(D%MSTABF)) DEALLOCATE(D%MSTABF) + IF(ALLOCATED(D%NSTAGTF)) DEALLOCATE(D%NSTAGTF) + + !TPM_FFT + IF (.NOT.D%LCPNMONLY) THEN + IF( ASSOCIATED(T) ) THEN + IF( ALLOCATED(T%TRIGS) ) DEALLOCATE(T%TRIGS) + IF( ALLOCATED(T%NFAX) ) DEALLOCATE(T%NFAX) + IF( ALLOCATED(T%LUSEFFT992)) DEALLOCATE(T%LUSEFFT992) + ENDIF + IF( ASSOCIATED(TB) ) THEN + IF( T%LBLUESTEIN )THEN + CALL BLUESTEIN_TERM(TB) + T%LBLUESTEIN = .FALSE. + ENDIF + ENDIF + ENDIF + +#ifdef WITH_FFTW + !TPM_FFTW + IF( TW%LFFTW )THEN + CALL DESTROY_PLANS_FFTW + ENDIF +#endif + + + !TPM_FIELDS + IF(ALLOCATED(F%RMU)) DEALLOCATE(F%RMU) + IF(ALLOCATED(F%RW)) DEALLOCATE(F%RW) + IF(ALLOCATED(F%R1MU2)) DEALLOCATE(F%R1MU2) + IF(ALLOCATED(F%RACTHE)) DEALLOCATE(F%RACTHE) + IF(ALLOCATED(F%REPSNM)) DEALLOCATE(F%REPSNM) + IF(ALLOCATED(F%RN)) DEALLOCATE(F%RN) + IF(ALLOCATED(F%RLAPIN)) DEALLOCATE(F%RLAPIN) + IF(ALLOCATED(F%NLTN)) DEALLOCATE(F%NLTN) + IF( S%LKEEPRPNM ) THEN + IF(ALLOCATED(F%RPNM)) DEALLOCATE(F%RPNM) + ENDIF + IF( S%LDLL ) THEN + IF(ALLOCATED(F%RMU2)) DEALLOCATE(F%RMU2) + IF(ALLOCATED(F%RACTHE2)) DEALLOCATE(F%RACTHE2) + ENDIF + + !TPM_GEOMETRY + IF(ALLOCATED(G%NMEN)) DEALLOCATE(G%NMEN) + IF(ALLOCATED(G%NDGLU)) DEALLOCATE(G%NDGLU) + IF(ALLOCATED(G%NLOEN)) DEALLOCATE(G%NLOEN) + + LENABLED(KRESOL)=.FALSE. + NDEF_RESOL = COUNT(LENABLED) + ! Do not stay on a disabled resolution + DO JRESOL=1,SIZE(LENABLED) + IF (LENABLED(JRESOL)) THEN + CALL SET_RESOL(JRESOL) + EXIT + ENDIF + ENDDO + +ENDIF +! ------------------------------------------------------------------ + +END SUBROUTINE DEALLOC_RESOL +END MODULE DEALLOC_RESOL_MOD diff --git a/src/trans/cpu/internal/dir_trans_ctl_mod.F90 b/src/trans/cpu/internal/dir_trans_ctl_mod.F90 new file mode 100644 index 0000000..ac2d666 --- /dev/null +++ b/src/trans/cpu/internal/dir_trans_ctl_mod.F90 @@ -0,0 +1,195 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIR_TRANS_CTL_MOD +CONTAINS +SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *DIR_TRANS_CTL* - Control routine for direct spectral transform. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTDIR_CTL_MOD ,ONLY : LTDIR_CTL +USE FTDIR_CTL_MOD ,ONLY : FTDIR_CTL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB + + +! ------------------------------------------------------------------ + +! Perform transform + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ENDIF + CALL LTDIR_CTL(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC) + + ENDDO +ELSE + + ! No splitting of fields, transform done in one go + + CALL FTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + + CALL LTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + &PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + &PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIR_TRANS_CTL +END MODULE DIR_TRANS_CTL_MOD diff --git a/src/trans/cpu/internal/dir_trans_ctlad_mod.F90 b/src/trans/cpu/internal/dir_trans_ctlad_mod.F90 new file mode 100644 index 0000000..9b6ad79 --- /dev/null +++ b/src/trans/cpu/internal/dir_trans_ctlad_mod.F90 @@ -0,0 +1,194 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIR_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE DIR_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *DIR_TRANS_CTLAD* - Control routine for direct spectral transform-adj. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTLAD - control of Legendre transform +! FTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTDIR_CTLAD_MOD ,ONLY : LTDIR_CTLAD +USE FTDIR_CTLAD_MOD ,ONLY : FTDIR_CTLAD +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB + + +! ------------------------------------------------------------------ + +! Perform transform + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL LTDIR_CTLAD(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC) + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL LTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + &PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + &PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2) + + CALL FTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIR_TRANS_CTLAD +END MODULE DIR_TRANS_CTLAD_MOD diff --git a/src/trans/cpu/internal/dist_grid_32_ctl_mod.F90 b/src/trans/cpu/internal/dist_grid_32_ctl_mod.F90 new file mode 100644 index 0000000..967bc96 --- /dev/null +++ b/src/trans/cpu/internal/dist_grid_32_ctl_mod.F90 @@ -0,0 +1,262 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIST_GRID_32_CTL_MOD +CONTAINS +SUBROUTINE DIST_GRID_32_CTL(PGPG,KFDISTG,KPROMA,KFROM,PGP) + +!**** *DIST_GRID_32_CTL* - Distributing global gridpoint array to processors + +! Purpose. +! -------- +! Routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID_32_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint output +! KFROM(:) - Processor responsible for distributing each field +! PGP(:,:,:) - Local spectral array + +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPRM +USE MPL_MODULE + +USE TPM_DISTR +USE TPM_GEOMETRY + +USE SET2PE_MOD +USE ABORT_TRANS_MOD +USE EQ_REGIONS_MOD + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +REAL(KIND=JPRM) , INTENT(OUT) :: PGP(:,:,:) + +! Declaration of local variables + +REAL(KIND=JPRM) :: ZDUM(D%NGPTOTMX) +REAL(KIND=JPRM),ALLOCATABLE :: ZBUF(:,:,:),ZRCV2(:,:) +REAL(KIND=JPRM) :: ZRCV(D%NGPTOTMX,KFDISTG) +INTEGER(KIND=JPIM) :: JFLD,JB,JA,IGLOFF,IGL1,IGL2,IOFF,ILAST,ILOFF,ILENR +INTEGER(KIND=JPIM) :: JGL,JLON,ISND,ITAG,J,IRCV +INTEGER(KIND=JPIM) :: JKGLO,IEND,JROF,IBL,JROC +INTEGER(KIND=JPIM) :: ISENDREQ(NPROC,KFDISTG),ILEN(NPROC,KFDISTG) +INTEGER(KIND=JPIM) :: IFROM,IMYFIELDS,IFLD,IFLDSFROM(NPROC) +LOGICAL :: LLSAME + +! ------------------------------------------------------------------ + +! Copy for single PE + +IF(NPROC == 1) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,JFLD,IBL) = PGPG(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + +ELSEIF(KFDISTG>0) THEN + +! test if values in KFROM are all the same + LLSAME=.TRUE. + IFROM=KFROM(1) + DO JFLD=2,KFDISTG + IF(KFROM(JFLD) /= IFROM) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO + + IMYFIELDS = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + CALL GSTATS(1663,0) + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX,IMYFIELDS,NPROC)) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!$OMP&PRIVATE(JFLD,JA,JB,ISND,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!$OMP&ILOFF,JGL,JLON) + DO JFLD=1,IMYFIELDS + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(ISND,JA,JB,0,0) + + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN(ISND,JFLD) = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + ZBUF(ILEN(ISND,JFLD)+JLON,JFLD,ISND) = & + & PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) + ENDDO + ILEN(ISND,JFLD) = ILEN(ISND,JFLD) + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1663,1) + + ! Message passing + CALL GSTATS_BARRIER(791) + CALL GSTATS(811,0) + ! Send + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + ITAG = MTAGDISTGP + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(:,:,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,1),& + &CDSTRING='DIST_GRID_32_CTL') + ENDDO + ENDIF + ELSE + IF(IMYFIELDS > 0) THEN + ITAG = MTAGDISTGP + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(:,:,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,1),& + &CDSTRING='DIST_GRID_32_CTL') + ENDDO + ENDIF + ENDIF + + ! Receive + + IF( LLSAME )THEN + IRCV = KFROM(1) + ITAG = MTAGDISTGP + CALL MPL_RECV(ZRCV,KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_32_CTL:') + IF( ILENR /= D%NGPTOTMX*KFDISTG )THEN + CALL ABORT_TRANS(' DIST_GRID_32_CTL: INVALID RECEIVE MESSAGE LENGTH 1') + ENDIF + ELSE + IFLDSFROM(:)=0 + DO JFLD=1,KFDISTG + IFLDSFROM(KFROM(JFLD)) = IFLDSFROM(KFROM(JFLD))+1 + ENDDO + ITAG = MTAGDISTGP + DO JROC=1,NPROC + IF(IFLDSFROM(JROC) > 0 ) THEN + IRCV = JROC + ALLOCATE(ZRCV2(D%NGPTOTMX,IFLDSFROM(JROC))) + CALL MPL_RECV(ZRCV2,KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_32_CTL:') + IF( ILENR /= D%NGPTOTMX*IFLDSFROM(JROC) )THEN + CALL ABORT_TRANS(' DIST_GRID_32_CTL: INVALID RECEIVE MESSAGE LENGTH 2') + ENDIF + IFLD = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == JROC) THEN + IFLD = IFLD+1 + ZRCV(1:D%NGPTOT,JFLD) = ZRCV2(1:D%NGPTOT,IFLD) + ENDIF + ENDDO + DEALLOCATE(ZRCV2) + ENDIF + ENDDO + ENDIF + +! Wait for send to complete + + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,1), & + & CDSTRING='DIST_GRID_32_CTL: WAIT 1') + ENDIF + ELSEIF(IMYFIELDS > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,1), & + & CDSTRING='DIST_GRID_32_CTL: WAIT 2') + ENDIF + CALL GSTATS(811,1) + CALL GSTATS_BARRIER2(791) + + CALL GSTATS(1663,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,JFLD,IBL) = ZRCV(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1663,1) + !Synchronize processors + CALL GSTATS(786,0) + CALL MPL_BARRIER(CDSTRING='DIST_GRID_32_CTL:') + CALL GSTATS(786,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID_32_CTL +END MODULE DIST_GRID_32_CTL_MOD + + + + diff --git a/src/trans/cpu/internal/dist_grid_ctl_mod.F90 b/src/trans/cpu/internal/dist_grid_ctl_mod.F90 new file mode 100644 index 0000000..aef0d8b --- /dev/null +++ b/src/trans/cpu/internal/dist_grid_ctl_mod.F90 @@ -0,0 +1,280 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIST_GRID_CTL_MOD +CONTAINS +SUBROUTINE DIST_GRID_CTL(PGPG,KFDISTG,KPROMA,KFROM,PGP,KSORT) + +!**** *DIST_GRID_CTL* - Distributing global gridpoint array to processors + +! Purpose. +! -------- +! Routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint output +! KFROM(:) - Processor responsible for distributing each field +! PGP(:,:,:) - Local spectral array +! KSORT(:) - Add KSORT + +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 +! P.Marguinaud : 2014-10-10 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +USE TPM_DISTR ,ONLY : D, MTAGDISTGP, NPRCIDS, MYPROC, NPROC +USE TPM_GEOMETRY ,ONLY : G + +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +! + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN), TARGET :: KSORT (:) + +! Declaration of local variables + +! SS/2018: Removed stack hogs + +!REAL(KIND=JPRB) :: ZDUM(D%NGPTOTMX) -- not used +REAL(KIND=JPRB),ALLOCATABLE :: ZBUF(:,:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZRCV(:,:) ! (D%NGPTOTMX,KFDISTG) +INTEGER(KIND=JPIM) :: JFLD,JB,JA,IGLOFF,IGL1,IGL2,IOFF,ILAST,ILOFF,ILENR +INTEGER(KIND=JPIM) :: JGL,JLON,ISND,ITAG,J,IRCV +INTEGER(KIND=JPIM) :: JKGLO,IEND,JROF,IBL,JROC +INTEGER(KIND=JPIM) :: ISENDREQ(NPROC,KFDISTG),ILEN(NPROC,KFDISTG) +INTEGER(KIND=JPIM) :: IFROM,IMYFIELDS,IFLD +INTEGER(KIND=JPIM), POINTER :: ISORT (:) +LOGICAL :: LLSAME + +! ------------------------------------------------------------------ + +IF (PRESENT (KSORT)) THEN + ISORT => KSORT +ELSE + ALLOCATE (ISORT (KFDISTG)) + DO JFLD = 1, KFDISTG + ISORT (JFLD) = JFLD + ENDDO +ENDIF + +! Copy for single PE + +IF(NPROC == 1) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,ISORT(JFLD),IBL) = PGPG(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + +ELSEIF(KFDISTG>0) THEN + +! test if values in KFROM are all the same + LLSAME=.TRUE. + IFROM=KFROM(1) + DO JFLD=2,KFDISTG + IF(KFROM(JFLD) /= IFROM) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO + + IMYFIELDS = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + CALL GSTATS(1663,0) + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX,IMYFIELDS,NPROC)) + +!WARNING: COMMENTING OPENMP OUT AS TEMPORARY WORKAROUND FOR AMD-COMPILER +!!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!!$OMP&PRIVATE(JFLD,JA,JB,ISND,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!!$OMP&ILOFF,JGL,JLON) + DO JFLD=1,IMYFIELDS + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(ISND,JA,JB,0,0) + + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN(ISND,JFLD) = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + ZBUF(ILEN(ISND,JFLD)+JLON,JFLD,ISND) = & + & PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) + ENDDO + ILEN(ISND,JFLD) = ILEN(ISND,JFLD) + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1663,1) + + ! Message passing + CALL GSTATS_BARRIER(791) + CALL GSTATS(811,0) + ! Send + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + ITAG = MTAGDISTGP + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(:,:,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,1),& + &CDSTRING='DIST_GRID_CTL') + ENDDO + ENDIF + ELSE + IFLD = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + IFLD = IFLD+1 + ITAG = MTAGDISTGP+JFLD + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(1:ILEN(JROC,IFLD),IFLD,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,JFLD),& + &CDSTRING='DIST_GRID_CTL') + ENDDO + ENDIF + ENDDO + ENDIF + + ! Receive + + ALLOCATE(ZRCV(D%NGPTOTMX,KFDISTG)) + + IF( LLSAME )THEN + IRCV = KFROM(1) + ITAG = MTAGDISTGP + CALL MPL_RECV(ZRCV,KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_CTL:') + IF( ILENR /= D%NGPTOTMX*KFDISTG )THEN + CALL ABORT_TRANS(' DIST_GRID_CTL: INVALID RECEIVE MESSAGE LENGTH 1') + ENDIF + ELSE + DO JFLD=1,KFDISTG + IRCV = KFROM(JFLD) + ITAG = MTAGDISTGP+JFLD + CALL MPL_RECV(ZRCV(:,JFLD),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_CTL:') + IF( ILENR /= D%NGPTOT )THEN + CALL ABORT_TRANS(' DIST_GRID_CTL: INVALID RECEIVE MESSAGE LENGTH 2') + ENDIF + ENDDO + ENDIF + +! Wait for send to complete + + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,1), & + & CDSTRING='DIST_GRID_CTL: WAIT 1') + ENDIF + ELSE + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,JFLD), & + & CDSTRING='DIST_GRID_CTL: WAIT 2') + ENDIF + ENDDO + ENDIF + CALL GSTATS(811,1) + CALL GSTATS_BARRIER2(791) + + CALL GSTATS(1663,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,ISORT(JFLD),IBL) = ZRCV(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1663,1) + DEALLOCATE(ZRCV) + !Synchronize processors + CALL GSTATS(786,0) + CALL MPL_BARRIER(CDSTRING='DIST_GRID_CTL:') + CALL GSTATS(786,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +IF (.NOT. PRESENT (KSORT)) THEN + DEALLOCATE (ISORT) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID_CTL +END MODULE DIST_GRID_CTL_MOD + + + + diff --git a/src/trans/cpu/internal/dist_spec_control_mod.F90 b/src/trans/cpu/internal/dist_spec_control_mod.F90 new file mode 100644 index 0000000..d447c84 --- /dev/null +++ b/src/trans/cpu/internal/dist_spec_control_mod.F90 @@ -0,0 +1,420 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIST_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,KVSET,PSPEC,LDIM1_IS_FLD,& + & KSMAX,KSPEC2,KSPEC2MX,KSPEC2G,KPOSSP,KDIM0G,KUMPP,KALLMS,KPTRMS,KN,KSORT) + +!**** *DIST_SPEC_CONTROL* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Routine for distributing spectral array + +!** Interface. +! ---------- +! CALL DIST_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array +! LDIM1_IS_FLD - .TRUE. if first dimension contains the fields +! KSMAX - Spectral truncation limit +! KSPEC2 - Local number of spectral coefficients +! KSPEC2MX - Maximum local number of spectral coefficients +! KSPEC2G - Global number of spectral coefficients +! KPOSSP - Position of local waves for each task +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KUMPP - Number of spectral waves on this a-set +! KALLMS - Wave numbers for all a-set concatenated together to give all wave numbers in a-set order +! KPTRMS - Pointer to the first wave number of a given a-set in kallms array. +! KN - Number of spectral coefficients for each m wave +! KSORT(:) - Re-order fields on output + +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 +! P.Marguinaud : 2014-10-10 +! R. El Khatib 25-Jul-2019 Optimization by vectorization, proper non-blocking comms and overlapp send/recv with pack/unpack +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, MPL_WAITANY, JP_NON_BLOCKING_STANDARD +USE TPM_DISTR ,ONLY : MTAGDISTSP, MYSETV, MYSETW, NPRCIDS, NPRTRW, MYPROC, NPROC, NPRTRV, D +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +LOGICAL , INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2MX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KUMPP(NPRTRW) +INTEGER(KIND=JPIM) , INTENT(IN) :: KALLMS(KSMAX+1) +INTEGER(KIND=JPIM) , INTENT(IN) :: KPTRMS(NPRTRW) +INTEGER(KIND=JPIM) , INTENT(IN) :: KN(0:KSMAX) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN), TARGET :: KSORT (:) + +REAL(KIND=JPRB) :: ZSPEC(KSPEC2MX,COUNT(KVSET(:)==MYSETV)) +REAL(KIND=JPRB), ALLOCATABLE :: ZBUF(:,:,:) +INTEGER(KIND=JPIM) :: IASM0G(0:KSMAX) +INTEGER(KIND=JPIM) :: JM,JN,IFLDR,IFLD,JFLD,ITAG,JNM,ILEN(NPRTRW),JA,ISND(NPRTRV,NPRTRW), JB, IFLDOFF +INTEGER(KIND=JPIM) :: IRCV,ISTP(NPRTRW),ISENDREQ(NPROC), IREQRCV(NPROC), IPROC(NPROC), JMLOC, IFLDBUF, IFLDSPG, IPOSSP +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, IPOS0,ISENT, INR, IOFFPROC(NPROC+1), IFLDLOC(KFDISTG), IOFF, ILOCFLD(KFDISTG) +INTEGER(KIND=JPIM), POINTER :: ISORT (:) + +! ------------------------------------------------------------------ + + +! Compute help array for distribution + +IF (PRESENT (KSORT)) THEN + ISORT => KSORT +ELSE + ALLOCATE (ISORT (KFDISTG)) + DO JFLD = 1, KFDISTG + ISORT (JFLD) = JFLD + ENDDO +ENDIF + +DO JA=1,NPRTRW + ILEN(JA) = KPOSSP(JA+1)-KPOSSP(JA) + ISTP(JA) = KPOSSP(JA+1)-1 +ENDDO +DO JA=1,NPRTRW + DO JB=1,NPRTRV + CALL SET2PE(ISND(JB,JA),0,0,JA,JB) + ENDDO +ENDDO + +! Post receive +CALL GSTATS_BARRIER(790) +CALL GSTATS(812,0) +IRCV=0 +IOFFPROC(1)=0 +IF (ILEN(MYSETW) > 0) THEN + DO JA=1,NPRTRW + DO JB=1,NPRTRV + IF (ISND(JB,JA) /= MYPROC) THEN + ! count number of fields to receive from each task: + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KFROM(JFLD)==ISND(JB,JA)) THEN + IF (KVSET(JFLD)==MYSETV) THEN + IFLDR = IFLDR+1 + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + ITAG=MTAGDISTSP+ISND(JB,JA) + IRCV=IRCV+1 + CALL MPL_RECV(ZSPEC(:,IOFFPROC(IRCV)+1:IOFFPROC(IRCV)+IFLDR),KSOURCE=NPRCIDS(ISND(JB,JA)),KTAG=ITAG,& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQRCV(IRCV),& + & CDSTRING='DIST_SPEC_CONTROL:') + IPROC(IRCV)=ISND(JB,JA) + IOFFPROC(IRCV+1)=IOFFPROC(IRCV)+IFLDR + ENDIF + ENDIF + ENDDO + ENDDO +ENDIF +CALL GSTATS(812,1) + +!Distribute spectral array + +CALL GSTATS(1804,0) + +IASM0G(0)=1 +DO JM=1,KSMAX + IASM0G(JM)=IASM0G(JM-1)+KN(JM-1) +ENDDO + +CALL GSTATS(1804,1) + +ALLOCATE(ZBUF(KSPEC2MX,COUNT(KFROM(:)==MYPROC),NPRTRW)) +! The next lines ensure the large array zbuf is allocated right here and not inside an omp loop below, +! where an extra omp synchro might be needed : +IF (SIZE(ZBUF) > 0) THEN + ZBUF(LBOUND(ZBUF,DIM=1),LBOUND(ZBUF,DIM=2),LBOUND(ZBUF,DIM=3))=HUGE(1._JPRB) +ENDIF + +IF (LDIM1_IS_FLD) THEN + + ISENT=0 + DO JA=1,NPRTRW + IF (ILEN(JA) > 0) THEN + IFLDOFF=0 + DO JB=1,NPRTRV + IF (ISND(JB,JA) /= MYPROC) THEN + ! Locate received fields in source array : + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KFROM(JFLD)==MYPROC) THEN + IFLD = IFLD+1 + IF (KVSET(JFLD)==JB) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IFLDBUF,IFLDSPG,IPOSSP) + DO JFLD=1,IFLDR + IFLDBUF=IFLDOFF+JFLD + IFLDSPG=IFLDLOC(JFLD) + DO JMLOC=1,KUMPP(JA) + JM=KALLMS(KPTRMS(JA)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(JA)+1 + ZBUF(IPOSSP:IPOSSP+KN(JM)-1,IFLDBUF,JA) = PSPECG(IFLDSPG,IASM0G(JM):IASM0G(JM)+KN(JM)-1) + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + CALL GSTATS(812,0) + ISENT = ISENT+1 + ITAG = MTAGDISTSP+MYPROC + CALL MPL_SEND(ZBUF(:,IFLDOFF+1:IFLDOFF+IFLDR,JA),KDEST=NPRCIDS(ISND(JB,JA)),KTAG=ITAG,& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISENT),& + & CDSTRING='DIST_SPEC_CONTROL:') + IFLDOFF=IFLDOFF+IFLDR + CALL GSTATS(812,1) + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + + ! Myself: + IF (ILEN(MYSETW) > 0) THEN + ! Locate received fields in target and source arrays: + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KFROM(JFLD)==MYPROC) THEN + IFLD = IFLD+1 + IF (KVSET(JFLD)==MYSETV) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KVSET(JFLD)==MYSETV) THEN + IFLD = IFLD+1 + IF (KFROM(JFLD)==MYPROC) THEN + IFLDR = IFLDR+1 + ILOCFLD(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IFLDBUF,IFLDSPG,IPOSSP) + DO JFLD=1,IFLDR + IFLDBUF=ISORT(ILOCFLD(JFLD)) + IFLDSPG=IFLDLOC(JFLD) + DO JMLOC=1,KUMPP(MYSETW) + JM=KALLMS(KPTRMS(MYSETW)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(MYSETW)+1 + PSPEC(IFLDBUF,IPOSSP:IPOSSP+KN(JM)-1) = PSPECG(IFLDSPG,IASM0G(JM):IASM0G(JM)+KN(JM)-1) + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + ENDIF + ENDIF + + DO JA=1,IRCV + CALL GSTATS(812,0) + CALL MPL_WAITANY(KREQUEST=IREQRCV(1:IRCV),KINDEX=INR,CDSTRING='DIST_SPEC_CTL: WAIT FOR RECV') + CALL GSTATS(812,1) + ! Locate received fields in target array : + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KVSET(JFLD)==MYSETV) THEN + IFLD=IFLD+1 + IF (KFROM(JFLD)==IPROC(INR)) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD) + DO JFLD=1,IFLDR + PSPEC(ISORT(IFLDLOC(JFLD)),1:KSPEC2) = ZSPEC(1:KSPEC2,IOFFPROC(INR)+JFLD) + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + ENDIF + ENDDO + +ELSE + + ISENT=0 + DO JA=1,NPRTRW + IF (ILEN(JA) > 0) THEN + IFLDOFF=0 + DO JB=1,NPRTRV + IF (ISND(JB,JA) /= MYPROC) THEN + ! Locate received fields in source array : + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KFROM(JFLD)==MYPROC) THEN + IFLD = IFLD+1 + IF (KVSET(JFLD)==JB) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IFLDBUF,IFLDSPG,IPOSSP) + DO JFLD=1,IFLDR + IFLDBUF=IFLDOFF+JFLD + IFLDSPG=IFLDLOC(JFLD) + DO JMLOC=1,KUMPP(JA) + JM=KALLMS(KPTRMS(JA)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(JA)+1 + ZBUF(IPOSSP:IPOSSP+KN(JM)-1,IFLDBUF,JA) = PSPECG(IASM0G(JM):IASM0G(JM)+KN(JM)-1,IFLDSPG) + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + CALL GSTATS(812,0) + ISENT = ISENT+1 + ITAG = MTAGDISTSP+MYPROC + CALL MPL_SEND(ZBUF(:,IFLDOFF+1:IFLDOFF+IFLDR,JA),KDEST=NPRCIDS(ISND(JB,JA)),KTAG=ITAG,& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISENT),& + & CDSTRING='DIST_SPEC_CONTROL:') + IFLDOFF=IFLDOFF+IFLDR + CALL GSTATS(812,1) + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + ! Myself: + IF (ILEN(MYSETW) > 0) THEN + ! Locate received fields in target and source arrays: + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KFROM(JFLD)==MYPROC) THEN + IFLD = IFLD+1 + IF (KVSET(JFLD)==MYSETV) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KVSET(JFLD)==MYSETV) THEN + IFLD = IFLD+1 + IF (KFROM(JFLD)==MYPROC) THEN + IFLDR = IFLDR+1 + ILOCFLD(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IFLDBUF,IFLDSPG,IPOSSP) + DO JFLD=1,IFLDR + IFLDBUF=ISORT(ILOCFLD(JFLD)) + IFLDSPG=IFLDLOC(JFLD) + DO JMLOC=1,KUMPP(MYSETW) + JM=KALLMS(KPTRMS(MYSETW)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(MYSETW)+1 + PSPEC(IPOSSP:IPOSSP+KN(JM)-1,IFLDBUF) = PSPECG(IASM0G(JM):IASM0G(JM)+KN(JM)-1,IFLDSPG) + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + ENDIF + ENDIF + + + DO JA=1,IRCV + CALL GSTATS(812,0) + CALL MPL_WAITANY(KREQUEST=IREQRCV(1:IRCV),KINDEX=INR,CDSTRING='DIST_SPEC_CTL: WAIT FOR RECV') + CALL GSTATS(812,1) + ! Locate received fields in target array : + IFLD=0 + IFLDR=0 + DO JFLD=1,KFDISTG + IF (KVSET(JFLD)==MYSETV) THEN + IFLD=IFLD+1 + IF (KFROM(JFLD)==IPROC(INR)) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD) + DO JFLD=1,IFLDR + PSPEC(1:KSPEC2,ISORT(IFLDLOC(JFLD))) = ZSPEC(1:KSPEC2,IOFFPROC(INR)+JFLD) + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + ENDIF + ENDDO + +ENDIF + +CALL GSTATS(812,0) +DO JA=1,ISENT + CALL MPL_WAIT(KREQUEST=ISENDREQ(JA),CDSTRING='DIST_SPEC_CTL: WAIT FOR SEND') +ENDDO +CALL GSTATS(812,1) + +CALL GSTATS_BARRIER2(790) + +IF (.NOT. PRESENT (KSORT)) THEN + DEALLOCATE (ISORT) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_SPEC_CONTROL +END MODULE DIST_SPEC_CONTROL_MOD diff --git a/src/trans/cpu/internal/eq_regions_mod.F90 b/src/trans/cpu/internal/eq_regions_mod.F90 new file mode 100644 index 0000000..c01c3fc --- /dev/null +++ b/src/trans/cpu/internal/eq_regions_mod.F90 @@ -0,0 +1,443 @@ +! (C) Copyright 2006- ECMWF. +! (C) Copyright 2006- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE eq_regions_mod +! +! Purpose. +! -------- +! eq_regions_mod provides the code to perform a high level +! partitioning of the surface of a sphere into regions of +! equal area and small diameter. +! the type. +! +! Background. +! ----------- +! This Fortran version of eq_regions is a much cut down version of the +! "Recursive Zonal Equal Area (EQ) Sphere Partitioning Toolbox" of the +! same name developed by Paul Leopardi at the University of New South Wales. +! This version has been coded specifically for the case of partitioning the +! surface of a sphere or S^dim (where dim=2) as denoted in the original code. +! Only a subset of the original eq_regions package has been coded to determine +! the high level distribution of regions on a sphere, as the detailed +! distribution of grid points to each region is left to IFS software. +! This is required to take into account the spatial distribution of grid +! points in an IFS gaussian grid and provide an optimal (i.e. exact) +! distribution of grid points over regions. +! +! The following copyright notice for the eq_regions package is included from +! the original MatLab release. +! +! +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! + Release 1.10 2005-06-26 + +! + + +! + Copyright (c) 2004, 2005, University of New South Wales + +! + + +! + Permission is hereby granted, free of charge, to any person obtaining + +! + a copy of this software and associated documentation files (the + +! + "Software"), to deal in the Software without restriction, including + +! + without limitation the rights to use, copy, modify, merge, publish, + +! + distribute, sublicense, and/or sell copies of the Software, and to + +! + permit persons to whom the Software is furnished to do so, subject to + +! + the following conditions: + +! + + +! + The above copyright notice and this permission notice shall be included + +! + in all copies or substantial portions of the Software. + +! + + +! + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + +! + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + +! + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + +! + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + +! + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + +! + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + +! + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +! + + +! +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +! Author. +! ------- +! George Mozdzynski *ECMWF* +! +! Modifications. +! -------------- +! Original : 2006-04-15 +! +!-------------------------------------------------------------------------------- +! +USE PARKIND1 ,ONLY : JPIM, JPRB + +IMPLICIT NONE + +SAVE + +PRIVATE + +PUBLIC eq_regions,l_regions_debug,n_regions_ns,n_regions_ew,n_regions,my_region_ns,my_region_ew +PUBLIC eq_regions_t, eq_regions_save, eq_regions_load, eq_regions_free + +real(kind=jprb) pi + +type eq_regions_t +logical :: l_regions_debug=.false. +integer(kind=jpim) :: n_regions_ns +integer(kind=jpim) :: n_regions_ew +integer(kind=jpim) :: my_region_ns +integer(kind=jpim) :: my_region_ew +integer(kind=jpim),pointer :: n_regions(:) => null () +end type eq_regions_t + +logical :: l_regions_debug=.false. +integer(kind=jpim) :: n_regions_ns +integer(kind=jpim) :: n_regions_ew +integer(kind=jpim) :: my_region_ns +integer(kind=jpim) :: my_region_ew +integer(kind=jpim),pointer :: n_regions(:) => null () + +CONTAINS + +subroutine eq_regions_save (yder) +type (eq_regions_t), intent (inout) :: yder + +yder%l_regions_debug = l_regions_debug +yder%n_regions_ns = n_regions_ns +yder%n_regions_ew = n_regions_ew +yder%my_region_ns = my_region_ns +yder%my_region_ew = my_region_ew +yder%n_regions => n_regions + +nullify (n_regions) + +end subroutine + +subroutine eq_regions_load (yder) +type (eq_regions_t), intent (inout) :: yder + +l_regions_debug = yder%l_regions_debug +n_regions_ns = yder%n_regions_ns +n_regions_ew = yder%n_regions_ew +my_region_ns = yder%my_region_ns +my_region_ew = yder%my_region_ew +n_regions => yder%n_regions + +nullify (yder%n_regions) + +end subroutine + +subroutine eq_regions_free (yder) +type (eq_regions_t), intent (inout) :: yder + +if (associated (yder%n_regions)) then + deallocate (yder%n_regions) + nullify (yder%n_regions) +endif + +end subroutine + +subroutine eq_regions(N) +! +! eq_regions uses the zonal equal area sphere partitioning algorithm to partition +! the surface of a sphere into N regions of equal area and small diameter. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +integer(kind=jpim) :: n_collars,j +real(kind=jprb),allocatable :: r_regions(:) +real(kind=jprb) :: c_polar + +pi=2.0_jprb*asin(1.0_jprb) + +n_regions(:)=0 + +if( N == 1 )then + + ! + ! We have only one region, which must be the whole sphere. + ! + n_regions(1)=1 + n_regions_ns=1 + +else + + ! + ! Given N, determine c_polar + ! the colatitude of the North polar spherical cap. + ! + c_polar = polar_colat(N) + ! + ! Given N, determine the ideal angle for spherical collars. + ! Based on N, this ideal angle, and c_polar, + ! determine n_collars, the number of collars between the polar caps. + ! + n_collars = num_collars(N,c_polar,ideal_collar_angle(N)) + n_regions_ns=n_collars+2 + ! + ! Given N, c_polar and n_collars, determine r_regions, + ! a list of the ideal real number of regions in each collar, + ! plus the polar caps. + ! The number of elements is n_collars+2. + ! r_regions[1] is 1. + ! r_regions[n_collars+2] is 1. + ! The sum of r_regions is N. + allocate(r_regions(n_collars+2)) + call ideal_region_list(N,c_polar,n_collars,r_regions) + ! + ! Given N and r_regions, determine n_regions, a list of the natural number + ! of regions in each collar and the polar caps. + ! This list is as close as possible to r_regions. + ! The number of elements is n_collars+2. + ! n_regions[1] is 1. + ! n_regions[n_collars+2] is 1. + ! The sum of n_regions is N. + ! + call round_to_naturals(N,n_collars,r_regions) + deallocate(r_regions) + if( N /= sum(n_regions(:)) )then + write(*,'("eq_regions: N=",I10," sum(n_regions(:))=",I10)')N,sum(n_regions(:)) + call abor1('eq_regions: N /= sum(n_regions)') + endif + +endif + +if( l_regions_debug )then + write(*,'("eq_regions: N=",I6," n_regions_ns=",I4)') N,n_regions_ns + do j=1,n_regions_ns + write(*,'("eq_regions: n_regions(",I4,")=",I4)') j,n_regions(j) + enddo +endif +n_regions_ew=maxval(n_regions(:)) + +return +end subroutine eq_regions + +function num_collars(N,c_polar,a_ideal) result(num_c) +! +!NUM_COLLARS The number of collars between the polar caps +! +! Given N, an ideal angle, and c_polar, +! determine n_collars, the number of collars between the polar caps. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=jprb),intent(in) :: a_ideal,c_polar +integer(kind=jpim) :: num_c +logical enough +enough = (N > 2) .and. (a_ideal > 0) +if( enough )then + num_c = max(1,nint((pi-2.*c_polar)/a_ideal)) +else + num_c = 0 +endif +return +end function num_collars + +subroutine ideal_region_list(N,c_polar,n_collars,r_regions) +! +!IDEAL_REGION_LIST The ideal real number of regions in each zone +! +! List the ideal real number of regions in each collar, plus the polar caps. +! +! Given N, c_polar and n_collars, determine r_regions, a list of the ideal real +! number of regions in each collar, plus the polar caps. +! The number of elements is n_collars+2. +! r_regions[1] is 1. +! r_regions[n_collars+2] is 1. +! The sum of r_regions is N. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N,n_collars +real(kind=jprb),intent(in) :: c_polar +real(kind=jprb),intent(out) :: r_regions(n_collars+2) +integer(kind=jpim) :: collar_n +real(kind=jprb) :: ideal_region_area,ideal_collar_area +real(kind=jprb) :: a_fitting +r_regions(:)=0.0_jprb +r_regions(1) = 1.0_jprb +if( n_collars > 0 )then + ! + ! Based on n_collars and c_polar, determine a_fitting, + ! the collar angle such that n_collars collars fit between the polar caps. + ! + a_fitting = (pi-2.0_jprb*c_polar)/float(n_collars) + ideal_region_area = area_of_ideal_region(N) + do collar_n=1,n_collars + ideal_collar_area = area_of_collar(c_polar+(collar_n-1)*a_fitting, & + & c_polar+collar_n*a_fitting) + r_regions(1+collar_n) = ideal_collar_area / ideal_region_area + enddo +endif +r_regions(2+n_collars) = 1. +return +end subroutine ideal_region_list + +function ideal_collar_angle(N) result(ideal) +! +! IDEAL_COLLAR_ANGLE The ideal angle for spherical collars of an EQ partition +! +! IDEAL_COLLAR_ANGLE(N) sets ANGLE to the ideal angle for the +! spherical collars of an EQ partition of the unit sphere S^2 into N regions. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=jprb) :: ideal +ideal = area_of_ideal_region(N)**(0.5_jprb) +return +end function ideal_collar_angle + +subroutine round_to_naturals(N,n_collars,r_regions) +! +! ROUND_TO_NATURALS Round off a given list of numbers of regions +! +! Given N and r_regions, determine n_regions, a list of the natural number +! of regions in each collar and the polar caps. +! This list is as close as possible to r_regions, using rounding. +! The number of elements is n_collars+2. +! n_regions[1] is 1. +! n_regions[n_collars+2] is 1. +! The sum of n_regions is N. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N,n_collars +real(kind=jprb),intent(in) :: r_regions(n_collars+2) +integer(kind=jpim) :: zone_n +real(kind=jprb) :: discrepancy +n_regions(1:n_collars+2) = r_regions(:) +discrepancy = 0.0_jprb +do zone_n = 1,n_collars+2 + n_regions(zone_n) = nint(r_regions(zone_n)+discrepancy); + discrepancy = discrepancy+r_regions(zone_n)-float(n_regions(zone_n)); +enddo +return +end subroutine round_to_naturals + +function polar_colat(N) result(polar_c) +! +! Given N, determine the colatitude of the North polar spherical cap. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=jprb) :: area +real(kind=jprb) :: polar_c +if( N == 1 ) polar_c=pi +if( N == 2 ) polar_c=pi/2.0_jprb +if( N > 2 )then + area=area_of_ideal_region(N) + polar_c=sradius_of_cap(area) +endif +return +end function polar_colat + +function area_of_ideal_region(N) result(area) +! +! AREA_OF_IDEAL_REGION(N) sets AREA to be the area of one of N equal +! area regions on S^2, that is 1/N times AREA_OF_SPHERE. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=jprb) :: area_of_sphere +real(kind=jprb) :: area +area_of_sphere = (2.0_jprb*pi**1.5_jprb/gamma(1.5_jprb)) +area = area_of_sphere/float(N) +return +end function area_of_ideal_region + +function sradius_of_cap(area) result(sradius) +! +! SRADIUS_OF_CAP(AREA) returns the spherical radius of +! an S^2 spherical cap of area AREA. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +real(kind=jprb),intent(in) :: area +real(kind=jprb) :: sradius +sradius = 2.0_jprb*asin(sqrt(area/pi)/2.0_jprb) +return +end function sradius_of_cap + +function area_of_collar(a_top, a_bot) result(area) +! +! AREA_OF_COLLAR Area of spherical collar +! +! AREA_OF_COLLAR(A_TOP, A_BOT) sets AREA to be the area of an S^2 spherical +! collar specified by A_TOP, A_BOT, where A_TOP is top (smaller) spherical radius, +! A_BOT is bottom (larger) spherical radius. +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +real(kind=jprb),intent(in) :: a_top,a_bot +real(kind=jprb) area +area = area_of_cap(a_bot) - area_of_cap(a_top) +return +end function area_of_collar + +function area_of_cap(s_cap) result(area) +! +! AREA_OF_CAP Area of spherical cap +! +! AREA_OF_CAP(S_CAP) sets AREA to be the area of an S^2 spherical +! cap of spherical radius S_CAP. +! +real(kind=jprb),intent(in) :: s_cap +real(kind=jprb) area +area = 4.0_jprb*pi * sin(s_cap/2.0_jprb)**2 +return +end function area_of_cap + +function gamma(x) result(gamma_res) +! +USE PARKIND1 ,ONLY : JPIM, JPRB +IMPLICIT NONE +real(kind=jprb),intent(in) :: x +real(kind=jprb) :: gamma_res +real(kind=jprb) :: p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13 +real(kind=jprb) :: w,y +integer(kind=jpim) :: k,n +parameter (& +& p0 = 0.999999999999999990e+00_jprb,& +& p1 = -0.422784335098466784e+00_jprb,& +& p2 = -0.233093736421782878e+00_jprb,& +& p3 = 0.191091101387638410e+00_jprb,& +& p4 = -0.024552490005641278e+00_jprb,& +& p5 = -0.017645244547851414e+00_jprb,& +& p6 = 0.008023273027855346e+00_jprb) +parameter (& +& p7 = -0.000804329819255744e+00_jprb,& +& p8 = -0.000360837876648255e+00_jprb,& +& p9 = 0.000145596568617526e+00_jprb,& +& p10 = -0.000017545539395205e+00_jprb,& +& p11 = -0.000002591225267689e+00_jprb,& +& p12 = 0.000001337767384067e+00_jprb,& +& p13 = -0.000000199542863674e+00_jprb) +n = nint(x - 2) +w = x - (n + 2) +y = ((((((((((((p13 * w + p12) * w + p11) * w + p10) *& +& w + p9) * w + p8) * w + p7) * w + p6) * w + p5) *& +& w + p4) * w + p3) * w + p2) * w + p1) * w + p0 +if (n .gt. 0) then + w = x - 1 + do k = 2, n + w = w * (x - k) + end do +else + w = 1 + do k = 0, -n - 1 + y = y * (x + k) + end do +end if +gamma_res = w / y +return +end function gamma + +END MODULE eq_regions_mod diff --git a/src/trans/cpu/internal/field_split_mod.F90 b/src/trans/cpu/internal/field_split_mod.F90 new file mode 100644 index 0000000..daa2282 --- /dev/null +++ b/src/trans/cpu/internal/field_split_mod.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FIELD_SPLIT_MOD +CONTAINS +SUBROUTINE FIELD_SPLIT(KBLK,KF_GP,KKF_UV_G,KVSETUV,KVSETSC,& + & KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G,& + & KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS) + +!**** *FIELD_SPLIT* - Split fields + +! Purpose. +! -------- +! Split fields + +!** Interface. +! ---------- +! CALL FIELD_SPLIT(...) + +! Explicit arguments : +! -------------------- +! KBLK - block number +! KF_GP - total number of output gridpoint fields +! KKF_UV_G - global number of spectral u-v fields +! KVSETUV - IVSETUV from SHUFFLE +! KVSETSC - IVSETUV from SHUFFLE + +! All the following output arguments are quantities for THIS packet. +! KSTUV_G - +! KENUV_G - +! KF_UV_G - +! KSTSC_G - +! KENSC_G - +! KF_SCALARS_G - +! KSTUV - +! KENUV - +! KF_UV - +! KSTSC - +! KENSC - +! KF_SCALARS - + +! Externals. NONE +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : MYSETV, NPRTRV +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KBLK,KF_GP,KKF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KVSETUV(:),KVSETSC(:) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS + +! Local variables + +INTEGER(KIND=JPIM) :: ISTF,IENF,J + +! ------------------------------------------------------------------ + +ISTF = (KBLK-1)*NPROMATR+1 +IENF = MIN(KBLK*NPROMATR,KF_GP) + +KSTUV_G = (KBLK-1)*NPROMATR/2+1 +KENUV_G = MIN(KBLK*NPROMATR/2,KKF_UV_G) +IF(ISTF > 2*KKF_UV_G) KSTUV_G = KENUV_G+1 +KF_UV_G = KENUV_G-KSTUV_G+1 +KSTSC_G = MAX(ISTF-2*KKF_UV_G,1) +KENSC_G = MAX(IENF-2*KKF_UV_G,0) +KF_SCALARS_G = KENSC_G-KSTSC_G+1 + +! Spectral fields distributed over fields + +IF(NPRTRV > 1) THEN + KF_UV = 0 + KSTUV = 1 + DO J=1,KSTUV_G-1 + IF(KVSETUV(J) == MYSETV) THEN + KSTUV = KSTUV+1 + ENDIF + ENDDO + KENUV = KSTUV-1 + DO J=KSTUV_G,KENUV_G + IF(KVSETUV(J) == MYSETV) THEN + KF_UV = KF_UV+1 + KENUV = KENUV+1 + ENDIF + ENDDO + KF_SCALARS = 0 + KSTSC = 1 + DO J=1,KSTSC_G-1 + IF(KVSETSC(J) == MYSETV) THEN + KSTSC =KSTSC+1 + ENDIF + ENDDO + KENSC = KSTSC-1 + DO J=KSTSC_G,KENSC_G + IF(KVSETSC(J) == MYSETV) THEN + KF_SCALARS = KF_SCALARS+1 + KENSC = KENSC+1 + ENDIF + ENDDO +ELSE + + ! Spectral fields not distributed over fields + + KF_UV = KF_UV_G + KSTUV = KSTUV_G + KENUV = KENUV_G + KF_SCALARS = KF_SCALARS_G + KSTSC = KSTSC_G + KENSC = KENSC_G +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE FIELD_SPLIT +END MODULE FIELD_SPLIT_MOD diff --git a/src/trans/cpu/internal/fourier_in_mod.F90 b/src/trans/cpu/internal/fourier_in_mod.F90 new file mode 100644 index 0000000..67f936a --- /dev/null +++ b/src/trans/cpu/internal/fourier_in_mod.F90 @@ -0,0 +1,74 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_IN_MOD +CONTAINS +SUBROUTINE FOURIER_IN(PREEL,KFIELDS,KGL) + +!**** *FOURIER_IN* - Copy fourier data from buffer to local array + +! Purpose. +! -------- +! Routine for copying fourier data from buffer to local array + +!** Interface. +! ---------- +! CALL FOURIER_IN(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL + +REAL(KIND=JPRB), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +DO JM=0,G%NMEN(IGLG) + IPROC = D%NPROCM(JM) + IR = 2*JM+1+D%NSTAGTF(KGL) + II = 2*JM+2+D%NSTAGTF(KGL) + ISTA = (D%NSTAGT0B(D%MSTABF(IPROC))+D%NPNTGTB0(JM,KGL))*2*KFIELDS + DO JF=1,KFIELDS + PREEL(JF,IR) = FOUBUF(ISTA+2*JF-1) + PREEL(JF,II) = FOUBUF(ISTA+2*JF ) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_IN +END MODULE FOURIER_IN_MOD + diff --git a/src/trans/cpu/internal/fourier_inad_mod.F90 b/src/trans/cpu/internal/fourier_inad_mod.F90 new file mode 100644 index 0000000..982e043 --- /dev/null +++ b/src/trans/cpu/internal/fourier_inad_mod.F90 @@ -0,0 +1,74 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_INAD_MOD +CONTAINS +SUBROUTINE FOURIER_INAD(PREEL,KFIELDS,KGL) + +!**** *FOURIER_INAD* - Copy fourier data from buffer to local array - adjoint + +! Purpose. +! -------- +! Routine for copying fourier data from buffer to local array + +!** Interface. +! ---------- +! CALL FOURIER_INAD(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL + +REAL(KIND=JPRB), INTENT(IN) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +DO JM=0,G%NMEN(IGLG) + IPROC = D%NPROCM(JM) + IR = 2*JM+1+D%NSTAGTF(KGL) + II = 2*JM+2+D%NSTAGTF(KGL) + ISTA = (D%NSTAGT0B(D%MSTABF(IPROC))+D%NPNTGTB0(JM,KGL))*2*KFIELDS + DO JF=1,KFIELDS + FOUBUF(ISTA+2*JF-1) = PREEL(JF,IR) + FOUBUF(ISTA+2*JF ) = PREEL(JF,II) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_INAD +END MODULE FOURIER_INAD_MOD + diff --git a/src/trans/cpu/internal/fourier_out_mod.F90 b/src/trans/cpu/internal/fourier_out_mod.F90 new file mode 100644 index 0000000..9e80bcf --- /dev/null +++ b/src/trans/cpu/internal/fourier_out_mod.F90 @@ -0,0 +1,73 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_OUT_MOD +CONTAINS +SUBROUTINE FOURIER_OUT(PREEL,KFIELDS,KGL) + +!**** *FOURIER_OUT* - Copy fourier data from local array to buffer + +! Purpose. +! -------- +! Routine for copying fourier data from local array to buffer + +!** Interface. +! ---------- +! CALL FOURIER_OUT(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +REAL(KIND=JPRB), INTENT(IN) :: PREEL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +DO JM=0,G%NMEN(IGLG) + IPROC = D%NPROCM(JM) + IR = 2*JM+1+D%NSTAGTF(KGL) + II = 2*JM+2+D%NSTAGTF(KGL) + ISTA = (D%NSTAGT1B(D%MSTABF(IPROC))+D%NPNTGTB0(JM,KGL))*2*KFIELDS + DO JF=1,KFIELDS + FOUBUF_IN(ISTA+2*JF-1) = PREEL(JF,IR) + FOUBUF_IN(ISTA+2*JF ) = PREEL(JF,II) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_OUT +END MODULE FOURIER_OUT_MOD + diff --git a/src/trans/cpu/internal/fourier_outad_mod.F90 b/src/trans/cpu/internal/fourier_outad_mod.F90 new file mode 100644 index 0000000..884c3dd --- /dev/null +++ b/src/trans/cpu/internal/fourier_outad_mod.F90 @@ -0,0 +1,73 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_OUTAD_MOD +CONTAINS +SUBROUTINE FOURIER_OUTAD(PREEL,KFIELDS,KGL) + +!**** *FOURIER_OUTAD* - Copy fourier data from local array to buffer - adjoint + +! Purpose. +! -------- +! Routine for copying fourier data from local array to buffer + +!** Interface. +! ---------- +! CALL FOURIER_OUTAD(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +REAL(KIND=JPRB), INTENT(OUT) :: PREEL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +DO JM=0,G%NMEN(IGLG) + IPROC = D%NPROCM(JM) + IR = 2*JM+1+D%NSTAGTF(KGL) + II = 2*JM+2+D%NSTAGTF(KGL) + ISTA = (D%NSTAGT1B(D%MSTABF(IPROC))+D%NPNTGTB0(JM,KGL))*2*KFIELDS + DO JF=1,KFIELDS + PREEL(JF,IR) = FOUBUF_IN(ISTA+2*JF-1) + PREEL(JF,II) = FOUBUF_IN(ISTA+2*JF ) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_OUTAD +END MODULE FOURIER_OUTAD_MOD + diff --git a/src/trans/cpu/internal/fsc_mod.F90 b/src/trans/cpu/internal/fsc_mod.F90 new file mode 100644 index 0000000..37ef4a0 --- /dev/null +++ b/src/trans/cpu/internal/fsc_mod.F90 @@ -0,0 +1,192 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FSC_MOD +CONTAINS +SUBROUTINE FSC(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *FSC - Division by a*cos(theta), east-west derivatives + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSC(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_TRANS ,ONLY : LUVDER, LATLON +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_FIELDS ,ONLY : F +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FLT ,ONLY: S +! + +IMPLICIT NONE +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PUVDERS(:,:) + +REAL(KIND=JPRB) :: ZACHTE,ZMUL, ZACHTE2, ZSHIFT, ZPI +REAL(KIND=JPRB) :: ZAMP, ZPHASE +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + + +INTEGER(KIND=JPIM) :: JLON,JF,IGLG,II,IR,JM + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ZACHTE = F%RACTHE(IGLG) +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) +ZACHTE2 = F%RACTHE(IGLG) + +IF( LATLON.AND.S%LDLL ) THEN + ZPI = 2.0_JPRB*ASIN(1.0_JPRB) + ZACHTE2 = 1._JPRB + ZACHTE = F%RACTHE2(IGLG) + + ! apply shift for (even) lat-lon output grid + IF( S%LSHIFTLL ) THEN + ZSHIFT = ZPI/REAL(G%NLOEN(IGLG),JPRB) + + DO JF=1,KF_SCALARS + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + + ! calculate amplitude and add phase shift then reconstruct A,B + ZAMP = SQRT(PSCALAR(JF,IR)**2 + PSCALAR(JF,II)**2) + ZPHASE = ATAN2(PSCALAR(JF,II),PSCALAR(JF,IR)) + REAL(JM,JPRB)*ZSHIFT + + PSCALAR(JF,IR) = ZAMP*COS(ZPHASE) + PSCALAR(JF,II) = ZAMP*SIN(ZPHASE) + ENDDO + ENDDO + IF(KF_SCDERS > 0)THEN + DO JF=1,KF_SCALARS + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ! calculate amplitude and phase shift and reconstruct A,B + ZAMP = SQRT(PNSDERS(JF,IR)**2 + PNSDERS(JF,II)**2) + ZPHASE = ATAN2(PNSDERS(JF,II),PNSDERS(JF,IR)) + REAL(JM,JPRB)*ZSHIFT + PNSDERS(JF,IR) = ZAMP*COS(ZPHASE) + PNSDERS(JF,II) = ZAMP*SIN(ZPHASE) + ENDDO + ENDDO + ENDIF + DO JF=1,2*KF_UV + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ! calculate amplitude and phase shift and reconstruct A,B + ZAMP = SQRT(PUV(JF,IR)**2 + PUV(JF,II)**2) + ZPHASE = ATAN2(PUV(JF,II),PUV(JF,IR)) + REAL(JM,JPRB)*ZSHIFT + PUV(JF,IR) = ZAMP*COS(ZPHASE) + PUV(JF,II) = ZAMP*SIN(ZPHASE) + ENDDO + ENDDO + ENDIF +ENDIF + + ! ------------------------------------------------------------------ + +!* 1. DIVIDE U V AND N-S DERIVATIVES BY A*COS(THETA) +! ---------------------------------------------- + + +!* 1.1 U AND V. + +IF(KF_UV > 0) THEN + DO JLON=ISTAGTF+1,ISTAGTF+2*(IMEN+1) + DO JF=1,2*KF_UV + PUV(JF,JLON) = PUV(JF,JLON)*ZACHTE2 + ENDDO + ENDDO +ENDIF + +!* 1.2 N-S DERIVATIVES + +IF(KF_SCDERS > 0)THEN + DO JLON=ISTAGTF+1,ISTAGTF+2*(IMEN+1) + DO JF=1,KF_SCALARS + PNSDERS(JF,JLON) = PNSDERS(JF,JLON)*ZACHTE2 + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ZMUL = ZACHTE*JM + DO JF=1,2*KF_UV + PUVDERS(JF,IR) = -PUV(JF,II)*ZMUL + PUVDERS(JF,II) = PUV(JF,IR)*ZMUL + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ZMUL = ZACHTE*JM + DO JF=1,KF_SCALARS + PEWDERS(JF,IR) = -PSCALAR(JF,II)*ZMUL + PEWDERS(JF,II) = PSCALAR(JF,IR)*ZMUL + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE FSC +END MODULE FSC_MOD diff --git a/src/trans/cpu/internal/fscad_mod.F90 b/src/trans/cpu/internal/fscad_mod.F90 new file mode 100644 index 0000000..390b4a3 --- /dev/null +++ b/src/trans/cpu/internal/fscad_mod.F90 @@ -0,0 +1,146 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FSCAD_MOD +CONTAINS +SUBROUTINE FSCAD(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *FSCAD - Division by a*cos(theta), east-west derivatives - adjoint + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSCAD(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_FIELDS ,ONLY : F +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PUVDERS(:,:) + +REAL(KIND=JPRB) :: ZACHTE,ZMUL +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + + +INTEGER(KIND=JPIM) :: JLON,JF,IGLG,II,IR,JM + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ZACHTE = F%RACTHE(IGLG) +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) + + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ZMUL = ZACHTE*JM + DO JF=1,2*KF_UV + PUV(JF,II) = PUV(JF,II) - PUVDERS(JF,IR)*ZMUL + PUV(JF,IR) = PUV(JF,IR) + PUVDERS(JF,II)*ZMUL +! PUVDERS(JF,IR) = _ZERO_ +! PUVDERS(JF,II) = _ZERO_ + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ZMUL = ZACHTE*JM + DO JF=1,KF_SCALARS + PSCALAR(JF,II) = PSCALAR(JF,II) - PEWDERS(JF,IR)*ZMUL + PSCALAR(JF,IR) = PSCALAR(JF,IR) + PEWDERS(JF,II)*ZMUL +! PEWDERS(JF,IR) = _ZERO_ +! PEWDERS(JF,II) = _ZERO_ + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +!* 1. DIVIDE U V AND N-S DERIVATIVES BY A*COS(THETA) +! ---------------------------------------------- + + +!* 1.1 U AND V. + +IF(KF_UV > 0) THEN + DO JLON=ISTAGTF+1,ISTAGTF+2*(IMEN+1) + DO JF=1,2*KF_UV + PUV(JF,JLON) = PUV(JF,JLON)*ZACHTE + ENDDO + ENDDO +ENDIF + +!* 1.2 N-S DERIVATIVES + +IF(KF_SCDERS > 0)THEN + DO JLON=ISTAGTF+1,ISTAGTF+2*(IMEN+1) + DO JF=1,KF_SCALARS + PNSDERS(JF,JLON) = PNSDERS(JF,JLON)*ZACHTE + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE FSCAD +END MODULE FSCAD_MOD diff --git a/src/trans/cpu/internal/fspgl_int_mod.F90 b/src/trans/cpu/internal/fspgl_int_mod.F90 new file mode 100644 index 0000000..3592f4c --- /dev/null +++ b/src/trans/cpu/internal/fspgl_int_mod.F90 @@ -0,0 +1,111 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FSPGL_INT_MOD +CONTAINS +SUBROUTINE FSPGL_INT(KM,KMLOC,KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,& + & FSPGL_PROC,KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN, LDIVGP, LVORGP +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D +USE TPM_FIELDS ,ONLY : F +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT +EXTERNAL FSPGL_PROC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +! +! ZFIELD 2nd dimension is extended from 0 to R%NDGL+1, while only 1 to R%NDGL +! is given from the north/south transforms, and only 1 to R%NDGL rows will be +! passed to the east/west transforms. +! the 2 extra rows are used inside the model Fourier space computations +! (outside the transform package - see FSPGLH in Arpege/IFS). +! +REAL(KIND=JPRB) :: ZFIELD(2*KF_OUT_LT,0:R%NDGL+1) + + +INTEGER(KIND=JPIM) :: ISL, IGLS, JFLD, JGL ,IPROC, IPROCS +INTEGER(KIND=JPIM) :: IPTRU,IST,J +INTEGER(KIND=JPIM) :: IDGNH,IDGL +INTEGER(KIND=JPIM) :: ISTAN(R%NDGNH),ISTAS(R%NDGNH) +INTEGER(KIND=JPIM) :: IFLDPTRUV(KF_UV),IFLDPTRSC(KF_SCALARS) +! ------------------------------------------------------------------ + +IF(PRESENT(KFLDPTRUV)) THEN + IFLDPTRUV(:) = KFLDPTRUV(1:KF_UV) + IFLDPTRSC(:) = KFLDPTRSC(1:KF_SCALARS) +ELSE + DO J=1,KF_UV + IFLDPTRUV(J) = J + ENDDO + DO J=1,KF_SCALARS + IFLDPTRSC(J) = J + ENDDO +ENDIF + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IDGNH = R%NDGNH +IDGL = R%NDGL +DO JGL=ISL,IDGNH + IPROC = D%NPROCL(JGL) + ISTAN(JGL) = (D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*2*KF_OUT_LT + IGLS = IDGL+1-JGL + IPROCS = D%NPROCL(IGLS) + ISTAS(JGL) = (D%NSTAGT0B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*2*KF_OUT_LT +ENDDO + +DO JGL=ISL,IDGNH + IGLS = IDGL+1-JGL + DO JFLD=1,2*KF_OUT_LT + ZFIELD(JFLD,JGL) = FOUBUF_IN(ISTAN(JGL)+JFLD) + ZFIELD(JFLD,IGLS) = FOUBUF_IN(ISTAS(JGL)+JFLD) + ENDDO +ENDDO + +IST = 1 +IF(LVORGP) THEN + IST = IST+2*KF_UV +ENDIF +IF(LDIVGP) THEN + IST = IST+2*KF_UV +ENDIF +IPTRU = IST + + + + +CALL FSPGL_PROC(KM,ISL,IDGL,KF_OUT_LT,F%R1MU2,ZFIELD,& + & IPTRU,KF_UV,KF_SCALARS,& + & IFLDPTRUV) + +DO JGL=ISL,IDGNH + IGLS = IDGL+1-JGL +!OCL NOVREC + DO JFLD=1,2*KF_OUT_LT + FOUBUF_IN(ISTAN(JGL)+JFLD) = ZFIELD(JFLD,JGL) + FOUBUF_IN(ISTAS(JGL)+JFLD) = ZFIELD(JFLD,IGLS) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FSPGL_INT +END MODULE FSPGL_INT_MOD diff --git a/src/trans/cpu/internal/ftdir_ctl_mod.F90 b/src/trans/cpu/internal/ftdir_ctl_mod.F90 new file mode 100644 index 0000000..029b70a --- /dev/null +++ b/src/trans/cpu/internal/ftdir_ctl_mod.F90 @@ -0,0 +1,196 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_CTL_MOD +CONTAINS +SUBROUTINE FTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTDIR_CTL - Direct Fourier transform control + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTL(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE TRGTOL_MOD ,ONLY : TRGTOL +USE FOURIER_OUT_MOD ,ONLY : FOURIER_OUT +USE FTDIR_MOD ,ONLY : FTDIR +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! Local variables +REAL(KIND=JPRB) :: ZGTF(KF_FS,D%NLENGTF) + +INTEGER(KIND=JPIM) :: IST,JGL,IGL,IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +! Transposition + +CALL GSTATS(158,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(158,1) +CALL GSTATS(106,0) + +! Fourier transform + +IBLEN=D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +ENDIF + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1640,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=IBEG,IEND,IINC + IGL = JGL + IF(KF_FS>0) THEN + CALL FTDIR(ZGTF,KF_FS,IGL) + ENDIF + +! Save Fourier data in FOUBUF_IN + + CALL FOURIER_OUT(ZGTF,KF_FS,IGL) +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1640,1) +CALL GSTATS(106,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIR_CTL +END MODULE FTDIR_CTL_MOD + + + diff --git a/src/trans/cpu/internal/ftdir_ctlad_mod.F90 b/src/trans/cpu/internal/ftdir_ctlad_mod.F90 new file mode 100644 index 0000000..d53f1eb --- /dev/null +++ b/src/trans/cpu/internal/ftdir_ctlad_mod.F90 @@ -0,0 +1,187 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE FTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTDIR_CTLAD - Direct Fourier transform control - adjoint + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTLAD(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE TRLTOG_MOD ,ONLY : TRLTOG +USE FOURIER_OUTAD_MOD ,ONLY : FOURIER_OUTAD +USE FTDIRAD_MOD ,ONLY : FTDIRAD +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP2(:,:,:) + +! Local variables +REAL(KIND=JPRB) :: ZGTF(KF_FS,D%NLENGTF) + + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: JGL,IGL +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +CALL GSTATS(133,0) + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1642,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=IBEG,IEND,IINC + IGL = JGL + CALL FOURIER_OUTAD(ZGTF,KF_FS,IGL) + +! Fourier transform + + IF(KF_FS>0) THEN + CALL FTDIRAD(ZGTF,KF_FS,IGL) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1642,1) +CALL GSTATS(133,1) + +! Transposition + +CALL GSTATS(183,0) +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) + +CALL GSTATS(183,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIR_CTLAD +END MODULE FTDIR_CTLAD_MOD + + + diff --git a/src/trans/cpu/internal/ftdir_mod.F90 b/src/trans/cpu/internal/ftdir_mod.F90 new file mode 100644 index 0000000..2b5ec35 --- /dev/null +++ b/src/trans/cpu/internal/ftdir_mod.F90 @@ -0,0 +1,124 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_MOD +CONTAINS +SUBROUTINE FTDIR(PREEL,KFIELDS,KGL) + + +!**** *FTDIR - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPIB, JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE TPM_DIM ,ONLY : R +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,IST1 +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +! ------------------------------------------------------------------ + +ITYPE=-1 +IJUMP= 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = G%NLOEN(IGLG)+R%NNOEXTZL+3-IST + +IF (G%NLOEN(IGLG)>1) THEN + IOFF=D%NSTAGTF(KGL)+1 + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + +#ifdef WITH_FFTW + IF( .NOT. TW%LFFTW )THEN +#endif + + IF( T%LUSEFFT992(KGL) )THEN + + CALL FFT992(PREEL(:,IOFF:),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,IRLEN,KFIELDS,ITYPE) + + ELSE + + CALL BLUESTEIN_FFT(TB,IRLEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + DO JJ=1,ICLEN + DO JF=1,KFIELDS + PREEL(JF,IOFF+JJ-1)=PREEL(JF,IOFF+JJ-1)/REAL(IRLEN,JPRB) + ENDDO + ENDDO + + ENDIF + +#ifdef WITH_FFTW + ELSE + + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + + ENDIF +#endif + +ENDIF + +IST1=1 +IF (G%NLOEN(IGLG)==1) IST1=0 +DO JJ=IST1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+D%NSTAGTF(KGL)+JJ-1) = 0.0_JPRB + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIR +END MODULE FTDIR_MOD diff --git a/src/trans/cpu/internal/ftdirad_mod.F90 b/src/trans/cpu/internal/ftdirad_mod.F90 new file mode 100644 index 0000000..e3fef0f --- /dev/null +++ b/src/trans/cpu/internal/ftdirad_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIRAD_MOD +CONTAINS +SUBROUTINE FTDIRAD(PREEL,KFIELDS,KGL) + + +!**** *FTDIRAD - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform - adjoint +! -------- + +!** Interface. +! ---------- +! CALL FTDIRAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE TPM_DIM ,ONLY : R + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +REAL(KIND=JPRB) :: ZMUL +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +! ------------------------------------------------------------------ + +ITYPE = 1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IST = 2*(G%NMEN(IGLG)+1)+1 +ILOEN = G%NLOEN(IGLG)+R%NNOEXTZL +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 +IRLEN = ILOEN +ICLEN = (IRLEN/2+1)*2 + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO + +#ifdef WITH_FFTW +IF( .NOT. TW%LFFTW )THEN +#endif + + IF( T%LUSEFFT992(KGL) )THEN + + CALL FFT992(PREEL(:,IOFF:),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + + ELSE + + CALL BLUESTEIN_FFT(TB,ILOEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + + ENDIF + +#ifdef WITH_FFTW +ELSE + + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + +ENDIF +#endif + + ! Change of metric (not in forward routine) + +ZMUL = 1.0_JPRB/ILOEN +DO JJ=1,ILOEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = PREEL(JF,IOFF-1+JJ)*ZMUL + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIRAD +END MODULE FTDIRAD_MOD diff --git a/src/trans/cpu/internal/ftinv_ctl_mod.F90 b/src/trans/cpu/internal/ftinv_ctl_mod.F90 new file mode 100644 index 0000000..0dac3cf --- /dev/null +++ b/src/trans/cpu/internal/ftinv_ctl_mod.F90 @@ -0,0 +1,297 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_CTL_MOD +CONTAINS +SUBROUTINE FTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTINV_CTL - Inverse Fourier transform control + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV_CTL(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP,LATLON +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC +USE TPM_FLT ,ONLY : S +USE FOURIER_IN_MOD ,ONLY : FOURIER_IN +USE FSC_MOD ,ONLY : FSC +USE FTINV_MOD ,ONLY : FTINV +USE TRLTOG_MOD ,ONLY : TRLTOG +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) +#if 0 +REAL(KIND=JPRB),TARGET :: ZDUM(1,D%NLENGTF) ! Reducing stack usage here, too +#else +REAL(KIND=JPRB),TARGET,ALLOCATABLE :: ZDUM(:,:) ! When using this (HEAP) alloc Cray CCE 8.6.2 fails in OMP 1639 +#endif +!REAL(KIND=JPRB),TARGET :: ZGTF(KF_FS,D%NLENGTF) ! A stack hog ? +REAL(KIND=JPRB),TARGET,ALLOCATABLE :: ZGTF(:,:) ! (KF_FS,D%NLENGTF) + +ALLOCATE(ZGTF(KF_FS,D%NLENGTF)) +! Certain compilers allocate arrays at the moment they start to be used, not at the moment the user +! allocates them. This is a problem if that moment is an open-mp loop because it would trigger +! an omp barrier to let the array be allocated by the master thread if the array is shared (which +! is the case here for zgtf). +! Therefore the next line ensures zgtf is really allocated here, not inside the omp loop. REK +IF (KF_FS > 0 .AND. D%NLENGTF > 0) ZGTF(1,1)=0._JPRB + +#if 1 +ALLOCATE(ZDUM(1,D%NLENGTF)) +#endif + +ZUV => ZDUM +ZSCALAR => ZDUM +ZNSDERS => ZDUM +ZEWDERS => ZDUM +ZUVDERS => ZDUM + +! ------------------------------------------------------------------ + +! 1. Copy Fourier data to local array + +CALL GSTATS(107,0) + +IF (KF_UV > 0 .OR. KF_SCDERS > 0 .OR. (LATLON.AND.S%LDLL) ) THEN + IST = 1 + IF (LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF (LDIVGP) THEN + IST = IST+KF_UV + ENDIF + IF (KF_UV>0) ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + IF (KF_SCALARS>0) ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + IST = IST+KF_SCALARS + IF (KF_SCDERS>0) ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + IST = IST+KF_SCDERS + IF (LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ENDIF + IF (KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ENDIF +ENDIF + +IF (MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1639,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=IBEG,IEND,IINC + IGL = JGL + CALL FOURIER_IN(ZGTF,KF_OUT_LT,IGL) + +! 2. Fourier space computations + + IF (KF_UV > 0 .OR. KF_SCDERS > 0 .OR. (LATLON.AND.S%LDLL) ) THEN + CALL FSC(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 3. Fourier transform + + IF (KF_FS > 0) THEN + CALL FTINV(ZGTF,KF_FS,IGL) ! Watch out failures here (Cray CCE 8.6.2 ? Intel 18.0.1 ?) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1639,1) + +NULLIFY(ZUV) +NULLIFY(ZSCALAR) +NULLIFY(ZNSDERS) +NULLIFY(ZUVDERS) +NULLIFY(ZEWDERS) +#if 1 +DEALLOCATE(ZDUM) +#endif + +CALL GSTATS(107,1) + +! 4. Transposition + +IF (PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:)=-1 +IF (PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF (PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF (PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF (LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF (PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF (LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF (IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTL_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF (KF_UV_G > 0) THEN + IF (LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF ( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF (KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF (LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF (KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF (KF_SCALARS_G > 0) THEN + IF (LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(157,0) +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(157,1) + +! ------------------------------------------------------------------ + +DEALLOCATE(ZGTF) + +END SUBROUTINE FTINV_CTL +END MODULE FTINV_CTL_MOD diff --git a/src/trans/cpu/internal/ftinv_ctlad_mod.F90 b/src/trans/cpu/internal/ftinv_ctlad_mod.F90 new file mode 100644 index 0000000..86ec2ba --- /dev/null +++ b/src/trans/cpu/internal/ftinv_ctlad_mod.F90 @@ -0,0 +1,305 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_CTLAD_MOD +CONTAINS +SUBROUTINE FTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTINV_CTLAD - Inverse Fourier transform control - adjoint + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV_CTLAD(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE FOURIER_INAD_MOD ,ONLY : FOURIER_INAD +USE FSCAD_MOD ,ONLY : FSCAD +USE FTINVAD_MOD ,ONLY : FTINVAD +USE TRGTOL_MOD ,ONLY : TRGTOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! ------------------------------------------------------------------ + +REAL(KIND=JPRB),TARGET :: ZGTF(KF_FS,D%NLENGTF) +REAL(KIND=JPRB),TARGET :: ZDUM(1,D%NLENGTF) +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IST,IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +! ------------------------------------------------------------------ + +! 4. Transposition + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF + +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV_CTLAD:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTLAD_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(182,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(182,1) + +! 3. Fourier transform + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + IF(KF_UV>0) THEN + ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + ELSE + ZUV => ZDUM(1:1,:) + ENDIF + IST = IST+2*KF_UV + IF(KF_SCALARS>0) THEN + ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + ELSE + ZSCALAR => ZDUM(1:1,:) + ENDIF + IST = IST+KF_SCALARS + IF(KF_SCDERS>0) THEN + ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZNSDERS => ZDUM(1:1,:) + ENDIF + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(1:1,:) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZEWDERS => ZDUM(1:1,:) + ENDIF +ENDIF + +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF + +CALL GSTATS(132,0) + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1641,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=IBEG,IEND,IINC + IGL = JGL + IF(KF_FS > 0) THEN + CALL FTINVAD(ZGTF,KF_FS,IGL) + ENDIF + +! 2. Fourier space computations + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL FSCAD(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 1. Copy Fourier data to local array + + CALL FOURIER_INAD(ZGTF,KF_OUT_LT,IGL) + +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1641,1) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF + +CALL GSTATS(132,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE FTINV_CTLAD +END MODULE FTINV_CTLAD_MOD + + + diff --git a/src/trans/cpu/internal/ftinv_mod.F90 b/src/trans/cpu/internal/ftinv_mod.F90 new file mode 100644 index 0000000..da7dcbd --- /dev/null +++ b/src/trans/cpu/internal/ftinv_mod.F90 @@ -0,0 +1,116 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_MOD +CONTAINS +SUBROUTINE FTINV(PREEL,KFIELDS,KGL) + +!**** *FTINV - Inverse Fourier transform + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 : 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE TPM_DIM ,ONLY : R + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,IST1 +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE, IFLD +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +! ------------------------------------------------------------------ + +ITYPE = 1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = G%NLOEN(IGLG)+R%NNOEXTZL+3-IST +IST1=1 +IF (G%NLOEN(IGLG)==1) IST1=0 + +DO JJ=IST1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+D%NSTAGTF(KGL)+JJ-1) = 0.0_JPRB + ENDDO +ENDDO + +IF (G%NLOEN(IGLG)>1) THEN + IOFF=D%NSTAGTF(KGL)+1 + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + +#ifdef WITH_FFTW + IF( .NOT. TW%LFFTW )THEN +#endif + + IF( T%LUSEFFT992(KGL) )THEN + + CALL FFT992(PREEL(:,IOFF:),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,IRLEN,KFIELDS,ITYPE) + + ELSE + + CALL BLUESTEIN_FFT(TB,IRLEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + + ENDIF + +#ifdef WITH_FFTW + ELSE + + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + + ENDIF +#endif + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE FTINV +END MODULE FTINV_MOD diff --git a/src/trans/cpu/internal/ftinvad_mod.F90 b/src/trans/cpu/internal/ftinvad_mod.F90 new file mode 100644 index 0000000..5ffac0e --- /dev/null +++ b/src/trans/cpu/internal/ftinvad_mod.F90 @@ -0,0 +1,126 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINVAD_MOD +CONTAINS +SUBROUTINE FTINVAD(PREEL,KFIELDS,KGL) + + +!**** *FTINVAD - Inverse Fourier transform - adjoint + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL FTINVAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPIB, JPRB + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_FFT +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +#endif +USE TPM_DIM ,ONLY : R +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +! ------------------------------------------------------------------ + +ITYPE =-1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG)+R%NNOEXTZL +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 +IRLEN = ILOEN +ICLEN = (IRLEN/2+1)*2 + + ! Change of metric (not in forward routine) +DO JJ=1,ILOEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = PREEL(JF,IOFF-1+JJ)*ILOEN + ENDDO +ENDDO + +#ifdef WITH_FFTW +IF( .NOT. TW%LFFTW )THEN +#endif + + IF( T%LUSEFFT992(KGL) )THEN + + CALL FFT992(PREEL(:,IOFF:),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + + ELSE + + CALL BLUESTEIN_FFT(TB,ILOEN,ITYPE,KFIELDS,PREEL(1:KFIELDS,IOFF:IOFF+ICLEN-1)) + DO JJ=1,ICLEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ)=PREEL(JF,IOFF-1+JJ)/REAL(ILOEN,JPRB) + ENDDO + ENDDO + + ENDIF + +#ifdef WITH_FFTW +ELSE + + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + +ENDIF +#endif + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FTINVAD +END MODULE FTINVAD_MOD diff --git a/src/trans/cpu/internal/gath_grid_32_ctl_mod.F90 b/src/trans/cpu/internal/gath_grid_32_ctl_mod.F90 new file mode 100644 index 0000000..88c0edd --- /dev/null +++ b/src/trans/cpu/internal/gath_grid_32_ctl_mod.F90 @@ -0,0 +1,278 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GATH_GRID_32_CTL_MOD +CONTAINS +SUBROUTINE GATH_GRID_32_CTL(PGPG,KFGATHG,KPROMA,KTO,PGP) + +!**** *GATH_GRID_32_CTL* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Routine for gathering gridpoint array + +!** Interface. +! ---------- +! CALL GATH_GRID_32_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! PGP(:,:,:) - Local spectral array +! +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRM +USE MPL_MODULE + +USE TPM_GEN +USE TPM_DIM +USE TPM_GEOMETRY +USE TPM_DISTR + +USE SET2PE_MOD +USE EQ_REGIONS_MOD + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +REAL(KIND=JPRM) , INTENT(IN) :: PGP(:,:,:) + +! Declaration of local variables + +REAL(KIND=JPRM) :: ZFLD(D%NGPTOTMX*KFGATHG),ZDUM(D%NGPTOTMX) +REAL(KIND=JPRM),ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM) :: IFLDR,JFLD,ITAG,ILEN,JA,JB,ISND,JGL,JLON,ILOFF,ILENB,IST +INTEGER(KIND=JPIM) :: IRCV,IOFF,ILAST,IGL1,IGL2,IGLOFF +INTEGER(KIND=JPIM) :: JKGLO,JROF,IEND,J,IBL,IPROC,JROC,IMYFIELDS,ILRECV +INTEGER(KIND=JPIM) :: ISENDREQ(NPROC),IOUNT,ITO +INTEGER(KIND=JPIM) :: ILENS(NPROC),IOFFS(NPROC),ILENR(NPROC),IOFFR(NPROC) +INTEGER(KIND=JPIM) :: IFLDL,IFLDS +LOGICAL :: LLSAME +! ------------------------------------------------------------------ + + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFGATHG + DO JROF=1,IEND + PGPG(IOFF+JROF,JFLD) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1643,1) + +ELSE +! test if values in KTO are all the same + LLSAME=.TRUE. + ITO=KTO(1) + DO JFLD=2,KFGATHG + IF(KTO(JFLD) /= ITO) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO + IFLDL=D%NGPTOTMX + IF(LLSAME) THEN + CALL GSTATS(1643,0) + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JFLD=1,KFGATHG + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+(JFLD-1)*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1643,1) + ELSE + ILENS(:)=0 + IOFFS(:)=0 + ILENR(:)=0 + IOFFR(:)=0 + DO JFLD=1,KFGATHG + ILENS(KTO(JFLD))=ILENS(KTO(JFLD))+IFLDL + IF(KTO(JFLD) == MYPROC) THEN + ILENR(:)=ILENR(:)+IFLDL + ENDIF + ENDDO + DO JROC=2,NPROC + IOFFR(JROC)=IOFFR(JROC-1)+ ILENR(JROC-1) + IOFFS(JROC)=IOFFS(JROC-1)+ ILENS(JROC-1) + ENDDO + IFLDS=0 + DO JROC=1,NPROC + IF(ILENS(JROC) > 0) THEN + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == JROC) THEN + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+IFLDS*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + IFLDS=IFLDS+1 + ENDIF + ENDDO + ENDIF + ENDDO + ENDIF + + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX*IMYFIELDS*NPROC)) + ELSE + ALLOCATE(ZBUF(1)) + ENDIF + IFLDR = 0 + CALL GSTATS_BARRIER(789) + CALL GSTATS(809,0) + + IF( LLSAME )THEN + !Send + ISND = KTO(1) + ITAG = MTAGDISTSP+1+17 + CALL MPL_SEND(ZFLD,KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(1),& + &CDSTRING='GATH_GRID_32_CTL:') + + ! RECIEVE + IF(KTO(1) == MYPROC) THEN + IFLDR = KFGATHG + DO JROC=1,NPROC + ITAG = MTAGDISTSP+1+17 + IRCV = JROC + IOFF=IFLDL*KFGATHG*(JROC-1) + CALL MPL_RECV(ZBUF(IOFF+1:IOFF+IFLDL*KFGATHG),KSOURCE=NPRCIDS(IRCV),& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILRECV,& + &KTAG=ITAG,CDSTRING='GATH_GRID_32_CTL:') + ENDDO + ENDIF + CALL MPL_WAIT(KREQUEST=ISENDREQ(1), & + & CDSTRING='GATH_GRID_32_CTL: WAIT') + ELSE + IFLDR=IMYFIELDS + CALL MPL_ALLTOALLV(PSENDBUF=ZFLD,KSENDCOUNTS=ILENS,& + & PRECVBUF=ZBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & CDSTRING='GATH_GRID_32_CTL:') +!!$ ITAG = MTAGDISTSP+1+17 +!!$ DO JROC=1,NPROC +!!$ ISND=JROC +!!$ IOFF=IOFFS(JROC) +!!$ ILEN=ILENS(JROC) +!!$ IF(ILEN > 0 ) THEN +!!$ CALL MPL_SEND(ZFLD(IOFF+1:IOFF+ILEN),KDEST=NPRCIDS(ISND),KTAG=ITAG,& +!!$ &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISND),& +!!$ &CDSTRING='GATH_GRID_32_CTL:') +!!$ ENDIF +!!$ ENDDO +!!$ DO JROC=1,NPROC +!!$ IRCV = JROC +!!$ IOFF = IOFFR(JROC) +!!$ ILEN = ILENR(JROC) +!!$ IF(ILEN > 0 ) THEN +!!$ CALL MPL_RECV(ZBUF(IOFF+1:IOFF+ILEN),KSOURCE=NPRCIDS(IRCV),& +!!$ &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILRECV,& +!!$ &KTAG=ITAG,CDSTRING='GATH_GRID_32_CTL:') +!!$ ENDIF +!!$ ENDDO +!!$ DO JROC=1,NPROC +!!$ ISND=JROC +!!$ ILEN=ILENS(JROC) +!!$ IF(ILEN > 0 ) THEN +!!$ CALL MPL_WAIT(KREQUEST=ISENDREQ(JROC), & +!!$ & CDSTRING='GATH_GRID_32_CTL: WAIT') +!!$ ENDIF +!!$ ENDDO + ENDIF + + CALL GSTATS(809,1) + CALL GSTATS_BARRIER2(789) + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!$OMP&PRIVATE(JA,JB,IPROC,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!$OMP&ILEN,ILOFF,JGL,JLON,JFLD) + DO JFLD=1,IFLDR + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IPROC,JA,JB,0,0) + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) = & + & ZBUF(ILEN+JLON+(JFLD-1)*IFLDL+(IPROC-1)*IFLDL*IMYFIELDS) + ENDDO + ILEN = ILEN + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + + CALL GSTATS(1643,1) +! Synhronize processors +! Should not be necessary +!!$ CALL GSTATS(784,0) +!!$ CALL MPL_BARRIER(CDSTRING='GATH_GRID_32_CTL:') +!!$ CALL GSTATS(784,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID_32_CTL +END MODULE GATH_GRID_32_CTL_MOD + + diff --git a/src/trans/cpu/internal/gath_grid_ctl_mod.F90 b/src/trans/cpu/internal/gath_grid_ctl_mod.F90 new file mode 100644 index 0000000..1154c86 --- /dev/null +++ b/src/trans/cpu/internal/gath_grid_ctl_mod.F90 @@ -0,0 +1,296 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GATH_GRID_CTL_MOD +CONTAINS +SUBROUTINE GATH_GRID_CTL(PGPG,KFGATHG,KPROMA,KTO,PGP) + +!**** *GATH_GRID_CTL* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Routine for gathering gridpoint array + +!** Interface. +! ---------- +! CALL GATH_GRID_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! PGP(:,:,:) - Local gridpoint array +! +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_RECV, MPL_SEND, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +!USE TPM_GEN +!USE TPM_DIM +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, MTAGDISTSP, NPRCIDS, MYPROC, NPROC + +USE SET2PE_MOD ,ONLY : SET2PE +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +REAL(KIND=JPRB) , INTENT(IN) :: PGP(:,:,:) + +! Declaration of local variables + +REAL(KIND=JPRB) :: ZFLD(D%NGPTOTMX*KFGATHG),ZDUM(D%NGPTOTMX) +REAL(KIND=JPRB),ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IREQ(:) +INTEGER(KIND=JPIM) :: IFLDR,JFLD,ITAG,ILEN,JA,JB,ISND,JGL,JLON,ILOFF,ILENB +INTEGER(KIND=JPIM) :: IRCV,IOFF,ILAST,IGL1,IGL2,IGLOFF,IR +INTEGER(KIND=JPIM) :: JKGLO,JROF,IEND,J,IBL,IPROC,JROC,IMYFIELDS,ILRECV +INTEGER(KIND=JPIM) :: ISENDREQ(KFGATHG),ITO +INTEGER(KIND=JPIM) :: ILENS(NPROC),IOFFS(NPROC),ILENR(NPROC),IOFFR(NPROC) +INTEGER(KIND=JPIM) :: IFLDL,IFLDS +LOGICAL :: LLSAME +! ------------------------------------------------------------------ + + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFGATHG + DO JROF=1,IEND + PGPG(IOFF+JROF,JFLD) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1643,1) + +ELSE +! test if values in KTO are all the same + LLSAME=.TRUE. + ITO=KTO(1) + DO JFLD=2,KFGATHG + IF(KTO(JFLD) /= ITO) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO +! IF( MYPROC==1 )THEN +! WRITE(0,'("GATH_GRID_CTL DEBUG: LLSAME=",L1)')LLSAME +! DO JFLD=1,KFGATHG +! WRITE(0,'("GATH_GRID_CTL DEBUG:KFGATHG,JFLD,KTO=",3(2X,I6))')KFGATHG,JFLD,KTO(JFLD) +! ENDDO +! ENDIF + + IFLDL=D%NGPTOTMX + IF(LLSAME) THEN + CALL GSTATS(1643,0) + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JFLD=1,KFGATHG + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+(JFLD-1)*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1643,1) + ELSE + ILENS(:)=0 + IOFFS(:)=0 + ILENR(:)=0 + IOFFR(:)=0 + DO JFLD=1,KFGATHG + ILENS(KTO(JFLD))=ILENS(KTO(JFLD))+IFLDL + IF(KTO(JFLD) == MYPROC) THEN + ILENR(:)=ILENR(:)+IFLDL + ENDIF + ENDDO + DO JROC=2,NPROC + IOFFR(JROC)=IOFFR(JROC-1)+ ILENR(JROC-1) + IOFFS(JROC)=IOFFS(JROC-1)+ ILENS(JROC-1) + ENDDO + IFLDS=0 + DO JROC=1,NPROC + IF(ILENS(JROC) > 0) THEN + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == JROC) THEN + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+IFLDS*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + IFLDS=IFLDS+1 + ENDIF + ENDDO + ENDIF + ENDDO + ENDIF + + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX*IMYFIELDS*NPROC)) + ELSE + ALLOCATE(ZBUF(1)) + ENDIF + IFLDR = 0 + CALL GSTATS_BARRIER(789) + CALL GSTATS(809,0) + + IF( LLSAME )THEN + !Send + ISND = KTO(1) + ITAG = MTAGDISTSP+1+17 + CALL MPL_SEND(ZFLD,KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(1),& + &CDSTRING='GATH_GRID_CTL:') + ! RECIEVE + IF(KTO(1) == MYPROC) THEN + IFLDR = KFGATHG + DO JROC=1,NPROC + ITAG = MTAGDISTSP+1+17 + IRCV = JROC + IOFF=IFLDL*KFGATHG*(JROC-1) + CALL MPL_RECV(ZBUF(IOFF+1:IOFF+IFLDL*KFGATHG),KSOURCE=NPRCIDS(IRCV),& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILRECV,& + &KTAG=ITAG,CDSTRING='GATH_GRID_CTL:') + ENDDO + ENDIF + CALL MPL_WAIT(KREQUEST=ISENDREQ(1), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ELSE + IFLDR=IMYFIELDS + +! ALLTOALLV performance is really slow when number of fields (KFGATHG) is << NPROC +! This was for IBM - and RECV/SEND alternative causes problems for large number of MPI tasks. + +! IF( KFGATHG >= NPROC/8 )THEN + IF( .TRUE. )THEN + CALL MPL_ALLTOALLV(PSENDBUF=ZFLD,KSENDCOUNTS=ILENS,& + & PRECVBUF=ZBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & CDSTRING='GATH_GRID_CTL:') + ELSE + IR=0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IR=IR+NPROC + ENDIF + ENDDO + IR=IR+KFGATHG + ALLOCATE(IREQ(IR)) + IR=0 + ITAG = MTAGDISTSP+1+17 + DO JROC=1,NPROC + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IRCV = JROC + IR=IR+1 + CALL MPL_RECV(ZBUF(1+IOFFR(IRCV):IOFFR(IRCV)+ILENR(IRCV)),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR),& + &CDSTRING='GATH_GRID_CTL:') + ENDIF + ENDDO + ENDDO + DO JFLD=1,KFGATHG + ISND = KTO(JFLD) + IR=IR+1 + CALL MPL_SEND(ZFLD(1+IOFFS(ISND):IOFFS(ISND)+ILENS(ISND)),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR),& + &CDSTRING='GATH_GRID_CTL:') + ENDDO + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='GATH_GRID_CTL: WAIT') + DEALLOCATE(IREQ) + ENDIF + ENDIF + + CALL GSTATS(809,1) + CALL GSTATS_BARRIER2(789) + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!$OMP&PRIVATE(JA,JB,IPROC,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!$OMP&ILEN,ILOFF,JGL,JLON,JFLD) + DO JFLD=1,IFLDR + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IPROC,JA,JB,0,0) + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) = & + & ZBUF(ILEN+JLON+(JFLD-1)*IFLDL+(IPROC-1)*IFLDL*IMYFIELDS) + ENDDO + ILEN = ILEN + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + + CALL GSTATS(1643,1) +! Synhronize processors +! Should not be necessary +!!$ CALL GSTATS(784,0) +!!$ CALL MPL_BARRIER(CDSTRING='GATH_GRID_CTL:') +!!$ CALL GSTATS(784,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID_CTL +END MODULE GATH_GRID_CTL_MOD + + diff --git a/src/trans/cpu/internal/gath_spec_control_mod.F90 b/src/trans/cpu/internal/gath_spec_control_mod.F90 new file mode 100644 index 0000000..a4c4e84 --- /dev/null +++ b/src/trans/cpu/internal/gath_spec_control_mod.F90 @@ -0,0 +1,233 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GATH_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& + & KSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,LDZA0IP) + +!**** *GATH_SPEC_CONTROL* - Gather global spectral array from processors + +! Purpose. +! -------- +! Routine for gathering spectral array + +!** Interface. +! ---------- +! CALL GATH_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be distributed +! KTO(:) - Processor responsible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set first coefficients (imaginary part) to zero + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +!USE TPM_GEN +!USE TPM_DIM +USE TPM_DISTR ,ONLY : MTAGDISTSP, NPRCIDS, NPRTRW, & + & MYSETV, MYSETW, MYPROC, NPROC +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE SET2PE_MOD ,ONLY : SET2PE +!USE SUWAVEDI_MOD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +REAL(KIND=JPRB) :: ZFLD(KSPEC2,KFGATHG),ZDUM(KSPEC2) +REAL(KIND=JPRB),ALLOCATABLE :: ZRECV(:,:) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ISP,ILENR,ISTA,ISTP,ISENDREQ(KFGATHG),IPOS0,JNM +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G),IMYFIELDS +LOGICAL :: LLZA0IP + +! ------------------------------------------------------------------ + +LLZA0IP=.TRUE. +IF (PRESENT (LDZA0IP)) LLZA0IP=LDZA0IP + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JM=1,KSPEC2_G + DO JFLD=1,KFGATHG + PSPECG(JFLD,JM) =PSPEC(JFLD,JM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JFLD=1,KFGATHG + DO JM=1,KSPEC2_G + PSPECG(JM,JFLD) =PSPEC(JM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + IF(IMYFIELDS>0) THEN + ALLOCATE(ZRECV(KSPEC2_G,IMYFIELDS)) + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KSMAX + DO JN=JM,KSMAX + IDIST(II+1) = KDIM0G(JM)+(JN-JM)*2 + IDIST(II+2) = KDIM0G(JM)+(JN-JM)*2+1 + II = II+2 + ENDDO + ENDDO + CALL GSTATS(1804,1) + ENDIF + + CALL GSTATS_BARRIER(788) + + !Send + CALL GSTATS(810,0) + IFLDS = 0 + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + + IFLDS = IFLDS+1 + ISND = KTO(JFLD) + ITAG = MTAGDISTSP+JFLD+17 + IF(LDIM1_IS_FLD) THEN + ZFLD(1:KSPEC2,IFLDS)=PSPEC(IFLDS,1:KSPEC2) + CALL MPL_SEND(ZFLD(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ELSE + CALL MPL_SEND(PSPEC(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ENDIF + ENDIF + ENDDO + ENDIF + + ! Recieve + IFLDR = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IBSET = KVSET(JFLD) + IFLDR = IFLDR+1 + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(IRCV,0,0,JA,IBSET) + ITAG = MTAGDISTSP+JFLD+17 + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + CALL MPL_RECV(ZRECV(ISTA:ISTP,IFLDR),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & + &CDSTRING='GATH_SPEC_CONTROL') + IF( ILENR /= ILEN )THEN + WRITE(0,'("GATH_SPEC_CONTROL: JFLD=",I4," JA=",I4," ILEN=",I10," ILENR=",I10)')& + &JFLD,JA,ILEN,ILENR + CALL ABORT_TRANS('GATH_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + + ! Check for completion of sends + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JFLD), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ENDIF + ENDDO + ENDIF + CALL GSTATS(810,1) + CALL GSTATS_BARRIER2(788) + + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JNM,II,JN,ISP) + DO JFLD=1,IMYFIELDS + IF(LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + PSPECG(JFLD,JNM) = ZRECV(IDIST(JNM),JFLD) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + ISP = KDIM0G(0)+JN*2+1 + II = II+2 + PSPECG(JFLD,II) = 0.0_JPRB + ENDDO + ENDIF + ELSE + DO JNM=1,KSPEC2_G + PSPECG(JNM,JFLD) = ZRECV(IDIST(JNM),JFLD) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + ISP = KDIM0G(0)+JN*2+1 + II = II+2 + PSPECG(II,JFLD) = 0.0_JPRB + ENDDO + ENDIF + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + IF(ALLOCATED(ZRECV)) DEALLOCATE(ZRECV) + + !Synchronize processors + CALL GSTATS(785,0) + CALL MPL_BARRIER(CDSTRING='GATH_SPEC_CONTROL:') + CALL GSTATS(785,1) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_SPEC_CONTROL +END MODULE GATH_SPEC_CONTROL_MOD + + diff --git a/src/trans/cpu/internal/gawl_mod.F90 b/src/trans/cpu/internal/gawl_mod.F90 new file mode 100644 index 0000000..c188e59 --- /dev/null +++ b/src/trans/cpu/internal/gawl_mod.F90 @@ -0,0 +1,118 @@ +! (C) Copyright 1992- ECMWF. +! (C) Copyright 1992- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GAWL_MOD +CONTAINS +SUBROUTINE GAWL(PFN,PL,PW,PEPS,KN,KITER,PMOD) + +!**** *GAWL * - Routine to perform the Newton loop + +! Purpose. +! -------- +! Find 0 of Legendre polynomial with Newton loop +!** Interface. +! ---------- +! *CALL* *GAWL(PFN,PL,PW,PEPS,KN,KITER,PMOD) + +! Explicit arguments : +! -------------------- +! PFN Fourier coefficients of series expansion +! for the ordinary Legendre polynomials (in) +! PL Gaussian latitude (inout) +! PW Gaussian weight (out) +! PEPS 0 of the machine (in) +! KN Truncation (in) +! KITER Number of iterations (out) +! PMOD Last modification (inout) + +! Implicit arguments : +! -------------------- +! None + +! Method. +! ------- +! Newton Loop. + +! Externals. +! ---------- +! CPLEDN + +! Reference. +! ---------- + +! ARPEGE Documentation vol.2, ch3. + +! Author. +! ------- +! Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 92-12-18 +! K. Yessad (Sep 2008): cleaning, improve comments. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +USE CPLEDN_MOD ,ONLY : CPLEDN + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KN +REAL(KIND=JPRD),INTENT(IN) :: PFN(0:KN/2) +REAL(KIND=JPRD),INTENT(INOUT) :: PL +REAL(KIND=JPRD),INTENT(OUT) :: PW +REAL(KIND=JPRD),INTENT(IN) :: PEPS +INTEGER(KIND=JPIM),INTENT(OUT) :: KITER +REAL(KIND=JPRD),INTENT(INOUT) :: PMOD + +! ------------------------------------------------------------------ + + +INTEGER(KIND=JPIM) :: IFLAG, ITEMAX, JTER, IODD +REAL(KIND=JPRD) :: ZW, ZX, ZXN + +! ------------------------------------------------------------------ + +!* 1. Initialization. +! --------------- + +ITEMAX = 20 +ZX = PL +IFLAG = 0 +IODD=MOD(KN,2) + +! ------------------------------------------------------------------ + +!* 2. Newton iteration. +! ----------------- + +DO JTER=1,ITEMAX+1 + KITER = JTER + CALL CPLEDN(KN,IODD,PFN,ZX,IFLAG,ZW,ZXN,PMOD) + ZX = ZXN + + IF(IFLAG == 1) EXIT + IF(ABS(PMOD) <= PEPS*1000._JPRD) IFLAG = 1 +ENDDO + +PL = ZXN +PW = ZW + +! ------------------------------------------------------------------ + +END SUBROUTINE GAWL +END MODULE GAWL_MOD + + diff --git a/src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 b/src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 new file mode 100644 index 0000000..60de2a2 --- /dev/null +++ b/src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 @@ -0,0 +1,457 @@ +! (C) Copyright 2008- ECMWF. +! (C) Copyright 2008- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GPNORM_TRANS_CTL_MOD +CONTAINS +SUBROUTINE GPNORM_TRANS_CTL(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,PW) + + +!**** *GPNORM_TRANS_CTL* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms using a 2 stage (NPRTRV,NPRTRW) communication rather +! than an approach using a more expensive global gather collective communication + +!** Interface. +! ---------- +! CALL GPNORM_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! + +! Author. +! ------- +! George Mozdzynski *ECMWF* + +! Modifications. +! -------------- +! Original : 19th Sept 2008 +! R. El Khatib 07-08-2009 Optimisation directive for NEC +! R. El Khatib 16-Sep-2019 merge with LAM code +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LGPNORM, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE TRGTOL_MOD ,ONLY : TRGTOL +USE SET2PE_MOD ,ONLY : SET2PE +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL ,INTENT(IN) :: LDAVE_ONLY +REAL(KIND=JPRB) ,INTENT(IN) :: PW(R%NDGL) + +!ifndef INTERFACE + +! Local variables +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +INTEGER(KIND=JPIM) :: IUBOUND(4) +INTEGER(KIND=JPIM) :: IVSET(KFIELDS) +INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETG(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZGTF(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZAVE(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMINGL(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMAXGL(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMIN(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMAX(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZAVEG(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMING(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMAXG(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZSND(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZRCV(:) +INTEGER(KIND=JPIM) :: J,JGL,IGL,JL,JF,IF_GP,IF_SCALARS_G,IF_FS,JSETV,JSETW,IWLATS +INTEGER(KIND=JPIM) :: IPROC,ITAG,ILEN,ILENR,IBEG,IEND,IND + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS_CTL',0,ZHOOK_HANDLE) + +! Set defaults + +NPROMA = KPROMA +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +! Consistency checks + +IUBOUND(1:3)=UBOUND(PGP) +IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'GPNORM_TRANS_CTL:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('GPNORM_TRANS_CTL:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFIELDS) THEN + WRITE(NOUT,*)'GPNORM_TRANS_CTL:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFIELDS + CALL ABORT_TRANS('GPNORM_TRANS_CTL:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'GPNORM_TRANS_CTL:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('GPNORM_TRANS_CTL:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + + +IF_GP=KFIELDS +IF_SCALARS_G=0 + +IF_FS=0 +DO J=1,KFIELDS + IVSET(J)=MOD(J-1,NPRTRV)+1 + IF(IVSET(J)==MYSETV)THEN + IF_FS=IF_FS+1 + ENDIF +ENDDO + +ALLOCATE(IVSETS(NPRTRV)) +IVSETS(:)=0 +DO J=1,KFIELDS + IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 +ENDDO +ALLOCATE(IVSETG(NPRTRV,MAXVAL(IVSETS(:)))) +IVSETG(:,:)=0 +IVSETS(:)=0 +DO J=1,KFIELDS + IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 + IVSETG(IVSET(J),IVSETS(IVSET(J)))=J +ENDDO + +ALLOCATE(ZGTF(IF_FS,D%NLENGTF)) +LGPNORM=.TRUE. +CALL TRGTOL(ZGTF,IF_FS,IF_GP,IF_SCALARS_G,IVSET,PGP=PGP) +LGPNORM=.FALSE. + +IBEG=1 +IEND=D%NDGL_FS + +ALLOCATE(ZAVE(IF_FS,IBEG:IEND)) +ALLOCATE(ZMIN(IF_FS)) +ALLOCATE(ZMAX(IF_FS)) +IF(.NOT.LDAVE_ONLY)THEN + ALLOCATE(ZMINGL(IF_FS,IBEG:IEND)) + ALLOCATE(ZMAXGL(IF_FS,IBEG:IEND)) +ENDIF + +IF( IF_FS > 0 )THEN + + ZAVE(:,:)=0.0_JPRB + + IF(.NOT.LDAVE_ONLY)THEN + DO JF=1,IF_FS + ZMINGL(JF,:)=ZGTF(JF,D%NSTAGTF(1)+1) + ZMAXGL(JF,:)=ZGTF(JF,D%NSTAGTF(1)+1) + ENDDO + ENDIF + +! FIRST DO SUMS IN EACH FULL LATITUDE + +CALL GSTATS(1429,0) +!WARNING: COMMENTING OPENMP OUT AS TEMPORARY WORKAROUND FOR AMD-COMPILER +!!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JGL,IGL,JF,JL) + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 +!CDIR NOLOOPCHG + DO JF=1,IF_FS +!DIR$ NEXTSCALAR + DO JL=1,G%NLOEN(IGL) + ZAVE(JF,JGL)=ZAVE(JF,JGL)+ZGTF(JF,D%NSTAGTF(JGL)+JL) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + DO JL=1,G%NLOEN(IGL) + ZMINGL(JF,JGL)=MIN(ZMINGL(JF,JGL),ZGTF(JF,D%NSTAGTF(JGL)+JL)) + ZMAXGL(JF,JGL)=MAX(ZMAXGL(JF,JGL),ZGTF(JF,D%NSTAGTF(JGL)+JL)) + ENDDO + ENDIF + ENDDO + ENDDO +!!$OMP END PARALLEL DO +CALL GSTATS(1429,1) + + IF(.NOT.LDAVE_ONLY)THEN + DO JF=1,IF_FS + ZMIN(JF)=MINVAL(ZMINGL(JF,:)) + ZMAX(JF)=MAXVAL(ZMAXGL(JF,:)) + ENDDO + DEALLOCATE(ZMINGL) + DEALLOCATE(ZMAXGL) + ENDIF + + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JF=1,IF_FS + ZAVE(JF,JGL)=ZAVE(JF,JGL)*PW(IGL)/G%NLOEN(IGL) + ENDDO + ENDDO + +ENDIF + +! IT IS IMPORTANT THAT SUMS ARE NOW DONE IN LATITUDE ORDER +ALLOCATE(ZAVEG(R%NDGL,KFIELDS)) +ALLOCATE(ZMING(KFIELDS)) +ALLOCATE(ZMAXG(KFIELDS)) + +ZAVEG(:,:)=0.0_JPRB + +DO JF=1,IF_FS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + ZAVEG(IGL,IVSETG(MYSETV,JF))=ZAVEG(IGL,IVSETG(MYSETV,JF))+ZAVE(JF,JGL) + ENDDO +ENDDO + +IF(LDAVE_ONLY)THEN + ZMING(:)=PMIN(:) + ZMAXG(:)=PMAX(:) +ELSE + DO JF=1,IF_FS + ZMING(IVSETG(MYSETV,JF))=ZMIN(JF) + ZMAXG(IVSETG(MYSETV,JF))=ZMAX(JF) + ENDDO +ENDIF + +! RECEIVE ABOVE FROM OTHER NPRTRV SETS FOR SAME LATS BUT DIFFERENT FIELDS +ITAG=123 + +CALL GSTATS(815,0) + +IF( MYSETV==1 )THEN + + DO JSETV=2,NPRTRV + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*IVSETS(JSETV)+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*IVSETS(JSETV) + ENDIF + IF(ILEN > 0)THEN + ALLOCATE(ZRCV(ILEN)) + CALL SET2PE(IPROC,0,0,MYSETW,JSETV) + CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='GPNORM_TRANS_CTL:V') + IF(ILENR /= ILEN)THEN + CALL ABOR1('GPNORM_TRANS_CTL:ILENR /= ILEN') + ENDIF + IND=0 + DO JF=1,IVSETS(JSETV) + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZAVEG(IGL,IVSETG(JSETV,JF))=ZRCV(IND) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZMING(IVSETG(JSETV,JF))=ZRCV(IND) + IND=IND+1 + ZMAXG(IVSETG(JSETV,JF))=ZRCV(IND) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDDO + ENDIF + DEALLOCATE(ZRCV) + ENDIF + ENDDO + +ELSE + + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*IVSETS(MYSETV)+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*IVSETS(MYSETV) + ENDIF + IF(ILEN > 0)THEN + CALL SET2PE(IPROC,0,0,MYSETW,1) + ALLOCATE(ZSND(ILEN)) + IND=0 + DO JF=1,IF_FS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZSND(IND)=ZAVEG(IGL,IVSETG(MYSETV,JF)) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZSND(IND)=ZMING(IVSETG(MYSETV,JF)) + IND=IND+1 + ZSND(IND)=ZMAXG(IVSETG(MYSETV,JF)) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZSND(IND)=PMIN(JF) + IND=IND+1 + ZSND(IND)=PMAX(JF) + ENDDO + ENDIF + CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='GPNORM_TRANS_CTL:V') + DEALLOCATE(ZSND) + ENDIF + +ENDIF + +! FINALLY RECEIVE CONTRIBUTIONS FROM OTHER NPRTRW SETS + +IF( MYSETV == 1 )THEN + + IF( MYSETW == 1 )THEN + + DO JSETW=2,NPRTRW + IWLATS=D%NULTPP(JSETW) + IBEG=1 + IEND=IWLATS + IF(LDAVE_ONLY)THEN + ILEN=IWLATS*KFIELDS+2*KFIELDS + ELSE + ILEN=(IWLATS+2)*KFIELDS + ENDIF + IF(ILEN > 0 )THEN + ALLOCATE(ZRCV(ILEN)) + CALL SET2PE(IPROC,0,0,JSETW,1) + CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='GPNORM_TRANS_CTL:W') + IF(ILENR /= ILEN)THEN + CALL ABOR1('GPNORM_TRANS_CTL:ILENR /= ILEN') + ENDIF + IND=0 + DO JF=1,KFIELDS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(JSETW) + JGL - 1 + IND=IND+1 + ZAVEG(IGL,JF)=ZRCV(IND) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDDO + ENDIF + DEALLOCATE(ZRCV) + ENDIF + ENDDO + + ELSE + + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*KFIELDS+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*KFIELDS + ENDIF + IF(ILEN > 0)THEN + CALL SET2PE(IPROC,0,0,1,1) + ALLOCATE(ZSND(ILEN)) + IND=0 + DO JF=1,KFIELDS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZSND(IND)=ZAVEG(IGL,JF) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZSND(IND)=ZMING(JF) + IND=IND+1 + ZSND(IND)=ZMAXG(JF) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZSND(IND)=ZMING(JF) + IND=IND+1 + ZSND(IND)=ZMAXG(JF) + ENDDO + ENDIF + CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='GPNORM_TRANS_CTL:V') + DEALLOCATE(ZSND) + ENDIF + + ENDIF + +ENDIF + +CALL GSTATS(815,1) + +IF( MYSETW == 1 .AND. MYSETV == 1 )THEN + + PAVE(:)=0.0_JPRB + DO JGL=1,R%NDGL + PAVE(:)=PAVE(:)+ZAVEG(JGL,:) + ENDDO + + PMIN(:)=ZMING(:) + PMAX(:)=ZMAXG(:) + +ENDIF + +DEALLOCATE(ZGTF) +DEALLOCATE(ZAVE) +DEALLOCATE(ZMIN) +DEALLOCATE(ZMAX) +DEALLOCATE(ZAVEG) +DEALLOCATE(ZMING) +DEALLOCATE(ZMAXG) +DEALLOCATE(IVSETS) +DEALLOCATE(IVSETG) + +IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + + +END SUBROUTINE GPNORM_TRANS_CTL +END MODULE GPNORM_TRANS_CTL_MOD diff --git a/src/trans/cpu/internal/inigptr_mod.F90 b/src/trans/cpu/internal/inigptr_mod.F90 new file mode 100644 index 0000000..f30b44a --- /dev/null +++ b/src/trans/cpu/internal/inigptr_mod.F90 @@ -0,0 +1,92 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INIGPTR_MOD +CONTAINS +SUBROUTINE INIGPTR(KGPTRSEND,KGPTRRECV) + +! Compute tables to assist GP to/from Fourier space transpositions + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DISTR ,ONLY : D, NPRTRNS +USE TPM_TRANS ,ONLY : NGPBLKS, NPROMA +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(OUT) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) +INTEGER(KIND=JPIM),INTENT(OUT) :: KGPTRRECV(NPRTRNS) + +INTEGER(KIND=JPIM) :: IBLOCK,IROF,IBFIRST,IPROCLAST,IPROC,IFIRST,ILAST,IBLAST +INTEGER(KIND=JPIM) :: JGL,JBL,JPRTRNS,JBLKS +! Compute tables to assist GP to/from Fourier space transpositions + + +KGPTRSEND(:,:,:)=0 +IBLOCK=1 +IROF=1 +IBFIRST=1 +IPROCLAST=D%NPROCL(D%NFRSTLOFF+1) +DO JGL=1,D%NDGL_GP + ! Find processor which deals with this latitude in Fourier distribution + IPROC=D%NPROCL(D%NFRSTLOFF+JGL) + IF(IPROC > NPRTRNS) THEN + WRITE(NOUT,'(A,I8)')& + &' INIGPTR ERROR : exceeding processor limit ',NPRTRNS + CALL ABORT_TRANS(' INIGPTR ERROR : exceeding processor limit ') + ENDIF + + ! for each latitude on this processor, find first and last points + ! for each NPROMA chunk, for each destination processor + IF(IPROC /= IPROCLAST) THEN + IF(IROF > 1) THEN + KGPTRSEND(1,IBLOCK,IPROCLAST)=IBFIRST + KGPTRSEND(2,IBLOCK,IPROCLAST)=IROF-1 + ENDIF + IF(IROF <= NPROMA) IBFIRST=IROF + IPROCLAST=IPROC + ENDIF + IFIRST=D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + ILAST =IFIRST + D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) -1 + DO JBL=IFIRST,ILAST + IF(IROF == NPROMA) THEN + IBLAST=IROF + KGPTRSEND(1,IBLOCK,IPROC)=IBFIRST + KGPTRSEND(2,IBLOCK,IPROC)=IBLAST + IF(IBLOCK < NGPBLKS) IBLOCK=IBLOCK+1 + IROF=0 + IBFIRST=1 + ENDIF + IROF=IROF+1 + ENDDO +ENDDO +IF(IROF /= 1.AND.IROF /= IBFIRST) THEN +! non-empty residual block after last latitude line + IBLAST=IROF-1 + KGPTRSEND(1,IBLOCK,IPROC)=IBFIRST + KGPTRSEND(2,IBLOCK,IPROC)=IBLAST +ENDIF +! sum up over blocks +KGPTRRECV(:)=0 +DO JPRTRNS=1,NPRTRNS + DO JBLKS=1,NGPBLKS + IF(KGPTRSEND(1,JBLKS,JPRTRNS) > 0) THEN + KGPTRRECV(JPRTRNS)=KGPTRRECV(JPRTRNS)+& + &KGPTRSEND(2,JBLKS,JPRTRNS)-KGPTRSEND(1,JBLKS,JPRTRNS)+1 + ENDIF + ENDDO +ENDDO + +END SUBROUTINE INIGPTR +END MODULE INIGPTR_MOD diff --git a/src/trans/cpu/internal/inv_trans_ctl_mod.F90 b/src/trans/cpu/internal/inv_trans_ctl_mod.F90 new file mode 100644 index 0000000..b6e2b01 --- /dev/null +++ b/src/trans/cpu/internal/inv_trans_ctl_mod.F90 @@ -0,0 +1,299 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INV_TRANS_CTL_MOD +CONTAINS +SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS_CTL* - Control routine for inverse spectral transform. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTINV_CTL_MOD ,ONLY : LTINV_CTL +USE FTINV_CTL_MOD ,ONLY : FTINV_CTL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB + +! ------------------------------------------------------------------ + +! Perform transform + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL LTINV_CTL(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,FSPGL_PROC=FSPGL_PROC) + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL LTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + &PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + &PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + &FSPGL_PROC=FSPGL_PROC) + + CALL FTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE INV_TRANS_CTL +END MODULE INV_TRANS_CTL_MOD diff --git a/src/trans/cpu/internal/inv_trans_ctlad_mod.F90 b/src/trans/cpu/internal/inv_trans_ctlad_mod.F90 new file mode 100644 index 0000000..8894269 --- /dev/null +++ b/src/trans/cpu/internal/inv_trans_ctlad_mod.F90 @@ -0,0 +1,296 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INV_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE INV_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS_CTLAD* - Control routine for inverse spectral transform adj. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields +! +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTLAD - control of Legendre transform +! FTINV_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTINV_CTLAD_MOD ,ONLY : LTINV_CTLAD +USE FTINV_CTLAD_MOD ,ONLY : FTINV_CTLAD +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB + +! ------------------------------------------------------------------ + +! Perform transform + + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + CALL LTINV_CTLAD(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC) + + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL FTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + + CALL LTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + &PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + &PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE INV_TRANS_CTLAD +END MODULE INV_TRANS_CTLAD_MOD diff --git a/src/trans/cpu/internal/ldfou2_mod.F90 b/src/trans/cpu/internal/ldfou2_mod.F90 new file mode 100644 index 0000000..4a63132 --- /dev/null +++ b/src/trans/cpu/internal/ldfou2_mod.F90 @@ -0,0 +1,97 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LDFOU2_MOD +CONTAINS +SUBROUTINE LDFOU2(KM,KF_UV,PAIA,PSIA) + +!**** *LDFOU2* - Division by a*cos(theta) of u and v + +! Purpose. +! -------- +! In Fourier space divide u and v by a*cos(theta). + +!** Interface. +! ---------- +! CALL LDFOU2(KM,PAIA,PSIA) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! PAIA - antisymmetric fourier fields +! PSIA - symmetric fourierfields + +! Implicit arguments : RACTHE - 1./(a*cos(theta)) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Message Passing option added +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV + +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSIA(:,:), PAIA(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: J, JGL ,IFLD ,ISL + + +! ------------------------------------------------------------------ + +!* 1. DIVIDE U V BY A*COS(THETA) +! -------------------------- + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IFLD = 4*KF_UV + +!* 1.1 U AND V + +DO JGL=ISL,R%NDGNH + DO J=1,IFLD + PAIA(J,JGL) = PAIA(J,JGL)*F%RACTHE(JGL) + PSIA(J,JGL) = PSIA(J,JGL)*F%RACTHE(JGL) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE LDFOU2 +END MODULE LDFOU2_MOD diff --git a/src/trans/cpu/internal/ldfou2ad_mod.F90 b/src/trans/cpu/internal/ldfou2ad_mod.F90 new file mode 100644 index 0000000..681f686 --- /dev/null +++ b/src/trans/cpu/internal/ldfou2ad_mod.F90 @@ -0,0 +1,97 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LDFOU2AD_MOD +CONTAINS +SUBROUTINE LDFOU2AD(KM,KF_UV,PAIA,PSIA) + +!**** *LDFOU2AD* - Division by a*cos(theta) of u and v + +! Purpose. +! -------- +! In Fourier space divide u and v by a*cos(theta). + +!** Interface. +! ---------- +! CALL LDFOU2AD(KM,PAIA,PSIA) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! PAIA - antisymmetric fourier fields +! PSIA - symmetric fourierfields + +! Implicit arguments : RACTHE - 1./(a*cos(theta)) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Message Passing option added +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV + +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSIA(:,:), PAIA(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: J, JGL ,IFLD ,ISL + + +! ------------------------------------------------------------------ + +!* 1. DIVIDE U V BY A*COS(THETA) +! -------------------------- + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IFLD = 4*KF_UV + +!* 1.1 U AND V + +DO JGL=ISL,R%NDGNH + DO J=1,IFLD + PAIA(J,JGL) = PAIA(J,JGL)*F%RACTHE(JGL) + PSIA(J,JGL) = PSIA(J,JGL)*F%RACTHE(JGL) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE LDFOU2AD +END MODULE LDFOU2AD_MOD diff --git a/src/trans/cpu/internal/ledir_mod.F90 b/src/trans/cpu/internal/ledir_mod.F90 new file mode 100644 index 0000000..9c2cfd9 --- /dev/null +++ b/src/trans/cpu/internal/ledir_mod.F90 @@ -0,0 +1,279 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEDIR_MOD +CONTAINS +SUBROUTINE LEDIR(KM,KMLOC,KFC,KIFC,KSL,KDGLU,KLED2,PAIA,PSIA,POA1,PW) + +!**** *LEDIR* - Direct Legendre transform. + +! Purpose. +! -------- +! Direct Legendre tranform of state variables. + +!** Interface. +! ---------- +! CALL LEDIR(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- use butterfly or dgemm + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Nils Wedi + Mats Hamrud + George Modzynski + +! Modifications. +! -------------- +! J.Hague : Oct 2012 DR_HOOK round calls to DGEMM: +! F. Vana 05-Mar-2015 Support for single precision +! P. Dueben : Dec 2019 Improvements for mass conservation in single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_FLT +USE TPM_FIELDS +USE TPM_DISTR +USE BUTTERFLY_ALG_MOD + +use, intrinsic :: ieee_exceptions + + +IMPLICIT NONE + + +! DUMMY ARGUMENTS +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KIFC +INTEGER(KIND=JPIM), INTENT(IN) :: KSL +INTEGER(KIND=JPIM), INTENT(IN) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 + +REAL(KIND=JPRB), INTENT(IN) :: PW(KDGLU+KSL-1) +REAL(KIND=JPRB), INTENT(IN) :: PSIA(:,:), PAIA(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: POA1(:,:) + +! LOCAL VARIABLES +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, IFLD, J, JK, I1, I2, I3, I4 +INTEGER(KIND=JPIM) :: ITHRESHOLD +REAL(KIND=JPRB) :: ZB(KDGLU,KIFC), ZCA((R%NTMAX-KM+2)/2,KIFC), ZCS((R%NTMAX-KM+3)/2,KIFC) +REAL(KIND=JPRD), allocatable :: ZB_D(:,:), ZCA_D(:,:), ZCS_D(:,:),ZRPNMA(:,:), ZRPNMS(:,:) +LOGICAL :: LL_HALT_INVALID +#ifdef WITH_IEEE_HALT +LOGICAL, PARAMETER :: LL_IEEE_HALT = .TRUE. +#else +LOGICAL, PARAMETER :: LL_IEEE_HALT = .FALSE. +#endif +LOGICAL, PARAMETER :: LLDOUBLE = (JPRB == JPRD) +CHARACTER(LEN=1) :: CLX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. + +CLX = 'S' +IF (LLDOUBLE) CLX = 'D' + +IA = 1+MOD(R%NTMAX-KM+2,2) +IS = 1+MOD(R%NTMAX-KM+1,2) +ILA = (R%NTMAX-KM+2)/2 +ILS = (R%NTMAX-KM+3)/2 +ISL = KSL + +IF(KM == 0)THEN + ISKIP = 2 +ELSE + ISKIP = 1 +ENDIF + +IF (KIFC > 0 .AND. KDGLU > 0 ) THEN + + ITHRESHOLD=S%ITHRESHOLD + +!* 1. ANTISYMMETRIC PART. + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,KDGLU + ZB(J,IFLD)=PAIA(JK,ISL+J-1)*PW(ISL+J-1) + ENDDO + ENDDO + + IF(ILA <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'GEMM_1',0,ZHOOK_HANDLE) + IF (LLDOUBLE) THEN + CALL DGEMM('T','N',ILA,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZB,KDGLU,0._JPRB,ZCA,ILA) + ELSE + IF(KM>=1)THEN ! DGEM for the mean to improve mass conservation + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',ILA,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZB,KDGLU,0._JPRB,ZCA,ILA) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ELSE + I1 = size(S%FA(KMLOC)%RPNMA(:,1)) + I2 = size(S%FA(KMLOC)%RPNMA(1,:)) + ALLOCATE(ZRPNMA(I1,I2)) + ALLOCATE(ZB_D(KDGLU,KIFC)) + ALLOCATE(ZCA_D((R%NTMAX-KM+2)/2,KIFC)) + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,KDGLU + ZB_D(J,IFLD)=ZB(J,IFLD) + ENDDO + ENDDO + DO I3=1,I1 + DO I4=1,I2 + ZRPNMA(I3,I4) = S%FA(KMLOC)%RPNMA(I3,I4) + END DO + END DO + CALL DGEMM('T','N',ILA,KIFC,KDGLU,1.0_JPRD,ZRPNMA,KDGLU,& + &ZB_D,KDGLU,0._JPRD,ZCA_D,ILA) + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILA + ZCA(J,IFLD) = ZCA_D(J,IFLD) + ENDDO + ENDDO + DEALLOCATE(ZRPNMA) + DEALLOCATE(ZB_D) + DEALLOCATE(ZCA_D) + END IF + ENDIF + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'GEMM_1',1,ZHOOK_HANDLE) + + ELSE + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'BUTM_1',0,ZHOOK_HANDLE) + CALL MULT_BUTM('T',S%FA(KMLOC)%YBUT_STRUCT_A,KIFC,ZB,ZCA,KM) + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'BUTM_1',1,ZHOOK_HANDLE) + ENDIF + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILA + POA1(IA+(J-1)*2,JK) = ZCA(J,IFLD) + ENDDO + ENDDO + +!* 1.3 SYMMETRIC PART. + + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,KDGLU + ZB(J,IFLD)=PSIA(JK,ISL+J-1)*PW(ISL+J-1) + ENDDO + ENDDO + + IF(ILS <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'GEMM_2',0,ZHOOK_HANDLE) + IF (LLDOUBLE) THEN + CALL DGEMM('T','N',ILS,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZB,KDGLU,0._JPRB,ZCS,ILS) + ELSE + IF(KM>=1)THEN ! DGEM for the mean to improve mass conservation + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',ILS,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZB,KDGLU,0._JPRB,ZCS,ILS) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ELSE + I1 = size(S%FA(KMLOC)%RPNMS(:,1)) + I2 = size(S%FA(KMLOC)%RPNMS(1,:)) + ALLOCATE(ZRPNMS(I1,I2)) + ALLOCATE(ZB_D(KDGLU,KIFC)) + ALLOCATE(ZCS_D((R%NTMAX-KM+3)/2,KIFC)) + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,KDGLU + ZB_D(J,IFLD)=PSIA(JK,ISL+J-1)*PW(ISL+J-1) + ENDDO + ENDDO + DO I3=1,I1 + DO I4=1,I2 + ZRPNMS(I3,I4) = S%FA(KMLOC)%RPNMS(I3,I4) + END DO + END DO + CALL DGEMM('T','N',ILS,KIFC,KDGLU,1.0_JPRD,ZRPNMS,KDGLU,& + &ZB_D,KDGLU,0._JPRD,ZCS_D,ILS) + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILS + ZCS(J,IFLD) = ZCS_D(J,IFLD) + ENDDO + ENDDO + DEALLOCATE(ZRPNMS) + DEALLOCATE(ZB_D) + DEALLOCATE(ZCS_D) + END IF + ENDIF + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'GEMM_2',1,ZHOOK_HANDLE) + + ELSE + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'BUTM_2',0,ZHOOK_HANDLE) + CALL MULT_BUTM('T',S%FA(KMLOC)%YBUT_STRUCT_S,KIFC,ZB,ZCS,KM) + IF (LHOOK) CALL DR_HOOK('LEDIR_'//CLX//'BUTM_2',1,ZHOOK_HANDLE) + ENDIF + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILS + POA1(IS+(J-1)*2,JK) = ZCS(J,IFLD) + ENDDO + ENDDO + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE LEDIR +END MODULE LEDIR_MOD diff --git a/src/trans/cpu/internal/ledirad_mod.F90 b/src/trans/cpu/internal/ledirad_mod.F90 new file mode 100644 index 0000000..dca3026 --- /dev/null +++ b/src/trans/cpu/internal/ledirad_mod.F90 @@ -0,0 +1,207 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEDIRAD_MOD +CONTAINS +SUBROUTINE LEDIRAD(KM,KMLOC,KFC,KIFC,KDGLU,KLED2,PAIA,PSIA,POA1) + +!**** *LEDIRAD* - Direct Legendre transform. + +! Purpose. +! -------- +! Direct Legendre tranform of state variables. + +!** Interface. +! ---------- +! CALL LEDIRAD(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - matrix multiply +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-01-28 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - NTMAX instead of NSMAX +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! Modified ! 16/10/12 J.Hague : DR_HOOK round calls to DGEMM: +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +!USE TPM_TRANS +! +USE TPM_FLT +USE TPM_FIELDS +USE TPM_DISTR +USE BUTTERFLY_ALG_MOD + +IMPLICIT NONE + + +! DUMMY ARGUMENTS +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KIFC +INTEGER(KIND=JPIM), INTENT(IN) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 + +REAL(KIND=JPRB), INTENT(OUT) :: PSIA(:,:), PAIA(:,:) +REAL(KIND=JPRB), INTENT(IN) :: POA1(:,:) + +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, J, JK,JGL,J1 +INTEGER(KIND=JPIM) :: IFLD,ITHRESHOLD +REAL(KIND=JPRB) :: ZB(KDGLU,KIFC), ZCA((R%NTMAX-KM+2)/2,KIFC), ZCS((R%NTMAX-KM+3)/2,KIFC) +LOGICAL, PARAMETER :: LLDOUBLE = (JPRD == JPRB) +CHARACTER(LEN=1) :: CLX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. + +CLX = 'S' +IF (LLDOUBLE) CLX = 'D' + +IA = 1+MOD(R%NTMAX-KM+2,2) +IS = 1+MOD(R%NTMAX-KM+1,2) +ILA = (R%NTMAX-KM+2)/2 +ILS = (R%NTMAX-KM+3)/2 +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + +IF(KM == 0)THEN + ISKIP = 2 + DO JGL=ISL,R%NDGNH + DO J1=2,KFC,2 + PSIA(J1,JGL)=0.0_JPRB + PAIA(J1,JGL)=0.0_JPRB + ENDDO + ENDDO +ELSE + ISKIP = 1 +ENDIF + + +IF (KIFC > 0 .AND. KDGLU > 0 ) THEN + + ITHRESHOLD=S%ITHRESHOLD + +!* 1. ANTISYMMETRIC PART. + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILA + ZCA(J,IFLD) = POA1(IA+(J-1)*2,JK) + ENDDO + ENDDO + + IF(ILA <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('N','N',KDGLU,KIFC,ILA,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZCA,ILA,0._JPRB,ZB,KDGLU) + ELSE + CALL SGEMM('N','N',KDGLU,KIFC,ILA,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZCA,ILA,0._JPRB,ZB,KDGLU) + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('N',S%FA(KMLOC)%YBUT_STRUCT_A,KIFC,ZCA,ZB) + + ENDIF + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,KDGLU + PAIA(JK,ISL+J-1) = ZB(J,IFLD)*F%RW(ISL+J-1) + ENDDO + ENDDO + + +!* 1.3 SYMMETRIC PART. + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILS + ZCS(J,IFLD) = POA1(IS+(J-1)*2,JK) + ENDDO + ENDDO + + + IF(ILS <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('N','N',KDGLU,KIFC,ILS,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZCS,ILS,0._JPRB,ZB,KDGLU) + ELSE + CALL SGEMM('N','N',KDGLU,KIFC,ILS,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZCS,ILS,0._JPRB,ZB,KDGLU) + + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('N',S%FA(KMLOC)%YBUT_STRUCT_S,KIFC,ZCS,ZB) + + ENDIF + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,KDGLU + PSIA(JK,ISL+J-1) = ZB(J,IFLD)*F%RW(ISL+J-1) + ENDDO + ENDDO + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE LEDIRAD +END MODULE LEDIRAD_MOD diff --git a/src/trans/cpu/internal/leinv_mod.F90 b/src/trans/cpu/internal/leinv_mod.F90 new file mode 100644 index 0000000..5611d57 --- /dev/null +++ b/src/trans/cpu/internal/leinv_mod.F90 @@ -0,0 +1,221 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEINV_MOD +CONTAINS +SUBROUTINE LEINV(KM,KMLOC,KFC,KIFC,KF_OUT_LT,KSL,KDGLU,PIA,PAOA1,PSOA1) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) + +! Implicit arguments : None. +! -------------------- + +! Method. use butterfly or dgemm +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Nils Wedi + Mats Hamrud + George Modzynski +! +! Modifications. +! -------------- +! J.Hague : Oct 2012 DR_HOOK round calls to DGEMM: +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_FLT +USE TPM_GEN ! Fpr nout +USE BUTTERFLY_ALG_MOD + +use, intrinsic :: ieee_exceptions + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KIFC +INTEGER(KIND=JPIM), INTENT(IN) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KSL +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(IN) :: PIA(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PSOA1(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PAOA1(:,:) + +! LOCAL +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, J1, IFLD, JGL,JK, J,JI, IEND +INTEGER(KIND=JPIM) :: ITHRESHOLD +REAL(KIND=JPRB) :: ZBA((R%NSMAX-KM+2)/2,KIFC), ZBS((R%NSMAX-KM+3)/2,KIFC), ZC(KDGLU,KIFC) +LOGICAL :: LL_HALT_INVALID +#ifdef WITH_IEEE_HALT +LOGICAL, PARAMETER :: LL_IEEE_HALT = .TRUE. +#else +LOGICAL, PARAMETER :: LL_IEEE_HALT = .FALSE. +#endif +LOGICAL, PARAMETER :: LLDOUBLE = (JPRB == JPRD) +CHARACTER(LEN=1) :: CLX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. + +CLX = 'S' +IF (LLDOUBLE) CLX = 'D' + +!ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +ISL = KSL +IEND = KSL + KDGLU - 1 + +IA = 1+MOD(R%NSMAX-KM+2,2) +IS = 1+MOD(R%NSMAX-KM+1,2) +ILA = (R%NSMAX-KM+2)/2 +ILS = (R%NSMAX-KM+3)/2 + +IF(KM == 0)THEN + ISKIP = 2 + DO J1=2,KFC,2 + DO JGL=ISL,IEND + PSOA1(J1,JGL) = 0.0_JPRB + PAOA1(J1,JGL) = 0.0_JPRB + ENDDO + ENDDO +ELSE + ISKIP = 1 +ENDIF + +IF( KDGLU > 0 ) THEN + + ITHRESHOLD=S%ITHRESHOLD + + ! 1. +++++++++++++ anti-symmetric + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILA + ZBA(J,IFLD)=PIA(IA+1+(J-1)*2,JK) + ENDDO + ENDDO + + IF(ILA <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'GEMM_1',0,ZHOOK_HANDLE) + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',KDGLU,KIFC,ILA,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZBA,ILA,0._JPRB,ZC,KDGLU) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',KDGLU,KIFC,ILA,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZBA,ILA,0._JPRB,ZC,KDGLU) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'GEMM_1',1,ZHOOK_HANDLE) + + ELSE + + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'BUTM_1',0,ZHOOK_HANDLE) + CALL MULT_BUTM('N',S%FA(KMLOC)%YBUT_STRUCT_A,KIFC,ZBA,ZC) + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'BUTM_1',1,ZHOOK_HANDLE) + + ENDIF + + ! we need the transpose of C + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO JI=1,KDGLU + PAOA1(JK,ISL+JI-1) = ZC(JI,IFLD) + ENDDO + ENDDO + + ! 2. +++++++++++++ symmetric + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO J=1,ILS + ZBS(J,IFLD)=PIA(IS+1+(J-1)*2,JK) + ENDDO + ENDDO + + IF(ILS <= ITHRESHOLD .OR. .NOT.S%LUSEFLT ) THEN + + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'GEMM_2',0,ZHOOK_HANDLE) + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',KDGLU,KIFC,ILS,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZBS,ILS,0._JPRB,ZC,KDGLU) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',KDGLU,KIFC,ILS,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZBS,ILS,0._JPRB,ZC,KDGLU) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'GEMM_2',1,ZHOOK_HANDLE) + + ELSE + + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'BUTM_2',0,ZHOOK_HANDLE) + CALL MULT_BUTM('N',S%FA(KMLOC)%YBUT_STRUCT_S,KIFC,ZBS,ZC) + IF (LHOOK) CALL DR_HOOK('LEINV_'//CLX//'BUTM_2',1,ZHOOK_HANDLE) + + ENDIF + + ! we need the transpose of C + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO JI=1,KDGLU + PSOA1(JK,ISL+JI-1) = ZC(JI,IFLD) + ENDDO + ENDDO + +ENDIF +! ------------------------------------------------------------------ + +END SUBROUTINE LEINV +END MODULE LEINV_MOD diff --git a/src/trans/cpu/internal/leinvad_mod.F90 b/src/trans/cpu/internal/leinvad_mod.F90 new file mode 100644 index 0000000..d23d856 --- /dev/null +++ b/src/trans/cpu/internal/leinvad_mod.F90 @@ -0,0 +1,197 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEINVAD_MOD +CONTAINS +SUBROUTINE LEINVAD(KM,KMLOC,KFC,KIFC,KF_OUT_LT,KDGLU,PIA,PAOA1,PSOA1) + +!**** *LEINVAD* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINVAD(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINVAD in IFS CY22R1 +! Modified ! 16/10/12 J.Hague : DR_HOOK round calls to DGEMM: +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D +! +USE TPM_FLT +USE BUTTERFLY_ALG_MOD + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KIFC +INTEGER(KIND=JPIM), INTENT(IN) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(OUT) :: PIA(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PSOA1(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PAOA1(:,:) + +! LOCAL VARIABLES +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, IOAD1, JK,JI +INTEGER(KIND=JPIM) :: IFLD,ITHRESHOLD +REAL(KIND=JPRB) :: ZBA((R%NSMAX-KM+2)/2,KIFC), ZBS((R%NSMAX-KM+3)/2,KIFC), ZC(KDGLU,KIFC) +LOGICAL, PARAMETER :: LLDOUBLE = (JPRD == JPRB) +CHARACTER(LEN=1) :: CLX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. + +CLX = 'S' +IF (LLDOUBLE) CLX = 'D' + +IA = 1+MOD(R%NSMAX-KM+2,2) +IS = 1+MOD(R%NSMAX-KM+1,2) +ILA = (R%NSMAX-KM+2)/2 +ILS = (R%NSMAX-KM+3)/2 +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IOAD1 = 2*KF_OUT_LT + +IF(KM == 0)THEN + ISKIP = 2 +ELSE + ISKIP = 1 +ENDIF + +IF( KDGLU > 0 ) THEN + + ITHRESHOLD=S%ITHRESHOLD + + +! 1. +++++++++++++ anti-symmetric + + ! we need the transpose of C + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO JI=1,KDGLU + ZC(JI,IFLD) = PAOA1(JK,ISL+JI-1) + ENDDO + ENDDO + + IF(ILA <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('T','N',ILA,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZC,KDGLU,0._JPRB,ZBA,ILA) + ELSE + CALL SGEMM('T','N',ILA,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZC,KDGLU,0._JPRB,ZBA,ILA) + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('T',S%FA(KMLOC)%YBUT_STRUCT_A,KIFC,ZC,ZBA) + + ENDIF + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO JI=1,ILA + PIA(IA+1+(JI-1)*2,JK) = ZBA(JI,IFLD) + ENDDO + ENDDO + +! 2. +++++++++++++ symmetric + + ! we need the transpose of C + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO JI=1,KDGLU + ZC(JI,IFLD) = PSOA1(JK,ISL+JI-1) + ENDDO + ENDDO + + IF(ILS <= ITHRESHOLD .OR. .NOT.S%LUSEFLT ) THEN + + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('T','N',ILS,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZC,KDGLU,0._JPRB,ZBS,ILS) + ELSE + CALL SGEMM('T','N',ILS,KIFC,KDGLU,1.0_JPRB,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZC,KDGLU,0._JPRB,ZBS,ILS) + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('T',S%FA(KMLOC)%YBUT_STRUCT_S,KIFC,ZC,ZBS) + + ENDIF + + IFLD=0 + DO JK=1,KFC,ISKIP + IFLD=IFLD+1 + DO JI=1,ILS + PIA(IS+1+(JI-1)*2,JK) = ZBS(JI,IFLD) + ENDDO + ENDDO + + +ENDIF +! +! ------------------------------------------------------------------ + + +END SUBROUTINE LEINVAD +END MODULE LEINVAD_MOD diff --git a/src/trans/cpu/internal/ltdir_ctl_mod.F90 b/src/trans/cpu/internal/ltdir_ctl_mod.F90 new file mode 100644 index 0000000..8dce319 --- /dev/null +++ b/src/trans/cpu/internal/ltdir_ctl_mod.F90 @@ -0,0 +1,109 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_CTL_MOD +CONTAINS +SUBROUTINE LTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC) + +!**** *LTDIR_CTL* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL LTDIR_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_FS - number of fields in Fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! KFLDPTRUV(:) - field pointer for vorticity and divergence (input) +! KFLDPTRSC(:) - field pointer for scalarvalued fields (input) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE LTDIR_MOD ,ONLY : LTDIR +USE TRLTOM_MOD ,ONLY : TRLTOM +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 + +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF + +CALL GSTATS(153,0) +CALL TRLTOM(FOUBUF_IN,FOUBUF,2*KF_FS) +CALL GSTATS(153,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) + +! Direct Legendre transform + +CALL GSTATS(103,0) +ILED2 = 2*KF_FS +CALL GSTATS(1645,0) +IF(KF_FS>0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL LTDIR(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1645,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +CALL GSTATS(103,1) + +! ----------------------------------------------------------------- + +END SUBROUTINE LTDIR_CTL +END MODULE LTDIR_CTL_MOD diff --git a/src/trans/cpu/internal/ltdir_ctlad_mod.F90 b/src/trans/cpu/internal/ltdir_ctlad_mod.F90 new file mode 100644 index 0000000..a78e44b --- /dev/null +++ b/src/trans/cpu/internal/ltdir_ctlad_mod.F90 @@ -0,0 +1,110 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE LTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC) + +!**** *LTDIR_CTLAD* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL LTDIR_CTLAD(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE LTDIRAD_MOD ,ONLY : LTDIRAD +USE TRMTOL_MOD ,ONLY : TRMTOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 + +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +CALL GSTATS(105,0) +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +ENDIF +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1646,0) +IF(KF_FS > 0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL LTDIRAD(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1646,1) + +CALL GSTATS(105,1) + +CALL GSTATS(181,0) +CALL TRMTOL(FOUBUF,FOUBUF_IN,2*KF_FS) +CALL GSTATS(181,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +! ------------------------------------------------------------------ + +END SUBROUTINE LTDIR_CTLAD +END MODULE LTDIR_CTLAD_MOD diff --git a/src/trans/cpu/internal/ltdir_mod.F90 b/src/trans/cpu/internal/ltdir_mod.F90 new file mode 100644 index 0000000..3fdf122 --- /dev/null +++ b/src/trans/cpu/internal/ltdir_mod.F90 @@ -0,0 +1,199 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_MOD +CONTAINS +SUBROUTINE LTDIR(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +USE TPM_TRANS, ONLY : LATLON +USE TPM_FLT +USE TPM_GEOMETRY + +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI2_MOD ,ONLY : PRFI2 +USE LDFOU2_MOD ,ONLY : LDFOU2 +USE LEDIR_MOD ,ONLY : LEDIR +USE UVTVD_MOD +USE UPDSP_MOD ,ONLY : UPDSP +USE CDMAP_MOD , ONLY : CDMAP + +!**** *LTDIR* - Control of Direct Legendre transform step + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *LTDIR(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI2 - prepares the Fourier work arrays for model variables. +! LDFOU2 - computations in Fourier space +! LEDIR - direct Legendre transform +! UVTVD - +! UPDSP - updating of spectral arrays (fields) + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! R. El Khatib 12-Jul-2012 LDSPC2 replaced by UVTVD +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFC, IIFC, IDGLU +INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE +INTEGER(KIND=JPIM) :: ISL, ISLO + +! LOCAL REALS +!REAL(KIND=JPRB) :: ZSIA(KLED2,R%NDGNH), ZAIA(KLED2,R%NDGNH) +REAL(KIND=JPRB) :: ZEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB) :: ZOA1(R%NLED4,KLED2), ZOA2(R%NLED4,MAX(4*KF_UV,1)) +REAL(KIND=JPRB), ALLOCATABLE :: ZAIA(:,:), ZSIA(:,:) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('LTDIR_MOD',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +!* 4. DIRECT LEGENDRE TRANSFORM. +! -------------------------- +IFC = 2*KF_FS +IIFC = IFC +IF(KM == 0)THEN + IIFC = IFC/2 +ENDIF + +IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + +ALLOCATE(ZSIA(KLED2,R%NDGNH)) +ALLOCATE(ZAIA(KLED2,R%NDGNH)) + +IF( LATLON.AND.S%LDLL ) THEN + + IF( (S%LSHIFTLL .AND. KM < 2*IDGLU) .OR.& + & (.NOT.S%LSHIFTLL .AND. KM < 2*(IDGLU-1)) ) THEN + + CALL PREPSNM(KM,KMLOC,ZEPSNM) + ISLO = S%FA(KMLOC)%ISLD + ! map from external to internal (gg) roots and split into anti-symmetric / symmetric + CALL CDMAP(KM,KMLOC,ISL,ISLO,ZEPSNM(R%NTMAX+1),1_JPIM,& + & R%NDGNH,S%NDGNHD,IFC,ZAIA,ZSIA) + + ENDIF + +ELSE + + CALL PRFI2(KM,KMLOC,KF_FS,ZAIA,ZSIA) + +ENDIF + +CALL LDFOU2(KM,KF_UV,ZAIA,ZSIA) + +CALL LEDIR(KM,KMLOC,IFC,IIFC,ISL,IDGLU,KLED2,ZAIA,ZSIA,ZOA1,F%RW(1:R%NDGNH)) + +DEALLOCATE(ZAIA) +DEALLOCATE(ZSIA) + +! ------------------------------------------------------------------ + +!* 5. COMPUTE VORTICITY AND DIVERGENCE. +! --------------------------------- + +IF( KF_UV > 0 ) THEN + CALL PREPSNM(KM,KMLOC,ZEPSNM) + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL UVTVD(KM,KF_UV,ZEPSNM,ZOA1(:,IUS:IUE),ZOA1(:,IVS:IVE),& + & ZOA2(:,IVORS:IVORE),ZOA2(:,IDIVS:IDIVE)) +ENDIF + +! ------------------------------------------------------------------ + +!* 6. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL UPDSP(KM,KF_UV,KF_SCALARS,ZOA1,ZOA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('LTDIR_MOD',1,ZHOOK_HANDLE) + +END SUBROUTINE LTDIR +END MODULE LTDIR_MOD diff --git a/src/trans/cpu/internal/ltdirad_mod.F90 b/src/trans/cpu/internal/ltdirad_mod.F90 new file mode 100644 index 0000000..2c4c6f1 --- /dev/null +++ b/src/trans/cpu/internal/ltdirad_mod.F90 @@ -0,0 +1,188 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIRAD_MOD +CONTAINS +SUBROUTINE LTDIRAD(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY + +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI2AD_MOD ,ONLY : PRFI2AD +USE LDFOU2AD_MOD ,ONLY : LDFOU2AD +USE LEDIRAD_MOD ,ONLY : LEDIRAD +USE UVTVDAD_MOD +USE UPDSPAD_MOD ,ONLY : UPDSPAD + + +!**** *LTDIRAD* - Control of Direct Legendre transform step - adjoint + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *LTDIRAD(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI2AD - prepares the Fourier work arrays for model variables. +! LDFOU2AD - computations in Fourier space +! LEDIRAD - direct Legendre transform +! UVTVDAD - +! UPDSPAD - updating of spectral arrays (fields) + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! R. El Khatib 12-Jul-2012 LDSPC2AD replaced by UVTVDAD +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFC, IIFC, IDGLU +INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE + +! LOCAL REALS +REAL(KIND=JPRB) :: ZSIA(KLED2,R%NDGNH), ZAIA(KLED2,R%NDGNH) +REAL(KIND=JPRB) :: ZEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB) :: ZOA1(R%NLED4,KLED2), ZOA2(R%NLED4,MAX(4*KF_UV,1)) + + +! ------------------------------------------------------------------ + +!* 1. PREPARE LEGENDRE POLONOMIALS AND EPSNM +! -------------------------------------- + + + + +! ------------------------------------------------------------------ + +!* 6. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL UPDSPAD(KM,KF_UV,KF_SCALARS,ZOA1,ZOA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +! ------------------------------------------------------------------ + +!* 5. COMPUTE VORTICITY AND DIVERGENCE. +! --------------------------------- + +IF( KF_UV > 0 ) THEN + CALL PREPSNM(KM,KMLOC,ZEPSNM) + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV +! SET PART OF ZOA1 CONTAINING U AND V TO 0. + ZOA1(:,IUS:IVE) = 0.0_JPRB + CALL UVTVDAD(KM,KF_UV,ZEPSNM,ZOA1(:,IUS:IUE),ZOA1(:,IVS:IVE),& + & ZOA2(:,IVORS:IVORE),ZOA2(:,IDIVS:IDIVE)) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. DIRECT LEGENDRE TRANSFORM. +! -------------------------- +IFC = 2*KF_FS +IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) +IIFC = IFC +IF(KM == 0)THEN + IIFC = IFC/2 +ENDIF +CALL LEDIRAD(KM,KMLOC,IFC,IIFC,IDGLU,KLED2,ZAIA,ZSIA,ZOA1) + +! ------------------------------------------------------------------ + +!* 3. FOURIER SPACE COMPUTATIONS. +! --------------------------- + +CALL LDFOU2AD(KM,KF_UV,ZAIA,ZSIA) + +! ------------------------------------------------------------------ + +!* 2. PREPARE WORK ARRAYS. +! -------------------- + +CALL PRFI2AD(KM,KMLOC,KF_FS,ZAIA,ZSIA) + + +! ------------------------------------------------------------------ + +END SUBROUTINE LTDIRAD +END MODULE LTDIRAD_MOD + diff --git a/src/trans/cpu/internal/ltinv_ctl_mod.F90 b/src/trans/cpu/internal/ltinv_ctl_mod.F90 new file mode 100644 index 0000000..6e16253 --- /dev/null +++ b/src/trans/cpu/internal/ltinv_ctl_mod.F90 @@ -0,0 +1,152 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_CTL_MOD +CONTAINS +SUBROUTINE LTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + +!**** *LTINV_CTL* - Control routine for inverse Legandre transform. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D +USE TPM_FLT ,ONLY : S + +USE LTINV_MOD ,ONLY : LTINV +USE TRMTOL_MOD ,ONLY : TRMTOL + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1 + +! ------------------------------------------------------------------ + +CALL GSTATS(102,0) +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IDIM1 = 2*KF_OUT_LT +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(:) = 0 +ENDIF + +! Following switch necessary when latlon grids are used with different increments in NS and EW direction. +! Otherwise unassigned values will appear in output. This is very likely a bug (ATLAS-149) +IF (S%LDLL) THEN + FOUBUF_IN(:) = 0 +ENDIF + +IF(KF_OUT_LT > 0) THEN + CALL GSTATS(1647,0) + + !!!WARNING!!! Duplication of code besides the FSPGL_PROC argument. + ! It seems that gfortran 10 does not retain the value + ! of FSPGL_PROC within the OMP region. + IF( PRESENT(FSPGL_PROC) ) THEN + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL LTINV(IM,JM,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + ENDDO + !$OMP END PARALLEL DO + ELSE + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL LTINV(IM,JM,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + ENDDO + !$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1647,1) +ENDIF + +CALL GSTATS(102,1) + +CALL GSTATS(152,0) +CALL TRMTOL(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) +CALL GSTATS(152,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +! ------------------------------------------------------------------ + +END SUBROUTINE LTINV_CTL +END MODULE LTINV_CTL_MOD diff --git a/src/trans/cpu/internal/ltinv_ctlad_mod.F90 b/src/trans/cpu/internal/ltinv_ctlad_mod.F90 new file mode 100644 index 0000000..eb00b8d --- /dev/null +++ b/src/trans/cpu/internal/ltinv_ctlad_mod.F90 @@ -0,0 +1,124 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_CTLAD_MOD +CONTAINS +SUBROUTINE LTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + +!**** *LTINV_CTLAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D +USE LTINVAD_MOD ,ONLY : LTINVAD +USE TRLTOM_MOD ,ONLY : TRLTOM + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1 + +! ------------------------------------------------------------------ + +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IDIM1 = 2*KF_OUT_LT +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +ENDIF +CALL GSTATS(180,0) +CALL TRLTOM(FOUBUF,FOUBUF_IN,2*KF_OUT_LT) +CALL GSTATS(180,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) + +CALL GSTATS(104,0) +CALL GSTATS(1648,0) +IF(KF_OUT_LT > 0) THEN +! Bug in gcc <= 10.2, see https://github.com/ecmwf-ifs/ectrans/issues/20 +#if !(defined(__GFORTRAN__) && __GNUC__ == 10 && __GNUC_MINOR__ <= 2) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) +#endif + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL LTINVAD(IM,JM,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + ENDDO +#if !(defined(__GFORTRAN__) && __GNUC__ == 10 && __GNUC_MINOR__ <= 2) +!$OMP END PARALLEL DO +#endif +ENDIF +CALL GSTATS(1648,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +CALL GSTATS(104,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE LTINV_CTLAD +END MODULE LTINV_CTLAD_MOD diff --git a/src/trans/cpu/internal/ltinv_mod.F90 b/src/trans/cpu/internal/ltinv_mod.F90 new file mode 100644 index 0000000..e3dbc7d --- /dev/null +++ b/src/trans/cpu/internal/ltinv_mod.F90 @@ -0,0 +1,336 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_MOD +CONTAINS +SUBROUTINE LTINV(KM,KMLOC,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_FIELDS ,ONLY : F +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B, LATLON +USE TPM_FLT +USE TPM_GEOMETRY + +!USE PRLE1_MOD +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI1B_MOD ,ONLY : PRFI1B +USE VDTUV_MOD ,ONLY : VDTUV +USE SPNSDE_MOD ,ONLY : SPNSDE +USE LEINV_MOD ,ONLY : LEINV +USE ASRE1B_MOD ,ONLY : ASRE1B +USE FSPGL_INT_MOD ,ONLY : FSPGL_INT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE CDMAP_MOD ,ONLY : CDMAP + + + +!**** *LTINV* - Inverse Legendre transform +! +! Purpose. +! -------- +! Tranform from Laplace space to Fourier space, compute U and V +! and north/south derivatives of state variables. + +!** Interface. +! ---------- +! *CALL* *LTINV(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : The Laplace arrays of the model. +! -------------------- The values of the Legendre polynomials +! The grid point arrays of the model +! Method. +! ------- + +! Externals. +! ---------- + +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1B - prepares the spectral fields +! VDTUV - compute u and v from vorticity and divergence +! SPNSDE - compute north-south derivatives +! LEINV - Inverse Legendre transform +! ASRE1 - recombination of symmetric/antisymmetric part + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINV in IFS CY22R1 +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 +INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(R%NLEI1,KLEI2) +REAL(KIND=JPRB) :: ZEPSNM(0:R%NTMAX+2) +!REAL(KIND=JPRB) :: ZSOA1(KDIM1,R%NLEI3),ZAOA1(KDIM1,R%NLEI3) +REAL(KIND=JPRB), ALLOCATABLE :: ZSOA1(:,:), ZAOA1(:,:), ZALN(:,:) + +INTEGER(KIND=JPIM) :: IFC, ISTA, IIFC, IDGLU, JGL, JFLD +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISLO,ISU,IDL,IDU, IGLS +INTEGER(KIND=JPIM) :: IFIRST, ILAST, IDIM1,IDIM3,J3 +INTEGER(KIND=JPIM) :: INSDS, INSDE, IUVS, IUVE, IST + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +!CHARACTER(LEN=10) :: CLHOOK + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!WRITE(CLHOOK,FMT='(A,I4.4)') 'LTINV_',KM +IF (LHOOK) CALL DR_HOOK('LTINV_MOD',0,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!* 1. PREPARE ZEPSNM. +! --------------- + +CALL PREPSNM(KM,KMLOC,ZEPSNM) + +! ------------------------------------------------------------------ + + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +IFIRST = 1 +ILAST = 4*KF_UV + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL PRFI1B(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV,KFLDPTRUV) + CALL PRFI1B(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV + + CALL VDTUV(KM,KF_UV,ZEPSNM,ZIA(:,IVORL:IVORU),ZIA(:,IDIVL:IDIVU),& + & ZIA(:,IUL:IUU),ZIA(:,IVL:IVU)) +ENDIF + +IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL PRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL PRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL PRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL PRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + IF(ILAST /= 8*KF_UV+2*KF_SCALARS) THEN + WRITE(0,*) 'LTINV:KF_UV,KF_SCALARS,ILAST ',KF_UV,KF_SCALARS,ILAST + CALL ABORT_TRANS('LTINV_MOD:ILAST /= 8*KF_UV+2*KF_SCALARS') + ENDIF +ENDIF + +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL SPNSDE(KM,KF_SCALARS,ZEPSNM,ZIA(:,ISL:ISU),ZIA(:,IDL:IDU)) +ENDIF + +! ------------------------------------------------------------------ + + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + + +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF + +IIFC=IFC +IF(KM == 0)THEN + IIFC=IFC/2 +ENDIF + +IF( LATLON.AND.S%LDLL ) THEN + + IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) + + IF( (S%LSHIFTLL .AND. KM < 2*IDGLU) .OR.& + & (.NOT.S%LSHIFTLL .AND. KM < 2*(IDGLU-1)) ) THEN + + ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + ISLO = S%FA(KMLOC)%ISLD + + ALLOCATE(ZAOA1(KDIM1,R%NLEI3)) + ALLOCATE(ZSOA1(KDIM1,R%NLEI3)) + CALL LEINV(KM,KMLOC,IFC,IIFC,KF_OUT_LT,ISL,IDGLU,ZIA(:,ISTA:ISTA+IFC-1),ZAOA1,ZSOA1) + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! before (non-linear) mapping !!!! + + ALLOCATE( ZALN(KDIM1,2*R%NDGNH) ) + DO JGL=ISL, R%NDGNH + IGLS = 2*R%NDGNH+1-JGL + DO JFLD=1,2*KF_OUT_LT + ZALN(JFLD, JGL) = ZSOA1(JFLD,JGL)+ZAOA1(JFLD,JGL) + ZALN(JFLD, IGLS) = ZSOA1(JFLD,JGL)-ZAOA1(JFLD,JGL) + ENDDO + ENDDO + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+2*KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+2*KF_UV + ENDIF + IUVS = IST + IUVE = IST+4*KF_UV-1 + IST = IST+4*KF_UV + IST = IST+2*KF_SCALARS + INSDS = IST + INSDE = IST+2*KF_SCDERS-1 + IST = IST+2*KF_SCDERS + + IGLS = 2*R%NDGNH - ISL + 1 + IF( KF_UV > 0 ) THEN + DO JGL=ISL, IGLS + DO JFLD=IUVS,IUVE + ZALN(JFLD, JGL) = ZALN(JFLD,JGL)*F%RACTHE(JGL) + ENDDO + ENDDO + ENDIF + IF( KF_SCDERS > 0 ) THEN + DO JGL=ISL, IGLS + DO JFLD=INSDS,INSDE + ZALN(JFLD, JGL) = ZALN(JFLD,JGL)*F%RACTHE(JGL) + ENDDO + ENDDO + ENDIF + ENDIF + + DEALLOCATE(ZAOA1) + DEALLOCATE(ZSOA1) + + ! this routine maps to the output latitudes AND fills the FOUBUF + CALL CDMAP(KM,KMLOC,ISL,ISLO,ZEPSNM(R%NTMAX+1),-1_JPIM,& + & R%NDGNH,S%NDGNHD,2*KF_OUT_LT,ZALN,ZALN) + DEALLOCATE(ZALN) + + ENDIF + +ELSE + IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) + ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + + ALLOCATE(ZAOA1(KDIM1,R%NLEI3)) + ALLOCATE(ZSOA1(KDIM1,R%NLEI3)) + CALL LEINV(KM,KMLOC,IFC,IIFC,KF_OUT_LT,ISL,IDGLU,ZIA(:,ISTA:ISTA+IFC-1),ZAOA1,ZSOA1) + +! ------------------------------------------------------------------ + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART/FILL FOUBUF +! -------------------------------------------- + + CALL ASRE1B(KF_OUT_LT,KM,KMLOC,ZAOA1,ZSOA1) + DEALLOCATE(ZAOA1) + DEALLOCATE(ZSOA1) + +ENDIF + +! ------------------------------------------------------------------ + +! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + +IF(PRESENT(FSPGL_PROC)) THEN + CALL FSPGL_INT(KM,KMLOC,KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,FSPGL_PROC,& + & KFLDPTRUV,KFLDPTRSC) +ENDIF + +IF (LHOOK) CALL DR_HOOK('LTINV_MOD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE LTINV +END MODULE LTINV_MOD + + + + diff --git a/src/trans/cpu/internal/ltinvad_mod.F90 b/src/trans/cpu/internal/ltinvad_mod.F90 new file mode 100644 index 0000000..04fd528 --- /dev/null +++ b/src/trans/cpu/internal/ltinvad_mod.F90 @@ -0,0 +1,239 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINVAD_MOD +CONTAINS +SUBROUTINE LTINVAD(KM,KMLOC,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPM_GEOMETRY + +!USE PRLE1AD_MOD +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI1BAD_MOD ,ONLY : PRFI1BAD +USE VDTUVAD_MOD ,ONLY : VDTUVAD +USE SPNSDEAD_MOD ,ONLY : SPNSDEAD +USE LEINVAD_MOD ,ONLY : LEINVAD +USE ASRE1BAD_MOD ,ONLY : ASRE1BAD +!USE FSPGL_INT_MOD + + +!**** *LTINVAD* - Inverse Legendre transform + +! Purpose. +! -------- +! Tranform from Laplace space to Fourier space, compute U and V +! and north/south derivatives of state variables. + +!** Interface. +! ---------- +! *CALL* *LTINVAD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : The Laplace arrays of the model. +! -------------------- The values of the Legendre polynomials +! The grid point arrays of the model +! Method. +! ------- + +! Externals. +! ---------- +! PRLE1AD - prepares the Legendre polonymials +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1AD - prepares the spectral fields +! VDTUVAD - compute u and v from vorticity and divergence +! SPNSDEAD- compute north-south derivatives +! LEINVAD - Inverse Legendre transform +! ASRE1AD - recombination of symmetric/antisymmetric part + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINVAD in IFS CY22R1 +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 +INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(R%NLEI1,KLEI2) +REAL(KIND=JPRB) :: ZEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB) :: ZSOA1(KDIM1,R%NLEI3),ZAOA1(KDIM1,R%NLEI3) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFC, ISTA, IIFC, IDGLU +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: ILAST,IFIRST,IDIM1,IDIM3,J3 + +! LOCAL LOGICAL SCALARS + +! LOCAL REAL SCALARS + +! ------------------------------------------------------------------ + +!* 1. PREPARE AND ZEPSNM. +! ------------------- + +CALL PREPSNM(KM,KMLOC,ZEPSNM) + +! ------------------------------------------------------------------ +! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + +!IF(PRESENT(FSPGL_PROC)) THEN +! CALL FSPGL_INT(KM,KMLOC,FSPGL_PROC) +!ENDIF + +! ------------------------------------------------------------------ + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + +CALL ASRE1BAD(KF_OUT_LT,KM,KMLOC,ZAOA1,ZSOA1) + +! ------------------------------------------------------------------ + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + + +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF + +ZIA(:,ISTA:ISTA+IFC-1) = 0.0_JPRB + +IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) +IIFC=IFC +IF(KM == 0)THEN + IIFC=IFC/2 +ENDIF +CALL LEINVAD(KM,KMLOC,IFC,IIFC,KF_OUT_LT,IDGLU,ZIA(:,ISTA:ISTA+IFC-1),ZAOA1,ZSOA1) + +! ------------------------------------------------------------------ + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +ZIA(:,1:ISTA-1) = 0.0_JPRB + +IFIRST = 1 +ILAST = 4*KF_UV +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL VDTUVAD(KM,KF_UV,ZEPSNM,ZIA(:,IVORL:IVORU),ZIA(:,IDIVL:IDIVU),& + & ZIA(:,IUL:IUU),ZIA(:,IVL:IVU)) + CALL PRFI1BAD(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV,KFLDPTRUV) + CALL PRFI1BAD(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL SPNSDEAD(KM,KF_SCALARS,ZEPSNM,ZIA(:,ISL:ISU),ZIA(:,IDL:IDU)) +ENDIF + +IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF +ENDIF + + +! ------------------------------------------------------------------ + + +END SUBROUTINE LTINVAD +END MODULE LTINVAD_MOD + + + + diff --git a/src/trans/cpu/internal/myrecvset_mod.F90 b/src/trans/cpu/internal/myrecvset_mod.F90 new file mode 100644 index 0000000..ba439c4 --- /dev/null +++ b/src/trans/cpu/internal/myrecvset_mod.F90 @@ -0,0 +1,83 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE MYRECVSET_MOD +CONTAINS +FUNCTION MYRECVSET(KSETS,KMYSET,KSET) + + +!**** *MYRECVSET* RETURNS SET NUMBER TO SEND TO + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! ISENDSET = MYRECVSET(KSETS,KMYSET,KSET) + +! Explicit arguments : +! -------------------- +! input: KSETS + +! Implicit arguments : NONE +! -------------------- +! Method. +! ------- + +! + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE +INTEGER(KIND=JPIM) :: MYRECVSET +INTEGER(KIND=JPIM),INTENT(IN) :: KSETS,KMYSET,KSET + + +! ------------------------------------------------------------------ + +!* 1. Check input argument for validity +! --------------------------------- + +IF(KSETS < 1 .OR. KMYSET > KSETS .OR. KSET > KSETS-1) THEN + + CALL ABORT_TRANS(' MYRECVSET: INVALID ARGUMENT ') + +ELSE + +!* 2. Compute output parameters +! ------------------------- + + MYRECVSET = MOD(-KSET-1+KMYSET+KSETS,KSETS)+1 + +ENDIF + +END FUNCTION MYRECVSET +END MODULE MYRECVSET_MOD diff --git a/src/trans/cpu/internal/mysendset_mod.F90 b/src/trans/cpu/internal/mysendset_mod.F90 new file mode 100644 index 0000000..7db5c52 --- /dev/null +++ b/src/trans/cpu/internal/mysendset_mod.F90 @@ -0,0 +1,80 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE MYSENDSET_MOD +CONTAINS +FUNCTION MYSENDSET(KSETS,KMYSET,KSET) + + +!**** *MYSENDSET* RETURNS SET NUMBER TO SEND TO + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! ISENDSET = MYSENDSET(KSETS,KMYSET,KSET) + +! Explicit arguments : +! -------------------- +! input: KSETS + +! Implicit arguments : NONE +! -------------------- +! Method. +! ------- + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE +INTEGER(KIND=JPIM) :: MYSENDSET +INTEGER(KIND=JPIM),INTENT(IN) :: KSETS,KMYSET,KSET + + +! ------------------------------------------------------------------ + +!* 1. Check input argument for validity +! --------------------------------- + +IF(KSETS < 1 .OR. KMYSET > KSETS .OR. KSET > KSETS-1) THEN + + CALL ABORT_TRANS(' MYSENDSET: INVALID ARGUMENT ') + +ELSE + +!* 2. Compute output parameters +! ------------------------- + + MYSENDSET = MOD(KMYSET+KSET-1,KSETS)+1 + +ENDIF + +END FUNCTION MYSENDSET +END MODULE MYSENDSET_MOD diff --git a/src/trans/cpu/internal/pe2set_mod.F90 b/src/trans/cpu/internal/pe2set_mod.F90 new file mode 100644 index 0000000..f1703a9 --- /dev/null +++ b/src/trans/cpu/internal/pe2set_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 1998- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PE2SET_MOD +CONTAINS +SUBROUTINE PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) + + +!**** *PE2SET* - Convert from PE number to set numbers + +! Purpose. +! -------- +! Convert from PE number to set numbers in both +! grid-point space and spectral space + +!** Interface. +! ---------- +! *CALL* *PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) + +! Explicit arguments : +! -------------------- +! input: KPE - integer processor number +! in the range 1 .. NPROC +! output: KPRGPNS - integer A set number in grid space +! in the range 1 .. NPRGPNS +! KPRGPEW - integer B set number in grid space +! in the range 1 .. NPRGPEW +! KPRTRW - integer A set number in spectral space +! in the range 1 .. NPRTRW +! KPRTRV - integer B set number in spectral space +! in the range 1 .. NPRTRV + +! Implicit arguments : YOMMP parameters +! NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NPROC + +! -------------------- +! Method. +! ------- + +! PE allocation order is row oriented (e.g. NPRGPNS or NPRTRW = 4): + +! 1 2 3 4 +! 5 6 7 8 +! 9 10 11 12 +! 13 14 15 16 +! . . . . + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! David Dent *ECMWF* + +! Modifications. +! -------------- +! Original : 98-08-19 +! Revision : 98-10-13 row ordering +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW, NPROC, NPRTRV +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE +INTEGER(KIND=JPIM),INTENT(IN) :: KPE +INTEGER(KIND=JPIM),INTENT(OUT) :: KPRGPNS,KPRGPEW,KPRTRW,KPRTRV + +INTEGER(KIND=JPIM) :: IPE,JA +! ------------------------------------------------------------------ + +!* 1. Check input argument for validity +! --------------------------------- + +IF(KPE <= 0.OR.KPE > NPROC) THEN + WRITE(*,'(A,2I8)') ' PE2SET INVALID ARGUMENT ',KPE,NPROC + CALL ABORT_TRANS(' PE2SET INVALID ARGUMENT ') + +ELSE + +!* 2. Compute output parameters +! ------------------------- + + IF( LEQ_REGIONS )THEN + KPRGPNS=1 + IPE=KPE + DO JA=1,N_REGIONS_NS + IF( IPE > N_REGIONS(JA) )THEN + IPE=IPE-N_REGIONS(JA) + KPRGPNS=KPRGPNS+1 + CYCLE + ENDIF + KPRGPEW=IPE + EXIT + ENDDO + ELSE + KPRGPEW=MOD(KPE-1,NPRGPEW)+1 + KPRGPNS=(KPE-1)/NPRGPEW+1 + ENDIF + KPRTRV =MOD(KPE-1,NPRTRV)+1 + KPRTRW =(KPE-1)/NPRTRV+1 + +ENDIF + +END SUBROUTINE PE2SET +END MODULE PE2SET_MOD diff --git a/src/trans/cpu/internal/pre_suleg_mod.F90 b/src/trans/cpu/internal/pre_suleg_mod.F90 new file mode 100644 index 0000000..d7f519e --- /dev/null +++ b/src/trans/cpu/internal/pre_suleg_mod.F90 @@ -0,0 +1,71 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRE_SULEG_MOD +CONTAINS +SUBROUTINE PRE_SULEG +USE PARKIND1 ,ONLY : JPRD, JPIM +USE PARKIND2 ,ONLY : JPRH +USE TPM_GEN +USE TPM_DIM +USE TPM_CONSTANTS +USE TPM_DISTR +USE TPM_FIELDS + +INTEGER(KIND=JPIM) :: INM, IM, ICOUNT,JMLOC,JN +LOGICAL :: LLP1,LLP2 + + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +ICOUNT = 0 +DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + DO JN=IM,R%NTMAX+2 + ICOUNT = ICOUNT+1 + ENDDO +ENDDO + +ALLOCATE(F%REPSNM(ICOUNT)) +IF (LLP2) WRITE(NOUT,9) 'F%REPSNM ',SIZE(F%REPSNM ),SHAPE(F%REPSNM ) +ALLOCATE(F%RN(-1:R%NTMAX+3)) +IF (LLP2) WRITE(NOUT,9) 'F%RN ',SIZE(F%RN ),SHAPE(F%RN ) +ALLOCATE(F%RLAPIN(-1:R%NSMAX+2)) +IF (LLP2) WRITE(NOUT,9) 'F%RLAPIN ',SIZE(F%RLAPIN ),SHAPE(F%RLAPIN ) +ALLOCATE(F%NLTN(-1:R%NTMAX+3)) +IF (LLP2) WRITE(NOUT,9) 'F%NLTN ',SIZE(F%NLTN ),SHAPE(F%NLTN ) + +ICOUNT = 0 +DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + DO JN=IM,R%NTMAX+2 + ICOUNT = ICOUNT+1 + F%REPSNM(ICOUNT) = SQRT(REAL(JN*JN-IM*IM,JPRD)/& + &REAL(4*JN*JN-1,JPRD)) + ENDDO +ENDDO + +DO JN=-1,R%NTMAX+3 + F%RN(JN) = REAL(JN,JPRD) + F%NLTN(JN) = R%NTMAX+2-JN +ENDDO +F%RLAPIN(:) = 0.0_JPRD +F%RLAPIN(0) = 0.0_JPRD +F%RLAPIN(-1) = 0.0_JPRD +DO JN=1,R%NSMAX+2 + F%RLAPIN(JN)=-(REAL(RA,JPRD)*REAL(RA,JPRD)/REAL(JN*(JN+1),JPRD)) +ENDDO + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE PRE_SULEG +END MODULE PRE_SULEG_MOD diff --git a/src/trans/cpu/internal/prepsnm_mod.F90 b/src/trans/cpu/internal/prepsnm_mod.F90 new file mode 100644 index 0000000..b7a99fd --- /dev/null +++ b/src/trans/cpu/internal/prepsnm_mod.F90 @@ -0,0 +1,86 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PREPSNM_MOD +CONTAINS +SUBROUTINE PREPSNM(KM,KMLOC,PEPSNM) + + +!**** *PREPSNM* - Prepare REPSNM for wavenumber KM + +! Purpose. +! -------- +! Copy the REPSNM values for specific zonal wavenumber M +! to work array + +!** Interface. +! ---------- +! CALL PREPSNM(...) + +! Explicit arguments : KM - zonal wavenumber +! ------------------- KMLOC - local zonal wavenumber +! PEPSNM - REPSNM for zonal +! wavenumber KM + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + + +! Reference. +! ---------- + + +! Author. +! ------- +! Lars Isaksen *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINV in IFS CY22R1 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KMLOC +REAL(KIND=JPRB), INTENT(OUT) :: PEPSNM(0:R%NTMAX+2) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: JN + +! ------------------------------------------------------------------ + +!* 1. COPY REPSNM. +! ------------ + + +IF (KM > 0) THEN + PEPSNM(0:KM-1) = 0.0_JPRB +ENDIF + +DO JN=KM,R%NTMAX+2 + PEPSNM(JN) = F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN) +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE PREPSNM +END MODULE PREPSNM_MOD + diff --git a/src/trans/cpu/internal/prfi1_mod.F90 b/src/trans/cpu/internal/prfi1_mod.F90 new file mode 100644 index 0000000..235bc0e --- /dev/null +++ b/src/trans/cpu/internal/prfi1_mod.F90 @@ -0,0 +1,115 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1_MOD +CONTAINS +SUBROUTINE PRFI1(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_DISTR +!USE TPM_TRANS + +USE PRFI1B_MOD ,ONLY : PRFI1B + + +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1 in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL PRFI1B(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL PRFI1B(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL PRFI1B(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI1 +END MODULE PRFI1_MOD + + diff --git a/src/trans/cpu/internal/prfi1ad_mod.F90 b/src/trans/cpu/internal/prfi1ad_mod.F90 new file mode 100644 index 0000000..6556aaf --- /dev/null +++ b/src/trans/cpu/internal/prfi1ad_mod.F90 @@ -0,0 +1,113 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1AD_MOD +CONTAINS +SUBROUTINE PRFI1AD(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_DISTR +!USE TPM_TRANS + +USE PRFI1BAD_MOD ,ONLY : PRFI1BAD + + +!**** *PRFI1AD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1AD(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1AD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL PRFI1BAD(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL PRFI1BAD(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL PRFI1BAD(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI1AD +END MODULE PRFI1AD_MOD + + diff --git a/src/trans/cpu/internal/prfi1b_mod.F90 b/src/trans/cpu/internal/prfi1b_mod.F90 new file mode 100644 index 0000000..f5dd282 --- /dev/null +++ b/src/trans/cpu/internal/prfi1b_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1B_MOD +CONTAINS +SUBROUTINE PRFI1B(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + + +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1B(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1B in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF,IFLD + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + + +ILCM = R%NSMAX+1-KM +IOFF = D%NASM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 + PIA(J+2,IR) = PSPEC(IFLD,INM ) + PIA(J+2,II) = PSPEC(IFLD,INM+1) + ENDDO + ENDDO + +ELSE + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 + !DIR$ IVDEP + !OCL NOVREC + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + PIA(J+2,IR) = PSPEC(JFLD,INM ) + PIA(J+2,II) = PSPEC(JFLD,INM+1) + ENDDO + ENDDO + +ENDIF + +DO JFLD=1,2*KFIELDS + PIA(1,JFLD) = 0.0_JPRB + PIA(2,JFLD) = 0.0_JPRB + PIA(ILCM+3,JFLD) = 0.0_JPRB +ENDDO + + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI1B +END MODULE PRFI1B_MOD diff --git a/src/trans/cpu/internal/prfi1bad_mod.F90 b/src/trans/cpu/internal/prfi1bad_mod.F90 new file mode 100644 index 0000000..30b720b --- /dev/null +++ b/src/trans/cpu/internal/prfi1bad_mod.F90 @@ -0,0 +1,112 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1BAD_MOD +CONTAINS +SUBROUTINE PRFI1BAD(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + + +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1BAD(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1BAD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF, IFLD + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + + +ILCM = R%NSMAX+1-KM +IOFF = D%NASM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 + PSPEC(IFLD,INM ) = PSPEC(IFLD,INM ) + PIA(J+2,IR) + PSPEC(IFLD,INM+1) = PSPEC(IFLD,INM+1) + PIA(J+2,II) + ENDDO + ENDDO +ELSE + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + PSPEC(JFLD,INM ) = PSPEC(JFLD,INM ) + PIA(J+2,IR) + PSPEC(JFLD,INM+1) = PSPEC(JFLD,INM+1) + PIA(J+2,II) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI1BAD +END MODULE PRFI1BAD_MOD diff --git a/src/trans/cpu/internal/prfi2_mod.F90 b/src/trans/cpu/internal/prfi2_mod.F90 new file mode 100644 index 0000000..07a29d2 --- /dev/null +++ b/src/trans/cpu/internal/prfi2_mod.F90 @@ -0,0 +1,99 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2_MOD +CONTAINS +SUBROUTINE PRFI2(KM,KMLOC,KF_FS,PAIA,PSIA) + +!**** *PRFI2* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *PRFI2(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. PRFI2B - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_TRANS + +USE PRFI2B_MOD ,ONLY : PRFI2B +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + + +REAL(KIND=JPRB) , INTENT(OUT) :: PSIA(:,:), PAIA(:,:) + +! LOCAL INTEGER SCALARS + + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL PRFI2B(KF_FS,KM,KMLOC,PAIA,PSIA) + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI2 +END MODULE PRFI2_MOD diff --git a/src/trans/cpu/internal/prfi2ad_mod.F90 b/src/trans/cpu/internal/prfi2ad_mod.F90 new file mode 100644 index 0000000..ce7f99a --- /dev/null +++ b/src/trans/cpu/internal/prfi2ad_mod.F90 @@ -0,0 +1,91 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2AD_MOD +CONTAINS +SUBROUTINE PRFI2AD(KM,KMLOC,KF_FS,PAIA,PSIA) + +!**** *PRFI2AD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *PRFI2AD(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. PRFI2ADB - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE PRFI2BAD_MOD ,ONLY : PRFI2BAD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + +REAL(KIND=JPRB) , INTENT(IN) :: PSIA(:,:), PAIA(:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL PRFI2BAD(KF_FS,KM,KMLOC,PAIA,PSIA) + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI2AD +END MODULE PRFI2AD_MOD diff --git a/src/trans/cpu/internal/prfi2b_mod.F90 b/src/trans/cpu/internal/prfi2b_mod.F90 new file mode 100644 index 0000000..92e2428 --- /dev/null +++ b/src/trans/cpu/internal/prfi2b_mod.F90 @@ -0,0 +1,99 @@ +! (C) Copyright 1990- ECMWF. +! (C) Copyright 1990- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2B_MOD +CONTAINS +SUBROUTINE PRFI2B(KFIELD,KM,KMLOC,PAIA,PSIA) + +!**** *PRFI2B* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *PRFI2B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB) , INTENT(OUT) :: PSIA(:,:), PAIA(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IGLS, ISL, ISTAN, ISTAS, JF, JGL + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + +DO JGL=ISL,R%NDGNH + IGLS = R%NDGL+1-JGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + ISTAS = (D%NSTAGT1B(D%NPROCL(IGLS))+D%NPNTGTB1(KMLOC,IGLS))*2*KFIELD +!DIR$ IVDEP +!OCL NOVREC + DO JF=1,KFIELD*2 + PSIA(JF,JGL) = FOUBUF(ISTAN+JF)+FOUBUF(ISTAS+JF) + PAIA(JF,JGL) = FOUBUF(ISTAN+JF)-FOUBUF(ISTAS+JF) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI2B +END MODULE PRFI2B_MOD diff --git a/src/trans/cpu/internal/prfi2bad_mod.F90 b/src/trans/cpu/internal/prfi2bad_mod.F90 new file mode 100644 index 0000000..5a218c1 --- /dev/null +++ b/src/trans/cpu/internal/prfi2bad_mod.F90 @@ -0,0 +1,99 @@ +! (C) Copyright 1990- ECMWF. +! (C) Copyright 1990- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2BAD_MOD +CONTAINS +SUBROUTINE PRFI2BAD(KFIELD,KM,KMLOC,PAIA,PSIA) + +!**** *PRFI2BAD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *PRFI2BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB) , INTENT(IN) :: PSIA(:,:), PAIA(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IGLS, ISL, ISTAN, ISTAS, JF, JGL + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + +DO JGL=ISL,R%NDGNH + IGLS = R%NDGL+1-JGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + ISTAS = (D%NSTAGT1B(D%NPROCL(IGLS))+D%NPNTGTB1(KMLOC,IGLS))*2*KFIELD +!DIR$ IVDEP +!OCL NOVREC + DO JF=1,KFIELD*2 + FOUBUF(ISTAN+JF) = PSIA(JF,JGL)+PAIA(JF,JGL) + FOUBUF(ISTAS+JF) = PSIA(JF,JGL)-PAIA(JF,JGL) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI2BAD +END MODULE PRFI2BAD_MOD diff --git a/src/trans/cpu/internal/read_legpol_mod.F90 b/src/trans/cpu/internal/read_legpol_mod.F90 new file mode 100644 index 0000000..d93a45a --- /dev/null +++ b/src/trans/cpu/internal/read_legpol_mod.F90 @@ -0,0 +1,286 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE READ_LEGPOL_MOD +CONTAINS +SUBROUTINE READ_LEGPOL +USE PARKIND1 ,ONLY : JPIM, JPRB ,JPRD +USE TPM_GEN +USE TPM_DISTR +USE TPM_DIM +USE TPM_GEOMETRY +USE TPM_FLT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TPM_CTL +USE BYTES_IO_MOD +USE BUTTERFLY_ALG_MOD +USE SHAREDMEM_MOD + +!**** *READ_LEGPOL * - read in Leg.Pol. and assocciated arrays from file or memory segment + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! *CALL* *READ_LEGPOL* + +! Explicit arguments : None +! -------------------- + +! Implicit arguments : +! -------------------- +! + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! + +! ------- +! Mats Hamrud and Willem Deconinck *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 + +IMPLICIT NONE + +INTEGER(KIND=JPIM),PARAMETER :: JPIBUFL=4 +INTEGER(KIND=JPIM) :: IRBYTES,IIBYTES,JMLOC,IPRTRV,IMLOC,IM,ILA,ILS +INTEGER(KIND=JPIM) :: IDGLU,ISIZE,IBYTES,IRET,IFILE,JSETV,IDUM,JGL,II,IDGLU2 +INTEGER(KIND=JPIM),POINTER :: IBUF(:) +REAL(KIND=JPRB) ,ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM) ,POINTER :: IBUFA(:) +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 +TYPE(CLONE) :: YLCLONE +CHARACTER(LEN=8) :: CLABEL +CHARACTER(LEN=16) :: CLABEL_16 + +! ------------------------------------------------------------------ + +IRBYTES = 8 +IIBYTES = 4 +IDUM = 3141 + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_OPEN(IFILE,C%CLEGPOLFNAME,'R') + ALLOCATE(IBUF(JPIBUFL)) +ELSE + NULLIFY(IBUF) +ENDIF +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) +ENDIF +CLABEL = TRANSFER(IBUF(1:2),CLABEL) +IF( S%LUSEFLT .AND. CLABEL /= 'LEGPOLBF') THEN + WRITE(NERR,*) S%LUSEFLT,CLABEL + CALL ABORT_TRANS('READ_LEGPOL:WRONG LABEL') +ELSEIF(.NOT. S%LUSEFLT .AND. CLABEL /= 'LEGPOL ') THEN + WRITE(NERR,*) S%LUSEFLT,CLABEL + CALL ABORT_TRANS('READ_LEGPOL:WRONG LABEL') +ENDIF +IF(IBUF(3) /= R%NSMAX) CALL ABORT_TRANS('READ_LEGPOL:WRONG SPECTRAL TRUNCATION') +IF(IBUF(4) /= R%NDGNH) CALL ABORT_TRANS('READ_LEGPOL:WRONG NO OF GAUSSIAN LATITUDES') +IF(C%CIO_TYPE == 'file') THEN + ALLOCATE(IBUFA(2*R%NDGNH)) + CALL BYTES_IO_READ(IFILE,IBUFA,2*R%NDGNH*IIBYTES,IRET) +ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,2*R%NDGNH,IBUFA,ADVANCE=.TRUE.) +ENDIF +II = 0 +DO JGL=1,R%NDGNH + II = II+1 + IF(IBUFA(II) /= G%NLOEN(JGL)) THEN + WRITE(NERR,*) 'WRONG NUMBER OF LONGITUDE POINTS ', JGL,G%NLOEN(JGL),IBUFA(II) + CALL ABORT_TRANS('READ_LEGPOL:WRONG NLOEN') + ENDIF + II=II+1 + IF(IBUFA(II) /= G%NMEN(JGL)) THEN + WRITE(NERR,*) 'WRONG CUT-OFF WAVE NUMBER ', JGL,G%NMEN(JGL),IBUFA(II) + CALL ABORT_TRANS('READ_LEGPOL:WRONG NMEN') + ENDIF +ENDDO +IF(C%CIO_TYPE == 'file') THEN + DEALLOCATE(IBUFA) +ENDIF + +DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) +! Anti-symmetric + IF( S%LUSEFLT .AND. ILA > S%ITHRESHOLD) THEN + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + IF(IBUF(1) /= IDGLU .OR. IBUF(2) /= ILA ) THEN + WRITE(NERR,*) 'READ_LEGPOL ERROR ', IBUF,IDGLU,ILA + CALL ABORT_TRANS('READ_LEGPOL:WRONG MATRIX SIZE') + ENDIF + + ISIZE = IBUF(3) + IF(C%CIO_TYPE == 'file') THEN + ALLOCATE(YLCLONE%COMMSBUF(ISIZE)) + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_READ(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,YLCLONE) + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,YDMEMBUF=C%STORAGE) + ENDIF + ELSE + IF(C%CIO_TYPE == 'file') THEN + ISIZE = IDGLU*ILA + ALLOCATE(ZBUF(ISIZE)) + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(IMLOC)%RPNMA(IDGLU,ILA)) + S%FA(IMLOC)%RPNMA(:,:) = RESHAPE(ZBUF,(/IDGLU,ILA/)) + DEALLOCATE(ZBUF) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,IDGLU,ILA,S%FA(IMLOC)%RPNMA,ADVANCE=.TRUE.) + ENDIF + ENDIF +! Symmetric + IF( S%LUSEFLT .AND. ILS > S%ITHRESHOLD) THEN + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + IF(IBUF(1) /= IDGLU .OR. IBUF(2) /= ILS ) THEN + WRITE(NERR,*) 'READ_LEGPOL ERROR ', IBUF,IDGLU,ILA + CALL ABORT_TRANS('READ_LEGPOL:WRONG MATRIX ZIZE') + ENDIF + ISIZE = IBUF(3) + IF(C%CIO_TYPE == 'file') THEN + ALLOCATE(YLCLONE%COMMSBUF(ISIZE)) + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_READ(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,YLCLONE) + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,YDMEMBUF=C%STORAGE) + ENDIF + ELSE + IF(C%CIO_TYPE == 'file') THEN + ISIZE = IDGLU*ILS + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(IMLOC)%RPNMS(IDGLU,ILS)) + S%FA(IMLOC)%RPNMS(:,:) = RESHAPE(ZBUF,(/IDGLU,ILS/)) + DEALLOCATE(ZBUF) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,IDGLU,ILS,S%FA(IMLOC)%RPNMS,ADVANCE=.TRUE.) + ENDIF + ENDIF + ENDDO +ENDDO + +! Lat-lon grid +IF(S%LDLL) THEN + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + CLABEL_16 = TRANSFER(IBUF,CLABEL_16) + IF(CLABEL_16 /= 'LATLON---BEG-BEG')CALL ABORT_TRANS('READ_LEGPOL:WRONG LAT/LON LABEL') + + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IDGLU2 = S%NDGNHD + + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + IF(IBUF(1) /= IM .OR. IBUF(2) /= IDGLU .OR. IBUF(3) /= IDGLU2 ) THEN + WRITE(NERR,*) 'READ_LEGPOL ERROR ', IBUF,IM,IDGLU,IDGLU2 + CALL ABORT_TRANS('READ_LEGPOL:WRONG LAT-LON MATRIX SIZE') + ENDIF + + IF(C%CIO_TYPE == 'file') THEN + + ISIZE = 2*IDGLU*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(JMLOC)%RPNMWI(2*IDGLU,2)) + S%FA(JMLOC)%RPNMWI(:,:) = RESHAPE(ZBUF,(/2*IDGLU,2/)) + DEALLOCATE(ZBUF) + + ISIZE = 2*IDGLU2*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(JMLOC)%RPNMWO(2*IDGLU2,2)) + S%FA(JMLOC)%RPNMWO(:,:) = RESHAPE(ZBUF,(/2*IDGLU2,2/)) + DEALLOCATE(ZBUF) + + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,2*IDGLU,2,S%FA(JMLOC)%RPNMWI,ADVANCE=.TRUE.) + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,2*IDGLU2,2,S%FA(JMLOC)%RPNMWO,ADVANCE=.TRUE.) + ENDIF + ENDDO +ENDIF + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) +ENDIF +CLABEL_16 = TRANSFER(IBUF,CLABEL_16) +IF(CLABEL_16 /= 'LEGPOL---EOF-EOF')CALL ABORT_TRANS('READ_LEGPOL:WRONG END LABEL') +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_CLOSE(IFILE) + DEALLOCATE(IBUF) +ENDIF + +END SUBROUTINE READ_LEGPOL +END MODULE READ_LEGPOL_MOD diff --git a/src/trans/cpu/internal/set2pe_mod.F90 b/src/trans/cpu/internal/set2pe_mod.F90 new file mode 100644 index 0000000..313057b --- /dev/null +++ b/src/trans/cpu/internal/set2pe_mod.F90 @@ -0,0 +1,131 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 1998- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SET2PE_MOD +CONTAINS +SUBROUTINE SET2PE(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) + + +!**** *SET2PE* - Convert from set numbers to PE number + +! Purpose. +! -------- +! Convert from set numbers in either grid-point space or spectral space +! to PE number + +!** Interface. +! ---------- +! *CALL* *SET2PE(KPRGPNS,KPRGPEW,KPRTRW,KPRTRV,KPE) + +! Explicit arguments : +! -------------------- + +! input : KPRGPNS - integer A set number in grid space +! in the range 1 .. NPRGPNS +! KPRGPEW - integer B set number in grid space +! in the range 1 .. NPRGPEW +! KPRTRW - integer A set number in spectral space +! in the range 1 .. NPRTRW +! KPRTRV - integer B set number in spectral space +! in the range 1 .. NPRTRV +! output: KPE - integer processor number +! in the range 1 .. NPROC + +! Normally, one pair of input set numbers will be set to zero +! SET2PE will compute KPE from the first pair if they are valid numbers. +! else from the other pair, + +! Implicit arguments : YOMMP parameters +! NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NPROC + +! -------------------- +! Method. +! ------- + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! David Dent *ECMWF* + +! Modifications. +! -------------- +! Original : 98-08-19 +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW, NPRGPNS, NPRTRV, NPRTRW +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE +INTEGER(KIND=JPIM),INTENT(IN) :: KPRGPNS,KPRGPEW,KPRTRW,KPRTRV +INTEGER(KIND=JPIM),INTENT(OUT) :: KPE + +INTEGER(KIND=JPIM) :: IPE,JA +! ------------------------------------------------------------------ + +!* 1. Choose from input parameters +! ---------------------------- + +IF(KPRGPNS > 0.AND.KPRGPEW > 0) THEN + + IF( LEQ_REGIONS )THEN + IF( KPRGPNS > N_REGIONS_NS )THEN + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPNS,N_REGIONS_NS + CALL ABOR1(' SET2PE INVALID ARGUMENT ') + ENDIF + IF( KPRGPEW > N_REGIONS(KPRGPNS) )THEN + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPEW,N_REGIONS(KPRGPNS) + CALL ABOR1(' SET2PE INVALID ARGUMENT ') + ENDIF + KPE=0 + DO JA=1,KPRGPNS-1 + KPE=KPE+N_REGIONS(JA) + ENDDO + KPE=KPE+KPRGPEW + ELSE + IF(KPRGPNS <= NPRGPNS.AND.KPRGPEW <= NPRGPEW) THEN + +!* 2. Grid-space set values supplied +! ------------------------------ + + KPE=(KPRGPNS-1)*NPRGPEW + KPRGPEW + ELSE + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPNS,KPRGPEW + CALL ABORT_TRANS(' SET2PE INVALID ARGUMENT ') + ENDIF + ENDIF + +ELSE + +!* 3. Spectral space set values supplied +! ---------------------------------- + + IF(KPRTRW <= NPRTRW.AND.KPRTRV <= NPRTRV) THEN + KPE=(KPRTRW-1)*NPRTRV + KPRTRV + ELSE + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRTRW,KPRTRV + CALL ABORT_TRANS(' SET2PE INVALID ARGUMENT ') + ENDIF + +ENDIF + +END SUBROUTINE SET2PE +END MODULE SET2PE_MOD diff --git a/src/trans/cpu/internal/set_resol_mod.F90 b/src/trans/cpu/internal/set_resol_mod.F90 new file mode 100644 index 0000000..fd98ead --- /dev/null +++ b/src/trans/cpu/internal/set_resol_mod.F90 @@ -0,0 +1,77 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SET_RESOL_MOD +CONTAINS +SUBROUTINE SET_RESOL(KRESOL,LDSETUP) +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL,LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL, TB, FFTB_RESOL +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +#endif +USE TPM_FLT +USE TPM_CTL ,ONLY : C, CTL_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSETUP + +! Local varaibles +INTEGER(KIND=JPIM) :: IRESOL +LOGICAL :: LLSETUP + +! ------------------------------------------------------------------ + +IF(MSETUP0 == 0) CALL ABORT_TRANS('SET_RESOL:TRANS NOT SETUP') +LLSETUP = .FALSE. +IF(PRESENT(LDSETUP)) LLSETUP = LDSETUP +IRESOL = 1 +IF(PRESENT(KRESOL)) THEN + IRESOL = KRESOL + IF(IRESOL < 1 .OR. IRESOL > NMAX_RESOL) THEN + WRITE(NOUT,*)'SET_RESOL: UNKNOWN RESOLUTION ',IRESOL,NMAX_RESOL + CALL ABORT_TRANS('SET_RESOL:IRESOL < 1 .OR. KRESOL > NMAX_RESOL') + ENDIF + IF(.NOT.LLSETUP) THEN + IF(.NOT.LENABLED(IRESOL)) THEN + WRITE(NOUT,*)'SET_RESOL: UNKNOWN RESOLUTION ',IRESOL,LENABLED + CALL ABORT_TRANS('SET_RESOL:IRESOL NOT ENABLED') + ENDIF + ENDIF +ENDIF +IF(IRESOL /= NCUR_RESOL) THEN + NCUR_RESOL = IRESOL + R => DIM_RESOL(NCUR_RESOL) + F => FIELDS_RESOL(NCUR_RESOL) + G => GEOM_RESOL(NCUR_RESOL) + D => DISTR_RESOL(NCUR_RESOL) + T => FFT_RESOL(NCUR_RESOL) + TB => FFTB_RESOL(NCUR_RESOL) +#ifdef WITH_FFTW + TW => FFTW_RESOL(NCUR_RESOL) +#endif + S => FLT_RESOL(NCUR_RESOL) + C => CTL_RESOL(NCUR_RESOL) +ENDIF + +END SUBROUTINE SET_RESOL +END MODULE SET_RESOL_MOD diff --git a/src/trans/cpu/internal/setup_dims_mod.F90 b/src/trans/cpu/internal/setup_dims_mod.F90 new file mode 100644 index 0000000..d8178c2 --- /dev/null +++ b/src/trans/cpu/internal/setup_dims_mod.F90 @@ -0,0 +1,50 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SETUP_DIMS_MOD +CONTAINS +SUBROUTINE SETUP_DIMS + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_FLT ,ONLY : S +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JN,ISPOLEG + +! ------------------------------------------------------------------ + +ISPOLEG = 0 +DO JM=0,R%NSMAX + DO JN=JM,R%NTMAX+1 + ISPOLEG = ISPOLEG+1 + ENDDO +ENDDO +R%NSPOLEG = ISPOLEG + +R%NSPEC_G = (R%NSMAX+1)*(R%NSMAX+2)/2 +R%NSPEC2_G = R%NSPEC_G*2 + +R%NDGNH = (R%NDGL+1)/2 + +R%NLEI1 = R%NSMAX+4+MOD(R%NSMAX+4+1,2) +R%NLEI3 = R%NDGNH+MOD(R%NDGNH+2,2) +IF (S%LSOUTHPNM) R%NLEI3=2*R%NLEI3 + +R%NLED3 = R%NTMAX+2+MOD(R%NTMAX+3,2) +R%NLED4 = R%NTMAX+3+MOD(R%NTMAX+4,2) + +! ------------------------------------------------------------------ + +END SUBROUTINE SETUP_DIMS +END MODULE SETUP_DIMS_MOD diff --git a/src/trans/cpu/internal/setup_geom_mod.F90 b/src/trans/cpu/internal/setup_geom_mod.F90 new file mode 100644 index 0000000..68de63e --- /dev/null +++ b/src/trans/cpu/internal/setup_geom_mod.F90 @@ -0,0 +1,110 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SETUP_GEOM_MOD +CONTAINS +SUBROUTINE SETUP_GEOM + +USE PARKIND1 ,ONLY : JPRD, JPIM + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPM_FIELDS ,ONLY : F +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +REAL(KIND=JPRD) :: ZSQM2(R%NDGL) +INTEGER(KIND=JPIM) :: IDGLU(0:R%NSMAX,R%NDGNH) +INTEGER(KIND=JPIM) :: JGL,JM,NSMAXLIN + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + +IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_GEOM ===' + + ALLOCATE (G%NMEN(R%NDGL)) + IF(LLP2)WRITE(NOUT,9) 'G%NMEN ',SIZE(G%NMEN ),SHAPE(G%NMEN ) + + NSMAXLIN = R%NDGL-1 + IF (R%NSMAX>=NSMAXLIN .OR. .NOT. G%LREDUCED_GRID) THEN + ! linear or full grid + DO JGL=1,R%NDGL + G%NMEN(JGL) = MIN(R%NSMAX,(G%NLOEN(JGL)-1)/2) + ENDDO + ELSEIF (R%NSMAX>=R%NDGL*2/3-1) THEN + ! quadratic grid + ZSQM2(:) = 3*(NSMAXLIN-R%NSMAX)/R%NDGL*F%R1MU2(:) + G%NMEN(1) = MIN(R%NSMAX,INT(REAL(G%NLOEN(1)-1,JPRD)/(2.0_JPRD+ZSQM2(1)))) + DO JGL=2,R%NDGNH + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL-1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL))))) + ENDDO + ! * SOUTHERN HEMISPHERE : + G%NMEN(R%NDGL) = MIN(R%NSMAX,INT(REAL(G%NLOEN(R%NDGL)-1,JPRD)/(2.0_JPRD+ZSQM2(R%NDGL)))) + DO JGL=R%NDGL-1, R%NDGNH+1, -1 + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL+1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL))))) + ENDDO + ELSE + ! cubic grid + ZSQM2(:) = F%R1MU2(:) + G%NMEN(1) = MIN(R%NSMAX,INT(REAL(G%NLOEN(1)-1,JPRD)/(2.0_JPRD+ZSQM2(1)))-1) + DO JGL=2,R%NDGNH + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL-1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL)))-1)) + ENDDO + ! * SOUTHERN HEMISPHERE : + G%NMEN(R%NDGL) = MIN(R%NSMAX,INT(REAL(G%NLOEN(R%NDGL)-1,JPRD)/(2.0_JPRD+ZSQM2(R%NDGL)))-1) + DO JGL=R%NDGL-1, R%NDGNH+1, -1 + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL+1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL)))-1)) + ENDDO + ENDIF + IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JGL,G%NLOEN,G%NMEN) '')') + WRITE(NOUT,FMT='(8(1X,''('',I4,I4,I4,'')''))')& + &(JGL,G%NLOEN(JGL),G%NMEN(JGL),JGL=1,R%NDGL) + ENDIF + ALLOCATE(G%NDGLU(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'G%NDGLU ',SIZE(G%NDGLU ),SHAPE(G%NDGLU ) + IDGLU(:,:) = 0 + G%NDGLU(:) = 0 + DO JGL=1,R%NDGNH + DO JM=0,G%NMEN(JGL) + IDGLU(JM,JGL) = 1 + ENDDO + ENDDO + DO JM=0,R%NSMAX + DO JGL=1,R%NDGNH + G%NDGLU(JM) = G%NDGLU(JM)+IDGLU(JM,JGL) + ENDDO + ENDDO + IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JM,G%NDGLU) '')') + WRITE(NOUT,FMT='(10(1X,''('',I4,I4,'')''))')& + &(JM,G%NDGLU(JM),JM=0,R%NSMAX) + ENDIF + +ENDIF + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SETUP_GEOM +END MODULE SETUP_GEOM_MOD diff --git a/src/trans/cpu/internal/shuffle_mod.F90 b/src/trans/cpu/internal/shuffle_mod.F90 new file mode 100644 index 0000000..9f0d933 --- /dev/null +++ b/src/trans/cpu/internal/shuffle_mod.F90 @@ -0,0 +1,137 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SHUFFLE_MOD +CONTAINS +SUBROUTINE SHUFFLE(KF_UV_G,KF_SCALARS_G,KSHFUV_G,KIVSETUV,KSHFSC_G,KIVSETSC,& + & KVSETUV,KVSETSC) + +!**** *SHUFFLE* - Re-shuffle fields for load balancing + +! Purpose. +! -------- +! Re-shuffle fields for load balancing if NPRTRV>1. Note that the +! relative order of the local spectral fields has to maintained. + +!** Interface. +! ---------- +! CALL SHUFFLE(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KSHFUV_G - reshuffling index for uv fields +! KIVSETUV - reshuffled KVSETUV +! KSHFSC_G - reshuffling index for scalar fields +! KIVSETSC - reshuffled KVSETSC +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. + +! Externals. NONE +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_GEN +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : NPRTRV +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G,KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(OUT) :: KSHFUV_G(:),KSHFSC_G(:) +INTEGER(KIND=JPIM), INTENT(OUT) :: KIVSETUV(:),KIVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) + +INTEGER(KIND=JPIM) :: IHELP(MAX(KF_UV_G,KF_SCALARS_G),NPRTRV),IHELPC(NPRTRV) +INTEGER(KIND=JPIM) :: IDW,J + +! ------------------------------------------------------------------ + +IF(NPRTRV > 1) THEN + IHELP(:,:) = 0 + IHELPC(:) = 0 + DO J=1,KF_UV_G + IHELPC(KVSETUV(J)) = IHELPC(KVSETUV(J))+1 + IHELP(IHELPC(KVSETUV(J)),KVSETUV(J)) = J + ENDDO + IDW = KF_UV_G+1 + DO + DO J=NPRTRV,1,-1 + IF(IHELPC(J) > 0) THEN + IDW = IDW-1 + KSHFUV_G(IDW) = IHELP(IHELPC(J),J) + IHELPC(J) =IHELPC(J)-1 + ENDIF + ENDDO + IF(IDW == 1) EXIT + ENDDO + + IHELP(:,:) = 0 + IHELPC(:) = 0 + DO J=1,KF_SCALARS_G + IHELPC(KVSETSC(J)) = IHELPC(KVSETSC(J))+1 + IHELP(IHELPC(KVSETSC(J)),KVSETSC(J)) = J + ENDDO + IDW = KF_SCALARS_G+1 + DO + DO J=NPRTRV,1,-1 + IF(IHELPC(J) > 0) THEN + IDW = IDW-1 + KSHFSC_G(IDW) = IHELP(IHELPC(J),J) + IHELPC(J) =IHELPC(J)-1 + ENDIF + ENDDO + IF(IDW == 1) EXIT + ENDDO + + DO J=1,KF_UV_G + KIVSETUV(J) = KVSETUV(KSHFUV_G(J)) + ENDDO + DO J=1,KF_SCALARS_G + KIVSETSC(J) = KVSETSC(KSHFSC_G(J)) + ENDDO +ELSE + DO J=1,KF_UV_G + KSHFUV_G(J) = J + KIVSETUV(J) = 1 + ENDDO + DO J=1,KF_SCALARS_G + KSHFSC_G(J) = J + KIVSETSC(J) = 1 + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SHUFFLE +END MODULE SHUFFLE_MOD diff --git a/src/trans/cpu/internal/spnorm_ctl_mod.F90 b/src/trans/cpu/internal/spnorm_ctl_mod.F90 new file mode 100644 index 0000000..154e5dc --- /dev/null +++ b/src/trans/cpu/internal/spnorm_ctl_mod.F90 @@ -0,0 +1,62 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNORM_CTL_MOD +CONTAINS +SUBROUTINE SPNORM_CTL(PSPEC,KFLD,KFLD_G,KVSET,KMASTER,PMET,PNORM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MYPROC, MYSETV + +USE SPNORMD_MOD ,ONLY : SPNORMD +USE SPNORMC_MOD ,ONLY : SPNORMC +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFLD,KFLD_G +INTEGER(KIND=JPIM) :: IVSET(KFLD_G) +REAL(KIND=JPRB) :: ZMET(0:R%NSMAX) +REAL(KIND=JPRB) :: ZSM(KFLD,D%NUMP) +REAL(KIND=JPRB) :: ZGM(KFLD_G,0:R%NSMAX) + +! ------------------------------------------------------------------ + +IF(PRESENT(KVSET)) THEN + IVSET(:) = KVSET(:) +ELSE + IVSET(:) = MYSETV +ENDIF + +IF(PRESENT(PMET)) THEN + ZMET(:) = PMET(:) +ELSE + ZMET(:) = 1.0_JPRB +ENDIF + +CALL SPNORMD(PSPEC,KFLD,ZMET,ZSM) + +CALL SPNORMC(ZSM,KFLD_G,IVSET,KMASTER,R%NSMAX,ZGM) + +IF(MYPROC == KMASTER) THEN + PNORM(1:KFLD_G) = SUM(ZGM,DIM=2) + PNORM(1:KFLD_G) = SQRT(PNORM(1:KFLD_G)) +ENDIF +! ------------------------------------------------------------------ + +END SUBROUTINE SPNORM_CTL +END MODULE SPNORM_CTL_MOD diff --git a/src/trans/cpu/internal/spnormc_mod.F90 b/src/trans/cpu/internal/spnormc_mod.F90 new file mode 100644 index 0000000..f894cf2 --- /dev/null +++ b/src/trans/cpu/internal/spnormc_mod.F90 @@ -0,0 +1,89 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNORMC_MOD +CONTAINS +SUBROUTINE SPNORMC(PSM,KFLD_G,KVSET,KMASTER,KSMAX,PGM) + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER + +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, MYPROC, NPROC + +USE PE2SET_MOD ,ONLY : PE2SET + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSM(:,:) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KMASTER +INTEGER(KIND=JPIM) ,INTENT(IN) :: KSMAX +REAL(KIND=JPRB) ,INTENT(OUT) :: PGM(KFLD_G,0:KSMAX) + +REAL(KIND=JPRB) :: ZRECVBUF(SIZE(PGM)) +INTEGER(KIND=JPIM) :: IFLDR(NPRTRV) + +INTEGER(KIND=JPIM) :: ISTOTAL,JFLD,ITAG,JROC,IMSGLEN,IRECVID +INTEGER(KIND=JPIM) :: IRECVNUMP,IRECVFLD,IFLD,JMLOC,IM,IBUFLENR,IA,IB +INTEGER(KIND=JPIM) :: IRECVSETA,IRECVSETB +! ------------------------------------------------------------------ + +ISTOTAL = SIZE(PSM) +IBUFLENR = SIZE(ZRECVBUF) + +IFLDR(:) = 0 +DO JFLD=1,KFLD_G + IFLDR(KVSET(JFLD)) = IFLDR(KVSET(JFLD))+1 +ENDDO +ITAG = 100 + +IF (NPROC > 1.AND.MYPROC /= KMASTER) THEN + CALL MPL_SEND(PSM(:,:),KDEST=NPRCIDS(KMASTER),KTAG=ITAG,& + &CDSTRING='SPNORMC:') +ENDIF + +IF (MYPROC == KMASTER) THEN + DO JROC=1,NPROC + IF (JROC == KMASTER) THEN + ZRECVBUF(1:ISTOTAL) = RESHAPE(PSM,SHAPE(ZRECVBUF(1:ISTOTAL))) + IRECVID = MYPROC + IMSGLEN = ISTOTAL + ELSE + CALL MPL_RECV(ZRECVBUF(1:IBUFLENR),KTAG=ITAG,& + &KFROM=IRECVID,CDSTRING='SPNORMC :') + ENDIF + CALL PE2SET(IRECVID,IA,IB,IRECVSETA,IRECVSETB) + IRECVNUMP = D%NUMPP(IRECVSETA) + IRECVFLD = IFLDR(IRECVSETB) + IFLD = 0 + DO JFLD=1,KFLD_G + IF(KVSET(JFLD) == IRECVSETB) THEN + IFLD=IFLD+1 + DO JMLOC=1,IRECVNUMP + IM = D%NALLMS(D%NPTRMS(IRECVSETA)-1+JMLOC) + PGM(JFLD,IM) = ZRECVBUF((JMLOC-1)*IRECVFLD+IFLD) + ENDDO + ENDIF + ENDDO + ENDDO +ENDIF + +! Perform barrier synchronisation to guarantee all processors have +! completed communication + +IF( NPROC > 1 )THEN + CALL MPL_BARRIER(CDSTRING='SPNORMC') +ENDIF +! ------------------------------------------------------------------ + +END SUBROUTINE SPNORMC +END MODULE SPNORMC_MOD diff --git a/src/trans/cpu/internal/spnormd_mod.F90 b/src/trans/cpu/internal/spnormd_mod.F90 new file mode 100644 index 0000000..89c0bfb --- /dev/null +++ b/src/trans/cpu/internal/spnormd_mod.F90 @@ -0,0 +1,66 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNORMD_MOD +CONTAINS +SUBROUTINE SPNORMD(PSPEC,KFLD,PMET,PSM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PMET(0:R%NSMAX) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD +REAL(KIND=JPRB) ,INTENT(OUT) :: PSM(:,:) + +INTEGER(KIND=JPIM) :: JM ,JFLD ,JN ,IM ,ISP + +! ------------------------------------------------------------------ + + +CALL GSTATS(1651,0) +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JM,IM,JN,ISP,JFLD) +DO JM=1,D%NUMP + PSM(:,JM) = 0.0_JPRB + IM = D%MYMS(JM) + IF(IM == 0)THEN + DO JN=0,R%NSMAX + ISP = D%NASM0(0)+JN*2 + DO JFLD=1,KFLD + PSM(JFLD,JM) = PSM(JFLD,JM)+PMET(JN)*PSPEC(JFLD,ISP)**2 + ENDDO + ENDDO + ELSE + DO JN=IM,R%NSMAX + ISP = D%NASM0(IM)+(JN-IM)*2 + DO JFLD=1,KFLD + PSM(JFLD,JM) = PSM(JFLD,JM)+2.0_JPRB*PMET(JN)*& + &(PSPEC(JFLD,ISP)**2+PSPEC(JFLD,ISP+1)**2) + ENDDO + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1651,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE SPNORMD +END MODULE SPNORMD_MOD + + + + + diff --git a/src/trans/cpu/internal/spnsde_mod.F90 b/src/trans/cpu/internal/spnsde_mod.F90 new file mode 100644 index 0000000..74774dc --- /dev/null +++ b/src/trans/cpu/internal/spnsde_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNSDE_MOD +CONTAINS +SUBROUTINE SPNSDE(KM,KF_SCALARS,PEPSNM,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_GEN +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +!USE TPM_TRANS + + +!**** *SPNSDE* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDE(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDE in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(IN) :: PF(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PNSD(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IJ, ISKIP, J, JN,JI,ISMAX +REAL(KIND=JPRB) :: ZEPSNM(-1:R%NSMAX+4) +REAL(KIND=JPRB) :: ZN(-1:R%NTMAX+4) + + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + + +!* 1.1 COMPUTE + +ISMAX = R%NSMAX +DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) +ENDDO +ZN(0) = F%RN(ISMAX+3) + +IF(KM == 0) THEN + ISKIP = 2 +ELSE + ISKIP = 1 +ENDIF + +DO J=1,2*KF_SCALARS,ISKIP + DO JI=2,ISMAX+3-KM + PNSD(JI,J) = -ZN(JI+1)*ZEPSNM(JI)*PF(JI+1,J)+& + &ZN(JI-2)*ZEPSNM(JI-1)*PF(JI-1,J) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE SPNSDE +END MODULE SPNSDE_MOD diff --git a/src/trans/cpu/internal/spnsdead_mod.F90 b/src/trans/cpu/internal/spnsdead_mod.F90 new file mode 100644 index 0000000..5564333 --- /dev/null +++ b/src/trans/cpu/internal/spnsdead_mod.F90 @@ -0,0 +1,119 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNSDEAD_MOD +CONTAINS +SUBROUTINE SPNSDEAD(KM,KF_SCALARS,PEPSNM,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_GEN +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +!USE TPM_TRANS + +!**** *SPNSDEAD* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDEAD(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDEAD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(INOUT) :: PF(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PNSD(:,:) + +INTEGER(KIND=JPIM) :: IJ, ISKIP, J, JN,JI,ISMAX +REAL(KIND=JPRB) :: ZEPSNM(-1:R%NSMAX+4) +REAL(KIND=JPRB) :: ZN(-1:R%NTMAX+4) + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +ISMAX = R%NSMAX +DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) +ENDDO + +ZN(0) = F%RN(ISMAX+3) +IF(KM == 0) THEN + ISKIP = 2 +ELSE + ISKIP = 1 +ENDIF + +!cdir loopchg +!cdir select(vector) +DO J=1,2*KF_SCALARS,ISKIP + DO JI=2,ISMAX+3-KM + PF(JI+1,J) = PF(JI+1,J)-ZN(JI+1)*ZEPSNM(JI) *PNSD(JI,J) + PF(JI-1,J) = PF(JI-1,J)+ZN(JI-2)*ZEPSNM(JI-1)*PNSD(JI,J) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE SPNSDEAD +END MODULE SPNSDEAD_MOD diff --git a/src/trans/cpu/internal/sufft_mod.F90 b/src/trans/cpu/internal/sufft_mod.F90 new file mode 100644 index 0000000..dec0e91 --- /dev/null +++ b/src/trans/cpu/internal/sufft_mod.F90 @@ -0,0 +1,106 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUFFT_MOD +CONTAINS +SUBROUTINE SUFFT + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T, TB +#ifdef WITH_FFTW +USE TPM_FFTW ,ONLY : TW, INIT_PLANS_FFTW +#endif +USE BLUESTEIN_MOD ,ONLY : BLUESTEIN_INIT, FFTB_TYPE +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JGL,IGLG, ILATS +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + +IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUFFT ===' + +#ifdef WITH_FFTW + IF(TW%LFFTW)THEN + + CALL INIT_PLANS_FFTW(R%NDLON) + + ELSE + + NULLIFY(TW%FFTW_PLANS) +#endif + + ALLOCATE(T%TRIGS(R%NDLON,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%TRIGS ',SIZE(T%TRIGS),SHAPE(T%TRIGS) + ALLOCATE(T%NFAX(19,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%NFAX ',SIZE(T%NFAX),SHAPE(T%NFAX) + ALLOCATE(T%LUSEFFT992(D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%LUSEFFT992',SIZE(T%LUSEFFT992),SHAPE(T%LUSEFFT992) + + ! + ! create TRIGS and NFAX for latitude lengths supported by FFT992, + ! that is just with factors 2, 3 or 5 + ! + + T%LBLUESTEIN=.FALSE. + ILATS=0 + DO JGL=1,D%NDGL_FS + IGLG = D%NPTRLS(MYSETW)+JGL-1 + CALL SET99B(T%TRIGS(1,JGL),T%NFAX(1,JGL),G%NLOEN(IGLG),T%LUSEFFT992(JGL)) + IF( .NOT.T%LUSEFFT992(JGL) )THEN + ILATS=ILATS+1 + T%LBLUESTEIN=.TRUE. + ENDIF + ENDDO + + ! + ! we only initialise for bluestein if there are latitude lengths + ! not supported by FFT992 + ! + + IF( T%LBLUESTEIN )THEN + TB%NDLON=R%NDLON + TB%NLAT_COUNT=ILATS + ILATS=0 + ALLOCATE(TB%NLATS(TB%NLAT_COUNT)) + DO JGL=1,D%NDGL_FS + IF( .NOT.T%LUSEFFT992(JGL) )THEN + ILATS=ILATS+1 + IGLG = D%NPTRLS(MYSETW)+JGL-1 + TB%NLATS(ILATS)=G%NLOEN(IGLG) + ENDIF + ENDDO + CALL BLUESTEIN_INIT(TB) + ENDIF + +#ifdef WITH_FFTW + + ENDIF +#endif + +ENDIF + +! ------------------------------------------------------------------ + +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUFFT +END MODULE SUFFT_MOD diff --git a/src/trans/cpu/internal/sugaw_mod.F90 b/src/trans/cpu/internal/sugaw_mod.F90 new file mode 100644 index 0000000..410fca7 --- /dev/null +++ b/src/trans/cpu/internal/sugaw_mod.F90 @@ -0,0 +1,431 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUGAW_MOD +CONTAINS +SUBROUTINE SUGAW(KDGL,KM,KN,PL,PW,PANM,PFN) + +USE PARKIND1 ,ONLY : JPRD, JPIM +USE PARKIND2 ,ONLY : JPRH + +USE TPM_CONSTANTS ,ONLY : RA + +USE TPM_GEN ,ONLY : NOUT +USE GAWL_MOD ,ONLY : GAWL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SUPOLF_MOD +USE TPM_POL + +!**** *SUGAW * - Routine to initialize the Gaussian +! abcissa and the associated weights + +! Purpose. +! -------- +! Initialize arrays PL, and PW (quadrature abscissas and weights) +!** Interface. +! ---------- +! *CALL* *SUGAW(KN,PFN,PL,PW) * + +! Explicit arguments : +! -------------------- +! INPUT: +! KDGL : Number of Gauss abscissas +! KM : Polynomial order m +! KN : Polynomial degree n +! PFN : Fourier coefficients of series expansion for +! the ordinary Legendre polynomials +! OUTPUT: +! PL (KN) : abscissas of Gauss +! PW (KN) : Weights of the Gaussian integration + +! PL (i) is the abscissa i starting from the northern pole, it is +! the cosine of the colatitude of the corresponding row of the collocation +! grid. + +! Implicit arguments : +! -------------------- +! None + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- + +! S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon Press (1962) +! P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre quadrature, +! SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! Michel Rochas : 90-08-30 +! Philippe Courtier : 92-12-19 Multitasking +! Ryad El Khatib : 94-04-20 Remove unused comdecks pardim and yomdim +! Mats Hamrud : 94-08-12 Printing level +! K. Yessad (Sep 2008): cleaning, improve comments. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KN + +REAL(KIND=JPRD) ,INTENT(IN) :: PANM + +REAL(KIND=JPRD),INTENT(OUT) :: PW(KDGL) +REAL(KIND=JPRD),INTENT(OUT) :: PL(KDGL) + +REAL(KIND=JPRD) ,OPTIONAL, INTENT(IN) :: PFN(0:KDGL,0:KDGL) + +! ------------------------------------------------------------------ + +REAL(KIND=JPRD) :: ZLI(KDGL),ZT(KDGL),ZFN(0:KDGL/2),ZL(KDGL) +REAL(KIND=JPRD) :: ZREG(KDGL),ZMOD(KDGL),ZM(KDGL),ZRR(KDGL) +INTEGER(KIND=JPIM) :: ITER(KDGL) + +INTEGER(KIND=JPIM) :: IALLOW, INS2, ISYM, JGL, IK, IODD, I, IMAX + +REAL(KIND=JPRD) :: Z, ZEPS, Z0, ZPI + +! computations in extended precision for alternative root finding +! which also works for associated polynomials (m>0) +REAL(KIND=JPRH) :: ZLK, ZLK1, ZLLDN, ZANM +REAL(KIND=JPRH) :: ZTHETA, ZTHETA0, ZX, ZX0, ZDX0, ZH, ZPIH, ZS0 +REAL(KIND=JPRH) :: ZK1, ZK2, ZK3, ZK4 +REAL(KIND=JPRH) :: ZF1, ZF2, ZF3 +REAL(KIND=JPRH) :: FP, FQ, FP1, FQ1 +REAL(KIND=JPRH) :: X, ZXOLD, ZBIG, ZEPSH + +INTEGER(KIND=JPIM) :: ISTEPMAX + +LOGICAL :: LLP2, LLREF, LLOLD + +REAL(KIND=JPRD) :: ZDDPOL(0:KN) + +INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(ZLK) + +FP(X) = 1._JPRH-X**2 +FQ(X) = REAL(KN*(KN+1),JPRH)-REAL(KM**2,JPRH)/(1._JPRH-X**2) +FP1(X) = -2._JPRH*X +FQ1(X) = -2._JPRH*X*REAL(KM**2,JPRH)/SQRT(1._JPRH-X**2) + +! ------------------------------------------------------------------ +! ------------------------------------------------------------------ +!* 1. Initialization + root + weight computation +! ------------------------------------------ + +LLP2 = .FALSE. +INS2 = KDGL/2 + +LLOLD=( KM == 0 .AND. KN == KDGL ).AND.PRESENT(PFN) + + +CALL GSTATS(1650,0) + +ZEPS = EPSILON(Z) +ZEPSH = EPSILON(X) + +ZBIG = SQRT(HUGE(X)) + +!* 1.1 Find the roots of the ordinary +! Legendre polynomial of degree KN using an analytical first guess +! and then refine to machine precision via Newton's method +! in double precision following Swarztrauber (2002) + +! Nils Comment: in principle the else case could also be used for this but +! this is slightly more accurate and consistent with the past + +IF( LLOLD ) THEN + + ZPI = 2.0_JPRD*ASIN(1.0_JPRD) + IODD=MOD(KDGL,2) + IK=IODD + DO JGL=IODD,KDGL,2 + ZFN(IK)=PFN(KDGL,JGL) + IK=IK+1 + ENDDO + + DO JGL=1,INS2 + Z = REAL(4*JGL-1,JPRD)*ZPI/REAL(4*KN+2,JPRD) + ! analytic initial guess for cos(theta) (same quality as RK below) + ! ZX = 1._JPRD-REAL(KN-1,JPRD)/REAL(8*KN*KN*KN,JPRD)-(1._JPRD/REAL(384*KN*KN*KN*KN))*(39._JPRD-28._JPRD/SIN(Z)**2) + ! PL(JGL) = ACOS(ZX*COS(Z)) + ZL(JGL) = Z+1.0_JPRD/(TAN(Z)*REAL(8*KN**2,JPRD)) + ZREG(JGL) = COS(Z) + ZLI(JGL) = COS(ZL(JGL)) + ENDDO + + ! refine PL here via Newton's method + + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JGL) + DO JGL=INS2,1,-1 + CALL GAWL(ZFN,ZL(JGL),PW(JGL),ZEPS,KN,ITER(JGL),ZMOD(JGL)) + ENDDO + !$OMP END PARALLEL DO + + ! convert to physical latitude space PMU + !DIR$ IVDEP + !OCL NOVREC + DO JGL=1,INS2 + PL(JGL) = COS(ZL(JGL)) + ENDDO + +ELSE + +!* 1.2 Find the roots of the associated +! Legendre polynomial of degree KN and the associated Gaussian weights +! using a Runge-Kutta 4 integration of the Pruefer transformed Sturm-Liouville problem +! (Tygert (J. Comput. Phys. 2008) and Glaser et al., SIAM J. SCI. COMPUT. Vol. 29 (4) 1420-1438) +! + + ISTEPMAX=10 + + ZANM = REAL(PANM, JPKD) + ZPIH = 2.0_JPRH*ASIN(1.0_JPRH) + + ZX0 = 0._JPRH + Z0 = 0._JPRD + + ! first guess starting point + IF( MOD(KN-KM,2) == 0 ) THEN + ! even, extremum at X == 0 + ZTHETA0 = 0._JPRH + ZH = -0.5_JPRH*ZPIH/REAL(ISTEPMAX,JPRH) + ELSE + ! odd, root at X == 0 + ZTHETA0 = 0.5_JPRH*ZPIH + ZX0 = 0._JPRH + ZH = -ZPIH/REAL(ISTEPMAX,JPRH) + ENDIF + + ZX = ZX0 + ZTHETA = ZTHETA0 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ! Formula (81) in Tygert + ZDX0=-1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + + ! loop over all roots + LLREF=.TRUE. + DO JGL=INS2,1,-1 + + ! runge-kutta + DGL:DO IK=1,ISTEPMAX + + ZK1 = ZDX0 + ZTHETA = ZTHETA + 0.5_JPRH*ZH + + ZX = ZX0 + 0.5_JPRH*ZH*ZK1 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZK2 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ZX = ZX0 + 0.5_JPRH*ZH*ZK2 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZK3 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ZTHETA = ZTHETA + 0.5_JPRH*ZH + ZX = ZX0 + ZH*ZK3 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZK4 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ZX = ZX0 + (1._JPRH/6._JPRH)*ZH*(ZK1+2._JPRH*ZK2+2._JPRH*ZK3+ZK4) + ZXOLD = ZX0 + + ZX0 = ZX + + IF( .NOT.ZX==ZX ) THEN + WRITE(NOUT,*) 'invoke overflow ...ZX ',KM, KN, JGL + ZX = ZXOLD + ZX0 = ZXOLD + EXIT DGL + ENDIF + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZDX0 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + + ENDDO DGL + +! ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! Everything from here until <> is to refine the +! root and compute the starting point for the next root search +! ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + ! should not happen, but does if loss of accuracy in supolf occurs (useful for debugging) + IF( JGL < INS2 ) LLREF = PW(JGL+1).GT.ZEPSH + + IF( LLREF ) THEN + + ! chosen for speed/accuracy compromise + IMAX=3 + LOOP: DO I=1,IMAX + ! supol fast + ZS0 = ACOS(ZX0) + CALL SUPOLF(KM,KN,REAL(ZX0,JPRD),ZDDPOL) + ZLK=REAL(ZDDPOL(KN),JPKD) + ZLK1= REAL(ZDDPOL(KN-1),JPKD) + ZLLDN= -(ZANM*ZLK1-DDI(KN)*COS(ZS0)*ZLK)/SIN(ZS0) + + IF( ABS(ZLLDN) > ZEPSH ) THEN + ! single Newton refinement in theta + ZS0 = ZS0 - ZLK/ZLLDN + ZX = COS(ZS0) + ELSE + ! do nothing + ZX = ZX0 + ENDIF + + IF( ABS(ZX-ZX0) > 1000._JPRD*ZEPS ) THEN + ZX0 = ZX + ELSE + EXIT LOOP + ENDIF + ENDDO LOOP + + ! recompute for accuracy weights + CALL SUPOLF(KM,KN,REAL(ZX,JPRD),ZDDPOL) + ! option f in Schwarztrauber to compute the weights + ZS0 = ACOS(ZX) + ZLK=REAL(ZDDPOL(KN),JPKD) + ZLK1= REAL(ZDDPOL(KN-1),JPKD) + ZLLDN= -(ZANM*ZLK1-DDI(KN)*COS(ZS0)*ZLK)/SIN(ZS0) + + PW(JGL) = REAL(REAL(2*KN+1,JPRH)/ZLLDN**2,JPRD) + + ! catch overflow, should never happen + IF( .NOT.(PW(JGL)==PW(JGL)) ) THEN + WRITE(NOUT,*) 'invoke overflow ...PW ',KM, KN, JGL + PW(JGL) = 0.0_JPRD + ENDIF + + ELSE + ! should never happen ... + WRITE(NOUT,*) 'Refinement not possible ... PW set to 0',KM, KN, JGL + PW(JGL) = 0.0_JPRD + ENDIF + + ZX0 = ZX + PL(JGL) = REAL(ZX0,JPRD) + + ! catch overflow, should never happen + IF( .NOT.(PW(JGL)==PW(JGL)) ) THEN + WRITE(NOUT,*) 'invoke overflow ...PW ',KM, KN, JGL + PW(JGL) = 0.0_JPRD + ENDIF + +! ++++++++++++++++++++++++++++++++++++++++++++++++ +! <<<< END REFINEMENT >>>> +! ++++++++++++++++++++++++++++++++++++++++++++++++ + + ZF1 = SQRT(FQ(ZX0)/FP(ZX0)) + ZF2 = FQ1(ZX0)/FQ(ZX0) + ZF3 = FP1(ZX0)/FP(ZX0) + + ! continue to next root with refined ZX,ZR as initial condition + ZH = -ZPIH/REAL(ISTEPMAX,JPRH) + ZTHETA = 0.5_JPRH*ZPIH + ZDX0 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ENDDO + +ENDIF + +CALL GSTATS(1650,1) +! ------------------------------------------------------------------ + +!DIR$ IVDEP +!OCL NOVREC +DO JGL=1,KDGL/2 + ISYM = KDGL-JGL+1 + PL(ISYM) = -PL(JGL) + PW(ISYM) = PW(JGL) +ENDDO + +! ------------------------------------------------------------------ + +!* 3. Diagnostics. +! ------------ + +IF( LLOLD ) THEN + + IF(LLP2)THEN + DO JGL=1,INS2 + ZM(JGL) = (ACOS(PL(JGL))-ACOS(ZLI(JGL)))*RA + ZRR(JGL) = (ACOS(PL(JGL))-ACOS(ZREG(JGL)))*RA + ZT(JGL) = ACOS(PL(JGL))*180._JPRD/ZPI + ENDDO + ENDIF + + IALLOW = 20 + DO JGL=1,INS2 + + IF(LLP2)THEN + WRITE(UNIT=NOUT,FMT=& + &'('' M ='',I4,'' ROW ='',I4,'' ITERATIONS='',I4,'' ROOT='',F30.20,& + &'' WEIGHT='',F30.20,'' MODIF :'',E8.2)')KM,JGL,ITER(JGL),PL(JGL)& + &,PW(JGL),PL(JGL)-ZLI(JGL) + WRITE(UNIT=NOUT,FMT=& + &'(10X,'' LAST INC. : '',E8.2,'' MODIF IN M : '',F10.3,& + &'' FROM THE REGULAR GRID : '',F10.3,'' COLAT '',F10.3)')& + &ZMOD(JGL),ZM(JGL),ZRR(JGL),ZT(JGL) + ENDIF + + IF(ITER(JGL) > IALLOW)THEN + WRITE(UNIT=NOUT,FMT='('' CONVERGENCE FAILED IN SUGAW '')') + WRITE(UNIT=NOUT,FMT='('' ALLOWED : '',I4,''& + &NECESSARY : '',& + &I4)')IALLOW,ITER(JGL) + CALL ABORT_TRANS(' FAILURE IN SUGAW ') + ENDIF + + ENDDO + +ELSE + + IF(LLP2)THEN + DO JGL=1,INS2 + WRITE(UNIT=NOUT,FMT=& + &'('' M ='',I4,'' ROW ='',I4,'' ITERATIONS='',I4,'' ROOT='',F30.20,& + &'' WEIGHT='',F30.20,'' COLAT '',F10.3)')KM,JGL,0,PL(JGL),PW(JGL),& + & ACOS(PL(JGL))*180._JPRD/ZPIH + ENDDO + ENDIF + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUGAW +END MODULE SUGAW_MOD diff --git a/src/trans/cpu/internal/suleg_mod.F90 b/src/trans/cpu/internal/suleg_mod.F90 new file mode 100644 index 0000000..53886b3 --- /dev/null +++ b/src/trans/cpu/internal/suleg_mod.F90 @@ -0,0 +1,1207 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SULEG_MOD +#ifdef __NEC__ +#define SIZEOF(x) STORAGE_SIZE(x)/KIND(x) +#endif +CONTAINS +SUBROUTINE SULEG +!DEC$ OPTIMIZE:1 + +USE PARKIND1 ,ONLY : JPRD, JPIM, JPRB +USE PARKIND2 ,ONLY : JPRH +USE MPL_MODULE ,ONLY : MPL_BYTES, MPL_BARRIER, JP_NON_BLOCKING_STANDARD, MPL_RECV, & + & MPL_SEND, MPL_WAIT + +USE TPM_GEN ,ONLY : NOUT, LMPOFF, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_CONSTANTS ,ONLY : RA +USE TPM_DISTR ,ONLY : NPRTRV, NPRTRW, NPROC, D, MTAGLETR, MYPROC, MYSETV, MYSETW, NPRCIDS +USE TPM_FIELDS ,ONLY : F +USE TPM_FLT ,ONLY : S +USE TPM_GEOMETRY ,ONLY : G +USE TPM_CTL ,ONLY : C +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE PRE_SULEG_MOD ,ONLY : PRE_SULEG +USE SUGAW_MOD ,ONLY : SUGAW +USE SUPOL_MOD ,ONLY : SUPOL +USE SUPOLF_MOD ,ONLY : SUPOLF +USE TPM_POL ,ONLY : INI_POL, END_POL +USE SUTRLE_MOD ,ONLY : SUTRLE +USE SETUP_GEOM_MOD ,ONLY : SETUP_GEOM +USE BUTTERFLY_ALG_MOD ,ONLY : CLONE, CONSTRUCT_BUTTERFLY, PACK_BUTTERFLY_STRUCT, & + & UNPACK_BUTTERFLY_STRUCT +USE SEEFMM_MIX ,ONLY : SETUP_SEEFMM +USE SET2PE_MOD ,ONLY : SET2PE +USE PREPSNM_MOD ,ONLY : PREPSNM +USE WRITE_LEGPOL_MOD ,ONLY : WRITE_LEGPOL +USE READ_LEGPOL_MOD ,ONLY : READ_LEGPOL + +!**** *SULEG * - initialize the Legendre polynomials + +! Purpose. +! -------- +! Initialize COMMON YOMLEG + +!** Interface. +! ---------- +! *CALL* *SULEG* + +! Explicit arguments : +! -------------------- + +! Implicit arguments : +! -------------------- +! COMMON YOMLEG + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- +! SUGAW (Gaussian latitudes) +! SUPOLM (polynomials) +! LFI routines for external IO's +! Called by SUGEM. + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! +! S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon Press (1962) +! P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre quadrature, +! SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! MODIFICATION : 91-04 J.M. Piriou: +! - Read gaussian latitudes and PNM on LFI +! - If file missing, computes +! 91-04 M.Hamrud: +! - IO Scheme introduced +! MODIFICATION : 91-07-03 P.Courtier suppress derivatives +! MODIFICATION : 91-07-03 P.Courtier computes RATATH and RACTHE +! MODIFICATION : 91-07-03 P.Courtier change upper limit (NSMAX+1) +! MODIFICATION : 91-07-03 P.Courtier change ordering +! Order of the PNM in the file, as in the model : +! - increasing wave numbers m +! - for a given m, from n=NSMAX+1 to m +! MODIFICATION : 92-07-02 R. Bubnova: shift RATATH calculation +! to SUGEM1 +! MODIFICATION : 92-12-17 P.Courtier multitask computations +! Modified by R. EL Khatib : 93-04-02 Set-up defaults controled by LECMWF +! MODIFICATION : 93-03-19 D.Giard : n <= NTMAX +! K. YESSAD : 93-05-11 : DLMU --> global array DRMU(NDGSA:NDGEN). +! (not stored currently on LFI files). +! MODIFICATION : 94-02-03 R. El Khatib : subroutine SULEG2 to write out +! the Leg. polynomials on workfile or LFI file +! Modification : 94-08-31 M. Tolstykh: Setup for CUD interpolation +! Modified by K. YESSAD (MARCH 1995): Extra-latitudes computations +! according to value of NDGSUR and LRPOLE only. +! + change fancy loop numbering. +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision +! on NEC +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! G.Mozdzynski: March 2011 Support 2D (RW,RV) initialisation of legendre coeffs +! G.Mozdzynski: July 2012 distribute FLT initialisation over NPRTRV +! R. El Khatib 14-Jun-2013 optional computation on the stretched latitudes +! F. Vana 05-Mar-2015 Support for single precision +! Nils Wedi, 20-Apr-2015 Support dual latitude/longitude set +! T. Wilhelmsson, 22-Sep-2016 Support single precision for dual too +! ------------------------------------------------------------------ + +IMPLICIT NONE + +! LOCAL +! ------------------------------------------------------------------ +REAL(KIND=JPRD),ALLOCATABLE :: ZPNMG(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZFN(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZLRMUZ2(:) +REAL(KIND=JPRB) :: ZEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRD) :: ZLRMUZ(R%NDGL) +REAL(KIND=JPRD) :: ZW(R%NDGL) + +REAL(KIND=JPRD) :: ZANM +REAL(KIND=JPRD) :: ZFNN +REAL(KIND=JPRD) :: ZPI, ZINC, ZOFF, ZTEMP, ZORIG, ZTHETA, ZCOS + +REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFV(:),ZRCVBUFV(:,:) +REAL(KIND=JPRD), ALLOCATABLE :: ZPNMCDO(:,:),ZPNMCDD(:,:) +REAL(KIND=JPRB), ALLOCATABLE :: ZRCVBUTFV(:,:) +REAL(KIND=JPRB) :: ZDUM(2) +REAL(KIND=KIND(ZRCVBUTFV)) :: ZBYTES +INTEGER(KIND=JPIM) :: IBYTES +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRV) +INTEGER(KIND=JPIM) :: IRECVREQ(NPRTRV) +INTEGER(KIND=JPIM) :: IKOUNT(NPRTRV) +INTEGER(KIND=JPIM) :: IRECVLENMAXV(NPRTRV) + +INTEGER(KIND=JPIM) :: INM, IM, IRECV, ISEND, ISREQ, IRREQ, & + &JGL, JM, JMLOC, IMLOC, JN, JNM, IODD, INN, INMAX, JI, IMAXN, ITAG, ITAG1, & + &INX, ISL, ISTART, ITHRESHOLD, INSMAX, IMAXCOLS,ILATSMAX,JW,JV,J, & + &IDGLU, ILA, ILS, IA, IS, I, ILATS, ILOOP, IPRTRV, JSETV, JH, & + &IMAXRECVA, IMAXRECVS, IRECVLENMAX, ICLONELEN, IHEMIS, INNH, IGL, IGL1, IGL2, & + &IDGLU2, ISYM, INZ + +REAL(KIND=JPRD) :: ZEPS_INT_DEC +REAL(KIND=JPRD) :: ZEPS +REAL(KIND=JPRD),ALLOCATABLE :: ZLFPOL(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZLPOL(:) + +TYPE(CLONE),ALLOCATABLE :: ZCLONEA(:),ZCLONES(:) + +LOGICAL :: LLP1,LLP2 + +! For latitudes on the stretched geometry +REAL(KIND=JPRH) :: ZTAN +REAL(KIND=JPRH) :: ZSTRETMU(R%NDGL) + +! ------------------------------------------------------------------ + +!* 0. Some initializations. +! --------------------- + +IBYTES = MPL_BYTES(ZBYTES) + +ZEPS = 1000._JPRD*EPSILON(ZEPS) +!ZEPS_INT_DEC = EPSILON(ZEPS) +ZEPS_INT_DEC = 1.0E-7_JPRD +!ZEPS_INT_DEC = 1.0E-5_JPRD + +IHEMIS=1 +IF (S%LSOUTHPNM) IHEMIS=2 +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SULEG ===' + +IF( NPROC > 1 )THEN + CALL GSTATS(798,0) + CALL MPL_BARRIER(CDSTRING='SULEG:') + CALL GSTATS(798,1) +ENDIF + +CALL GSTATS(140,0) +CALL GSTATS(1801,0) + +IF(.NOT.D%LGRIDONLY) THEN + CALL PRE_SULEG +ENDIF + +ALLOCATE(F%RMU(R%NDGL)) +IF (LLP2) WRITE(NOUT,9) 'F%RMU ',SIZE(F%RMU ),SHAPE(F%RMU ) +ALLOCATE(F%RW(R%NDGL)) +IF (LLP2) WRITE(NOUT,9) 'F%RW ',SIZE(F%RW ),SHAPE(F%RW ) + + +!* 1.0 Initialize Fourier coefficients for ordinary Legendre polynomials +! ------------------------------------------------------------------------ + +ALLOCATE(ZFN(0:R%NDGL,0:R%NDGL)) +IF (LLP2) WRITE(NOUT,9) 'ZFN ',SIZE(ZFN ),SHAPE(ZFN ) + + + +! determines the number of stripes in butterfly NSMAX/IMAXCOLS +! IMAXCOLS = (R%NSMAX - 1)/4 + 1 +! IMAXCOLS=64 (min flops) +IMAXCOLS=64 + +! the threshold of efficiency +IF(NPROC == 1 .OR. R%NDGNH <= 2560) THEN + ITHRESHOLD = R%NDGNH/4 + DO + IF(ITHRESHOLD >= IMAXCOLS*4) EXIT + IMAXCOLS = IMAXCOLS/2 + ENDDO +ELSE + ITHRESHOLD = 900 +ENDIF + +ITHRESHOLD = MAX(ITHRESHOLD,IMAXCOLS+1) +S%ITHRESHOLD = ITHRESHOLD + +!* 3.1 Gaussian latitudes and weights +! --------------------------------------- + +!IF (S%LUSE_BELUSOV.OR.S%LUSEFLT) +CALL INI_POL(R%NTMAX+3) + +IF(.NOT.D%LGRIDONLY) THEN + ISTART=1 +ELSE + ISTART=R%NDGL +ENDIF + +INMAX=R%NDGL +! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) +! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 +ZFN(0,0)=2._JPRD +DO JN=ISTART,R%NDGL + ZFNN=ZFN(0,0) + DO JGL=1,JN + ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) + ENDDO + + IODD=MOD(JN,2) + ZFN(JN,JN)=ZFNN + DO JGL=2,JN-IODD,2 + ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) + ENDDO +ENDDO + +! compute latitudes and weights for original Gaussian latitudes +ZANM=SQRT(REAL(2*INMAX+1,JPRD)*REAL(INMAX**2,JPRD)/REAL(2*INMAX-1,JPRD)) +INN=R%NDGL +CALL GSTATS(1801,2) +CALL SUGAW(INN,0,INMAX,ZLRMUZ(1:INN),ZW(1:INN),ZANM,ZFN) +CALL GSTATS(1801,3) + +IF (ABS(G%RSTRET-1.0_JPRD)>100._JPRD*EPSILON(1._JPRD)) THEN + WRITE(NOUT,*) '=== SULEG: Change Gaussian latitudes to the transformed sphere ===' + INNH=(INN+1)/2 + ZTAN=(1.0_JPRD-G%RSTRET**2)/(1.0_JPRD+G%RSTRET**2) +! North hemisphere + DO JGL=1,INNH + ZSTRETMU(JGL)=(ZTAN+REAL(ZLRMUZ(JGL),JPRH))/(1.0_JPRD+ZTAN*REAL(ZLRMUZ(JGL),JPRH)) + ENDDO +! South hemisphere + DO JGL=1,INNH + IGL=2*INNH-JGL+1 + ZSTRETMU(IGL)=(ZTAN-REAL(ZLRMUZ(JGL),JPRH))/(1.0_JPRD-ZTAN*REAL(ZLRMUZ(JGL),JPRH)) + ENDDO + DO JGL=1,INN + ZLRMUZ(JGL)=REAL(ZSTRETMU(JGL),JPRD) + ENDDO +ENDIF + +DO JGL=1,R%NDGL + F%RW(JGL) = ZW(JGL) + F%RMU(JGL) = ZLRMUZ(JGL) +ENDDO + +IF (LLP1) WRITE(NOUT,*) '=== SULEG: Finished Gaussian latitudes ===' + +!* 3.1.1 specify a dual set of output (inv_trans) or input (dir_trans) latitudes / longitudes + +IF( S%LDLL ) THEN + + INMAX = S%NDGL + INN= S%NDGL + + S%NDGNHD = (INMAX+1)/2 + ALLOCATE(ZLRMUZ2(INN)) + + ! here we want to use the positions of the specified dual grid + ! accuracy requirement is ZLRMUZ2(JGL) < F%RMU(1) + ! so we use approximations for the remaining latitudes outside this range + ! we approximate the vicinity to the pole/equator + + ZPI = 2.0_JPRD*ASIN(1.0_JPRD) + + ZORIG = ASIN(F%RMU(1)) + IF( S%LSHIFTLL ) THEN + ZINC = ZPI/REAL(INN,JPRD) + ZOFF = 0.5_JPRD*ZINC + ZTEMP = ZOFF + ZINC*REAL(S%NDGNHD-1,JPRD) + ZLRMUZ2(1) = SIN(MIN(ZTEMP,ZORIG) - 0.5_JPRD*MAX(0._JPRD,ZTEMP - ZORIG)) + ZLRMUZ2(S%NDGNHD) = SIN(ZOFF) + ELSE + ZINC = ZPI/REAL(INN-2,JPRD) + ZOFF=-0.5_JPRD*ZINC + ZTEMP = ZOFF + ZINC*REAL(S%NDGNHD-1,JPRD) + ZLRMUZ2(1) = SIN(MIN(ZTEMP,ZORIG) - 0.5_JPRD*MAX(0._JPRD,ZTEMP - ZORIG)) + ZOFF=0.01_JPRD*ZINC + ZLRMUZ2(S%NDGNHD) = SIN(ZOFF) + ZOFF=0._JPRD + ENDIF + DO JGL=2, S%NDGNHD-1 + ZLRMUZ2(JGL) = SIN(ZOFF + ZINC*REAL(S%NDGNHD-JGL,JPRD)) + ENDDO + DO JGL=1, S%NDGNHD + ISYM = INN-JGL+1 + ZLRMUZ2(ISYM) = -ZLRMUZ2(JGL) + ENDDO + + IF( LLP2 ) THEN + WRITE(NOUT,*) 'dual latitudes' + DO JGL= 1, INN + WRITE(NOUT,*) 'dual JGL=',JGL,(180._JPRD/ZPI)*ZINC,(180._JPRD/ZPI)*ASIN(ZLRMUZ2(JGL)),(180._JPRD/ZPI)*ASIN(F%RMU(JGL)) + ENDDO + ENDIF + + ALLOCATE(F%RMU2(INMAX)) + IF (LLP2) WRITE(NOUT,9) 'F%RMU2 ',SIZE(F%RMU2 ),SHAPE(F%RMU2 ) + ALLOCATE(F%RACTHE2(INMAX)) + IF (LLP2) WRITE(NOUT,9) 'F%RACTHE2 ',SIZE(F%RACTHE2),SHAPE(F%RACTHE2 ) + DO JGL=1,INN + F%RMU2(JGL) = ZLRMUZ2(JGL) + F%RACTHE2(JGL) = 1.0_JPRD/(SQRT(1.0_JPRD-ZLRMUZ2(JGL)*ZLRMUZ2(JGL))+ZEPS)/REAL(RA,JPRD) + ENDDO + + IF (LLP1) WRITE(NOUT,*) '=== SULEG: Finished dual Gaussian latitudes ===' + + ! inverse + direct map for FMM + INX=2*R%NDGNH + INZ=2*S%NDGNHD + ALLOCATE(S%FMM_INTI) + CALL SETUP_SEEFMM(INX,F%RMU,INZ,F%RMU2,S%FMM_INTI) + +ENDIF + +!* 3.2 Computes related arrays + +IF(.NOT.D%LGRIDONLY) THEN + + ALLOCATE(S%FA(D%NUMP)) + + ALLOCATE(F%R1MU2(R%NDGL)) + IF (LLP2) WRITE(NOUT,9) 'F%R1MU2 ',SIZE(F%R1MU2),SHAPE(F%R1MU2 ) + ALLOCATE(F%RACTHE(R%NDGL)) + IF (LLP2) WRITE(NOUT,9) 'F%RACTHE ',SIZE(F%RACTHE),SHAPE(F%RACTHE ) + + IF( S%LUSE_BELUSOV) THEN + ALLOCATE(F%RPNM(R%NLEI3,D%NSPOLEGL)) + IF (LLP2) WRITE(NOUT,9) 'F%RPNM ',SIZE(F%RPNM),SHAPE(F%RPNM) + DO JNM=1,D%NSPOLEGL + F%RPNM(R%NLEI3,JNM) = 0.0_JPRD + ENDDO + ENDIF + +!* 3.2 Computes related arrays + + DO JGL=1,R%NDGL +! test cosine differently + ZTHETA = ASIN(ZLRMUZ(JGL)) + ZCOS = COS(ZTHETA) + F%R1MU2(JGL) = ZCOS**2 + F%RACTHE(JGL) = 1.0_JPRD/ZCOS/REAL(RA,JPRD) +! F%R1MU2(JGL) = 1.0_JPRD-ZLRMUZ(JGL)*ZLRMUZ(JGL) +! F%RACTHE(JGL) = 1.0_JPRD/SQRT(1.0_JPRD-ZLRMUZ(JGL)*ZLRMUZ(JGL))/REAL(RA,JPRD) + ENDDO + +!* 3.3 Working arrays + +! compute the Legendre polynomials as a function of the z_k (Gaussian Latitudes) +! this may be faster than calling supolf for each m but uses extra communication +! and the parallelism is more limited ? Nils + + IF( S%LUSE_BELUSOV .AND. .NOT. C%LREAD_LEGPOL ) THEN + + INSMAX = R%NTMAX+1 + + IF( INSMAX /= R%NDGL) THEN + DEALLOCATE(ZFN) + ALLOCATE(ZFN(0:INSMAX,0:INSMAX)) + ! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) + ! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 + ZFN(0,0)=2._JPRD + DO JN=1,INSMAX + ZFNN=ZFN(0,0) + DO JGL=1,JN + ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) + ENDDO + + IODD=MOD(JN,2) + ZFN(JN,JN)=ZFNN + DO JGL=2,JN-IODD,2 + ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) + ENDDO + ENDDO + ENDIF + + ALLOCATE(ZLFPOL(0:INSMAX,0:INSMAX)) + ALLOCATE(ZPNMG(R%NSPOLEG)) + + DO JH=1,IHEMIS + + IF (JH==1) THEN + IGL1=D%NLATLS(MYSETW,MYSETV) + IGL2=D%NLATLE(MYSETW,MYSETV) + ELSE + IGL1=R%NDGL-D%NLATLE(MYSETW,MYSETV)+1 + IGL2=R%NDGL-D%NLATLS(MYSETW,MYSETV)+1 + ENDIF + + ILOOP=0 + DO JGL=IGL1,IGL2 + + INM = 0 + CALL SUPOL(INSMAX,ZLRMUZ(JGL),ZFN,ZLFPOL) + DO JM=0,R%NSMAX + DO JN=INSMAX,JM,-1 + INM = INM+1 + ZPNMG(INM) = ZLFPOL(JM,JN) + ENDDO + ENDDO + + CALL GSTATS(1801,2) + ILOOP = JGL-IGL1+1 + CALL SUTRLE(ZPNMG,JGL,ILOOP) + CALL GSTATS(1801,3) + + ENDDO + + ILATSMAX=0 + DO JW=1,NPRTRW + DO JV=1,NPRTRV + ILATSMAX=MAX(ILATSMAX,D%NLATLE(JW,JV)-D%NLATLS(JW,JV)+1) + ENDDO + ENDDO + + ILATS=IGL2-IGL1+1 + IF (S%LSOUTHPNM .AND. IHEMIS==1 .AND. ILATSMAX-1 >= ILATS) THEN + ! I don't know what to do for south pole. But isn't this piece of code + ! a dead stuff for poles rows ? + CALL ABORT_TRANS('SULEG: WILL BE BROKEN FOR SOUTH HEMISPHERE') + ENDIF + + DO J=ILATS,ILATSMAX-1 + ILOOP=ILOOP+1 + CALL GSTATS(1801,2) + CALL SUTRLE(ZPNMG,-1,ILOOP) + CALL GSTATS(1801,3) + ENDDO + + ENDDO + + DEALLOCATE(ZLFPOL) + IF( ALLOCATED(ZFN) ) DEALLOCATE(ZFN) + + DEALLOCATE(ZPNMG) + + IF(LLP1) WRITE(NOUT,*) '=== SULEG: Finished RPNM ===' + + ENDIF + + CALL SETUP_GEOM + + IMAXN=R%NTMAX+1 + + ITAG=MTAGLETR + ITAG1=MTAGLETR+1 + + IMAXRECVA=0 + IMAXRECVS=0 + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IMAXRECVA = MAX(IDGLU*ILA,IMAXRECVA) + IMAXRECVS = MAX(IDGLU*ILS,IMAXRECVS) + + !find nearest starting latitude of the dual set + IF( S%LDLL ) THEN + + INMAX=MIN(R%NTMAX+1,S%NDGL) + IDGLU2=S%NDGNHD + S%FA(JMLOC)%ISLD = 1 + LLA:DO JGL=1,S%NDGNHD-1 + IF( (ZLRMUZ2(JGL) < ZLRMUZ(ISL)) ) THEN + S%FA(JMLOC)%ISLD = JGL + IDGLU2 = S%NDGNHD-S%FA(JMLOC)%ISLD+1 + EXIT LLA + ENDIF + ENDDO LLA + + IF( .NOT. C%LREAD_LEGPOL ) THEN + ! compute auxiliary quantities for the dual mapping + + ! output data latitudes + ALLOCATE(ZPNMCDO(2*IDGLU2,2)) + !$OMP PARALLEL PRIVATE(JGL,ZLPOL) + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + !$OMP DO SCHEDULE(DYNAMIC,1) + DO JGL=1,2*IDGLU2 + CALL SUPOLF(IM,INMAX,ZLRMUZ2(S%FA(JMLOC)%ISLD+JGL-1),ZLPOL(0:INMAX)) + ZPNMCDO(JGL,1)=ZLPOL(INMAX-1) + ZPNMCDO(JGL,2)=ZLPOL(INMAX) + ENDDO + !$OMP END DO + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + !$OMP END PARALLEL + + ! internal (gg-roots) latitudes + ALLOCATE(ZPNMCDD(2*IDGLU,2)) + !$OMP PARALLEL PRIVATE(JGL,ZLPOL,JI,JN) + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + !$OMP DO SCHEDULE(DYNAMIC,1) + DO JGL=1,2*IDGLU + CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX)) + ZPNMCDD(JGL,1)=ZLPOL(INMAX-1) + ZPNMCDD(JGL,2)=ZLPOL(INMAX) + ENDDO + !$OMP END DO + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + !$OMP END PARALLEL + + CALL PREPSNM(IM,JMLOC,ZEPSNM) + ALLOCATE(S%FA(JMLOC)%RPNMWI(2*IDGLU,1:2)) + DO JGL=1,2*IDGLU + ! inverse trafo + S%FA(JMLOC)%RPNMWI(JGL,1) = F%RW(ISL+JGL-1)*ZPNMCDD(JGL,1) + S%FA(JMLOC)%RPNMWI(JGL,2) = F%RW(ISL+JGL-1)*ZPNMCDD(JGL,2) + ! direct trafo needed if mapping to another set of gg roots + !S%FA(JMLOC)%RPNMWI(JGL,3) = -ZEPSNM(IMAXN)*ZPNMCDD(JGL,2) + !S%FA(JMLOC)%RPNMWI(JGL,4) = -ZEPSNM(IMAXN)*ZPNMCDD(JGL,1) + ENDDO + DEALLOCATE(ZPNMCDD) + ALLOCATE(S%FA(JMLOC)%RPNMWO(2*IDGLU2,1:2)) + DO JGL=1,2*IDGLU2 + ! inverse trafo + S%FA(JMLOC)%RPNMWO(JGL,1) = -ZEPSNM(IMAXN)*ZPNMCDO(JGL,2) + S%FA(JMLOC)%RPNMWO(JGL,2) = -ZEPSNM(IMAXN)*ZPNMCDO(JGL,1) + ! only needed in direct trafo, need if mapping to another set of roots + !S%FA(JMLOC)%RPNMWO(JGL,3) = F%RW2(S%FA(JMLOC)%ISLD+JGL-1)*ZPNMCDO(JGL,1) + !S%FA(JMLOC)%RPNMWO(JGL,4) = F%RW2(S%FA(JMLOC)%ISLD+JGL-1)*ZPNMCDO(JGL,2) + ENDDO + DEALLOCATE(ZPNMCDO) + ENDIF ! LREAD_LEGPOL + ENDIF ! LDLL + + ENDDO + + IF( S%LDLL ) THEN + DEALLOCATE(ZLRMUZ2) + ENDIF + +!!$ IF( S%LUSEFLT.AND.LMPOFF ) THEN +!!$ CALL ABORT_TRANS('SULEG: LUSEFLT=T and LMPOFF=T not supported') +!!$ ENDIF + CALL GSTATS(1801,2) + + IF(.NOT.C%LREAD_LEGPOL) THEN + IF( S%LUSEFLT )THEN + ALLOCATE(ZCLONEA(D%NUMP)) + ALLOCATE(ZCLONES(D%NUMP)) + ENDIF + +! not correct logic +! IF (S%LUSE_BELUSOV.OR.S%LUSEFLT) THEN + + DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP +++++++++++++++++++++++ +! IF( S%LUSEFLT )THEN +! ZCLONES(JMLOC)%COMMSBUF => NULL() +! ZCLONEA(JMLOC)%COMMSBUF => NULL() +! ENDIF + + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + + ! --------------------anti-symmetric----------------------- + ! --------------------anti-symmetric----------------------- + ! --------------------anti-symmetric----------------------- + + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + ALLOCATE(S%FA(IMLOC)%RPNMA(IDGLU,ILA)) + ENDDO + + IF( .NOT. S%LUSE_BELUSOV ) THEN + + ISREQ = 0 + IRREQ = 0 + + ALLOCATE (ZRCVBUFV(IMAXRECVA,IPRTRV)) + CALL GSTATS(851,0) + DO JSETV=1,IPRTRV + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + IRREQ = IRREQ+1 + CALL MPL_RECV(ZRCVBUFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + + ALLOCATE(ZSNDBUFV(IDGLU*ILA)) + + IF(MOD(IMAXN-IM,2) == 0) THEN + INMAX=IMAXN+1 + ELSE + INMAX=IMAXN + ENDIF + + CALL GSTATS(1251,0) + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) + DO JGL=1,IDGLU + CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=3) + DO JI=1,ILA + JN=IM+2*(JI-1)+1 + ZSNDBUFV((JGL-1)*ILA+JI)=ZLPOL(JN) + ENDDO + ENDDO + !$OMP END PARALLEL DO + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + CALL GSTATS(1251,1) + + CALL GSTATS(851,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZSNDBUFV(:),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + ENDIF + + CALL GSTATS(851,0) + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + + IF( NPROC==1.AND.LMPOFF )THEN + ZRCVBUFV(1:SIZE(ZSNDBUFV(:)),1)=ZSNDBUFV(:) + ENDIF + CALL GSTATS(851,1) + + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IA,ILA,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILA > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMA) + ALLOCATE(S%FA(IMLOC)%RPNMDA(IDGLU,ILA)) + DO JGL=1,IDGLU + DO JI=1,ILA + S%FA(IMLOC)%RPNMDA(JGL,ILA-JI+1)=ZRCVBUFV((JGL-1)*ILA+JI,JSETV) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JGL=1,IDGLU + DO JI=1,ILA + S%FA(IMLOC)%RPNMA(JGL,ILA-JI+1)=ZRCVBUFV((JGL-1)*ILA+JI,JSETV) + ENDDO + ENDDO + ENDIF + ELSE + DO JGL=1,IDGLU + DO JI=1,ILA + S%FA(IMLOC)%RPNMA(JGL,ILA-JI+1)=ZRCVBUFV((JGL-1)*ILA+JI,JSETV) + ENDDO + ENDDO + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + IF( ALLOCATED(ZSNDBUFV) ) DEALLOCATE(ZSNDBUFV) + IF( ALLOCATED(ZRCVBUFV) ) DEALLOCATE(ZRCVBUFV) + + ELSE + + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IA,ILA,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILA > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMA) + ALLOCATE(S%FA(IMLOC)%RPNMDA(IDGLU,ILA)) + DO JI=1,ILA + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMDA(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IA+(JI-1)*2) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JI=1,ILA + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMA(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IA+(JI-1)*2) + ENDDO + ENDDO + ENDIF + ELSE + DO JI=1,ILA + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMA(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IA+(JI-1)*2) + ENDDO + ENDDO + END IF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + ENDIF + + ! -------------------- anti-symmetric FLT iniitialisation ----------------------- + + IF( S%LUSEFLT) THEN + IRECVLENMAX=0 + ISREQ = 0 + IRREQ = 0 + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( ILA > ITHRESHOLD ) THEN + S%LSYM = .FALSE. + INX = IDGLU + CALL CONSTRUCT_BUTTERFLY(ZEPS_INT_DEC,IMAXCOLS,INX,ILA,S%FA(IMLOC)%RPNMDA,& + & S%FA(IMLOC)%YBUT_STRUCT_A) + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,ZCLONEA(IMLOC)) + IRECVLENMAX=SIZE(ZCLONEA(IMLOC)%COMMSBUF) + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZCLONEA(IMLOC)%COMMSBUF(:),KDEST=NPRCIDS(ISEND),& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ELSE + IRECVLENMAX=2 + ZDUM(:)=0.0_JPRB + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZDUM(:),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ENDIF + ENDIF + IF(.NOT.LMPOFF) THEN + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + CALL MPL_SEND(IRECVLENMAX,KDEST=NPRCIDS(ISEND),KTAG=ITAG1,CDSTRING='SULEG:') + ENDDO + IRECVLENMAX=0 + DO JSETV=1,NPRTRV ! ? should this be IPRTRV ? + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(IRECVLENMAXV(JSETV),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG1,CDSTRING='SULEG:') + IRECVLENMAX=MAX(IRECVLENMAX,IRECVLENMAXV(JSETV)) + ENDDO + IF( MYPROC == 1 .AND. LLP1 )THEN + IF( IRECVLENMAX > 2 )THEN + WRITE(NOUT,'("SULEG: ANTI-SYM MAX BUTTERFLY CLONE LEN=",I8)')IRECVLENMAX + ENDIF + ENDIF + IF( IRECVLENMAX == 0 )THEN + WRITE(NOUT,'("SULEG: ANTI-SYM WARNING CLONE LEN=",I8,I8)') MYPROC, IRECVLENMAX + ENDIF + IF( IRECVLENMAX > 0 )THEN + ALLOCATE (ZRCVBUTFV(IRECVLENMAX,IPRTRV)) + DO JSETV=1,IPRTRV + IRREQ = IRREQ+1 + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(ZRCVBUTFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDDO + END IF + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KBYTES=IBYTES,KOUNT=IKOUNT(1:IRREQ),KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + CALL GSTATS(852,1) + IF( IRECVLENMAX > 0 )THEN + CALL GSTATS(1252,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ILA,IDGLU,INX,ICLONELEN) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + IF( ILA > ITHRESHOLD ) THEN + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + INX=IDGLU + IF( .NOT.ALLOCATED(ZCLONEA(IMLOC)%COMMSBUF) )THEN + ICLONELEN=IKOUNT(JSETV) + ALLOCATE(ZCLONEA(IMLOC)%COMMSBUF(ICLONELEN)) + ZCLONEA(IMLOC)%COMMSBUF(1:ICLONELEN) = ZRCVBUTFV(1:ICLONELEN,JSETV) + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,ZCLONEA(IMLOC)) + ENDIF + IF(ALLOCATED(ZCLONEA(IMLOC)%COMMSBUF) ) THEN + IF( SIZEOF(ZCLONEA(IMLOC)%COMMSBUF) > 0 ) DEALLOCATE(ZCLONEA(IMLOC)%COMMSBUF) + ! ZCLONEA(IMLOC)%COMMSBUF=>NULL() + ENDIF + IF( ASSOCIATED(S%FA(IMLOC)%RPNMA) .AND. .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMA) + IF( ASSOCIATED(S%FA(IMLOC)%RPNMDA) ) DEALLOCATE(S%FA(IMLOC)%RPNMDA) + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1252,1) + DEALLOCATE(ZRCVBUTFV) + ENDIF + ENDIF + ENDIF + + ! --------------------symmetric----------------------- + ! --------------------symmetric----------------------- + ! --------------------symmetric----------------------- + + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + ALLOCATE(S%FA(IMLOC)%RPNMS(IDGLU,ILS)) + ENDDO + + IF( .NOT. S%LUSE_BELUSOV ) THEN + + ISREQ = 0 + IRREQ = 0 + + ALLOCATE (ZRCVBUFV(IMAXRECVS,IPRTRV)) + CALL GSTATS(851,0) + DO JSETV=1,IPRTRV + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + IRREQ = IRREQ+1 + CALL MPL_RECV(ZRCVBUFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + + ALLOCATE(ZSNDBUFV(IDGLU*ILS)) + + IF(MOD(IMAXN-IM,2) == 0) THEN + INMAX=IMAXN + ELSE + INMAX=IMAXN+1 + ENDIF + + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) + DO JGL=1,IDGLU + CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=2) + DO JI=1,ILS + JN=IM+2*(JI-1) + ZSNDBUFV((JGL-1)*ILS+JI)=ZLPOL(JN) + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + + CALL GSTATS(851,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZSNDBUFV(:),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + ENDIF + + CALL GSTATS(851,0) + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + IF( NPROC==1.AND.LMPOFF )THEN + ZRCVBUFV(1:SIZE(ZSNDBUFV(:)),1)=ZSNDBUFV(:) + ENDIF + CALL GSTATS(851,1) + + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IS,ILS,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILS > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMS) + ALLOCATE(S%FA(IMLOC)%RPNMDS(IDGLU,ILS)) + DO JGL=1,IDGLU + DO JI=1,ILS + S%FA(IMLOC)%RPNMDS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JGL=1,IDGLU + DO JI=1,ILS + S%FA(IMLOC)%RPNMS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) + ENDDO + ENDDO + ENDIF + ELSE + DO JGL=1,IDGLU + DO JI=1,ILS + S%FA(IMLOC)%RPNMS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) + ENDDO + ENDDO + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + IF( ALLOCATED(ZSNDBUFV) ) DEALLOCATE(ZSNDBUFV) + IF( ALLOCATED(ZRCVBUFV) ) DEALLOCATE(ZRCVBUFV) + + ELSE + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IS,ILS,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILS > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMS) + ALLOCATE(S%FA(IMLOC)%RPNMDS(IDGLU,ILS)) + DO JI=1,ILS + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMDS(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IS+(JI-1)*2) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JI=1,ILS + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMS(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IS+(JI-1)*2) + ENDDO + ENDDO + ENDIF + ELSE + DO JI=1,ILS + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMS(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IS+(JI-1)*2) + ENDDO + ENDDO + END IF + END DO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + ENDIF + + ! -------------------- symmetric FLT iniitialisation ----------------------- + + IF( S%LUSEFLT) THEN + IRECVLENMAX=0 + ISREQ = 0 + IRREQ = 0 + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( ILS > ITHRESHOLD ) THEN + S%LSYM = .TRUE. + INX = IDGLU + CALL CONSTRUCT_BUTTERFLY(ZEPS_INT_DEC,IMAXCOLS,INX,ILS,S%FA(IMLOC)%RPNMDS,& + & S%FA(IMLOC)%YBUT_STRUCT_S) + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,ZCLONES(IMLOC)) + IRECVLENMAX=SIZE(ZCLONES(IMLOC)%COMMSBUF) + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZCLONES(IMLOC)%COMMSBUF(:),KDEST=NPRCIDS(ISEND),& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ELSE + IRECVLENMAX=2 + ZDUM(:)=0.0_JPRB + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZDUM(:),KDEST=NPRCIDS(ISEND), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ENDIF + ENDIF + IF(.NOT. LMPOFF) THEN + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + CALL MPL_SEND(IRECVLENMAX,KDEST=NPRCIDS(ISEND),KTAG=ITAG1,CDSTRING='SULEG:') + ENDDO + IRECVLENMAX=0 + DO JSETV=1,NPRTRV ! ? should this be IPRTRV ? + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(IRECVLENMAXV(JSETV),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG1,CDSTRING='SULEG:') + IRECVLENMAX=MAX(IRECVLENMAX,IRECVLENMAXV(JSETV)) + ENDDO + IF( MYPROC == 1 .AND. LLP1 )THEN + IF( IRECVLENMAX > 2 )THEN + WRITE(NOUT,'("SULEG: SYM MAX BUTTERFLY CLONE LEN=",I8)')IRECVLENMAX + ENDIF + ENDIF + IF( IRECVLENMAX == 0 )THEN + WRITE(NOUT,'("SULEG: SYM WARNING CLONE LEN=",I8,I8)')MYPROC, IRECVLENMAX + ENDIF + IF( IRECVLENMAX > 0 )THEN + ALLOCATE (ZRCVBUTFV(IRECVLENMAX,IPRTRV)) + DO JSETV=1,IPRTRV + IRREQ = IRREQ+1 + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(ZRCVBUTFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDDO + ENDIF + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KBYTES=IBYTES,KOUNT=IKOUNT(1:IRREQ),KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + CALL GSTATS(852,1) + + + IF( IRECVLENMAX > 0 )THEN + CALL GSTATS(1252,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ILS,IDGLU,INX,ICLONELEN) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILS = (R%NSMAX-IM+3)/2 + IF( ILS > ITHRESHOLD ) THEN + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + INX=IDGLU + IF( .NOT.ALLOCATED(ZCLONES(IMLOC)%COMMSBUF) )THEN + ICLONELEN=IKOUNT(JSETV) + ALLOCATE(ZCLONES(IMLOC)%COMMSBUF(ICLONELEN)) + ZCLONES(IMLOC)%COMMSBUF(1:ICLONELEN) = ZRCVBUTFV(1:ICLONELEN,JSETV) + CALL UNPACK_BUTTERFLY_STRUCT( S%FA(IMLOC)%YBUT_STRUCT_S,ZCLONES(IMLOC)) + ENDIF + IF( ALLOCATED(ZCLONES(IMLOC)%COMMSBUF) ) THEN + IF( SIZEOF(ZCLONES(IMLOC)%COMMSBUF) > 0 ) DEALLOCATE(ZCLONES(IMLOC)%COMMSBUF) + ! ZCLONES(IMLOC)%COMMSBUF=>NULL() + ENDIF + IF( ASSOCIATED(S%FA(IMLOC)%RPNMS) .AND. .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMS) + IF( ASSOCIATED(S%FA(IMLOC)%RPNMDS) ) DEALLOCATE(S%FA(IMLOC)%RPNMDS) + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1252,1) + DEALLOCATE(ZRCVBUTFV) + ENDIF + ENDIF + ENDIF + + ENDDO ! +++++++++++++++++++++ END JMLOC LOOP +++++++++++++++++++++++ + +! ENDIF ! (S%LUSE_BELUSOV.OR.S%LUSEFLT) + + IF( S%LUSEFLT )THEN + DEALLOCATE(ZCLONEA) + DEALLOCATE(ZCLONES) + ENDIF + + IF( LLP1 .AND. S%LUSEFLT ) THEN + WRITE(NOUT,*) '=== SULEG: Finished SETUP_BUTTERFLY ===' + ENDIF + ENDIF + + CALL GSTATS(1801,3) + IF(S%LUSE_BELUSOV) DEALLOCATE(F%RPNM) + + IF(C%LWRITE_LEGPOL) CALL WRITE_LEGPOL + IF(C%LREAD_LEGPOL) CALL READ_LEGPOL + + +ENDIF +CALL GSTATS(1801,1) +CALL GSTATS(140,1) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +!IF (S%LUSE_BELUSOV.OR.S%LUSEFLT) +CALL END_POL + +END SUBROUTINE SULEG +END MODULE SULEG_MOD diff --git a/src/trans/cpu/internal/sump_trans0_mod.F90 b/src/trans/cpu/internal/sump_trans0_mod.F90 new file mode 100644 index 0000000..d24d8c4 --- /dev/null +++ b/src/trans/cpu/internal/sump_trans0_mod.F90 @@ -0,0 +1,115 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMP_TRANS0_MOD +CONTAINS +SUBROUTINE SUMP_TRANS0 + +! Set up distributed environment for the transform package (part 0) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_GROUPS_CREATE, MPL_MYRANK, MPL_NPROC + +USE TPM_GEN ,ONLY : NOUT, LMPOFF, NPRINTLEV +USE TPM_DISTR ,ONLY : LEQ_REGIONS, MTAGDISTGP, MTAGDISTSP, MTAGGL, & + & MTAGLETR, MTAGLG, MTAGLM, MTAGML, MTAGPART, & + & MYSETV, MYSETW, NPRCIDS, & + & NPRGPEW, NPRGPNS, NPRTRNS, NPRTRV, NPRTRW, & + & MYPROC, NPROC + +USE EQ_REGIONS_MOD ,ONLY : EQ_REGIONS, MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +USE PE2SET_MOD ,ONLY : PE2SET +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +LOGICAL :: LLP1,LLP2 +INTEGER(KIND=JPIM) :: IPROC,JJ + +! ------------------------------------------------------------------ + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS0 ===' + + +NPROC = NPRGPNS*NPRGPEW +NPRTRNS = NPRTRW +IF(MOD(NPROC,NPRTRW) /= 0 .OR. NPRTRW > NPROC) THEN + CALL ABORT_TRANS('SUMP_TRANS0: NPROC INCONSISTENT WITH NPRTRW') +ENDIF +NPRTRV = NPROC/NPRTRW +IF(LLP1) WRITE(NOUT,*)'NPROC =',NPROC,' NPRGPNS=',NPRGPNS,' NPRGPEW=',& + & NPRGPEW,' NPRTRW=',NPRTRW,' NPRTRV=',NPRTRV + +IF(NPROC > 1 ) THEN + IPROC = MPL_NPROC() + IF(IPROC /= NPROC) THEN + WRITE(NOUT,*) 'SUMP_TRANS0: NPROC=',NPROC,' BUT MPL_NPROC RETURNS',& + & IPROC + CALL ABORT_TRANS('SUMP_TRANS0: NPROC INCONSISTENT WITH MPL_NPROC') + ENDIF + MYPROC = MPL_MYRANK() +ELSE + MYPROC = 1 +ENDIF + +IF (MYPROC > NPROC) THEN + CALL ABORT_TRANS('SUMP_TRANS0: INCONSISTENCY IN NUMBER OF PROCESSORS USED') +ENDIF + +IF( LEQ_REGIONS )THEN + ALLOCATE(N_REGIONS(NPROC+2)) + N_REGIONS(:)=0 + CALL EQ_REGIONS(NPROC) +ELSE + N_REGIONS_NS=NPRGPNS + ALLOCATE(N_REGIONS(N_REGIONS_NS)) + N_REGIONS(:)=NPRGPEW + N_REGIONS_EW=NPRGPEW +ENDIF +CALL PE2SET(MYPROC,MY_REGION_NS,MY_REGION_EW,MYSETW,MYSETV) +IF(LLP1) WRITE(NOUT,*)'MYPROC=',MYPROC,'MY_REGION_NS =',MY_REGION_NS,& + & ' MY_REGION_EW=',MY_REGION_EW,' MYSETW=',MYSETW,' MYSETV=',MYSETV + + +ALLOCATE(NPRCIDS(NPROC)) +IF(LLP2)WRITE(NOUT,9) 'NPRCIDS ',SIZE(NPRCIDS ),SHAPE(NPRCIDS ) +DO JJ=1,NPROC + NPRCIDS(JJ) = JJ +ENDDO + +! Message passing tags + +MTAGLETR = 18000 +MTAGML = 19000 +MTAGLG = 20000 +MTAGPART = 21000 +MTAGDISTSP = 22000 +MTAGGL = 23000 +MTAGLM = 24000 +MTAGDISTGP = 25000 + +! Create communicators for MPI groups + +IF (.NOT.LMPOFF) THEN + CALL MPL_GROUPS_CREATE(NPRTRW, NPRTRV) +ENDIF + +! Setup labels for timing package (gstats) + +! CF ifs/utility GSTATS_OUTPUT_IFS + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUMP_TRANS0 +END MODULE SUMP_TRANS0_MOD diff --git a/src/trans/cpu/internal/sump_trans_mod.F90 b/src/trans/cpu/internal/sump_trans_mod.F90 new file mode 100644 index 0000000..2b776b3 --- /dev/null +++ b/src/trans/cpu/internal/sump_trans_mod.F90 @@ -0,0 +1,276 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMP_TRANS_MOD +CONTAINS +SUBROUTINE SUMP_TRANS + +! Set up distributed environment for the transform package (part 2) + +! Modifications : +! P.Marguinaud : 11-Sep-2012 : Fix twice allocated pointer + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MYSETW, NPRTRNS, NPRTRW, NPROC, MYPROC + +!USE SUWAVEDI_MOD +!USE PE2SET_MOD +USE SUMPLATF_MOD ,ONLY : SUMPLATF +USE SUMPLAT_MOD ,ONLY : SUMPLAT +USE SUSTAONL_MOD ,ONLY : SUSTAONL +USE MYSENDSET_MOD ,ONLY : MYSENDSET +USE MYRECVSET_MOD ,ONLY : MYRECVSET +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM +INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM +INTEGER(KIND=JPIM) :: I1,I2,I3,IAUX0,IAUX1,JA1 +INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF +INTEGER(KIND=JPIM),ALLOCATABLE :: IGPTOTL(:,:) + +REAL(KIND=JPRB),ALLOCATABLE :: ZDUM(:) +REAL(KIND=JPRB) :: ZMEDIAP +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS ===' + +IF(.NOT.D%LGRIDONLY) THEN + ALLOCATE(D%NULTPP(NPRTRNS)) + IF(LLP2)WRITE(NOUT,9) 'D%NULTPP ',SIZE(D%NULTPP ),SHAPE(D%NULTPP ) + ALLOCATE(D%NPTRLS(NPRTRNS)) + IF(LLP2)WRITE(NOUT,9) 'D%NPTRLS ',SIZE(D%NPTRLS ),SHAPE(D%NPTRLS ) + ALLOCATE(D%NPROCL(R%NDGL)) + IF(LLP2)WRITE(NOUT,9) 'D%NPROCL ',SIZE(D%NPROCL ),SHAPE(D%NPROCL ) + + CALL SUMPLATF(R%NDGL,NPRTRNS,MYSETW,D%NULTPP,D%NPROCL,D%NPTRLS) + D%NDGL_FS = D%NULTPP(MYSETW) + +! Help arrays for spectral to fourier space transposition + ALLOCATE(D%NLTSGTB (NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NLTSGTB ',SIZE(D%NLTSGTB),SHAPE(D%NLTSGTB) + ALLOCATE(D%NLTSFTB (NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NLTSFTB ',SIZE(D%NLTSFTB),SHAPE(D%NLTSFTB) + ALLOCATE(D%NSTAGT0B(NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT0B ',SIZE(D%NSTAGT0B),SHAPE(D%NSTAGT0B) + ALLOCATE(D%NSTAGT1B(NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT1B ',SIZE(D%NSTAGT1B),SHAPE(D%NSTAGT1B) + ALLOCATE(D%MSTABF (NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%MSTABF ',SIZE(D%MSTABF),SHAPE(D%MSTABF) + + D%NLTSGTB(:) = 0 + DO JGL=1,D%NDGL_FS + IGL = D%NPTRLS(MYSETW)+JGL-1 + DO JM=0,G%NMEN(IGL) + D%NLTSGTB(D%NPROCM(JM)) = D%NLTSGTB(D%NPROCM(JM))+1 + ENDDO + ENDDO + DO JA=1,NPRTRW + IPLAT = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA)+JGL-1 + DO JM=1,D%NUMP + IF(IGL > R%NDGNH-G%NDGLU(D%MYMS(JM)) .AND. IGL <= R%NDGNH+G%NDGLU(D%MYMS(JM))) THEN + IPLAT = IPLAT + 1 + ENDIF + ENDDO + ENDDO + D%NLTSFTB(JA) = IPLAT + ENDDO + + DO JA=1,NPRTRW-1 + ISENDSET = MYSENDSET(NPRTRW,MYSETW,JA) + IRECVSET = MYRECVSET(NPRTRW,MYSETW,JA) + D%MSTABF(IRECVSET) = ISENDSET + ENDDO + D%MSTABF(MYSETW) = MYSETW + + ALLOCATE(D%NPNTGTB0(0:R%NSMAX,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB0 ',SIZE(D%NPNTGTB0 ),SHAPE(D%NPNTGTB0 ) + ALLOCATE(D%NPNTGTB1(D%NUMP,R%NDGL)) + IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB1 ',SIZE(D%NPNTGTB1 ),SHAPE(D%NPNTGTB1 ) + + DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(MYSETW) + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JML=D%NPTRMS(JA),D%NPTRMS(JA)+D%NUMPP(JA)-1 + IM = D%NALLMS(JML) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB0(IM,JGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB0(IM,JGL) = -99 + ENDIF + ENDDO + ENDDO + ENDDO + + DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA) + JGL - 1 + DO JM=1,D%NUMP + IM = D%MYMS(JM) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB1(JM,IGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB1(JM,IGL) = -99 + ENDIF + ENDDO + ENDDO + ENDDO + + IAUX0 = 0 + IAUX1 = 0 + DO JA=1,NPRTRNS-1 + I1 = MYSENDSET(NPRTRNS,MYSETW,JA) + I2 = MYRECVSET(NPRTRNS,MYSETW,JA) + I3 = -1 + DO JA1=1,NPRTRNS-1 + IF(MYSENDSET(NPRTRNS,MYSETW,JA1) == I2) I3 =MYRECVSET(NPRTRNS,MYSETW,JA1) + ENDDO + IAUX0 = MAX(D%NLTSFTB(I1),D%NLTSGTB(I2),IAUX0) + IAUX1 = MAX(D%NLTSGTB(I2),D%NLTSFTB(I3),IAUX1) + ENDDO + IAUX0 = MAX(D%NLTSGTB(MYSETW),IAUX0) + IAUX1 = MAX(D%NLTSGTB(MYSETW),IAUX1) + DO JA=1,NPRTRNS+1 + D%NSTAGT0B(JA) = (JA-1)*IAUX0 + D%NSTAGT1B(JA) = (JA-1)*IAUX1 + ENDDO + D%NLENGT0B = IAUX0*NPRTRNS + D%NLENGT1B = IAUX1*NPRTRNS +ENDIF + +! GRIDPOINT SPACE + +ALLOCATE(D%NFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NFRSTLAT ',SIZE(D%NFRSTLAT ),SHAPE(D%NFRSTLAT ) +ALLOCATE(D%NLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NLSTLAT ',SIZE(D%NLSTLAT ),SHAPE(D%NLSTLAT ) +ALLOCATE(D%NPTRLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLAT ',SIZE(D%NPTRLAT ),SHAPE(D%NPTRLAT ) +ALLOCATE(D%NPTRFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRFRSTLAT',SIZE(D%NPTRFRSTLAT),SHAPE(D%NPTRFRSTLAT) +ALLOCATE(D%NPTRLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9)'D%NPTRLSTLAT',SIZE(D%NPTRLSTLAT),SHAPE(D%NPTRLSTLAT) +ALLOCATE(D%LSPLITLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%LSPLITLAT',SIZE(D%LSPLITLAT),SHAPE(D%LSPLITLAT) +ALLOCATE(D%NPROCA_GP(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCA_GP',SIZE(D%NPROCA_GP),SHAPE(D%NPROCA_GP) + + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + ALLOCATE(ZDUM(1)) + CALL SUMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT,LEQ_REGIONS,& + &D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + &D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + &ZDUM,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + &IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN) +ELSE + CALL SUMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT,LEQ_REGIONS,& + &D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + &D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + &D%RWEIGHT,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + &IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN) +ENDIF +D%NDGL_GP = D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF + +IF (LLP1) THEN + IF(.NOT.D%LGRIDONLY) THEN + WRITE(NOUT,FMT='(/'' OUTPUT FROM ROUTINE SUMPLAT: ''/)') + WRITE(NOUT,FMT='('' D%NULTPP '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NULTPP(1:NPRTRNS) + WRITE(NOUT,FMT='('' D%NPROCL '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPROCL(1:R%NDGL) + ENDIF + WRITE(NOUT,FMT='('' D%NFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NFRSTLOFF D%NPTRFLOFF '')') + WRITE(NOUT,FMT='(2(1X,I6))') D%NFRSTLOFF, D%NPTRFLOFF + WRITE(NOUT,FMT='('' D%NPTRLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%LSPLITLAT '')') + WRITE(NOUT,FMT='(50(1X,L1))') D%LSPLITLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%NPTRFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NPTRLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='(/)') +ENDIF +ALLOCATE(D%NSTA(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTA ',SIZE(D%NSTA ),SHAPE(D%NSTA ) +ALLOCATE(D%NONL(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NONL ',SIZE(D%NONL ),SHAPE(D%NONL ) + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + CALL SUSTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,ZDUM,ZMEDIAP,D%NPROCA_GP) +ELSE + CALL SUSTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,D%RWEIGHT,ZMEDIAP,D%NPROCA_GP) +ENDIF +! IGPTOTL is the number of grid points in each individual processor +ALLOCATE(IGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IGPTOTL(:,:)=0 + +DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + IGPTOT = 0 + DO JGL=D%NPTRFRSTLAT(JA),D%NPTRLSTLAT(JA) + IGPTOT = IGPTOT+D%NONL(JGL,JB) + ENDDO + IGPTOTL(JA,JB) = IGPTOT + ENDDO +ENDDO +D%NGPTOT = IGPTOTL(MY_REGION_NS,MY_REGION_EW) +D%NGPTOTMX = MAXVAL(IGPTOTL) +D%NGPTOTG = SUM(IGPTOTL) +ALLOCATE(D%NGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NGPTOTL ',SIZE(D%NGPTOTL ),SHAPE(D%NGPTOTL ) +D%NGPTOTL(:,:) = IGPTOTL(:,:) + +IF(.NOT.D%LGRIDONLY) THEN + ALLOCATE(D%NSTAGTF(D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'D%NSTAGTF ',SIZE(D%NSTAGTF ),SHAPE(D%NSTAGTF ) + IOFF = 0 + DO JGL=1,D%NDGL_FS + D%NSTAGTF(JGL) = IOFF + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IOFF = IOFF + G%NLOEN(IGL)+3 + ENDDO + D%NLENGTF = IOFF +ENDIF + +IF(ALLOCATED(ZDUM)) DEALLOCATE(ZDUM) +DEALLOCATE(IGPTOTL) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUMP_TRANS +END MODULE SUMP_TRANS_MOD + diff --git a/src/trans/cpu/internal/sump_trans_preleg_mod.F90 b/src/trans/cpu/internal/sump_trans_preleg_mod.F90 new file mode 100644 index 0000000..72064d4 --- /dev/null +++ b/src/trans/cpu/internal/sump_trans_preleg_mod.F90 @@ -0,0 +1,149 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMP_TRANS_PRELEG_MOD +CONTAINS +SUBROUTINE SUMP_TRANS_PRELEG + +! Set up distributed environment for the transform package (part 1) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRW, NPRTRV, MYSETW + +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +!USE ABORT_TRANS_MOD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JW,JV,JJ,JM,JMLOC,ILATPP,IRESTL,IMLOC,IDT,INM,ILAST + +INTEGER(KIND=JPIM) :: IMYMS(R%NSMAX+1),INUMTPP(NPRTRW) +INTEGER(KIND=JPIM) :: IDUMI1,IDUMI2,IDUMI3 +INTEGER(KIND=JPIM) :: IDUM2(0:R%NSMAX), IDUM3(NPRTRW+1), IDUM4(R%NSMAX+1) + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + +IF(.NOT.D%LGRIDONLY) THEN + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS_PRELEG ===' + +!* 1. Initialize partitioning of wave numbers to PEs ! +! ---------------------------------------------- + + ALLOCATE(D%NASM0(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NASM0 ',SIZE(D%NASM0 ),SHAPE(D%NASM0 ) + ALLOCATE(D%NATM0(0:R%NTMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NATM0 ',SIZE(D%NATM0 ),SHAPE(D%NATM0 ) + ALLOCATE(D%NUMPP(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NUMPP ',SIZE(D%NUMPP ),SHAPE(D%NUMPP ) + ALLOCATE(D%NPOSSP(NPRTRW+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NPOSSP',SIZE(D%NPOSSP ),SHAPE(D%NPOSSP ) + ALLOCATE(D%NPROCM(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPROCM',SIZE(D%NPROCM ),SHAPE(D%NPROCM ) + ALLOCATE(D%NPTRMS(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NPTRMS ',SIZE(D%NPTRMS ),SHAPE(D%NPTRMS ) + ALLOCATE(D%NALLMS(R%NSMAX+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NALLMS ',SIZE(D%NALLMS ),SHAPE(D%NALLMS ) + ALLOCATE(D%NDIM0G(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NDIM0G ',SIZE(D%NDIM0G ),SHAPE(D%NDIM0G ) + + CALL SUWAVEDI(R%NSMAX,R%NTMAX,NPRTRW,MYSETW,& + &D%NASM0,D%NSPOLEGL,D%NPROCM,D%NUMPP,& + &D%NSPEC,D%NSPEC2,D%NSPEC2MX,D%NPOSSP,IMYMS,& + &D%NPTRMS,D%NALLMS,D%NDIM0G) + CALL SUWAVEDI(R%NTMAX,R%NTMAX,NPRTRW,MYSETW,& + &KASM0=D%NATM0,KUMPP=INUMTPP,KSPEC2=D%NTPEC2) + + D%NUMP = D%NUMPP (MYSETW) + ALLOCATE(D%MYMS(D%NUMP)) + IF(LLP2)WRITE(NOUT,9) 'D%MYMS ',SIZE(D%MYMS ),SHAPE(D%MYMS ) + D%MYMS(:) = IMYMS(1:D%NUMP) + D%NUMTP = INUMTPP(MYSETW) + ALLOCATE(D%NLATLS(NPRTRW,NPRTRV)) + IF(LLP2)WRITE(NOUT,9) 'D%NLATLS',SIZE(D%NLATLS ),SHAPE(D%NLATLS ) + ALLOCATE(D%NLATLE(NPRTRW,NPRTRV)) + IF(LLP2)WRITE(NOUT,9) 'D%NLATLE',SIZE(D%NLATLE ),SHAPE(D%NLATLE ) + + D%NLATLS(:,:) = 999999 + D%NLATLE(:,:) = -1 + + ILATPP = R%NDGNH/NPRTRW + IRESTL = R%NDGNH-NPRTRW*ILATPP + DO JW=1,NPRTRW + IF (JW > IRESTL) THEN + D%NLATLS(JW,1) = IRESTL*(ILATPP+1)+(JW-IRESTL-1)*ILATPP+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP-1 + ELSE + D%NLATLS(JW,1) = (JW-1)*(ILATPP+1)+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP + ENDIF + ENDDO + ILAST=0 + DO JW=1,NPRTRW + ILATPP = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)/NPRTRV + IRESTL = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)-NPRTRV*ILATPP + DO JV=1,NPRTRV + IF (JV > IRESTL) THEN + D%NLATLS(JW,JV) = IRESTL*(ILATPP+1)+(JV-IRESTL-1)*ILATPP+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP-1 + ELSE + D%NLATLS(JW,JV) = (JV-1)*(ILATPP+1)+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP + ENDIF + ENDDO + ILAST=D%NLATLE(JW,NPRTRV) + ENDDO + + IF (LLP1) THEN + DO JW=1,NPRTRW + DO JV=1,NPRTRV + WRITE(NOUT,'(" JW=",I6," JV=",I6," D%NLATLS=",I6," D%NLATLE=",I6)')& + & JW,JV,D%NLATLS(JW,JV),D%NLATLE(JW,JV) + ENDDO + ENDDO + ENDIF + + ALLOCATE(D%NPMT(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMT ',SIZE(D%NPMT ),SHAPE(D%NPMT ) + ALLOCATE(D%NPMS(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMS ',SIZE(D%NPMS ),SHAPE(D%NPMS ) + ALLOCATE(D%NPMG(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMG ',SIZE(D%NPMG ),SHAPE(D%NPMG ) + IDT = R%NTMAX-R%NSMAX + INM = 0 + DO JMLOC=1,D%NUMP + IMLOC = D%MYMS(JMLOC) + D%NPMT(IMLOC) = INM + D%NPMS(IMLOC) = INM+IDT + INM = INM+R%NTMAX+2-IMLOC + ENDDO + INM = 0 + DO JM=0,R%NSMAX + D%NPMG(JM) = INM + INM = INM+R%NTMAX+2-JM + ENDDO + + D%NLEI3D = (R%NLEI3-1)/NPRTRW+1 + +ENDIF + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUMP_TRANS_PRELEG +END MODULE SUMP_TRANS_PRELEG_MOD diff --git a/src/trans/cpu/internal/sumplat_mod.F90 b/src/trans/cpu/internal/sumplat_mod.F90 new file mode 100644 index 0000000..de34b38 --- /dev/null +++ b/src/trans/cpu/internal/sumplat_mod.F90 @@ -0,0 +1,256 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLAT_MOD +CONTAINS +SUBROUTINE SUMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& + &KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + &KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,LDSPLITLAT,KMYPROC,KLOEN) + +!**** *SUMPLAT * - Initialize gridpoint distrbution in N-S direction + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLAT * + +! Explicit arguments - input : +! -------------------- +! KDGL -last latitude +! KPROC -total number of processors +! KPROCA -number of processors in A direction +! KMYSETA -process number in A direction +! LDSPLIT -true for latitudes shared between sets +! LDEQ_REGIONS -true if eq_regions partitioning +! PWEIGHT -weight per grid-point if weighted distribution +! LDWEIGHTED_DISTR -true if weighted distribution + +! Explicit arguments - output: +! -------------------- +! PMEDIAP -mean weight per PE if weighted distribution +! KMEDIAP -mean number of grid points per PE +! KPROCAGP -number of grid points per A set +! KRESTM -number of PEs with one extra point +! KFRSTLAT -first latitude row on processor +! KLSTLAT -last latitude row on processor +! KFRSTLOFF -offset for first latitude in set +! KPTRLAT -pointer to start of latitude +! KPTRFRSTLAT-pointer to first latitude +! KPTRLSTLAT -pointer to last latitude +! KPTRFLOFF -offset for pointer to first latitude +! LDSPLITLAT -true for latitudes which are split + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. SUMPLATB and SUEMPLATB. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! David Dent:97-06-02 parameters KFRSTLAT etc added +! JF. Estrade:97-11-13 Adaptation to ALADIN case +! J.Boutahar: 98-07-06 phasing with CY19 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings +! (correct computation of extrapolar latitudes for KPROCL). +! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. +! - merge old sumplat.F and suemplat.F +! - gather 'lelam' code and 'not lelam' code. +! - clean (useless duplication of variables, non doctor features). +! - remodularise according to lelam/not lelam +! -> lelam features in new routine suemplatb.F, +! not lelam features in new routine sumplatb.F +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : MYPROC + +USE SUMPLATB_MOD ,ONLY : SUMPLATB +USE SUMPLATBEQ_MOD ,ONLY : SUMPLATBEQ +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + + +! * DUMMY: +REAL(KIND=JPRB),INTENT(OUT) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL,INTENT(OUT) :: LDSPLITLAT(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KMYPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(KDGL) + +! * LOCAL: +! === END OF INTERFACE BLOCK === +INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IPTRLATITUDE, JA, JGL + +LOGICAL :: LLFOURIER +LOGICAL :: LLDEBUG=.FALSE. + +! ----------------------------------------------------------------- + +!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF +! KMEDIAP, KRESTM, INDIC, ILAST. +! ----------------------------------------- +INDIC(:)=0 +ILAST(:)=0 + +IF(LDWEIGHTED_DISTR.AND..NOT.LDEQ_REGIONS)THEN + CALL ABORT_TRANS ('SUMPLAT: LDWEIGHTED_DISTR=T AND LDEQ_REGIONS=F NOT SUPPORTED') +ENDIF + +IF( LDEQ_REGIONS )THEN + CALL SUMPLATBEQ(1,KDGL,KPROC,KPROCA,KLOEN,LDSPLIT,LDEQ_REGIONS,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,INDIC,ILAST) +ELSE + LLFOURIER=.FALSE. + CALL SUMPLATB(1,KDGL,KPROCA,KLOEN,LDSPLIT,LLFOURIER,& + &KMEDIAP,KRESTM,INDIC,ILAST) +ENDIF + +! ----------------------------------------------------------------- + +!* 2. CODE NOT DEPENDING ON 'LELAM': COMPUTATION OF +! KFRSTLAT TO LDSPLITLAT. +! --------------------------------------------- + + +! * Computation of first and last latitude of processor sets +! ----------- in grid-point-space ----------------------- + +IF(KMYPROC==1.AND.LLDEBUG)THEN + WRITE(0,'("")') + WRITE(0,'("SUMPLAT_MOD:LDWEIGHTED_DISTR=",L1)')LDWEIGHTED_DISTR + WRITE(0,'("")') + DO JA=1,KPROCA + WRITE(0,'("SUMPLAT_MOD: JA=",I5," ILAST=",I5," INDIC=",I5)')& + &JA,ILAST(JA),INDIC(JA) + ENDDO + WRITE(0,'("")') + IF( LDEQ_REGIONS .AND. LDSPLIT )THEN + DO JA=1,KPROCA + WRITE(0,'("SUMPLAT_MOD: JA=",I5," KPROCAGP=",I12)')& + &JA,KPROCAGP(JA) + ENDDO + WRITE(0,'("")') + ENDIF +ENDIF + +KFRSTLAT(1) = 1 +KLSTLAT(KPROCA) = KDGL +DO JA=1,KPROCA-1 + IF ((.NOT. LDSPLIT) .OR. INDIC(JA) == 0) THEN + KFRSTLAT(JA+1) = ILAST(JA) + 1 + KLSTLAT(JA) = ILAST(JA) + ELSE + KFRSTLAT(JA+1) = INDIC(JA) + KLSTLAT(JA) = INDIC(JA) + ENDIF +ENDDO +KFRSTLOFF=KFRSTLAT(KMYSETA)-1 + +! * Initialise following data structures:- +! NPTRLAT (pointer to the start of each latitude) +! LSPLITLAT (TRUE if latitude is split over two A sets) +! NPTRFRSTLAT (pointer to the first latitude of each A set) +! NPTRLSTLAT (pointer to the last latitude of each A set) + +DO JGL=1,KDGL + KPTRLAT (JGL)=-999 + LDSPLITLAT(JGL)=.FALSE. +ENDDO +IPTRLATITUDE=0 +DO JA=1,KPROCA + DO JGL=KFRSTLAT(JA),KLSTLAT(JA) + IPTRLATITUDE=IPTRLATITUDE+1 + LDSPLITLAT(JGL)=.TRUE. + IF( KPTRLAT(JGL) == -999 )THEN + KPTRLAT(JGL)=IPTRLATITUDE + LDSPLITLAT(JGL)=.FALSE. + ENDIF + ENDDO +ENDDO +DO JA=1,KPROCA + IF( LDSPLITLAT(KFRSTLAT(JA)) .AND. JA /= 1)THEN + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA))+1 + ELSE + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA)) + ENDIF + IF( LDSPLITLAT(KLSTLAT(JA)) .AND. JA == KPROCA)THEN + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA))+1 + ELSE + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA)) + ENDIF +ENDDO +KPTRFLOFF=KPTRFRSTLAT(KMYSETA)-1 + +IF(KMYPROC==1.AND.LLDEBUG)THEN + DO JGL=1,KDGL + WRITE(0,'("SUMPLAT_MOD: JGL=",I5," KPTRLAT=",I5," LDSPLITLAT=",L4)')& + & JGL,KPTRLAT(JGL),LDSPLITLAT(JGL) + ENDDO + DO JA=1,KPROCA + WRITE(0,'("SUMPLAT_MOD: JA=",I5," KFRSTLAT=",I5," KLSTLAT=",I5,& + & " KPTRFRSTLAT=",I5," KPTRLSTLAT=",I5," KLSTLAT-KFRSTLAT=",I5,& + & " SUM(G%NLOEN(KFRSTLAT:KLSTLAT))=",I10)')& + & JA,KFRSTLAT(JA),KLSTLAT(JA),KPTRFRSTLAT(JA),KPTRLSTLAT(JA),& + & KLSTLAT(JA)-KFRSTLAT(JA),SUM(G%NLOEN(KFRSTLAT(JA):KLSTLAT(JA))) + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUMPLAT +END MODULE SUMPLAT_MOD + + + diff --git a/src/trans/cpu/internal/sumplatb_mod.F90 b/src/trans/cpu/internal/sumplatb_mod.F90 new file mode 100644 index 0000000..60ae75b --- /dev/null +++ b/src/trans/cpu/internal/sumplatb_mod.F90 @@ -0,0 +1,226 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 1998- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLATB_MOD +CONTAINS +SUBROUTINE SUMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,LDFOURIER,& + &KMEDIAP,KRESTM,KINDIC,KLAST) + +!**** *SUMPLATB * - Routine to initialize parallel environment + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLATB * + +! Explicit arguments - input : +! -------------------- +! KDGSA -first latitude (grid-space) +! (may be different from NDGSAG) +! KDGL -last latitude +! KPROCA -number of processors in A direction +! KLOENG -actual number of longitudes per latitude. +! LDSPLIT -true for latitudes shared between sets +! LDFOURIER -true for fourier space partitioning + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KRESTM -number of PEs with one extra point +! KINDIC -intermediate quantity for 'sumplat' +! KLAST -intermediate quantity for 'sumplat' + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! K. YESSAD (after old version of sumplat.F). + +! Modifications. +! -------------- +! Original : 98-12-07 +! G. Mozdzynski (August 2012): rewrite of fourier latitude distribution +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM, JPIB, JPRB + +USE TPM_DISTR +USE ABORT_TRANS_MOD + +IMPLICIT NONE + + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDFOURIER +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) + +! * LOCAL: +INTEGER(KIND=JPIB) :: ICOST(KDGSA:KDGL) +INTEGER(KIND=JPIM) :: ILATS(KPROCA) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: ICOMP, IGL, JA, JGL, ILAST, IREST, IA +INTEGER(KIND=JPIM) :: ITOT_TOP, ITOT_BOT, IGL_TOP, IGL_BOT +INTEGER(KIND=JPIB) :: IMEDIA,ITOT +REAL(KIND=JPRB) :: ZLG +LOGICAL :: LLDONE,LLSIMPLE + +! ----------------------------------------------------------------- + +!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. +! ---------------------------------------------- + +! * Computation of KMEDIAP and KRESTM. + +IF( LDFOURIER )THEN + +! DO JGL=1,KDGL +! ZLG=LOG(FLOAT(KLOENG(JGL))) +! ICOST(JGL)=KLOENG(JGL)*ZLG*SQRT(ZLG) +! ENDDO + + DO JGL=1,KDGL + ICOST(JGL)=KLOENG(JGL) + ENDDO + +ELSE + + DO JGL=1,KDGL + ICOST(JGL)=KLOENG(JGL) + ENDDO + +ENDIF + +IMEDIA = SUM(ICOST(KDGSA:KDGL)) +KMEDIAP = IMEDIA / KPROCA +KRESTM = IMEDIA - KMEDIAP * KPROCA +IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 + +! * Computation of intermediate quantities KINDIC and KLAST + +KINDIC(:)=0 +KLAST(:)=0 + +IF (LDSPLIT) THEN + + IREST = 0 + ILAST =0 + DO JA=1,KPROCA + IF (JA <= KRESTM .OR. KRESTM == 0) THEN + ICOMP = KMEDIAP + ELSE + ICOMP = KMEDIAP - 1 + ENDIF + ITOT = IREST + IGL = ILAST+1 + DO JGL=IGL,KDGL + ILAST = JGL + IF(ITOT+ICOST(JGL) < ICOMP) THEN + ITOT = ITOT+ICOST(JGL) + ELSEIF(ITOT+ICOST(JGL) == ICOMP) THEN + IREST = 0 + KLAST(JA) = JGL + KINDIC(JA) = 0 + EXIT + ELSE + IREST = ICOST(JGL) -(ICOMP-ITOT) + KLAST(JA) = JGL + KINDIC(JA) = JGL + EXIT + ENDIF + ENDDO + ENDDO + +ELSE + + ITOT_TOP=0 + ITOT_BOT=0 + IGL_TOP=1 + IGL_BOT=KDGL + DO JA=1,(KPROCA-1)/2+1 + IF( JA /= KPROCA/2+1 )THEN + LLDONE=.TRUE. + DO WHILE ( LLDONE ) + IF( ITOT_TOP+ICOST(IGL_TOP) < KMEDIAP )THEN + KLAST(JA)=IGL_TOP + ITOT_TOP=ITOT_TOP+ICOST(IGL_TOP) + IGL_TOP=IGL_TOP+1 + ELSE + ITOT_TOP=ITOT_TOP-KMEDIAP + LLDONE=.FALSE. + ENDIF + ENDDO + KLAST(KPROCA-JA+1)=IGL_BOT + LLDONE=.TRUE. + DO WHILE ( LLDONE ) + IF( ITOT_BOT+ICOST(IGL_BOT) < KMEDIAP )THEN + ITOT_BOT=ITOT_BOT+ICOST(IGL_BOT) + IGL_BOT=IGL_BOT-1 + ELSE + ITOT_BOT=ITOT_BOT-KMEDIAP + LLDONE=.FALSE. + ENDIF + ENDDO + ELSE + KLAST(JA)=IGL_BOT + ENDIF + ENDDO + + LLSIMPLE=.FALSE. + DO JA=1,KPROCA + IF( KLAST(JA)==0 )THEN + LLSIMPLE=.TRUE. + EXIT + ENDIF + ENDDO + IF( LLSIMPLE )THEN +! WRITE(0,'("SUMPLATB_MOD: REVERTING TO SIMPLE LATITUDE DISTRIBUTION")') + ILATS(:)=0 + IA=0 + DO JGL=1,KDGL + IA=IA+1 + ILATS(IA)=ILATS(IA)+1 + IF( IA==KPROCA ) IA=0 + ENDDO + KLAST(1)=ILATS(1) + DO JA=2,KPROCA + KLAST(JA)=KLAST(JA-1)+ILATS(JA) + ENDDO + ENDIF + +ENDIF + +END SUBROUTINE SUMPLATB +END MODULE SUMPLATB_MOD diff --git a/src/trans/cpu/internal/sumplatbeq_mod.F90 b/src/trans/cpu/internal/sumplatbeq_mod.F90 new file mode 100644 index 0000000..e9ba66a --- /dev/null +++ b/src/trans/cpu/internal/sumplatbeq_mod.F90 @@ -0,0 +1,289 @@ +! (C) Copyright 2006- ECMWF. +! (C) Copyright 2006- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLATBEQ_MOD +CONTAINS +SUBROUTINE SUMPLATBEQ(KDGSA,KDGL,KPROC,KPROCA,KLOENG,LDSPLIT,LDEQ_REGIONS,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,KINDIC,KLAST) + +!**** *SUMPLATBEQ * - Routine to initialize parallel environment +! (latitude partitioning for LEQ_REGIONS=T) + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLATBEQ * + +! Explicit arguments - input : +! -------------------- +! KDGSA -first latitude (grid-space) +! (may be different from NDGSAG) +! KDGL -last latitude +! KPROC -total number of processors +! KPROCA -number of processors in A direction +! KLOENG -actual number of longitudes per latitude. +! LDSPLIT -true for latitudes shared between sets +! LDEQ_REGIONS -true if eq_regions partitioning +! PWEIGHT -weight per grid-point if weighted distribution +! LDWEIGHTED_DISTR -true if weighted distribution + +! Explicit arguments - output: +! -------------------- +! PMEDIAP -mean weight per PE if weighted distribution +! KMEDIAP -mean number of grid points per PE +! KPROCAGP -number of grid points per A set +! KRESTM -number of PEs with one extra point +! KINDIC -intermediate quantity for 'sumplat' +! KLAST -intermediate quantity for 'sumplat' + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! G. Mozdzynski + +! Modifications. +! -------------- +! Original : April 2006 +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : MYPROC +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +REAL(KIND=JPRB),INTENT(OUT) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) + +! * LOCAL: + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: ICOMP, IGL, IMAXI, IMEDIA, IMEDIAP, ITOT, JA, JB, IA, JGL,& + &ILAST,IREST,IPE,I2REGIONS,IGP +REAL(KIND=JPRB) :: ZMEDIA, ZCOMP +LOGICAL :: LLDONE + +! ----------------------------------------------------------------- + +!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. +! ---------------------------------------------- +100 CONTINUE +! * Computation of KMEDIAP and KRESTM. + +IF (.NOT.LDWEIGHTED_DISTR) THEN + + IMEDIA = SUM(KLOENG(KDGSA:KDGL)) + KMEDIAP = IMEDIA / KPROC + + IF( KPROC > 1 )THEN +! test if KMEDIAP is too small and no more than 2 asets would be required +! for the first latitude + IF( LDSPLIT )THEN + I2REGIONS=N_REGIONS(1)+N_REGIONS(2) + IF( KMEDIAP < (KLOENG(KDGSA)-1)/I2REGIONS+1 )THEN + WRITE(0,'("SUMPLATBEQ: KMEDIAP=",I6," I2REGIONS=",I3," KLOENG(KDGSA)=",I4)')& + &KMEDIAP,I2REGIONS,KLOENG(KDGSA) + CALL ABORT_TRANS ('SUMPLATBEQ: NPROC TOO BIG FOR THIS RESOLUTION, LDSPLIT=T') + ENDIF + ELSE +! test for number asets too large for the number of latitudes + IF( KPROCA > KDGL )THEN + WRITE(0,'("SUMPLATBEQ: KMEDIAP=",I6," KPROCA=",I4," KDGL=",I4)')& + &KMEDIAP,KPROCA,KDGL + CALL ABORT_TRANS ('SUMPLATBEQ: NPROC TOO BIG FOR THIS RESOLUTION, LDSPLIT=F') + ENDIF + ENDIF + ENDIF + + KRESTM = IMEDIA - KMEDIAP * KPROC + IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 + +ELSE + + ZMEDIA = SUM(PWEIGHT(:)) + PMEDIAP = ZMEDIA / KPROC + +ENDIF + +! * Computation of intermediate quantities KINDIC and KLAST + +IF (LDSPLIT) THEN + + KPROCAGP(:)=0 + IREST = 0 + ILAST =0 + IPE=0 + ZCOMP=0 + IGP=0 + DO JA=1,KPROCA + ICOMP=0 + DO JB=1,N_REGIONS(JA) + IF( LDWEIGHTED_DISTR )THEN + DO WHILE ( ( JA == KPROCA .OR. ZCOMP < PMEDIAP ) .AND. IGP < SIZE(PWEIGHT) ) + IGP = IGP + 1 + ICOMP = ICOMP + 1 + ZCOMP = ZCOMP + PWEIGHT(IGP) + ENDDO + ZCOMP = ZCOMP - PMEDIAP + ELSE + IPE=IPE+1 + IF (IPE <= KRESTM .OR. KRESTM == 0) THEN + ICOMP = ICOMP + KMEDIAP + ELSE + ICOMP = ICOMP + (KMEDIAP-1) + ENDIF + ENDIF + ENDDO + KPROCAGP(JA)=ICOMP + ITOT = IREST + IGL = ILAST+1 + DO JGL=IGL,KDGL + ILAST = JGL + IF(ITOT+KLOENG(JGL) < ICOMP) THEN + ITOT = ITOT+KLOENG(JGL) + ELSEIF(ITOT+KLOENG(JGL) == ICOMP) THEN + IREST = 0 + KLAST(JA) = JGL + KINDIC(JA) = 0 + EXIT + ELSE + IREST = KLOENG(JGL) -(ICOMP-ITOT) + KLAST(JA) = JGL + KINDIC(JA) = JGL + EXIT + ENDIF + ENDDO + ENDDO + IF( LDWEIGHTED_DISTR )THEN + IF( KLAST(KPROCA) /= KDGL )THEN + DO JA=1,KPROCA + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ_MOD: JA=",I3," KLAST=",I3," KINDIC=",I3)')& + &JA,KLAST(JA),KINDIC(JA) + ENDIF + ENDDO + WRITE(0,'("SUMPLATBEQ: LWEIGHTED_DISTR=T FAILED TO PARTITION GRID, REVERTING TO ",& + & " LWEIGHTED_DISTR=F PARTITIONING")') + LDWEIGHTED_DISTR=.FALSE. + GOTO 100 + ENDIF + ENDIF + IF( SUM(KPROCAGP(:)) /= SUM(KLOENG(KDGSA:KDGL)) )THEN + IF( MYPROC == 1 )THEN + WRITE(0,'("SUM(KPROCAGP(:))=",I12)')SUM(KPROCAGP(:)) + WRITE(0,'("SUM(KLOENG(:))=",I12)')SUM(KLOENG(KDGSA:KDGL)) + ENDIF + CALL ABORT_TRANS ('SUMPLATBEQ: PROBLEM IN PARTITIONING ') + ENDIF + +ELSE + + IF( LDWEIGHTED_DISTR )THEN + CALL ABORT_TRANS ('SUMPLATBEQ: LSPLIT=F NOT SUPPORTED FOR WEIGHTED DISTRIBUTION ') + ENDIF + + KINDIC(:) = 0 + LLDONE=.FALSE. + IMEDIAP=KMEDIAP + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: IMEDIAP=",I6)')IMEDIAP + ENDIF + DO WHILE(.NOT.LLDONE) +! loop until a satisfactory distribution can be found + IA=1 + IMAXI=IMEDIAP*N_REGIONS(IA) + DO JGL=1,KDGL + KLAST(IA)=JGL + IMAXI=IMAXI-KLOENG(JGL) + IF( IA == KPROCA .AND. JGL == KDGL )THEN + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: EXIT 1")') + ENDIF + EXIT + ENDIF + IF( IA == KPROCA .AND. JGL < KDGL )THEN + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: EXIT 2")') + ENDIF + KLAST(KPROCA)=KDGL + EXIT + ENDIF + IF( IA < KPROCA .AND. JGL == KDGL )THEN + DO JA=KPROCA,IA+1,-1 + KLAST(JA)=KDGL+JA-KPROCA + ENDDO + DO JA=KPROCA,2,-1 + IF( KLAST(JA) <= KLAST(JA-1) )THEN + KLAST(JA-1)=KLAST(JA)-1 + ENDIF + ENDDO + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: EXIT 3")') + ENDIF + EXIT + ENDIF + IF( IMAXI <= 0 )THEN + IA=IA+1 + IMAXI=IMAXI+IMEDIAP*N_REGIONS(IA) + ENDIF + ENDDO + IF( KPROCA > 1 .AND. KLAST(KPROCA) == KLAST(KPROCA-1) )THEN + IMEDIAP=IMEDIAP-1 + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: REDUCING IMEDIAP=",I6)')IMEDIAP + ENDIF + IF( IMEDIAP <= 0 )THEN + CALL ABORT_TRANS ('SUMPLATBEQ: PROBLEM PARTITIONING WITH LSPLIT=F, IMEDIAP <= 0') + ENDIF + ELSE + LLDONE=.TRUE. + ENDIF + ENDDO +ENDIF + +END SUBROUTINE SUMPLATBEQ +END MODULE SUMPLATBEQ_MOD diff --git a/src/trans/cpu/internal/sumplatf_mod.F90 b/src/trans/cpu/internal/sumplatf_mod.F90 new file mode 100644 index 0000000..1b4f1fd --- /dev/null +++ b/src/trans/cpu/internal/sumplatf_mod.F90 @@ -0,0 +1,150 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLATF_MOD +CONTAINS +SUBROUTINE SUMPLATF(KDGL,KPROCA,KMYSETA,& + &KULTPP,KPROCL,KPTRLS) + +!**** *SUMPLATF * - Initialize fourier space distibution in N-S direction + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLATF * + +! Explicit arguments - input : +! -------------------- +! KDGL -last latitude +! KPROCA -number of processors in A direction +! KMYSETA -process number in A direction + +! Explicit arguments - output: +! -------------------- + +! KULTPP -number of latitudes in process +! (in Fourier space) +! KPROCL -process responsible for latitude +! (in Fourier space) +! KPTRLS -pointer to first global latitude +! of process (in Fourier space) + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. SUMPLATB and SUEMPLATB. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! David Dent:97-06-02 parameters KFRSTLAT etc added +! JF. Estrade:97-11-13 Adaptation to ALADIN case +! J.Boutahar: 98-07-06 phasing with CY19 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings +! (correct computation of extrapolar latitudes for KPROCL). +! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. +! - merge old sumplat.F and suemplat.F +! - gather 'lelam' code and 'not lelam' code. +! - clean (useless duplication of variables, non doctor features). +! - remodularise according to lelam/not lelam +! -> lelam features in new routine suemplatb.F, +! not lelam features in new routine sumplatb.F +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEOMETRY ,ONLY : G + +USE SUMPLATB_MOD ,ONLY : SUMPLATB +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA +INTEGER(KIND=JPIM),INTENT(OUT) :: KULTPP(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCL(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLS(:) + +! * LOCAL: +INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IA, ILAT, ISTART, IMEDIAP,IRESTM, JA, JLTLOC + +LOGICAL :: LLSPLIT,LLFOURIER + +! ----------------------------------------------------------------- + +!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF +! KMEDIAP, KRESTM, INDIC, ILAST. +! ----------------------------------------- + +LLSPLIT = .FALSE. +LLFOURIER = .TRUE. + +CALL SUMPLATB(1,KDGL,KPROCA,G%NLOEN,LLSPLIT,LLFOURIER,& + &IMEDIAP,IRESTM,INDIC,ILAST) + +! ----------------------------------------------------------------- + +!* 2. CODE NOT DEPENDING ON 'LELAM': +! ------------------------------ + + + +! * Definitions related to distribution of latitudes along sets +! ------------ in fourier-space ----------------------------- +ISTART = 0 +KULTPP(1) = ILAST(1) +DO JA=1,KPROCA + IF(JA > 1) THEN + IF(ILAST(JA) /= 0) THEN + KULTPP(JA) = ILAST(JA)-ILAST(JA-1) + ELSE + KULTPP(JA) = 0 + ENDIF + ENDIF + DO JLTLOC=1,KULTPP(JA) + ILAT = ISTART + JLTLOC + KPROCL(ILAT) = JA + ENDDO + ISTART = ISTART + KULTPP(JA) +ENDDO + +! * Computes KPTRLS. + +IA = KPROCL(1) +KPTRLS(IA) = 1 +DO JA=IA+1,KPROCA + KPTRLS(JA) = KPTRLS(JA-1) + KULTPP(JA-1) +ENDDO + +END SUBROUTINE SUMPLATF +END MODULE SUMPLATF_MOD diff --git a/src/trans/cpu/internal/supol_mod.F90 b/src/trans/cpu/internal/supol_mod.F90 new file mode 100644 index 0000000..da7fcbf --- /dev/null +++ b/src/trans/cpu/internal/supol_mod.F90 @@ -0,0 +1,173 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUPOL_MOD +CONTAINS +SUBROUTINE SUPOL(KNSMAX,PDDMU,PFN,PDDPOL) + +!**** *SUPOL * - Routine to compute the Legendre polynomials + +! Purpose. +! -------- +! For a given value of mu, computes the Legendre polynomials. + +!** Interface. +! ---------- +! *CALL* *SUPOL(...) + +! Explicit arguments : +! -------------------- +! KNSMAX : Truncation (triangular) [in] +! PDDMU : Abscissa at which the polynomials are computed (mu) [in] +! PFN : Fourier coefficients of series expansion +! for the ordinary Legendre polynomials [in] +! PDDPOL : Polynomials (the first index is m and the second n) [out] + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- +! See documentation about spectral transforms +! (doc (IDTS) by K. Yessad, appendix 3, or doc (NTA30) by M. Rochas) + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! K. YESSAD (MAY 1998): modification to avoid underflow. +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision +! on NEC +! K. YESSAD (NOV 2008): make consistent arp/SUPOLA and tfl/SUPOL. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! R. El Khatib 30-Apr-2013 Open-MP parallelization +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM +USE TPM_POL + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KNSMAX +REAL(KIND=JPRD) ,INTENT(IN) :: PDDMU +REAL(KIND=JPRD) ,INTENT(IN) :: PFN(0:KNSMAX,0:KNSMAX) + +REAL(KIND=JPRD) ,INTENT(OUT) :: PDDPOL(0:KNSMAX,0:KNSMAX) + +REAL(KIND=JPRD) :: ZDLX,ZDLX1,ZDLSITA,ZDL1SITA,ZDLS,ZDLK,ZDLLDN + +INTEGER(KIND=JPIM) :: JM, JN, JK +REAL(KIND=JPRD) :: Z +REAL(KIND=JPRD) :: DCL, DDL + +! ------------------------------------------------------------------ + +!* 1. First two columns. +! ------------------ + +ZDLX=PDDMU +ZDLX1=ACOS(ZDLX) +ZDLSITA=SQRT(1.0_JPRD-ZDLX*ZDLX) + +PDDPOL(0,0)=1._JPRD +ZDLLDN = 0.0_JPRD + +! IF WE ARE LESS THAN 1Meter FROM THE POLE, +IF(ABS(REAL(ZDLSITA,KIND(Z))) <= SQRT(EPSILON(Z)))THEN + ZDLX=1._JPRD + ZDLSITA=0._JPRD + ZDL1SITA=0._JPRD +ELSE + ZDL1SITA=1.0_JPRD/ZDLSITA +ENDIF + +!* ordinary Legendre polynomials from series expansion +! --------------------------------------------------- + +! even N +!$OMP PARALLEL DO PRIVATE(JN,ZDLK,ZDLLDN,JK) +DO JN=2,KNSMAX,2 + ZDLK = 0.5_JPRD*PFN(JN,0) + ZDLLDN = 0.0_JPRD + ! represented by only even k + DO JK=2,JN,2 + ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 + ZDLK = ZDLK + PFN(JN,JK)*COS(DDI(JK)*ZDLX1) + ! normalised associated Legendre polynomial == \overbar{P_n}^1 + ZDLLDN = ZDLLDN + DDA(JN)*PFN(JN,JK)*DDI(JK)*SIN(DDI(JK)*ZDLX1) + ENDDO + PDDPOL(0,JN) = ZDLK + PDDPOL(1,JN) = ZDLLDN +ENDDO +!$OMP END PARALLEL DO +! odd N +!$OMP PARALLEL DO PRIVATE(JN,ZDLK,ZDLLDN,JK) +DO JN=1,KNSMAX,2 + ZDLK = 0.0_JPRD + ZDLLDN = 0.0_JPRD + ! represented by only odd k + DO JK=1,JN,2 + ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 + ZDLK = ZDLK + PFN(JN,JK)*COS(DDI(JK)*ZDLX1) + ! normalised associated Legendre polynomial == \overbar{P_n}^1 + ZDLLDN = ZDLLDN + DDA(JN)*PFN(JN,JK)*DDI(JK)*SIN(DDI(JK)*ZDLX1) + ENDDO + PDDPOL(0,JN) = ZDLK + PDDPOL(1,JN) = ZDLLDN +ENDDO +!$OMP END PARALLEL DO + +! ------------------------------------------------------------------ + +!* 2. Diagonal (the terms 0,0 and 1,1 have already been computed) +! Belousov, equation (23) +! ----------------------------------------------------------- + +ZDLS=ZDL1SITA*TINY(ZDLS) + +#ifdef VPP +!OCL SCALAR +#endif +DO JN=2,KNSMAX + PDDPOL(JN,JN)=PDDPOL(JN-1,JN-1)*ZDLSITA*DDH(JN) + IF ( ABS(PDDPOL(JN,JN)) < ZDLS ) PDDPOL(JN,JN)=0.0_JPRD +ENDDO + +! ------------------------------------------------------------------ + +!* 3. General recurrence (Belousov, equation 17) +! ----------------------------------------- + +DO JN=3,KNSMAX +!DIR$ IVDEP +!OCL NOVREC + DO JM=2,JN-1 + PDDPOL(JM,JN)=DDC(JM,JN)*PDDPOL(JM-2,JN-2)& + &-DDD(JM,JN)*PDDPOL(JM-2,JN-1)*ZDLX & + &+DDE(JM,JN)*PDDPOL(JM ,JN-1)*ZDLX + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE SUPOL +END MODULE SUPOL_MOD diff --git a/src/trans/cpu/internal/supolf_mod.F90 b/src/trans/cpu/internal/supolf_mod.F90 new file mode 100644 index 0000000..06d599d --- /dev/null +++ b/src/trans/cpu/internal/supolf_mod.F90 @@ -0,0 +1,284 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUPOLF_MOD +CONTAINS +SUBROUTINE SUPOLF(KM,KNSMAX,DDMU,DDPOL,KCHEAP) + +!**** *SUPOL * - Routine to compute the Legendre polynomials + +! Purpose. +! -------- +! For a given value of mu and M, computes the Legendre +! polynomials upto KNSMAX + +!** Interface. +! ---------- +! *CALL* *SUPOLF(KM,KNSMAX,DDMU,DDPOL,KCHEAP) + +! Explicit arguments : +! -------------------- +! KM : zonal wavenumber M +! KNSMAX : Truncation (triangular) +! DDMU : Abscissa at which the polynomials are computed (mu) +! DDPOL : Polynomials (the first index is m and the second n) +! KCHEAP : odd/even saving switch + + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Nils Wedi + George Mozdzynski + Mats Hamrud + +! Modifications. +! -------------- +! Original : 87-10-15 +! K. YESSAD (MAY 1998): modification to avoid underflow. +! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision +! on NEC +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +USE TPM_POL + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KNSMAX +REAL(KIND=JPRD) ,INTENT(IN) :: DDMU +REAL(KIND=JPRD) ,INTENT(OUT) :: DDPOL(0:KNSMAX) + +INTEGER(KIND=JPIM),INTENT(IN), OPTIONAL :: KCHEAP + +REAL(KIND=JPRD) :: DLX,DLX1,DLSITA,DLSITA2,DL1SITA,DLK,DL1, DLKM1, DLKM2 + +INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(DLX) + +INTEGER(KIND=JPIM) :: JN, KKL, ICHEAP, IC, IEND +REAL(KIND=JPRD) :: DCL, DDL + +REAL(KIND=JPRD) :: ZFAC, ZLSITA, ZFAC0, ZFAC1, ZMULT, ZEPS + +INTEGER(KIND=JPIM) :: JCORR, ICORR3, ICORR(KNSMAX) +REAL(KIND=JPRD) :: ZSCALE, ZISCALE + +DCL(KKL)=SQRT((REAL(KKL-KM+1,JPKD)*REAL(KKL-KM+2,JPKD)* & + & REAL(KKL+KM+1,JPKD)*REAL(KKL+KM+2,JPKD))/(REAL(2*KKL+1,JPKD)*REAL(2*KKL+3,JPKD)*& + & REAL(2*KKL+3,JPKD)*REAL(2*KKL+5,JPKD))) +DDL(KKL)=(2.0_JPKD*REAL(KKL,JPKD)*REAL(KKL+1,JPKD)-2.0_JPKD*REAL(KM**2,JPKD)-1.0_JPKD)/ & + & (REAL(2*KKL-1,JPKD)*REAL(2*KKL+3,JPKD)) + +! ------------------------------------------------------------------ + +!* 1. First two columns. +! ------------------ + +ZEPS = EPSILON(ZSCALE) +ICORR3=0 + +ICHEAP=1 +IF( PRESENT(KCHEAP) ) THEN + ICHEAP = KCHEAP +ENDIF + +DLX=DDMU +DLX1=ACOS(DLX) +DLSITA2=1.0_JPRD-DLX*DLX +DLSITA=SQRT(DLSITA2) + +!* ordinary Legendre polynomials from series expansion +! --------------------------------------------------- + +! this is supol_fast just using single KM +IF( ABS(REAL(DLSITA,JPRD)) <= ZEPS ) THEN + DLX=1._JPRD + DLSITA=0._JPRD + DL1SITA=0._JPRD + DLSITA2=0._JPRD +ELSE + DL1SITA=1.0_JPRD/DLSITA +ENDIF + +DLKM2=1._JPRD +DLKM1=DLX + +IF( KM == 0 ) THEN + DDPOL(0)=DLKM2 + DDPOL(1)=DLKM1*DFB(1)/DFA(1) + DO JN=2,KNSMAX + DLK=DFF(JN)*DLX*DLKM1-DFG(JN)*DLKM2 + DL1=DFI(JN)*(DLKM1-DLX*DLK)*DL1SITA + DDPOL(JN)=DLK*DFB(JN)/DFA(JN) + DLKM2=DLKM1 + DLKM1=DLK + ENDDO +ELSEIF( KM == 1 ) THEN + DDPOL(0)=0 + DDPOL(1)=DLSITA*DFB(1) + DO JN=2,KNSMAX + DLK=DFF(JN)*DLX*DLKM1-DFG(JN)*DLKM2 + DL1=DFI(JN)*(DLKM1-DLX*DLK)*DL1SITA + DDPOL(JN)=DL1*DFB(JN) + DLKM2=DLKM1 + DLKM1=DLK + ENDDO +ELSE + +! ------------------------------------------------------------------ +!* KM >= 2 +! ------------------------------------------------------------------ + +! ZSCALE=1._JPRD/ZEPS + ! Maintaining the consistency with the CY41R1 reference + ZSCALE=1.0E+100_JPRD + ZISCALE=1.0E-100_JPRD + ! General case + !ZSCALE = 10._JPRD**( MAXEXPONENT(ZSCALE)/10) + !ZISCALE = 10._JPRD**(-MAXEXPONENT(ZSCALE)/10) + + IEND=KM/2 + ZLSITA=1._JPRD +! WRITE(*,*) 'SUPOLF: DLSITA2=',DLSITA2,' DDMU=',DDMU,' DLX=',DLX + DO JN=1,IEND + ZLSITA=ZLSITA*DLSITA2 + IF( ABS(ZLSITA) < ZISCALE ) THEN + ZLSITA=ZLSITA*ZSCALE + ICORR3=ICORR3+1 + ENDIF + ENDDO + IF( MOD(KM,2) == 1 ) ZLSITA=ZLSITA*DLSITA +! WRITE(*,*) 'SUPOLF: ZSCALE=',ZSCALE,' ICORR3=',ICORR3,' KM=',KM,' ZLSITA=',ZLSITA + + ZFAC0=1._JPRD + ZFAC=1._JPRD + DO JN=1,KM-1 + ZFAC=ZFAC*SQRT(REAL(2*JN-1,JPRD)) + ZFAC=ZFAC/SQRT(REAL(2*JN,JPRD)) + ENDDO + ZFAC=ZFAC*SQRT(REAL(2*KM-1,JPRD)) +! WRITE(*,*) 'SUPOLF: ZSCALE=',ZSCALE,' ICORR3=',ICORR3,' ZFAC=',ZFAC + + ZFAC1=1._JPRD + DO IC=0,MIN(KNSMAX-KM,3) + + ! (2m+i)! + ZFAC0 = ZFAC0 * REAL(2*KM+IC,JPRD) + + SELECT CASE (IC) + CASE (0) + ZMULT=ZFAC + CASE (1) + ZFAC=ZFAC*REAL(2*KM+IC,JPRD) + ZMULT=ZFAC*DLX + CASE (2) + ZMULT=0.5_JPRD*ZFAC*(REAL(2*KM+3,JPRD)*DLX*DLX-1._JPRD) + CASE (3) + ZFAC=ZFAC*REAL(2*KM+IC,JPRD) + ZMULT=(1._JPRD/6._JPRD)*DLX*ZFAC*(REAL(2*KM+5,JPRD)*DLX*DLX-3._JPRD) + END SELECT + + DDPOL(KM+IC) = ZLSITA*ZMULT*SQRT(2._JPRD*(REAL(KM+IC,JPRD)+0.5_JPRD)*ZFAC1/ZFAC0) + + ZFAC1=ZFAC1*REAL(IC+1,JPRD) + + ENDDO + + ICORR(:)=ICORR3 + IF( ICHEAP == 2 ) THEN + ! symmetric case + DO JN=KM+2,KNSMAX-2,2 + + IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN + DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE + DDPOL(JN)=DDPOL(JN)/ZSCALE + ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 + ENDIF + + DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) + ENDDO + + DO JN=KM,KNSMAX,2 + DO JCORR=1,ICORR(JN) + DDPOL(JN)=DDPOL(JN)/ZSCALE + IF( DDPOL(JN) < ZEPS ) THEN + DDPOL(JN) = ZEPS + ENDIF + ENDDO + ENDDO + + ELSEIF( ICHEAP == 3 ) THEN + ! antisymmetric case + DO JN=KM+3,KNSMAX-2,2 + + IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN + DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE + DDPOL(JN)=DDPOL(JN)/ZSCALE + ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 + ENDIF + + DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) + ENDDO + + DO JN=KM+1,KNSMAX,2 + DO JCORR=1,ICORR(JN) + DDPOL(JN)=DDPOL(JN)/ZSCALE + IF( DDPOL(JN) < ZEPS ) THEN + DDPOL(JN) = ZEPS + ENDIF + ENDDO + ENDDO + + ELSE + DO JN=KM+2,KNSMAX-2 + + IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN + DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE + DDPOL(JN-1)=DDPOL(JN-1)/ZSCALE + DDPOL(JN)=DDPOL(JN)/ZSCALE + DDPOL(JN+1)=DDPOL(JN+1)/ZSCALE + ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 + ENDIF + + DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) + + ENDDO + + DO JN=KM,KNSMAX + DO JCORR=1,ICORR(JN) + DDPOL(JN)=DDPOL(JN)/ZSCALE + IF( DDPOL(JN) < ZEPS ) THEN + DDPOL(JN) = ZEPS + ENDIF + ENDDO + ENDDO + + ENDIF + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUPOLF +END MODULE SUPOLF_MOD diff --git a/src/trans/cpu/internal/sustaonl_mod.F90 b/src/trans/cpu/internal/sustaonl_mod.F90 new file mode 100644 index 0000000..57f522e --- /dev/null +++ b/src/trans/cpu/internal/sustaonl_mod.F90 @@ -0,0 +1,457 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUSTAONL_MOD +CONTAINS +SUBROUTINE SUSTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) + +!**** *SUSTAONL * - Routine to initialize parallel environment + +! Purpose. +! -------- +! Initialize D%NSTA and D%NONL. +! Calculation of distribution of grid points to processors : +! Splitting of grid in B direction + +!** Interface. +! ---------- +! *CALL* *SUSTAONL * + +! Explicit arguments : +! -------------------- +! KMEDIAP - mean number of grid points per PE +! KRESTM - number of PEs with one extra point +! LDWEIGHTED_DISTR -true if weighted distribution +! PWEIGHT -weight per grid-point if weighted distribution +! PMEDIAP -mean weight per PE if weighted distribution +! KPROCAGP -number of grid points per A set + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! Modified 98-12-04 C. Fischer: merge with SUESTAONL (Aladin) +! R. El Khatib 05-Apr-2007 Enable back vectorization on NEC +! R. El Khatib 30-Apr-2013 Optimization +! R. El Khatib 26-Apr-2018 vectorization +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE MPL_MODULE ,ONLY : MPL_ALLGATHERV, MPL_RECV, MPL_SEND + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC + +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +! DUMMY +INTEGER(KIND=JPIM),INTENT(IN) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KRESTM +REAL(KIND=JPRB),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDWEIGHTED_DISTR +REAL(KIND=JPRB),INTENT(IN) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(:) + +! LOCAL + +INTEGER(KIND=JPIM) :: IXPTLAT(R%NDGL), ILSTPTLAT(R%NDGL),ISENDREQ(NPROC) +INTEGER(KIND=JPIM) :: ICHK(R%NDLON,R%NDGL), ICOMBUF(R%NDGL*N_REGIONS_EW*2) +INTEGER(KIND=JPIM) :: I1, I2, IBUFLEN, IDGLG, IDWIDE,& + &IGL, IGL1, IGL2, IGLOFF, IGPTA, & + &IGPTPRSETS, IGPTS, IGPTSP, ILEN, ILRECV, & + &ILSEND, INPLAT, INXLAT, IPOS, & + &IPROCB, IPTSRE, IRECV, IPE, & + &IREST, ISEND, ITAG, JA, JB, JGL, JL, JNPTSRE, & + &ILAT, ILON, ILOEN +INTEGER(KIND=JPIM),ALLOCATABLE :: ICOMBUFG(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZWEIGHT(:,:) +INTEGER(KIND=JPIM) :: JJ, ILENG(NPROC), IOFF(NPROC) +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 + +LOGICAL :: LLABORT +LOGICAL :: LLP1,LLP2 + +REAL(KIND=JPRB) :: ZDIVID(R%NDGL) +REAL(KIND=JPRB) :: ZCOMP,ZPI,ZLON +INTEGER(KIND=JPIM) :: ILATMD,ILATMD1 + +! ----------------------------------------------------------------- + +ZPI = 2.0_JPRB*ASIN(1.0_JPRB) + +IXPTLAT (:)=999999 +ILSTPTLAT(:)=999999 + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IDWIDE = R%NDGL/2 +IBUFLEN = R%NDGL*N_REGIONS_EW*2 +IDGLG = R%NDGL + +I1 = MAX( 1,D%NFRSTLAT(MY_REGION_NS)-D%NFRSTLOFF) +I2 = MIN(IDGLG,D%NLSTLAT (MY_REGION_NS)-D%NFRSTLOFF) + +ILEN = D%NLSTLAT(MY_REGION_NS) - D%NFRSTLAT(MY_REGION_NS)+1 + +IGPTPRSETS = SUM(G%NLOEN(1:D%NFRSTLAT(MY_REGION_NS)-1)) + +IF (D%LSPLIT) THEN + IF( LEQ_REGIONS )THEN + IGPTA=0 + DO JA=1,MY_REGION_NS-1 + IGPTA = IGPTA + KPROCAGP(JA) + ENDDO + IGPTS = KPROCAGP(MY_REGION_NS) + ELSE + IF (MY_REGION_NS <= KRESTM.OR.KRESTM == 0) THEN + IGPTS = KMEDIAP + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ELSE + IGPTS = KMEDIAP-1 + IGPTA = KMEDIAP*KRESTM+IGPTS*(MY_REGION_NS-1-KRESTM) + ENDIF + ENDIF +ELSE + IGPTA = IGPTPRSETS + IGPTS = SUM(G%NLOEN(D%NFRSTLAT(MY_REGION_NS):D%NLSTLAT(MY_REGION_NS))) +ENDIF + +IGPTSP = IGPTS/N_REGIONS(MY_REGION_NS) +IREST = IGPTS-N_REGIONS(MY_REGION_NS)*IGPTSP +IXPTLAT(1) = IGPTA-IGPTPRSETS+1 +ILSTPTLAT(1) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)) +INPLAT = G%NLOEN(D%NFRSTLAT(MY_REGION_NS))-IXPTLAT(1)+1 +DO JGL=2,ILEN + IXPTLAT(JGL) = 1 + ILSTPTLAT(JGL) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) + INPLAT = INPLAT+G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) +ENDDO +ILSTPTLAT(ILEN) = G%NLOEN(D%NLSTLAT(MY_REGION_NS))-INPLAT+IGPTS + +DO JB=1,N_REGIONS_EW + DO JGL=1,R%NDGL+N_REGIONS_NS-1 + D%NSTA(JGL,JB) = 0 + D%NONL(JGL,JB) = 0 + ENDDO +ENDDO + + +! grid point decomposition +! --------------------------------------- +IF( NPROC > 1 )THEN + DO JGL=1,ILEN + ZDIVID(JGL) = 360000.0_JPRB/REAL(G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1),JPRB) + ENDDO + IF( LDWEIGHTED_DISTR )THEN + ALLOCATE(ZWEIGHT(G%NLOEN(R%NDGL/2),R%NDGL)) + IGL=0 + DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IGL=IGL+1 + ZWEIGHT(JL,JGL)=PWEIGHT(IGL) + ENDDO + ENDDO + ZCOMP=0 + IGPTS=0 + ENDIF + + DO JB=1,N_REGIONS(MY_REGION_NS) + + IF( .NOT.LDWEIGHTED_DISTR )THEN + + IF (JB <= IREST) THEN + IPTSRE = IGPTSP+1 + ELSE + IPTSRE = IGPTSP + ENDIF + DO JNPTSRE=1,IPTSRE + + ILATMD = 360000 + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + ILATMD1 = NINT(REAL(IXPTLAT(JGL)-1,JPRB)*ZDIVID(JGL)) + IF(ILATMD1 < ILATMD) THEN + ILATMD = ILATMD1 + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ENDDO + + ELSE + + DO WHILE ( (JB < N_REGIONS(MY_REGION_NS) .AND. ZCOMP < PMEDIAP) & + & .OR. (JB == N_REGIONS(MY_REGION_NS) .AND. IGPTS < KPROCAGP(MY_REGION_NS)) ) + + IGPTS = IGPTS + 1 + ILATMD = 360000 + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + ILATMD1 = NINT(REAL(IXPTLAT(JGL)-1,JPRB)*ZDIVID(JGL)) + IF(ILATMD1 < ILATMD) THEN + ILATMD = ILATMD1 + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + IF(IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1)THEN + CALL ABORT_TRANS(' SUSTAONL: IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1') + ENDIF + ILON=D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + ILAT=D%NFRSTLAT(MY_REGION_NS)+INXLAT-1 + ILOEN=G%NLOEN(ILAT) + IF(ILON<1.OR.ILON>ILOEN)THEN + CALL ABORT_TRANS(' SUSTAONL: ILON<1.OR.ILON>ILOEN') + ENDIF + ZCOMP = ZCOMP + ZWEIGHT(ILON,ILAT) + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ENDDO + + ZCOMP = ZCOMP - PMEDIAP + + ENDIF + + ENDDO + + IF( LDWEIGHTED_DISTR )THEN + DEALLOCATE(ZWEIGHT) + ENDIF + + ! Exchange local partitioning info to produce global view + ! + + CALL GSTATS_BARRIER(795) + CALL GSTATS(814,0) + IF( LEQ_REGIONS )THEN + + ITAG = MTAGPART + IPOS = 0 + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + ILENG(NPRCIDS(IRECV))=ILEN + ENDDO + ENDDO + IOFF(1)=0 + DO JJ=2,NPROC + IOFF(JJ)=IOFF(JJ-1)+ILENG(JJ-1) + ENDDO + ALLOCATE(ICOMBUFG(SUM(ILENG(:)))) + CALL MPL_ALLGATHERV(ICOMBUF(1:ILSEND),ICOMBUFG,ILENG,CDSTRING='SUSTAONL') + DO JA=1,N_REGIONS_NS + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + IPOS = IOFF(NPRCIDS(IRECV)) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUFG(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUFG(IPOS) + ENDDO + ENDIF + ENDDO + ENDDO + DEALLOCATE(ICOMBUFG) + + ELSE + + ITAG = MTAGPART + IPOS = 0 + DO JB=1,N_REGIONS(MY_REGION_NS) + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,JB) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,JB) + ENDDO + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + DO JA=1,N_REGIONS_NS + CALL SET2PE(ISEND,JA,MY_REGION_EW,0,0) + IF(ISEND /= MYPROC) THEN + CALL MPL_SEND(ICOMBUF(1:ILSEND),KDEST=NPRCIDS(ISEND),KTAG=ITAG, & + & CDSTRING='SUSTAONL:') + ENDIF + ENDDO + + DO JA=1,N_REGIONS_NS + CALL SET2PE(IRECV,JA,MY_REGION_EW,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*N_REGIONS(JA)*2 + CALL MPL_RECV(ICOMBUF(1:ILEN),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG, & + & KOUNT=ILRECV,CDSTRING='SUSTAONL:') + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IPOS = 0 + DO JB=1,N_REGIONS(JA) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUF(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUF(IPOS) + ENDDO + ENDDO + ENDIF + ENDDO + + ENDIF + CALL GSTATS(814,1) + CALL GSTATS_BARRIER2(795) +ELSE + DO JGL=1,R%NDGL + D%NSTA(JGL,1) = 1 + D%NONL(JGL,1) = G%NLOEN(JGL) + ENDDO +ENDIF + +! Confirm consistency of global partitioning, specifically testing for +! multiple assignments of same grid point and unassigned grid points + +LLABORT = .FALSE. +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + ICHK(JL,JGL) = 1 + ENDDO +ENDDO +DO JA=1,N_REGIONS_NS + IGLOFF = D%NPTRFRSTLAT(JA) + DO JB=1,N_REGIONS(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL = IGLOFF+JGL-IGL1 + DO JL=D%NSTA(IGL,JB),D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + IF( ICHK(JL,JGL) /= 1 )THEN + WRITE(NOUT,'(" SUSTAONL : seta=",i4," setb=",i4,& + &" row=",I4," sta=",I4," INVALID GRID POINT")')& + &JA,JB,JGL,JL + WRITE(0,'(" SUSTAONL : seta=",i4," setb=",i4,& + &" ROW=",I4," sta=",I4," INVALID GRID POINT")')& + &JA,JB,JGL,JL + LLABORT = .TRUE. + ENDIF + ICHK(JL,JGL) = 2 + ENDDO + ENDDO + ENDDO +ENDDO +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IF( ICHK(JL,JGL) /= 2 )THEN + WRITE(NOUT,'(" SUSTAONL : row=",i4," sta=",i4,& + &" GRID POINT NOT ASSIGNED")') JGL,JL + LLABORT = .TRUE. + ENDIF + ENDDO +ENDDO +IF( LLABORT )THEN + WRITE(NOUT,'(" SUSTAONL : inconsistent partitioning")') + CALL ABORT_TRANS(' SUSTAONL: inconsistent partitioning') +ENDIF + + +IF (LLP1) THEN + WRITE(UNIT=NOUT,FMT='('' OUTPUT FROM ROUTINE SUSTAONL '')') + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' PARTITIONING INFORMATION '')') + WRITE(UNIT=NOUT,FMT='('' '')') + IPROCB = MIN(32,N_REGIONS_EW) + WRITE(UNIT=NOUT,FMT='(17X," SETB=",32(1X,I5))') (JB,JB=1,IPROCB) + DO JA=1,N_REGIONS_NS + IPROCB = MIN(32,N_REGIONS(JA)) + WRITE(UNIT=NOUT,FMT='('' '')') + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL=IGLOFF+JGL-IGL1 + WRITE(UNIT=NOUT,FMT='(" SETA=",I5," LAT=",I5," NSTA=",& + &32(1X,I5))') JA,JGL,(D%NSTA(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='(" SETA=",I5," LAT=",I5," D%NONL=",& + &32(1X,I5))') JA,JGL,(D%NONL(IGL,JB),JB=1,IPROCB) + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' '')') +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUSTAONL +END MODULE SUSTAONL_MOD diff --git a/src/trans/cpu/internal/sutrle_mod.F90 b/src/trans/cpu/internal/sutrle_mod.F90 new file mode 100644 index 0000000..89a315b --- /dev/null +++ b/src/trans/cpu/internal/sutrle_mod.F90 @@ -0,0 +1,366 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUTRLE_MOD +CONTAINS +SUBROUTINE SUTRLE(PNM,KGL,KLOOP) + +!**** *sutrle * - transposition of Legendre polynomials during set-up + +! Purpose. +! -------- +! transposition of Legendre polynomials during set-up + +!** Interface. +! ---------- +! *call* *sutrle(pnm) + +! Explicit arguments : +! -------------------- + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! P.Towers : 10-01-12 Corrected over allocation of ZSNDBUF (XT4 fix) +! G.Mozdzynski: March 2011 Support 2D (RW,RV) initialisation of legendre coeffs +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPRD, JPIM +USE MPL_MODULE ,ONLY : MPL_ALLREDUCE, MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_NON_BLOCKING_STANDARD + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MTAGLETR, NCOMBFLEN, NPRCIDS, NPRTRW, NPRTRV, & + & MYSETV, MYSETW, NPROC +USE TPM_FIELDS ,ONLY : F +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +REAL(KIND=JPRD),INTENT(IN) :: PNM(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KGL +INTEGER(KIND=JPIM),INTENT(IN) :: KLOOP + +! LOCAL + +REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFV(:),ZRCVBUFV(:,:) +REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFW(:,:),ZRCVBUFW(:,:) +INTEGER(KIND=JPIM) :: ILREC, IM, IPOS, & + & IRECVSET, IRECV, ISEND, ISENDSET, ITAG,ISENDSIZE, IRECVSIZE, & + & J, JM, JMLOC, JN, JV, JROC ,IOFFT, IOFFG, IGL, ISREQ, IRREQ +INTEGER(KIND=JPIM) :: ISENDREQ(MAX(NPRTRW,NPRTRV)) +INTEGER(KIND=JPIM) :: IRECVREQ(MAX(NPRTRW,NPRTRV)) +INTEGER(KIND=JPIM) :: IGLVS(NPRTRV) +INTEGER(KIND=JPIM) :: IGLVR(NPRTRV) +INTEGER(KIND=JPIM) :: IPOSW(NPRTRW) + +! ------------------------------------------------------------------ + +!* 0. Some initializations. +! --------------------- + +ITAG = MTAGLETR+KLOOP + +! Perform barrier synchronisation to guarantee all processors have +! completed all previous communication + +IF( NPROC > 1 .AND. KLOOP ==1)THEN + CALL GSTATS(783,0) + CALL MPL_BARRIER(CDSTRING='SUTRLE:') + CALL GSTATS(783,1) +ENDIF + +! +! First do communications in NPRTRV direction +! + +!* Calculate send buffer size + +IF(KGL > 0) THEN + ISENDSIZE = R%NSPOLEG+1 +ELSE + ISENDSIZE=1 +ENDIF + +ALLOCATE (ZSNDBUFV(ISENDSIZE)) +ALLOCATE (ZRCVBUFV(R%NSPOLEG+1,NPRTRV)) + +!* copy data to be sent into zsndbufv + +ZSNDBUFV(1) = KGL +IF(KGL > 0) THEN + CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(J) + DO J=1,R%NSPOLEG + ZSNDBUFV(J+1) = PNM(J) + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1141,1) +ENDIF + +ISREQ = 0 +DO JROC=1,NPRTRV-1 + ISEND = MYSETV-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRV + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,MYSETW,ISENDSET) + ISREQ = ISREQ+1 + CALL GSTATS(801,0) + CALL MPL_SEND(ZSNDBUFV(1:ISENDSIZE),KDEST=NPRCIDS(ISEND), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + + +IRREQ=0 +DO JROC=1,NPRTRV-1 + IRECV = MYSETV+JROC + IF (IRECV > NPRTRV) IRECV = IRECV-NPRTRV + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,MYSETW,IRECVSET) + IRREQ = IRREQ+1 + CALL GSTATS(801,0) + CALL MPL_RECV(ZRCVBUFV(:,IRECVSET),KSOURCE=NPRCIDS(IRECV), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + +IF(ISREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +IF(IRREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +!* copy data from buffer to f%rpnm +CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,IRECV,IRECVSET,IGL,JMLOC,IM,IOFFT,IOFFG,JN) +DO JROC=1,NPRTRV-1 + IRECV = MYSETV+JROC + IF (IRECV > NPRTRV) IRECV = IRECV-NPRTRV + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,MYSETW,IRECVSET) + IGL = ZRCVBUFV(1,IRECVSET) + IGLVS(IRECVSET)=IGL + IF( IGL > 0 )THEN + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + IOFFT = D%NPMT(IM) + IOFFG = D%NPMG(IM) + DO JN=1,R%NTMAX-IM+2 + F%RPNM(IGL,IOFFT+JN) = ZRCVBUFV(1+IOFFG+JN,IRECVSET) + ENDDO + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO + +DEALLOCATE (ZSNDBUFV) + +!* copy data from pnm to rpnm + +IGLVS(MYSETV)=KGL +IF(KGL > 0) THEN + ZRCVBUFV(1,MYSETV)=KGL + ZRCVBUFV(2:R%NSPOLEG+1,MYSETV)=PNM(1:R%NSPOLEG) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JMLOC,IM,IOFFT,IOFFG,JN) + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + IOFFT = D%NPMT(IM) + IOFFG = D%NPMG(IM) + DO JN=1,R%NTMAX-IM+2 + F%RPNM(KGL,IOFFT+JN) = PNM(IOFFG+JN) + ENDDO + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1141,1) + + +! +! Now do communications in the NPRTRW direction +! + +!* Calculate send buffer size + +ISENDSIZE=0 +DO JROC=1,NPRTRW-1 + ISEND = MYSETW-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRW + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV) + IPOS = 0 + DO JM=0,R%NSMAX + IF (ISENDSET == D%NPROCM(JM) ) IPOS = IPOS + R%NTMAX-JM+2 + ENDDO + ISENDSIZE = MAX(IPOS,ISENDSIZE) +ENDDO +ISENDSIZE=ISENDSIZE*NPRTRV+NPRTRV +IRECVSIZE=ISENDSIZE +IF( NPROC > 1 )THEN + CALL GSTATS(801,0) + CALL MPL_ALLREDUCE(IRECVSIZE,'MAX',CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDIF + +ALLOCATE (ZSNDBUFW(ISENDSIZE,NPRTRW)) +ALLOCATE (ZRCVBUFW(IRECVSIZE,NPRTRW)) + +CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,ISEND,ISENDSET,IPOS,JV,IGL,JM,JN) +DO JROC=1,NPRTRW-1 + ISEND = MYSETW-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRW + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV) +!* copy data to be sent into zsndbufw + IPOS=0 + DO JV=1,NPRTRV + IPOS=IPOS+1 + ZSNDBUFW(IPOS,ISENDSET) = IGLVS(JV) + ENDDO + DO JV=1,NPRTRV + IGL = IGLVS(JV) + IF( IGL > 0 )THEN + DO JM=0,R%NSMAX + IF (ISENDSET == D%NPROCM(JM) ) THEN + DO JN=1,R%NTMAX-JM+2 + IPOS = IPOS + 1 + ZSNDBUFW(IPOS,ISENDSET) = ZRCVBUFV(1+D%NPMG(JM)+JN,JV) + ENDDO + ENDIF + ENDDO + ENDIF + ENDDO + IPOSW(ISENDSET)=IPOS +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1141,1) + +ISREQ = 0 +DO JROC=1,NPRTRW-1 + ISEND = MYSETW-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRW + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV) + ISENDSIZE = IPOSW(ISENDSET) + ISREQ = ISREQ+1 + CALL GSTATS(801,0) + CALL MPL_SEND(ZSNDBUFW(1:ISENDSIZE,ISENDSET),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + + +IRREQ = 0 +DO JROC=1,NPRTRW-1 + + IRECV = MYSETW+JROC + IF (IRECV > NPRTRW) IRECV = IRECV-NPRTRW + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,IRECVSET,MYSETV) +!* receive message (if not empty) + + IRREQ = IRREQ+1 + CALL GSTATS(801,0) + CALL MPL_RECV(ZRCVBUFW(:,IRECVSET),KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + +IF(ISREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +IF(IRREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,IRECV,IRECVSET,IPOS,IGLVR,JV,IGL,JMLOC,IM,IOFFT,JN) +DO JROC=1,NPRTRW-1 + IRECV = MYSETW+JROC + IF (IRECV > NPRTRW) IRECV = IRECV-NPRTRW + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,IRECVSET,MYSETV) +!* copy data from buffer to f%rpnm + IPOS=0 + DO JV=1,NPRTRV + IPOS=IPOS+1 + IGLVR(JV)=ZRCVBUFW(IPOS,IRECVSET) + ENDDO + DO JV=1,NPRTRV + IGL = IGLVR(JV) + IF( IGL > 0 )THEN + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + IOFFT = D%NPMT(IM) + DO JN=1,R%NTMAX-IM+2 + IPOS = IPOS + 1 + F%RPNM(IGL,IOFFT+JN) = ZRCVBUFW(IPOS,IRECVSET) + ENDDO + ENDDO + ENDIF + ENDDO +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1141,1) + +DEALLOCATE (ZRCVBUFV) +DEALLOCATE (ZSNDBUFW) +DEALLOCATE (ZRCVBUFW) + +IF( NPROC > 1 .AND. KLOOP ==1)THEN + CALL GSTATS(783,0) + CALL MPL_BARRIER(CDSTRING='SUTRLE:') + CALL GSTATS(783,1) +ENDIF +END SUBROUTINE SUTRLE +END MODULE SUTRLE_MOD diff --git a/src/trans/cpu/internal/suwavedi_mod.F90 b/src/trans/cpu/internal/suwavedi_mod.F90 new file mode 100644 index 0000000..548ca8c --- /dev/null +++ b/src/trans/cpu/internal/suwavedi_mod.F90 @@ -0,0 +1,183 @@ +! (C) Copyright 1996- ECMWF. +! (C) Copyright 1996- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUWAVEDI_MOD +CONTAINS +SUBROUTINE SUWAVEDI(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& + &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS,& + &KPTRMS,KALLMS,KDIM0G) + +!**** *SUWAVEDI * - Routine to initialize spectral wave distribution + +! Purpose. +! -------- +! Initialize arrays controlling spectral wave distribution + +!** Interface. +! ---------- +! *CALL* *SUWAVEDI * + +! Explicit arguments : +! -------------------- +! KSMAX - Spectral truncation limit (input) +! KTMAX - Overtruncation for KSMAX (input) +! KPRTRW - Number of processors in A-direction (input) +! KMYSETW - A-set for present processor (input) +! KASM0 - Offsets for spectral waves (output) +! KSPOLEGL - Local version of NSPOLEG (output) +! KPROCM - Where a certain spectral wave belongs (output) +! KUMPP - Number of spectral waves on this PE (output) +! KSPEC - Local version on NSPEC (output) +! KSPEC2 - Local version on NSPEC2 (output) +! KSPEC2MX - Maximum KSPEC2 across PEs (output) +! KPOSSP - Global spectral fields partitioning (output) +! KMYMS - This PEs spectral zonal wavenumbers (output) + +! Implicit arguments : NONE +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 96-01-10 +! L.Isaksen: 96-02-02 - Calculation of KSPEC2MX added +! K.YESSAD : 97-02-18 - Add KTMAX, bug correction for KSPOLEGL. +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + + +! DUMMY +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KPRTRW +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETW +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPOLEGL + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KASM0(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPROCM(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KUMPP(KPRTRW) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KMYMS(KSMAX+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPOSSP(KPRTRW+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPTRMS(KPRTRW) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KALLMS(KSMAX+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KDIM0G(0:KSMAX) + +! LOCAL +INTEGER(KIND=JPIM) :: IK, IL, IND, IPOS, ISPEC2P, JA, JM,JMLOC,IM +INTEGER(KIND=JPIM) :: ISPOLEGL,ISPEC2MX,IASM0(0:KSMAX),IPROCM(0:KSMAX) +INTEGER(KIND=JPIM) :: IUMPP(KPRTRW),IMYMS(KSMAX+1),IPOSSP(KPRTRW+1) +INTEGER(KIND=JPIM) :: IPTRMS(KPRTRW),IALLMS(KSMAX+1),IDIM0G(0:KSMAX) +INTEGER(KIND=JPIM) :: ISPEC(KPRTRW),IC(KPRTRW) + + +! ----------------------------------------------------------------- + +!* 1. Initialize partitioning of wave numbers to PEs +! ---------------------------------------------- + +ISPEC(:) = 0 + +IUMPP(:) = 0 +IASM0(:) = -99 +ISPOLEGL = 0 + +IL = 1 +IND = 1 +IK = 0 +IPOS = 1 +DO JM=0,KSMAX + IK = IK + IND + IF (IK > KPRTRW) THEN + IK = KPRTRW + IND = -1 + ELSEIF (IK < 1) THEN + IK = 1 + IND = 1 + ENDIF + IPROCM(JM) = IK + ISPEC(IK) = ISPEC(IK)+KSMAX-JM+1 + IUMPP(IK) = IUMPP(IK)+1 + IF (IK == KMYSETW) THEN + ISPOLEGL = ISPOLEGL +KTMAX+1-JM+1 + IMYMS(IL) = JM + IASM0(JM) = IPOS + IPOS = IPOS+(KSMAX-JM+1)*2 + IL = IL+1 + ENDIF +ENDDO + +IPOSSP(1) = 1 +ISPEC2P = 2*ISPEC(1) +ISPEC2MX = ISPEC2P +IPTRMS(1) = 1 +DO JA=2,KPRTRW + IPOSSP(JA) = IPOSSP(JA-1)+ISPEC2P + ISPEC2P = 2*ISPEC(JA) + ISPEC2MX = MAX(ISPEC2MX,ISPEC2P) +! pointer to the first wave number of a given wave-set in NALLMS array + IPTRMS(JA) = IPTRMS(JA-1)+IUMPP(JA-1) +ENDDO +IPOSSP(KPRTRW+1) = IPOSSP(KPRTRW)+ISPEC2P + +! IALLMS : wave numbers for all wave-set concatenated together to give all +! wave numbers in wave-set order. +IC(:) = 0 +DO JM=0,KSMAX + IALLMS(IC(IPROCM(JM))+IPTRMS(IPROCM(JM))) = JM + IC(IPROCM(JM)) = IC(IPROCM(JM))+1 +ENDDO + +IPOS = 1 +DO JA=1,KPRTRW + DO JMLOC=1,IUMPP(JA) + IM = IALLMS(IPTRMS(JA)+JMLOC-1) + IDIM0G(IM) = IPOS + IPOS = IPOS+(KSMAX+1-IM)*2 + ENDDO +ENDDO + +IF(PRESENT(KSPEC)) KSPEC = ISPEC(KMYSETW) +IF(PRESENT(KSPEC2)) KSPEC2 = 2*ISPEC(KMYSETW) +IF(PRESENT(KSPEC2MX)) KSPEC2MX = ISPEC2MX +IF(PRESENT(KSPOLEGL)) KSPOLEGL = ISPOLEGL + +IF(PRESENT(KASM0)) KASM0(:) = IASM0(:) +IF(PRESENT(KPROCM)) KPROCM(:) = IPROCM(:) +IF(PRESENT(KUMPP)) KUMPP(:) = IUMPP(:) +IF(PRESENT(KMYMS)) KMYMS(:) = IMYMS(:) +IF(PRESENT(KPOSSP)) KPOSSP(:) = IPOSSP(:) +IF(PRESENT(KPTRMS)) KPTRMS(:) = IPTRMS(:) +IF(PRESENT(KALLMS)) KALLMS(:) = IALLMS(:) +IF(PRESENT(KDIM0G)) KDIM0G(:) = IDIM0G(:) + +END SUBROUTINE SUWAVEDI +END MODULE SUWAVEDI_MOD + + diff --git a/src/trans/cpu/internal/tpm_constants.F90 b/src/trans/cpu/internal/tpm_constants.F90 new file mode 100644 index 0000000..1f72a4b --- /dev/null +++ b/src/trans/cpu/internal/tpm_constants.F90 @@ -0,0 +1,20 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_CONSTANTS +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRB) :: RA ! Radius of Earth + +END MODULE TPM_CONSTANTS diff --git a/src/trans/cpu/internal/tpm_ctl.F90 b/src/trans/cpu/internal/tpm_ctl.F90 new file mode 100644 index 0000000..a5ee258 --- /dev/null +++ b/src/trans/cpu/internal/tpm_ctl.F90 @@ -0,0 +1,43 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_CTL + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE, INTRINSIC :: iso_c_binding, ONLY: C_PTR, C_NULL_PTR +USE SHAREDMEM_MOD ,ONLY : SHAREDMEM +IMPLICIT NONE + +SAVE + + +TYPE CTL_TYPE + +LOGICAL :: LREAD_LEGPOL = .FALSE. +LOGICAL :: LWRITE_LEGPOL = .FALSE. +CHARACTER(LEN=256) :: CLEGPOLFNAME='legpol_file' +CHARACTER(LEN=4) :: CIO_TYPE='file' +TYPE(SHAREDMEM) :: STORAGE + +END TYPE CTL_TYPE + + +TYPE(CTL_TYPE),ALLOCATABLE,TARGET :: CTL_RESOL(:) +TYPE(CTL_TYPE),POINTER :: C + + +END MODULE TPM_CTL + + + + + + + diff --git a/src/trans/cpu/internal/tpm_dim.F90 b/src/trans/cpu/internal/tpm_dim.F90 new file mode 100644 index 0000000..4d56f92 --- /dev/null +++ b/src/trans/cpu/internal/tpm_dim.F90 @@ -0,0 +1,51 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_DIM + +! Module for dimensions. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE DIM_TYPE +! SPECTRAL SPACE DIMENSIONS + + INTEGER(KIND=JPIM) :: NSMAX ! Truncation order + INTEGER(KIND=JPIM) :: NTMAX ! Truncation order for tendencies + INTEGER(KIND=JPIM) :: NSPOLEG ! Number of Legandre polynomials + INTEGER(KIND=JPIM) :: NSPEC_G ! Number of complex spectral coefficients (global) + INTEGER(KIND=JPIM) :: NSPEC2_G ! 2*NSPEC_G + +! COLLOCATION GRID DIMENSIONS + + INTEGER(KIND=JPIM) :: NDGL ! Number of rows of latitudes + INTEGER(KIND=JPIM) :: NDLON ! Maximum number of longitude points (near equator) + INTEGER(KIND=JPIM) :: NDGNH ! Number of rows in northern hemisphere + +! Legendre transform dimensions + INTEGER(KIND=JPIM) :: NLEI1 ! R%NSMAX+4+MOD(R%NSMAX+4+1,2) + INTEGER(KIND=JPIM) :: NLEI3 ! R%NDGNH+MOD(R%NDGNH+2,2) + INTEGER(KIND=JPIM) :: NLED3 ! R%NTMAX+2+MOD(R%NTMAX+3,2) + INTEGER(KIND=JPIM) :: NLED4 ! R%NTMAX+3+MOD(R%NTMAX+4,2) + +! Width of E'-zone + INTEGER(KIND=JPIM) :: NNOEXTZL ! Longitude direction + INTEGER(KIND=JPIM) :: NNOEXTZG ! Latitude direction + +END TYPE DIM_TYPE + +TYPE(DIM_TYPE),ALLOCATABLE,TARGET :: DIM_RESOL(:) +TYPE(DIM_TYPE),POINTER :: R + +END MODULE TPM_DIM diff --git a/src/trans/cpu/internal/tpm_distr.F90 b/src/trans/cpu/internal/tpm_distr.F90 new file mode 100644 index 0000000..8912797 --- /dev/null +++ b/src/trans/cpu/internal/tpm_distr.F90 @@ -0,0 +1,169 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_DISTR + +! Module for distributed memory environment. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +!* Variables describing distributed memory parallelization + +INTEGER(KIND=JPIM) :: NPROC ! Number of processors (NPRGPNS*NPRGPEW) +INTEGER(KIND=JPIM) :: NPRGPNS ! No. of sets in N-S direction (grid-point space) +INTEGER(KIND=JPIM) :: NPRGPEW ! No. of sets in E-W direction (grid-point space) +INTEGER(KIND=JPIM) :: NPRTRW ! No. of sets in wave direction (spectral space) +INTEGER(KIND=JPIM) :: NPRTRV ! NPROC/NPRTRW +INTEGER(KIND=JPIM) :: NPRTRNS ! No. of sets in N-S direction (Fourier space) + ! (always equal to NPRTRW) +LOGICAL :: LEQ_REGIONS ! TRUE - Use new eq_regions partitioning + ! FALSE- Use old NPRGPNS x NPRGPEW partitioning +INTEGER(KIND=JPIM) :: MYPROC ! My processor number +INTEGER(KIND=JPIM) :: MYSETW ! My set number in wave direction (spectral space) +INTEGER(KIND=JPIM) :: MYSETV ! My set number in field direction(S.S and F.S) +INTEGER(KIND=JPIM) :: NCOMBFLEN ! Size of communication buffer + +INTEGER(KIND=JPIM) :: MTAGLETR ! Tag +INTEGER(KIND=JPIM) :: MTAGML ! Tag +INTEGER(KIND=JPIM) :: MTAGLG ! Tag +INTEGER(KIND=JPIM) :: MTAGGL ! Tag +INTEGER(KIND=JPIM) :: MTAGPART ! Tag +INTEGER(KIND=JPIM) :: MTAGDISTSP ! Tag +INTEGER(KIND=JPIM) :: MTAGLM ! Tag +INTEGER(KIND=JPIM) :: MTAGDISTGP ! Tag + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPRCIDS(:) ! Array containing the process ids + +TYPE DISTR_TYPE +LOGICAL :: LGRIDONLY ! TRUE - only grid space structures are available +LOGICAL :: LWEIGHTED_DISTR ! TRUE - weighted distribution +LOGICAL :: LSPLIT ! TRUE - latitudes are shared between a-sets +LOGICAL :: LCPNMONLY ! TRUE - Compute Legendre polynomials only, not FFTs + +! SPECTRAL SPACE + +INTEGER(KIND=JPIM) :: NUMP ! No. of spectral waves handled by this processor +INTEGER(KIND=JPIM) :: NSPEC ! No. of complex spectral coefficients (on this PE) +INTEGER(KIND=JPIM) :: NSPEC2 ! 2*NSPEC +INTEGER(KIND=JPIM) :: NSPEC2MX ! maximun NSPEC2 among all PEs +INTEGER(KIND=JPIM) :: NTPEC2 ! cf. NSPEC2 but for truncation NTMAX +INTEGER(KIND=JPIM) :: NUMTP ! cf. NUMP but for truncation NTMAX + +INTEGER(KIND=JPIM) :: NSPOLEGL ! No. of legendre polynomials on this PE +INTEGER(KIND=JPIM) :: NLEI3D ! (NLEI3-1)/NPRTRW+1 + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: MYMS(:) ! Wave numbers handled by this PE +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NUMPP(:) ! No. of wave numbers each wave set is + ! responsible for +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPOSSP(:) ! Not needed in transform? +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCM(:) ! Process that does the calc. for certain + ! wavenumber M +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NDIM0G(:) ! Defines partitioning of global spectral + ! fields among PEs + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NASM0(:) ! Address in a spectral array of (m, n=m) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NATM0(:) ! Same as NASM0 but for NTMAX +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NALLMS(:) ! Wave numbers for all a-set concatenated + ! together to give all wave numbers in a-set + ! order. Used when global spectral norms + ! have to be gathered. +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRMS(:) ! Pointer to the first wave number of a given + ! a-set in nallms array. + + +! Legendre polynomials + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLATLS(:,:) ! First latitude for which each a-set,bset calcul. +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLATLE(:,:) ! Last latitude for which each a-set,bset calcul. + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMT(:) ! Adress for legendre polynomial for + ! given M (NTMAX) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMS(:) ! Adress for legendre polynomial for + ! given M (NSMAX) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMG(:) ! Global version of NPMS + +! FOURIER SPACE + +INTEGER(KIND=JPIM) :: NDGL_FS ! Number of rows of latitudes for which this process is + ! performing Fourier Space calculations + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGTF(:) ! Offset for specific latitude in + ! Fourier/gridpoint buffer +INTEGER(KIND=JPIM) :: NLENGTF ! Second dimension of Fourier/gridpoint buffer + ! (sum of (NLOEN+3) over local latitudes) + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NULTPP(:) ! No of lats. for each wave_set (F.S) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCL(:) ! Process responsible for each lat. (F.S) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLS(:) ! Pointer to first lat. (F.S) + +! NSTAGT0B to NLENGT1B: help arrays for spectral to fourier space transposition +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGT0B(:) ! Start adresses for segments within buffer + ! (according to processors to whom data + ! is going to be sent) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGT1B(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPNTGTB0(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPNTGTB1(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTSFTB(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTSGTB(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: MSTABF(:) +INTEGER(KIND=JPIM) :: NLENGT0B ! dimension +INTEGER(KIND=JPIM) :: NLENGT1B ! dimension + +! GRIDPOINT SPACE + +INTEGER(KIND=JPIM) :: NDGL_GP ! D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NFRSTLAT(:) ! First lat of each a-set +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLSTLAT(:) ! Last lat of each a-set +INTEGER(KIND=JPIM) :: NFRSTLOFF ! Offset for first lat of own a-set + ! i.e. NFRSTLOFF=NFRSTLAT(MYSETA)-1 +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLAT(:) ! Pointer to start of latitude +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRFRSTLAT(:) ! Pointer to the first latitude of each + ! a-set in NSTA and NONL arrays +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLSTLAT(:) ! Pointer to the last latitude of each + ! a-set in NSTA and NONL arrays +INTEGER(KIND=JPIM) :: NPTRFLOFF ! Offset for pointer to the first latitude of own a-set + ! NSTA and NONL arrays, i.e. NPTRFRSTLAT(MYSETA)-1 +LOGICAL ,ALLOCATABLE :: LSPLITLAT(:) ! True if latitude is split over 2 a-sets + +! NSTA(R%NDGL+NPRGPNS-1,NPRGPEW) : Position of first grid column +! for the latitudes on a processor. The information is +! available for all processors. The b-sets are distinguished +! by the last dimension of NSTA(). The latitude band for +! each a-set is addressed by NPTRFRSTLAT(JASET), +! NPTRLSTLAT(JASET), and NPTRFLOFF=NPTRFRSTLAT(MYSETA) on +! this processors a-set. Each split latitude has two entries +! in NSTA(,:) which necessitates the rather complex +! addressing of NSTA(,:) and the overdimensioning of NSTA by +! NPRGPNS. +! NONL(R%NDGL+NPRGPNS-1,NPRGPEW) : Number of grid columns for +! the latitudes on a processor. Similar to NSTA() in data +! structure. +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTA(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NONL(:,:) + +INTEGER(KIND=JPIM) :: NGPTOT ! Total number of grid columns on this PE +INTEGER(KIND=JPIM) :: NGPTOTG ! Total number of grid columns on the Globe +INTEGER(KIND=JPIM) :: NGPTOTMX ! Maximum number of grid columns on any of the PEs +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NGPTOTL(:,:) ! Number of grid columns on each PE. + +REAL(KIND=JPRB) ,ALLOCATABLE :: RWEIGHT(:) ! Weight per grid-point (if weighted distribution) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCA_GP(:) ! Number of grid-points per a-set + +END TYPE DISTR_TYPE + +TYPE(DISTR_TYPE),ALLOCATABLE,TARGET :: DISTR_RESOL(:) +TYPE(DISTR_TYPE),POINTER :: D + +END MODULE TPM_DISTR + diff --git a/src/trans/cpu/internal/tpm_fft.F90 b/src/trans/cpu/internal/tpm_fft.F90 new file mode 100644 index 0000000..5577262 --- /dev/null +++ b/src/trans/cpu/internal/tpm_fft.F90 @@ -0,0 +1,37 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FFT +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE BLUESTEIN_MOD ,ONLY : FFTB_TYPE + +! Module for Fourier transforms. + +IMPLICIT NONE + +SAVE + +TYPE FFT_TYPE + REAL(KIND=JPRB) ,ALLOCATABLE :: TRIGS(:,:) ! list of trigonometric function values + INTEGER(KIND=JPIM),ALLOCATABLE :: NFAX(:,:) ! list of factors of truncation + LOGICAL :: LBLUESTEIN=.FALSE. ! logical indicating whether some + ! latitudes require bluestein algorithm + ! with prime factors that are not 2,3,or 5 + LOGICAL,ALLOCATABLE :: LUSEFFT992(:) ! describes which FFT algorithm to be used + ! T=use FFT992 F=use bluestein +END TYPE FFT_TYPE + +TYPE(FFT_TYPE),ALLOCATABLE,TARGET :: FFT_RESOL(:) +TYPE(FFT_TYPE),POINTER :: T + +TYPE(FFTB_TYPE),ALLOCATABLE,TARGET :: FFTB_RESOL(:) +TYPE(FFTB_TYPE),POINTER :: TB + +END MODULE TPM_FFT diff --git a/src/trans/cpu/internal/tpm_fftw.F90 b/src/trans/cpu/internal/tpm_fftw.F90 new file mode 100644 index 0000000..e86fe94 --- /dev/null +++ b/src/trans/cpu/internal/tpm_fftw.F90 @@ -0,0 +1,505 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FFTW +! Author. +! ------- +! George Mozdzynski +! +! Modifications. +! -------------- +! Original October 2014 +! R. El Khatib 01-Sep-2015 More subroutines for better modularity + +USE, INTRINSIC :: ISO_C_BINDING + +USE PARKIND1 ,ONLY : JPIB, JPIM, JPRB, JPRD +USE MPL_MODULE ,ONLY : MPL_MYRANK +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE + +SAVE + +#ifdef __NEC__ +! From NLC (NEC Numeric Library Collection) +#include "aslfftw3.f03" +#define FFTW_NO_SIMD 0 +#else +#include "fftw3.f03" +#endif + +PRIVATE +PUBLIC CREATE_PLAN_FFTW, DESTROY_PLAN_FFTW, DESTROY_PLANS_FFTW, INIT_PLANS_FFTW, & + & FFTW_RESOL, TW, EXEC_FFTW, EXEC_EFFTW + +TYPE FFTW_TYPE + INTEGER(KIND=JPIM),ALLOCATABLE :: N_PLANS(:) + TYPE(FFTW_PLAN),POINTER :: FFTW_PLANS(:) + INTEGER(KIND=JPIM) :: N_MAX=0 ! maximum number of latitudes + INTEGER(KIND=JPIM) :: N_MAX_PLANS=4 ! maximum number of plans for each active latitude + LOGICAL :: LFFTW=.FALSE. +END TYPE FFTW_TYPE + + +TYPE FFTW_PLAN + INTEGER(KIND=JPIM) :: NPLAN_ID=123456 + INTEGER(KIND=JPIB) :: NPLAN + INTEGER(KIND=JPIM) :: NLOT + INTEGER(KIND=JPIM) :: NTYPE + TYPE(FFTW_PLAN),POINTER :: NEXT_PLAN => NULL() +END TYPE FFTW_PLAN + +TYPE(FFTW_TYPE),ALLOCATABLE,TARGET :: FFTW_RESOL(:) +TYPE(FFTW_TYPE),POINTER :: TW + + + +! ------------------------------------------------------------------ +CONTAINS +! ------------------------------------------------------------------ + + +SUBROUTINE INIT_PLANS_FFTW(KDLON) +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON + +#include "abor1.intfb.h" + +TW%N_MAX=KDLON +ALLOCATE(TW%FFTW_PLANS(TW%N_MAX)) +ALLOCATE(TW%N_PLANS(TW%N_MAX)) +TW%N_PLANS(:)=0 +RETURN +END SUBROUTINE INIT_PLANS_FFTW + + +SUBROUTINE CREATE_PLAN_FFTW(KPLAN,KTYPE,KN,KLOT) +INTEGER(KIND=JPIB),INTENT(OUT) :: KPLAN +INTEGER(KIND=JPIM),INTENT(IN) :: KTYPE,KN,KLOT + +INTEGER(KIND=JPIB) :: IPLAN +INTEGER(KIND=JPIM) :: IRANK, ISTRIDE +INTEGER(KIND=JPIM) :: JL, JN +INTEGER(KIND=JPIM) :: IRDIST,ICDIST,IN(1),IEMBED(1) +REAL(KIND=JPRB), POINTER :: ZDUM(:) +TYPE(C_PTR) :: ZDUMP +LOGICAL :: LLFOUND +LOGICAL :: LLRESTRICT_PLANS=.TRUE. +TYPE(FFTW_PLAN),POINTER :: CURR_FFTW_PLAN,START_FFTW_PLAN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE2 +IF (LHOOK) CALL DR_HOOK('CREATE_PLAN_FFTW',0,ZHOOK_HANDLE) + +IF( KN > TW%N_MAX )THEN + CALL ABOR1('CREATE_PLAN_FFTW: KN > N_MAX THAT WAS INITIALISED IN INIT_PLANS_FFTW') +ENDIF + +IRANK=1 +ISTRIDE=1 +IN(1)=KN +IEMBED(1)=IN(1) +ICDIST=KN/2+1 +IRDIST=ICDIST*2 + +!$OMP CRITICAL (FFTW_CREATE) +LLFOUND=.FALSE. +IF( TW%FFTW_PLANS(KN)%NPLAN_ID /= 123456 )THEN + WRITE(*,'("CREATE_PLAN_FFTW.1: PLAN_ID=",I10)')TW%FFTW_PLANS(KN)%NPLAN_ID + CALL ABOR1('CREATE_PLAN_FFTW.1: NPLAN_ID /= 123456') +ENDIF +CURR_FFTW_PLAN=>TW%FFTW_PLANS(KN) +IF( CURR_FFTW_PLAN%NPLAN_ID /= 123456 )THEN + WRITE(*,'("CREATE_PLAN_FFTW.2: PLAN_ID=",I10)')CURR_FFTW_PLAN%NPLAN_ID + CALL ABOR1('CREATE_PLAN_FFTW.2: NPLAN_ID /= 123456') +ENDIF +! search for plan in existing plans +DO JL=1,TW%N_PLANS(KN) + IF( KLOT == CURR_FFTW_PLAN%NLOT .AND. KTYPE == CURR_FFTW_PLAN%NTYPE )THEN + LLFOUND=.TRUE. + IPLAN=CURR_FFTW_PLAN%NPLAN + EXIT + ELSEIF( JL /= TW%N_PLANS(KN) )THEN + CURR_FFTW_PLAN=>CURR_FFTW_PLAN%NEXT_PLAN + IF( CURR_FFTW_PLAN%NPLAN_ID /= 123456 )THEN + WRITE(*,'("CREATE_PLAN_FFTW.3: PLAN_ID=",I10)')CURR_FFTW_PLAN%NPLAN_ID + CALL ABOR1('CREATE_PLAN_FFTW.3: NPLAN_ID /= 123456') + ENDIF + ENDIF +ENDDO +IF( .NOT.LLFOUND )THEN + IF( LLRESTRICT_PLANS )THEN + IF( TW%N_PLANS(KN) == TW%N_MAX_PLANS )THEN + ! destroy the plan at the start of the list +! WRITE(*,'("CREATE_PLAN_FFTW: BEG: DESTROYING A PLAN AT THE START OF THE LIST")') + IF (JPRB == JPRD) THEN + CALL DFFTW_DESTROY_PLAN(TW%FFTW_PLANS(KN)%NPLAN) + ELSE + CALL SFFTW_DESTROY_PLAN(TW%FFTW_PLANS(KN)%NPLAN) + END IF + TW%FFTW_PLANS(KN)%NPLAN_ID=999999 + START_FFTW_PLAN=>TW%FFTW_PLANS(KN) + TW%FFTW_PLANS(KN)=TW%FFTW_PLANS(KN)%NEXT_PLAN + ! DEALLOCATE(START_FFTW_PLAN) + TW%N_PLANS(KN)=TW%N_PLANS(KN)-1 +! WRITE(*,'("CREATE_PLAN_FFTW: END: DESTROYING A PLAN AT THE START OF THE LIST")') + ENDIF + ENDIF + IF (JPRB == JPRD) THEN + ZDUMP=FFTW_ALLOC_COMPLEX(INT(1,C_SIZE_T)) + ELSE + ZDUMP=FFTWF_ALLOC_COMPLEX(INT(1,C_SIZE_T)) + END IF + CALL C_F_POINTER(ZDUMP,ZDUM,[2]) + IF( KTYPE==1 )THEN + IF (LHOOK) CALL DR_HOOK('FFTW_PLAN_MANY_DFT_C2R',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_PLAN_MANY_DFT_C2R(IPLAN,IRANK,IN,KLOT,ZDUM,IEMBED,ISTRIDE,ICDIST,& + & ZDUM,IEMBED,ISTRIDE,IRDIST,FFTW_ESTIMATE+FFTW_NO_SIMD) + ELSE + CALL SFFTW_PLAN_MANY_DFT_C2R(IPLAN,IRANK,IN,KLOT,ZDUM,IEMBED,ISTRIDE,ICDIST,& + & ZDUM,IEMBED,ISTRIDE,IRDIST,FFTW_ESTIMATE+FFTW_NO_SIMD) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_PLAN_MANY_DFT_C2R',1,ZHOOK_HANDLE2) + ELSEIF( KTYPE==-1 )THEN + IF (LHOOK) CALL DR_HOOK('FFTW_PLAN_MANY_DFT_R2C',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_PLAN_MANY_DFT_R2C(IPLAN,IRANK,IN,KLOT,ZDUM,IEMBED,ISTRIDE,IRDIST,& + & ZDUM,IEMBED,ISTRIDE,ICDIST,FFTW_ESTIMATE+FFTW_NO_SIMD) + ELSE + CALL SFFTW_PLAN_MANY_DFT_R2C(IPLAN,IRANK,IN,KLOT,ZDUM,IEMBED,ISTRIDE,IRDIST,& + & ZDUM,IEMBED,ISTRIDE,ICDIST,FFTW_ESTIMATE+FFTW_NO_SIMD) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_PLAN_MANY_DFT_R2C',1,ZHOOK_HANDLE2) + ELSE + CALL ABOR1('FFTW_PLAN: INVALID KTYPE') + ENDIF + IF (JPRB == JPRD) THEN + CALL FFTW_FREE(ZDUMP) + ELSE + CALL FFTWF_FREE(ZDUMP) + END IF + KPLAN=IPLAN + TW%N_PLANS(KN)=TW%N_PLANS(KN)+1 + IF( TW%N_PLANS(KN) /= 1 )THEN + ALLOCATE(CURR_FFTW_PLAN%NEXT_PLAN) + CURR_FFTW_PLAN=>CURR_FFTW_PLAN%NEXT_PLAN + ENDIF + IF( CURR_FFTW_PLAN%NPLAN_ID /= 123456 )THEN + WRITE(*,'("CREATE_PLAN_FFTW.4: PLAN_ID=",I10)')CURR_FFTW_PLAN%NPLAN_ID + CALL ABOR1('CREATE_PLAN_FFTW.4: NPLAN_ID /= 123456') + ENDIF + CURR_FFTW_PLAN%NPLAN=IPLAN + CURR_FFTW_PLAN%NLOT=KLOT + CURR_FFTW_PLAN%NTYPE=KTYPE + CURR_FFTW_PLAN%NEXT_PLAN=>NULL() +! write(*,'("CREATE_PLAN_FFTW: KN=",I5," NPLANS=",I3," KLOT=",I6," KTYPE=",I2,& +! & " NEW IPLAN=",Z16)')KN,TW%N_PLANS(KN),KLOT,KTYPE,IPLAN +ELSE + KPLAN=IPLAN +! write(*,'("CREATE_PLAN_FFTW: KN=",I5," NPLANS=",I3," KLOT=",I6," KTYPE=",I2,& +! & " CUR IPLAN=",Z16)')KN,TW%N_PLANS(KN),KLOT,KTYPE,IPLAN +ENDIF +!$OMP END CRITICAL (FFTW_CREATE) + +IF (LHOOK) CALL DR_HOOK('CREATE_PLAN_FFTW',1,ZHOOK_HANDLE) +RETURN +END SUBROUTINE CREATE_PLAN_FFTW + + +SUBROUTINE DESTROY_PLAN_FFTW(KPLAN) +INTEGER(KIND=JPIB),INTENT(IN) :: KPLAN +!$OMP CRITICAL (FFTW_DESTROY) +IF (JPRB == JPRD) THEN + CALL DFFTW_DESTROY_PLAN(KPLAN) +ELSE + CALL SFFTW_DESTROY_PLAN(KPLAN) +END IF +!$OMP END CRITICAL (FFTW_DESTROY) +RETURN +END SUBROUTINE DESTROY_PLAN_FFTW + + +SUBROUTINE DESTROY_PLANS_FFTW +INTEGER(KIND=JPIM) :: JL, JN +TYPE(FFTW_PLAN),POINTER :: CURR_FFTW_PLAN, NEXT_FFTW_PLAN +DO JN=1,TW%N_MAX + CURR_FFTW_PLAN=>TW%FFTW_PLANS(JN) + DO JL=1,TW%N_PLANS(JN) + CALL DESTROY_PLAN_FFTW(CURR_FFTW_PLAN%NPLAN) + NEXT_FFTW_PLAN=>CURR_FFTW_PLAN%NEXT_PLAN + IF( JL /= 1 ) THEN + DEALLOCATE( CURR_FFTW_PLAN ) + ENDIF + CURR_FFTW_PLAN => NEXT_FFTW_PLAN + ENDDO +ENDDO +IF( ASSOCIATED(TW) ) THEN + IF( ASSOCIATED(TW%FFTW_PLANS) ) DEALLOCATE(TW%FFTW_PLANS) + IF( ALLOCATED(TW%N_PLANS) ) DEALLOCATE(TW%N_PLANS) +ENDIF +RETURN +END SUBROUTINE DESTROY_PLANS_FFTW + +SUBROUTINE EXEC_FFTW(KTYPE,KRLEN,KCLEN,KOFF,KFIELDS,LD_ALL,PREEL) + +INTEGER(KIND=JPIM),INTENT(IN) :: KTYPE +INTEGER(KIND=JPIM),INTENT(IN) :: KRLEN +INTEGER(KIND=JPIM),INTENT(IN) :: KCLEN +INTEGER(KIND=JPIM),INTENT(IN) :: KOFF +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +LOGICAL ,INTENT(IN) :: LD_ALL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +REAL(KIND=JPRB), POINTER :: ZFFT(:,:) +REAL(KIND=JPRB), POINTER :: ZFFT1(:) +TYPE(C_PTR) :: ZFFTP, ZFFT1P + +INTEGER(KIND=JPIM) :: JJ,JF + +INTEGER(KIND=JPIB) :: IPLAN_C2R, IPLAN_C2R1 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE2 + +#include "abor1.intfb.h" + +IF (LHOOK) CALL DR_HOOK('EXEC_FFTW',0,ZHOOK_HANDLE) + +IF ( (KTYPE /= -1) .AND. (KTYPE /=1) ) THEN + CALL ABOR1('TPM_FFTW:EXEC_FFTW : WRONG VALUE KTYPE') +ENDIF + +IF( LD_ALL )THEN + CALL CREATE_PLAN_FFTW(IPLAN_C2R,KTYPE,KRLEN,KFIELDS) + IF (JPRB == JPRD) THEN + ZFFTP=FFTW_ALLOC_COMPLEX(INT(KCLEN/2*KFIELDS,C_SIZE_T)) + ELSE + ZFFTP=FFTWF_ALLOC_COMPLEX(INT(KCLEN/2*KFIELDS,C_SIZE_T)) + END IF + CALL C_F_POINTER(ZFFTP,ZFFT,[KCLEN,KFIELDS]) + IF (KTYPE==1) THEN + DO JF=1,KFIELDS + DO JJ=1,KCLEN + ZFFT(JJ,JF) =PREEL(JF,KOFF+JJ-1) + ENDDO + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_C2R(IPLAN_C2R,ZFFT,ZFFT) + ELSE + CALL SFFTW_EXECUTE_DFT_C2R(IPLAN_C2R,ZFFT,ZFFT) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',1,ZHOOK_HANDLE2) + DO JJ=1,KRLEN + DO JF=1,KFIELDS + PREEL(JF,KOFF+JJ-1)=ZFFT(JJ,JF) + ENDDO + ENDDO + ELSE + DO JF=1,KFIELDS + DO JJ=1,KRLEN + ZFFT(JJ,JF) =PREEL(JF,KOFF+JJ-1) + ENDDO + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_R2C(IPLAN_C2R,ZFFT,ZFFT) + ELSE + CALL SFFTW_EXECUTE_DFT_R2C(IPLAN_C2R,ZFFT,ZFFT) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',1,ZHOOK_HANDLE2) + DO JJ=1,KCLEN + DO JF=1,KFIELDS + PREEL(JF,KOFF+JJ-1)=ZFFT(JJ,JF)/REAL(KRLEN,JPRB) + ENDDO + ENDDO + ENDIF + IF (JPRB == JPRD) THEN + CALL FFTW_FREE(ZFFTP) + ELSE + CALL FFTWF_FREE(ZFFTP) + END IF +ELSE + CALL CREATE_PLAN_FFTW(IPLAN_C2R1,KTYPE,KRLEN,1) + IF (JPRB == JPRD) THEN + ZFFT1P=FFTW_ALLOC_COMPLEX(INT(KCLEN/2,C_SIZE_T)) + ELSE + ZFFT1P=FFTWF_ALLOC_COMPLEX(INT(KCLEN/2,C_SIZE_T)) + END IF + CALL C_F_POINTER(ZFFT1P,ZFFT1,[KCLEN]) + IF (KTYPE==1) THEN + DO JF=1,KFIELDS + DO JJ=1,KCLEN + ZFFT1(JJ) =PREEL(JF,KOFF+JJ-1) + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_C2R(IPLAN_C2R1,ZFFT1,ZFFT1) + ELSE + CALL SFFTW_EXECUTE_DFT_C2R(IPLAN_C2R1,ZFFT1,ZFFT1) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',1,ZHOOK_HANDLE2) + DO JJ=1,KRLEN + PREEL(JF,KOFF+JJ-1)=ZFFT1(JJ) + ENDDO + ENDDO + ELSE + DO JF=1,KFIELDS + DO JJ=1,KRLEN + ZFFT1(JJ) =PREEL(JF,KOFF+JJ-1) + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_R2C(IPLAN_C2R1,ZFFT1,ZFFT1) + ELSE + CALL SFFTW_EXECUTE_DFT_R2C(IPLAN_C2R1,ZFFT1,ZFFT1) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',1,ZHOOK_HANDLE2) + DO JJ=1,KCLEN + PREEL(JF,KOFF+JJ-1)=ZFFT1(JJ)/REAL(KRLEN,JPRB) + ENDDO + ENDDO + ENDIF + IF (JPRB == JPRD) THEN + CALL FFTW_FREE(ZFFT1P) + ELSE + CALL FFTWF_FREE(ZFFT1P) + END IF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EXEC_FFTW',1,ZHOOK_HANDLE) +END SUBROUTINE EXEC_FFTW + +SUBROUTINE EXEC_EFFTW(KTYPE,KRLEN,KCLEN,KOFF,KFIELDS,LD_ALL,PREEL) + +INTEGER(KIND=JPIM),INTENT(IN) :: KTYPE +INTEGER(KIND=JPIM),INTENT(IN) :: KRLEN +INTEGER(KIND=JPIM),INTENT(IN) :: KCLEN +INTEGER(KIND=JPIM),INTENT(IN) :: KOFF +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +LOGICAL ,INTENT(IN) :: LD_ALL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +REAL(KIND=JPRB), POINTER :: ZFFT(:,:) +REAL(KIND=JPRB), POINTER :: ZFFT1(:) +TYPE(C_PTR) :: ZFFTP, ZFFT1P + +INTEGER(KIND=JPIM) :: JJ,JF + +INTEGER(KIND=JPIB) :: IPLAN_C2R, IPLAN_C2R1 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE2 + +#include "abor1.intfb.h" + +IF (LHOOK) CALL DR_HOOK('EXEC_EFFTW',0,ZHOOK_HANDLE) + +IF ( (KTYPE /= -1) .AND. (KTYPE /=1) ) THEN + CALL ABOR1('TPM_FFTW:EXEC_EFFTW : WRONG VALUE KTYPE') +ENDIF + +IF( LD_ALL )THEN + CALL CREATE_PLAN_FFTW(IPLAN_C2R,KTYPE,KRLEN,KFIELDS) + IF (JPRB == JPRD) THEN + ZFFTP=FFTW_ALLOC_COMPLEX(INT(KCLEN/2*KFIELDS,C_SIZE_T)) + ELSE + ZFFTP=FFTWF_ALLOC_COMPLEX(INT(KCLEN/2*KFIELDS,C_SIZE_T)) + END IF + CALL C_F_POINTER(ZFFTP,ZFFT,[KCLEN,KFIELDS]) + IF (KTYPE==1) THEN + DO JF=1,KFIELDS + DO JJ=1,KCLEN + ZFFT(JJ,JF) =PREEL(KOFF+JJ-1,JF) + ENDDO + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_C2R(IPLAN_C2R,ZFFT,ZFFT) + ELSE + CALL SFFTW_EXECUTE_DFT_C2R(IPLAN_C2R,ZFFT,ZFFT) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',1,ZHOOK_HANDLE2) + DO JJ=1,KRLEN + DO JF=1,KFIELDS + PREEL(KOFF+JJ-1,JF)=ZFFT(JJ,JF) + ENDDO + ENDDO + ELSE + DO JF=1,KFIELDS + DO JJ=1,KRLEN + ZFFT(JJ,JF) =PREEL(KOFF+JJ-1,JF) + ENDDO + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_R2C(IPLAN_C2R,ZFFT,ZFFT) + ELSE + CALL SFFTW_EXECUTE_DFT_R2C(IPLAN_C2R,ZFFT,ZFFT) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',1,ZHOOK_HANDLE2) + DO JJ=1,KCLEN + DO JF=1,KFIELDS + PREEL(KOFF+JJ-1,JF)=ZFFT(JJ,JF)/REAL(KRLEN,JPRB) + ENDDO + ENDDO + ENDIF + IF (JPRB == JPRD) THEN + CALL FFTW_FREE(ZFFTP) + ELSE + CALL FFTWF_FREE(ZFFTP) + END IF +ELSE + CALL CREATE_PLAN_FFTW(IPLAN_C2R1,KTYPE,KRLEN,1) + IF (JPRB == JPRD) THEN + ZFFT1P=FFTW_ALLOC_COMPLEX(INT(KCLEN/2,C_SIZE_T)) + ELSE + ZFFT1P=FFTWF_ALLOC_COMPLEX(INT(KCLEN/2,C_SIZE_T)) + END IF + CALL C_F_POINTER(ZFFT1P,ZFFT1,[KCLEN]) + IF (KTYPE==1) THEN + DO JF=1,KFIELDS + DO JJ=1,KCLEN + ZFFT1(JJ) =PREEL(KOFF+JJ-1,JF) + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_C2R(IPLAN_C2R1,ZFFT1,ZFFT1) + ELSE + CALL SFFTW_EXECUTE_DFT_C2R(IPLAN_C2R1,ZFFT1,ZFFT1) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',1,ZHOOK_HANDLE2) + DO JJ=1,KRLEN + PREEL(KOFF+JJ-1,JF)=ZFFT1(JJ) + ENDDO + ENDDO + ELSE + DO JF=1,KFIELDS + DO JJ=1,KRLEN + ZFFT1(JJ) =PREEL(KOFF+JJ-1,JF) + ENDDO + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',0,ZHOOK_HANDLE2) + IF (JPRB == JPRD) THEN + CALL DFFTW_EXECUTE_DFT_R2C(IPLAN_C2R1,ZFFT1,ZFFT1) + ELSE + CALL SFFTW_EXECUTE_DFT_R2C(IPLAN_C2R1,ZFFT1,ZFFT1) + END IF + IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',1,ZHOOK_HANDLE2) + DO JJ=1,KCLEN + PREEL(KOFF+JJ-1,JF)=ZFFT1(JJ)/REAL(KRLEN,JPRB) + ENDDO + ENDDO + ENDIF + IF (JPRB == JPRD) THEN + CALL FFTW_FREE(ZFFT1P) + ELSE + CALL FFTWF_FREE(ZFFT1P) + END IF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EXEC_EFFTW',1,ZHOOK_HANDLE) +END SUBROUTINE EXEC_EFFTW + +END MODULE TPM_FFTW diff --git a/src/trans/cpu/internal/tpm_fields.F90 b/src/trans/cpu/internal/tpm_fields.F90 new file mode 100644 index 0000000..7a9ffb1 --- /dev/null +++ b/src/trans/cpu/internal/tpm_fields.F90 @@ -0,0 +1,38 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FIELDS + +USE PARKIND1 ,ONLY : JPIM, JPRB, JPRD + +IMPLICIT NONE + +SAVE + +TYPE FIELDS_TYPE +REAL(KIND=JPRD) ,ALLOCATABLE :: RPNM(:,:) ! Legendre polynomials +REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes +REAL(KIND=JPRB) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature +REAL(KIND=JPRB) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 +REAL(KIND=JPRB) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) + +REAL(KIND=JPRB) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms +REAL(KIND=JPRB) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) +REAL(KIND=JPRB) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN + +REAL(KIND=JPRB) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes +REAL(KIND=JPRB) ,ALLOCATABLE :: RACTHE2(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes +END TYPE FIELDS_TYPE + +TYPE(FIELDS_TYPE),ALLOCATABLE,TARGET :: FIELDS_RESOL(:) +TYPE(FIELDS_TYPE),POINTER :: F + +END MODULE TPM_FIELDS diff --git a/src/trans/cpu/internal/tpm_flt.F90 b/src/trans/cpu/internal/tpm_flt.F90 new file mode 100644 index 0000000..2be0e49 --- /dev/null +++ b/src/trans/cpu/internal/tpm_flt.F90 @@ -0,0 +1,74 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FLT + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE BUTTERFLY_ALG_MOD,ONLY : BUTTERFLY_STRUCT +USE SEEFMM_MIX +IMPLICIT NONE + +SAVE + + +TYPE FLT_TYPE +INTEGER(KIND=JPIM) :: NSPOLEGL +INTEGER(KIND=JPIM) :: NDGNH +INTEGER(KIND=JPIM) :: INS2 +INTEGER(KIND=JPIM) :: INA2 +REAL(KIND=JPRB) ,POINTER :: RPNMS(:,:) ! Legendre polynomials +REAL(KIND=JPRB) ,POINTER :: RPNMA(:,:) ! Legendre polynomials +REAL(KIND=JPRD) ,POINTER :: RPNMDS(:,:) ! Legendre polynomials +REAL(KIND=JPRD) ,POINTER :: RPNMDA(:,:) ! Legendre polynomials +REAL(KIND=JPRB) :: RCS +REAL(KIND=JPRB) :: RCA +!REAL(KIND=JPRB) ,POINTER :: RPNMCDO(:,:) ! Legendre polynomials for C-D formula at orig roots +!REAL(KIND=JPRB) ,POINTER :: RPNMCDD(:,:) ! Legendre polynomials for C-D formula at dual roots +REAL(KIND=JPRB) ,POINTER :: RPNMWI(:,:) ! special weights +REAL(KIND=JPRB) ,POINTER :: RPNMWO(:,:) ! special weights +INTEGER(KIND=JPIM) :: ISLD ! starting latitude dual + +! Butterfly + +INTEGER(KIND=JPIM) :: MAXCOLS +TYPE(BUTTERFLY_STRUCT) :: YBUT_STRUCT_S,YBUT_STRUCT_A + +END TYPE FLT_TYPE + +TYPE FLT_TYPE_WRAP +TYPE(FLT_TYPE),ALLOCATABLE :: FA(:) +LOGICAL :: LDLL +LOGICAL :: LSHIFTLL +LOGICAL :: LUSEFLT +LOGICAL :: LUSE_BELUSOV +LOGICAL :: LKEEPRPNM +LOGICAL :: LSOUTHPNM ! .TRUE. to compute Legendre polynomials on southern hemisphere +INTEGER(KIND=JPIM) :: IMLOC +INTEGER(KIND=JPIM) :: ITHRESHOLD +INTEGER(KIND=JPIM) :: NDGNHD ! dual set dimension +INTEGER(KIND=JPIM) :: NDLON ! dual number of longitudes +INTEGER(KIND=JPIM) :: NDGL ! dual number of latitudes +LOGICAL :: LSYM +TYPE(FMM_TYPE),POINTER :: FMM_INTI ! FMM interpolation + +END TYPE FLT_TYPE_WRAP + +TYPE(FLT_TYPE_WRAP),ALLOCATABLE,TARGET :: FLT_RESOL(:) +TYPE(FLT_TYPE_WRAP),POINTER :: S + + +END MODULE TPM_FLT + + + + + + + diff --git a/src/trans/cpu/internal/tpm_gen.F90 b/src/trans/cpu/internal/tpm_gen.F90 new file mode 100644 index 0000000..3cd0509 --- /dev/null +++ b/src/trans/cpu/internal/tpm_gen.F90 @@ -0,0 +1,45 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_GEN + +! Module for general control variables. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +INTEGER(KIND=JPIM) :: NOUT ! Unit number for "standard" output +INTEGER(KIND=JPIM) :: NERR ! Unit number for error messages +INTEGER(KIND=JPIM) :: NPRINTLEV ! Printing level, 0=no print, 1=standard,2=debug + +INTEGER(KIND=JPIM) :: MSETUP0 = 0 ! Control of setup calls +INTEGER(KIND=JPIM) :: NMAX_RESOL = 0 ! Maximum allowed number of resolutions +INTEGER(KIND=JPIM) :: NCUR_RESOL = 0 ! Current resolution +INTEGER(KIND=JPIM) :: NDEF_RESOL = 0 ! Number of defined resolutions +INTEGER(KIND=JPIM) :: NPROMATR ! Packet size for transform (in no of fields) + ! NPROMATR=0 means do all fields together (dflt) + +LOGICAL :: LALLOPERM ! Allocate some shared data structures permanently +LOGICAL :: LMPOFF ! true: switch off message passing +LOGICAL :: LSYNC_TRANS ! true: activate barriers in trmtol and trltom + +! Use of synchronization/blocking in Transpose (some networks do get flooded) +! 0 = Post IRECVs up-front, use ISENDs, use WAITANY to recv data (default) +! 1 = Use ISENDs, use blocking RECVs, add barrier at the end of each cycle +! 2 = Use buffered SENDs, use blocking RECVs, add barrier at the end of each cycle +INTEGER(KIND=JPIM) :: NTRANS_SYNC_LEVEL = 0 + +LOGICAL, ALLOCATABLE :: LENABLED(:) ! true: the resolution is enabled (it has been + ! initialised and has not been released afterward) + +END MODULE TPM_GEN diff --git a/src/trans/cpu/internal/tpm_geometry.F90 b/src/trans/cpu/internal/tpm_geometry.F90 new file mode 100644 index 0000000..2c6e2ff --- /dev/null +++ b/src/trans/cpu/internal/tpm_geometry.F90 @@ -0,0 +1,37 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_GEOMETRY + +! Module containing data describing Gaussian grid. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE GEOM_TYPE +INTEGER(KIND=JPIM),ALLOCATABLE :: NLOEN(:) ! NUMBER OF POINTS ON A PARALLEL +INTEGER(KIND=JPIM),ALLOCATABLE :: NMEN(:) ! ASSOCIATED CUT-OFF WAVE NUMBER +INTEGER(KIND=JPIM),ALLOCATABLE :: NDGLU(:) ! NUMBER OF HEMISPERIC LATITUDES +! FOR A GIVEN WAVE NUMBER M + +LOGICAL :: LAM ! LAM geometry if T, Global geometry if F +LOGICAL :: LREDUCED_GRID ! Reduced Gaussian grid if T +! quadratic Gaussian grid otherwise. +REAL(KIND=JPRB) :: RSTRET ! Stretching factor (for Legendre polynomials +! computed on stretched latitudes only) +END TYPE GEOM_TYPE + +TYPE(GEOM_TYPE),ALLOCATABLE,TARGET :: GEOM_RESOL(:) +TYPE(GEOM_TYPE),POINTER :: G + +END MODULE TPM_GEOMETRY diff --git a/src/trans/cpu/internal/tpm_pol.F90 b/src/trans/cpu/internal/tpm_pol.F90 new file mode 100644 index 0000000..27eaa82 --- /dev/null +++ b/src/trans/cpu/internal/tpm_pol.F90 @@ -0,0 +1,123 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_POL + +! MODIFICATIONS. +! -------------- +! R. El Khatib 17-Feb-2016 Optional allocation/computation of DDC/DDD/DDE +! since they are (big and) not used in supolf. + +USE PARKIND1 ,ONLY : JPRD, JPIM + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRD),ALLOCATABLE :: DDC(:,:), DDD(:,:), DDE(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: DDA(:), DDI(:), DDH(:) + +REAL(KIND=JPRD),ALLOCATABLE :: DFA(:), DFB(:), DFF(:), DFG(:), DFI(:), DFH(:) + +CONTAINS +!====================================================================== +SUBROUTINE INI_POL(KNSMAX,LDFAST) + +INTEGER(KIND=JPIM), INTENT(IN) :: KNSMAX +LOGICAL, INTENT(IN), OPTIONAL :: LDFAST + +REAL(KIND=JPRD) :: DA,DC,DD,DE +INTEGER(KIND=JPIM) :: KKN, KKM + +INTEGER(KIND=JPIM) :: JN, JM +LOGICAL :: LLFAST + +DC(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN+KKM-1,JPRD)& + &*REAL(KKN+KKM-3,JPRD))& + &/ (REAL(2*KKN-3,JPRD)*REAL(KKN+KKM,JPRD)& + &*REAL(KKN+KKM-2,JPRD)) ) +DD(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN+KKM-1,JPRD)& + &*REAL(KKN-KKM+1,JPRD))& + &/ (REAL(2*KKN-1,JPRD)*REAL(KKN+KKM,JPRD)& + &*REAL(KKN+KKM-2,JPRD)) ) +DE(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN-KKM,JPRD))& + &/ (REAL(2*KKN-1,JPRD)*REAL(KKN+KKM,JPRD)) ) +DA(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN-KKM,JPRD)& + &*REAL(KKN+KKM,JPRD))& + &/ REAL(2*KKN-1,JPRD) ) + +IF (PRESENT(LDFAST)) THEN + LLFAST=LDFAST +ELSE + LLFAST=.FALSE. +ENDIF +IF (.NOT.LLFAST) ALLOCATE( DDC(0:KNSMAX,0:KNSMAX) ) +IF (.NOT.LLFAST) ALLOCATE( DDD(0:KNSMAX,0:KNSMAX) ) +IF (.NOT.LLFAST) ALLOCATE( DDE(0:KNSMAX,0:KNSMAX) ) + +ALLOCATE( DDA(0:KNSMAX) ) +ALLOCATE( DDI(0:KNSMAX) ) +ALLOCATE( DDH(0:KNSMAX) ) + +ALLOCATE( DFA(0:KNSMAX) ) +ALLOCATE( DFB(0:KNSMAX) ) +ALLOCATE( DFF(0:KNSMAX) ) +ALLOCATE( DFG(0:KNSMAX) ) +ALLOCATE( DFI(0:KNSMAX) ) +ALLOCATE( DFH(0:KNSMAX) ) + + +DO JN=1,KNSMAX + DFA(JN) = 1._JPRD/SQRT(REAL(JN*(JN+1),JPRD)) + DFB(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(JN*(JN+1),JPRD)) + DFF(JN) = REAL(2*JN-1,JPRD)/REAL(JN,JPRD) + DFG(JN) = REAL(JN-1,JPRD)/REAL(JN,JPRD) + DFI(JN) = REAL(JN,JPRD) + DFH(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(2*JN,JPRD)) +ENDDO + +IF (.NOT.LLFAST) THEN + DO JN=3,KNSMAX + DO JM=2,JN-1 + DDC(JM,JN) = DC(JN,JM) + DDD(JM,JN) = DD(JN,JM) + DDE(JM,JN) = DE(JN,JM) + ENDDO + ENDDO +ENDIF + +DO JN=1,KNSMAX + DDA(JN) = 1._JPRD/SQRT(REAL(JN*(JN+1),JPRD)) + DDI(JN) = REAL(JN,JPRD) + DDH(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(2*JN,JPRD)) +ENDDO + +END SUBROUTINE INI_POL + +SUBROUTINE END_POL + +IF (ALLOCATED (DDC) ) DEALLOCATE( DDC ) +IF (ALLOCATED (DDD) ) DEALLOCATE( DDD ) +IF (ALLOCATED (DDE) ) DEALLOCATE( DDE ) + +DEALLOCATE( DDA ) +DEALLOCATE( DDI ) +DEALLOCATE( DDH ) + +DEALLOCATE( DFA ) +DEALLOCATE( DFB ) +DEALLOCATE( DFF ) +DEALLOCATE( DFG ) +DEALLOCATE( DFI ) +DEALLOCATE( DFH ) + +END SUBROUTINE END_POL + +END MODULE TPM_POL diff --git a/src/trans/cpu/internal/tpm_trans.F90 b/src/trans/cpu/internal/tpm_trans.F90 new file mode 100644 index 0000000..235f292 --- /dev/null +++ b/src/trans/cpu/internal/tpm_trans.F90 @@ -0,0 +1,58 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_TRANS + +! Module to contain variables "local" to a specific call to a transform + +! +USE PARKIND1 ,ONLY : JPIM, JPRB + +IMPLICIT NONE + +SAVE + +!INTEGER_M :: NF_UV ! Number of u-v fields (spectral/fourier space) +!INTEGER_M :: NF_SCALARS ! Number of scalar fields (spectral/fourier space) +!INTEGER_M :: NF_SCDERS ! Number of fields for derivatives of scalars + ! (inverse transform, spectral/fourier space) +!INTEGER_M :: NF_OUT_LT ! Number of fields that comes out of Inverse + ! Legendre transform +INTEGER(KIND=JPIM) :: NF_SC2 ! Number of fields in "SPSC2" arrays. +INTEGER(KIND=JPIM) :: NF_SC3A ! Number of fields in "SPSC3A" arrays. +INTEGER(KIND=JPIM) :: NF_SC3B ! Number of fields in "SPSC3B" arrays. + +!LOGICAL :: LUV ! uv fields requested +!LOGICAL :: LSCALAR ! scalar fields requested +LOGICAL :: LVORGP ! vorticity requested +LOGICAL :: LDIVGP ! divergence requested +LOGICAL :: LUVDER ! E-W derivatives of U and V requested +LOGICAL :: LSCDERS ! derivatives of scalar variables are req. +LOGICAL :: LATLON ! lat-lon output requested + +!INTEGER_M :: NLEI2 ! 8*NF_UV + 2*NF_SCALARS + 2*NF_SCDERS (dimension in + ! inverse Legendre transform) +!INTEGER_M :: NLED2 ! 2*NF_FS (dimension in direct Legendre transform) + +!INTEGER_M :: NF_FS ! Total number of fields in Fourier space + +!INTEGER_M :: NF_GP ! Total number of field in grid-point space +!INTEGER_M :: NF_UV_G ! Global version of NF_UV (grid-point space) +!INTEGER_M :: NF_SCALARS_G ! Global version of NF_SCALARS (grid-point space) + +REAL(KIND=JPRB), ALLOCATABLE :: FOUBUF_IN(:) ! Fourier buffer +REAL(KIND=JPRB), ALLOCATABLE :: FOUBUF(:) ! Fourier buffer + +INTEGER(KIND=JPIM) :: NPROMA ! Blocking factor for gridpoint input/output +INTEGER(KIND=JPIM) :: NGPBLKS ! Number of NPROMA blocks + +LOGICAL :: LGPNORM = .FALSE. ! indicates whether transform is being done for gpnorm + +END MODULE TPM_TRANS diff --git a/src/trans/cpu/internal/trgtol_mod.F90 b/src/trans/cpu/internal/trgtol_mod.F90 new file mode 100644 index 0000000..8f17762 --- /dev/null +++ b/src/trans/cpu/internal/trgtol_mod.F90 @@ -0,0 +1,838 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRGTOL_MOD + +PUBLIC TRGTOL +PRIVATE TRGTOL_PROLOG, TRGTOL_COMM + +CONTAINS + +SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *TRGTOL * - head routine for transposition of grid point data from column +! structure to latitudinal. Reorganize data between +! grid point calculations and direct Fourier Transform + +!** Interface. +! ---------- +! *call* *trgtol_prolog(...) + +! Explicit arguments : +! -------------------- + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! R. El Khatib *Meteo-France* + +! Modifications. +! -------------- +! Original : 18-Aug-2014 from trgtol +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC +USE TPM_TRANS ,ONLY : NGPBLKS + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP +REAL(KIND=JPRB),INTENT(OUT) :: PGLAT(KF_FS,D%NLENGTF) +INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) + +INTEGER(KIND=JPIM) :: ISENDCOUNT +INTEGER(KIND=JPIM) :: IRECVCOUNT +INTEGER(KIND=JPIM) :: INSEND +INTEGER(KIND=JPIM) :: INRECV +INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) +INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) +INTEGER(KIND=JPIM) :: ISEND (NPROC) +INTEGER(KIND=JPIM) :: IRECV (NPROC) +INTEGER(KIND=JPIM) :: IINDEX(D%NLENGTF) +INTEGER(KIND=JPIM) :: INDOFF(NPROC) +INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE) + +CALL TRGTOL_PROLOG(KF_FS,KF_GP,KVSET,& + & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND) +CALL TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, & + & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & + & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) + +IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE TRGTOL + +SUBROUTINE TRGTOL_PROLOG(KF_FS,KF_GP,KVSET,& + & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND) + +!**** *TRGTOL_PROLOG * - prolog for transposition of grid point data from column +! structure to latitudinal. Reorganize data between +! grid point calculations and direct Fourier Transform +! the purpose is essentially +! to compute the size of communication buffers in order to enable +! the use of automatic arrays later. + + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *call* *trgtol_prolog(...) + +! Explicit arguments : +! -------------------- + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! R. El Khatib *Meteo-France* + +! Modifications. +! -------------- +! Original : 18-Aug-2014 from trgtol +! ------------------------------------------------------------------ + + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, NPRTRNS, MYSETW, MYPROC, NPROC +USE TPM_TRANS ,ONLY : NGPBLKS + +USE INIGPTR_MOD ,ONLY : INIGPTR +USE PE2SET_MOD ,ONLY : PE2SET +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP +INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) + +INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDCOUNT +INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVCOUNT +INTEGER(KIND=JPIM), INTENT(OUT) :: KNSEND +INTEGER(KIND=JPIM), INTENT(OUT) :: KNRECV +INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSEND (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KRECV (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KINDEX(D%NLENGTF) +INTEGER(KIND=JPIM), INTENT(OUT) :: KNDOFF(NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) + +INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) +INTEGER(KIND=JPIM) :: IFIRSTLAT, IGL, IGLL, ILASTLAT, JROC, IPOS, ISETB, ISETA +INTEGER(KIND=JPIM) :: ISETV, J, JFLD, JGL, JL, ISETW, INDOFFX,IBUFLENS,IBUFLENR + +! ------------------------------------------------------------------ + +!* 0. Some initializations +! -------------------- + +CALL GSTATS(1805,0) + +CALL INIGPTR(KGPTRSEND,IGPTRRECV) + +INDOFFX = 0 +IBUFLENS = 0 +IBUFLENR = 0 +KNRECV = 0 +KNSEND = 0 + +DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + +! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + KSENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS + + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,KSENDTOT(JROC)) + IF(KSENDTOT(JROC) > 0) THEN + KNSEND = KNSEND+1 + KSEND(KNSEND)=JROC + ENDIF + ENDIF + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + KRECVTOT(JROC) = IPOS*KF_FS + IF(KRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + KNRECV = KNRECV + 1 + KRECV(KNRECV)=JROC + ENDIF + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,KRECVTOT(JROC)) + + IF(IPOS > 0) THEN + KNDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + KINDEX(IPOS+KNDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + +ENDDO + +KSENDCOUNT=0 +KRECVCOUNT=0 +DO J=1,NPROC + KSENDCOUNT=MAX(KSENDCOUNT,KSENDTOT(J)) + KRECVCOUNT=MAX(KRECVCOUNT,KRECVTOT(J)) +ENDDO + +CALL GSTATS(1805,1) + +END SUBROUTINE TRGTOL_PROLOG + +SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& + & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& + & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *TRGTOL_COMM * - transposition of grid point data from column +! structure to latitudinal. Reorganize data between +! grid point calculations and direct Fourier Transform + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *call* *trgtol(...) + +! Explicit arguments : +! -------------------- +! PGLAT - Latitudinal data ready for direct FFT (output) +! PGP - Blocked grid point data (input) + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original: 95-10-01 +! D.Dent : 97-08-04 Reorganisation to allow +! NPRTRV to differ from NPRGPEW +! : 98-06-17 add mailbox control logic (from TRLTOM) +! =99-03-29= Mats Hamrud and Deborah Salmond +! JUMP in FFT's changed to 1 +! KINDEX introduced and ZCOMBUF not used for same PE +! 01-11-23 Deborah Salmond and John Hague +! LIMP_NOOLAP Option for non-overlapping message passing +! and buffer packing +! 01-12-18 Peter Towers +! Improved vector performance of GTOL_PACK,GTOL_UNPACK +! 03-04-02 G. Radnoti: call barrier always when nproc>1 +! 08-01-01 G.Mozdzynski: cleanup +! 09-01-02 G.Mozdzynski: use non-blocking recv and send +! ------------------------------------------------------------------ + + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_WAITANY, & + & JP_BLOCKING_STANDARD, MPL_BARRIER, JP_BLOCKING_BUFFERED + +USE TPM_GEN ,ONLY : NOUT, NTRANS_SYNC_LEVEL +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRNS, MTAGGL, & + & MYSETV, MYSETW, MYPROC, NPROC +USE TPM_TRANS ,ONLY : LDIVGP, LGPNORM, LSCDERS, LUVDER, LVORGP, NGPBLKS + +USE PE2SET_MOD ,ONLY : PE2SET +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP +REAL(KIND=JPRB),INTENT(OUT) :: PGLAT(KF_FS,D%NLENGTF) +INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT +INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT +INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND +INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV +INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) +INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) + +REAL(KIND=JPRB), ALLOCATABLE :: ZCOMBUFS(:,:) +REAL(KIND=JPRB), ALLOCATABLE :: ZCOMBUFR(:,:) + +INTEGER(KIND=JPIM) :: IPOSPLUS(KNSEND) +INTEGER(KIND=JPIM) :: ISETW(KNSEND) +INTEGER(KIND=JPIM) :: IJPOS(NGPBLKS,KNSEND) +INTEGER(KIND=JPIM) :: IFLDA(KF_GP,KNSEND) +INTEGER(KIND=JPIM) :: IREQ_SEND(NPROC) +INTEGER(KIND=JPIM) :: IREQ_RECV(NPROC) + +! LOCAL LOGICAL SCALARS +LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY, LLINDER +LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFIRST, ILAST, ILEN, IPOS, ISETA, ISETB, IRECV, ISETV +INTEGER(KIND=JPIM) :: ISEND, ITAG, JBLK, JFLD, JK, JL, IFLD, II, IFLDS, INS, INR +INTEGER(KIND=JPIM) :: JJ,JI,IFLDT, J + +INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) +INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) +INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF +INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2, JNR +INTEGER(KIND=JPIM) :: IFLDOFF(KF_FS) +INTEGER(KIND=JPIM) :: ISEND_FLD_START,ISEND_FLD_END +INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END +INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + +! ------------------------------------------------------------------ + +!* 0. Some initializations +! -------------------- + +ITAG = MTAGGL + +IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',0,ZHOOK_HANDLE_BAR) +ALLOCATE(ZCOMBUFS(-1:KSENDCOUNT,KNSEND)) +ALLOCATE(ZCOMBUFR(-1:KRECVCOUNT,KNRECV)) +CALL GSTATS_BARRIER(761) +IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',1,ZHOOK_HANDLE_BAR) + +IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,0) +ELSE + CALL GSTATS(804,0) +ENDIF + +IF (NTRANS_SYNC_LEVEL <= 0) THEN + ! Receive loop......................................................... + DO INR=1,KNRECV + IRECV=KRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_RECV(INR), & + & KTAG=ITAG,CDSTRING='TRGTOL_COMM: NON-BLOCKING IRECV' ) + ENDDO +ENDIF + +IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,1) +ELSE + CALL GSTATS(804,1) +ENDIF + +CALL GSTATS(1805,0) +LLINDER = .FALSE. +LLPGPUV = .FALSE. +LLPGP3A = .FALSE. +LLPGP3B = .FALSE. +LLPGP2 = .FALSE. +LLPGPONLY = .FALSE. +IF(PRESENT(KPTRGP)) LLINDER = .TRUE. +IF(PRESENT(PGP)) LLPGPONLY = .TRUE. +IF(PRESENT(PGPUV)) LLPGPUV = .TRUE. +IF(PRESENT(PGP3A)) LLPGP3A = .TRUE. +IF(PRESENT(PGP3B)) LLPGP3B = .TRUE. +IF(PRESENT(PGP2)) LLPGP2 = .TRUE. + +IUVPAR=0 +IUVLEV=0 +IOFF1=0 +IOFFNS=KF_SCALARS_G +IOFFEW=2*KF_SCALARS_G + +LLUV(:) = .FALSE. +IUVPARS(:) = -99 +IUVLEVS(:) = -99 +IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF +ENDIF + +LLGP2(:)=.FALSE. +IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF +ENDIF + +LLGP3A(:) = .FALSE. +IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF +ENDIF + +LLGP3B(:) = .FALSE. +IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF +ENDIF + +CALL GSTATS(1805,1) + + +! Copy local contribution + +IF(KSENDTOT(MYPROC) > 0 )THEN + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = KGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + CALL GSTATS(1601,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ENDDO + ELSE + WRITE(NOUT,*)'TRGTOL_MOD: ERROR',JFLD,IFLD + CALL ABORT_TRANS('TRGTOL_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1601,1) + +ENDIF + + +! Now overlapping buffer packing/unpacking with sends/waits +! Time as if all communications to avoid double accounting + +IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,0) +ELSE + CALL GSTATS(804,0) +ENDIF + +!....Pack+send loop......................................................... + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JBLK,IFIRST,ILAST,& +!$OMP& INS,ISEND,ISETA,ISETB,ISETV,IFLD,IPOS,JFLD) +DO INS=1,KNSEND + ISEND=KSEND(INS) + CALL PE2SET(ISEND,ISETA,ISETB,ISETW(INS),ISETV) + IFLD = 0 + IPOS = 0 + IPOSPLUS(INS)=0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD,INS)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,ISETW(INS)) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,ISETW(INS)) + IJPOS(JBLK,INS)=IPOS + IPOSPLUS(INS)=IPOSPLUS(INS)+(ILAST-IFIRST+1) + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = IFLD + +ENDDO +!$OMP END PARALLEL DO + +DO INS=1,KNSEND + ISEND=KSEND(INS) + IPOS=IPOSPLUS(INS) + + ISEND_FLD_START=ZCOMBUFS(-1,INS) + ISEND_FLD_END = ZCOMBUFS(0,INS) + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,JBLK,IFIRST,ILAST,JK,JJ,JI) + DO JJ=ISEND_FLD_START,ISEND_FLD_END + IFLDT=IFLDA(JJ,INS) + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,ISETW(INS)) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,ISETW(INS)) + IF(LLINDER) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,KPTRGP(IFLDT),JBLK) + ENDDO + ELSE + IF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP2(JK,IGP2PARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) + ENDDO + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO +!$OMP END PARALLEL DO + + IF (NTRANS_SYNC_LEVEL <= 1) THEN + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_SEND(INS), & + & KTAG=ITAG,CDSTRING='TRGTOL_COMM: NON-BLOCKING ISEND' ) + ELSE + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_BLOCKING_BUFFERED, & + & KTAG=ITAG,CDSTRING='TRGTOL_COMM: BLOCKING BUFFERED BSEND' ) + ENDIF + +ENDDO + +! Unpack loop......................................................... + +DO JNR=1,KNRECV + + IF (NTRANS_SYNC_LEVEL <= 0) THEN + CALL MPL_WAITANY(KREQUEST=IREQ_RECV(1:KNRECV),KINDEX=INR,& + & CDSTRING='TRGTOL_COMM: WAIT FOR ANY RECEIVES') + ELSE + INR = JNR + IRECV=KRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_BLOCKING_STANDARD, & + & KTAG=ITAG,CDSTRING='TRGTOL_COMM: BLOCKING RECV' ) + ENDIF + + IRECV=KRECV(INR) + ILEN = KRECVTOT(IRECV)/KF_FS + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD) + DO JL=1,ILEN + II = KINDEX(KNDOFF(IRECV)+JL) + DO JFLD=IRECV_FLD_START,IRECV_FLD_END + PGLAT(JFLD,II) = ZCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR) + ENDDO + ENDDO +!$OMP END PARALLEL DO + IPOS = ILEN*(IRECV_FLD_END-IRECV_FLD_START+1) +ENDDO + +IF (NTRANS_SYNC_LEVEL <= 1) THEN + IF(KNSEND > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ_SEND(1:KNSEND),CDSTRING='TRGTOL_COMM: WAIT FOR ISENDS') + ENDIF +ENDIF + +IF (NTRANS_SYNC_LEVEL >= 1) THEN + CALL MPL_BARRIER(CDSTRING='TRGTOL_COMM: BARRIER AT END') +ENDIF + +IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,1) +ELSE + CALL GSTATS(804,1) +ENDIF + +DEALLOCATE(ZCOMBUFR) +DEALLOCATE(ZCOMBUFS) +CALL GSTATS_BARRIER2(761) + +END SUBROUTINE TRGTOL_COMM + +END MODULE TRGTOL_MOD diff --git a/src/trans/cpu/internal/trltog_mod.F90 b/src/trans/cpu/internal/trltog_mod.F90 new file mode 100644 index 0000000..c6654c1 --- /dev/null +++ b/src/trans/cpu/internal/trltog_mod.F90 @@ -0,0 +1,833 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRLTOG_MOD + +PUBLIC TRLTOG +PRIVATE TRLTOG_PROLOG, TRLTOG_COMM + +CONTAINS + +SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *TRLTOG * - head routine for transposition of grid point data from latitudinal +! to column structure (this takes place between inverse +! FFT and grid point calculations) +! TRLTOG is the inverse of TRGTOL + +!** Interface. +! ---------- +! *call* *TRLTOG(...) + +! Explicit arguments : +! -------------------- +! PGLAT - Latitudinal data ready for direct FFT (input) +! PGP - Blocked grid point data (output) +! KVSET - "v-set" for each field (input) + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! R. El Khatib *Meteo-France* + +! Modifications. +! -------------- +! Original : 18-Aug-2014 from trltog +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC +USE TPM_TRANS ,ONLY : NGPBLKS + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G +REAL(KIND=JPRB),INTENT(IN) :: PGLAT(KF_FS,D%NLENGTF) +INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(KF_GP) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) + +INTEGER(KIND=JPIM) :: ISENDCOUNT +INTEGER(KIND=JPIM) :: IRECVCOUNT +INTEGER(KIND=JPIM) :: INSEND +INTEGER(KIND=JPIM) :: INRECV +INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) +INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) +INTEGER(KIND=JPIM) :: ISEND (NPROC) +INTEGER(KIND=JPIM) :: IRECV (NPROC) +INTEGER(KIND=JPIM) :: IINDEX(D%NLENGTF) +INTEGER(KIND=JPIM) :: INDOFF(NPROC) +INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) +INTEGER(KIND=JPIM) :: ISETAL(NPROC), ISETBL(NPROC), ISETWL(NPROC), ISETVL(NPROC) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('TRLTOG',0,ZHOOK_HANDLE) + +CALL TRLTOG_PROLOG(KF_FS,KF_GP,KVSET,& + & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & + & ISETAL,ISETBL,ISETWL,ISETVL) +CALL TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, & + & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & + & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2,& + & ISETAL,ISETBL,ISETWL,ISETVL) + +IF (LHOOK) CALL DR_HOOK('TRLTOG',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE TRLTOG + +SUBROUTINE TRLTOG_PROLOG(KF_FS,KF_GP,KVSET,& + & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND, & + & KSETAL,KSETBL,KSETWL,KSETVL) + +!**** *TRLTOG_PROLOG * - prolog for transposition of grid point data from latitudinal +! to column structure (this takes place between inverse +! FFT and grid point calculations) : the purpose is essentially +! to compute the size of communication buffers in order to enable +! the use of automatic arrays later. +! TRLTOG_PROLOG is the inverse of TRGTOL_PROLOG + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *call* *TRLTOG_PROLOG(...) + +! Explicit arguments : +! -------------------- +! KVSET - "v-set" for each field (input) + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! R. El Khatib *Meteo-France* + +! Modifications. +! -------------- +! Original : 18-Aug-2014 from trltog +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW, NPRTRNS, MYPROC, NPROC +USE TPM_TRANS ,ONLY : NGPBLKS + +USE INIGPTR_MOD ,ONLY : INIGPTR +USE PE2SET_MOD ,ONLY : PE2SET +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP +INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) + +INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDCOUNT +INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVCOUNT +INTEGER(KIND=JPIM), INTENT(OUT) :: KNSEND +INTEGER(KIND=JPIM), INTENT(OUT) :: KNRECV +INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSEND (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KRECV (NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KINDEX(D%NLENGTF) +INTEGER(KIND=JPIM), INTENT(OUT) :: KNDOFF(NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSETAL(NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSETBL(NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSETWL(NPROC) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSETVL(NPROC) + +INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) +INTEGER(KIND=JPIM) :: IFIRSTLAT, IGL, IGLL, ILASTLAT, IPOS, ISETA, ISETB, ISETV +INTEGER(KIND=JPIM) :: ISEND, JFLD, JGL, JL, ISETW, JROC, J +INTEGER(KIND=JPIM) :: INDOFFX,IBUFLENS,IBUFLENR + +! ------------------------------------------------------------------ + +!* 0. Some initializations +! -------------------- + +CALL GSTATS(1806,0) + +CALL INIGPTR(KGPTRSEND,IGPTRRECV) + +INDOFFX = 0 +IBUFLENS = 0 +IBUFLENR = 0 +KNRECV = 0 +KNSEND = 0 + +DO JROC=1,NPROC + + CALL PE2SET(JROC,KSETAL(JROC),KSETBL(JROC),KSETWL(JROC),KSETVL(JROC)) + ISEND = JROC + ISETA=KSETAL(JROC) + ISETB=KSETBL(JROC) + ISETW=KSETWL(JROC) + ISETV=KSETVL(JROC) +! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + KRECVTOT(JROC) = IGPTRRECV(ISETW)*IPOS + IF(KRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + KNRECV = KNRECV + 1 + KRECV(KNRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,KRECVTOT(JROC)) + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + KSENDTOT(JROC) = IPOS*KF_FS + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,KSENDTOT(JROC)) + IF(KSENDTOT(JROC) > 0) THEN + KNSEND = KNSEND+1 + KSEND(KNSEND)=JROC + ENDIF + ENDIF + + IF(IPOS > 0) THEN + KNDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + KINDEX(IPOS+KNDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF +ENDDO + +KSENDCOUNT=0 +KRECVCOUNT=0 +DO J=1,NPROC + KSENDCOUNT=MAX(KSENDCOUNT,KSENDTOT(J)) + KRECVCOUNT=MAX(KRECVCOUNT,KRECVTOT(J)) +ENDDO + +CALL GSTATS(1806,1) + +END SUBROUTINE TRLTOG_PROLOG + +SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& + & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& + & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2, & + & KSETAL, KSETBL,KSETWL,KSETVL) + + +!**** *trltog * - transposition of grid point data from latitudinal +! to column structure. This takes place between inverse +! FFT and grid point calculations. +! TRLTOG_COMM is the inverse of TRGTOL + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *call* *trltog(...) + +! Explicit arguments : +! -------------------- +! PGLAT - Latitudinal data ready for direct FFT (input) +! PGP - Blocked grid point data (output) +! KVSET - "v-set" for each field (input) + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! D.Dent : 97-08-04 Reorganisation to allow NPRTRV +! to differ from NPRGPEW +! =99-03-29= Mats Hamrud and Deborah Salmond +! JUMP in FFT's changed to 1 +! KINDEX introduced and ZCOMBUF not used for same PE +! 01-11-23 Deborah Salmond and John Hague +! LIMP_NOOLAP Option for non-overlapping message passing +! and buffer packing +! 01-12-18 Peter Towers +! Improved vector performance of LTOG_PACK,LTOG_UNPACK +! 03-0-02 G. Radnoti: Call barrier always when nproc>1 +! 08-01-01 G.Mozdzynski: cleanup +! 09-01-02 G.Mozdzynski: use non-blocking recv and send +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_WAITANY, & + & JP_BLOCKING_STANDARD, MPL_BARRIER, JP_BLOCKING_BUFFERED + +USE TPM_GEN ,ONLY : NOUT, NTRANS_SYNC_LEVEL +USE TPM_DISTR ,ONLY : D, MYSETV, MYSETW, MTAGLG, & + & NPRCIDS, NPRTRNS, MYPROC, NPROC +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, NGPBLKS + +USE PE2SET_MOD ,ONLY : PE2SET +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS,KF_GP +REAL(KIND=JPRB),INTENT(IN) :: PGLAT(KF_FS,D%NLENGTF) +INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(KF_GP) +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT +INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT +INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND +INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV +INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) +INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +INTEGER(KIND=JPIM), INTENT(IN) :: KSETAL(NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KSETBL(NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KSETWL(NPROC) +INTEGER(KIND=JPIM), INTENT(IN) :: KSETVL(NPROC) + +! LOCAL VARIABLES + +REAL(KIND=JPRB), ALLOCATABLE :: ZCOMBUFS(:,:) +REAL(KIND=JPRB), ALLOCATABLE :: ZCOMBUFR(:,:) + +INTEGER(KIND=JPIM) :: IPOSPLUS(KNRECV) +INTEGER(KIND=JPIM) :: ISETW(KNRECV) +INTEGER(KIND=JPIM) :: JPOS(NGPBLKS,KNRECV) +INTEGER(KIND=JPIM) :: IFLDA(KF_GP,KNRECV) +INTEGER(KIND=JPIM) :: IREQ_SEND(NPROC) +INTEGER(KIND=JPIM) :: IREQ_RECV(NPROC) + +INTEGER(KIND=JPIM) :: IFIRST, IFLD, ILAST, IPOS, ISETA, ISETB, IRECV, ISETV +INTEGER(KIND=JPIM) :: ISEND, ITAG, JBLK, JFLD, JK, JL, IFLDS, IPROC,JROC, INR, INS +INTEGER(KIND=JPIM) :: II,ILEN,IBUFLENS,IBUFLENR, IFLDT, JI, JJ, J + +LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY +LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) +LOGICAL :: LLINDER +INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) +INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) +INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF +INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2, JNR +INTEGER(KIND=JPIM) :: IFLDOFF(KF_FS) +INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END +INTEGER(KIND=JPIM) :: ISEND_FLD_START,ISEND_FLD_END +INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) +INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + +! ------------------------------------------------------------------ + +!* 0. Some initializations +! -------------------- + +ITAG = MTAGLG + +IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',0,ZHOOK_HANDLE_BAR) +ALLOCATE(ZCOMBUFS(-1:KSENDCOUNT,KNSEND)) +ALLOCATE(ZCOMBUFR(-1:KRECVCOUNT,KNRECV)) +CALL GSTATS_BARRIER(762) +IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',1,ZHOOK_HANDLE_BAR) + +CALL GSTATS(805,0) + +IF (NTRANS_SYNC_LEVEL <= 0) THEN + !...Receive loop......................................................... + DO INR=1,KNRECV + IRECV=KRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_RECV(INR), & + & KTAG=ITAG,CDSTRING='TRLTOG_COMM: NON-BLOCKING IRECV' ) + ENDDO +ENDIF + +CALL GSTATS(805,1) + +CALL GSTATS(1806,0) +LLINDER = .FALSE. +LLPGPUV = .FALSE. +LLPGP3A = .FALSE. +LLPGP3B = .FALSE. +LLPGP2 = .FALSE. +LLPGPONLY = .FALSE. +IF(PRESENT(KPTRGP)) LLINDER = .TRUE. +IF(PRESENT(PGP)) LLPGPONLY=.TRUE. +IF(PRESENT(PGPUV)) LLPGPUV=.TRUE. +IF(PRESENT(PGP3A)) LLPGP3A=.TRUE. +IF(PRESENT(PGP3B)) LLPGP3B=.TRUE. +IF(PRESENT(PGP2)) LLPGP2=.TRUE. + +IUVPAR=0 +IUVLEV=0 +IOFF1=0 +IOFFNS=KF_SCALARS_G +IOFFEW=2*KF_SCALARS_G + +LLUV(:) = .FALSE. +IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF +ENDIF + +LLGP2(:)=.FALSE. +IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF +ENDIF + +LLGP3A(:) = .FALSE. +IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF +ENDIF + +LLGP3B(:) = .FALSE. +IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF +ENDIF + +CALL GSTATS(1806,1) + + +! Copy local contribution +IF( KRECVTOT(MYPROC) > 0 )THEN + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = KGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + + CALL GSTATS(1604,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,KINDEX(IPOS)) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(JFLD,KINDEX(IPOS)) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(JFLD,KINDEX(IPOS)) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(JFLD,KINDEX(IPOS)) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(JFLD,KINDEX(IPOS)) + ENDDO + ELSE + WRITE(NOUT,*)'TRLTOG_MOD: ERROR',JFLD,IFLD + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1604,1) + +ENDIF + +! +! loop over the number of processors we need to communicate with. +! NOT MYPROC +! +! Now overlapping buffer packing/unpacking with sends/waits +! Time as if all communications to avoid double accounting + +CALL GSTATS(805,0) + +! Pack+send loop......................................................... + +ISEND_FLD_START = 1 +ISEND_FLD_END = KF_FS +DO INS=1,KNSEND + ISEND=KSEND(INS) + ILEN = KSENDTOT(ISEND)/KF_FS +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JL,II) + DO JL=1,ILEN + II = KINDEX(KNDOFF(ISEND)+JL) + DO JFLD=ISEND_FLD_START,ISEND_FLD_END + ZCOMBUFS((JFLD-ISEND_FLD_START)*ILEN+JL,INS) = PGLAT(JFLD,II) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = KF_FS + IF (NTRANS_SYNC_LEVEL <= 1) THEN + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND),& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_SEND(INS), & + & KTAG=ITAG,CDSTRING='TRLTOG_COMM: NON-BLOCKING ISEND') + ELSE + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND),& + & KMP_TYPE=JP_BLOCKING_BUFFERED, & + & KTAG=ITAG,CDSTRING='TRLTOG_COMM: BLOCKING BUFFERED BSEND') + ENDIF +ENDDO + +! Unpack loop......................................................... + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INR,IRECV,ISETA,ISETB,ISETV,IFLD,JFLD,IPOS,JBLK,IFIRST,ILAST) +DO INR=1,KNRECV + IRECV=KRECV(INR) + + ISETA=KSETAL(IRECV) + ISETB=KSETBL(IRECV) + ISETW(INR)=KSETWL(IRECV) + ISETV=KSETVL(IRECV) + IFLD = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD,INR)=JFLD + ENDIF + ENDDO + IPOS = 0 + IPOSPLUS(INR)=0 + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,ISETW(INR)) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,ISETW(INR)) + JPOS(JBLK,INR)=IPOS + IPOSPLUS(INR)=IPOSPLUS(INR)+(ILAST-IFIRST+1) + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO +ENDDO +!$OMP END PARALLEL DO + +DO JNR=1,KNRECV + + IF (NTRANS_SYNC_LEVEL <= 0) THEN + CALL MPL_WAITANY(KREQUEST=IREQ_RECV(1:KNRECV),KINDEX=INR,& + & CDSTRING='TRLTOG_COMM: WAIT FOR ANY RECEIVES') + ELSE + INR = JNR + IRECV=KRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_BLOCKING_STANDARD, & + & KTAG=ITAG,CDSTRING='TRLTOG_COMM: BLOCKING RECV' ) + ENDIF + + IPOS=IPOSPLUS(INR) + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,IFIRST,ILAST,JK,JJ,JI,JBLK) + DO JJ=IRECV_FLD_START,IRECV_FLD_END + IFLDT=IFLDA(JJ,INR) + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,ISETW(INR)) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,ISETW(INR)) + IF(LLINDER) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 + PGP(JK,KPTRGP(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 + PGP(JK,IFLDT,JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ENDIF + ENDIF + ENDDO + ENDDO +!$OMP END PARALLEL DO +ENDDO + +IF (NTRANS_SYNC_LEVEL <= 1) THEN + IF(KNSEND > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ_SEND(1:KNSEND),CDSTRING='TRLTOG_COMM: WAIT FOR ISENDS') + ENDIF +ENDIF + +IF (NTRANS_SYNC_LEVEL >= 1) THEN + CALL MPL_BARRIER(CDSTRING='TRLTOG_COMM: BARRIER AT END') +ENDIF + +CALL GSTATS(805,1) + +DEALLOCATE(ZCOMBUFR) +DEALLOCATE(ZCOMBUFS) +CALL GSTATS_BARRIER2(762) + +END SUBROUTINE TRLTOG_COMM +END MODULE TRLTOG_MOD diff --git a/src/trans/cpu/internal/trltom_mod.F90 b/src/trans/cpu/internal/trltom_mod.F90 new file mode 100644 index 0000000..128efb2 --- /dev/null +++ b/src/trans/cpu/internal/trltom_mod.F90 @@ -0,0 +1,149 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRLTOM_MOD +CONTAINS +SUBROUTINE TRLTOM(PFBUF_IN,PFBUF,KFIELD) + +!**** *TRLTOM * - transposition in Fourierspace + +! Purpose. +! -------- +! Transpose Fourier coefficients from partitioning +! over latitudes to partitioning over wave numbers +! This is done between inverse Legendre Transform +! and inverse FFT. +! This is the inverse routine of TRMTOL. + +!** Interface. +! ---------- +! *CALL* *TRLTOM(...)* + +! Explicit arguments : PFBUF - Fourier coefficient buffer. It is +! -------------------- used for both input and output. + +! KFIELD - Number of fields communicated + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified : 97-06-18 G. Mozdzynski - control MPI mailbox use +! (NCOMBFLEN) for nphase.eq.1 +! Modified : 99-05-28 D.Salmond - Optimise copies. +! Modified : 00-02-02 M.Hamrud - Remove NPHASE +! D.Salmond : 01-11-23 LIMP_NOOLAP Option for non-overlapping message +! passing and buffer packing +! G.Mozdzynski : 08-01-01 Cleanup +! Y.Seity : 07-08-30 Add barrier synchonisation under LSYNC_TRANS +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK, MPL_WAIT, JP_NON_BLOCKING_STANDARD + +USE TPM_DISTR ,ONLY : D, MTAGLM, MYSETW, NPRTRW, NPROC +USE TPM_GEN ,ONLY : LSYNC_TRANS + +!USE SET2PE_MOD +!USE MYSENDSET_MOD +!USE MYRECVSET_MOD +!USE ABORT_TRANS_MOD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD +REAL(KIND=JPRB) ,INTENT(INOUT) :: PFBUF(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PFBUF_IN(:) + +INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) + +INTEGER(KIND=JPIM) :: ITAG, J, ILEN, ISTA + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR2 + +REAL(KIND=JPRB) :: ZDUM(1) +INTEGER(KIND=JPIM) :: IREQ + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('TRLTOM',0,ZHOOK_HANDLE) + + +ITAG = MTAGLM + +DO J=1,NPRTRW + ILENS(J) = D%NLTSGTB(J)*KFIELD + IOFFS(J) = D%NSTAGT1B(D%MSTABF(J))*KFIELD + ILENR(J) = D%NLTSFTB(J)*KFIELD + IOFFR(J) = D%NSTAGT1B(J)*KFIELD +ENDDO + +IF(NPROC > 1) THEN + IF (LHOOK) CALL DR_HOOK('TRLTOM_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(763) + IF (LHOOK) CALL DR_HOOK('TRLTOM_BAR',1,ZHOOK_HANDLE_BAR) + CALL GSTATS(806,0) +! IF (LSYNC_TRANS) THEN +! CALL MPL_BARRIER(CDSTRING='TRLTOM:') +! ENDIF + + CALL MPL_ALLTOALLV(PSENDBUF=PFBUF_IN,KSENDCOUNTS=ILENS,& + & PRECVBUF=PFBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & KCOMM=MPL_ALL_MS_COMM,CDSTRING='TRLTOM:') +!Faster on Cray - because of peculiarity of their MPICH +! CALL MPL_ALLTOALLV(PSENDBUF=PFBUF_IN,KSENDCOUNTS=ILENS,& +! & PRECVBUF=PFBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& +! & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ,& +! & KCOMM=MPL_ALL_MS_COMM,CDSTRING='TRLTOM:') +! CALL MPL_WAIT(KREQUEST=IREQ,CDSTRING='TRLTOM: WAIT') + + CALL GSTATS(806,1) + IF (LHOOK) CALL DR_HOOK('TRLTOM_BAR2',0,ZHOOK_HANDLE_BAR2) + CALL GSTATS_BARRIER2(763) + IF (LHOOK) CALL DR_HOOK('TRLTOM_BAR2',1,ZHOOK_HANDLE_BAR2) +ELSE + ILEN = D%NLTSGTB(MYSETW)*KFIELD + ISTA = D%NSTAGT1B(MYSETW)*KFIELD+1 + CALL GSTATS(1607,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(J) + DO J=ISTA,ISTA+ILEN-1 + PFBUF(J) = PFBUF_IN(J) + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1607,1) +ENDIF + +IF (LHOOK) CALL DR_HOOK('TRLTOM',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +END SUBROUTINE TRLTOM +END MODULE TRLTOM_MOD diff --git a/src/trans/cpu/internal/trmtol_mod.F90 b/src/trans/cpu/internal/trmtol_mod.F90 new file mode 100644 index 0000000..34c5e35 --- /dev/null +++ b/src/trans/cpu/internal/trmtol_mod.F90 @@ -0,0 +1,155 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRMTOL_MOD + +CONTAINS +SUBROUTINE TRMTOL(PFBUF_IN,PFBUF,KFIELD) + +!**** *trmtol * - transposition in Fourier space + +! Purpose. +! -------- +! Transpose Fourier buffer data from partitioning +! over wave numbers to partitioning over latitudes. +! It is called between direct FFT and direct Legendre +! transform. +! This routine is the inverse of TRLTOM. + + +!** Interface. +! ---------- +! *call* *trmtol(...)* + +! Explicit arguments : PFBUF - Fourier coefficient buffer. It is +! -------------------- used for both input and output. +! KFIELD - Number of fields communicated + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified : 97-06-17 G. Mozdzynski - control MPI mailbox use +! (NCOMBFLEN) for nphase.eq.1 +! Modified : 99-05-28 D.Salmond - Optimise copies. +! Modified : 00-02-02 M.Hamrud - Remove NPHASE +! D.Salmond : 01-11-23 LIMP_NOOLAP Option for non-overlapping message +! passing and buffer packing +! G.Mozdzynski: 08-01-01 Cleanup +! Y.Seity : 07-08-31 add barrien synchronisation under LSYNC_TRANS +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_WAIT, JP_NON_BLOCKING_STANDARD + +USE TPM_DISTR ,ONLY : D, MTAGML, MYSETW, NPRTRW, NPROC +USE TPM_GEN ,ONLY : LSYNC_TRANS + +!USE SET2PE_MOD +!USE MYSENDSET_MOD +!USE MYRECVSET_MOD +!USE ABORT_TRANS_MOD +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD +REAL(KIND=JPRB) ,INTENT(INOUT) :: PFBUF(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PFBUF_IN(:) + +INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) + +INTEGER(KIND=JPIM) :: ITAG, J, ILEN, ISTA + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR2 + +REAL(KIND=JPRB) :: ZDUM(1) +INTEGER(KIND=JPIM) :: IREQ + + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('TRMTOL',0,ZHOOK_HANDLE) + + +ITAG = MTAGML + +DO J=1,NPRTRW + ILENS(J) = D%NLTSFTB(J)*KFIELD + IOFFS(J) = D%NSTAGT0B(J)*KFIELD + ILENR(J) = D%NLTSGTB(J)*KFIELD + IOFFR(J) = D%NSTAGT0B(D%MSTABF(J))*KFIELD +ENDDO + +IF(NPROC > 1) THEN + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(764) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR',1,ZHOOK_HANDLE_BAR) +! IF (LSYNC_TRANS) THEN +! CALL MPL_BARRIER(CDSTRING='TRMTOL') +! ENDIF + + CALL GSTATS(807,0) + CALL MPL_ALLTOALLV(PSENDBUF=PFBUF_IN,KSENDCOUNTS=ILENS,& + & PRECVBUF=PFBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & KCOMM=MPL_ALL_MS_COMM,CDSTRING='TRMTOL:') +!Faster on Cray - because of peculiarity of their MPICH +! CALL MPL_ALLTOALLV(PSENDBUF=PFBUF_IN,KSENDCOUNTS=ILENS,& +! & PRECVBUF=PFBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& +! & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ,& +! & KCOMM=MPL_ALL_MS_COMM,CDSTRING='TRMTOL:') +! CALL MPL_WAIT(KREQUEST=IREQ,CDSTRING='TRMTOL: WAIT') + + CALL GSTATS(807,1) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR2',0,ZHOOK_HANDLE_BAR2) + CALL GSTATS_BARRIER2(764) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR2',1,ZHOOK_HANDLE_BAR2) +ELSE + ILEN = D%NLTSGTB(MYSETW)*KFIELD + ISTA = D%NSTAGT0B(MYSETW)*KFIELD+1 + CALL GSTATS(1608,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(J) + DO J=ISTA,ISTA+ILEN-1 + PFBUF(J) = PFBUF_IN(J) + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1608,1) +ENDIF + + +IF (LHOOK) CALL DR_HOOK('TRMTOL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE TRMTOL +END MODULE TRMTOL_MOD diff --git a/src/trans/cpu/internal/updsp_mod.F90 b/src/trans/cpu/internal/updsp_mod.F90 new file mode 100644 index 0000000..e6136b2 --- /dev/null +++ b/src/trans/cpu/internal/updsp_mod.F90 @@ -0,0 +1,166 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSP_MOD +CONTAINS +SUBROUTINE UPDSP(KM,KF_UV,KF_SCALARS,POA1,POA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *UPDSP* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL UPDSP(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR ,ONLY : D + +USE UPDSPB_MOD ,ONLY : UPDSPB +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS + +REAL(KIND=JPRB) , INTENT(IN) :: POA1(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: POA2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND,JFLD,IFLD,IDIM1,IDIM3,J3 + + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + + +!* 1.1 VORTICITY AND DIVERGENCE. + +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL UPDSPB(KM,KF_UV,POA2(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL UPDSPB(KM,KF_UV,POA2(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) + IF (KM == 0) THEN + IF(PRESENT(KFLDPTRUV)) THEN + DO JFLD=1,KF_UV + IFLD = KFLDPTRUV(JFLD) + PSPVOR(IFLD,D%NASM0(0)) = 0.0_JPRB + PSPDIV(IFLD,D%NASM0(0)) = 0.0_JPRB + ENDDO + ELSE + DO JFLD=1,KF_UV + PSPVOR(JFLD,D%NASM0(0)) = 0.0_JPRB + PSPDIV(JFLD,D%NASM0(0)) = 0.0_JPRB + ENDDO + ENDIF + ENDIF +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL UPDSPB(KM,KF_SCALARS,POA1(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL UPDSPB(KM,IDIM1,POA1(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPB(KM,IDIM1,POA1(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPB(KM,IDIM1,POA1(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSP +END MODULE UPDSP_MOD diff --git a/src/trans/cpu/internal/updspad_mod.F90 b/src/trans/cpu/internal/updspad_mod.F90 new file mode 100644 index 0000000..60f8cf3 --- /dev/null +++ b/src/trans/cpu/internal/updspad_mod.F90 @@ -0,0 +1,178 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPAD_MOD +CONTAINS +SUBROUTINE UPDSPAD(KM,KF_UV,KF_SCALARS,POA1,POA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *UPDSPAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL UPDSPAD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPADB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPADB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR ,ONLY : D + +USE UPDSPBAD_MOD ,ONLY : UPDSPBAD +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS + +REAL(KIND=JPRB) , INTENT(OUT) :: POA1(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: POA2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND, JN, ISE,IFLD,JFLD +INTEGER(KIND=JPIM) :: IDIM1,IDIM3,J3 + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + + +!* 1.1 VORTICITY AND DIVERGENCE. + +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + IF (KM == 0) THEN + IF(PRESENT(KFLDPTRUV)) THEN + DO JFLD=1,KF_UV + IFLD = KFLDPTRUV(JFLD) + PSPVOR(IFLD,D%NASM0(0)) = 0.0_JPRB + PSPDIV(IFLD,D%NASM0(0)) = 0.0_JPRB + ENDDO + DO JN=0,R%NSMAX + ISE = 1+JN*2+1 + DO JFLD=1,KF_UV + IFLD = KFLDPTRUV(JFLD) + PSPDIV(IFLD,ISE) = 0.0_JPRB + PSPVOR(IFLD,ISE) = 0.0_JPRB + ENDDO + ENDDO + ELSE + PSPVOR(:,D%NASM0(0)) = 0.0_JPRB + PSPDIV(:,D%NASM0(0)) = 0.0_JPRB + DO JN=0,R%NSMAX + ISE = 1+JN*2+1 + PSPDIV(:,ISE) = 0.0_JPRB + PSPVOR(:,ISE) = 0.0_JPRB + ENDDO + ENDIF + ENDIF + CALL UPDSPBAD(KM,KF_UV,POA2(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL UPDSPBAD(KM,KF_UV,POA2(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL UPDSPBAD(KM,KF_SCALARS,POA1(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL UPDSPBAD(KM,IDIM1,POA1(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPBAD(KM,IDIM1,POA1(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPBAD(KM,IDIM1,POA1(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSPAD +END MODULE UPDSPAD_MOD diff --git a/src/trans/cpu/internal/updspb_mod.F90 b/src/trans/cpu/internal/updspb_mod.F90 new file mode 100644 index 0000000..28070b8 --- /dev/null +++ b/src/trans/cpu/internal/updspb_mod.F90 @@ -0,0 +1,155 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPB_MOD +CONTAINS +SUBROUTINE UPDSPB(KM,KFIELD,POA,PSPEC,KFLDPTR) + + +!**** *UPDSPB* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL UPDSPB(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +!USE TPM_FIELDS +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB) ,INTENT(IN) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN, ISMAX, ITMAX, IASM0,IFLD + + +! ------------------------------------------------------------------ + +!* 0. NOTE. +! ----- + +! The following transfer reads : +! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) +! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) +! with n from m to NSMAX +! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. +! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) +! nn is the loop index. + + + +!* 1. UPDATE SPECTRAL FIELDS. +! ----------------------- +ISMAX = R%NSMAX +ITMAX = R%NTMAX +IASM0 = D%NASM0(KM) + +!* 1.1 KM=0 + +IF(KM == 0) THEN + IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + IFLD = KFLDPTR(JFLD) + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+(ITMAX+2-JN)*2 + PSPEC(IFLD,INM) = POA(JN,IR) + PSPEC(IFLD,INM+1) = 0.0_JPRB + ENDDO + ENDDO + ELSE + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+(ITMAX+2-JN)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + PSPEC(JFLD,INM) = POA(JN,IR) + PSPEC(JFLD,INM+1) = 0.0_JPRB + ENDDO + ENDDO + ENDIF + +!* 1.2 KM!=0 + +ELSE + IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+((ITMAX+2-JN)-KM)*2 + PSPEC(IFLD,INM) = POA(JN,IR) + PSPEC(IFLD,INM+1) = POA(JN,II) + ENDDO + ENDDO + ELSE + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+((ITMAX+2-JN)-KM)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + II = IR+1 + PSPEC(JFLD,INM) = POA(JN,IR) + PSPEC(JFLD,INM+1) = POA(JN,II) + ENDDO + ENDDO + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSPB +END MODULE UPDSPB_MOD diff --git a/src/trans/cpu/internal/updspbad_mod.F90 b/src/trans/cpu/internal/updspbad_mod.F90 new file mode 100644 index 0000000..4aef0b9 --- /dev/null +++ b/src/trans/cpu/internal/updspbad_mod.F90 @@ -0,0 +1,160 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPBAD_MOD +CONTAINS +SUBROUTINE UPDSPBAD(KM,KFIELD,POA,PSPEC,KFLDPTR) + + +!**** *UPDSPBAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL UPDSPBAD(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +!USE TPM_FIELDS +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB) ,INTENT(OUT) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN, ISMAX, ITMAX, IASM0,IFLD + + +! ------------------------------------------------------------------ + +!* 0. NOTE. +! ----- + +! The following transfer reads : +! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) +! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) +! with n from m to NSMAX +! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. +! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) +! nn is the loop index. + + + +!* 1. UPDATE SPECTRAL FIELDS. +! ----------------------- +ISMAX = R%NSMAX +ITMAX = R%NTMAX +IASM0 = D%NASM0(KM) + + +POA(:,:) = 0.0_JPRB + +!* 1.1 KM=0 + +IF(KM == 0) THEN + IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + IFLD = KFLDPTR(JFLD) + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+(ITMAX+2-JN)*2 + POA(JN,IR) = PSPEC(IFLD,INM) + PSPEC(IFLD,INM) = 0.0_JPRB + ENDDO + ENDDO + ELSE + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+(ITMAX+2-JN)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + POA(JN,IR) = PSPEC(JFLD,INM) + PSPEC(JFLD,INM) = 0.0_JPRB + ENDDO + ENDDO + ENDIF +!* 1.2 KM!=0 + +ELSE + IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+((ITMAX+2-JN)-KM)*2 + POA(JN,IR) = PSPEC(IFLD,INM) + POA(JN,II) = PSPEC(IFLD,INM+1) + PSPEC(IFLD,INM) = 0.0_JPRB + PSPEC(IFLD,INM+1) = 0.0_JPRB + ENDDO + ENDDO + ELSE + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+((ITMAX+2-JN)-KM)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + II = IR+1 + POA(JN,IR) = PSPEC(JFLD,INM) + POA(JN,II) = PSPEC(JFLD,INM+1) + PSPEC(JFLD,INM) = 0.0_JPRB + PSPEC(JFLD,INM+1) = 0.0_JPRB + ENDDO + ENDDO + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSPBAD +END MODULE UPDSPBAD_MOD diff --git a/src/trans/cpu/internal/uvtvd_mod.F90 b/src/trans/cpu/internal/uvtvd_mod.F90 new file mode 100644 index 0000000..9aa0c3a --- /dev/null +++ b/src/trans/cpu/internal/uvtvd_mod.F90 @@ -0,0 +1,144 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UVTVD_MOD +CONTAINS +SUBROUTINE UVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +!**** *UVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX. + +!** Interface. +! ---------- +! CALL UVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +!USE TPM_DISTR +! + +IMPLICIT NONE + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +INTEGER(KIND=JPIM), INTENT(IN) :: KM + +REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(OUT) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, ITMAX + +! LOCAL REAL SCALARS +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZN(-1:R%NTMAX+3) + + +! ------------------------------------------------------------------ + + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM = KM +ITMAX = R%NTMAX +ZN(KM-1:ITMAX+3) = F%RN(KM-1:ITMAX+3) +!* 1.1 SET N=KM-1 COMPONENT TO 0 FOR U AND V + +IN = F%NLTN(KM-1) +DO J=1,2*KFIELD + PU(IN,J) = 0.0_JPRB + PV(IN,J) = 0.0_JPRB +ENDDO + +!* 1.2 COMPUTE VORTICITY AND DIVERGENCE. + +IF(KM /= 0) THEN + DO JN=KM,ITMAX + IN = ITMAX+2-JN +!DIR$ IVDEP +!OCL NOVREC + DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + PVOR(IN,IR) = -ZKM*PV(IN,II)-& + &ZN(JN)*PEPSNM(JN+1)*PU(IN-1,IR)+& + &ZN(JN+1)*PEPSNM(JN)*PU(IN+1,IR) + PVOR(IN,II) = +ZKM*PV(IN,IR)-& + &ZN(JN)*PEPSNM(JN+1)*PU(IN-1,II)+& + &ZN(JN+1)*PEPSNM(JN)*PU(IN+1,II) + PDIV(IN,IR) = -ZKM*PU(IN,II)+& + &ZN(JN)*PEPSNM(JN+1)*PV(IN-1,IR)-& + &ZN(JN+1)*PEPSNM(JN)*PV(IN+1,IR) + PDIV(IN,II) = +ZKM*PU(IN,IR)+& + &ZN(JN)*PEPSNM(JN+1)*PV(IN-1,II)-& + &ZN(JN+1)*PEPSNM(JN)*PV(IN+1,II) + ENDDO + ENDDO +ELSE + DO JN=KM,ITMAX + IN = ITMAX+2-JN + DO J=1,KFIELD + IR = 2*J-1 + PVOR(IN,IR) = -& + &ZN(JN)*PEPSNM(JN+1)*PU(IN-1,IR)+& + &ZN(JN+1)*PEPSNM(JN)*PU(IN+1,IR) + PDIV(IN,IR) = & + &ZN(JN)*PEPSNM(JN+1)*PV(IN-1,IR)-& + &ZN(JN+1)*PEPSNM(JN)*PV(IN+1,IR) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UVTVD +END MODULE UVTVD_MOD diff --git a/src/trans/cpu/internal/uvtvdad_mod.F90 b/src/trans/cpu/internal/uvtvdad_mod.F90 new file mode 100644 index 0000000..7ae4bb8 --- /dev/null +++ b/src/trans/cpu/internal/uvtvdad_mod.F90 @@ -0,0 +1,139 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UVTVDAD_MOD +CONTAINS +SUBROUTINE UVTVDAD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +!**** *UVTVDAD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX. + +!** Interface. +! ---------- +! CALL UVTVDAD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +!USE TPM_DISTR +! + +IMPLICIT NONE + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +INTEGER(KIND=JPIM), INTENT(IN) :: KM + +REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, ITMAX + +! LOCAL REAL SCALARS +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZN(-1:R%NTMAX+3) + + +! ------------------------------------------------------------------ + + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM = KM +ITMAX = R%NTMAX +ZN(KM-1:ITMAX+3) = F%RN(KM-1:ITMAX+3) + +!* 1.2 COMPUTE VORTICITY AND DIVERGENCE. + +IF(KM /= 0) THEN + DO JN=KM,ITMAX + IN = ITMAX+2-JN +!DIR$ IVDEP +!OCL NOVREC + DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + + PV(IN,II) = PV(IN,II)-ZKM*PVOR(IN,IR) + PU(IN-1,IR) = PU(IN-1,IR)-ZN(JN)*PEPSNM(JN+1)*PVOR(IN,IR) + PU(IN+1,IR) = PU(IN+1,IR)+ZN(JN+1)*PEPSNM(JN)*PVOR(IN,IR) + + PV(IN,IR) = PV(IN,IR)+ZKM*PVOR(IN,II) + PU(IN-1,II) = PU(IN-1,II)-ZN(JN)*PEPSNM(JN+1)*PVOR(IN,II) + PU(IN+1,II) = PU(IN+1,II)+ZN(JN+1)*PEPSNM(JN)*PVOR(IN,II) + + PU(IN,II) = PU(IN,II)-ZKM*PDIV(IN,IR) + PV(IN-1,IR) = PV(IN-1,IR)+ZN(JN)*PEPSNM(JN+1)*PDIV(IN,IR) + PV(IN+1,IR) = PV(IN+1,IR)-ZN(JN+1)*PEPSNM(JN)*PDIV(IN,IR) + + PU(IN,IR) = PU(IN,IR)+ZKM*PDIV(IN,II) + PV(IN-1,II) = PV(IN-1,II)+ZN(JN)*PEPSNM(JN+1)*PDIV(IN,II) + PV(IN+1,II) = PV(IN+1,II)-ZN(JN+1)*PEPSNM(JN)*PDIV(IN,II) + ENDDO + ENDDO +ELSE + DO JN=KM,ITMAX + IN = ITMAX+2-JN + DO J=1,KFIELD + IR = 2*J-1 + PU(IN-1,IR) = PU(IN-1,IR)-ZN(JN )*PEPSNM(JN+1)*PVOR(IN,IR) + PU(IN+1,IR) = PU(IN+1,IR)+ZN(JN+1)*PEPSNM(JN )*PVOR(IN,IR) + PV(IN-1,IR) = PV(IN-1,IR)+ZN(JN )*PEPSNM(JN+1)*PDIV(IN,IR) + PV(IN+1,IR) = PV(IN+1,IR)-ZN(JN+1)*PEPSNM(JN )*PDIV(IN,IR) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UVTVDAD +END MODULE UVTVDAD_MOD diff --git a/src/trans/cpu/internal/vd2uv_ctl_mod.F90 b/src/trans/cpu/internal/vd2uv_ctl_mod.F90 new file mode 100644 index 0000000..2adaa54 --- /dev/null +++ b/src/trans/cpu/internal/vd2uv_ctl_mod.F90 @@ -0,0 +1,81 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VD2UV_CTL_MOD +CONTAINS +SUBROUTINE VD2UV_CTL(KF_UV,PSPVOR,PSPDIV,PU,PV) + +!**** *VD2UV_CTL* - Control routine for going from vor/div to spectral U and V. + +! Purpose. +! -------- +! Control routine for computing spectral U (u*cos(theta)) and V + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) +! KF_UV - local number of spectral u-v fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PU(:,:) - U (out) +! PV(:,:) - V (out) + +! Method. +! ------- + +! Externals. +! ---------- + + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : D + +USE VD2UV_MOD ,ONLY : VD2UV + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +REAL(KIND=JPRB),INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB),INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PU(:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PV(:,:) + +INTEGER(KIND=JPIM) :: JM,IM,ILEI2 + +! ------------------------------------------------------------------ + +CALL GSTATS(102,0) +ILEI2 = 8*KF_UV + +CALL GSTATS(1647,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL VD2UV(IM,JM,KF_UV,ILEI2,PSPVOR,PSPDIV,PU,PV) +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1647,1) +CALL GSTATS(102,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE VD2UV_CTL +END MODULE VD2UV_CTL_MOD diff --git a/src/trans/cpu/internal/vd2uv_mod.F90 b/src/trans/cpu/internal/vd2uv_mod.F90 new file mode 100644 index 0000000..6b95f86 --- /dev/null +++ b/src/trans/cpu/internal/vd2uv_mod.F90 @@ -0,0 +1,155 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VD2UV_MOD +CONTAINS +SUBROUTINE VD2UV(KM,KMLOC,KF_UV,KLEI2,PSPVOR,PSPDIV,PU,PV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_CONSTANTS +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI1B_MOD ,ONLY : PRFI1B +USE VDTUV_MOD ,ONLY : VDTUV + + +!**** *VD2UV* - U and V from Vor/div +! +! Purpose. +! -------- +! +!** Interface. +! ---------- +! *CALL* *VD2UV(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PU(:,:) - spectral U (out) +! PV(:,:) - spectral V (out) + + +! Implicit arguments : + +! Method. +! ------- + +! Externals. +! ---------- + +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1B - prepares the spectral fields +! VDTUV - compute u and v from vorticity and divergence + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 +! +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 + +REAL(KIND=JPRB) , INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PU(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PV(:,:) + +REAL(KIND=JPRB) :: ZIA(R%NLEI1,KLEI2) +REAL(KIND=JPRB) :: ZEPSNM(0:R%NTMAX+2),ZA_R + +INTEGER(KIND=JPIM) :: IFC, ISTA, IIFC, IDGLU, JGL, JFLD,ILCM +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,II,IR,INM,J +INTEGER(KIND=JPIM) :: IFIRST, ILAST, IOFF + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('VD2UV_MOD',0,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!* 1. PREPARE ZEPSNM. +! --------------- + +CALL PREPSNM(KM,KMLOC,ZEPSNM) + +! ------------------------------------------------------------------ + + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +IFIRST = 1 +ILAST = 4*KF_UV + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL PRFI1B(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV) + CALL PRFI1B(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV) + + CALL VDTUV(KM,KF_UV,ZEPSNM,ZIA(:,IVORL:IVORU),ZIA(:,IDIVL:IDIVU),& + & ZIA(:,IUL:IUU),ZIA(:,IVL:IVU)) + ILCM = R%NSMAX+1-KM + IOFF = D%NASM0(KM) + ZA_R = 1.0_JPRB/RA + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 + DO JFLD=1,KF_UV + IR = 2*(JFLD-1)+1 + II = IR+1 + PU(JFLD,INM ) = ZIA(J+2,IR+IUL-1)*ZA_R + PU(JFLD,INM+1) = ZIA(J+2,II+IUL-1)*ZA_R + PV(JFLD,INM ) = ZIA(J+2,IR+IVL-1)*ZA_R + PV(JFLD,INM+1) = ZIA(J+2,II+IVL-1)*ZA_R + ENDDO + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('VD2UV_MOD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE VD2UV +END MODULE VD2UV_MOD + + + + diff --git a/src/trans/cpu/internal/vdtuv_mod.F90 b/src/trans/cpu/internal/vdtuv_mod.F90 new file mode 100644 index 0000000..bd840a7 --- /dev/null +++ b/src/trans/cpu/internal/vdtuv_mod.F90 @@ -0,0 +1,149 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VDTUV_MOD +CONTAINS +SUBROUTINE VDTUV(KM,KFIELD,PEPSNM,PVOR,PDIV,PU,PV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F + + +!**** *VDTUV* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUV(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUV in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PU (:,:),PV (:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, ISMAX,JI + +! LOCAL REAL SCALARS +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZN(-1:R%NTMAX+4) +REAL(KIND=JPRB) :: ZLAPIN(-1:R%NSMAX+4) +REAL(KIND=JPRB) :: ZEPSNM(-1:R%NSMAX+4) + + + +! ------------------------------------------------------------------ + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM = KM +ISMAX = R%NSMAX +DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + ZLAPIN(IJ) = F%RLAPIN(JN) + IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) +ENDDO +ZN(0) = F%RN(ISMAX+3) + +!* 1.1 U AND V (KM=0) . + +IF(KM == 0) THEN + DO J=1,KFIELD + IR = 2*J-1 + DO JI=2,ISMAX+3-KM + PU(JI,IR) = +& + &ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PVOR(JI+1,IR)-& + &ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PVOR(JI-1,IR) + PV(JI,IR) = -& + &ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PDIV(JI+1,IR)+& + &ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PDIV(JI-1,IR) + ENDDO + ENDDO + +!* 1.2 U AND V (KM!=0) . + +ELSE + DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JI=2,ISMAX+3-KM + PU(JI,IR) = -ZKM*ZLAPIN(JI)*PDIV(JI,II)+& + &ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PVOR(JI+1,IR)-& + &ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PVOR(JI-1,IR) + PU(JI,II) = +ZKM*ZLAPIN(JI)*PDIV(JI,IR)+& + &ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PVOR(JI+1,II)-& + &ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PVOR(JI-1,II) + PV(JI,IR) = -ZKM*ZLAPIN(JI)*PVOR(JI,II)-& + &ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PDIV(JI+1,IR)+& + &ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PDIV(JI-1,IR) + PV(JI,II) = +ZKM*ZLAPIN(JI)*PVOR(JI,IR)-& + &ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PDIV(JI+1,II)+& + &ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PDIV(JI-1,II) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE VDTUV +END MODULE VDTUV_MOD + diff --git a/src/trans/cpu/internal/vdtuvad_mod.F90 b/src/trans/cpu/internal/vdtuvad_mod.F90 new file mode 100644 index 0000000..88589f0 --- /dev/null +++ b/src/trans/cpu/internal/vdtuvad_mod.F90 @@ -0,0 +1,145 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VDTUVAD_MOD +CONTAINS +SUBROUTINE VDTUVAD(KM,KFIELD,PEPSNM,PVOR,PDIV,PU,PV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F + + +!**** *VDTUVAD* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUVAD(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(INOUT) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PU (:,:),PV (:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, ISMAX,JI + +! LOCAL REAL SCALARS +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZN(-1:R%NTMAX+4) +REAL(KIND=JPRB) :: ZLAPIN(-1:R%NSMAX+4) +REAL(KIND=JPRB) :: ZEPSNM(-1:R%NSMAX+4) + + + +! ------------------------------------------------------------------ + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM = KM +ISMAX = R%NSMAX +DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + ZLAPIN(IJ) = F%RLAPIN(JN) + IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) +ENDDO +ZN(0) = F%RN(ISMAX+3) + +!* 1.1 U AND V (KM=0) . + +IF(KM == 0) THEN + DO J=1,KFIELD + IR = 2*J-1 + DO JI=2,ISMAX+3-KM + PDIV(JI-1,IR) = PDIV(JI-1,IR)+ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PV(JI,IR) + PVOR(JI-1,IR) = PVOR(JI-1,IR)-ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PU(JI,IR) + PDIV(JI+1,IR) = PDIV(JI+1,IR)-ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PV(JI,IR) + PVOR(JI+1,IR) = PVOR(JI+1,IR)+ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PU(JI,IR) + ENDDO + ENDDO +!* 1.2 U AND V (KM!=0) . + +ELSE + DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JI=2,ISMAX+3-KM + PDIV(JI-1,II) = PDIV(JI-1,II)+ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PV(JI,II) + PDIV(JI-1,IR) = PDIV(JI-1,IR)+ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PV(JI,IR) + PVOR(JI-1,II) = PVOR(JI-1,II)-ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PU(JI,II) + PVOR(JI-1,IR) = PVOR(JI-1,IR)-ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PU(JI,IR) + PDIV(JI+1,II) = PDIV(JI+1,II)-ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PV(JI,II) + PDIV(JI+1,IR) = PDIV(JI+1,IR)-ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PV(JI,IR) + PVOR(JI+1,II) = PVOR(JI+1,II)+ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PU(JI,II) + PVOR(JI+1,IR) = PVOR(JI+1,IR)+ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PU(JI,IR) + PVOR(JI,IR) = PVOR(JI,IR)+ZKM*ZLAPIN(JI)*PV(JI,II) + PVOR(JI,II) = PVOR(JI,II)-ZKM*ZLAPIN(JI)*PV(JI,IR) + PDIV(JI,IR) = PDIV(JI,IR)+ZKM*ZLAPIN(JI)*PU(JI,II) + PDIV(JI,II) = PDIV(JI,II)-ZKM*ZLAPIN(JI)*PU(JI,IR) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE VDTUVAD +END MODULE VDTUVAD_MOD diff --git a/src/trans/cpu/internal/write_legpol_mod.F90 b/src/trans/cpu/internal/write_legpol_mod.F90 new file mode 100644 index 0000000..ea17f83 --- /dev/null +++ b/src/trans/cpu/internal/write_legpol_mod.F90 @@ -0,0 +1,229 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE WRITE_LEGPOL_MOD +CONTAINS +SUBROUTINE WRITE_LEGPOL +USE PARKIND1 ,ONLY : JPIM, JPRB +USE TPM_GEN +USE TPM_DISTR +USE TPM_DIM +USE TPM_GEOMETRY +USE TPM_FLT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TPM_CTL +USE BUTTERFLY_ALG_MOD +USE BYTES_IO_MOD + +!**** *WRITE_LEGPOL * - write out Leg.Pol. and assocciated arrays to file + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! *CALL* *WRITE_LEGPOL* + +! Explicit arguments : None +! -------------------- + +! Implicit arguments : +! -------------------- +! + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! + +! ------- +! Mats Hamrud and Willem Deconinck *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 + +IMPLICIT NONE + +INTEGER(KIND=JPIM),PARAMETER :: JPIBUFL=4 +INTEGER(KIND=JPIM) :: IRBYTES,IIBYTES,JMLOC,IPRTRV,IMLOC,IM,ILA,ILS,IFILE,JSETV +INTEGER(KIND=JPIM) :: IDGLU,ISIZE,IBYTES,IRET,IBUF(JPIBUFL),IDUM,JGL,II +INTEGER(KIND=JPIM) :: IDGLU2 +TYPE(CLONE) :: YLCLONE +REAL(KIND=JPRB) ,ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: IBUFA(:) +! ------------------------------------------------------------------ + +IRBYTES = 8 +IIBYTES = 4 +IDUM = 3141 + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_OPEN(IFILE,C%CLEGPOLFNAME,'W',IRET) + IF ( IRET < JPBYTES_IO_SUCCESS ) CALL ABORT_TRANS('WRITE_LEGPOL: BYTES_IO_OPEN FAILED') +ENDIF +IF( S%LUSEFLT ) THEN + IBUF(1:2) = TRANSFER('LEGPOLBF',IBUF(1:2)) +ELSE + IBUF(1:2) = TRANSFER('LEGPOL ',IBUF(1:2)) +ENDIF +IBUF(3) = R%NSMAX +IBUF(4) = R%NDGNH +CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +IF ( IRET < JPBYTES_IO_SUCCESS ) CALL ABORT_TRANS('WRITE_LEGPOL: BYTES_IO_WRITE FAILED') +ALLOCATE(IBUFA(2*R%NDGNH)) +II = 0 +DO JGL=1,R%NDGNH + II = II+1 + IBUFA(II) = G%NLOEN(JGL) + II=II+1 + IBUFA(II) = G%NMEN(JGL) +ENDDO +CALL BYTES_IO_WRITE(IFILE,IBUFA,2*R%NDGNH*IIBYTES,IRET) +IF ( IRET < JPBYTES_IO_SUCCESS ) CALL ABORT_TRANS('WRITE_LEGPOL: BYTES_IO_WRITE FAILED') +DEALLOCATE(IBUFA) +DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) +! Anti-symmetric + IF( S%LUSEFLT .AND. ILA > S%ITHRESHOLD) THEN + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,YLCLONE) + ISIZE = SIZE(YLCLONE%COMMSBUF) + IBUF(:) = (/IDGLU,ILA,ISIZE,IDUM/) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IIBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_WRITE(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + ISIZE = IDGLU*ILA + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(IMLOC)%RPNMA,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + ENDIF +! Symmetric + IF( S%LUSEFLT .AND. ILS > S%ITHRESHOLD) THEN + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,YLCLONE) + ISIZE = SIZE(YLCLONE%COMMSBUF) + IBUF(:) = (/IDGLU,ILS,ISIZE,IDUM/) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IIBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_WRITE(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + ISIZE = IDGLU*ILS + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(IMLOC)%RPNMS,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + ENDIF + ENDDO +ENDDO + +! Lat-lon grid + +IF(S%LDLL) THEN + IBUF(:) = TRANSFER('LATLON---BEG-BEG',IBUF(1:4)) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IDGLU2 = S%NDGNHD + IBUF(:) = (/IM,IDGLU,IDGLU2,IDUM/) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IIBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + + ISIZE = 2*IDGLU*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(JMLOC)%RPNMWI,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + + ISIZE = 2*IDGLU2*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(JMLOC)%RPNMWO,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + + ENDDO +ENDIF +!End marker +IBUF(:) = TRANSFER('LEGPOL---EOF-EOF',IBUF(1:4)) +CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +IF( IRET < JPBYTES_IO_SUCCESS ) THEN + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') +ENDIF + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_CLOSE(IFILE,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_CLOSE FAILED') + ENDIF +ENDIF + +END SUBROUTINE WRITE_LEGPOL +END MODULE WRITE_LEGPOL_MOD diff --git a/src/trans/cpu/sharedmem/sharedmem.c b/src/trans/cpu/sharedmem/sharedmem.c new file mode 100644 index 0000000..29426ce --- /dev/null +++ b/src/trans/cpu/sharedmem/sharedmem.c @@ -0,0 +1,28 @@ +/* + * (C) Copyright 2015- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + + +#include + +void sharedmem_malloc_bytes (void** ptr, size_t bytes) +{ + *ptr = malloc(bytes); +} + +void sharedmem_free(void** ptr) +{ + free(*ptr); +} + +void sharedmem_advance_bytes (void** ptr, size_t bytes) +{ + char** char_ptr = (char**)ptr; + *char_ptr += bytes; +} diff --git a/src/trans/cpu/sharedmem/sharedmem_mod.F90 b/src/trans/cpu/sharedmem/sharedmem_mod.F90 new file mode 100644 index 0000000..16bb0fc --- /dev/null +++ b/src/trans/cpu/sharedmem/sharedmem_mod.F90 @@ -0,0 +1,315 @@ +! (C) Copyright 2015- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SHAREDMEM_MOD + +! Routines to allow use of shared memery segments in Fortran + + +! Willem Deconinck and Mats Hamrud *ECMWF* +! Original : July 2015 + + +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT, C_NULL_PTR,C_SIZE_T +USE PARKIND1 ,ONLY : JPIM, JPRB ,JPRD + +#ifdef __NEC__ +#define C_SIZEOF(x) INT(KIND(x),C_SIZE_T) +#endif + +IMPLICIT NONE + +PRIVATE + +PUBLIC :: SHAREDMEM +PUBLIC :: SHAREDMEM_ALLOCATE +PUBLIC :: SHAREDMEM_MALLOC_BYTES +PUBLIC :: SHAREDMEM_CREATE +PUBLIC :: SHAREDMEM_ASSOCIATE +PUBLIC :: SHAREDMEM_ADVANCE +PUBLIC :: SHAREDMEM_DELETE + +TYPE, BIND(C) :: SHAREDMEM +! Memory buffer + TYPE(C_PTR), PRIVATE :: BEGIN=C_NULL_PTR + INTEGER(C_SIZE_T), PRIVATE :: SIZE=0 ! IN BYTES + TYPE(C_PTR), PRIVATE :: CPTR=C_NULL_PTR + INTEGER(C_SIZE_T), PRIVATE :: OFFSET=0 ! IN BYTES +END TYPE SHAREDMEM + + +INTERFACE SHAREDMEM_ASSOCIATE +! Associate fortran scalars/arrays with memory segment + MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_INT32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_REAL32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_REAL64 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_INT32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_REAL32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_REAL64 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_INT32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_REAL32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_REAL64 +END INTERFACE + + +INTERFACE + +! EXTERNAL C FUNCTIONS USED IN THIS MODULE +! ---------------------------------------- + + SUBROUTINE SHAREDMEM_ADVANCE_BYTES(CPTR,BYTES) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T + TYPE(C_PTR) :: CPTR + INTEGER(C_SIZE_T), VALUE :: BYTES + END SUBROUTINE SHAREDMEM_ADVANCE_BYTES + + SUBROUTINE SHAREDMEM_MALLOC_BYTES(PTR,BYTES) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T + TYPE(C_PTR) :: PTR + INTEGER(C_SIZE_T), VALUE :: BYTES + END SUBROUTINE SHAREDMEM_MALLOC_BYTES + + SUBROUTINE SHAREDMEM_FREE(PTR) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR + TYPE(C_PTR), INTENT(IN) :: PTR + END SUBROUTINE SHAREDMEM_FREE + +END INTERFACE + +CONTAINS +!========================================================================= +SUBROUTINE SHAREDMEM_CREATE(HANDLE,CPTR,BYTES) +! Create memory buffer object from c pointer +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T, C_F_POINTER +TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE +TYPE(C_PTR) , INTENT(IN) :: CPTR +INTEGER(C_SIZE_T), INTENT(IN) :: BYTES +!------------------------------------------------------------------------ +HANDLE%BEGIN = CPTR +HANDLE%SIZE = BYTES +HANDLE%CPTR = HANDLE%BEGIN +HANDLE%OFFSET = 0 +END SUBROUTINE SHAREDMEM_CREATE +!========================================================================= +SUBROUTINE SHAREDMEM_ALLOCATE(HANDLE,BYTES) +! Create memory buffer object from Fortran +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T +TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE +INTEGER(C_SIZE_T), INTENT(IN) :: BYTES +INTEGER(C_SIZE_T) :: SIZE +!------------------------------------------------------------------------ +SIZE = BYTES +CALL SHAREDMEM_MALLOC_BYTES(HANDLE%BEGIN,SIZE) +HANDLE%SIZE = BYTES +HANDLE%CPTR = HANDLE%BEGIN +HANDLE%OFFSET = 0 +END SUBROUTINE SHAREDMEM_ALLOCATE +!========================================================================= +SUBROUTINE SHAREDMEM_DELETE(HANDLE) +! Free memory buffer +TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE +CALL SHAREDMEM_FREE(HANDLE%BEGIN) +END SUBROUTINE SHAREDMEM_DELETE +!========================================================================= + +! PRIVATE SUBROUTINES +! ------------------- + +SUBROUTINE SHAREDMEM_ASSOCIATE0_INT32(HANDLE,VALUE,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(OUT) :: VALUE + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + INTEGER(C_INT), POINTER :: FPTR(:) + INTEGER(C_INT) :: K + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) + VALUE = FPTR(1) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(K)) + HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(K) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE0_INT32 + +SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL32(HANDLE,VALUE,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + REAL(C_FLOAT), INTENT(OUT) :: VALUE + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_FLOAT), POINTER :: FPTR(:) + REAL(C_FLOAT) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) + VALUE = FPTR(1) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(R) + + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL32 + +SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL64(HANDLE,VALUE,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + REAL(C_DOUBLE), INTENT(OUT) :: VALUE + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_DOUBLE), POINTER :: FPTR(:) + REAL(C_DOUBLE) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) + VALUE = FPTR(1) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(R) + + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL64 + +SUBROUTINE SHAREDMEM_ASSOCIATE1_INT32(HANDLE,SIZE,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: SIZE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: FPTR(:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + INTEGER(C_INT) :: K + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(K)) + HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(K) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE1_INT32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL32(HANDLE,SIZE,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: SIZE + REAL(C_FLOAT), POINTER, INTENT(INOUT) :: FPTR(:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_FLOAT) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL64(HANDLE,SIZE,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: SIZE + REAL(C_DOUBLE), POINTER, INTENT(INOUT) :: FPTR(:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_DOUBLE) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL64 + +SUBROUTINE SHAREDMEM_ASSOCIATE2_INT32(HANDLE,DIM1,DIM2,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 + INTEGER(C_INT), POINTER, INTENT(INOUT) :: FPTR(:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + INTEGER(C_INT) :: K + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(K)) + HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(K) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE2_INT32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL32(HANDLE,DIM1,DIM2,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 + REAL(C_FLOAT), POINTER, INTENT(INOUT) :: FPTR(:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_FLOAT) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL64(HANDLE,DIM1,DIM2,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 + REAL(C_DOUBLE), POINTER, INTENT(INOUT) :: FPTR(:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_DOUBLE) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL64 + +SUBROUTINE SHAREDMEM_ADVANCE(HANDLE,BYTES) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: BYTES + INTEGER(C_SIZE_T) :: SIZE + SIZE = BYTES + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE) + HANDLE%OFFSET = HANDLE%OFFSET+BYTES +END SUBROUTINE SHAREDMEM_ADVANCE + +!============================================================================ +END MODULE SHAREDMEM_MOD diff --git a/src/trans/gpu/#CMakeLists.txt# b/src/trans/gpu/#CMakeLists.txt# new file mode 100644 index 0000000..14e665e --- /dev/null +++ b/src/trans/gpu/#CMakeLists.txt# @@ -0,0 +1,124 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +## Apply workarounds for some known compilers + +if(CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC") + + # Compile setup_trans with pinned memory to improve data movement performance. + ectrans_add_compile_options( + SOURCES external/setup_trans.F90 + #FLAGS "-gpu=pinned,deepcopy,fastmath,nordc") + FLAGS "-gpu=pinned,fastmath") + # TODO: check if it is sufficient to only set "-gpu=pinned" which appends rather than overwrites + +endif() + +## Assemble sources + +ecbuild_list_add_pattern( LIST trans_src + GLOB + sharedmem/* + algor/* + internal/* + external/* + QUIET + ) + +## for reduced memory option, replace source files +if( HAVE_GPU_REDUCED_MEMORY ) + ecbuild_list_add_pattern( LIST reducedmem_files + GLOB internal_reducedmem/* QUIET ) + foreach( src_file ${reducedmem_files} ) + get_filename_component( base_name ${src_file} NAME ) + list(REMOVE_ITEM trans_src "internal/${base_name}" ) + list(APPEND trans_src ${src_file} ) + endforeach() +endif() + +ecbuild_list_exclude_pattern( LIST trans_src REGEX dilatation_mod.F90 ) + +foreach( prec sp dp ) + if( HAVE_${prec} ) + foreach( gpumethod acc) + if( HAVE_${gpumethod}) + + # We build an object library first. And then use these objects to create a shared and static library. + + ectrans_add_library( + TARGET trans_gpu_object_${prec}_${gpumethod} + TYPE OBJECT + SOURCES ${trans_src} + PUBLIC_INCLUDES $ + $ + $ + $ + $ + PUBLIC_LIBS parkind_${prec} + fiat + PRIVATE_LIBS hipfft + hipblas + MPI::MPI_Fortran + #MPI::MPI_C + MPI::MPI_CXX + #nvhpcwrapnvtx + ${LAPACK_LIBRARIES} + ) + + target_link_options ( trans_gpu_object_${prec}_${gpumethod} INTERFACE "$<$:${${gpumethod}_link_flags}>" ) + target_compile_options( trans_gpu_object_${prec}_${gpumethod} PUBLIC $<$:${${gpumethod}_flags}>) + + ectrans_target_fortran_module_directory( + TARGET trans_gpu_object_${prec}_${gpumethod} + MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/trans_gpu_${prec}_${gpumethod} + INSTALL_DIRECTORY module/trans_gpu_${prec}_${gpumethod} + ) + + if( HAVE_OMP ) + target_link_libraries( trans_gpu_object_${prec}_${gpumethod} PRIVATE OpenMP::OpenMP_Fortran ) + endif() + + if( prec STREQUAL sp ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE ) + endif() + + if( gpumethod STREQUAL acc ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE ACCGPU ) + target_link_libraries( trans_gpu_object_${prec}_${gpumethod} PRIVATE OpenACC::OpenACC_Fortran ) + endif() + + if( gpumethod STREQUAL omp ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE OMPGPU ) + endif() + + if( HAVE_GPU_REDUCED_MEMORY ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE REDUCED_MEM ) + endif() + + if( HAVE_GPU_AWARE_MPI ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE USE_CUDA_AWARE_MPI_FT ) + endif() + + ectrans_add_library( + TARGET trans_gpu_shared_${prec}_${gpumethod} + OUTPUT_NAME trans_gpu_${prec}_${gpumethod} + TYPE SHARED + LINKER_LANGUAGE Fortran + PUBLIC_LIBS trans_gpu_object_${prec}_${gpumethod} + ) + + ectrans_add_library( + TARGET trans_gpu_${prec}_${gpumethod} + TYPE STATIC + LINKER_LANGUAGE Fortran + PUBLIC_LIBS trans_gpu_object_${prec}_${gpumethod} + ) + endif() + endforeach() + endif() +endforeach() diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt new file mode 100644 index 0000000..2b480a2 --- /dev/null +++ b/src/trans/gpu/CMakeLists.txt @@ -0,0 +1,145 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +## Apply workarounds for some known compilers + +if(CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC") + + # Compile setup_trans with pinned memory to improve data movement performance. + ectrans_add_compile_options( + SOURCES external/setup_trans.F90 + #FLAGS "-gpu=pinned,deepcopy,fastmath,nordc") + FLAGS "-gpu=pinned,fastmath") + # TODO: check if it is sufficient to only set "-gpu=pinned" which appends rather than overwrites + +endif() + +## Assemble sources + +ecbuild_list_add_pattern( LIST trans_src + GLOB + sharedmem/* + algor/* + internal/* + external/* + QUIET + ) + +## for reduced memory option, replace source files +if( HAVE_GPU_REDUCED_MEMORY ) + ecbuild_list_add_pattern( LIST reducedmem_files + GLOB internal_reducedmem/* QUIET ) + foreach( src_file ${reducedmem_files} ) + get_filename_component( base_name ${src_file} NAME ) + list(REMOVE_ITEM trans_src "internal/${base_name}" ) + list(APPEND trans_src ${src_file} ) + endforeach() +endif() + +ecbuild_list_exclude_pattern( LIST trans_src REGEX dilatation_mod.F90 ) + +set_source_files_properties( internal/ftinv_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) +set_source_files_properties( internal/ftdir_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) + +ecbuild_list_add_pattern( LIST trans_hip_src + GLOB + sharedmem/*.hip.cpp + algor/*.hip.cpp + internal/*.hip.cpp + external/*.hip.cpp + QUIET + ) + ecbuild_warn_var(trans_hip_src) + set_source_files_properties( ${trans_hip_src} PROPERTIES COMPILE_FLAGS "--offload-arch=gfx90a -x hip" ) + + +foreach( prec sp dp ) + if( HAVE_${prec} ) + foreach( gpumethod acc omp) + if( HAVE_${gpumethod}) + + # We build an object library first. And then use these objects to create a shared and static library. + ectrans_add_library( + TARGET trans_gpu_object_${prec}_${gpumethod} + TYPE OBJECT + SOURCES ${trans_src} + PUBLIC_INCLUDES $ + $ + $ + $ + $ + PUBLIC_LIBS parkind_${prec} + fiat + PRIVATE_LIBS hip::hipfft + roc::hipblas + roc::rocblas + roc::rocfft + MPI::MPI_Fortran + mpifort + #MPI::MPI_C + MPI::MPI_CXX + #nvhpcwrapnvtx + #${LAPACK_LIBRARIES} + ) + + target_link_options ( trans_gpu_object_${prec}_${gpumethod} INTERFACE "$<$:${${gpumethod}_link_flags}>" ) + target_compile_options( trans_gpu_object_${prec}_${gpumethod} PRIVATE $<$:${${gpumethod}_flags}>) + + ectrans_target_fortran_module_directory( + TARGET trans_gpu_object_${prec}_${gpumethod} + MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/trans_gpu_${prec}_${gpumethod} + INSTALL_DIRECTORY module/trans_gpu_${prec}_${gpumethod} + ) + + if( CMAKE_Fortran_COMPILER_ID MATCHES "Cray" AND CMAKE_Fortran_COMPILER_VERSION MATCHES "15.*" ) + target_link_libraries( trans_gpu_object_${prec}_${gpumethod} PRIVATE omptarget ) + endif() + + if( HAVE_OMP ) + target_link_libraries( trans_gpu_object_${prec}_${gpumethod} PRIVATE OpenMP::OpenMP_Fortran ) + endif() + + if( prec STREQUAL sp ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE ) + endif() + + if( gpumethod STREQUAL acc ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE ACCGPU ) + target_link_libraries( trans_gpu_object_${prec}_${gpumethod} PRIVATE OpenACC::OpenACC_Fortran ) + endif() + + if( gpumethod STREQUAL omp ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE OMPGPU ) + endif() + + if( HAVE_GPU_REDUCED_MEMORY ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE REDUCED_MEM ) + endif() + + if( HAVE_GPU_AWARE_MPI ) + target_compile_definitions( trans_gpu_object_${prec}_${gpumethod} PRIVATE USE_CUDA_AWARE_MPI_FT ) + endif() + + ectrans_add_library( + TARGET trans_gpu_shared_${prec}_${gpumethod} + OUTPUT_NAME trans_gpu_${prec}_${gpumethod} + TYPE SHARED + LINKER_LANGUAGE Fortran + PUBLIC_LIBS trans_gpu_object_${prec}_${gpumethod} + ) + + ectrans_add_library( + TARGET trans_gpu_${prec}_${gpumethod} + TYPE STATIC + LINKER_LANGUAGE Fortran + PUBLIC_LIBS trans_gpu_object_${prec}_${gpumethod} + ) + endif() + endforeach() + endif() +endforeach() diff --git a/src/trans/gpu/algor/external/fourier/create_plan_ffth.hip.cpp b/src/trans/gpu/algor/external/fourier/create_plan_ffth.hip.cpp new file mode 100644 index 0000000..61debae --- /dev/null +++ b/src/trans/gpu/algor/external/fourier/create_plan_ffth.hip.cpp @@ -0,0 +1,189 @@ +#define hipfftSafeCall(err) __hipfftSafeCall(err, __FILE__, __LINE__) +#include "stdio.h" +#include +#include "hipfft.h" + static const char *_hipGetErrorEnum(hipfftResult error) + { + switch (error) + { + case HIPFFT_SUCCESS: + return "HIPFFT_SUCCESS"; + + case HIPFFT_INVALID_PLAN: + return "HIPFFT_INVALID_PLAN"; + + case HIPFFT_ALLOC_FAILED: + return "HIPFFT_ALLOC_FAILED"; + + case HIPFFT_INVALID_TYPE: + return "HIPFFT_INVALID_TYPE"; + + case HIPFFT_INVALID_VALUE: + return "HIPFFT_INVALID_VALUE"; + + case HIPFFT_INTERNAL_ERROR: + return "HIPFFT_INTERNAL_ERROR"; + + case HIPFFT_EXEC_FAILED: + return "HIPFFT_EXEC_FAILED"; + + case HIPFFT_SETUP_FAILED: + return "HIPFFT_SETUP_FAILED"; + + case HIPFFT_INVALID_SIZE: + return "HIPFFT_INVALID_SIZE"; + + case HIPFFT_UNALIGNED_DATA: + return "HIPFFT_UNALIGNED_DATA"; + + case HIPFFT_INCOMPLETE_PARAMETER_LIST: + return "HIPFFT_INCOMPLETE_PARAMETER_LIST"; + + case HIPFFT_INVALID_DEVICE: + return "HIPFFT_INVALID_DEVICE"; + + case HIPFFT_PARSE_ERROR: + return "HIPFFT_PARSE_ERROR"; + + case HIPFFT_NO_WORKSPACE: + return "HIPFFT_NO_WORKSPACE"; + + case HIPFFT_NOT_IMPLEMENTED: + return "HIPFFT_NOT_IMPLEMENTED"; + + case HIPFFT_NOT_SUPPORTED: + return "HIPFFT_NOT_SUPPORTED"; + } + + return ""; + } + + inline void __hipfftSafeCall(hipfftResult err, const char *file, const int line) + { + if( HIPFFT_SUCCESS != err) { + fprintf(stderr, "HIPFFT error in file '%s', line %d; error %d: %s\nterminating!\n", \ + __FILE__, __LINE__,err, \ + _hipGetErrorEnum(err)); \ + fprintf(stderr, "HIPFFT error %d: %s\nterminating!\n",err,_hipGetErrorEnum(err)); \ + hipDeviceReset(); abort(); \ + } + } + + +static int allocatedWorkspace=0; +static void* planWorkspace; +static int planWorkspaceSize=100*1024*1024; //100MB + +extern "C" +void +create_plan_ffth_(hipfftHandle * * plan_ptr_ptr, int *ISIGNp, int *Np, int *LOTp, int * NONSTRIDEDp) +{ +int ISIGN = *ISIGNp; +int N = *Np; +int LOT = *LOTp; +int NONSTRIDED = *NONSTRIDEDp; + +if (hipDeviceSynchronize() != hipSuccess){ + fprintf(stderr, "Hip error: Failed to synchronize\n"); + return; +} + +hipfftHandle * plan_ptr = new hipfftHandle; +plan_ptr_ptr[0]=plan_ptr; + +// create plan +hipfftSafeCall(hipfftCreate(plan_ptr)); + +// disable auto allocation so we can re-use a single workspace (created above) +hipfftSafeCall(hipfftSetAutoAllocation(*plan_ptr, false)); + +//create a single re-usable workspace +if(!allocatedWorkspace){ + allocatedWorkspace=1; + //allocate plan workspace + hipMalloc(&planWorkspace,planWorkspaceSize); +} + + +// plan parameters +int embed[1]; +int stride; +int cdist, rdist; + +#ifdef TRANS_SINGLE +hipfftType hipfft_1 = HIPFFT_R2C; +hipfftType hipfft_2 = HIPFFT_C2R; +#else +hipfftType hipfft_1 = HIPFFT_D2Z; +hipfftType hipfft_2 = HIPFFT_Z2D; +#endif + +embed[0] = 0; +if (NONSTRIDED==0) { + // for global and LAM zonal + stride = LOT; + cdist = 1; + rdist = 1; +} else { + // for LAM meridional + stride=1; + cdist=N/2+1; + rdist=N+2; +} + + +fprintf(stderr,"CreatePlan hipfft for \n"); +fprintf(stderr," %s %p \n","plan address=",plan_ptr); +fprintf(stderr," %s %d \n","LOT=",LOT); +fprintf(stderr," %s %d \n","stride=",stride); +fprintf(stderr," %s %d \n","rdist=",rdist); +fprintf(stderr," %s %d \n","cdist=",cdist); +fprintf(stderr," %s %p \n","embed address=",embed); +fprintf(stderr," %s %d \n","ISIGN=",ISIGN); +fprintf(stderr," %s %d \n","N=",N); +fprintf(stderr," %s %p \n","N address=",&N); + +size_t workSize=123456; + +if( ISIGN== -1 ){ + hipfftSafeCall(hipfftMakePlanMany(*plan_ptr, 1, &N, + embed, stride, rdist, + embed, stride, cdist, + hipfft_1, LOT, &workSize)); +} +else if( ISIGN== 1){ + hipfftSafeCall(hipfftMakePlanMany(*plan_ptr, 1, &N, + embed, stride, cdist, + embed, stride, rdist, + hipfft_2, LOT, &workSize)); +} +else { + abort(); +} + +// use our reusaable work area for the plan +hipfftSafeCall(hipfftSetWorkArea(*plan_ptr,planWorkspace)); + +// print worksize returned from hipfftMakePlan +fprintf(stderr," %s %d \n","workSize from hipfftMakePlan=",workSize); + +// get worksize +hipfftSafeCall(hipfftGetSize(*plan_ptr, &workSize)); +fprintf(stderr," %s %d \n","workSize from hipfftGetSize=",workSize); + +// abort if we don't have enough space for the work area in the re-usable workspace +if(workSize > planWorkspaceSize){ + fprintf(stderr,"create_plan_ffth: plan workspace size not large enough - aborting\n"); + abort(); +} + +if (hipDeviceSynchronize() != hipSuccess){ + fprintf(stderr, "Hip error: Failed to synchronize\n"); + return; +} + +return; + + +} + diff --git a/src/trans/gpu/algor/external/fourier/destroy_plan_ffth.hip.cpp b/src/trans/gpu/algor/external/fourier/destroy_plan_ffth.hip.cpp new file mode 100644 index 0000000..70e1e2f --- /dev/null +++ b/src/trans/gpu/algor/external/fourier/destroy_plan_ffth.hip.cpp @@ -0,0 +1,95 @@ +#define hipfftSafeCall(err) __hipfftSafeCall(err, __FILE__, __LINE__) +#include "hipfft.h" +#include "stdio.h" + static const char *_hipGetErrorEnum(hipfftResult error) + { + switch (error) + { + case HIPFFT_SUCCESS: + return "HIPFFT_SUCCESS"; + + case HIPFFT_INVALID_PLAN: + return "HIPFFT_INVALID_PLAN"; + + case HIPFFT_ALLOC_FAILED: + return "HIPFFT_ALLOC_FAILED"; + + case HIPFFT_INVALID_TYPE: + return "HIPFFT_INVALID_TYPE"; + + case HIPFFT_INVALID_VALUE: + return "HIPFFT_INVALID_VALUE"; + + case HIPFFT_INTERNAL_ERROR: + return "HIPFFT_INTERNAL_ERROR"; + + case HIPFFT_EXEC_FAILED: + return "HIPFFT_EXEC_FAILED"; + + case HIPFFT_SETUP_FAILED: + return "HIPFFT_SETUP_FAILED"; + + case HIPFFT_INVALID_SIZE: + return "HIPFFT_INVALID_SIZE"; + + case HIPFFT_UNALIGNED_DATA: + return "HIPFFT_UNALIGNED_DATA"; + + case HIPFFT_INCOMPLETE_PARAMETER_LIST: + return "HIPFFT_INCOMPLETE_PARAMETER_LIST"; + + case HIPFFT_INVALID_DEVICE: + return "HIPFFT_INVALID_DEVICE"; + + case HIPFFT_PARSE_ERROR: + return "HIPFFT_PARSE_ERROR"; + + case HIPFFT_NO_WORKSPACE: + return "HIPFFT_NO_WORKSPACE"; + + case HIPFFT_NOT_IMPLEMENTED: + return "HIPFFT_NOT_IMPLEMENTED"; + + case HIPFFT_NOT_SUPPORTED: + return "HIPFFT_NOT_SUPPORTED"; + } + + return ""; + } + + inline void __hipfftSafeCall(hipfftResult err, const char *file, const int line) + { + if( HIPFFT_SUCCESS != err) { + fprintf(stderr, "HIPFFT error at 1\n"); + fprintf(stderr, "HIPFFT error in file '%s'\n",__FILE__); + fprintf(stderr, "HIPFFT error at 2\n"); + /*fprintf(stderr, "HIPFFT error line '%s'\n",__LINE__);*/ + fprintf(stderr, "HIPFFT error at 3\n"); + /*fprintf(stderr, "HIPFFT error in file '%s', line %d\n %s\nerror %d: %s\nterminating!\n",__FILE__, __LINE__,err, \ + _hipGetErrorEnum(err)); \*/ + fprintf(stderr, "HIPFFT error %d: %s\nterminating!\n",err,_hipGetErrorEnum(err)); \ + hipDeviceReset(); return; \ + } + } + +extern "C" +void +destroy_plan_ffth_(hipfftHandle *PLANp) +{ +hipfftHandle plan = *PLANp; + +if (hipDeviceSynchronize() != hipSuccess){ + fprintf(stderr, "Hip error: Failed to synchronize\n"); + return; +} + +hipfftSafeCall(hipfftDestroy(plan)); + +if (hipDeviceSynchronize() != hipSuccess){ + fprintf(stderr, "Hip error: Failed to synchronize\n"); + return; +} + + +} + diff --git a/src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.cpp b/src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.cpp new file mode 100644 index 0000000..56d9b1f --- /dev/null +++ b/src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.cpp @@ -0,0 +1,173 @@ +#define hipfftSafeCall(err) __hipfftSafeCall(err, __FILE__, __LINE__) +#include "hip/hip_runtime.h" +#include "hipfft.h" +#include "stdio.h" +#include "execute_plan_ffth.hip.h" + +#ifdef TRANS_SINGLE +typedef hipfftComplex HIP_DATA_TYPE_COMPLEX; +typedef hipfftReal HIP_DATA_TYPE_REAL; +#else +typedef hipfftDoubleComplex HIP_DATA_TYPE_COMPLEX; +typedef hipfftDoubleReal HIP_DATA_TYPE_REAL; +#endif + + + static const char *_hipGetErrorEnum(hipfftResult error) + { + switch (error) + { + case HIPFFT_SUCCESS: + return "HIPFFT_SUCCESS"; + + case HIPFFT_INVALID_PLAN: + return "HIPFFT_INVALID_PLAN"; + + case HIPFFT_ALLOC_FAILED: + return "HIPFFT_ALLOC_FAILED"; + + case HIPFFT_INVALID_TYPE: + return "HIPFFT_INVALID_TYPE"; + + case HIPFFT_INVALID_VALUE: + return "HIPFFT_INVALID_VALUE"; + + case HIPFFT_INTERNAL_ERROR: + return "HIPFFT_INTERNAL_ERROR"; + + case HIPFFT_EXEC_FAILED: + return "HIPFFT_EXEC_FAILED"; + + case HIPFFT_SETUP_FAILED: + return "HIPFFT_SETUP_FAILED"; + + case HIPFFT_INVALID_SIZE: + return "HIPFFT_INVALID_SIZE"; + + case HIPFFT_UNALIGNED_DATA: + return "HIPFFT_UNALIGNED_DATA"; + + case HIPFFT_INCOMPLETE_PARAMETER_LIST: + return "HIPFFT_INCOMPLETE_PARAMETER_LIST"; + + case HIPFFT_INVALID_DEVICE: + return "HIPFFT_INVALID_DEVICE"; + + case HIPFFT_PARSE_ERROR: + return "HIPFFT_PARSE_ERROR"; + + case HIPFFT_NO_WORKSPACE: + return "HIPFFT_NO_WORKSPACE"; + + case HIPFFT_NOT_IMPLEMENTED: + return "HIPFFT_NOT_IMPLEMENTED"; + + case HIPFFT_NOT_SUPPORTED: + return "HIPFFT_NOT_SUPPORTED"; + } + + return ""; + } + + inline void __hipfftSafeCall(hipfftResult err, const char *file, const int line) + { + if( HIPFFT_SUCCESS != err) { + fprintf(stderr, "HIPFFT error at 1\n"); + fprintf(stderr, "HIPFFT error in file '%s'\n",__FILE__); + fprintf(stderr, "HIPFFT error at 2\n"); + /*fprintf(stderr, "HIPFFT error line '%s'\n",__LINE__);*/ + fprintf(stderr, "HIPFFT error at 3\n"); + /*fprintf(stderr, "HIPFFT error in file '%s', line %d\n %s\nerror %d: %s\nterminating!\n",__FILE__, __LINE__,err, \ + _hipGetErrorEnum(err)); \*/ + fprintf(stderr, "HIPFFT error %d: %s\nterminating!\n",err,_hipGetErrorEnum(err)); \ + hipDeviceReset(); return; \ + } + } + +__global__ void debug(int varId, int N, HIP_DATA_TYPE_COMPLEX *x) { + //printf("Hello from GPU\n"); + for (int i = 0; i < N; i++) + { + HIP_DATA_TYPE_COMPLEX a = x[i]; + double b = (double)a.x; + double c = (double)a.y; + if (varId == 0) printf("GPU: input[%d]=(%2.4f,%2.4f)\n",i+1,b,c); + if (varId == 1) printf("GPU: output[%d]=(%2.4f,%2.4f)\n",i+1,b,c); + }} + +__global__ void debugFloat(int varId, int N, HIP_DATA_TYPE_REAL *x) { + //printf("Hello from GPU\n"); + for (int i = 0; i < N; i++) + { + double a = (double)x[i]; + if (varId == 0) printf("GPU: input[%d]=%2.4f\n",i+1,a); + if (varId == 1) printf("GPU: output[%d]=%2.4f\n",i+1,a); + }} + +extern "C" { + +void +execute_plan_ffth_c_(int ISIGNp, int N, DATA_TYPE *data_in_host, DATA_TYPE *data_out_host, hipfftHandle * plan_ptr) +//void hipfunction(int ISIGNp, int N, DATA_TYPE *data_in_host, DATA_TYPE *data_out_host, long *iplan) +{ + +HIP_DATA_TYPE_COMPLEX *data_in = reinterpret_cast(data_in_host); +HIP_DATA_TYPE_COMPLEX *data_out = reinterpret_cast(data_out_host); + +fprintf(stderr, "execute_plan_ffth_c_: plan address = %p\n",plan_ptr); +fflush(stderr); + +int ISIGN = ISIGNp; + +// Check variables on the GPU: +/*int device_count = 0; +hipGetDeviceCount(&device_count); +for (int i = 0; i < device_count; ++i) { + hipSetDevice(i); + hipLaunchKernelGGL(debug, dim3(1), dim3(1), 0, 0, 0, N, data_in); + hipDeviceSynchronize(); +}*/ + +if (hipDeviceSynchronize() != hipSuccess){ + fprintf(stderr, "Hip error: Failed to synchronize\n"); + abort(); +} + +if( ISIGN== -1 ){ +#ifdef TRANS_SINGLE + hipfftSafeCall(hipfftExecR2C(*plan_ptr, (HIP_DATA_TYPE_REAL*)data_in, data_out)); +#else + hipfftSafeCall(hipfftExecD2Z(*plan_ptr, (HIP_DATA_TYPE_REAL*)data_in, data_out)); +#endif +} +else if( ISIGN== 1){ +#ifdef TRANS_SINGLE + hipfftSafeCall(hipfftExecC2R(*plan_ptr, data_in, (HIP_DATA_TYPE_REAL*)data_out)); +#else + hipfftSafeCall(hipfftExecZ2D(*plan_ptr, data_in, (HIP_DATA_TYPE_REAL*)data_out)); +#endif +} +else { + abort(); +} + +if (hipDeviceSynchronize() != hipSuccess){ + fprintf(stderr, "Hip error: Failed to synchronize\n"); + abort(); +} + + +/*for (int i = 0; i < device_count; ++i) { + hipSetDevice(i); + hipLaunchKernelGGL(debugFloat, dim3(1), dim3(1), 0, 0, 1, N, (HIP_DATA_TYPE_REAL*)data_out); + hipDeviceSynchronize(); +}*/ + +//if (hipDeviceSynchronize() != hipSuccess){ +// fprintf(stderr, "Hip error: Failed to synchronize\n"); +// return; +//} + + +} +} diff --git a/src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.h b/src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.h new file mode 100644 index 0000000..8a7671d --- /dev/null +++ b/src/trans/gpu/algor/external/fourier/execute_plan_ffth.hip.h @@ -0,0 +1,10 @@ +#ifdef TRANS_SINGLE +typedef float DATA_TYPE; +#else +typedef double DATA_TYPE; +#endif + +#ifndef FUNCTIONS_H_INCLUDED +#define FUNCTIONS_H_INCLUDED +void hipfunction(int ISIGNp, int N, DATA_TYPE *data_in, DATA_TYPE *data_out, long *iplan); +#endif diff --git a/src/trans/gpu/algor/interface/dbfgsl.h b/src/trans/gpu/algor/interface/dbfgsl.h new file mode 100644 index 0000000..2e52a48 --- /dev/null +++ b/src/trans/gpu/algor/interface/dbfgsl.h @@ -0,0 +1,16 @@ +INTERFACE +subroutine dbfgsl (K_N,YD_D,K_M,K_NYS,K_JMIN,K_JMAX,YD_YBAR,YD_SBAR,P_RHO,P_SIZE) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE CONTROL_VECTORS_MOD +INTEGER(KIND=JPIM),INTENT(IN) :: K_M +INTEGER(KIND=JPIM) :: K_N +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_D +INTEGER(KIND=JPIM),INTENT(IN) :: K_NYS +INTEGER(KIND=JPIM),INTENT(IN) :: K_JMIN +INTEGER(KIND=JPIM),INTENT(IN) :: K_JMAX +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_YBAR(K_M) +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_SBAR(K_M) +REAL(KIND=JPRB) ,INTENT(OUT) :: P_RHO(K_M) +REAL(KIND=JPRB) ,INTENT(IN) :: P_SIZE +end subroutine dbfgsl +END INTERFACE diff --git a/src/trans/gpu/algor/interface/dpseuclid.h b/src/trans/gpu/algor/interface/dpseuclid.h new file mode 100644 index 0000000..cb949fd --- /dev/null +++ b/src/trans/gpu/algor/interface/dpseuclid.h @@ -0,0 +1,11 @@ +INTERFACE +subroutine dpseuclid (K_N,YD_X,YD_Y,P_SP) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND2 ,ONLY : JPRH +USE CONTROL_VECTORS_MOD +INTEGER(KIND=JPIM) :: K_N +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_X +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_Y +REAL(KIND=JPRH) ,INTENT(OUT) :: P_SP +end subroutine dpseuclid +END INTERFACE diff --git a/src/trans/gpu/algor/interface/dysave.h b/src/trans/gpu/algor/interface/dysave.h new file mode 100644 index 0000000..c7bfaf8 --- /dev/null +++ b/src/trans/gpu/algor/interface/dysave.h @@ -0,0 +1,27 @@ +INTERFACE +subroutine dysave (K_N,YD_Y,YD_S,P_YS,K_M,K_NYS,K_JMIN,K_JMAX,YD_YBAR,YD_SBAR,K_SELECT,& + & K_IITER,P_OL,K_JOL,P_EPS,P_SIZE,K_MODE,K_PLEV,K_IO) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND2 ,ONLY : JPRH +USE CONTROL_VECTORS_MOD +INTEGER(KIND=JPIM),INTENT(IN) :: K_M +INTEGER(KIND=JPIM) :: K_N +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_Y +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_S +REAL(KIND=JPRH) ,INTENT(IN) :: P_YS +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_NYS +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_JMIN +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_JMAX +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_YBAR(K_M) +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_SBAR(K_M) +INTEGER(KIND=JPIM),INTENT(IN) :: K_SELECT +INTEGER(KIND=JPIM),INTENT(IN) :: K_IITER +REAL(KIND=JPRB) ,INTENT(INOUT) :: P_OL(K_M) +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_JOL(K_M) +REAL(KIND=JPRB) ,INTENT(IN) :: P_EPS +REAL(KIND=JPRB) ,INTENT(OUT) :: P_SIZE +INTEGER(KIND=JPIM),INTENT(OUT) :: K_MODE +INTEGER(KIND=JPIM),INTENT(IN) :: K_PLEV +INTEGER(KIND=JPIM),INTENT(IN) :: K_IO +end subroutine dysave +END INTERFACE diff --git a/src/trans/gpu/algor/interface/eigsol.h b/src/trans/gpu/algor/interface/eigsol.h new file mode 100644 index 0000000..a689b8f --- /dev/null +++ b/src/trans/gpu/algor/interface/eigsol.h @@ -0,0 +1,17 @@ +INTERFACE +SUBROUTINE EIGSOL(KFLEVG,KNFLEVG,PA,PFR,PFI,K,PMO,KWO,PWO,KER) +USE PARKIND1 ,ONLY : JPIM ,JPRB + +INTEGER(KIND=JPIM),INTENT(IN) :: KFLEVG +INTEGER(KIND=JPIM),INTENT(IN) :: KNFLEVG +REAL(KIND=JPRB),INTENT(IN) :: PA(*) +REAL(KIND=JPRB),INTENT(OUT) :: PFR(*) +REAL(KIND=JPRB),INTENT(OUT) :: PFI(*) +INTEGER(KIND=JPIM),INTENT(IN) :: K +REAL(KIND=JPRB),INTENT(OUT) :: PMO(*) +INTEGER(KIND=JPIM),INTENT(OUT) :: KWO(*) +REAL(KIND=JPRB),INTENT(OUT) :: PWO(*) +INTEGER(KIND=JPIM),INTENT(OUT) :: KER + +END SUBROUTINE EIGSOL +END INTERFACE diff --git a/src/trans/gpu/algor/interface/intavg.h b/src/trans/gpu/algor/interface/intavg.h new file mode 100644 index 0000000..b038b9c --- /dev/null +++ b/src/trans/gpu/algor/interface/intavg.h @@ -0,0 +1,9 @@ +INTERFACE + SUBROUTINE INTAVG(PVLEV,PVI,KNIDIM,KNI,KNPROF,KNO,PPO,PVO) + USE PARKIND1 ,ONLY : JPIM ,JPRB + INTEGER(KIND=JPIM), INTENT(in) :: KNIDIM, KNI, KNO, KNPROF + REAL(KIND=JPRB), INTENT(in) :: PVLEV(KNIDIM,KNPROF) + REAL(KIND=JPRB), INTENT(in) :: PPO(KNO),PVI(KNIDIM,KNPROF) + REAL(KIND=JPRB), INTENT(inout) :: PVO(KNO,KNPROF) + END SUBROUTINE INTAVG +END INTERFACE diff --git a/src/trans/gpu/algor/interface/layeravg.h b/src/trans/gpu/algor/interface/layeravg.h new file mode 100644 index 0000000..5d0453a --- /dev/null +++ b/src/trans/gpu/algor/interface/layeravg.h @@ -0,0 +1,9 @@ +INTERFACE + SUBROUTINE LAYERAVG(LDGRADPS,PX1,PX2,PY2,KN1,KN2,KI,PZ,PZS,PZPS) + USE PARKIND1 ,ONLY : JPIM ,JPRB + LOGICAL, INTENT(in) :: LDGRADPS + INTEGER(KIND=JPIM), INTENT(in) :: KN1,KN2,KI + REAL(KIND=JPRB), INTENT(in) :: PX1(KN1),PX2(KN2),PY2(KN2),PZS(KN2) + REAL(KIND=JPRB), INTENT(inout) :: PZ(KN2),PZPS + END SUBROUTINE LAYERAVG +END INTERFACE diff --git a/src/trans/gpu/algor/interface/minv.h b/src/trans/gpu/algor/interface/minv.h new file mode 100644 index 0000000..323749e --- /dev/null +++ b/src/trans/gpu/algor/interface/minv.h @@ -0,0 +1,13 @@ +INTERFACE +SUBROUTINE MINV(PAB,KDIMN,KDBA,PZSCRA,PDET1,PTOL,KDIMM,KMODE) +USE PARKIND1, ONLY : JPIM, JPRB +INTEGER(KIND=JPIM), INTENT(IN) :: KDIMN +INTEGER(KIND=JPIM), INTENT(IN) :: KDBA +INTEGER(KIND=JPIM), INTENT(IN) :: KDIMM +INTEGER(KIND=JPIM), INTENT(IN) :: KMODE +REAL(KIND=JPRB), INTENT(IN) :: PTOL +REAL(KIND=JPRB), INTENT(OUT) :: PDET1 +REAL(KIND=JPRB), INTENT(INOUT) :: PAB(KDBA,KDIMN+KDIMM) +REAL(KIND=JPRB), INTENT(INOUT) :: PZSCRA(2*KDIMN) +END SUBROUTINE MINV +END INTERFACE diff --git a/src/trans/gpu/algor/interface/minv_8.h b/src/trans/gpu/algor/interface/minv_8.h new file mode 100644 index 0000000..97a8174 --- /dev/null +++ b/src/trans/gpu/algor/interface/minv_8.h @@ -0,0 +1,13 @@ +INTERFACE +SUBROUTINE MINV_8(PAB,KDIMN,KDBA,PZSCRA,PDET1,PTOL,KDIMM,KMODE) +USE PARKIND1, ONLY : JPIM, JPRD +INTEGER(KIND=JPIM), INTENT(IN) :: KDIMN +INTEGER(KIND=JPIM), INTENT(IN) :: KDBA +INTEGER(KIND=JPIM), INTENT(IN) :: KDIMM +INTEGER(KIND=JPIM), INTENT(IN) :: KMODE +REAL(KIND=JPRD), INTENT(IN) :: PTOL +REAL(KIND=JPRD), INTENT(OUT) :: PDET1 +REAL(KIND=JPRD), INTENT(INOUT) :: PAB(KDBA,KDIMN+KDIMM) +REAL(KIND=JPRD), INTENT(INOUT) :: PZSCRA(2*KDIMN) +END SUBROUTINE MINV_8 +END INTERFACE diff --git a/src/trans/gpu/algor/interface/minv_caller.h b/src/trans/gpu/algor/interface/minv_caller.h new file mode 100644 index 0000000..5e5e527 --- /dev/null +++ b/src/trans/gpu/algor/interface/minv_caller.h @@ -0,0 +1,9 @@ +INTERFACE +SUBROUTINE MINV_CALLER(LDSCALE,KDIM,PIN,POU) +USE PARKIND1 , ONLY : JPIM ,JPRB +LOGICAL ,INTENT(IN) :: LDSCALE +INTEGER(KIND=JPIM),INTENT(IN) :: KDIM +REAL(KIND=JPRB),INTENT(IN) :: PIN(KDIM,KDIM) +REAL(KIND=JPRB),INTENT(OUT) :: POU(KDIM,KDIM) +END SUBROUTINE MINV_CALLER +END INTERFACE diff --git a/src/trans/gpu/algor/interface/multvdv.h b/src/trans/gpu/algor/interface/multvdv.h new file mode 100644 index 0000000..115062e --- /dev/null +++ b/src/trans/gpu/algor/interface/multvdv.h @@ -0,0 +1,8 @@ +INTERFACE +SUBROUTINE MULTVDV(PVEC,PDIA,PROD) +USE PARKIND1, ONLY: JPIM, JPRB +REAL(KIND=JPRB), INTENT(IN) :: PVEC(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PDIA(:) +REAL(KIND=JPRB), INTENT(OUT) :: PROD(:,:) +END SUBROUTINE MULTVDV +END INTERFACE diff --git a/src/trans/gpu/algor/interface/mxmaop.h b/src/trans/gpu/algor/interface/mxmaop.h new file mode 100644 index 0000000..1365e64 --- /dev/null +++ b/src/trans/gpu/algor/interface/mxmaop.h @@ -0,0 +1,17 @@ +INTERFACE +SUBROUTINE MXMAOP(PA,KA,KAD,PB,KB,KBD,PC,KC,KCA,KAR,KAC,KBC) +USE PARKIND1 ,ONLY : JPIM ,JPRB +REAL(KIND=JPRB) ,INTENT(IN) :: PA(*) +INTEGER(KIND=JPIM),INTENT(IN) :: KA +INTEGER(KIND=JPIM),INTENT(IN) :: KAD +REAL(KIND=JPRB) ,INTENT(IN) :: PB(*) +INTEGER(KIND=JPIM),INTENT(IN) :: KB +INTEGER(KIND=JPIM),INTENT(IN) :: KBD +REAL(KIND=JPRB) ,INTENT(OUT) :: PC(*) +INTEGER(KIND=JPIM),INTENT(IN) :: KC +INTEGER(KIND=JPIM),INTENT(IN) :: KCA +INTEGER(KIND=JPIM),INTENT(IN) :: KAR +INTEGER(KIND=JPIM),INTENT(IN) :: KAC +INTEGER(KIND=JPIM),INTENT(IN) :: KBC +END SUBROUTINE MXMAOP +END INTERFACE diff --git a/src/trans/gpu/algor/interface/mxptma.h b/src/trans/gpu/algor/interface/mxptma.h new file mode 100644 index 0000000..13b7386 --- /dev/null +++ b/src/trans/gpu/algor/interface/mxptma.h @@ -0,0 +1,16 @@ +INTERFACE +SUBROUTINE MXPTMA(KLX,KVX,KVXS,KIX,PA,PBI,PCI,PBS,PCS,PX,PY) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KIX +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PBI(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PCI(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PBS(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PCS(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PX(KVXS,KLX,KIX) +REAL(KIND=JPRB) ,INTENT(OUT) :: PY(KVXS,KLX,KIX) +END SUBROUTINE MXPTMA +END INTERFACE diff --git a/src/trans/gpu/algor/interface/mxtrma.h b/src/trans/gpu/algor/interface/mxtrma.h new file mode 100644 index 0000000..2d7852a --- /dev/null +++ b/src/trans/gpu/algor/interface/mxtrma.h @@ -0,0 +1,14 @@ +INTERFACE +SUBROUTINE MXTRMA(KLX,KVX,KVXS,KIX,PA,PBI,PBS,PX,PY) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KIX +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PBI(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PBS(KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PX(KVXS,KLX,KIX) +REAL(KIND=JPRB) ,INTENT(OUT) :: PY(KVXS,KLX,KIX) +END SUBROUTINE MXTRMA +END INTERFACE diff --git a/src/trans/gpu/algor/interface/mxture.h b/src/trans/gpu/algor/interface/mxture.h new file mode 100644 index 0000000..3878e66 --- /dev/null +++ b/src/trans/gpu/algor/interface/mxture.h @@ -0,0 +1,16 @@ +INTERFACE +SUBROUTINE MXTURE(KLX,KVX,KVXS,KIX,KT,LDMT,PA,PB,PC,PY,PX) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KIX +INTEGER(KIND=JPIM),INTENT(IN) :: KT +LOGICAL ,INTENT(IN) :: LDMT +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PB(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PC(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PY(KVXS,KLX,KIX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PX(KVXS,KLX,KIX) +END SUBROUTINE MXTURE +END INTERFACE diff --git a/src/trans/gpu/algor/interface/mxturhd.h b/src/trans/gpu/algor/interface/mxturhd.h new file mode 100644 index 0000000..27239d4 --- /dev/null +++ b/src/trans/gpu/algor/interface/mxturhd.h @@ -0,0 +1,14 @@ +INTERFACE +SUBROUTINE MXTURHD(KLX,KVX,KVXS,KT,LDMT,PA,PB,PY,PX) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KT +LOGICAL ,INTENT(IN) :: LDMT +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PB(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PY(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PX(KVXS,KLX) +END SUBROUTINE MXTURHD +END INTERFACE diff --git a/src/trans/gpu/algor/interface/mxturs.h b/src/trans/gpu/algor/interface/mxturs.h new file mode 100644 index 0000000..ba7fbda --- /dev/null +++ b/src/trans/gpu/algor/interface/mxturs.h @@ -0,0 +1,14 @@ +INTERFACE +SUBROUTINE MXTURS(KLX,KVX,KVXS,KIX,PA,PB,PC,PY,PX) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KIX +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PB(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PC(KVX,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PY(KVXS,KLX,KIX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PX(KVXS,KLX,KIX) +END SUBROUTINE MXTURS +END INTERFACE diff --git a/src/trans/gpu/algor/interface/n1cg1.h b/src/trans/gpu/algor/interface/n1cg1.h new file mode 100644 index 0000000..524c7cd --- /dev/null +++ b/src/trans/gpu/algor/interface/n1cg1.h @@ -0,0 +1,40 @@ +INTERFACE +subroutine n1cg1 (simul,K_N,YD_X,P_EPSNEG,P_EPS,K_ITER,K_IMP,K_IO,K_MODE,& + & K_PRECO,K_M0,K_ILM0,K_NILM0,YD_YBAR0,YD_SBAR0,P_SIZE0,& + & K_BFGSB,K_M1,K_ILM1,K_NILM1,YD_YBAR1,YD_SBAR1,P_SIZE1,& + & K_SELECT,K_NUPTRA,P_F,YD_R,YD_YS,YD_YRS) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE CONTROL_VECTORS_MOD + +EXTERNAL SIMUL +INTEGER(KIND=JPIM),INTENT(IN) :: K_NILM0 +INTEGER(KIND=JPIM),INTENT(IN) :: K_NILM1 +INTEGER(KIND=JPIM),INTENT(IN) :: K_N +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_X +REAL(KIND=JPRB) ,INTENT(IN) :: P_EPSNEG +REAL(KIND=JPRB) ,INTENT(IN) :: P_EPS +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_ITER +INTEGER(KIND=JPIM),INTENT(IN) :: K_IMP +INTEGER(KIND=JPIM),INTENT(IN) :: K_IO +INTEGER(KIND=JPIM),INTENT(OUT) :: K_MODE +INTEGER(KIND=JPIM),INTENT(IN) :: K_PRECO +INTEGER(KIND=JPIM),INTENT(IN) :: K_M0 +INTEGER(KIND=JPIM),INTENT(IN) :: K_ILM0(K_NILM0) +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_YBAR0(K_M0) +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_SBAR0(K_M0) +REAL(KIND=JPRB) ,INTENT(IN) :: P_SIZE0 +INTEGER(KIND=JPIM),INTENT(IN) :: K_BFGSB +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_M1 +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_ILM1(K_NILM1) +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_YBAR1(K_M1) +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_SBAR1(K_M1) +REAL(KIND=JPRB) ,INTENT(OUT) :: P_SIZE1 +INTEGER(KIND=JPIM),INTENT(IN) :: K_SELECT +INTEGER(KIND=JPIM),INTENT(IN) :: K_NUPTRA +REAL(KIND=JPRB) ,INTENT(IN) :: P_F +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_R +TYPE(CONTROL_VECTOR) ,INTENT(IN) :: YD_YS +TYPE(CONTROL_VECTOR) ,INTENT(IN) :: YD_YRS + +end subroutine n1cg1 +END INTERFACE diff --git a/src/trans/gpu/algor/interface/n1cga.h b/src/trans/gpu/algor/interface/n1cga.h new file mode 100644 index 0000000..62de2e7 --- /dev/null +++ b/src/trans/gpu/algor/interface/n1cga.h @@ -0,0 +1,53 @@ +INTERFACE +subroutine n1cga (simul,K_N,YD_X,YD_B,YD_Q,YD_R,YD_V,YD_RM,P_EPSNEG,P_EPS2,K_ITER,K_IMP,K_IO,& + & K_MODE,& + & K_BFGSP,& + & K_M0,K_NYS0,K_JMIN0,K_JMAX0,YD_YBAR0,YD_SBAR0,P_SIZE0,& + & K_BFGSB,& + & K_M1,K_NYS1,K_JMIN1,K_JMAX1,K_JOL,YD_YBAR1,YD_SBAR1,P_SIZE1,P_OL,& + & K_SELECT,& + & P_RHO,P_F0,& + & YD_YS,YD_YRS) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE CONTROL_VECTORS_MOD + +external simul +INTEGER(KIND=JPIM),INTENT(IN) :: K_M0 +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_M1 +INTEGER(KIND=JPIM),INTENT(IN) :: K_N +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_X +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_B +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_Q +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_R +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_V +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_RM +REAL(KIND=JPRB) ,INTENT(IN) :: P_EPSNEG +REAL(KIND=JPRB) ,INTENT(INOUT) :: P_EPS2 +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_ITER +INTEGER(KIND=JPIM),INTENT(IN) :: K_IMP +INTEGER(KIND=JPIM),INTENT(IN) :: K_IO +INTEGER(KIND=JPIM),INTENT(OUT) :: K_MODE +INTEGER(KIND=JPIM),INTENT(IN) :: K_BFGSP +INTEGER(KIND=JPIM),INTENT(IN) :: K_NYS0 +INTEGER(KIND=JPIM),INTENT(IN) :: K_JMIN0 +INTEGER(KIND=JPIM),INTENT(IN) :: K_JMAX0 +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_YBAR0(K_M0) +TYPE(CONTROL_VECTOR),INTENT(IN) :: YD_SBAR0(K_M0) +REAL(KIND=JPRB) ,INTENT(IN) :: P_SIZE0 +INTEGER(KIND=JPIM),INTENT(IN) :: K_BFGSB +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_NYS1 +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_JMIN1 +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_JMAX1 +INTEGER(KIND=JPIM),INTENT(INOUT) :: K_JOL(K_M1) +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_YBAR1(K_M1) +TYPE(CONTROL_VECTOR),INTENT(INOUT) :: YD_SBAR1(K_M1) +REAL(KIND=JPRB) ,INTENT(OUT) :: P_SIZE1 +REAL(KIND=JPRB) ,INTENT(INOUT) :: P_OL(K_M1) +INTEGER(KIND=JPIM),INTENT(IN) :: K_SELECT +REAL(KIND=JPRB) ,INTENT(OUT) :: P_RHO(K_M0) +REAL(KIND=JPRB) ,INTENT(IN) :: P_F0 +TYPE(CONTROL_VECTOR) ,INTENT(IN) :: YD_YS +TYPE(CONTROL_VECTOR) ,INTENT(IN) :: YD_YRS + +end subroutine n1cga +END INTERFACE diff --git a/src/trans/gpu/algor/interface/si_mxptco.h b/src/trans/gpu/algor/interface/si_mxptco.h new file mode 100644 index 0000000..fd1d7e7 --- /dev/null +++ b/src/trans/gpu/algor/interface/si_mxptco.h @@ -0,0 +1,16 @@ +INTERFACE +SUBROUTINE SI_MXPTCO(KM,KSMAX,KFLEV,KFLSUR,PF,PALPHA,PDENIM,& + & PEPSI,PIN,POU) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KFLEV +INTEGER(KIND=JPIM),INTENT(IN) :: KFLSUR +REAL(KIND=JPRB) ,INTENT(IN) :: PF +REAL(KIND=JPRB) ,INTENT(IN) :: PALPHA(KM:KSMAX+1) +REAL(KIND=JPRB) ,INTENT(IN) :: PDENIM(KM:KSMAX+1) +REAL(KIND=JPRB) ,INTENT(IN) :: PEPSI(KM:KSMAX) +REAL(KIND=JPRB) ,INTENT(IN) :: PIN(KFLSUR,2,KM:KSMAX) +REAL(KIND=JPRB) ,INTENT(OUT) :: POU(KFLSUR,2,KM:KSMAX) +END SUBROUTINE SI_MXPTCO +END INTERFACE diff --git a/src/trans/gpu/algor/interface/simplico.h b/src/trans/gpu/algor/interface/simplico.h new file mode 100644 index 0000000..bb9b6ee --- /dev/null +++ b/src/trans/gpu/algor/interface/simplico.h @@ -0,0 +1,19 @@ +INTERFACE +SUBROUTINE SIMPLICO(KM,KSMAX,KFLEV,KFLSUR,PALPHA,PDENIM,& + & PFPLUS,PFMINUS,PSIVP,PRLAPDI,PBDT2,PY,PX) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KFLEV +INTEGER(KIND=JPIM),INTENT(IN) :: KFLSUR +REAL(KIND=JPRB) ,INTENT(IN) :: PALPHA(KM:KSMAX+1) +REAL(KIND=JPRB) ,INTENT(IN) :: PDENIM(KM:KSMAX+1) +REAL(KIND=JPRB) ,INTENT(IN) :: PFPLUS(KM:KSMAX+1) +REAL(KIND=JPRB) ,INTENT(IN) :: PFMINUS(KM:KSMAX+1) +REAL(KIND=JPRB) ,INTENT(IN) :: PSIVP(KFLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PRLAPDI(0:KSMAX) +REAL(KIND=JPRB) ,INTENT(IN) :: PBDT2 +REAL(KIND=JPRB) ,INTENT(INOUT) :: PY(KFLSUR,2,KM:KSMAX) +REAL(KIND=JPRB) ,INTENT(OUT) :: PX(KFLSUR,2,KM:KSMAX) +END SUBROUTINE SIMPLICO +END INTERFACE diff --git a/src/trans/gpu/algor/interface/sublayer.h b/src/trans/gpu/algor/interface/sublayer.h new file mode 100644 index 0000000..fae7ac2 --- /dev/null +++ b/src/trans/gpu/algor/interface/sublayer.h @@ -0,0 +1,9 @@ +INTERFACE + SUBROUTINE SUBLAYER(pz1,pz2,pz3,px1,px2,ldgradps,& + & pt1,pt2,pw1,pw2,pzs1,pzs2,pzps) + USE PARKIND1 ,ONLY : JPIM ,JPRB + LOGICAL, INTENT(in) :: ldgradps + REAL(KIND=JPRB), INTENT(in) :: pz1,pz2,pz3,px1,px2,pt1,pt2,pzs1,pzs2 + REAL(KIND=JPRB), INTENT(out) :: pw1,pw2,pzps + END SUBROUTINE SUBLAYER +END INTERFACE diff --git a/src/trans/gpu/algor/interface/suher.h b/src/trans/gpu/algor/interface/suher.h new file mode 100644 index 0000000..57e2b97 --- /dev/null +++ b/src/trans/gpu/algor/interface/suher.h @@ -0,0 +1,18 @@ +INTERFACE +SUBROUTINE SUHER(KLX,KVX,KVXS,PD,PEI,PES,PFI,PFS,PA,PB,PC,PG,PH) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +REAL(KIND=JPRB) ,INTENT(IN) :: PD(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PEI(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PES(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PFI(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PFS(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PA(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PB(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PC(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PG(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PH(KVXS,KLX) +END SUBROUTINE SUHER +END INTERFACE diff --git a/src/trans/gpu/algor/interface/suhert.h b/src/trans/gpu/algor/interface/suhert.h new file mode 100644 index 0000000..f73ad5f --- /dev/null +++ b/src/trans/gpu/algor/interface/suhert.h @@ -0,0 +1,14 @@ +INTERFACE +SUBROUTINE SUHERT(KLX,KVX,KVXS,PD,PEI,PES,PA,PB,PG) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +REAL(KIND=JPRB) ,INTENT(IN) :: PD(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PEI(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PES(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PA(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PB(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PG(KVXS,KLX) +END SUBROUTINE SUHERT +END INTERFACE diff --git a/src/trans/gpu/algor/interface/suhes.h b/src/trans/gpu/algor/interface/suhes.h new file mode 100644 index 0000000..25bb0ec --- /dev/null +++ b/src/trans/gpu/algor/interface/suhes.h @@ -0,0 +1,14 @@ +INTERFACE +SUBROUTINE SUHES(KLX,KVX,KVXS,PD,PE,PF,PA,PB,PC) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KLX +INTEGER(KIND=JPIM),INTENT(IN) :: KVXS +INTEGER(KIND=JPIM),INTENT(IN) :: KVX +REAL(KIND=JPRB) ,INTENT(IN) :: PD(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PE(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(IN) :: PF(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PA(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PB(KVXS,KLX) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PC(KVXS,KLX) +END SUBROUTINE SUHES +END INTERFACE diff --git a/src/trans/gpu/algor/interface/tridia.h b/src/trans/gpu/algor/interface/tridia.h new file mode 100644 index 0000000..836bfa8 --- /dev/null +++ b/src/trans/gpu/algor/interface/tridia.h @@ -0,0 +1,25 @@ +INTERFACE +SUBROUTINE TRIDIA(KN,KSYS,KFIRST,KEND,KTYP,PM,PRHS,PSOL) + +! ----------------------------------------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK + +! ----------------------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KN +INTEGER(KIND=JPIM),INTENT(IN) :: KSYS +INTEGER(KIND=JPIM),INTENT(IN) :: KFIRST +INTEGER(KIND=JPIM),INTENT(IN) :: KEND +INTEGER(KIND=JPIM),INTENT(IN) :: KTYP +REAL(KIND=JPRB) ,INTENT(IN) :: PM(1+(KTYP-1)*(KSYS-1),KN,-1:1) +REAL(KIND=JPRB) ,INTENT(IN) :: PRHS(KSYS,KN) +REAL(KIND=JPRB) ,INTENT(OUT) :: PSOL(KSYS,KN) + +! ----------------------------------------------------------------------------- + +END SUBROUTINE TRIDIA +END INTERFACE diff --git a/src/trans/gpu/algor/module/butterfly_alg_mod.F90 b/src/trans/gpu/algor/module/butterfly_alg_mod.F90 new file mode 100644 index 0000000..788af11 --- /dev/null +++ b/src/trans/gpu/algor/module/butterfly_alg_mod.F90 @@ -0,0 +1,1148 @@ +! (C) Copyright 2014- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE BUTTERFLY_ALG_MOD +USE PARKIND_ECTRANS, ONLY : JPRD, JPIM, JPRB, JPRBT, JPIB +USE INTERPOL_DECOMP_MOD +USE SHAREDMEM_MOD + +use, intrinsic :: ieee_exceptions + +IMPLICIT NONE + +PRIVATE +PUBLIC NODE_TYPE,LEV_STRUCT,BUTTERFLY_STRUCT,CONSTRUCT_BUTTERFLY,MULT_BUTV,MULT_BUTM,CLONE,& + & PACK_BUTTERFLY_STRUCT,UNPACK_BUTTERFLY_STRUCT + +! Butterfly package. + +! Butterfly algorithm for matrix multiplication +! Coded from: "An algorithm for the rapid evaluation of special function transform" by +! Michael O'Neill, Franco Woolfe and Vladimir Rohklin, Appl.Comput.Harmon.Anal. 2009? +! referred to in the following as ONWR + +TYPE NODE_TYPE +INTEGER(KIND=JPIM) :: ILEV =0 ! Level of this node +INTEGER(KIND=JPIM) :: IFCOL =0 ! First column +INTEGER(KIND=JPIM) :: ILCOL =0 ! Last column +INTEGER(KIND=JPIM) :: IFROW =0 ! first row +INTEGER(KIND=JPIM) :: ILROW =0 ! Last row +INTEGER(KIND=JPIM) :: ICOLS =0 ! Number of columns +INTEGER(KIND=JPIM) :: IROWS =0 ! Number of rows +INTEGER(KIND=JPIM) :: IRANK =0 ! Rank of interpolative decomposition +INTEGER(KIND=JPIM) :: IOFFBETA=0 ! Offset in "beta" work space +INTEGER(KIND=JPIM),POINTER :: ICLIST(:) => NULL() ! List of columns in B (column skeleton matrix) +REAL(KIND=JPRBT),POINTER :: PNONIM(:) => NULL() ! Non-identety part of interpolation matrix +REAL(KIND=JPRBT),POINTER :: B(:,:) => NULL() ! Column skeleton matrix +REAL(KIND=JPRD),POINTER :: DB(:,:) => NULL() ! Column skeleton matrix, as part of pre-computations only +END TYPE NODE_TYPE + +TYPE LEV_STRUCT +INTEGER(KIND=JPIM) :: IJ =0 ! Number of row boxes at this level +INTEGER(KIND=JPIM) :: IK =0 ! Number of column boxes at this level +INTEGER(KIND=JPIM) :: IBETALEN=0 ! Workspace needed at this level of interim results "beta" +TYPE(NODE_TYPE),POINTER :: NODE(:,:) => NULL() ! Box info +END TYPE LEV_STRUCT + +TYPE BUTTERFLY_STRUCT +INTEGER(KIND=JPIM) :: M_ORDER =0 ! M of original matrix +INTEGER(KIND=JPIM) :: N_ORDER =0 ! N of original matrix +INTEGER(KIND=JPIM) :: N_CMAX =0 ! Max number of columns in each submatrix at level 0 +INTEGER(KIND=JPIM) :: N_LEVELS =0 ! Max level in dyadic hierarchy +INTEGER(KIND=JPIM) :: IBETALEN_MAX=0 ! Max workspace for one level of interim results "beta" +TYPE(LEV_STRUCT),POINTER :: SLEV(:) => NULL() ! Level structure (dimensioned 0:n_levels) +END TYPE BUTTERFLY_STRUCT + +TYPE CLONE +REAL(KIND=JPRBT) , ALLOCATABLE :: COMMSBUF(:) ! for communicating packed bufferfly_structs +END TYPE CLONE ! between MPI tasks + +#ifdef WITH_IEEE_HALT +LOGICAL, PARAMETER :: LL_IEEE_HALT = .TRUE. +#else +LOGICAL, PARAMETER :: LL_IEEE_HALT = .FALSE. +#endif + +LOGICAL, PARAMETER :: LLDOUBLE = (JPRBT == JPRD) + +CONTAINS +!================================================================================ +SUBROUTINE CONSTRUCT_BUTTERFLY(PEPS,KCMAX,KM,KN,PMAT,YD_STRUCT) +IMPLICIT NONE + +! Constuct butterfly + +REAL(KIND=JPRD),INTENT(IN) :: PEPS ! Precision +INTEGER(KIND=JPIM),INTENT(IN) :: KCMAX ! Max number of columns in each submatrix at level 0 +INTEGER(KIND=JPIM),INTENT(IN) :: KM ! Number of rows in matrix pmat +INTEGER(KIND=JPIM),INTENT(IN) :: KN ! Number of columns in matrix pmat +REAL(KIND=JPRD),INTENT(IN) :: PMAT(:,:) ! original matrix +TYPE(BUTTERFLY_STRUCT),INTENT(INOUT) :: YD_STRUCT ! Structure needed to apply butterfly + +REAL(KIND=JPRD),ALLOCATABLE :: ZSUB(:,:),ZBCOMB(:,:) +INTEGER(KIND=JPIM) :: ILEVELS,JL,JJ,JK,IM,II,JR,IJ,IK +INTEGER(KIND=JPIM) :: IROWS,ICOLS,IRANK,ICLIST(KN) +INTEGER(KIND=JPIM) :: ILM1,IJL,IKL,IJR,IKR,IRANKL,IRANKR,IOFFROW,IBLEV,IBLEVM1 +INTEGER(KIND=JPIM) :: IFR,ILR,IFC,IROFF,IRSTRIDE,IOFFBETA +INTEGER(KIND=JPIM) :: ILEN,I,J,J1,J2,JIJ,JIK +REAL(KIND=JPRD) :: ZNORMS(KN) +TYPE(NODE_TYPE),POINTER :: YNODEL,YNODER,YNODE +TYPE(NODE_TYPE),POINTER :: YBNODEL,YBNODER,YBNODE +TYPE(LEV_STRUCT) :: YTEMPB(0:1) + +!-------------------------------------------------------------------------------- + + +! ONWR 5.4.1 +YD_STRUCT%M_ORDER = KM +YD_STRUCT%N_ORDER = KN +YD_STRUCT%N_CMAX = KCMAX + +!Find number of levels +ILEVELS = 0 +DO + IF(2**ILEVELS >= (YD_STRUCT%N_ORDER+YD_STRUCT%N_CMAX-1) /YD_STRUCT%N_CMAX ) EXIT + ILEVELS = ILEVELS+1 +ENDDO +YD_STRUCT%N_LEVELS = ILEVELS +ALLOCATE(YD_STRUCT%SLEV(0:YD_STRUCT%N_LEVELS)) + +! Number of boxes at each level +IJ = 1 +IK = (KN-1)/KCMAX+1 +DO JL=0,YD_STRUCT%N_LEVELS + YD_STRUCT%SLEV(JL)%IJ = IJ + YD_STRUCT%SLEV(JL)%IK = IK + IJ = IJ*2 + IK = MAX((IK+1)/2,1) +ENDDO + +DO JL=0,YD_STRUCT%N_LEVELS + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(YD_STRUCT%SLEV(JL)%IJ,YD_STRUCT%SLEV(JL)%IK)) + CALL GSTATS(1253,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JJ,JK,YNODE,ILM1,IJL,IKL,IJR,IKR,IRSTRIDE) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + YNODE%ILEV = JL + IF(JL == 0) THEN + YNODE%IFCOL = 1+(JK-1)*KCMAX + YNODE%ILCOL = MIN(JK*KCMAX,KN) + YNODE%ICOLS = YNODE%ILCOL - YNODE%IFCOL+1 + YNODE%IFROW = 1 + YNODE%ILROW = KM + ELSE + YNODE%IFCOL = -99 + YNODE%ILCOL = -99 + YNODE%ICOLS = -99 + ILM1 = JL-1 + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRSTRIDE = (YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IROWS+1)/2 + IF(MOD(JJ,2) == 1) THEN + YNODE%IFROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW + YNODE%ILROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW+IRSTRIDE -1 + ELSE + YNODE%IFROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW+IRSTRIDE + YNODE%ILROW = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%ILROW + ENDIF + ENDIF + YNODE%IROWS = YNODE%ILROW - YNODE%IFROW+1 + YNODE%IROWS = MAX(YNODE%IROWS,0) + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1253,1) +ENDDO + + +! ONWR 5.4.2 + +DO JL=0,YD_STRUCT%N_LEVELS + IBLEV = MOD(JL,2) + IF(JL > 0) THEN + IBLEVM1 = MOD(JL-1,2) + ELSE + IBLEVM1 = -1 + ENDIF + ALLOCATE(YTEMPB(IBLEV)%NODE(YD_STRUCT%SLEV(JL)%IJ,YD_STRUCT%SLEV(JL)%IK)) + CALL GSTATS(1253,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JJ,JK,YNODE,YBNODE,IROWS,ICOLS,& + !$OMP& ZSUB,ILM1,IJL,IKL,IJR,IKR,YNODEL,YBNODEL,IRANKL,YNODER,YBNODER,IRANKR,IOFFROW,ZBCOMB) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + YBNODE => YTEMPB(IBLEV)%NODE(JJ,JK) + IF(JL == 0) THEN + IROWS=YNODE%IROWS + ICOLS=YNODE%ICOLS + ALLOCATE(ZSUB(IROWS,ICOLS)) + CALL EXTRACT_SUB(YNODE,PMAT,ZSUB) + CALL COMPRESS_MAT(YNODE,YBNODE,PEPS,IROWS,ICOLS,ZSUB) + DEALLOCATE(ZSUB) + ELSE + ILM1 = JL-1 + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + YNODEL => YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL) + YBNODEL => YTEMPB(IBLEVM1)%NODE(IJL,IKL) + IRANKL = YNODEL%IRANK + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + YNODER => YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR) + YBNODER => YTEMPB(IBLEVM1)%NODE(IJR,IKR) + IRANKR = YNODER%IRANK + ELSE + YNODER => YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL) + IRANKR = 0 + ENDIF + IROWS = YNODE%IROWS + ICOLS = IRANKL+IRANKR + YNODE%ICOLS=ICOLS + IOFFROW = YNODE%IFROW-& + & YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IFROW + ALLOCATE(ZBCOMB(IROWS,ICOLS)) + CALL COMBINE_B(YBNODEL%DB,IRANKL,& + & YBNODER%DB,IRANKR,& + & IROWS,IOFFROW,ZBCOMB) + CALL COMPRESS_MAT(YNODE,YBNODE,PEPS,IROWS,ICOLS,ZBCOMB) + DEALLOCATE(ZBCOMB) + ENDIF + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1253,1) + IF(IBLEVM1 >= 0) THEN +!Deallocate Bs no longer needed + DO JJ=1,YD_STRUCT%SLEV(JL-1)%IJ + DO JK=1,YD_STRUCT%SLEV(JL-1)%IK + DEALLOCATE(YTEMPB(IBLEVM1)%NODE(JJ,JK)%DB) + ENDDO + ENDDO + DEALLOCATE(YTEMPB(IBLEVM1)%NODE) + ENDIF +! Permanently store B for last level + IF(JL == YD_STRUCT%N_LEVELS) THEN + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + ALLOCATE(YNODE%DB(YNODE%IROWS,YNODE%IRANK)) + YNODE%DB(:,:) = YTEMPB(IBLEV)%NODE(JJ,JK)%DB(:,:) + DEALLOCATE(YTEMPB(IBLEV)%NODE(JJ,JK)%DB) + ENDDO + ENDDO + DEALLOCATE(YTEMPB(IBLEV)%NODE) + ENDIF +ENDDO + +CALL GSTATS(1901,0) +! Compute work space +YD_STRUCT%IBETALEN_MAX = 0 +DO JL=0,YD_STRUCT%N_LEVELS + IOFFBETA = 0 + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IF( ASSOCIATED(YNODE%DB) ) THEN + ALLOCATE(YNODE%B(SIZE(YNODE%DB(:,1)),SIZE(YNODE%DB(1,:)))) + YNODE%B(:,:) = YNODE%DB(:,:) + DEALLOCATE(YNODE%DB) + ENDIF + YNODE%IOFFBETA = IOFFBETA + IOFFBETA = IOFFBETA+YNODE%IRANK + ENDDO + ENDDO + YD_STRUCT%SLEV(JL)%IBETALEN = IOFFBETA + YD_STRUCT%IBETALEN_MAX = MAX(YD_STRUCT%IBETALEN_MAX,YD_STRUCT%SLEV(JL)%IBETALEN) +ENDDO + +CALL GSTATS(1901,1) + +RETURN + +END SUBROUTINE CONSTRUCT_BUTTERFLY +!============================================================================= +SUBROUTINE PACK_BUTTERFLY_STRUCT(YD_STRUCT,YD_CLONE) +IMPLICIT NONE +! Pack butterfly struct into array +TYPE(BUTTERFLY_STRUCT),INTENT(IN) :: YD_STRUCT ! Structure needed to apply butterfly +TYPE(CLONE), TARGET, INTENT(OUT) :: YD_CLONE ! for communicating packed bufferfly_structs + +INTEGER(KIND=JPIM) :: ILEN,I,JL,JIK,JIJ,J,J1,J2 +!-------------------------------------------------------------------------------- + +ILEN=0 +ILEN=ILEN+5 +DO JL=0,YD_STRUCT%N_LEVELS + ILEN=ILEN+3 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE) )THEN + DO JIK=1,YD_STRUCT%SLEV(JL)%IK + DO JIJ=1,YD_STRUCT%SLEV(JL)%IJ + ILEN=ILEN+9 + ILEN=ILEN+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) )THEN + ILEN=ILEN+SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) + ENDIF + ILEN=ILEN+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) )THEN + ILEN=ILEN+SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) + ENDIF + ILEN=ILEN+2 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B) )THEN + ILEN=ILEN+SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B) + ENDIF + ENDDO + ENDDO + ENDIF +ENDDO +ALLOCATE(YD_CLONE%COMMSBUF(ILEN)) +I=0 +YD_CLONE%COMMSBUF(I+1)=YD_STRUCT%M_ORDER +YD_CLONE%COMMSBUF(I+2)=YD_STRUCT%N_ORDER +YD_CLONE%COMMSBUF(I+3)=YD_STRUCT%N_CMAX +YD_CLONE%COMMSBUF(I+4)=YD_STRUCT%N_LEVELS +YD_CLONE%COMMSBUF(I+5)=YD_STRUCT%IBETALEN_MAX +I=I+5 +DO JL=0,YD_STRUCT%N_LEVELS + YD_CLONE%COMMSBUF(I+1)=YD_STRUCT%SLEV(JL)%IJ + YD_CLONE%COMMSBUF(I+2)=YD_STRUCT%SLEV(JL)%IK + YD_CLONE%COMMSBUF(I+3)=YD_STRUCT%SLEV(JL)%IBETALEN + I=I+3 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE) )THEN + DO JIK=1,YD_STRUCT%SLEV(JL)%IK + DO JIJ=1,YD_STRUCT%SLEV(JL)%IJ + YD_CLONE%COMMSBUF(I+1)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILEV + YD_CLONE%COMMSBUF(I+2)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFCOL + YD_CLONE%COMMSBUF(I+3)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILCOL + YD_CLONE%COMMSBUF(I+4)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFROW + YD_CLONE%COMMSBUF(I+5)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILROW + YD_CLONE%COMMSBUF(I+6)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICOLS + YD_CLONE%COMMSBUF(I+7)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IROWS + YD_CLONE%COMMSBUF(I+8)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IRANK + YD_CLONE%COMMSBUF(I+9)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IOFFBETA + I=I+9 + YD_CLONE%COMMSBUF(I+1)=0 + I=I+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) )THEN + J=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST) + YD_CLONE%COMMSBUF(I)=J + YD_CLONE%COMMSBUF(I+1:I+J)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST(:) + I=I+J + ENDIF + YD_CLONE%COMMSBUF(I+1)=0 + I=I+1 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) )THEN + J=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM) + YD_CLONE%COMMSBUF(I)=J + YD_CLONE%COMMSBUF(I+1:I+J)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM(:) + I=I+J + ENDIF + YD_CLONE%COMMSBUF(I+1)=0 + YD_CLONE%COMMSBUF(I+2)=0 + I=I+2 + IF( ASSOCIATED(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B) )THEN + J1=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B,DIM=1) + J2=SIZE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B,DIM=2) + YD_CLONE%COMMSBUF(I-1)=J1 + YD_CLONE%COMMSBUF(I )=J2 + DO J=1,J2 + YD_CLONE%COMMSBUF(I+1:I+J1)=YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B(:,J) + I=I+J1 + ENDDO + ENDIF + ENDDO + ENDDO + ENDIF +ENDDO +IF( I /= ILEN )THEN + CALL ABOR1('PACK_BUTTERFLY_STRUCT: PACKED LENGTH /= PRECOMPUTED LENGTH') +ENDIF + +END SUBROUTINE PACK_BUTTERFLY_STRUCT +!===================================================================================== +SUBROUTINE UNPACK_BUTTERFLY_STRUCT(YD_STRUCT,YD_CLONE,YDMEMBUF) +IMPLICIT NONE +! Construct butterfly struct from packed array +TYPE(BUTTERFLY_STRUCT),INTENT(OUT) :: YD_STRUCT ! Structure needed to apply butterfly +TYPE(CLONE), TARGET, OPTIONAL,INTENT(IN) :: YD_CLONE ! for communicating packed bufferfly_structs +TYPE(SHAREDMEM),OPTIONAL,INTENT(INOUT) :: YDMEMBUF ! Memory buffer +INTEGER(KIND=JPIM) :: ILEN,I,JL,JIK,JIJ,J,J1,J2,II +REAL(KIND=JPRBT),POINTER :: ZBUF(:) +LOGICAL :: LLMEMBUF +!-------------------------------------------------------------------------------- +IF(PRESENT(YDMEMBUF)) THEN + LLMEMBUF = .TRUE. +ELSE + IF(.NOT.PRESENT(YD_CLONE)) CALL ABOR1('UNPACK_BUTTERFLY_STRUCT: YD_CLONE ARGUMENT MISSING') + LLMEMBUF = .FALSE. +ENDIF +I=0 +IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,5,ZBUF,ADVANCE=.TRUE.) +ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+5) +ENDIF +YD_STRUCT%M_ORDER = NINT(ZBUF(1),JPRBT) +YD_STRUCT%N_ORDER = NINT(ZBUF(2),JPRBT) +YD_STRUCT%N_CMAX = NINT(ZBUF(3),JPRBT) +YD_STRUCT%N_LEVELS = NINT(ZBUF(4),JPRBT) +YD_STRUCT%IBETALEN_MAX = NINT(ZBUF(5),JPRBT) +I=I+5 + +ALLOCATE(YD_STRUCT%SLEV(0:YD_STRUCT%N_LEVELS)) +DO JL=0,YD_STRUCT%N_LEVELS + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,3,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+3) + ENDIF + YD_STRUCT%SLEV(JL)%IJ =NINT(ZBUF(1),JPRBT) + YD_STRUCT%SLEV(JL)%IK =NINT(ZBUF(2),JPRBT) + YD_STRUCT%SLEV(JL)%IBETALEN=NINT(ZBUF(3),JPRBT) + I=I+3 + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(YD_STRUCT%SLEV(JL)%IJ,YD_STRUCT%SLEV(JL)%IK)) + DO JIK=1,YD_STRUCT%SLEV(JL)%IK + DO JIJ=1,YD_STRUCT%SLEV(JL)%IJ + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,10,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+10) + ENDIF + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILEV = NINT(ZBUF(1),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFCOL = NINT(ZBUF(2),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILCOL = NINT(ZBUF(3),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IFROW = NINT(ZBUF(4),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ILROW = NINT(ZBUF(5),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICOLS = NINT(ZBUF(6),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IROWS = NINT(ZBUF(7),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IRANK = NINT(ZBUF(8),JPRBT) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%IOFFBETA= NINT(ZBUF(9),JPRBT) + J = NINT(ZBUF(10)) + I=I+10 + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST(J)) + IF( J > 0 )THEN + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,J,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+J) + ENDIF + DO II=1,J + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%ICLIST(II)=NINT(ZBUF(II),JPRBT) + END DO + I=I+J + ENDIF + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,1,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+1) + ENDIF + J=NINT(ZBUF(1),JPRBT) + I=I+1 + IF( J > 0 )THEN + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,J,YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM,ADVANCE=.TRUE.) + ELSE + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM(J)) + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%PNONIM(:)=YD_CLONE%COMMSBUF(I+1:I+J) + ENDIF + I=I+J + ENDIF + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,2,ZBUF,ADVANCE=.TRUE.) + ELSE + ZBUF => YD_CLONE%COMMSBUF(I+1:I+2) + ENDIF + J1=NINT(ZBUF(1),JPRBT) + J2=NINT(ZBUF(2),JPRBT) + I=I+2 + IF( J1 > 0 .AND. J2 > 0 )THEN + IF(LLMEMBUF) THEN + CALL SHAREDMEM_ASSOCIATE(YDMEMBUF,J1,J2,YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B,ADVANCE=.TRUE.) + I=I+J1*J2 + ELSE + ALLOCATE(YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B(J1,J2)) + DO J=1,J2 + YD_STRUCT%SLEV(JL)%NODE(JIJ,JIK)%B(:,J)=YD_CLONE%COMMSBUF(I+1:I+J1) + I=I+J1 + ENDDO + ENDIF + ENDIF + ENDDO + ENDDO +ENDDO +IF(.NOT.LLMEMBUF) THEN + IF( I /= SIZE(YD_CLONE%COMMSBUF) )THEN + CALL ABOR1('UNPACK_BUTTERFLY_STRUCT: UNPACKED LENGTH /= ALLOCATED LENGTH') + ENDIF +ENDIF +END SUBROUTINE UNPACK_BUTTERFLY_STRUCT +!=========================================================================== +SUBROUTINE EXTRACT_SUB(YDNODE,PMAT,PSUB) +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(IN) :: YDNODE +REAL(KIND=JPRD),INTENT(IN) :: PMAT(:,:) +REAL(KIND=JPRD),INTENT(OUT) :: PSUB(:,:) + +INTEGER(KIND=JPIM) :: ICOL,IROW,JCOL,JROW +!-------------------------------------------------------------------- + +ICOL = 0 +DO JCOL=YDNODE%IFCOL,YDNODE%ILCOL + ICOL = ICOL+1 + IROW = 0 + DO JROW=YDNODE%IFROW,YDNODE%ILROW + IROW = IROW+1 + PSUB(IROW,ICOL) = PMAT(JROW,JCOL) + ENDDO +ENDDO + +END SUBROUTINE EXTRACT_SUB +!=================================================================== +SUBROUTINE COMBINE_B(PBL,KRANKL,PBR,KRANKR,KROWS,KOFFROW,PBCOMB) +IMPLICIT NONE +REAL(KIND=JPRD),INTENT(IN) :: PBL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KRANKL +REAL(KIND=JPRD),INTENT(IN) :: PBR(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KRANKR +INTEGER(KIND=JPIM),INTENT(IN) :: KROWS +INTEGER(KIND=JPIM),INTENT(IN) :: KOFFROW +REAL(KIND=JPRD),INTENT(OUT) :: PBCOMB(:,:) + +INTEGER(KIND=JPIM) :: JCOL,JM +!-------------------------------------------------------------------- +DO JCOL=1,KRANKL + DO JM=1,KROWS + PBCOMB(JM,JCOL) = PBL(KOFFROW+JM,JCOL) + ENDDO +ENDDO +DO JCOL=1,KRANKR + DO JM=1,KROWS + PBCOMB(JM,KRANKL+JCOL) = PBR(KOFFROW+JM,JCOL) + ENDDO +ENDDO + +END SUBROUTINE COMBINE_B +!=================================================================== +SUBROUTINE COMPRESS_MAT(YDNODE,YDBNODE,PEPS,KROWS,KCOLS,PSUB) +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDBNODE +REAL(KIND=JPRD),INTENT(IN) :: PEPS +INTEGER(KIND=JPIM),INTENT(IN) :: KROWS,KCOLS +REAL(KIND=JPRD),INTENT(IN) :: PSUB(:,:) + +INTEGER(KIND=JPIM) :: JR,IRANK,ICLIST(KCOLS),JN,JM,II +REAL(KIND=JPRD) :: ZNORMS(KCOLS) +REAL(KIND=JPRD) :: ZSUB(KROWS,KCOLS),ZPNONIM(KROWS,KCOLS) +REAL(KIND=JPRD),ALLOCATABLE :: ZP(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZB(:,:) +!-------------------------------------------------------------------- + +II = 0 +DO JN=1,KCOLS + DO JM=1,KROWS + II = II+1 + ZSUB(JM,JN) = PSUB(JM,JN) + ENDDO +ENDDO + +CALL COMPUTE_ID(PEPS,KROWS,KCOLS,ZSUB,IRANK,ICLIST,ZPNONIM) +YDNODE%IRANK = IRANK +ALLOCATE(YDNODE%PNONIM(IRANK*(KCOLS-IRANK))) +ALLOCATE(YDNODE%ICLIST(KCOLS)) +ALLOCATE(YDBNODE%DB(KROWS,IRANK)) +YDNODE%ICLIST(:) = ICLIST(1:KCOLS) +II = 0 +DO JN=1,KCOLS-IRANK + DO JM=1,IRANK + II = II+1 + YDNODE%PNONIM(II) = REAL(ZPNONIM(JM,JN), JPRBT) + ENDDO +ENDDO +DO JR=1,IRANK + YDBNODE%DB(:,JR) = PSUB(:,ICLIST(JR)) +ENDDO + +END SUBROUTINE COMPRESS_MAT +!==================================================================== +SUBROUTINE MULT_BUTV(CDTRANS,YD_STRUCT,PVECIN,PVECOUT) +IMPLICIT NONE +! Multiply vector by matrix represented by buttervfly + +TYPE(BUTTERFLY_STRUCT),INTENT(IN) :: YD_STRUCT ! Structure from constucT-butterfly +CHARACTER(LEN=1),INTENT(IN) :: CDTRANS ! 'N' normal matmul, 'T' with transpose of matrix +REAL(KIND=JPRBT),INTENT(IN) :: PVECIN(:) ! Input vector +REAL(KIND=JPRBT),INTENT(OUT) :: PVECOUT(:) ! Output vector + +REAL(KIND=JPRBT),ALLOCATABLE :: ZBETA(:,:) +INTEGER(KIND=JPIM) :: JL,JJ,JK,ILEVS,IFR,ILR,IROWS +INTEGER(KIND=JPIM) :: ILM1,IJL,IKL,IJR,IKR,IRANKL,IRANKR +INTEGER(KIND=JPIM) :: IBETALV,IBTST,IBTEN,IBETALVM1,IBTSTL,IBTENL,IBTSTR,IBTENR +REAL(KIND=JPRBT) :: ZVECOUT(SIZE(PVECOUT)) +LOGICAL :: LLTRANSPOSE +TYPE(NODE_TYPE),POINTER :: YNODEL,YNODER,YNODE +!---------------------------------------------------------------------------------- +LLTRANSPOSE = (CDTRANS == 'T' .OR. CDTRANS == 't') + +ILEVS = YD_STRUCT%N_LEVELS +ALLOCATE(ZBETA(YD_STRUCT%IBETALEN_MAX,0:1)) ! Work space for "beta" + +! ONWR 5.4.3 +IF(LLTRANSPOSE) THEN + DO JL=ILEVS,0,-1 + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + CALL MULT_P_TR(YNODE,ZBETA(IBTST:IBTEN,IBETALV),PVECOUT(IFR:ILR)) + ELSE + IF(JL == ILEVS) THEN + IFR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IFROW + ILR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%ILROW + IROWS=YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IROWS + IF (LLDOUBLE) THEN + CALL DGEMV('T',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRD,YNODE%B,IROWS,PVECIN(IFR:ILR),1,& + & 0.0_JPRD,ZBETA(IBTST:IBTEN,IBETALV),1) + ELSE + CALL SGEMV('T',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRBT,YNODE%B,IROWS,PVECIN(IFR:ILR),1,& + & 0.0_JPRBT,ZBETA(IBTST:IBTEN,IBETALV),1) + ENDIF + ENDIF + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + ELSE + IRANKR = 0 + ENDIF + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + CALL MULT_P_TR(YNODE,ZBETA(IBTST:IBTEN,IBETALV),ZVECOUT(1:IRANKL+IRANKR)) + IF(MOD(JJ,2) == 1) THEN + ZBETA(IBTSTL:IBTENL,IBETALVM1)= ZVECOUT(1:IRANKL) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,IBETALVM1)=ZVECOUT(IRANKL+1:IRANKL+IRANKR) + ENDIF + ELSE + ZBETA(IBTSTL:IBTENL,IBETALVM1)=ZBETA(IBTSTL:IBTENL,IBETALVM1)+ & + & ZVECOUT(1:IRANKL) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,IBETALVM1)=ZBETA(IBTSTR:IBTENR,IBETALVM1) + & + & ZVECOUT(IRANKL+1:IRANKL+IRANKR) + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO +ELSE + DO JL=0,ILEVS + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN ! ONWR (115) + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + CALL MULT_P(YNODE,PVECIN(IFR:ILR),ZBETA(IBTST:IBTEN,IBETALV) ) + ELSE ! ONWR (116) + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + ELSE + IRANKR = 0 + ENDIF + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + CALL MULT_P(YNODE,ZBETA(IBTSTL:IBTENR,IBETALVM1),ZBETA(IBTST:IBTEN,IBETALV)) + ENDIF + IF(JL == ILEVS) THEN ! ONWR (117) + IFR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IFROW + ILR = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%ILROW + IROWS = YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IROWS + IF (LLDOUBLE) THEN + CALL DGEMV('N',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRD,YNODE%B,IROWS,ZBETA(IBTST:IBTEN,IBETALV),1,& + & 0.0_JPRD,PVECOUT(IFR:ILR),1) + ELSE + CALL SGEMV('N',IROWS,YD_STRUCT%SLEV(ILEVS)%NODE(JJ,JK)%IRANK,& + & 1.0_JPRBT,YNODE%B,IROWS,ZBETA(IBTST:IBTEN,IBETALV),1,& + & 0.0_JPRBT,PVECOUT(IFR:ILR),1) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO +ENDIF +END SUBROUTINE MULT_BUTV +!==================================================================== +SUBROUTINE MULT_BUTM(CDTRANS,YD_STRUCT,KF,PVECIN,PVECOUT) +IMPLICIT NONE + +! Multiply matrix by matrix represented by butterfly + +CHARACTER(LEN=1),INTENT(IN) :: CDTRANS ! 'N' normal matmul, 'T' with transpose of matrix +TYPE(BUTTERFLY_STRUCT),INTENT(IN) :: YD_STRUCT ! Structure from constucT-butterfly +INTEGER(KIND=JPIM),INTENT(IN) :: KF ! Number of fields +REAL(KIND=JPRBT),INTENT(IN) :: PVECIN(:,:) ! Input vector +REAL(KIND=JPRBT),INTENT(OUT) :: PVECOUT(:,:) ! Output vector + +INTEGER(KIND=JPIM) :: JL,JJ,JK,ILEVS,IFR,ILR,IROWS,JF +INTEGER(KIND=JPIM) :: ILM1,IJL,IKL,IJR,IKR,IRANKL,IRANKR,IROUT,IRIN +INTEGER(KIND=JPIM) :: IRANK,IM,IN,JN,IDX +INTEGER(KIND=JPIM) :: IBETALV,IBTST,IBTEN,IBETALVM1,IBTSTL,IBTENL,IBTSTR,IBTENR,ILBETA +REAL(KIND=JPRBT) :: ZVECIN(YD_STRUCT%N_ORDER,KF),ZVECOUT(YD_STRUCT%N_ORDER,KF) +REAL(KIND=JPRBT),ALLOCATABLE :: ZBETA(:,:,:) +LOGICAL :: LLTRANSPOSE +LOGICAL :: LL_HALT_INVALID + +TYPE(NODE_TYPE),POINTER :: YNODEL,YNODER,YNODE + + +!---------------------------------------------------------------------------------- +LLTRANSPOSE = (CDTRANS == 'T' .OR. CDTRANS == 't') +IROUT=SIZE(PVECOUT(:,1)) +IRIN=SIZE(PVECIN(:,1)) + +ILEVS = YD_STRUCT%N_LEVELS +ILBETA = YD_STRUCT%IBETALEN_MAX +ALLOCATE(ZBETA(ILBETA,KF,0:1)) ! Work space for "beta" + +! ONWR 5.4.3 +IF(LLTRANSPOSE) THEN + DO JL=ILEVS,0,-1 + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + IN = YNODE%ICOLS-YNODE%IRANK + IM = YNODE%IRANK + IF( IM <=0 ) CALL ABOR1('mult_butm: IM<=0 not allowed') + IF(IN>0) THEN + IF (LLDOUBLE) THEN + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRD,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IN,KF,IM,1.0_JPRBT,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRBT,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + DO JF=1,KF + DO JN=1,YNODE%IRANK + IDX = YNODE%ICLIST(JN) + PVECOUT(IFR+IDX-1,JF) = ZBETA(IBTST+JN-1,JF,IBETALV) + ENDDO + DO JN=YNODE%IRANK+1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + PVECOUT(IFR+IDX-1,JF) = ZVECOUT(JN,JF) + ENDDO + ENDDO + ELSE + IF(JL == ILEVS) THEN + IFR = YNODE%IFROW + ILR = YNODE%ILROW + IROWS =YNODE%IROWS + IRANK = YNODE%IRANK + IF (LLDOUBLE) THEN + CALL DGEMM('T','N',IRANK,KF,IROWS,1.0_JPRD,& + & YNODE%B,IROWS,PVECIN(IFR,1),IRIN,0.0_JPRD,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IRANK,KF,IROWS,1.0_JPRBT,& + & YNODE%B,IROWS,PVECIN(IFR,1),IRIN,0.0_JPRBT,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + ELSE + IRANKR = 0 + ENDIF + IN = YNODE%ICOLS-YNODE%IRANK + IM = YNODE%IRANK + IF( IM <=0 ) CALL ABOR1('mult_butm: IM<=0 not allowed') + IF(IN>0) THEN + IF (LLDOUBLE) THEN + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRD,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IN,KF,IM,1.0_JPRBT,& + & YNODE%PNONIM(1),IM,ZBETA(IBTST,1,IBETALV),ILBETA,0.0_JPRBT,& + & ZVECOUT(YNODE%IRANK+1,1),YD_STRUCT%N_ORDER) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + DO JF=1,KF + DO JN=1,YNODE%IRANK + IDX = YNODE%ICLIST(JN) + ZVECIN(IDX,JF) = ZBETA(IBTST+JN-1,JF,IBETALV) + ENDDO + DO JN=YNODE%IRANK+1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + ZVECIN(IDX,JF) = ZVECOUT(JN,JF) + ENDDO + ENDDO + + DO JF=1,KF + IF(MOD(JJ,2) == 1) THEN + ZBETA(IBTSTL:IBTENL,JF,IBETALVM1)= ZVECIN(1:IRANKL,JF) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,JF,IBETALVM1)=ZVECIN(IRANKL+1:IRANKL+IRANKR,JF) + ENDIF + ELSE + ZBETA(IBTSTL:IBTENL,JF,IBETALVM1)=ZBETA(IBTSTL:IBTENL,JF,IBETALVM1)+ & + & ZVECIN(1:IRANKL,JF) + IF(IRANKR > 0) THEN + ZBETA(IBTSTR:IBTENR,JF,IBETALVM1)=ZBETA(IBTSTR:IBTENR,JF,IBETALVM1) + & + & ZVECIN(IRANKL+1:IRANKL+IRANKR,JF) + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + ENDDO + ENDDO +ELSE + DO JL=0,ILEVS + IBETALV = MOD(JL,2) + DO JJ=1,YD_STRUCT%SLEV(JL)%IJ + DO JK=1,YD_STRUCT%SLEV(JL)%IK + YNODE => YD_STRUCT%SLEV(JL)%NODE(JJ,JK) + IBTST = YNODE%IOFFBETA+1 + IBTEN = YNODE%IOFFBETA+YNODE%IRANK + IF(JL == 0) THEN + IFR = YNODE%IFCOL + ILR = YNODE%ILCOL + IRANK = YNODE%IRANK + IM = IRANK + IN = YNODE%ICOLS-IRANK + DO JF=1,KF + DO JN=1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZBETA(IBTST+JN-1,JF,IBETALV) = PVECIN(IFR+IDX-1,JF) + ELSE + ZVECIN(JN,JF) = PVECIN(IFR+IDX-1,JF) + ENDIF + ENDDO + ENDDO + IF( IRANK <=0 ) CALL ABOR1('mult_butm: IRANK<=0 not allowed') + IF(YNODE%ICOLS > IRANK) THEN + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',IRANK,KF,IN,1.0_JPRD,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRD,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IRANK,KF,IN,1.0_JPRBT,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRBT,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ELSE + ILM1 = JL-1 + IBETALVM1=MOD(ILM1,2) + IJL = (JJ+1)/2 + IKL = 2*JK-1 + IJR = (JJ+1)/2 + IKR = 2*JK + IRANKL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IRANK + IBTSTL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+1 + IBTENL = YD_STRUCT%SLEV(ILM1)%NODE(IJL,IKL)%IOFFBETA+IRANKL + IF(IKR <= YD_STRUCT%SLEV(ILM1)%IK) THEN + IRANKR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IRANK + IBTSTR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+1 + IBTENR = YD_STRUCT%SLEV(ILM1)%NODE(IJR,IKR)%IOFFBETA+IRANKR + ELSE + IRANKR = 0 + IBTENR = IBTENL + ENDIF + IRANK = YNODE%IRANK + IM = IRANK + IN = YNODE%ICOLS-IRANK + DO JF=1,KF + DO JN=1,YNODE%ICOLS + IDX = YNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZBETA(IBTST+JN-1,JF,IBETALV) = ZBETA(IBTSTL+IDX-1,JF,IBETALVM1) + ELSE + ZVECIN(JN,JF) = ZBETA(IBTSTL+IDX-1,JF,IBETALVM1) + ENDIF + ENDDO + ENDDO + IF( IRANK <=0 ) CALL ABOR1('mult_butm: IRANK<=0 not allowed') + IF(YNODE%ICOLS > IRANK) THEN + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',IRANK,KF,IN,1.0_JPRD,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRD,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IRANK,KF,IN,1.0_JPRBT,& + & YNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YD_STRUCT%N_ORDER,1.0_JPRBT,& + & ZBETA(IBTST,1,IBETALV),ILBETA) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ENDIF + IF( IRANK <=0 ) CALL ABOR1('mult_butm: IRANK<=0 not allowed') + IF(JL == ILEVS) THEN + IFR = YNODE%IFROW + ILR = YNODE%ILROW + IROWS = YNODE%IROWS + IF (LLDOUBLE) THEN + CALL DGEMM('N','N',IROWS,KF,YNODE%IRANK,1.0_JPRD,& + & YNODE%B,IROWS,ZBETA(IBTST,1,IBETALV),YD_STRUCT%IBETALEN_MAX,0.0_JPRD,& + & PVECOUT(IFR,1),IROUT) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IROWS,KF,YNODE%IRANK,1.0_JPRBT,& + & YNODE%B,IROWS,ZBETA(IBTST,1,IBETALV),YD_STRUCT%IBETALEN_MAX,0.0_JPRBT,& + & PVECOUT(IFR,1),IROUT) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO +ENDIF +DEALLOCATE(ZBETA) +END SUBROUTINE MULT_BUTM +!===================================================================== +SUBROUTINE MULT_P(YDNODE,PVECIN,PVECOUT) +! Multiply vector by projection matrix +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +REAL(KIND=JPRBT),INTENT(IN) :: PVECIN(:) +REAL(KIND=JPRBT),INTENT(OUT) :: PVECOUT(:) + +REAL(KIND=JPRBT) :: ZVECIN(YDNODE%ICOLS), ZVECOUT(SIZE(PVECOUT)) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN +!--------------------------------------------------------- + +IRANK = YDNODE%IRANK +DO JN=1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZVECOUT(JN) = PVECIN(IDX) + ELSE + ZVECIN(JN) = PVECIN(IDX) + ENDIF +ENDDO + +IF(YDNODE%ICOLS > IRANK) THEN + IM = IRANK + IN = YDNODE%ICOLS-IRANK + IF (JPRBT == JPRD) THEN + CALL DGEMV('N',IM,IN,1.0_JPRBT,YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1),1,1.0_JPRBT,ZVECOUT,1) + PVECOUT(:)=ZVECOUT(:) + ELSE + CALL SGEMV('N',IM,IN,1.0_JPRBT,YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1),1,1.0_JPRBT,PVECOUT,1) + ENDIF +ENDIF + +END SUBROUTINE MULT_P +!===================================================================== +SUBROUTINE MULT_PM(YDNODE,KF,KLBETA,PVECIN,PVECOUT) +IMPLICIT NONE +! Multiply matrix by projection matrix +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +INTEGER(KIND=JPIM),INTENT(IN) :: KF +INTEGER(KIND=JPIM),INTENT(IN) :: KLBETA +REAL(KIND=JPRBT),INTENT(IN) :: PVECIN(:,:) +REAL(KIND=JPRBT),INTENT(OUT) :: PVECOUT(:,:) + +REAL(KIND=JPRBT) :: ZVECIN(YDNODE%ICOLS,KF), ZVECOUT(SIZE(PVECOUT(:,1)),KF) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN,JF + +LOGICAL :: LL_HALT_INVALID +!--------------------------------------------------------- + +IRANK = YDNODE%IRANK +IM = IRANK +IN = YDNODE%ICOLS-IRANK +DO JF=1,KF + DO JN=1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + IF(JN <= IRANK) THEN + ZVECOUT(JN,JF) = PVECIN(IDX,JF) + ELSE + ZVECIN(JN,JF) = PVECIN(IDX,JF) + ENDIF + ENDDO +ENDDO +IF(YDNODE%ICOLS > IRANK) THEN + IF (JPRBT == JPRD) THEN + CALL DGEMM('N','N',IRANK,KF,IN,1.0_JPRD,& + & YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YDNODE%ICOLS,1.0_JPRD,& + & PVECOUT,IRANK) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('N','N',IRANK,KF,IN,1.0_JPRBT,& + & YDNODE%PNONIM(1),IRANK,ZVECIN(IRANK+1,1),YDNODE%ICOLS,1.0_JPRBT,& + & PVECOUT,IRANK) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF +ENDIF +END SUBROUTINE MULT_PM +!================================================================== +SUBROUTINE MULT_P_TR(YDNODE,PVECIN,PVECOUT) +! Multiply vector by transposed procetion matrix +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +REAL(KIND=JPRBT),INTENT(IN) :: PVECIN(:) +REAL(KIND=JPRBT),INTENT(OUT) :: PVECOUT(:) + +REAL(KIND=JPRBT) :: ZVECOUT(YDNODE%ICOLS), ZVECIN(SIZE(PVECIN)) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN +!--------------------------------------------------------- + +IRANK = YDNODE%IRANK +IN = YDNODE%ICOLS-IRANK +IF(IN>0) THEN + IM = IRANK + IF (JPRBT == JPRD) THEN + ZVECIN(:) = PVECIN(:) + CALL DGEMV('T',IM,IN,1.0_JPRD,YDNODE%PNONIM,IRANK,ZVECIN,1,0.0_JPRD,ZVECOUT(IRANK+1),1) + ELSE + CALL SGEMV('T',IM,IN,1.0_JPRBT,YDNODE%PNONIM,IRANK,PVECIN,1,0.0_JPRBT,ZVECOUT(IRANK+1),1) + ENDIF +ENDIF +DO JK=1,IRANK + IDX = YDNODE%ICLIST(JK) + PVECOUT(IDX) = PVECIN(JK) +ENDDO +DO JN=IRANK+1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + PVECOUT(IDX) = ZVECOUT(JN) +ENDDO + +END SUBROUTINE MULT_P_TR +!================================================================== +SUBROUTINE MULT_P_TRM(YDNODE,KF,PVECIN,PVECOUT) +! Multiply matrix by transposed procetion matrix +IMPLICIT NONE +TYPE(NODE_TYPE),INTENT(INOUT) :: YDNODE +INTEGER(KIND=JPIM),INTENT(IN) :: KF +REAL(KIND=JPRBT),INTENT(IN) :: PVECIN(:,:) +REAL(KIND=JPRBT),INTENT(OUT) :: PVECOUT(:,:) + +REAL(KIND=JPRBT) :: ZVECOUT(YDNODE%ICOLS,KF), ZVECIN(SIZE(PVECIN(:,1)),KF) +INTEGER(KIND=JPIM) :: JK,JN,IDX,IRANK,IM,IN,JF + +LOGICAL :: LL_HALT_INVALID + +!------------------------------------------------------------------ + +IN = YDNODE%ICOLS-YDNODE%IRANK +IM = YDNODE%IRANK +IF(IN>0) THEN + IF (JPRBT == JPRD) THEN + ZVECIN(:,:) = PVECIN(:,:) + CALL DGEMM('T','N',IN,KF,IM,1.0_JPRD,& + & YDNODE%PNONIM(1),IM,ZVECIN,IM,0.0_JPRD,& + & ZVECOUT(YDNODE%IRANK+1,1),YDNODE%ICOLS) + ELSE + IF (LL_IEEE_HALT) THEN + call ieee_get_halting_mode(ieee_invalid,LL_HALT_INVALID) + if (LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.false.) + ENDIF + CALL SGEMM('T','N',IN,KF,IM,1.0_JPRBT,& + & YDNODE%PNONIM(1),IM,PVECIN,IM,0.0_JPRBT,& + & ZVECOUT(YDNODE%IRANK+1,1),YDNODE%ICOLS) + if (LL_IEEE_HALT .and. LL_HALT_INVALID) call ieee_set_halting_mode(ieee_invalid,.true.) + ENDIF +ENDIF +DO JF=1,KF + DO JK=1,YDNODE%IRANK + IDX = YDNODE%ICLIST(JK) + PVECOUT(IDX,JF) = PVECIN(JK,JF) + ENDDO + DO JN=YDNODE%IRANK+1,YDNODE%ICOLS + IDX = YDNODE%ICLIST(JN) + PVECOUT(IDX,JF) = ZVECOUT(JN,JF) + ENDDO +ENDDO +END SUBROUTINE MULT_P_TRM +!==================================================================== +END MODULE BUTTERFLY_ALG_MOD + diff --git a/src/trans/gpu/algor/module/dilatation_mod.F90 b/src/trans/gpu/algor/module/dilatation_mod.F90 new file mode 100644 index 0000000..e70d0e5 --- /dev/null +++ b/src/trans/gpu/algor/module/dilatation_mod.F90 @@ -0,0 +1,485 @@ +! (C) Copyright 2014- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DILATATION_MOD + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE PARKIND2 ,ONLY : JPRH +USE MPL_MODULE, ONLY : MPL_SEND, MPL_RECV +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE + +PRIVATE +PUBLIC DILAT_CALC, DILAT_DEVIATION, DILAT_CONTROL, DILAT_MAPPING + +! Dilatation package package. + +! To compute dilatation and contraction matrixes from Legendre polynomials and +! gaussian weights and latitudes on a streched sphere ; and control the +! deviation of the product of these matrixes against the identity matrix. + +CONTAINS +!================================================================================ +SUBROUTINE DILAT_MAPPING(PSTRET,PMU,PMAPP) + +!**** *DILAT_MAPPING* - Compute the map factor for each latitudes + +! Purpose. +! -------- +! To compute the map factor, given the stretching +! factor and the sines of the standard gaussian latitudes + +!** Interface. +! ---------- +! *CALL *DILAT_MAPPING* + +! Explicit arguments. +! ------------------- +! PSTRET : stretching factor +! PMU : sines of the gaussian latitudes +! PMAPP : map factor + +! Implicit arguments. +! ------------------- + +! Method. +! ------- +! See documentation. + +! Reference. +! ---------- +! Arpege note 19 (in French). + +! Externals. +! --------- + +! Author. +! ------- +! R. El Khatib 19-Jun-2013 from Michel Rochas, DMN (Original 91-01-28). + +! Modifications. +! -------------- +! ------------------------------------------------------------------ + +IMPLICIT NONE + +REAL(KIND=JPRB), INTENT(IN) :: PSTRET +REAL(KIND=JPRD), INTENT(IN) :: PMU(:) +REAL(KIND=JPRB), INTENT(OUT) :: PMAPP(:) + +INTEGER(KIND=JPIM) :: IGLS, IDGNH, JGL + +REAL(KIND=JPRH) :: Z_DLSINE,Z_DLTAN + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_MAPPING',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +IF ( (SIZE(PMU,DIM=1) /= SIZE(PMAPP,DIM=1)) ) THEN + CALL ABOR1('DILAT_MAPPING : SIZE MISMATCH BETWEEN PMU AND PMAPP') +ENDIF + +! Remark : this is an inconsistent mixture of double and quadruple precision. REK +Z_DLTAN=(1.0_JPRB-PSTRET**2)/(1.0_JPRB+PSTRET**2) +Z_DLSINE=2.0_JPRB*PSTRET/(1.0_JPRB+PSTRET**2) +IDGNH=(SIZE(PMU,DIM=1)+1)/2 +DO JGL=1,IDGNH + PMAPP(JGL)=REAL((Z_DLSINE/(1.0_JPRB+Z_DLTAN*PMU(JGL)))**2,JPRB) +ENDDO +DO JGL=1,IDGNH + IGLS=2*IDGNH-JGL+1 + PMAPP(IGLS)=REAL((Z_DLSINE/(1.0_JPRB-Z_DLTAN*PMU(JGL)))**2,JPRB) +ENDDO + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_MAPPING',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE DILAT_MAPPING +!==================================================================== +SUBROUTINE DILAT_CALC(KM,KNSMAX,KNSMIN,KDGNH,PNM,PW,PMAPP,PRPNM,PALFA,PBETA) + +!**** *DILAT_CALC* - Computes dilatation matrix. + +! Purpose. +! -------- +! Computes dilatation matrix for schmidt transform. + +!** Interface. +! ---------- +! *CALL *DILAT_CALC(...) + +! Explicit Arguments : +! -------------------- +! INPUT: +! KM - Zonal wave number +! KNSMAX - Larger truncation +! KNSMIN - smaller truncation +! KDGNH - number of points on an hemisphere +! PNM - Legendre polynomials small truncation +! PW - Gaussian weights +! PMAPP - Mapping factor +! PRPNM - Legendre polynomials large truncation + +! OUTPUT: +! PALFA - Contraction (0) matrix +! PBETA - Dilatation (1) matrix + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation. + +! Reference. +! ---------- +! Arpege note 19 (in French). + +! Externals. +! ---------- +! Calls MXMAOP. +! Is called by SUDIL. + +! Author. +! ------- +! Michel Rochas, DMN. + +! Modifications. +! -------------- +! Original : 91-01-28. +! =07-08-91= Philippe Courtier. Changes Leg. polynomials mapping +! K. YESSAD: 93-05-11 : cleaning, comments put into English. +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! K. Yessad: Aug 2005 : A-level distribution of conf 911. +! K. Yessad (June 2009): externalisation. +! R. El Khatib 20-Jun-2013 Optimization +! R. El Khatib 07-Mar-2016 Simplification/Optimization +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KNSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KNSMIN +INTEGER(KIND=JPIM),INTENT(IN) :: KDGNH +REAL(KIND=JPRB) ,INTENT(IN) :: PNM(2*KDGNH,KNSMIN-KM+1) +REAL(KIND=JPRB) ,INTENT(IN) :: PW(KDGNH) +REAL(KIND=JPRB) ,INTENT(IN) :: PMAPP(2*KDGNH) +REAL(KIND=JPRB) ,INTENT(IN) :: PRPNM(KDGNH,KNSMAX-KM+1) +REAL(KIND=JPRB) ,INTENT(OUT) :: PALFA(KNSMAX-KM+1,KNSMIN-KM+1) +REAL(KIND=JPRB) ,INTENT(OUT) :: PBETA(KNSMAX-KM+1,KNSMIN-KM+1) + +! ------------------------------------------------------------------ + +REAL(KIND=JPRB) :: ZP(KNSMAX-KM+1) +REAL(KIND=JPRB) :: ZPNM(KDGNH+1,KNSMIN-KM+1) +REAL(KIND=JPRB) :: ZNOR(2*((KNSMAX-KM+1)/2)+1,KNSMIN-KM+1) +REAL(KIND=JPRB) :: ZSUD(2*((KNSMAX-KM+1)/2)+1,KNSMIN-KM+1) +REAL(KIND=JPRB) :: ZRPNM(2*((KNSMAX-KM+1)/2)+1,KDGNH+1) + +INTEGER(KIND=JPIM) :: IGLS, IR, IR2, JGL, JN, JS, JI, II, IOFF +INTEGER(KIND=JPIM) :: ISMAXSUR + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "mxmaop.h" + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_CALC',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +!* 0. Preparations +! ------------ + +ISMAXSUR=2*((KNSMAX-KM+1)/2)+1 +IOFF=KNSMAX+2-KM + +! Initialize a parity array + +DO JS=1,KNSMAX-KM+1 + ZP(JS)=REAL(2*MOD(JS+KNSMAX-KM,2)-1,JPRB) +ENDDO + +! multiplication by gaussian weights +! of the Legendre polynomials at high truncation + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JGL,JS) +DO JGL=1,KDGNH + DO JS=1,KNSMAX-KM+1 + ZRPNM(JS,JGL)=PRPNM(JGL,JS)*PW(JGL) + ENDDO +ENDDO +!$OMP END PARALLEL DO + +! ------------------------------------------------------------------ + +!* 1. Matrix ALPHA + + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JN,JGL) +DO JN=1,KNSMIN-KM+1 + DO JGL=1,KDGNH + ZPNM(JGL,JN)=PNM(JGL,JN)*PMAPP(JGL) + ENDDO +ENDDO +!$OMP END PARALLEL DO + +CALL MXMAOP(ZRPNM,1,ISMAXSUR,ZPNM(:,:),1,KDGNH+1,ZNOR(:,:),1,ISMAXSUR,& + & KNSMAX-KM+1,KDGNH,KNSMIN-KM+1) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JN,JGL) +DO JN=1,KNSMIN-KM+1 + DO JGL=1,KDGNH + ZPNM(JGL,JN)=PNM(2*KDGNH-JGL+1,JN)*PMAPP(2*KDGNH-JGL+1) + ENDDO +ENDDO +!$OMP END PARALLEL DO + +CALL MXMAOP(ZRPNM,1,ISMAXSUR,ZPNM(:,:),1,KDGNH+1,ZSUD(:,:),1,ISMAXSUR,& + & KNSMAX-KM+1,KDGNH,KNSMIN-KM+1) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JN,JS) +DO JN=1,KNSMIN-KM+1 + DO JS=1,KNSMAX-KM+1 + PALFA(IOFF-JS,KNSMIN-KM+2-JN)=ZNOR(JS,JN)+ZP(JS)*ZSUD(JS,JN) + ENDDO +ENDDO +!$OMP END PARALLEL DO + + +!* 2. Matrix BETA + + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JN,JGL) +DO JN=1,KNSMIN-KM+1 + DO JGL=1,KDGNH + ZPNM(JGL,JN)=PNM(JGL,JN) + ENDDO +ENDDO +!$OMP END PARALLEL DO + +CALL MXMAOP(ZRPNM,1,ISMAXSUR,ZPNM(:,:),1,KDGNH+1,ZNOR(:,:),1,ISMAXSUR,& + & KNSMAX-KM+1,KDGNH,KNSMIN-KM+1) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JN,JGL) +DO JN=1,KNSMIN-KM+1 + DO JGL=1,KDGNH + ZPNM(JGL,JN)=PNM(2*KDGNH-JGL+1,JN) + ENDDO +ENDDO +!$OMP END PARALLEL DO + +CALL MXMAOP(ZRPNM,1,ISMAXSUR,ZPNM(:,:),1,KDGNH+1,ZSUD(:,:),1,ISMAXSUR,& + & KNSMAX-KM+1,KDGNH,KNSMIN-KM+1) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JN,JS) +DO JN=1,KNSMIN-KM+1 + DO JS=1,KNSMAX-KM+1 + PBETA(IOFF-JS,KNSMIN-KM+2-JN)=ZNOR(JS,JN)+ZP(JS)*ZSUD(JS,JN) + ENDDO +ENDDO +!$OMP END PARALLEL DO + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_CALC',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +END SUBROUTINE DILAT_CALC +!============================================================================= +SUBROUTINE DILAT_DEVIATION(PALFA,PBETA,PMAX) + +!**** *DILAT_DEVIATION* - compute the deviation of dilatation/contraction matrixes. + +! Purpose. +! -------- +! Compute the deviation from identity of the product contraction o dilatation +! for a given wave number + +!** Interface. +! ---------- +! *CALL *DILAT_DEVIATION* + +! Explicit arguments. +! ------------------- +! PALFA - Matrix Alfa (Contraction) +! PBETA - Matrix Beta (Dilatation) +! PMAX - deviation from identity + +! Implicit arguments. +! ------------------- + +! Method. +! ------- +! See documentation. + +! Reference. +! ---------- +! Arpege note 19 (in French). + +! Externals. +! --------- + +! Author. +! ------- +! R. El Khatib 19-Jun-2013 from Michel Rochas, DMN (Original 91-01-28). + +! Modifications. +! -------------- +! ------------------------------------------------------------------ + +IMPLICIT NONE + +REAL(KIND=JPRB), INTENT(IN) :: PALFA(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PBETA(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PMAX + +INTEGER(KIND=JPIM) :: ID1, ID2, JN1, JN2 + +REAL(KIND=JPRB), ALLOCATABLE :: ZRESUL(:,:) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "mxmaop.h" + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_DEVIATION',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +ID1=SIZE(PALFA,DIM=1) +ID2=SIZE(PALFA,DIM=2) + +IF ( (SIZE(PBETA,DIM=1) /= ID1) .OR. (SIZE(PBETA,DIM=2) /= ID2) ) THEN + CALL ABOR1('DILAT_DEVIATION : SIZES MISMATCH BETWEEN PALFA AND PBETA') +ENDIF + +PMAX=-HUGE(1._JPRB) +ALLOCATE(ZRESUL(ID2,ID2)) +CALL MXMAOP(PBETA(:,:),ID1,1,PALFA(:,:),1,ID1,ZRESUL,1,ID2,ID2,ID1,ID2) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JN1) +DO JN1=1,ID2 + ZRESUL(JN1,JN1)=ZRESUL(JN1,JN1)-1.0_JPRB +ENDDO +!$OMP END PARALLEL DO +DO JN1=1,ID2 + DO JN2=1,ID2 + PMAX=MAX(PMAX,ABS(ZRESUL(JN1,JN2))) + ENDDO +ENDDO +DEALLOCATE(ZRESUL) + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_DEVIATION',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE DILAT_DEVIATION +!==================================================================== +SUBROUTINE DILAT_CONTROL(KMYPROC,KOUTPROC,KWSET,KULOUT,PMAXALL) + +!**** *DILAT_CONTROL* - Control the dilatation/contraction matrixes. + +! Purpose. +! -------- +! Print out the deviation from identity of the product contraction o dilatation +! for a contiguous set of distributed wave numbers + +!** Interface. +! ---------- +! *CALL *DILAT_CONTROL* + +! Explicit arguments. +! ------------------- +! KMYPROC - Current mpi task +! KOUTPROC - task in charge of writing out +! KWSET - wave set for all wave numbers +! KULOUT - Output logical unit number +! PMAXALL - deviation from identity of all wave numbers + +! Implicit arguments. +! ------------------- + +! Method. +! ------- +! See documentation. + +! Reference. +! ---------- +! Arpege note 19 (in French). + +! Externals. +! --------- + +! Author. +! ------- +! R. El Khatib 19-Jun-2013 from Michel Rochas, DMN (Original 91-01-28). + +! Modifications. +! -------------- +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KWSET(0:) +INTEGER(KIND=JPIM),INTENT(IN) :: KMYPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KOUTPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KULOUT +REAL(KIND=JPRB), INTENT(IN) :: PMAXALL(0:) + +INTEGER(KIND=JPIM) :: JM +REAL(KIND=JPRB) :: ZMAXRECV + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "mxmaop.h" + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_CONTROL',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +IF ( (UBOUND(PMAXALL,DIM=1) /= UBOUND(KWSET,DIM=1)) ) THEN + CALL ABOR1('DILAT_CONTROL : UBOUNDS MISMATCH BETWEEN PMAXALL AND KWSET') +ENDIF + +DO JM=0,UBOUND(PMAXALL,DIM=1) + IF (KWSET(JM) == KMYPROC) THEN + IF (KMYPROC == KOUTPROC) THEN + WRITE(KULOUT,'('' ZONAL WAVE NUMBER '',I4, & + & '' DEVIATION FROM IDENTITY MATRIX '',E10.3)') JM,PMAXALL(JM) + ELSE + CALL MPL_SEND(PMAXALL(JM),KDEST=KOUTPROC,KTAG=JM,CDSTRING='DILAT_CONTROL:') + ENDIF + ELSEIF(KMYPROC == KOUTPROC) THEN + CALL MPL_RECV(ZMAXRECV,KSOURCE=KWSET(JM),KTAG=JM,CDSTRING='DILAT_CONTROL:') + WRITE(KULOUT,'('' ZONAL WAVE NUMBER '',I4, & + & '' DEVIATION FROM IDENTITY MATRIX '',E10.3)') JM,ZMAXRECV + ENDIF +ENDDO + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DILAT_CONTROL',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +END SUBROUTINE DILAT_CONTROL +!============================================================================= +END MODULE DILATATION_MOD diff --git a/src/trans/gpu/algor/module/hip_device_mod.F90 b/src/trans/gpu/algor/module/hip_device_mod.F90 new file mode 100644 index 0000000..d634c9e --- /dev/null +++ b/src/trans/gpu/algor/module/hip_device_mod.F90 @@ -0,0 +1,56 @@ +module hip_device_mod + +interface hip_sync + +integer function hip_synchronize() bind(C,name='hipDeviceSynchronize') +use iso_c_binding +end function hip_synchronize + +end interface hip_sync + +interface hipstreamsync + +integer function hip_stream_synchronize(stream) bind(C,name='hipStreamSynchronize') +use iso_c_binding +type(c_ptr) :: stream +end function hip_stream_synchronize + +end interface hipstreamsync + +interface hipstreamdestroy + +integer function hip_stream_destroy(stream) bind(C,name='hipStreamDestroy') +use iso_c_binding +type(c_ptr) :: stream +end function hip_stream_destroy + +end interface hipstreamdestroy + +interface hipsetdevice + +integer function hip_SetDevice(devnum) bind(C,name='hipSetDevice') +use iso_c_binding +integer(c_int),value :: devnum +end function hip_SetDevice + +end interface hipsetdevice + +interface hipgetdevice + +integer function hip_GetDevice(devnum) bind(C,name='hipGetDevice') +use iso_c_binding +integer(c_int) :: devnum +end function hip_GetDevice + +end interface hipgetdevice + +interface hipgetdevicecount + +integer function hip_GetDeviceCount(devnum) bind(C,name='hipGetDeviceCount') +use iso_c_binding +integer(c_int) :: devnum +end function hip_GetDeviceCount + +end interface hipgetdevicecount + +end module hip_device_mod diff --git a/src/trans/gpu/algor/module/hip_device_mod.F90~ b/src/trans/gpu/algor/module/hip_device_mod.F90~ new file mode 100644 index 0000000..bee2784 --- /dev/null +++ b/src/trans/gpu/algor/module/hip_device_mod.F90~ @@ -0,0 +1,40 @@ +module hip_device_mod + +interface hip_sync + +integer function hip_synchronize() bind(C,name='hipDeviceSynchronize') +use iso_c_binding +end function hip_synchronize + +integer function hip_stream_synchronize(stream) bind(C,name='hipStreamSynchronize') +use iso_c_binding +type(c_ptr) :: stream +end function hip_stream_synchronize + +integer function hip_stream_destroy(stream) bind(C,name='hipStreamDestroy') +use iso_c_binding +type(c_ptr) :: stream +end function hip_stream_destroy + +end interface hip_sync + +interface hip_device + +integer function hip_SetDevice(devnum) bind(C,name='hipSetDevice') +use iso_c_binding +integer(c_int),value :: devnum +end function hip_SetDevice + +integer function hip_GetDevice(devnum) bind(C,name='hipGetDevice') +use iso_c_binding +integer(c_int) :: devnum +end function hip_GetDevice + +integer function hip_GetDeviceCount(devnum) bind(C,name='hipGetDeviceCount') +use iso_c_binding +integer(c_int) :: devnum +end function hip_GetDeviceCount + +end interface hip_device + +end module hip_device_mod diff --git a/src/trans/gpu/algor/module/hipblasDgemmBatched.hip.cpp b/src/trans/gpu/algor/module/hipblasDgemmBatched.hip.cpp new file mode 100644 index 0000000..4b3666e --- /dev/null +++ b/src/trans/gpu/algor/module/hipblasDgemmBatched.hip.cpp @@ -0,0 +1,141 @@ +// +// Wrapper for hipblasDgemm function. +// +// Alan Gray, NVIDIA +// + +#include +#include "hipblas.h" + + +bool hip_alreadyAllocated_dgemm=false; +bool hip_alreadyAllocated_dgemm_handle=false; + +double **d_Aarray_hip; +double **d_Barray_hip; +double **d_Carray_hip; + +double **Aarray_hip; +double **Barray_hip; +double **Carray_hip; + +hipblasHandle_t handle_hip_dgemm; + +extern "C" void hipblasDgemmBatched_wrapper (char transa, char transb, int m, int n,int k, double alpha, const double *A, int lda, int tda, const double *B, int ldb, int tdb, double beta, double *C, int ldc, int tdc, int batchCount) +{ + + + // printf("HIPBLAS m=%d,n=%d,k=%d,batchcount=%d\n",m,n,k,batchCount); + hipblasStatus_t stat; + + + hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; + + if (transa=='T' || transa=='t') + op_t1=HIPBLAS_OP_T; + + if (transb=='T' || transb=='t') + op_t2=HIPBLAS_OP_T; + + + //double **Aarray_hip = (double**) malloc(batchCount*sizeof(double*)); + //double **Barray_hip = (double**) malloc(batchCount*sizeof(double*)); + //double **Carray_hip = (double**) malloc(batchCount*sizeof(double*)); + + + + if (!hip_alreadyAllocated_dgemm_handle){ + stat = hipblasCreate(&handle_hip_dgemm); + if (stat != HIPBLAS_STATUS_SUCCESS) { + printf ("HIPBLAS initialization failed\n"); + //return EXIT_FAILURE; + } + } + hip_alreadyAllocated_dgemm_handle=true; + + if (!hip_alreadyAllocated_dgemm){ + hipError_t errcm1 = hipHostMalloc(&Aarray_hip,batchCount*sizeof(double*)); + hipError_t errcm2 = hipHostMalloc(&Barray_hip,batchCount*sizeof(double*)); + hipError_t errcm3 = hipHostMalloc(&Carray_hip,batchCount*sizeof(double*)); + + hipError_t errcm4 = hipMalloc(&d_Aarray_hip,batchCount*sizeof(double*)); + hipError_t errcm5 = hipMalloc(&d_Barray_hip,batchCount*sizeof(double*)); + hipError_t errcm6 = hipMalloc(&d_Carray_hip,batchCount*sizeof(double*)); + } + hip_alreadyAllocated_dgemm=true; + + int i; + for(i=0;i +#include "hipblas.h" + + +bool hip_alreadyAllocated_sgemm=false; +bool hip_alreadyAllocated_sgemm_handle=false; + +float **d_Aarray_sgemm_hip; +float **d_Barray_sgemm_hip; +float **d_Carray_sgemm_hip; + +float **Aarray_sgemm_hip; +float **Barray_sgemm_hip; +float **Carray_sgemm_hip; + +hipblasHandle_t handle_hip_sgemm; + +extern "C" void hipblasSgemmBatched_wrapper (char transa, char transb, int m, int n,int k, float alpha, const float *A, int lda, int tda, const float *B, int ldb, int tdb, float beta, float *C, int ldc, int tdc, int batchCount) +{ + + //printf("HIPBLAS m=%d,n=%d,k=%d,batchcount=%d\n",m,n,k,batchCount); + //exit; + hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; + + if (transa=='T' || transa=='t') + op_t1=HIPBLAS_OP_T; + + if (transb=='T' || transb=='t') + op_t2=HIPBLAS_OP_T; + + //float **Aarray_sgemm = (float**) malloc(batchCount*sizeof(float*)); + //float **Barray_sgemm = (float**) malloc(batchCount*sizeof(float*)); + //float **Carray_sgemm = (float**) malloc(batchCount*sizeof(float*)); + + if (!hip_alreadyAllocated_sgemm_handle){ + hipblasCreate(&handle_hip_sgemm); + hip_alreadyAllocated_sgemm_handle=true; + } + + if (!hip_alreadyAllocated_sgemm){ + hipHostMalloc(&Aarray_sgemm_hip,batchCount*sizeof(float*)); + hipHostMalloc(&Barray_sgemm_hip,batchCount*sizeof(float*)); + hipHostMalloc(&Carray_sgemm_hip,batchCount*sizeof(float*)); + hip_alreadyAllocated_sgemm=true; + } + + hipMalloc(&d_Aarray_sgemm_hip,batchCount*sizeof(float*)); + hipMalloc(&d_Barray_sgemm_hip,batchCount*sizeof(float*)); + hipMalloc(&d_Carray_sgemm_hip,batchCount*sizeof(float*)); + + int i; + for(i=0;i ZTAU) THEN + IK = JN + ZTAU = ZC(JN) + ENDIF +ENDDO +ZTAU_IN = ZTAU +ZTAU_REC= ZTAU +KRANK = 0 +DO WHILE (ZTAU > PEPS**2*ZTAU_IN) + KRANK = KRANK+1 + IF( KRANK <= IMIN ) THEN + ILIST(KRANK) = IK + ! Column swap KRANK with IK + ZSWAPA(:) = PA(:,KRANK) + PA(:,KRANK) = PA(:,IK) + PA(:,IK) = ZSWAPA(:) + ZSWAP = ZC(KRANK) + ZC(KRANK) = ZC(IK) + ZC(IK) = ZSWAP + ! Compute Householder vector + ZBETA=0.0_JPRD + IF( KM-KRANK >= 0 ) THEN + CALL ALG511(ZEPS,KM-KRANK+1_JPIM,PA(KRANK:KM,KRANK),ZV,ZBETA) + ENDIF + ! Apply Householder matrix + IM = KM-KRANK+1 + IN = KN-KRANK+1 + ! LAPACK + CALL DLARF('Left',IM,IN,ZV,1,ZBETA,PA(KRANK,KRANK),KM,ZWORK) + ENDIF + +! Update column norms + ZTAU = 0.0_JPRD + IF(KRANK < IMIN) THEN + PA(KRANK+1:KM,KRANK) = ZV(2:IM) + DO JN=KRANK+1,KN + ZC(JN) = ZC(JN)-PA(KRANK,JN)**2 + IF(ZC(JN) > ZTAU) THEN + IK = JN + ZTAU = ZC(JN) + ENDIF + ENDDO +! Re-compute column norms due to round-off error + IF(ZTAU < ZEPS*ZTAU_REC .OR. ZTAU < 0._JPRD .or. (KN-KRANK) > 100 ) THEN + DO JN=KRANK+1,KN + ZC(JN) = DOT_PRODUCT(PA(KRANK+1:,JN),PA(KRANK+1:,JN)) + IF(ZC(JN) > ZTAU) THEN + IK = JN + ZTAU = ZC(JN) + ENDIF + ENDDO + !write(0,*) 'RECOMPUTE TAU ',KRANK,ZTAU_REC,ZTAU + ZTAU_REC = ZTAU + ENDIF + ENDIF +ENDDO +! Make sure klist is filled also beyond krank +DO JN=1,KN + KLIST(JN) = JN +ENDDO +DO JN=1,KRANK + ISWAP = KLIST(JN) + KLIST(JN) = KLIST(ILIST(JN)) + KLIST(ILIST(JN)) = ISWAP +ENDDO + +END SUBROUTINE ALG541 + +!============================================================================== +SUBROUTINE ALG511(PEPS,KSIZE,PX,PV,PBETA) +IMPLICIT NONE +! Compute Householder vector +! Algorithm 5.1.1 from Matrix Computations, G.H.Golub & C.F van Loen, third ed. +REAL(KIND=JPRD),INTENT(IN) :: PEPS ! Precision +REAL(KIND=JPRD),INTENT(IN) :: PX(:) +INTEGER(KIND=JPIM), INTENT(IN) :: KSIZE +REAL(KIND=JPRD),INTENT(OUT) :: PV(:) +REAL(KIND=JPRD),INTENT(OUT) :: PBETA +INTEGER(KIND=JPIM) :: IL +REAL(KIND=JPRD) :: ZSIGMA,ZMU, ZNORM +REAL(KIND=JPRD) :: ZX(KSIZE) +!------------------------------------------------------------------------------- +! normalize +ZNORM=0._JPRD +DO IL=1,KSIZE + ZNORM = ZNORM + PX(IL)*PX(IL) +ENDDO +ZNORM=SQRT(ZNORM) +ZX(:)=PX(1:KSIZE) +IF( ZNORM > PEPS ) ZX(:)=PX(1:KSIZE)/ZNORM + +ZSIGMA=0._JPRD +IF( KSIZE > 1 ) ZSIGMA = DOT_PRODUCT(ZX(2:KSIZE),ZX(2:KSIZE)) +PV(1) = 1.0_JPRD +IF( KSIZE > 1 ) PV(2:KSIZE) = ZX(2:KSIZE) +IF(ABS(ZSIGMA) < PEPS**2) THEN + PBETA = 0.0_JPRD +ELSE + ZMU = SQRT(ZX(1)**2+ZSIGMA) + IF(ZX(1) <= 0.0_JPRD) THEN + PV(1) = ZX(1)-ZMU + ELSE + PV(1) = -ZSIGMA/(ZX(1)+ZMU) + ENDIF + PBETA = 2.0_JPRD*PV(1)**2/(ZSIGMA+PV(1)**2) + PV(:) = PV(:)/(PV(1)) +ENDIF + +END SUBROUTINE ALG511 +!================================================================================ + +END MODULE INTERPOL_DECOMP_MOD diff --git a/src/trans/gpu/algor/module/rocblasDgemmBatched.hip.cpp b/src/trans/gpu/algor/module/rocblasDgemmBatched.hip.cpp new file mode 100644 index 0000000..e36d75d --- /dev/null +++ b/src/trans/gpu/algor/module/rocblasDgemmBatched.hip.cpp @@ -0,0 +1,162 @@ +// +// Wrapper for hipblasDgemm function. +// +// Alan Gray, NVIDIA +// +#include +#include +#include "hip/hip_runtime_api.h" +#include "rocblas.h" + +using namespace std; + +bool roc_alreadyAllocated_dgemm=false; +bool roc_alreadyAllocated_dgemm_handle=false; + +double **d_Aarray_roc; +double **d_Barray_roc; +double **d_Carray_roc; + +double **Aarray_roc; +double **Barray_roc; +double **Carray_roc; + +//hipblasHandle_t handle_roc_dgemm; +rocblas_handle handle_roc_dgemm; + +extern "C" void rocblasDgemmBatched_wrapper (char transa, char transb, int m, int n,int k, double alpha, const double *A, int lda, int tda, const double *B, int ldb, int tdb, double beta, double *C, int ldc, int tdc, int batchCount) +{ + + // printf("ROCBLAS m=%d,n=%d,k=%d,batchcount=%d\n",m,n,k,batchCount); + // hipblasStatus_t stat; + //hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; + //if (transa=='T' || transa=='t') + // op_t1=HIPBLAS_OP_T; + //if (transb=='T' || transb=='t') + // op_t2=HIPBLAS_OP_T; + rocblas_operation op_t1 = rocblas_operation_none, op_t2 = rocblas_operation_none; + //hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; + if (transa=='T' || transa=='t') op_t1 = rocblas_operation_transpose; + if (transb=='T' || transb=='t') op_t2 = rocblas_operation_transpose; + + //double **Aarray_roc = (double**) malloc(batchCount*sizeof(double*)); + //double **Barray_roc = (double**) malloc(batchCount*sizeof(double*)); + //double **Carray_roc = (double**) malloc(batchCount*sizeof(double*)); + + if (!roc_alreadyAllocated_dgemm_handle){ + rocblas_status stat = rocblas_create_handle(&handle_roc_dgemm); + if(stat == rocblas_status_success) + { + //cout << "status == rocblas_status_success" << endl; + } + else + { + cout << "rocblas failure: status = " << stat << endl; + } + //stat = hipblasCreate(&handle_roc_dgemm); + //if (stat != HIPBLAS_STATUS_SUCCESS) { + // printf ("ROCBLAS initialization failed\n"); + //return EXIT_FAILURE; + //} + } + roc_alreadyAllocated_dgemm_handle=true; + + if (!roc_alreadyAllocated_dgemm){ + hipError_t errcm1 = hipHostMalloc(&Aarray_roc,batchCount*sizeof(double*)); + hipError_t errcm2 = hipHostMalloc(&Barray_roc,batchCount*sizeof(double*)); + hipError_t errcm3 = hipHostMalloc(&Carray_roc,batchCount*sizeof(double*)); + + hipError_t errcm4 = hipMalloc(&d_Aarray_roc,batchCount*sizeof(double*)); + hipError_t errcm5 = hipMalloc(&d_Barray_roc,batchCount*sizeof(double*)); + hipError_t errcm6 = hipMalloc(&d_Carray_roc,batchCount*sizeof(double*)); + } + roc_alreadyAllocated_dgemm=true; + + int i; + for(i=0;i +#include +#include "hip/hip_runtime_api.h" +#include "rocblas.h" +//#include "hipblas.h" + +using namespace std; + +bool roc_alreadyAllocated_sgemm=false; +bool roc_alreadyAllocated_sgemm_handle=false; + +float **d_Aarray_roc_sgemm; +float **d_Barray_roc_sgemm; +float **d_Carray_roc_sgemm; + +float **Aarray_roc_sgemm; +float **Barray_roc_sgemm; +float **Carray_roc_sgemm; + +//hipblasHandle_t handle_roc_sgemm; +rocblas_handle handle_roc_sgemm; + +extern "C" void rocblasSgemmBatched_wrapper (char transa, char transb, int m, int n,int k, float alpha, const float *A, int lda, int tda, const float *B, int ldb, int tdb, float beta, float *C, int ldc, int tdc, int batchCount) +{ + + //TCo79 default ROCBLAS m=4,n=80,k=40,batchcount=80 + //printf("ROCBLAS m=%d,n=%d,k=%d,batchcount=%d\n",m,n,k,batchCount); + //exit; + rocblas_operation op_t1 = rocblas_operation_none, op_t2 = rocblas_operation_none; + //hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; + if (transa=='T' || transa=='t') + op_t1 = rocblas_operation_transpose; + //op_t1=HIPBLAS_OP_T; + if (transb=='T' || transb=='t') + op_t2 = rocblas_operation_transpose; + //op_t2=HIPBLAS_OP_T; + + //float **Aarray_roc_sgemm = (float**) malloc(batchCount*sizeof(float*)); + //float **Barray_roc_sgemm = (float**) malloc(batchCount*sizeof(float*)); + //float **Carray_roc_sgemm = (float**) malloc(batchCount*sizeof(float*)); + + if (!roc_alreadyAllocated_sgemm_handle){ + //hipblasCreate(&handle_roc_sgemm); + rocblas_status stat = rocblas_create_handle(&handle_roc_sgemm); + if(stat == rocblas_status_success) + { + //cout << "status create == rocblas_status_success" << endl; + } + else + { + cout << "rocblas create failure: status = " << stat << endl; + } + } + roc_alreadyAllocated_sgemm_handle=true; + + if (!roc_alreadyAllocated_sgemm){ + hipHostMalloc(&Aarray_roc_sgemm,batchCount*sizeof(float*)); + hipHostMalloc(&Barray_roc_sgemm,batchCount*sizeof(float*)); + hipHostMalloc(&Carray_roc_sgemm,batchCount*sizeof(float*)); + } + roc_alreadyAllocated_sgemm=true; + + hipMalloc(&d_Aarray_roc_sgemm,batchCount*sizeof(float*)); + hipMalloc(&d_Barray_roc_sgemm,batchCount*sizeof(float*)); + hipMalloc(&d_Carray_roc_sgemm,batchCount*sizeof(float*)); + + int i; + for(i=0;i 0) then + inumc=0 + do j1=1,kn-1 + do j2=1,kclosel(j1) + idist=j2 + if(.not.llxy(j1) .and. llxy(j1+idist)) then + inumc=inumc+1 + ztheta(j1)=ztheta(j1)-pcik(inumc)*zq(j1+idist) + elseif(llxy(j1) .and. .not.llxy(j1+idist)) then + inumc=inumc+1 + ztheta(j1+idist)=ztheta(j1+idist)+pcik(inumc)*zq(j1) + endif + enddo + enddo +endif + +do j1=1,kn + if(.not. llxy(j1)) then + i1=kindex(j1) + ptheta(i1+iy)=ztheta(j1) + endif +enddo + +end subroutine potf +!========================================================================== +recursive subroutine seefmm_mulv(ydfmm,ldxout,pq,ptheta) +implicit none + +type(fmm_type) ,intent(in) :: ydfmm +logical ,intent(in) :: ldxout +real(kind=JPRBT) ,intent(in) :: pq(:) +real(kind=JPRBT) ,intent(out) :: ptheta(:) + +!------------------------------------------------------------------------- +call potf(ydfmm%nxy,ydfmm%nx,ldxout,ydfmm%nquad,& + & ydfmm%rw,pq,ydfmm%rdexp,ydfmm%index,& + & ydfmm%nclose,ydfmm%ncik,ydfmm%cik,ptheta) + +end subroutine seefmm_mulv +!========================================================================== +recursive subroutine seefmm_mulm(ydfmm,km,kskip,ldxout,pq,ptheta) +implicit none + +type(fmm_type) ,intent(in) :: ydfmm +integer(kind=jpim),intent(in) :: km +integer(kind=jpim),intent(in) :: kskip +logical ,intent(in) :: ldxout +real(kind=JPRBT) ,intent(in) :: pq(:,:) +real(kind=JPRBT) ,intent(out) :: ptheta(:,:) + +!------------------------------------------------------------------------- +call potfm(ydfmm%nxy,km,kskip,ydfmm%nx,ldxout,ydfmm%nquad,& + & ydfmm%rw,pq,ydfmm%rdexp,ydfmm%index,& + & ydfmm%nclose,ydfmm%ncik,ydfmm%cik,ptheta) +end subroutine seefmm_mulm +!========================================================================== + +recursive subroutine potfm(kn,km,kskip,kx,ldxout,kquad,prw,pq,prdexp,kindex,kclosel,kcik,pcik,ptheta) +implicit none + +integer(kind=jpim),intent(in) :: kn +integer(kind=jpim),intent(in) :: km +integer(kind=jpim),intent(in) :: kskip +integer(kind=jpim),intent(in) :: kx +logical ,intent(in) :: ldxout +integer(kind=jpim),intent(in) :: kquad +real(kind=JPRBT) ,intent(in) :: prw(:) +real(kind=JPRBT) ,intent(in) :: pq(:,:) +real(kind=JPRBT) ,intent(in) :: prdexp(:,:) +integer(kind=jpim),intent(in) :: kindex(:) +integer(kind=jpim),intent(in) :: kclosel(:) +integer(kind=jpim),intent(in) :: kcik +real(kind=JPRBT) ,intent(in) :: pcik(:) +real(kind=JPRBT) ,intent(out) :: ptheta(:,:) + +real(kind=JPRBT) :: zalpha(kquad,km) +integer(kind=jpim) :: j1,j2,jm,jq,inumc,idist,iquad +integer(kind=jpim) :: iout,iq,i1,i1p1,i1pd,ik1,ix,iy +logical :: lxy,llxy(kn) + +lxy(ik1) = (ik1 <= kx .eqv. ldxout) +!------------------------------------------------------------------------- + +!CALL GSTATS(209,0) +ptheta(:,:)=0.0_JPRBT +if(ldxout) then + ix=0 + iy=-kx +else + ix=-kx + iy=0 +endif +do j1=1,kn + i1=kindex(j1) + llxy(j1)=lxy(i1) +enddo + +if(llxy(1)) then + do jm=1,km,kskip + zalpha(:,jm)=pq(jm,kindex(1)+ix) + enddo +else + zalpha(:,:)=0.0_JPRBT +endif +!CALL GSTATS(209,1) +!CALL GSTATS(210,0) +do j1=2,kn + i1=kindex(j1) + if(llxy(j1) ) then + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + endif + else + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)+prw(jq)*zalpha(jq,jm) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)+prw(jq)*zalpha(jq,jm) + enddo + enddo + endif + endif +enddo +!CALL GSTATS(210,1) + +!CALL GSTATS(211,0) +if(llxy(kn)) then + do jm=1,km,kskip + zalpha(:,jm)=pq(jm,kindex(kn)+ix) + enddo +else + zalpha(:,:)=0.0 +endif +!CALL GSTATS(211,1) +!CALL GSTATS(212,0) +do j1=kn-1,1,-1 + i1=kindex(j1) + i1p1=kindex(j1+1) + if(llxy(j1)) then + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + zalpha(jq,jm)=zalpha(jq,jm)+pq(jm,i1+ix) + enddo + enddo + endif + else + if( kskip==1 )then + do jq=1,kquad + do jm=1,km + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)-prw(jq)*zalpha(jq,jm) + enddo + enddo + else + do jq=1,kquad + do jm=1,km,kskip + zalpha(jq,jm)=zalpha(jq,jm)*prdexp(jq,j1+1) + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)-prw(jq)*zalpha(jq,jm) + enddo + enddo + endif + endif +enddo +!CALL GSTATS(212,1) + + +IF(kcik > 0) then +! CALL GSTATS(213,0) + inumc=0 + do j1=1,kn-1 + do j2=1,kclosel(j1) + idist=j2 + i1=kindex(j1) + i1pd=kindex(j1+idist) + if(.not.llxy(j1) .and. llxy(j1+idist)) then + inumc=inumc+1 + do jm=1,km,kskip + ptheta(jm,i1+iy)=ptheta(jm,i1+iy)-pcik(inumc)*pq(jm,i1pd+ix) + enddo + elseif(llxy(j1) .and. .not.llxy(j1+idist)) then + inumc=inumc+1 + do jm=1,km,kskip + ptheta(jm,i1pd+iy)=ptheta(jm,i1pd+iy)+pcik(inumc)*pq(jm,i1+ix) + enddo + endif + enddo + enddo +! CALL GSTATS(213,1) +endif + +end subroutine potfm +!========================================================================= +recursive subroutine suquad(kn,prange,kquad,prw,prt,pr) +implicit none + +integer(kind=jpim) ,intent(in) :: kn +real(kind=JPRBT),intent(in) :: prange +integer(kind=jpim) ,intent(in) :: kquad +real(kind=JPRBT),intent(out) :: prw(:) +real(kind=JPRBT),intent(out) :: prt(:) +real(kind=JPRBT),intent(out) :: pr + +real(kind=JPRBT) :: za,zb,zs +integer(kind=jpim) :: jm +!------------------------------------------------------------------------- + +za=1.0 +zb=500.0 +zs=zb/prange +pr=za/zs +call wts500(prt,prw,kquad) +do jm=1,kquad + prw(jm)=prw(jm)*zs + prt(jm)=prt(jm)*zs +enddo +end subroutine suquad +!========================================================================== + +recursive subroutine comb_xy(kx,px,ky,py,kxy,pxy,kindex) + +implicit none + +integer(kind=jpim), intent(in) :: kx,ky +real(kind=jprd), intent(in) :: px(:) +real(kind=JPRBT), intent(in) :: py(:) +integer(kind=jpim), intent(in) :: kxy +real(kind=JPRBT), intent(out) :: pxy(:) +integer(kind=jpim), intent(out) :: kindex(:) +integer(kind=jpim) :: jxy,ix,iy,iret + +!------------------------------------------------------------------------- + +pxy(1:kx)=px(1:kx) +pxy(kx+1:kx+ky)=py(1:ky) +!call m01daf(pxy,1,kxy,'D',irank,ifail) +call keysort(iret,pxy,kxy,descending=.true.,index=kindex,init=.true.) +!!$do jxy=1,kxy +!!$ kindex(irank(jxy))=jxy +!!$enddo + +end subroutine comb_xy +!========================================================================== +recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& + & kclosel,pcik,knocik,pdiff) + +implicit none + +integer(kind=jpim), intent(in) :: kx +integer(kind=jpim), intent(in) :: kxy +integer(kind=jpim), intent(in) :: kquad +real(kind=JPRBT), intent(in) :: pxy(:) +real(kind=JPRBT), intent(in) :: prw(:) +real(kind=JPRBT), intent(in) :: pr +real(kind=JPRBT), intent(in) :: prt(:) +integer(kind=jpim), intent(in) :: kindex(:) +real(kind=JPRBT), intent(out) :: prdexp(:,:) +integer(kind=jpim), intent(out) :: kclosel(:) +real(kind=JPRBT), intent(out) :: pcik(:) +integer(kind=jpim), intent(out) :: knocik +real(kind=JPRBT),optional, intent(in) :: pdiff(:,:) + +real(kind=JPRBT) :: zdx +real(kind=JPRBT) :: zsum +real(kind=JPRBT) :: zdiff(kxy,kxy) +integer(kind=jpim) :: jxy,jq,isize,jdist,ixy,ixym1,i1,i1pd,j1,j2 +logical :: llexit +!------------------------------------------------------------------------- +if(present(pdiff)) then + zdiff(:,:)=pdiff(:,:) +else + do j1=1,kxy + do j2=1,kxy + zdiff(j1,j2)=pxy(j1)-pxy(j2) + enddo + enddo +endif +do jxy=2,kxy + ixy=kindex(jxy) + ixym1=kindex(jxy-1) + do jq=1,kquad + prdexp(jq,jxy)=exp(zdiff(ixy,ixym1)*prt(jq)) + enddo +enddo +kclosel(:)=0 +knocik=0 +isize=size(pcik) +llexit=.true. +do jxy=1,kxy-1 + do jdist=1,kxy-jxy + i1=kindex(jxy) + i1pd=kindex(jxy+jdist) + zdx=zdiff(i1,i1pd) + if(zdx < pr) then + llexit=.false. + kclosel(jxy)=kclosel(jxy)+1 + if((i1 > kx .and. i1pd <= kx) .or. (i1pd > kx .and. i1 <= kx)) then + knocik=knocik+1 + zsum=0.0_JPRBT + do jq=1,kquad + zsum=zsum+prw(jq)*exp(-zdx*prt(jq)) + enddo + pcik(knocik)=1.0_JPRBT/zdx-zsum + endif + else + exit + endif + enddo + if(knocik > isize) stop ' precompfint : pcik tto small' +enddo + +end subroutine prepotf +!========================================================================== + +end module seefmm_mix diff --git a/src/trans/gpu/algor/module/wts500_mod.F90 b/src/trans/gpu/algor/module/wts500_mod.F90 new file mode 100644 index 0000000..0859d78 --- /dev/null +++ b/src/trans/gpu/algor/module/wts500_mod.F90 @@ -0,0 +1,3764 @@ +! (C) Copyright 2014- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE WTS500_MOD +CONTAINS +SUBROUTINE WTS500(PX,PW,KN) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KN +REAL(KIND=JPRBT), INTENT(OUT) :: PX(:),PW(:) + +! This routine returns a set of Gaussian nodes and weights for +! integrating the functions exp(lambda*x)dx over the range x=0 to x=infinity. +! They work for lambda in the range [1,501]. The accuracy +! of the quadrature for each n is given in the tables below. + +! Input arguments: +! n - number of weights and nodes in the quadrature. This must +! be an integer in the range [2,56]. +! +! Output arguments: +! w - weights +! x - nodes +! +! +! The following table gives the approximate accuracy of the weights in +! this file, that is to say the experimentally determined maximum +! absolute error for lambda in the range [1,501]. +! +! 2 0.76126E-01 +! 3 0.26903E-01 +! 4 0.88758E-02 +! 5 0.28110E-02 +! 6 0.86785E-03 +! 7 0.26276E-03 +! 8 0.78346E-04 +! 9 0.23066E-04 +! 10 0.67184E-05 +! 11 0.19386E-05 +! 12 0.55482E-06 +! 13 0.15762E-06 +! 14 0.44478E-07 +! 15 0.12474E-07 +! 16 0.34787E-08 +! 17 0.96498E-09 +! 18 0.26636E-09 +! 19 0.73174E-10 +! 20 0.20013E-10 +! 21 0.54503E-11 +! 22 0.14783E-11 +! 23 0.39937E-12 +! 24 0.10749E-12 +! 25 0.28822E-13 +! 26 0.77011E-14 +! 27 0.20993E-14 +! 28 0.59593E-15 + +! (The accuracies beyond this point are +! only available if this routine is converted +! to extended precision.) +! +! 29 0.16665E-15 +! 30 0.45938E-16 +! 31 0.12483E-16 +! 32 0.33436E-17 +! 33 0.88209E-18 +! 34 0.22896E-18 +! 35 0.58363E-19 +! 36 0.15182E-19 +! 37 0.45892E-20 +! 38 0.13452E-20 +! 39 0.38384E-21 +! 40 0.10683E-21 +! 41 0.29025E-22 +! 42 0.76955E-23 +! 43 0.19878E-23 +! 44 0.49867E-24 +! 45 0.12879E-24 +! 46 0.38890E-25 +! 47 0.11493E-25 +! 48 0.32717E-26 +! 49 0.89977E-27 +! 50 0.23916E-27 +! 51 0.66534E-28 +! 52 0.20256E-28 +! 53 0.60754E-29 +! 54 0.17974E-29 +! 55 0.52173E-30 +! 56 0.14656E-30 +! 57 0.39867E-31 +! 58 0.17622E-31 +! 59 0.11941E-31 + + +if(kn < 1 .or. kn > 59) CALL ABOR1('kn out of bounds in wts500') + +if(kn == 1) then + px( 1)= 0.30029234138173323099658823269124393D+00 + pw( 1)= 0.10474544159373900054024730385996879D+01 +endif +if(kn == 2) then + px( 1)= 0.44614645646035084305052271657195780D-01 + px( 2)= 0.69921614559509068409005059560416520D+00 + pw( 1)= 0.15994862626671497398269903651565162D+00 + pw( 2)= 0.15511944041990193294522419186360192D+01 +endif +if(kn == 3) then + px( 1)= 0.11857389353662594950547350532174902D-01 + px( 2)= 0.16764835416208964726306668598724940D+00 + px( 3)= 0.11277491807394385305149473243132366D+01 + pw( 1)= 0.40534466810113107834140226328650886D-01 + pw( 2)= 0.36261372044374320167772351965467234D+00 + pw( 3)= 0.19347454032003660753035080254268649D+01 +endif +if(kn == 4) then + px( 1)= 0.57654208655188821571537226422468374D-02 + px( 2)= 0.62523167781181198280172417136126064D-01 + px( 3)= 0.36533207087496350173593536437526512D+00 + px( 4)= 0.16157524591822212021884702173778156D+01 + pw( 1)= 0.17868545000488806425866630797084082D-01 + pw( 2)= 0.12328668633600752343455555544669733D+00 + pw( 3)= 0.58372673164630844216911861958288950D+00 + pw( 4)= 0.22766170591050845697618451516268360D+01 +endif +if(kn == 5) then + px( 1)= 0.38286655100887720869535305553503767D-02 + px( 2)= 0.32638654131646742439488396483219523D-01 + px( 3)= 0.15979343932440245421190120326583297D+00 + px( 4)= 0.62593598592475461862138219614406608D+00 + px( 5)= 0.21448850159686373839369069493121931D+01 + pw( 1)= 0.11006282598632254556602552524231582D-01 + pw( 2)= 0.57230982733825644334586234468135637D-01 + pw( 3)= 0.23302193704059872350599634911774233D+00 + pw( 4)= 0.80399187922220639155300759119683454D+00 + pw( 5)= 0.25817835060095957471682462945418890D+01 +endif +if(kn == 6) then + px( 1)= 0.29149725976167556773168063131622477D-02 + px( 2)= 0.21103262424016434877304140537976324D-01 + px( 3)= 0.87672665103786085283260733993906787D-01 + px( 4)= 0.30362191434277672991331660483079722D+00 + px( 5)= 0.93772122523975475283401768866415608D+00 + px( 6)= 0.27034204049910086432949276908848690D+01 + pw( 1)= 0.80264082196721958922253358145693741D-02 + pw( 2)= 0.33206285080799577093824435206894388D-01 + pw( 3)= 0.11527314673621704717905199327206330D+00 + pw( 4)= 0.35781407343251622864391524621893261D+00 + pw( 5)= 0.10155006971456147012687855907386742D+01 + pw( 6)= 0.28564602273790248848620772701419901D+01 +endif +if(kn == 7) then + px( 1)= 0.23756434216797693908355583003051663D-02 + px( 2)= 0.15535118461800569190547935165674497D-01 + px( 3)= 0.56551273947723240042834250291153011D-01 + px( 4)= 0.17450996559452745580564075650691432D+00 + px( 5)= 0.49112500667928528178358415821262749D+00 + px( 6)= 0.12919504684055881399873474932878924D+01 + px( 7)= 0.32851412356973227675206901468229149D+01 + pw( 1)= 0.63886696350675163666711135442919764D-02 + pw( 2)= 0.22442242541224196499797804610442189D-01 + pw( 3)= 0.67162457789171790971598253749543852D-01 + pw( 4)= 0.18766490984256656291382113580254403D+00 + pw( 5)= 0.48993342911988760795594155472540210D+00 + pw( 6)= 0.12160105557427987424854903052204337D+01 + pw( 7)= 0.31065668529411483170389922159304207D+01 +endif +if(kn == 8) then + px( 1)= 0.20132180607225834852983700686031058D-02 + px( 2)= 0.12364145427770238507453563351438370D-01 + px( 3)= 0.40779487004322342225816753965428875D-01 + px( 4)= 0.11391148589791378330792648499651137D+00 + px( 5)= 0.29412883145944878287611616309747638D+00 + px( 6)= 0.71837098644914450185415022533227351D+00 + px( 7)= 0.16816853747148161279323645594606599D+01 + px( 8)= 0.38855951611162649511701947653116899D+01 + pw( 1)= 0.53408830450397378295721475271713427D-02 + pw( 2)= 0.16787984180357441921393643943808306D-01 + pw( 3)= 0.44204970292771876400860637563463140D-01 + pw( 4)= 0.11185011190895920238718302559774708D+00 + pw( 5)= 0.27016191944753418682586773667091217D+00 + pw( 6)= 0.62450892495416087531923817566640810D+00 + pw( 7)= 0.14051940584215133597891405916743820D+01 + pw( 8)= 0.33364764999199772405176908767598180D+01 +endif +if(kn == 9) then + px( 1)= 0.17503557878075214519879036524833375D-02 + px( 2)= 0.10330054871723184754037951908660005D-01 + px( 3)= 0.31732206325445549848937295392421049D-01 + px( 4)= 0.81679121803401719155156637120993002D-01 + px( 5)= 0.19535573613157974971806765507983509D+00 + px( 6)= 0.44595223788074039354969352640100852D+00 + px( 7)= 0.98135220945386835905397405725519927D+00 + px( 8)= 0.21015170770020791654126380210416672D+01 + px( 9)= 0.45016099625796459085057556421321067D+01 + pw( 1)= 0.46046107862322611600702922635137510D-02 + pw( 2)= 0.13429769279555420282230890098322098D-01 + pw( 3)= 0.31849272894061216190385847667286999D-01 + pw( 4)= 0.73619249380405585299506449662883408D-01 + pw( 5)= 0.16535947795274881422091783969932427D+00 + pw( 6)= 0.35934417872243713158616629815331565D+00 + pw( 7)= 0.75859660458155596407004967675485659D+00 + pw( 8)= 0.15835611580085742332645675327785911D+01 + pw( 9)= 0.35495334121390922353050388191171761D+01 +endif +if(kn == 10) then + px( 1)= 0.15499542669794147311761132610985078D-02 + px( 2)= 0.89096688398509678030924958952986541D-02 + px( 3)= 0.26016129693043413683698040760210971D-01 + px( 4)= 0.62749208598031754292711006026615926D-01 + px( 5)= 0.14050353407849426047312501552279980D+00 + px( 6)= 0.30181414123622395598472969521213967D+00 + px( 7)= 0.62857185766887058395879604119303521D+00 + px( 8)= 0.12763299399934852402803633611287669D+01 + px( 9)= 0.25471652578226896546390447678170800D+01 + px(10)= 0.51308067782658204682801321291953140D+01 + pw( 1)= 0.40548253986991389304086648382669812D-02 + pw( 2)= 0.11236188731318700188319947717229244D-01 + pw( 3)= 0.24538592970816425402207400802497825D-01 + pw( 4)= 0.52343517158460444258926972420783323D-01 + pw( 5)= 0.10996558531241373841512937168184451D+00 + pw( 6)= 0.22576984249893556435278829920283551D+00 + pw( 7)= 0.45261772769497660240184928082408646D+00 + pw( 8)= 0.89046194530658792052229355508350808D+00 + pw( 9)= 0.17519181692872931385803490555209237D+01 + pw(10)= 0.37483184442175079452559119390314762D+01 +endif +if(kn == 11) then + px( 1)= 0.13916503869954620980214122938112018D-02 + px( 2)= 0.78558050039620362116422912802704097D-02 + px( 3)= 0.22120351963459063625179184464273817D-01 + px( 4)= 0.50713242013627782652925282235350304D-01 + px( 5)= 0.10742319887758113859279057980251631D+00 + px( 6)= 0.21869117548117680828718663672766937D+00 + px( 7)= 0.43336030838054496927092430378951163D+00 + px( 8)= 0.84018288532692476091948497009100848D+00 + px( 9)= 0.15999462842086233335871841089805130D+01 + px(10)= 0.30151891304900922172275251687511795D+01 + px(11)= 0.57713337136325357659044166408988553D+01 + pw( 1)= 0.36266510989460802397873164288690734D-02 + pw( 2)= 0.96950369134995745371692684658412310D-02 + pw( 3)= 0.19868559916434820960232503677049332D-01 + pw( 4)= 0.39531505533000821496522977944503206D-01 + pw( 5)= 0.78139977968766268496958310477317134D-01 + pw( 6)= 0.15225813478813020210433348166511355D+00 + pw( 7)= 0.29137456345793780649576074277273596D+00 + pw( 8)= 0.54807495314077461646456170049418256D+00 + pw( 9)= 0.10191215020827390969142637205874466D+01 + pw(10)= 0.19111370110068844260874568369820828D+01 + pw(11)= 0.39348541670555911675625708842098543D+01 +endif +if(kn == 12) then + px( 1)= 0.12632156319939695579215561432558385D-02 + px( 2)= 0.70387146180879286642550585568639377D-02 + px( 3)= 0.19302659916972801592515201957656444D-01 + px( 4)= 0.42551194105513329489354627472007290D-01 + px( 5)= 0.86085992365439625679560348707191417D-01 + px( 6)= 0.16725813918676372888922850902058409D+00 + px( 7)= 0.31700589361943171070304767229555238D+00 + px( 8)= 0.58951191619791589683106843254408061D+00 + px( 9)= 0.10788413480839543106858840025589018D+01 + px(10)= 0.19492439011651360999619060490611511D+01 + px(11)= 0.35027911449093405858739078699931569D+01 + px(12)= 0.64217203451802643126959421992550844D+01 + pw( 1)= 0.32827550127604954815335522044868924D-02 + pw( 2)= 0.85507540197432347733759971108613029D-02 + pw( 3)= 0.16691067543209908975186757261638663D-01 + pw( 4)= 0.31305443669027793014029811945580099D-01 + pw( 5)= 0.58567896813505682343320309424861089D-01 + pw( 6)= 0.10877780693120356209434163632243069D+00 + pw( 7)= 0.19948368504455875259042629374456601D+00 + pw( 8)= 0.36073967408442558747147434040847228D+00 + pw( 9)= 0.64434179114194185133722660716595021D+00 + pw(10)= 0.11440591869200469793282529105709381D+01 + pw(11)= 0.20620613323447639907885178297356148D+01 + pw(12)= 0.41107547758236817466309181956604953D+01 +endif +if(kn == 13) then + px( 1)= 0.11568070520917003084172899381406952D-02 + px( 2)= 0.63841031288591798815234066736007959D-02 + px( 3)= 0.17167711063873432612672836118321923D-01 + px( 4)= 0.36716019315047555758945584779212588D-01 + px( 5)= 0.71538434902470635056036514447143755D-01 + px( 6)= 0.13352689214829569833793677431883262D+00 + px( 7)= 0.24327491638178311179091055382048232D+00 + px( 8)= 0.43570388054574854249210127119178039D+00 + px( 9)= 0.76944422789933215499607649746035088D+00 + px(10)= 0.13426076062689412695687622546331410D+01 + px(11)= 0.23216397056754496795639041380757885D+01 + px(12)= 0.40076692644610800412009568941378368D+01 + px(13)= 0.70807767026725092855482684561044825D+01 + pw( 1)= 0.29999461201538405335426373656007144D-02 + pw( 2)= 0.76647148893919466896210631066410433D-02 + pw( 3)= 0.14413099397767107036014918126327008D-01 + pw( 4)= 0.25735254465096214146045248201904623D-01 + pw( 5)= 0.45838911095175772187879197848974379D-01 + pw( 6)= 0.81479609103017647524386799211775902D-01 + pw( 7)= 0.14368582127715272775954689518716653D+00 + pw( 8)= 0.25069362938769053221329919008840415D+00 + pw( 9)= 0.43269827884396605348016529885550754D+00 + pw(10)= 0.74044407474304235266031833325121847D+00 + pw(11)= 0.12650477516195139055095418091901788D+01 + pw(12)= 0.22054684388256755415929800672572855D+01 + pw(13)= 0.42773285665753897495906588616832298D+01 +endif +if(kn == 14) then + px( 1)= 0.10671394835726993643401190254501129D-02 + px( 2)= 0.58463219387999205854721615603871185D-02 + px( 3)= 0.15490001691602291418567227600617406D-01 + px( 4)= 0.32358969391919711170669944447127038D-01 + px( 5)= 0.61149820125492142314645657324953302D-01 + px( 6)= 0.11030358671474348741774495105493271D+00 + px( 7)= 0.19410127886489828338777312891458955D+00 + px( 8)= 0.33608975279087339935499749076365196D+00 + px( 9)= 0.57467416136431217184690034730315880D+00 + px(10)= 0.97213677138209888520957230352753264D+00 + px(11)= 0.16296229550658990298708599801796051D+01 + px(12)= 0.27148835813646440924906285656041458D+01 + px(13)= 0.45279078084008377593720415669623160D+01 + px(14)= 0.77475240501693856896302484250295344D+01 + pw( 1)= 0.27629693325443496023249306681806622D-02 + pw( 2)= 0.69561289060164080762372517703735452D-02 + pw( 3)= 0.12707947219219552524391040926686251D-01 + pw( 4)= 0.21789865957161394780641970526254899D-01 + pw( 5)= 0.37165450147146819315861454426090832D-01 + pw( 6)= 0.63467023298815675308212729382023282D-01 + pw( 7)= 0.10796235746711791223910582849233129D+00 + pw( 8)= 0.18227038265561301471910246064953376D+00 + pw( 9)= 0.30504176678107803021400309104091856D+00 + pw(10)= 0.50632019264974725562617387788020652D+00 + pw(11)= 0.83570187185256971962451509332190849D+00 + pw(12)= 0.13820370228408991881296397708221031D+01 + pw(13)= 0.23420578929692185520173441027903024D+01 + pw(14)= 0.44356505634271121964311698817472400D+01 +endif +if(kn == 15) then + px( 1)= 0.99051088742239546911827900108699496D-03 + px( 2)= 0.53956859621998690434579925957171424D-02 + px( 3)= 0.14133099949775560668841655685250135D-01 + px( 4)= 0.28986898223070841544449976160392634D-01 + px( 5)= 0.53435705913102094881195404152789995D-01 + px( 6)= 0.93645206162765297669714244403422362D-01 + px( 7)= 0.15985934779166946137952895657247365D+00 + px( 8)= 0.26857062154409215737173322640809941D+00 + px( 9)= 0.44599642616020670957476031778753703D+00 + px(10)= 0.73354905454082877060570463263769745D+00 + px(11)= 0.11964718339441153959477756602491571D+01 + px(12)= 0.19381471569679916567453627291281352D+01 + px(13)= 0.31270149297417650854875367303912466D+01 + px(14)= 0.50618963332858424622137075227057629D+01 + px(15)= 0.84211464685237105475663901928632362D+01 + pw( 1)= 0.25613355491269301472247025631698555D-02 + pw( 2)= 0.63749573647622919143443050089843640D-02 + pw( 3)= 0.11385424145275303734569902166128714D-01 + pw( 4)= 0.18885541036270664981042580089520502D-01 + pw( 5)= 0.31017529676997219516938356388920347D-01 + pw( 6)= 0.51073868255802230619626900228351940D-01 + pw( 7)= 0.84046732560351404937544837831163072D-01 + pw( 8)= 0.13765675735555740692176543168654955D+00 + pw( 9)= 0.22395807420699105358370420342497012D+00 + pw(10)= 0.36179555862328041144887460703035761D+00 + pw(11)= 0.58087464283264030346041192108355511D+00 + pw(12)= 0.92964960101952292137644303560667260D+00 + pw(13)= 0.14950837580230651438254975578030658D+01 + pw(14)= 0.24724522131384988410164432454754806D+01 + pw(15)= 0.45866150536692880851084073088444439D+01 +endif +if(kn == 16) then + px( 1)= 0.92424556053971804395203579030996377D-03 + px( 2)= 0.50119857035276309167085959684639920D-02 + px( 3)= 0.13010148344461010136325444162584406D-01 + px( 4)= 0.26298847872231801244580127340702382D-01 + px( 5)= 0.47514597563807100058998705367026609D-01 + px( 6)= 0.81270265524596050675960333331275375D-01 + px( 7)= 0.13512212854897371400148043770796492D+00 + px( 8)= 0.22099831669763609789452208029307415D+00 + px( 9)= 0.35743533198723356545740999174337465D+00 + px(10)= 0.57303934564748771037392604303226621D+00 + px(11)= 0.91178803183594107838418321994075593D+00 + px(12)= 0.14412990877209031034459072051939979D+01 + px(13)= 0.22665732295803028877227882175486339D+01 + px(14)= 0.35563219327028564804732957321586739D+01 + px(15)= 0.56082683107347449605789692452752341D+01 + px(16)= 0.91009557053769985645449386780285663D+01 + pw( 1)= 0.23875730074587121522484947794868875D-02 + pw( 2)= 0.58885836354644078094393546205524519D-02 + pw( 3)= 0.10329316404953667658388475423521547D-01 + pw( 4)= 0.16675469828365142363971545189694061D-01 + pw( 5)= 0.26509288186703737427234519675535818D-01 + pw( 6)= 0.42238486164148838677474311842225533D-01 + pw( 7)= 0.67416914217096425505948847545251978D-01 + pw( 8)= 0.10736855475828194921843388490206803D+00 + pw( 9)= 0.17018723045468088538746817095795499D+00 + pw(10)= 0.26821766647062034548841613907117539D+00 + pw(11)= 0.42033354906191972945041277677914052D+00 + pw(12)= 0.65579398374758854246146234684717478D+00 + pw(13)= 0.10219768938841077866955934226960186D+01 + pw(14)= 0.16043073953579345130225854215735434D+01 + pw(15)= 0.25972025694404090887006269510370113D+01 + pw(16)= 0.47309741605753804978373532621330190D+01 +endif +if(kn == 17) then + px( 1)= 0.86635871969021922697368620380719171D-03 + px( 2)= 0.46809392638055264395521402125297454D-02 + px( 3)= 0.12063365169996780691336553708488918D-01 + px( 4)= 0.24103101327380368111361062755741909D-01 + px( 5)= 0.42839783531484774994275135488102184D-01 + px( 6)= 0.71796215026314425758110076991702409D-01 + px( 7)= 0.11667729790867741765510249018437646D+00 + px( 8)= 0.18634666322578944021297170782701070D+00 + px( 9)= 0.29431002802504656436686613469103506D+00 + px(10)= 0.46097947485749393786168585286436753D+00 + px(11)= 0.71707474488613285211211458276745483D+00 + px(12)= 0.11087399814864640384073502346191564D+01 + px(13)= 0.17054769509601451753469079240400202D+01 + px(14)= 0.26134294141755383267430920003219774D+01 + px(15)= 0.40013054555328633047267069424482020D+01 + px(16)= 0.61658542528138849120122933226961818D+01 + px(17)= 0.97863651496368699835621922821544266D+01 + pw( 1)= 0.22362031087732985183223720565899613D-02 + pw( 2)= 0.54748197495181657019575138084577960D-02 + pw( 3)= 0.94654762542541339323585186529928706D-02 + pw( 4)= 0.14944978294023789536885689508849665D-01 + pw( 5)= 0.23104141337215046610391678093559219D-01 + pw( 6)= 0.35743858431643263735692324511510319D-01 + pw( 7)= 0.55471485911701630593769305151799996D-01 + pw( 8)= 0.86081253422553517325473099895533971D-01 + pw( 9)= 0.13319208512009976788628468120231006D+00 + pw(10)= 0.20518383340535621745997474262468218D+00 + pw(11)= 0.31457026902282458163459435604989060D+00 + pw(12)= 0.48013605826872593355323421318271994D+00 + pw(13)= 0.73064164224806746286555643417201840D+00 + pw(14)= 0.11124853152432080885402590631214998D+01 + pw(15)= 0.17098620915289443753971369096340138D+01 + pw(16)= 0.27167962114974631513284929060677873D+01 + pw(17)= 0.48693666605809874327613580765223186D+01 +endif +if(kn == 18) then + px( 1)= 0.81534546944719247652148851160350705D-03 + px( 2)= 0.43921383032957290852797835084086745D-02 + px( 3)= 0.11252831287113979876218879126262041D-01 + px( 4)= 0.22272809918423842477230910840641111D-01 + px( 5)= 0.39059190621733757694717358109890744D-01 + px( 6)= 0.64351345855435130240707418934923649D-01 + px( 7)= 0.10254178168021558618169365620644254D+00 + px( 8)= 0.16036912239056014178785088963204099D+00 + px( 9)= 0.24793085437822483127871211432445117D+00 + px(10)= 0.38020606680363380296594904808373420D+00 + px(11)= 0.57931198537370336370768123946111904D+00 + px(12)= 0.87782030094596229067122251201903469D+00 + px(13)= 0.13236886505899049407415999004449793D+01 + px(14)= 0.19878981731926587403713993880950166D+01 + px(15)= 0.29773742063276571130424960439239811D+01 + px(16)= 0.44606479473592453417809733684642453D+01 + px(17)= 0.67336454736569532214104241445939923D+01 + px(18)= 0.10476870213785278025831291196419849D+02 + pw( 1)= 0.21031106134613254745224559686561387D-02 + pw( 2)= 0.51180225478690087298803997939678307D-02 + pw( 3)= 0.87447370935970058836913883371899363D-02 + pw( 4)= 0.13556333730223126136121123231259769D-01 + pw( 5)= 0.20464410216338036789779126385561199D-01 + pw( 6)= 0.30840826466947410241261173555971053D-01 + pw( 7)= 0.46646725563835069845910497771424517D-01 + pw( 8)= 0.70665992632893967524146851106623791D-01 + pw( 9)= 0.10691711075454395961181073477729829D+00 + pw(10)= 0.16126547210903485792084905962898804D+00 + pw(11)= 0.24229509616610255685205188073761953D+00 + pw(12)= 0.36259234617456851620630123413473169D+00 + pw(13)= 0.54077318770758097024862643911836327D+00 + pw(14)= 0.80508517291294611337977692389747849D+00 + pw(15)= 0.12010568605667241321654935386099247D+01 + pw(16)= 0.18119190754724382445935517697030275D+01 + pw(17)= 0.28316640971192016310500153967949394D+01 + pw(18)= 0.50023398513295527439214736503709354D+01 +endif +if(kn == 19) then + px( 1)= 0.77004314038027538186473565824644713D-03 + px( 2)= 0.41377999330050882914315571254404182D-02 + px( 3)= 0.10550059721986121680218103475882631D-01 + px( 4)= 0.20721165151890931602860316370210977D-01 + px( 5)= 0.35938464219963308088025295472146865D-01 + px( 6)= 0.58366846240533488939212086566679739D-01 + px( 7)= 0.91445878837421954880767431790624735D-01 + px( 8)= 0.14039874091074934933241224785817280D+00 + px( 9)= 0.21294392165350590964714225395783962D+00 + px(10)= 0.32034449388214261059174416345147381D+00 + px(11)= 0.47894630556295055579310759106626288D+00 + px(12)= 0.71240272881825772752142518345001522D+00 + px(13)= 0.10548945187200170944433178271823931D+01 + px(14)= 0.15558853016268620673915640887496031D+01 + px(15)= 0.22875048294126069831169385958820912D+01 + px(16)= 0.33571879025245227476638520498261293D+01 + px(17)= 0.49331870217088227943005162552018204D+01 + px(18)= 0.73107657529555946558412435928344152D+01 + px(19)= 0.11172033293322081236666941378477642D+02 + pw( 1)= 0.19851404434292128161572603749012797D-02 + pw( 2)= 0.48068259850224485883558544189622861D-02 + pw( 3)= 0.81333622551403917938013382271885652D-02 + pw( 4)= 0.12418290368147926797125266347472059D-01 + pw( 5)= 0.18370520996399132266929646675963512D-01 + pw( 6)= 0.27051260748772011764068846294046263D-01 + pw( 7)= 0.39965808856363914121506612420685851D-01 + pw( 8)= 0.59209186306872358867005896800202963D-01 + pw( 9)= 0.87733163743117132114765866319084195D-01 + pw(10)= 0.12975590467993236932824761605587432D+00 + pw(11)= 0.19133746801777096221136436300603368D+00 + pw(12)= 0.28119521004597734924657796391935552D+00 + pw(13)= 0.41191451657988898367993353735036478D+00 + pw(14)= 0.60189235614006428503841831846326255D+00 + pw(15)= 0.87887424161089233540150542700936886D+00 + pw(16)= 0.12876310724220064351766502884115249D+01 + pw(17)= 0.19106555786053829247748848866287425D+01 + pw(18)= 0.29421880423760456779000496029582960D+01 + pw(19)= 0.51303663807899157605725316747503964D+01 +endif +if(kn == 20) then + px( 1)= 0.72953870581824875898426590899779881D-03 + px( 2)= 0.39119765013576279090594732249736674D-02 + px( 3)= 0.99341482544177994609830844666447236D-02 + px( 4)= 0.19386948343749964006247704258211447D-01 + px( 5)= 0.33316937960159820123580449168857215D-01 + px( 6)= 0.53460240758464289437871006955420897D-01 + px( 7)= 0.82550373662718413944945968267436420D-01 + px( 8)= 0.12470333365766989270462564119778831D+00 + px( 9)= 0.18593248929460522396945432673813683D+00 + px(10)= 0.27488824477709750979845021236505632D+00 + px(11)= 0.40393075219294011341163041535306531D+00 + px(12)= 0.59066371193107531104946391158049526D+00 + px(13)= 0.86011311520208939992657730367961144D+00 + px(14)= 0.12478475018108315971898483004517581D+01 + px(15)= 0.18045718085352853467882826664457563D+01 + px(16)= 0.26032962838705539076929338309800964D+01 + px(17)= 0.37517626775845063477806691210141684D+01 + px(18)= 0.54178931704948062041809621881307184D+01 + px(19)= 0.78964489442873309920085649476831128D+01 + px(20)= 0.11871472067202549660846380067126100D+02 + pw( 1)= 0.18798307004678227911126701301382902D-02 + pw( 2)= 0.45327579066213798867372241143837770D-02 + pw( 3)= 0.76074915984009747299662992379957293D-02 + pw( 4)= 0.11468617772837279819666929117999041D-01 + pw( 5)= 0.16675567273387276975224875991907841D-01 + pw( 6)= 0.24060338209931569028899923305997740D-01 + pw( 7)= 0.34797589593340192429611675508702300D-01 + pw( 8)= 0.50498483539096564292114787807163509D-01 + pw( 9)= 0.73383972617080340693094946803759227D-01 + pw(10)= 0.10656241858153477344295213679085888D+00 + pw(11)= 0.15442089967699606004539206241769081D+00 + pw(12)= 0.22316517325789412150938929616587900D+00 + pw(13)= 0.32158746804288047814456613183637546D+00 + pw(14)= 0.46221802166864090447194287669501892D+00 + pw(15)= 0.66320650916245908698703462902787156D+00 + pw(16)= 0.95182294118921990028099509733606027D+00 + pw(17)= 0.13721884223731472188837264029436159D+01 + pw(18)= 0.20062479833990429444286070644467212D+01 + pw(19)= 0.30487071357319501782576369389563967D+01 + pw(20)= 0.52538573669990067214113994100342659D+01 +endif +if(kn == 21) then + px( 1)= 0.69310486169589490054767779485652272D-03 + px( 2)= 0.37100374076510691098395748526914731D-02 + px( 3)= 0.93893881986074420164580675137225674D-02 + px( 4)= 0.18225818290917406339248365551994452D-01 + px( 5)= 0.31081498488793808069000194256763116D-01 + px( 6)= 0.49367420729419602783748969736699563D-01 + px( 7)= 0.75284896750519291461000850881359128D-01 + px( 8)= 0.11212409497044480168252045403791321D+00 + px( 9)= 0.16464732212808249809258249926002432D+00 + px(10)= 0.23962225781352520103022311097593359D+00 + px(11)= 0.34658518012264822513642471749928831D+00 + px(12)= 0.49892252217405192191396093341852149D+00 + px(13)= 0.71538674271382417280638826801508549D+00 + px(14)= 0.10222215706608203725632212510312547D+01 + px(15)= 0.14561846864798943048112298539958500D+01 + px(16)= 0.20689966860645877444413948503855558D+01 + px(17)= 0.29343325491322992822962883455667422D+01 + px(18)= 0.41600923418527605289681744860608923D+01 + px(19)= 0.59138510177367345119623187111536027D+01 + px(20)= 0.84900211059998571872908583995965399D+01 + px(21)= 0.12574850278864023350809778583936857D+02 + pw( 1)= 0.17852307235103793959959450796716884D-02 + pw( 2)= 0.42893635684453385047417019848134615D-02 + pw( 3)= 0.71497939239830042802199211798008382D-02 + pw( 4)= 0.10663735418026519276123538929713931D-01 + pw( 5)= 0.15278738558420813530874306732239620D-01 + pw( 6)= 0.21655078725685226823011775486802689D-01 + pw( 7)= 0.30722016510142553890174563226245394D-01 + pw( 8)= 0.43741403514742541630405757195910036D-01 + pw( 9)= 0.62420477800170703110133642765679935D-01 + pw(10)= 0.89101011801730193914410815797468243D-01 + pw(11)= 0.12703103910528559102170033988553577D+00 + pw(12)= 0.18073427743399167030424190804804963D+00 + pw(13)= 0.25651888703698665673377349665161239D+00 + pw(14)= 0.36320523524230891449268691236436560D+00 + pw(15)= 0.51323006313409951821184870857512202D+00 + pw(16)= 0.72448352105440485035350350605592673D+00 + pw(17)= 0.10237957354057707173113819962280747D+01 + pw(18)= 0.14547382360170350806458262214201030D+01 + pw(19)= 0.20988676887781728328899280598296916D+01 + pw(20)= 0.31515233601433750723678379202457850D+01 + pw(21)= 0.53731727475008532237843822389416189D+01 +endif +if(kn == 22) then + px( 1)= 0.66015487793114579672026064530843580D-03 + px( 2)= 0.35283188763257655749934705032913356D-02 + px( 3)= 0.89037268484852140335335192775132156D-02 + px( 4)= 0.17204887313904086178312640698870646D-01 + px( 5)= 0.29150571838447577747778236163957035D-01 + px( 6)= 0.45901615793714000736387715594203021D-01 + px( 7)= 0.69252127065257752538232461849428663D-01 + px( 8)= 0.10186530460793644200891559033979032D+00 + px( 9)= 0.14756646191534939234956638050262194D+00 + px(10)= 0.21173627302990231446602872154763540D+00 + px(11)= 0.30186392834135377102923595739844832D+00 + px(12)= 0.42832456502483048109845734510065518D+00 + px(13)= 0.60545815580026546162531862085612489D+00 + px(14)= 0.85305900599439063193443527117705937D+00 + px(15)= 0.11984444149249300732981072917233551D+01 + px(16)= 0.16793849380767164321266153811733636D+01 + px(17)= 0.23484259421990250059871794075320909D+01 + px(18)= 0.32797346839031417791643463913803933D+01 + px(19)= 0.45812624093642886254220734381797637D+01 + px(20)= 0.64202435545325521457505355440378480D+01 + px(21)= 0.90908861089411744472574137099194608D+01 + px(22)= 0.13281870386631548366752315803495071D+02 + pw( 1)= 0.16997739815079876291270142303147830D-02 + pw( 2)= 0.40716314869569174969368080298995427D-02 + pw( 3)= 0.67473810622389953786984914722964259D-02 + pw( 4)= 0.99723772588960829502246883384624672D-02 + pw( 5)= 0.14109253034463888804804922921072868D-01 + pw( 6)= 0.19687945390172770224722572299963933D-01 + pw( 7)= 0.27452171417575159428955066586690985D-01 + pw( 8)= 0.38405330139190366155267988240486796D-01 + pw( 9)= 0.53884692766803369761362767720533598D-01 + pw(10)= 0.75690346610129349832179626694927492D-01 + pw(11)= 0.10627642236395592176596973365055862D+00 + pw(12)= 0.14901040431853077087285757833822327D+00 + pw(13)= 0.20852197820697925399881980495903384D+00 + pw(14)= 0.29118491216948479495401093995994782D+00 + pw(15)= 0.40581134430662519733362245846705041D+00 + pw(16)= 0.56471875154845478179304492458155087D+00 + pw(17)= 0.78553695828755046999062010778990119D+00 + pw(18)= 0.10946963582465286266450172551190923D+01 + pw(19)= 0.15353099122697920414083249854348604D+01 + pw(20)= 0.21886787280255501097880613119022761D+01 + pw(21)= 0.32509064591655802312953073550183727D+01 + pw(22)= 0.54886295320492726768301302193581765D+01 +endif +if(kn == 23) then + px( 1)= 0.63021006363433841831474149369938629D-03 + px( 2)= 0.33638809698722088589479529444503357D-02 + px( 3)= 0.84677484260852652320615476282127309D-02 + px( 4)= 0.16299243886097943905725295278638059D-01 + px( 5)= 0.27464030423556247673984741054128575D-01 + px( 6)= 0.42927866788160825736932098072471507D-01 + px( 7)= 0.64169139230662564082303838471065529D-01 + px( 8)= 0.93367767657860048911427490092659179D-01 + px( 9)= 0.13363456957733254533882679272072078D+00 + px(10)= 0.18930799757692553449088970134122038D+00 + px(11)= 0.26636229253087365091814709204751323D+00 + px(12)= 0.37297580801326442775998852633706569D+00 + px(13)= 0.52031366792206514283148817023645741D+00 + px(14)= 0.72359600291057382682653618452186879D+00 + px(15)= 0.10035561979187412137041345167147008D+01 + px(16)= 0.13884527722049286720427614547615987D+01 + px(17)= 0.19169141925914051220031027488430615D+01 + px(18)= 0.26421501677657864516298531009004741D+01 + px(19)= 0.36386833380196488331409942198293287D+01 + px(20)= 0.50144408008467284169777642211293427D+01 + px(21)= 0.69363388633219643316723260573935269D+01 + px(22)= 0.96985139442985089392873665294609340D+01 + px(23)= 0.13992267642385099650757505185917464D+02 + pw( 1)= 0.16221873306891233611184498841470861D-02 + pw( 2)= 0.38756062368028647892929037543456805D-02 + pw( 3)= 0.63904681228155448198104664799116312D-02 + pw( 4)= 0.93716130533642383510190970859726025D-02 + pw( 5)= 0.13116359133127231514779092120465823D-01 + pw( 6)= 0.18054564644654175010476006376716577D-01 + pw( 7)= 0.24787545528949442975874855915817576D-01 + pw( 8)= 0.34123347100699979880664417166869481D-01 + pw( 9)= 0.47126644724531756094751076593005203D-01 + pw(10)= 0.65206569654682359265380728357008572D-01 + pw(11)= 0.90252198579742822099518198813744744D-01 + pw(12)= 0.12481812207643272140418893825548652D+00 + pw(13)= 0.17237035227520038668196439463328275D+00 + pw(14)= 0.23761696881618806722148067227743494D+00 + pw(15)= 0.32696691109252817236452507243569941D+00 + pw(16)= 0.44919654054160466696484020754729982D+00 + pw(17)= 0.61648810596906235080169489216474494D+00 + pw(18)= 0.84621818814094735945360350883771170D+00 + pw(19)= 0.11644590806092632658493343351752018D+01 + pw(20)= 0.16139465349437736570093587472680170D+01 + pw(21)= 0.22758365152455412423280162288521126D+01 + pw(22)= 0.33470981208584956190993329351906399D+01 + pw(23)= 0.56005084488693785143169408072395655D+01 +endif +if(kn == 24) then + px( 1)= 0.60287590220675965883655392550708199D-03 + px( 2)= 0.32143351024974124786544026559297415D-02 + px( 3)= 0.80739802149419819753399469823810803D-02 + px( 4)= 0.15489662347070758196056101724102825D-01 + px( 5)= 0.25976667055963881164895151817795199D-01 + px( 6)= 0.40346714011175627204924600258024553D-01 + px( 7)= 0.59830440001259694739569609351980817D-01 + px( 8)= 0.86230308264184025703796753562250373D-01 + px( 9)= 0.12210371973165121163051429982451776D+00 + px(10)= 0.17099203191550984584070639962768156D+00 + px(11)= 0.23772719534691312495205955287685848D+00 + px(12)= 0.32885366959546582684337055007155510D+00 + px(13)= 0.45320556860130100192674333119232671D+00 + px(14)= 0.62268761188094067470211791701931515D+00 + px(15)= 0.85332750333845602641945072572045710D+00 + px(16)= 0.11667004858019451293051164929872329D+01 + px(17)= 0.15918861333740258915421115101398109D+01 + px(18)= 0.21682356132478165458928387387641436D+01 + px(19)= 0.29494889123245918407695084553095216D+01 + px(20)= 0.40104161879498382932676206134824540D+01 + px(21)= 0.54588693247348625992863433295378994D+01 + px(22)= 0.74614789122023638202247166496000371D+01 + px(23)= 0.10312431148373425365575271866282924D+02 + px(24)= 0.14705805275028081231659123827294642D+02 + pw( 1)= 0.15514249847644583248110080072115634D-02 + pw( 2)= 0.36981205611282502501592938773909080D-02 + pw( 3)= 0.60714888966684178001398442739385021D-02 + pw( 4)= 0.88442862964874279166007983229029862D-02 + pw( 5)= 0.12262956109957475661400953934190561D-01 + pw( 6)= 0.16679687959122678642462992762851947D-01 + pw( 7)= 0.22585160392521754760445705422367202D-01 + pw( 8)= 0.30637073979795749450832614038637313D-01 + pw( 9)= 0.41694955770756579068288981525602823D-01 + pw(10)= 0.56880053645721037701497821632682136D-01 + pw(11)= 0.77671508906845583575064408093070968D-01 + pw(12)= 0.10604093207274572585305524523636662D+00 + pw(13)= 0.14462967696642788409146183249874326D+00 + pw(14)= 0.19698231180562617603349065754159954D+00 + pw(15)= 0.26786134570104304270817950330971854D+00 + pw(16)= 0.36368625547483729027913381140943346D+00 + pw(17)= 0.49317740444788268405884668163369343D+00 + pw(18)= 0.66837334895456593354822252636195142D+00 + pw(19)= 0.90640972513509685558875644594827397D+00 + pw(20)= 0.12330418540439459719440490731353612D+01 + pw(21)= 0.16907002256383886184087957665579435D+01 + pw(22)= 0.23604873140910405927871490349730829D+01 + pw(23)= 0.34403155641323731993571442081042100D+01 + pw(24)= 0.57090593461669661084141390492124683D+01 +endif +if(kn == 25) then + px( 1)= 0.57782426626576571901902524785967824D-03 + px( 2)= 0.30777191085612843075800020147563616D-02 + px( 3)= 0.77164086160794063370485387996841129D-02 + px( 4)= 0.14761056704626219657716466804157254D-01 + px( 5)= 0.24653875400014389680962462419049193D-01 + px( 6)= 0.38083523913823039614894844760656273D-01 + px( 7)= 0.56084067240729530165951958613013393D-01 + px( 8)= 0.80159727346580499135438064461924847D-01 + px( 9)= 0.11243339120217788148662085401543468D+00 + px(10)= 0.15582715928119113928223955420169306D+00 + px(11)= 0.21429698577498381337290048538797344D+00 + px(12)= 0.29315054947238081211765761052065623D+00 + px(13)= 0.39947904356246012931942329550682875D+00 + px(14)= 0.54273750890918728697108592749256418D+00 + px(15)= 0.73551917539468211697041027264605642D+00 + px(16)= 0.99458876650351865510403034983347981D+00 + px(17)= 0.13422725867611823095435983972364716D+01 + px(18)= 0.18083631354696402035562158086478313D+01 + px(19)= 0.24328170422327489041615980739248420D+01 + px(20)= 0.32697931199298648820663632485790012D+01 + px(21)= 0.43942247576362161609644700980578340D+01 + px(22)= 0.59138559751958628054368604601307713D+01 + px(23)= 0.79950700685028878658475844781145359D+01 + px(24)= 0.10932212901976999668719864628273880D+02 + px(25)= 0.15422270538187376197239585881075748D+02 + pw( 1)= 0.14866196466384881027031457863846341D-02 + pw( 2)= 0.35366057944120495831689126436344645D-02 + pw( 3)= 0.57844969402891304739165399375745324D-02 + pw( 4)= 0.83773253924994338826352544649181741D-02 + pw( 5)= 0.11521432119169876731207250699755894D-01 + pw( 6)= 0.15508119705558278186014992003540867D-01 + pw( 7)= 0.20741213416907048628693122469214068D-01 + pw( 8)= 0.27760968610727861118254402368893756D-01 + pw( 9)= 0.37269537287121025856202203197461521D-01 + pw(10)= 0.50172098341486540969354039874345299D-01 + pw(11)= 0.67645050740466106776500656782887390D-01 + pw(12)= 0.91234375023821592290664320838692913D-01 + pw(13)= 0.12298569754451901426257783074280505D+00 + pw(14)= 0.16561310646694880188580709432419945D+00 + pw(15)= 0.22272153335996509389159575477724314D+00 + pw(16)= 0.29910756989135904284632341805878551D+00 + pw(17)= 0.40118169825940023204127855629976398D+00 + pw(18)= 0.53759403804464314015326297165302822D+00 + pw(19)= 0.72023661788993173698912869326221741D+00 + pw(20)= 0.96601967120274407325842915516032939D+00 + pw(21)= 0.13004209336797472935438387369012951D+01 + pw(22)= 0.17656287674999046987390122872170472D+01 + pw(23)= 0.24427681629137658222037012630588612D+01 + pw(24)= 0.35307546103133160002458888441211917D+01 + pw(25)= 0.58145056190411423438373377369723833D+01 +endif +if(kn == 26) then + px( 1)= 0.55477997323631075943068059436768679D-03 + px( 2)= 0.29524051251057578595813790584093855D-02 + px( 3)= 0.73901342939831943165981049698675567D-02 + px( 4)= 0.14101412872584234971190262785177463D-01 + px( 5)= 0.23468728331528566192654626405819210D-01 + px( 6)= 0.36081354457771003437927577005080307D-01 + px( 7)= 0.52815777662090440120493976780463113D-01 + px( 8)= 0.74938112557816762151465822385814066D-01 + px( 9)= 0.10422602290395549848995179022541619D+00 + px(10)= 0.14311379843177524483131355370021475D+00 + px(11)= 0.19487526595566569453971371848061316D+00 + px(12)= 0.26386679946717607062454498658707156D+00 + px(13)= 0.35585463477981546153201478673425350D+00 + px(14)= 0.47845228003668789580973663677048048D+00 + px(15)= 0.64169971258466993649179145930192297D+00 + px(16)= 0.85882779202859317392945336605019422D+00 + px(17)= 0.11472706757497270472184438297710363D+01 + px(18)= 0.15300218181930435465993945481075142D+01 + px(19)= 0.20374900555366768284567833572011801D+01 + px(20)= 0.27101363667403762213767958798451042D+01 + px(21)= 0.36024461925895008266595796162422643D+01 + px(22)= 0.47894509506391986224214442346207227D+01 + px(23)= 0.63787680281602106661197309343576824D+01 + px(24)= 0.85365750410497181159653468391328011D+01 + px(25)= 0.11557476465733555694328533876083768D+02 + px(26)= 0.16141471440934226918800792463390524D+02 + pw( 1)= 0.14270457888511795867992646035078330D-02 + pw( 2)= 0.33889549852374609060800978480342579D-02 + pw( 3)= 0.55247507422590039192859514393700253D-02 + pw( 4)= 0.79606058991690688534072920870913751D-02 + pw( 5)= 0.10870893529433384055135393826453360D-01 + pw( 6)= 0.14498723203402237674680094273702632D-01 + pw( 7)= 0.19179116372035461554716799339102955D-01 + pw( 8)= 0.25359449796805531683017831774564858D-01 + pw( 9)= 0.33619093306389351199382063563600456D-01 + pw(10)= 0.44698102947503307879411438578940957D-01 + pw(11)= 0.59545498569788992362177569759542076D-01 + pw(12)= 0.79391390756669405807121619168425508D-01 + pw(13)= 0.10584364527942384492476511067473085D+00 + pw(14)= 0.14101237929374191599558996117239049D+00 + pw(15)= 0.18767097313836850144218059320809785D+00 + pw(16)= 0.24946862930962964921754344345139933D+00 + pw(17)= 0.33121906265157831801486301115689293D+00 + pw(18)= 0.43930860961154203611926221969681638D+00 + pw(19)= 0.58230770087411222173176311911927048D+00 + pw(20)= 0.77196313761963428481446391672957021D+00 + pw(21)= 0.10249770981257475300463739775919616D+01 + pw(22)= 0.13665866627564720110102420582560601D+01 + pw(23)= 0.18387931590238615303343058387625137D+01 + pw(24)= 0.25228070818980592228138174960180006D+01 + pw(25)= 0.36185923153701902774641344711352413D+01 + pw(26)= 0.59170478659334870019465678075549678D+01 +endif +if(kn == 27) then + px( 1)= 0.53351049035404632679792315747760926D-03 + px( 2)= 0.28370305792687798355464055618059207D-02 + px( 3)= 0.70911216690725917919709661777310467D-02 + px( 4)= 0.13501035871356777604646936827660401D-01 + px( 5)= 0.22399961695605187088994736400122990D-01 + px( 6)= 0.34296092152256244946377676950669096D-01 + px( 7)= 0.49938369643043464349412733250120371D-01 + px( 8)= 0.70401007218931052290887962696404315D-01 + px( 9)= 0.97184515439527403402719960641852663D-01 + px(10)= 0.13233419679650342626453438780924062D+00 + px(11)= 0.17858535768221430523992541052027664D+00 + px(12)= 0.23955186967002279067082055689473205D+00 + px(13)= 0.31997738343786115574032202180084953D+00 + px(14)= 0.42606917808870858677237819912563285D+00 + px(15)= 0.56593757336952698275709554352322422D+00 + px(16)= 0.75017087050170008688238981674255895D+00 + px(17)= 0.99258767627203407312800418206037430D+00 + px(18)= 0.13112276119862221456283706350994460D+01 + px(19)= 0.17296743945926671138206419268165977D+01 + px(20)= 0.22788674435500397192690477951071238D+01 + px(21)= 0.29996852862458953100640794763905730D+01 + px(22)= 0.39468640965897806748062604397538661D+01 + px(23)= 0.51954835049739316659023548298272390D+01 + px(24)= 0.68530258860308177417175245026171396D+01 + px(25)= 0.90855060116903171155308252608094055D+01 + px(26)= 0.12187875689542501045614051917426613D+02 + px(27)= 0.16863234023420842677139640506273211D+02 + pw( 1)= 0.13720916834443970883055143999864006D-02 + pw( 2)= 0.32534222826312859680330393960878445D-02 + pw( 3)= 0.52884203958454954233826913886323010D-02 + pw( 4)= 0.75861682815905379946822876423968075D-02 + pw( 5)= 0.10295285639112777079908697606376495D-01 + pw( 6)= 0.13620382459545697869019461721069283D-01 + pw( 7)= 0.17841523884178755603525268045244929D-01 + pw( 8)= 0.23331884086195134219312146516778989D-01 + pw( 9)= 0.30573638051336305998992920739412241D-01 + pw(10)= 0.40178513173563771124515253029295068D-01 + pw(11)= 0.52921980029762362141777841892525060D-01 + pw(12)= 0.69796141123370781915050562748149785D-01 + pw(13)= 0.92082178946617789793924526199756695D-01 + pw(14)= 0.12144364501581543284664137255263233D+00 + pw(15)= 0.16004546398125829773973180676428128D+00 + pw(16)= 0.21070793305215927164839501305159383D+00 + pw(17)= 0.27711057981819535787363684480367458D+00 + pw(18)= 0.36407033983503565406434335329539225D+00 + pw(19)= 0.47793795110360227457939282572737921D+00 + pw(20)= 0.62719851165950099532441318332470642D+00 + pw(21)= 0.82345785449976155240467438227528273D+00 + pw(22)= 0.10832282284259041619866566987699138D+01 + pw(23)= 0.14315401678117573067846471990768551D+01 + pw(24)= 0.19102558501900861570470238950653852D+01 + pw(25)= 0.26007234476536547511161089599236255D+01 + pw(26)= 0.37039892290540711004993184577181103D+01 + pw(27)= 0.60168669305235587630364842700538762D+01 +endif +if(kn == 28) then + px( 1)= 0.51381795837439443363513212680713947D-03 + px( 2)= 0.27304457410704199858650659828620367D-02 + px( 3)= 0.68160137769119765426643490207146340D-02 + px( 4)= 0.12952008943105054557760398423212956D-01 + px( 5)= 0.21430556623995591107026232001066913D-01 + px( 6)= 0.32693077435416129189765100617149847D-01 + px( 7)= 0.47384339361452651219408943088094832D-01 + px( 8)= 0.66422538658869906464895332297645726D-01 + px( 9)= 0.91083609265701992171879031038032271D-01 + px(10)= 0.12309931515680468843361103443044334D+00 + px(11)= 0.16477454361350713713658223760718724D+00 + px(12)= 0.21913585560877712250760133646822826D+00 + px(13)= 0.29012664768975497883815066595179815D+00 + px(14)= 0.38286487674840579443374001038670456D+00 + px(15)= 0.50398058179729621330814443966676445D+00 + px(16)= 0.66205451410726746024107068348273274D+00 + px(17)= 0.86818664253955900521872813145997494D+00 + px(18)= 0.11367350670636899400604869200366967D+01 + px(19)= 0.14862849200104679887431662518488448D+01 + px(20)= 0.19409402276831122951996033127610769D+01 + px(21)= 0.25320952507423009788362491203678215D+01 + px(22)= 0.33009718613493033820591483341014688D+01 + px(23)= 0.43024948141012255149771188379035923D+01 + px(24)= 0.56117545045133971321428621533897157D+01 + px(25)= 0.73360976078470116280524716610880778D+01 + px(26)= 0.96414187592042593009284671180300365D+01 + px(27)= 0.12823096392169689557014539066503465D+02 + px(28)= 0.17587400071042305461725873529323834D+02 + pw( 1)= 0.13212378315760757100801294124815478D-02 + pw( 2)= 0.31285477801160114283501913408130908D-02 + pw( 3)= 0.50723762615025144279692888527822532D-02 + pw( 4)= 0.72476698125814896247817149986171532D-02 + pw( 5)= 0.97820955598317266921336611328577874D-02 + pw( 6)= 0.12849233584715652412922486858932270D-01 + pw( 7)= 0.16684914458309663575212478712400069D-01 + pw( 8)= 0.21602517467719514930691067702478109D-01 + pw( 9)= 0.28006326728639319667000527469785398D-01 + pw(10)= 0.36406794940307839233444412699072219D-01 + pw(11)= 0.47444873633013210207704765440613480D-01 + pw(12)= 0.61930727493054842346818886062879310D-01 + pw(13)= 0.80898273560459582900261577117872215D-01 + pw(14)= 0.10567594748292560400210313567244803D+00 + pw(15)= 0.13797620661979101239766714101470269D+00 + pw(16)= 0.18000947429659496345933986602370689D+00 + pw(17)= 0.23463186153608726981379368239281040D+00 + pw(18)= 0.30554132794139847744947603213040709D+00 + pw(19)= 0.39754682244155206328794798554812257D+00 + pw(20)= 0.51695511158335980560554454196568031D+00 + pw(21)= 0.67216328370835447637864341433166332D+00 + pw(22)= 0.87464250694849374310652866430377750D+00 + pw(23)= 0.11407332853853718344895028360165344D+01 + pw(24)= 0.14952907674277354153396565164371127D+01 + pw(25)= 0.19800794802566744638917545875047599D+01 + pw(26)= 0.26766284602650483548086237399782999D+01 + pw(27)= 0.37870913379223214170502998382833988D+01 + pw(28)= 0.61141264493435815963177300026057897D+01 +endif +if(kn == 29) then + px( 1)= 0.49553294523216557706121043094899937D-03 + px( 2)= 0.26316733478032430241054271443133571D-02 + px( 3)= 0.65619932921719095950945647167842097D-02 + px( 4)= 0.12447798146998465679635850497317547D-01 + px( 5)= 0.20546725218853503579585565380355127D-01 + px( 6)= 0.31244723477287375693064121565353181D-01 + px( 7)= 0.45100744250336127000529420647899915D-01 + px( 8)= 0.62905104337181109718071876082818174D-01 + px( 9)= 0.85750236217882191338049895922209411D-01 + px(10)= 0.11511276616778913843095284495474961D+00 + px(11)= 0.15294975780371902992191410154541288D+00 + px(12)= 0.20181754709239365590998602589064153D+00 + px(13)= 0.26502522465295890912360438565915637D+00 + px(14)= 0.34683572458174845871862351357320832D+00 + px(15)= 0.45272795494425557401215215391241853D+00 + px(16)= 0.58973560996706034541847942850272405D+00 + px(17)= 0.76688297098562182278932098742313991D+00 + px(18)= 0.99574549679776398032257261690761656D+00 + px(19)= 0.12911746195797983348858102746352939D+01 + px(20)= 0.16722452518287396697078251913438203D+01 + px(21)= 0.21635184617418031866723236843209620D+01 + px(22)= 0.27967767483991669703409180904294707D+01 + px(23)= 0.36135221397956750546277975652240522D+01 + px(24)= 0.46688173594398401975825823496477303D+01 + px(25)= 0.60377360285854709978780788792051344D+01 + px(26)= 0.78274940573494599108741305430804683D+01 + px(27)= 0.10203907613754336363839677845176062D+02 + px(28)= 0.13462852450631673420328416214301979D+02 + px(29)= 0.18313825184349771602172383716912527D+02 + pw( 1)= 0.12740401404642070584625642978316674D-02 + pw( 2)= 0.30131005567070877653374960845857871D-02 + pw( 3)= 0.48740340828768558996439695359657849D-02 + pw( 4)= 0.69399937174499322748248258405693356D-02 + pw( 5)= 0.93214421069590292260830707831625273D-02 + pw( 6)= 0.12166736320428934613437189621950636D-01 + pw( 7)= 0.15675841191975760517355581582896129D-01 + pw( 8)= 0.20113588991116143831392394792507242D-01 + pw( 9)= 0.25821196242227502811986031677074846D-01 + pw(10)= 0.33228090412405890048263783552877789D-01 + pw(11)= 0.42869421776728397919368187941044920D-01 + pw(12)= 0.55414351585478714766063918835679369D-01 + pw(13)= 0.71707159949828379722440698604318357D-01 + pw(14)= 0.92821391683596645191185627202531776D-01 + pw(15)= 0.12012815275736363503642593112420207D+00 + pw(16)= 0.15538195214533728663259981412132171D+00 + pw(17)= 0.20083006704520997993532092391882379D+00 + pw(18)= 0.25935464058941676571399243604292685D+00 + pw(19)= 0.33466206352796625147483719389400201D+00 + pw(20)= 0.43154442816926686793937626854331538D+00 + pw(21)= 0.55625869020489676787739935371459343D+00 + pw(22)= 0.71711352998990444136642620781136560D+00 + pw(23)= 0.92545309501307835297306650109719589D+00 + pw(24)= 0.11974638996261713170731294345952752D+01 + pw(25)= 0.15578539402639460936307650070163272D+01 + pw(26)= 0.20483259849497670771611527406588667D+01 + pw(27)= 0.27506256539950821112799212513803514D+01 + pw(28)= 0.38680317406459942528573299847997115D+01 + pw(29)= 0.62089749987608497692655462325987093D+01 +endif +if(kn == 30) then + px( 1)= 0.47850950616663001797979108558563715D-03 + px( 2)= 0.25398771556977649765987433184628366D-02 + px( 3)= 0.63266767276005636636918971079049107D-02 + px( 4)= 0.11982958698830863481679603697851263D-01 + px( 5)= 0.19737172990592845187334346934200923D-01 + px( 6)= 0.29928809539955562795388150514477224D-01 + px( 7)= 0.43045554614365273736449489448924963D-01 + px( 8)= 0.59772102008795894644542468648119854D-01 + px( 9)= 0.81049794509547598102960610576252333D-01 + px(10)= 0.10814588049834038786751975710018494D+00 + px(11)= 0.14273357759635461536908362003919156D+00 + px(12)= 0.18698857852398412902900671067259807D+00 + px(13)= 0.24371127323293364985980734512310093D+00 + px(14)= 0.31648527240822183351218895583590417D+00 + px(15)= 0.40988303161683259239808922937843934D+00 + px(16)= 0.52973043830387724896707606257507982D+00 + px(17)= 0.68344507067026327204085106743008657D+00 + px(18)= 0.88046769972296521320190133680773247D+00 + px(19)= 0.11328140045719997686900182451437385D+01 + px(20)= 0.14557849987988940136735253178131959D+01 + px(21)= 0.18688939211845819607060281336643708D+01 + px(22)= 0.23971019483057447245002052718973892D+01 + px(23)= 0.30725214791534315669485745292475362D+01 + px(24)= 0.39368810885201871521056281051541589D+01 + px(25)= 0.50453405183264296277555982679398449D+01 + px(26)= 0.64729369863961542161639397127927023D+01 + px(27)= 0.83267646024511909496921957165518850D+01 + px(28)= 0.10772601108679545696397581453580679D+02 + px(29)= 0.14106882472397927743465848536293191D+02 + px(30)= 0.19042377139722396576925840568797290D+02 + pw( 1)= 0.12301166672602858132749403305585138D-02 + pw( 2)= 0.29060349388917145213464128010319648D-02 + pw( 3)= 0.46912396989831005493100286944698875D-02 + pw( 4)= 0.66589658633863613568668981891460303D-02 + pw( 5)= 0.89054272701749525193189873717149483D-02 + pw( 6)= 0.11558310934603853176421212815818196D-01 + pw( 7)= 0.14788296604172157875123267577237353D-01 + pw( 8)= 0.18820536178193913710320280585533146D-01 + pw( 9)= 0.23944736371120032144351948458331453D-01 + pw(10)= 0.30524720054213913983988370110313231D-01 + pw(11)= 0.39011278230203159299747340391501401D-01 + pw(12)= 0.49962838489107032100208706917355628D-01 + pw(13)= 0.64076419786867156599743093793566683D-01 + pw(14)= 0.82229278957901320956487642041730955D-01 + pw(15)= 0.10553162871986467114052832148689872D+00 + pw(16)= 0.13539231869402525697097331669289294D+00 + pw(17)= 0.17360130147051396517985693386136263D+00 + pw(18)= 0.22243486135302704942601690670544171D+00 + pw(19)= 0.28479267765730766074850970825016903D+00 + pw(20)= 0.36438127580923131688892141299329716D+00 + pw(21)= 0.46596902265107476258412993277856600D+00 + pw(22)= 0.59575928418635779724375261556025579D+00 + pw(23)= 0.76197365269330802425420319974692784D+00 + pw(24)= 0.97583770654840390135150485526335360D+00 + pw(25)= 0.12534009762825410047987823857212526D+01 + pw(26)= 0.16192497318165861534514297610388291D+01 + pw(27)= 0.21150559758743170143427430064927639D+01 + pw(28)= 0.28228114202692982120171862876336191D+01 + pw(29)= 0.39469320964297990895048746179623006D+01 + pw(30)= 0.63015479148815916165232775496615043D+01 +endif +if(kn == 31) then + px( 1)= 0.46262124155980990900658158435387435D-03 + px( 2)= 0.24543371823976573134043990222488764D-02 + px( 3)= 0.61080328529962740945375030974273550D-02 + px( 4)= 0.11552913696384311289392080456928671D-01 + px( 5)= 0.18992554406021427042667932899509712D-01 + px( 6)= 0.28727239767175649142836330874634688D-01 + px( 7)= 0.41185025043396586435794925588583817D-01 + px( 8)= 0.56962729337650718045473296117050346D-01 + px( 9)= 0.76876408592321896648696565625767740D-01 + px(10)= 0.10202017613750426995368814592543654D+00 + px(11)= 0.13383358226439616664891093380483172D+00 + px(12)= 0.17418111785696519564699242954723688D+00 + px(13)= 0.22545079288954335283945305080525555D+00 + px(14)= 0.29068039334037578871072959727864135D+00 + px(15)= 0.37372028651206644042103206921271373D+00 + px(16)= 0.47944207171169672906363477630137984D+00 + px(17)= 0.61400401892256187595697112035275391D+00 + px(18)= 0.78518739405456531463788153132800130D+00 + px(19)= 0.10028226298243299869022545200635749D+01 + px(20)= 0.12793315955946422764804637028085576D+01 + px(21)= 0.16304237404723823714635864157191701D+01 + px(22)= 0.20760033968995280036753528016386266D+01 + px(23)= 0.26413808369702250796358745676110842D+01 + px(24)= 0.33589474379157213932868619266312477D+01 + px(25)= 0.42706130083047912404753160217494471D+01 + px(26)= 0.54316014237021642862351869609909389D+01 + px(27)= 0.69169001596879784406900118967237735D+01 + px(28)= 0.88334933035422052410960663437339313D+01 + px(29)= 0.11347158219793510037091924273351551D+02 + px(30)= 0.14754946949122040546764367410101515D+02 + px(31)= 0.19772934489331283651619534072783573D+02 + pw( 1)= 0.11891370745192323541501363233610476D-02 + pw( 2)= 0.28064565117632969596616229162402416D-02 + pw( 3)= 0.45221820039177658202628966334564400D-02 + pw( 4)= 0.64011462699419650278998414745094563D-02 + pw( 5)= 0.85276672843451997393681235521893987D-02 + pw( 6)= 0.11012361472143567538937306026897082D-01 + pw( 7)= 0.14001834101185433452944676115710984D-01 + pw( 8)= 0.17688602970920088694526112750101821D-01 + pw( 9)= 0.22319993264776239711478935025076064D-01 + pw(10)= 0.28206159264133299993412221361362783D-01 + pw(11)= 0.35729779624362463483758438125027756D-01 + pw(12)= 0.45361209930192015109636119041934318D-01 + pw(13)= 0.57681745959882977997939097436077625D-01 + pw(14)= 0.73415730766069123013128503531856238D-01 + pw(15)= 0.93471597750154348901340915093546613D-01 + pw(16)= 0.11899278980540690788248491553604027D+00 + pw(17)= 0.15142094037875549556146133150023155D+00 + pw(18)= 0.19257526453331204560446543583285304D+00 + pw(19)= 0.24475404687248924789069417791551804D+00 + pw(20)= 0.31086721331790994056051025981812504D+00 + pw(21)= 0.39461463811187169999440896691515521D+00 + pw(22)= 0.50073578853625257469356342726971100D+00 + pw(23)= 0.63537831862561035023942323431330555D+00 + pw(24)= 0.80667931860754124288487929852367162D+00 + pw(25)= 0.10257546571965496140352844249415068D+01 + pw(26)= 0.13085329420794059702124934248171024D+01 + pw(27)= 0.16795015057251400866456256943483557D+01 + pw(28)= 0.21803283205051475095028864283497279D+01 + pw(29)= 0.28932755231818831561701787327064654D+01 + pw(30)= 0.40239038808605675586573707403263756D+01 + pw(31)= 0.63919688446180182394784276398611383D+01 +endif +if(kn == 32) then + px( 1)= 0.44775812457284018024389459757135253D-03 + px( 2)= 0.23744300256081390660731331947131448D-02 + px( 3)= 0.59043190530701947087959015074527131D-02 + px( 4)= 0.11153785163059738085602585751236982D-01 + px( 5)= 0.18305065328015338438613004118489460D-01 + px( 6)= 0.27625127970368047372371104083117018D-01 + px( 7)= 0.39491775186977265606198670142306198D-01 + px( 8)= 0.54428213199640073895058009136940445D-01 + px( 9)= 0.73145916598392207837439901430911881D-01 + px(10)= 0.96594842317146387626737019235932073D-01 + px(11)= 0.12602067507135613816122919853447211D+00 + px(12)= 0.16303119243744080803051027229475087D+00 + px(13)= 0.20967680096838206807696535069330795D+00 + px(14)= 0.26855214787001775388966483767701730D+00 + px(15)= 0.34292617657317298468348748407414908D+00 + px(16)= 0.43690813207789007326976756359678361D+00 + px(17)= 0.55565789063347173772943038765658058D+00 + px(18)= 0.70565099753808518503172131364185913D+00 + px(19)= 0.89501204411614229330337167600565325D+00 + px(20)= 0.11339348063954267650829490706793417D+01 + px(21)= 0.14352147947875517113501672147412666D+01 + px(22)= 0.18149314500489324404656062560197686D+01 + px(23)= 0.22933370525154514327968223407435420D+01 + px(24)= 0.28960454333191297694038803298227801D+01 + px(25)= 0.36556826421582258456995650363192002D+01 + px(26)= 0.46143015669042314317688786912840126D+01 + px(27)= 0.58271640488452237241101113536108246D+01 + px(28)= 0.73691994618471152327249495515078891D+01 + px(29)= 0.93472955333283431319402613994267131D+01 + px(30)= 0.11927265101386731589772177284607045D+02 + px(31)= 0.15406825810545224143258580731663182D+02 + px(32)= 0.20505385359315843909738804845728816D+02 + pw( 1)= 0.11508141693310289431516002902194819D-02 + pw( 2)= 0.27135954170378174829279299706770743D-02 + pw( 3)= 0.43653263744209382080072687149869796D-02 + pw( 4)= 0.61636735125379730211346708661816263D-02 + pw( 5)= 0.81829488620066496094938528145972129D-02 + pw( 6)= 0.10519566782024470065554644097490174D-01 + pw( 7)= 0.13300211432891686360240172032295580D-01 + pw( 8)= 0.16690405167066082789755182863245534D-01 + pw( 9)= 0.20902378399561394483383583702025306D-01 + pw(10)= 0.26201993681677797113818409392334214D-01 + pw(11)= 0.32916309597556149563466321558626500D-01 + pw(12)= 0.41444783686249708806280965671571355D-01 + pw(13)= 0.52276724501596285458733044616038927D-01 + pw(14)= 0.66016045915514602434595691235181054D-01 + pw(15)= 0.83413404413409668999356206612085646D-01 + pw(16)= 0.10540610055778962043592758135111163D+00 + pw(17)= 0.13316716078519390466674733202657980D+00 + pw(18)= 0.16816620873421840783275153829642565D+00 + pw(19)= 0.21224605690896243222098764241728942D+00 + pw(20)= 0.26772081279710238967904075331055395D+00 + pw(21)= 0.33750446543867324459731453510204593D+00 + pw(22)= 0.42528477444911122706373139673310844D+00 + pw(23)= 0.53576855465657922120503664249148172D+00 + pw(24)= 0.67504694124786279657121038348068956D+00 + pw(25)= 0.85117601343595945001255999545786031D+00 + pw(26)= 0.10751709034800588277387396074134080D+01 + pw(27)= 0.13628543051087082785935353862196062D+01 + pw(28)= 0.17386349660199069072599128814460612D+01 + pw(29)= 0.22441998697757930470131299950344772D+01 + pw(30)= 0.29621015955004271568195551910771395D+01 + pw(31)= 0.40990494779803030065007758020292907D+01 + pw(32)= 0.64803510743879183341918928981358705D+01 +endif +if(kn == 33) then + px( 1)= 0.43382392840086627333673371770410791D-03 + px( 2)= 0.22996130762133902306219348170634844D-02 + px( 3)= 0.57140311482877757226936621231899489D-02 + px( 4)= 0.10782263460575904849778834680881905D-01 + px( 5)= 0.17668133941325186930637700801463696D-01 + px( 6)= 0.26610114005745400437311454869685139D-01 + px( 7)= 0.37943370827578419250767127256762961D-01 + px( 8)= 0.52129041240975080218154665697626616D-01 + px( 9)= 0.69790754616119288011923380403081974D-01 + px(10)= 0.91757677110053287920628073294896820D-01 + px(11)= 0.11911351898045213836878312985804824D+00 + px(12)= 0.15325258383592569039382713986292282D+00 + px(13)= 0.19594640045311118270956256575773479D+00 + px(14)= 0.24942637714448661140410427601352782D+00 + px(15)= 0.31648861015431273270737411206235176D+00 + px(16)= 0.40062704984338027141229486836727772D+00 + px(17)= 0.50620162303052368324000671362943743D+00 + px(18)= 0.63864913663861731335041615079256755D+00 + px(19)= 0.80474696130540033570426941368276095D+00 + px(20)= 0.10129427268888740212312948144945017D+01 + px(21)= 0.12737679814899102853785194013828910D+01 + px(22)= 0.16003609719649203492661497770183358D+01 + px(23)= 0.20091354103290292174609108669156369D+01 + px(24)= 0.25206522780436439397099624663858851D+01 + px(25)= 0.31607884518783084627961967518655955D+01 + px(26)= 0.39623662204110196024719874486085284D+01 + px(27)= 0.49675495548589556752790709024651368D+01 + px(28)= 0.62316176745314993859698590995521575D+01 + px(29)= 0.78294374119411180631665027515007813D+01 + px(30)= 0.98678149766403120157427026982285647D+01 + px(31)= 0.12512632243648644312040507648114218D+02 + px(32)= 0.16062316312799007384705365546800182D+02 + px(33)= 0.21239626413140571003413447643564263D+02 + pw( 1)= 0.11148970595521907012019732812607055D-02 + pw( 2)= 0.26267851653678976569258782421234610D-02 + pw( 3)= 0.42193631422457547168621098688321193D-02 + pw( 4)= 0.59441470797152969047545135707624789D-02 + pw( 5)= 0.78669738899974671911070000143132306D-02 + pw( 6)= 0.10072359597590157564525137294462730D-01 + pw( 7)= 0.12670399361862170180506056379944600D-01 + pw( 8)= 0.15804160264438711138334353671976234D-01 + pw( 9)= 0.19656646222033544724001127769742948D-01 + pw(10)= 0.24456891716604317647752096255377977D-01 + pw(11)= 0.30486078976025291042502816433302855D-01 + pw(12)= 0.38085942822052281180984432235900915D-01 + pw(13)= 0.47671859587250413816889365465789990D-01 + pw(14)= 0.59751905747580346721160295363733596D-01 + pw(15)= 0.74952101551419066714546625345670463D-01 + pw(16)= 0.94047940645329027189027157260871966D-01 + pw(17)= 0.11800297644299036213439491803576584D+00 + pw(18)= 0.14801615944289654934244517014490498D+00 + pw(19)= 0.18558059212548173745682851099099352D+00 + pw(20)= 0.23255756871893273043599614945440298D+00 + pw(21)= 0.29127163202367449192902938057774536D+00 + pw(22)= 0.36463564879320343679994202632934432D+00 + pw(23)= 0.45632094652154984485718193074718634D+00 + pw(24)= 0.57099911532700834474505095568484057D+00 + pw(25)= 0.71470499479041253227766745277939393D+00 + pw(26)= 0.89541776346343118676696712049315123D+00 + pw(27)= 0.11240606917291058823868093321319567D+01 + pw(28)= 0.14163644716761897658439794410119483D+01 + pw(29)= 0.17966773927205272792519385809395869D+01 + pw(30)= 0.23067252939728463992747791116176630D+01 + pw(31)= 0.30293676082841986853392483236331039D+01 + pw(32)= 0.41724631327442166841549815997399232D+01 + pw(33)= 0.65667986737889171343050053820031099D+01 +endif +if(kn == 34) then + px( 1)= 0.42073412472404638893756652613349609D-03 + px( 2)= 0.22294117501223585040813842121338791D-02 + px( 3)= 0.55358634424631592178894736933123541D-02 + px( 4)= 0.10435505221909818911938312253494801D-01 + px( 5)= 0.17076183522530106340715524871890215D-01 + px( 6)= 0.25671846892716699934599581849440412D-01 + px( 7)= 0.36521262393930458152775347026295878D-01 + px( 8)= 0.50032905506982571537052624490236219D-01 + px( 9)= 0.66756179329851771946428938115562858D-01 + px(10)= 0.87418440517600995276435533200107158D-01 + px(11)= 0.11296720871152482793171861979004167D+00 + px(12)= 0.14461798209060507185491668160548863D+00 + px(13)= 0.18391003445626398421131159916752177D+00 + px(14)= 0.23277438392452600064632190536358711D+00 + px(15)= 0.29361900416298607661742971283165405D+00 + px(16)= 0.36943648509469646191303516064166821D+00 + px(17)= 0.46393948817397095233405427748578045D+00 + px(18)= 0.58173003909628716647282591793935082D+00 + px(19)= 0.72851013492593350843955115700236997D+00 + px(20)= 0.91134335818633179015533214138153748D+00 + px(21)= 0.11389803746065945729570613409749246D+01 + px(22)= 0.14222658613929989350760708533653619D+01 + px(23)= 0.17746516415212834951524711400325322D+01 + px(24)= 0.22128526659161258391280177907354742D+01 + px(25)= 0.27577030475922356628315880749135163D+01 + px(26)= 0.34353067716163467499117583277012187D+01 + px(27)= 0.42786491225875840394307069342936437D+01 + px(28)= 0.53299784436903981664272138662385839D+01 + px(29)= 0.66445753694632339412682622877981687D+01 + px(30)= 0.82972428161435530717136911066799313D+01 + px(31)= 0.10394720964339982803669830220514570D+02 + px(32)= 0.13102991988894450504911092547924425D+02 + px(33)= 0.16721231207604090231249152699717947D+02 + px(34)= 0.21975561953387058269749184334695681D+02 + pw( 1)= 0.10811655766145357018010507192110778D-02 + pw( 2)= 0.25454456692504695230374260776656758D-02 + pw( 3)= 0.40831672721796992810291858236827121D-02 + pw( 4)= 0.57405373464649057612542381878716024D-02 + pw( 5)= 0.75761674957710259616551754950037804D-02 + pw( 6)= 0.96645393330211172110854747921485211D-02 + pw( 7)= 0.12101849132180839391980047456808267D-01 + pw( 8)= 0.15012385166203703798856896165367048D-01 + pw( 9)= 0.18554685664439102794960148200939741D-01 + pw(10)= 0.22926965460919156862511787954586100D-01 + pw(11)= 0.28372234446912034618472293586026471D-01 + pw(12)= 0.35184737546237040728179133891634893D-01 + pw(13)= 0.43719795721391424383284929004984363D-01 + pw(14)= 0.54408450285118084483187831793976080D-01 + pw(15)= 0.67777311678116535143958157494852949D-01 + pw(16)= 0.84473623053475603301350103260219498D-01 + pw(17)= 0.10529589998535694380806537413267952D+00 + pw(18)= 0.13123120457466970233581600829200986D+00 + pw(19)= 0.16350086670529523991639926152378893D+00 + pw(20)= 0.20361729924366521691008806803033027D+00 + pw(21)= 0.25345571161549014721855872141678839D+00 + pw(22)= 0.31534642949457072923890964012293452D+00 + pw(23)= 0.39219690177745437575400107500840528D+00 + pw(24)= 0.48765869057533013816866773942301700D+00 + pw(25)= 0.60636656060951114856441189594293935D+00 + pw(26)= 0.75430007270808930870580918261471173D+00 + pw(27)= 0.93936601064002840188234207354112438D+00 + pw(28)= 0.11724044094922051599035401593357573D+01 + pw(29)= 0.14690667743714044794229623979582298D+01 + pw(30)= 0.18536570456604450828475022635489700D+01 + pw(31)= 0.23679569977291335149484204767547713D+01 + pw(32)= 0.30951463106197815052582135293694448D+01 + pw(33)= 0.42442317841513325464351782732665369D+01 + pw(34)= 0.66514074844570206292427905989816795D+01 +endif +if(kn == 35) then + px( 1)= 0.40841415548604021515920964002399699D-03 + px( 2)= 0.21634090823285078411638264192646176D-02 + px( 3)= 0.53686766256969092483856645653707325D-02 + px( 4)= 0.10111052749147679432588829471203234D-01 + px( 5)= 0.16524448303260385083329478751404387D-01 + px( 6)= 0.24801589512429266354562963867459375D-01 + px( 7)= 0.35209981971294629902224557027954734D-01 + px( 8)= 0.48113158246267914322027983645621137D-01 + px( 9)= 0.63997447579595411241665965285069235D-01 + px(10)= 0.83503921106837065850088749934585509D-01 + px(11)= 0.10746492183189159858451002479100191D+00 + px(12)= 0.13694520147247420040072594436271529D+00 + px(13)= 0.17328916996785784591140856733770825D+00 + px(14)= 0.21817740121438829093802932764502942D+00 + px(15)= 0.27369653832797779961249433698659440D+00 + px(16)= 0.34242699906149186851837117836266744D+00 + px(17)= 0.42755291549796611179923030387067599D+00 + px(18)= 0.53299909554182158428637150014155340D+00 + px(19)= 0.66360070964681593124502281725999578D+00 + px(20)= 0.82531292882837242125696121653082133D+00 + px(21)= 0.10254699397214209538105608220109125D+01 + px(22)= 0.12731058972360937198805843671213415D+01 + px(23)= 0.15793550240911338542780101952325349D+01 + px(24)= 0.19579553515472245197868811537799063D+01 + px(25)= 0.24258926480980508600328748991363509D+01 + px(26)= 0.30042420249956940401285542677229005D+01 + px(27)= 0.37193027838966272865717795755051677D+01 + px(28)= 0.46041945352246751335584778757254638D+01 + px(29)= 0.57012278072022800341169042054436900D+01 + px(30)= 0.70656725104389005421933634969655744D+01 + px(31)= 0.87722686454754899579881428713693370D+01 + px(32)= 0.10927706100825575628425432470397431D+02 + px(33)= 0.13698096354347216687650292593862930D+02 + px(34)= 0.17383397148588794631800252981912492D+02 + px(35)= 0.22713103140185533251728138884147593D+02 + pw( 1)= 0.10494256988526777696106600190246748D-02 + pw( 2)= 0.24690695403322712989700976425347600D-02 + pw( 3)= 0.39557664886781607598248979520245308D-02 + pw( 4)= 0.55511159047852988512979064225320046D-02 + pw( 5)= 0.73075320876513631863948214849392405D-02 + pw( 6)= 0.92909810870320629850049703997767543D-02 + pw( 7)= 0.11585945487945439996692114097667348D-01 + pw( 8)= 0.14300927726198362348901143963929085D-01 + pw( 9)= 0.17573886933485282780152536853558582D-01 + pw(10)= 0.21577101857175674214083208317224449D-01 + pw(11)= 0.26521578525384159056539231969282277D-01 + pw(12)= 0.32662117530178998186687978899517756D-01 + pw(13)= 0.40304760583700356024679394121082348D-01 + pw(14)= 0.49818024406493848323885700890345396D-01 + pw(15)= 0.61648498278387927702353594142316029D-01 + pw(16)= 0.76340842519971003579540175209086024D-01 + pw(17)= 0.94562313841812711417919902804637839D-01 + pw(18)= 0.11713243611448668905011510723033361D+00 + pw(19)= 0.14505903302187588429490257418340878D+00 + pw(20)= 0.17958246056420660249030476513346108D+00 + pw(21)= 0.22223064193925674596660827103307295D+00 + pw(22)= 0.27488866495140173131704069241445894D+00 + pw(23)= 0.33988865913831065785773004058455433D+00 + pw(24)= 0.42012914561889293897263514396102618D+00 + pw(25)= 0.51923942630667304009247143973261919D+00 + pw(26)= 0.64181663147268449329914151912099321D+00 + pw(27)= 0.79378665914719288398623606417929212D+00 + pw(28)= 0.98298862630823929161929765759110274D+00 + pw(29)= 0.12201876100804632086249205242578178D+01 + pw(30)= 0.15209676736777719585360185637783971D+01 + pw(31)= 0.19096027011177832508222236236206329D+01 + pw(32)= 0.24279450923678125311674033890233300D+01 + pw(33)= 0.31595056381881229211813965570928908D+01 + pw(34)= 0.43144357961085612515538420099471387D+01 + pw(35)= 0.67342659787038143488625655899472123D+01 +endif +if(kn == 36) then + px( 1)= 0.39679800273035937692456569764529929D-03 + px( 2)= 0.21012371857514574429814634636370868D-02 + px( 3)= 0.52114717733736355471865173697424917D-02 + px( 4)= 0.98067697534794548776488857189272695D-02 + px( 5)= 0.16008829048610610861085787414831567D-01 + px( 6)= 0.23991913025422802197171963117636148D-01 + px( 7)= 0.33996529414545629919769339896568646D-01 + px( 8)= 0.46347640349961743012995653495638967D-01 + px( 9)= 0.61477688475695067453069318914244194D-01 + px(10)= 0.79954234262530625440149978974196142D-01 + px(11)= 0.10251169364078550312390054066216880D+00 + px(12)= 0.13008697511618887542147079025562156D+00 + px(13)= 0.16385989560749921185926347303565033D+00 + px(14)= 0.20530066324396880599546126878370330D+00 + px(15)= 0.25622775706568656239933185280550197D+00 + px(16)= 0.31887992047473434247218676095668498D+00 + px(17)= 0.39600601270766777602081700578327931D+00 + px(18)= 0.49097660869784478263571961512354166D+00 + px(19)= 0.60792179107232897416464078744063313D+00 + px(20)= 0.75190061348713360022081088193051979D+00 + px(21)= 0.92910925563205451252801298079690580D+00 + px(22)= 0.11471370551908310980346755006364606D+01 + px(23)= 0.14152827011474745738425470935393175D+01 + px(24)= 0.17449475260933662620173991491940897D+01 + px(25)= 0.21501302020535220058465237512287759D+01 + px(26)= 0.26480593984665582591226977578025315D+01 + px(27)= 0.32600222787200297912657865580064671D+01 + px(28)= 0.40124854077257132134482639353308378D+01 + px(29)= 0.49386780680939238173283121803851184D+01 + px(30)= 0.60809546519546778854776584514884910D+01 + px(31)= 0.74945653594942201002132544001003708D+01 + px(32)= 0.92541900969385753944715137178141946D+01 + px(33)= 0.11466484149563363201326491294746713D+02 + px(34)= 0.14297715117728357842590639672319382D+02 + px(35)= 0.18048653298710078396919876650411551D+02 + px(36)= 0.23452167308415151157069277254133065D+02 + pw( 1)= 0.10195057714700151361278566695804673D-02 + pw( 2)= 0.23972109360881477362914694357034945D-02 + pw( 3)= 0.38363158447342177123425836110343923D-02 + pw( 4)= 0.53744010839767438647013296257593752D-02 + pw( 5)= 0.70585351522933460613686733697482296D-02 + pw( 6)= 0.89474146486528988019752703037120466D-02 + pw( 7)= 0.11115594177177423398237576162176609D-01 + pw( 8)= 0.13658239363054657454269471936854599D-01 + pw( 9)= 0.16695920500566358906782047930663171D-01 + pw(10)= 0.20378981713905551927970998274789754D-01 + pw(11)= 0.24891420496520210335919294073059272D-01 + pw(12)= 0.30454995468399097263969267647541847D-01 + pw(13)= 0.37334924544188163864246559109559135D-01 + pw(14)= 0.45848499525488242728225859778156080D-01 + pw(15)= 0.56377323577091624057815057252308315D-01 + pw(16)= 0.69383296835632602012769529156119272D-01 + pw(17)= 0.85428366438632838815674340562473751D-01 + pw(18)= 0.10519836473842813187954075539197942D+00 + pw(19)= 0.12953172788969281249628541902884322D+00 + pw(20)= 0.15945437502320699870857044488189859D+00 + pw(21)= 0.19622256830996434214924230362068723D+00 + pw(22)= 0.24137631349243177937384183525770442D+00 + pw(23)= 0.29680703930305420431908018221051720D+00 + pw(24)= 0.36484530976842630509122266207581834D+00 + pw(25)= 0.44837789623740881716704249442275046D+00 + pw(26)= 0.55101005426642996452229825498001978D+00 + pw(27)= 0.67730110868383751051906031936646973D+00 + pw(28)= 0.83312535104283525888024444527691004D+00 + pw(29)= 0.10262590489160128936079715697075387D+01 + pw(30)= 0.12674001847129782122095271537034034D+01 + pw(31)= 0.15720761022006313395632370048802129D+01 + pw(32)= 0.19645432933987013230055443656345311D+01 + pw(33)= 0.24867374093856289898664232230940440D+01 + pw(34)= 0.32225090907915900251361180974071379D+01 + pw(35)= 0.43831496004036045715808964702697293D+01 + pw(36)= 0.68154560080748566632769223367688120D+01 +endif +if(kn == 37) then + px( 1)= 0.38582699810911109314412610582600285D-03 + px( 2)= 0.20425701942290192876130910611009103D-02 + px( 3)= 0.50633691210660737148684629408797998D-02 + px( 4)= 0.95207896708291718422773083291578438D-02 + px( 5)= 0.15525778682735758953134513377993514D-01 + px( 6)= 0.23236458251477143892762148491421503D-01 + px( 7)= 0.32869898315502381365977793756399700D-01 + px( 8)= 0.44717783821182009237592740999277480D-01 + px( 9)= 0.59166282378413474386219652173387790D-01 + px(10)= 0.76720016407611302507577044991006663D-01 + px(11)= 0.98029725568599136751267367485311366D-01 + px(12)= 0.12392331356464118155739304688774820D+00 + px(13)= 0.15544072437387706131982741857227680D+00 + px(14)= 0.19387425549171156055801006554376491D+00 + px(15)= 0.24081693190720646255336253748180444D+00 + px(16)= 0.29822205905402883495704063798959865D+00 + px(17)= 0.36847715057286422135924881742734873D+00 + px(18)= 0.45449546602004465241771763312107545D+00 + px(19)= 0.55982869948207391699702170763669914D+00 + px(20)= 0.68880504649787296744511192196333508D+00 + px(21)= 0.84669796033901533360995646038438838D+00 + px(22)= 0.10399324356558473561934598413040205D+01 + px(23)= 0.12763377882779834661586906782407420D+01 + px(24)= 0.15654589805265212137286807835074586D+01 + px(25)= 0.19189432205583964070987477122905362D+01 + px(26)= 0.23510260307961591423359389441199440D+01 + px(27)= 0.28791534439182687258499291870153821D+01 + px(28)= 0.35247986674670581588018777649051460D+01 + px(29)= 0.43145708343894232186639323097990119D+01 + px(30)= 0.52817877652195029549414854087069499D+01 + px(31)= 0.64688326916686594614365796118970700D+01 + px(32)= 0.79309297085796581254349823893132444D+01 + px(33)= 0.97427028243618144296296506331896705D+01 + px(34)= 0.12010788145502681400420281251059076D+02 + px(35)= 0.14901634128904268951043131468444574D+02 + px(36)= 0.18716850108988162034804184337830585D+02 + px(37)= 0.24192677368936148334878341907713337D+02 + pw( 1)= 0.99125336551824862194889482679760082D-03 + pw( 2)= 0.23294764153889055024996601218914981D-02 + pw( 3)= 0.37240772547834537676742266421935443D-02 + pw( 4)= 0.52091149342528848273366607113774535D-02 + pw( 5)= 0.68270221227859789716885624541247654D-02 + pw( 6)= 0.86302549871569688435939302492406456D-02 + pw( 7)= 0.10684907929606001826647765915296935D-01 + pw( 8)= 0.13074823650284587459124044679067858D-01 + pw( 9)= 0.15905815137012769943635728692865399D-01 + pw(10)= 0.19309592949421631021202995586098476D-01 + pw(11)= 0.23447231955796843102356124526512731D-01 + pw(12)= 0.28512602643555882480364043596258204D-01 + pw(13)= 0.34736803269082359319855891282489532D-01 + pw(14)= 0.42394778196431500823106022890118590D-01 + pw(15)= 0.51814900990726921245206073768999637D-01 + pw(16)= 0.63391754887352977638748380428399567D-01 + pw(17)= 0.77602103176771937423249866012581660D-01 + pw(18)= 0.95024188094564273453247510650970162D-01 + pw(19)= 0.11636084820208068310423973571076075D+00 + pw(20)= 0.14246733764500807361275561785541076D+00 + pw(21)= 0.17438513757282787980358846897741684D+00 + pw(22)= 0.21338354906834882772151573726954513D+00 + pw(23)= 0.26101159566291693588807442987832548D+00 + pw(24)= 0.31916397294414160306456288368801219D+00 + pw(25)= 0.39016685675452558044568607656388734D+00 + pw(26)= 0.47689304458379631259135969323199221D+00 + pw(27)= 0.58292255373275454371013059783773154D+00 + pw(28)= 0.71277724055369180393369777699867214D+00 + pw(29)= 0.87228215826925362283182363826585130D+00 + pw(30)= 0.10691555317433509592820312414251932D+01 + pw(31)= 0.13140356602358313756291999585335256D+01 + pw(32)= 0.16224029261605229346735838359067505D+01 + pw(33)= 0.20185076394260072518838683149008566D+01 + pw(34)= 0.25443795429863924820797324346795088D+01 + pw(35)= 0.32842160798477830990525297742207531D+01 + pw(36)= 0.44504422639266402367212097908451198D+01 + pw(37)= 0.68950534584189288110300715731079937D+01 +endif +if(kn == 38) then + px( 1)= 0.37544882641214739544404487691314902D-03 + px( 2)= 0.19871183957226785505693683210337852D-02 + px( 3)= 0.49235906141676391015802154321946533D-02 + px( 4)= 0.92514737512580193448510992395727495D-02 + px( 5)= 0.15072210889929832914852564552062275D-01 + px( 6)= 0.22529747565558688599709359818783270D-01 + px( 7)= 0.31820706479818463453808764992549706D-01 + px( 8)= 0.43207917804566229579979815937989733D-01 + px( 9)= 0.57037614703055219788340245932475170D-01 + px(10)= 0.73760278305643433875885418326364923D-01 + px(11)= 0.93954814072806105125535625384970094D-01 + px(12)= 0.11835572279751275812340426098616966D+00 + px(13)= 0.14788342387597970850254556771271985D+00 + px(14)= 0.18367881184867197784381084444678116D+00 + px(15)= 0.22714406717510235393336995794234796D+00 + px(16)= 0.27999230770730436478836636945630613D+00 + px(17)= 0.34430881911279975709736095638802572D+00 + px(18)= 0.42262660561797928237112864643574725D+00 + px(19)= 0.51801914793576724661446993655612171D+00 + px(20)= 0.63421369082081852676499438226656058D+00 + px(21)= 0.77572913890901541203968257694714068D+00 + px(22)= 0.94804373045176433040869132064288662D+00 + px(23)= 0.11577991608735723735204737419458983D+01 + px(24)= 0.14130499332620481162978195094240590D+01 + px(25)= 0.17235697910838497297077019443263907D+01 + px(26)= 0.21012318099421305470189826928381899D+01 + px(27)= 0.25604863031155004117350909468147106D+01 + px(28)= 0.31189733700354116534218512680792939D+01 + px(29)= 0.37983289491193925871766329237375059D+01 + px(30)= 0.46252830528399268638347963582117115D+01 + px(31)= 0.56332239825265713634766419255858690D+01 + px(32)= 0.68645515916046830561016745830456758D+01 + px(33)= 0.83744595975704688906750898197385297D+01 + px(34)= 0.10237521325244942798063968606118549D+02 + px(35)= 0.12560368707162995423911721031640974D+02 + px(36)= 0.15509653816608570103011114649053437D+02 + px(37)= 0.19387848243793684608247770401886161D+02 + px(38)= 0.24934561281635323945874914765928781D+02 + pw( 1)= 0.96453265301318427548094362468201990D-03 + pw( 2)= 0.22655173902979609807058505788853476D-02 + pw( 3)= 0.36184028900176044156504296271444603D-02 + pw( 4)= 0.50541489537150279336470105887486457D-02 + pw( 5)= 0.66111480676528721493086153967962652D-02 + pw( 6)= 0.83364709970141383682744167961798317D-02 + pw( 7)= 0.10288965239682486008404941690219778D-01 + pw( 8)= 0.12542814650103402794331337460320134D-01 + pw( 9)= 0.15191255363614986206935175799741358D-01 + pw(10)= 0.18350103409130288787013350611437802D-01 + pw(11)= 0.22160882105318682870151221782836377D-01 + pw(12)= 0.26793767627663720473790893510507021D-01 + pw(13)= 0.32451109787899399883524910095936794D-01 + pw(14)= 0.39372541954009629417292538828294592D-01 + pw(15)= 0.47842474290684012119499234323721647D-01 + pw(16)= 0.58200301955035707564120883548436910D-01 + pw(17)= 0.70853350044334604149559560301899009D-01 + pw(18)= 0.86292591208817884216614254072269241D-01 + pw(19)= 0.10511141477535783724613732416344133D+00 + pw(20)= 0.12802804286358628862797652201096219D+00 + pw(21)= 0.15591251149396720172437659323068423D+00 + pw(22)= 0.18981949259837187914390190669151597D+00 + pw(23)= 0.23102871661757404151219056182072738D+00 + pw(24)= 0.28109550616216987958688311291596089D+00 + pw(25)= 0.34191517456611856887400710976438959D+00 + pw(26)= 0.41580717321823436217712748531850542D+00 + pw(27)= 0.50562861773538920954280122211069833D+00 + pw(28)= 0.61493358954575527626904288123143367D+00 + pw(29)= 0.74820721198453838888752387245859917D+00 + pw(30)= 0.91122787666637256187790465660240598D+00 + pw(31)= 0.11116604876702928314541969577850448D+01 + pw(32)= 0.13600906035248579424433723117814169D+01 + pw(33)= 0.16719605033700192144793651594580592D+01 + pw(34)= 0.20715242290059813187830495943964160D+01 + pw(35)= 0.26009149126553161918961209793496358D+01 + pw(36)= 0.33446822473746498943306001989892373D+01 + pw(37)= 0.45163779904182883931050846768355054D+01 + pw(38)= 0.69731288252045659086112524629580346D+01 +endif +if(kn == 39) then + px( 1)= 0.36561668715257863266695867839020084D-03 + px( 2)= 0.19346233268023844607752680140092771D-02 + px( 3)= 0.47914454654721250429608171020940566D-02 + px( 4)= 0.89973768150331118636464557997751850D-02 + px( 5)= 0.14645426459456932116061991386768235D-01 + px( 6)= 0.21867035291160181873696376563265340D-01 + px( 7)= 0.30840905275941465760959181463130484D-01 + px( 8)= 0.41804727278858172089611260923812570D-01 + px( 9)= 0.55070109596011254633584203260158210D-01 + px(10)= 0.71040748192925559559810356000599258D-01 + px(11)= 0.90233607305000360208735980487986444D-01 + px(12)= 0.11330278644013276387294932973826412D+00 + px(13)= 0.14106605202734226946949558771740988D+00 + px(14)= 0.17453471922456742887333856505611605D+00 + px(15)= 0.21494840120203884103453089817606487D+00 + px(16)= 0.26381673974086082406623827494703215D+00 + px(17)= 0.32297045833724193805138734154727157D+00 + px(18)= 0.39462409178162717260419023658129017D+00 + px(19)= 0.48145279757007806226628785793904093D+00 + px(20)= 0.58668591241992432419881469135301121D+00 + px(21)= 0.71422043441619441875490470925759117D+00 + px(22)= 0.86875839397007365611188934469437619D+00 + px(23)= 0.10559731561016268223241013124184012D+01 + px(24)= 0.12827111749931695832113875967148563D+01 + px(25)= 0.15572378120557119987867239447717751D+01 + px(26)= 0.18895389181496921659373840308737441D+01 + px(27)= 0.22916946565666188923157402572614733D+01 + px(28)= 0.27783497396142931899333530655336223D+01 + px(29)= 0.33673171344416414054075236409384439D+01 + px(30)= 0.40803746579421435360342926127423516D+01 + px(31)= 0.49443541982197303363350679681860647D+01 + px(32)= 0.59926991656565097356612507658187921D+01 + px(33)= 0.72678162022521051390335450639073869D+01 + px(34)= 0.88248661080276432761936204880867089D+01 + px(35)= 0.10738377470289614240120984197486960D+02 + px(36)= 0.13114992524642316318648160324994074D+02 + px(37)= 0.16121587864037157860973324745148775D+02 + px(38)= 0.20061517632007615495343028194490514D+02 + px(39)= 0.25677751590102375173572742100364982D+02 + pw( 1)= 0.93922220165197278758235278705125378D-03 + pw( 2)= 0.22050238560788499833482429614154492D-02 + pw( 3)= 0.35187216063020038804486790499409390D-02 + pw( 4)= 0.49085365506617880092326144941967522D-02 + pw( 5)= 0.64093236535073721253961413565734053D-02 + pw( 6)= 0.80634829478885867255854065621590218D-02 + pw( 7)= 0.99236234747694546900134742610818704D-02 + pw( 8)= 0.12055651790884970603397688654161516D-01 + pw( 9)= 0.14542041549622581450180752510105879D-01 + pw(10)= 0.17484998327571243644029476510788471D-01 + pw(11)= 0.21009295862219636601744654763649681D-01 + pw(12)= 0.25264862509860073296774992740630911D-01 + pw(13)= 0.30429647240913452161840939756263150D-01 + pw(14)= 0.36713599504420717007290949145895236D-01 + pw(15)= 0.44364526109307181291869691370038599D-01 + pw(16)= 0.53676231693459070390920038412533532D-01 + pw(17)= 0.64999020675906355562370375168522980D-01 + pw(18)= 0.78752553989958863994856994484156486D-01 + pw(19)= 0.95441198327713183692637121288756175D-01 + pw(20)= 0.11567225339206538896170499893449828D+00 + pw(21)= 0.14017770592600709076251201422928256D+00 + pw(22)= 0.16984043156916862434884153031547730D+00 + pw(23)= 0.20572609864904223612685592628540649D+00 + pw(24)= 0.24912251113833289072621517399741476D+00 + pw(25)= 0.30158889670929686638710684005186011D+00 + pw(26)= 0.36501892348491283180440889799004221D+00 + pw(27)= 0.44172341189659552954879007161166844D+00 + pw(28)= 0.53454253015480261760607954897116394D+00 + pw(29)= 0.64700413373280623862585575306735597D+00 + pw(30)= 0.78355765539183802574608463599148275D+00 + pw(31)= 0.94993752822514424638014213628449162D+00 + pw(32)= 0.11537599191614455897031555123375337D+01 + pw(33)= 0.14055641164435818777056131176331966D+01 + pw(34)= 0.17207623206557334615184702866032957D+01 + pw(35)= 0.21236210670677286367475649834301224D+01 + pw(36)= 0.26563848390712257212223490954097065D+01 + pw(37)= 0.34039597582687380169009802962894561D+01 + pw(38)= 0.45810165654673262153491823163228067D+01 + pw(39)= 0.70497477205153040846297597338553262D+01 +endif +if(kn == 40) then + px( 1)= 0.35628858567646346686837324510549370D-03 + px( 2)= 0.18848536486770311807591243848705546D-02 + px( 3)= 0.46663181280317856746168443158323968D-02 + px( 4)= 0.87572190745324032833162010260613025D-02 + px( 5)= 0.14243053462025729117427416370397854D-01 + px( 6)= 0.21244187714177671012923575144774521D-01 + px( 7)= 0.29923549070178329860704059006245261D-01 + px( 8)= 0.40496827285887221934804797848083118D-01 + px( 9)= 0.53245474481585595162008164942410512D-01 + px(10)= 0.68532582430157398594384714629729053D-01 + px(11)= 0.86821479567961854490494392488927025D-01 + px(12)= 0.10869675959524067911380119096839804D+00 + px(13)= 0.13488761722649275069059959653024014D+00 + px(14)= 0.16629388885455273807852247897310019D+00 + px(15)= 0.20401590432120076472183920609098846D+00 + px(16)= 0.24938984749465339706889280753165521D+00 + px(17)= 0.30403061355446670063478194165459695D+00 + px(18)= 0.36988419909316149270323787724602738D+00 + px(19)= 0.44929166750312582192671647579960907D+00 + px(20)= 0.54506686870623140996608099899850594D+00 + px(21)= 0.66059043398617261201896956069992349D+00 + px(22)= 0.79992312516554243682533149390065526D+00 + px(23)= 0.96794240207734869040410161024196399D+00 + px(24)= 0.11705071340447720718704866053067197D+01 + px(25)= 0.14146568411163837104501550620112388D+01 + px(26)= 0.17088539364613371655586392377700409D+01 + px(27)= 0.20632805527205428543659205472373927D+01 + px(28)= 0.24902063739619416723013658330633921D+01 + px(29)= 0.30044517126674338175114949672570867D+01 + px(30)= 0.36239831566943951780098581431720407D+01 + px(31)= 0.43707017882279510268089904263627705D+01 + px(32)= 0.52715247584257797079839546521646347D+01 + px(33)= 0.63599375545114314246728093354610643D+01 + px(34)= 0.76783457965158492083638000098773087D+01 + px(35)= 0.92818762327705176821137326934557272D+01 + px(36)= 0.11245019162985129683252871377987811D+02 + px(37)= 0.13674441002824139783929760948169562D+02 + px(38)= 0.16737262031081031207287624813172862D+02 + px(39)= 0.20737736626704553460329127303821744D+02 + px(40)= 0.26422185009410264431343933932848600D+02 + pw( 1)= 0.91521311273862401110414707754929676D-03 + pw( 2)= 0.21477191521928219197974773759587103D-02 + pw( 3)= 0.34245277733745781136392870149979323D-02 + pw( 4)= 0.47714307428687057651870425259278087D-02 + pw( 5)= 0.62201720373839252470536835470530964D-02 + pw( 6)= 0.78090816762799270025355432702422127D-02 + pw( 7)= 0.95853728776285881753692296587104674D-02 + pw( 8)= 0.11607827206228475028893871423142139D-01 + pw( 9)= 0.13949671718334027814092300225470600D-01 + pw(10)= 0.16701414648594482319533882783791014D-01 + pw(11)= 0.19973423842092704666661066907283955D-01 + pw(12)= 0.23898237345016617036372142753573396D-01 + pw(13)= 0.28632957303620028137537791558953675D-01 + pw(14)= 0.34362389756581172586707762233025920D-01 + pw(15)= 0.41303629060426009015015262453667296D-01 + pw(16)= 0.49712538117407657728121783442595623D-01 + pw(17)= 0.59892265779927519547458491064456622D-01 + pw(18)= 0.72203793116717427042782050909270304D-01 + pw(19)= 0.87078559354632307141427375007562753D-01 + pw(20)= 0.10503340154082661838457995134010675D+00 + pw(21)= 0.12668825810872029062869005987170137D+00 + pw(22)= 0.15278730399841792433216511718627756D+00 + pw(23)= 0.18422442714170367120354764302353497D+00 + pw(24)= 0.22207427969059917966672557751489214D+00 + pw(25)= 0.26763062752730913486480650477422812D+00 + pw(26)= 0.32245451049240744596849032983153986D+00 + pw(27)= 0.38843603675318055142995014015287773D+00 + pw(28)= 0.46787586661529213993652263387714350D+00 + pw(29)= 0.56359633220992492533897924842606201D+00 + pw(30)= 0.67909910572915995345517074647986275D+00 + pw(31)= 0.81879920277693732798543935642566763D+00 + pw(32)= 0.98838986255598396167621024931216632D+00 + pw(33)= 0.11954429228293928218210248618287692D+01 + pw(34)= 0.14504574076282353803601676013439121D+01 + pw(35)= 0.17688226967485673903033088821262440D+01 + pw(36)= 0.21748255570140773474399755838047824D+01 + pw(37)= 0.27108286283847583355764140776741630D+01 + pw(38)= 0.34620975677969687591016085409314313D+01 + pw(39)= 0.46444137521824838114376662229079725D+01 + pw(40)= 0.71249713212825268738104553603659849D+01 +endif +if(kn == 41) then + px( 1)= 0.34742673101001169298125786189192247D-03 + px( 2)= 0.18376016626066348589572611512977315D-02 + px( 3)= 0.45476582212664459469467031011454626D-02 + px( 4)= 0.85298627941631364768947297314276555D-02 + px( 5)= 0.13862998302969016172574004035541326D-01 + px( 6)= 0.20657586094570613395846290407493021D-01 + px( 7)= 0.29062610848297752288163047809306784D-01 + px( 8)= 0.39274425358723378021088136325757383D-01 + px( 9)= 0.51548104852787877566528890250659503D-01 + px(10)= 0.66211354354233299700140534259286291D-01 + px(11)= 0.83680871232624778086967339679729237D-01 + px(12)= 0.10448092042565902676173295142297193D+00 + px(13)= 0.12926394759018944327153762885108248D+00 + px(14)= 0.15883342588000497563552833153551216D+00 + px(15)= 0.19416971506631978910646170849840836D+00 + px(16)= 0.23646026989593002896655227863024343D+00 + px(17)= 0.28713586785719640965554266593644607D+00 + px(18)= 0.34791462052391896376462581083932962D+00 + px(19)= 0.42085552853141849349998238918417102D+00 + px(20)= 0.50842339932407666570140739641291868D+00 + px(21)= 0.61356716058210435249023757196485689D+00 + px(22)= 0.73981399451706291428067431492187345D+00 + px(23)= 0.89138229125166542251073985530751701D+00 + px(24)= 0.10733171950376849976883148880289368D+01 + px(25)= 0.12916535619608503598632452641060769D+01 + px(26)= 0.15536125962495445292827177181517718D+01 + px(27)= 0.18678405221272713224762723085314113D+01 + px(28)= 0.22447007897223064544921318025044505D+01 + px(29)= 0.26966362208010185020303083586792707D+01 + px(30)= 0.32386254398637317225899745563640301D+01 + px(31)= 0.38887712166747737454815122744695001D+01 + px(32)= 0.46690813167550880509031095796259491D+01 + px(33)= 0.56065436673995350526181894949125319D+01 + px(34)= 0.67346748355004755553478375342343373D+01 + px(35)= 0.80958733206377174584850755308484861D+01 + px(36)= 0.97452318197102039374768068435419398D+01 + px(37)= 0.11757209117436549173752389088784801D+02 + px(38)= 0.14238509041698890616269777375250039D+02 + px(39)= 0.17356513104267573046562740943972043D+02 + px(40)= 0.21416391258741087275776630427364534D+02 + px(41)= 0.27167802059826190961056594217938860D+02 + pw( 1)= 0.89240744144922233411809208993761819D-03 + pw( 2)= 0.20933555605342582529457970383304286D-02 + pw( 3)= 0.33353720205083480430061442115773668D-02 + pw( 4)= 0.46420859644861606778799308270413399D-02 + pw( 5)= 0.60424942046045364422720752662654441D-02 + pw( 6)= 0.75713643924191847990582966217707736D-02 + pw( 7)= 0.92712216138022670728066109402065568D-02 + pw( 8)= 0.11194687905391631640874958915231129D-01 + pw( 9)= 0.13407015211365284658909716719520612D-01 + pw(10)= 0.15988623214362774067334406180515621D-01 + pw(11)= 0.19037444847571012625771486857897508D-01 + pw(12)= 0.22671015569010270758765332286863717D-01 + pw(13)= 0.27028523256092179219838374149240132D-01 + pw(14)= 0.32273327230549150524506142920429224D-01 + pw(15)= 0.38596560710149393493004578367090095D-01 + pw(16)= 0.46222282500302385970118307137327059D-01 + pw(17)= 0.55414377032731402665476377100171599D-01 + pw(18)= 0.66485217878875067216476264141658569D-01 + pw(19)= 0.79806099857296581997108727302834078D-01 + pw(20)= 0.95819567831341990475832520086841170D-01 + pw(21)= 0.11505394471321825395206987370902480D+00 + pw(22)= 0.13814054039338535506658629671808687D+00 + pw(23)= 0.16583420916823077593541262981490681D+00 + pw(24)= 0.19903814998014368860888042911909173D+00 + pw(25)= 0.23883416675372027779180574899396133D+00 + pw(26)= 0.28652010702264135124796999487042423D+00 + pw(27)= 0.34365700663826115610743525613053815D+00 + pw(28)= 0.41212981100557300459399743393003457D+00 + pw(29)= 0.49422782047790986141383860345034700D+00 + pw(30)= 0.59275496121528084438932784828140258D+00 + pw(31)= 0.71118703347840582157967515903715531D+00 + pw(32)= 0.85390607737164494406520343373727721D+00 + pw(33)= 0.10265669138579149759586965414767668D+01 + pw(34)= 0.12367012590999922651850699786000809D+01 + pw(35)= 0.14947734294405908884100490275594732D+01 + pw(36)= 0.18161565391675246807943861856738552D+01 + pw(37)= 0.22251644165626364386518263812767700D+01 + pw(38)= 0.27642836611922099334373971118073338D+01 + pw(39)= 0.35191416662378097344643779658011469D+01 + pw(40)= 0.47066216438742057965343731115092608D+01 + pw(41)= 0.71988567667853812451766331822159390D+01 +endif +if(kn == 42) then + px( 1)= 0.33899702213207280303556565703741990D-03 + px( 2)= 0.17926803514586918122543179233298713D-02 + px( 3)= 0.44349720472477033588788261295928634D-02 + px( 4)= 0.83142928381370954944600712429031326D-02 + px( 5)= 0.13503405398912139202218387144909328D-01 + px( 6)= 0.20104047687222803992574530787065418D-01 + px( 7)= 0.28252833643724993821423023769509983D-01 + px( 8)= 0.38129051839561461833536104861505433D-01 + px( 9)= 0.49964611823304399745794720191846551D-01 + px(10)= 0.64056255466771758626847757441909748D-01 + px(11)= 0.80779982442382913230902383270370264D-01 + px(12)= 0.10060749467110151157610640414769097D+00 + px(13)= 0.12412446888143911749100232205634547D+00 + px(14)= 0.15205071694865663047292718989423185D+00 + px(15)= 0.18526275842687218439176583557408384D+00 + px(16)= 0.22481983515906206676758114216568014D+00 + px(17)= 0.27199475262711723613680204884879081D+00 + px(18)= 0.32831107191481630404052581830045252D+00 + px(19)= 0.39558818386929789909718409480658300D+00 + px(20)= 0.47599581233231530250697095670252798D+00 + px(21)= 0.57211961616026616366050109797592391D+00 + px(22)= 0.68703982771101430011400322499023268D+00 + px(23)= 0.82442528240215569955771735381318182D+00 + px(24)= 0.98864576667159865631116805089235696D+00 + px(25)= 0.11849063745873302190560203655789150D+01 + px(26)= 0.14194085949505718343903162487063753D+01 + px(27)= 0.16995442942632463982698312503343140D+01 + px(28)= 0.20341308620656637127889186046327503D+01 + px(29)= 0.24336989628881715953740329050451161D+01 + px(30)= 0.29108493179186889815360617341447733D+01 + px(31)= 0.34807029996457758665062194913386789D+01 + px(32)= 0.41614831892896507985708808145127823D+01 + px(33)= 0.49752895914290239340663575251573810D+01 + px(34)= 0.59491683086745559122027634515663744D+01 + px(35)= 0.71166577579837023101788394059159047D+01 + px(36)= 0.85201446660709564225208283238116453D+01 + px(37)= 0.10214688587324610607847909840137625D+02 + px(38)= 0.12274723743508615874901666750507663D+02 + px(39)= 0.14807003938009410302904866873102566D+02 + px(40)= 0.17979187958243548948159702687043315D+02 + px(41)= 0.22097374571880291313408725390611447D+02 + px(42)= 0.27914546740390271130933515005738687D+02 + pw( 1)= 0.87071685062486345289172157206777171D-03 + pw( 2)= 0.20417105878963868357298871515770460D-02 + pw( 3)= 0.32508535230574150323908782582998050D-02 + pw( 4)= 0.45198431212516341141392418533095799D-02 + pw( 5)= 0.58752408889593859079959250826252330D-02 + pw( 6)= 0.73486832934924937540882060804195150D-02 + pw( 7)= 0.89786045784956752332612520710862566D-02 + pw( 8)= 0.10812279756759579335639253148771645D-01 + pw( 9)= 0.12908056239717444457676015564846892D-01 + pw(10)= 0.15337623022218768136530241218842739D-01 + pw(11)= 0.18188143417513304840817866168341234D-01 + pw(12)= 0.21564159053085002516938026776853287D-01 + pw(13)= 0.25589384327944258948303670138994570D-01 + pw(14)= 0.30408768217117848936156421895538494D-01 + pw(15)= 0.36191344708155839281431088511826248D-01 + pw(16)= 0.43134326776658801296020544503200249D-01 + pw(17)= 0.51468687405796338041097105695766697D-01 + pw(18)= 0.61466278505771427877600196882159870D-01 + pw(19)= 0.73448477231342626126309188685807545D-01 + pw(20)= 0.87796417742468560052699023987407758D-01 + pw(21)= 0.10496300171125714320281462758628502D+00 + pw(22)= 0.12548703017795381911118827176515123D+00 + pw(23)= 0.15000994875313221416387070158771740D+00 + pw(24)= 0.17929586510892162331076812701124762D+00 + pw(25)= 0.21425571841544983392913772225490756D+00 + pw(26)= 0.25597680754299265400920520155620068D+00 + pw(27)= 0.30575939808892163354965075699382712D+00 + pw(28)= 0.36516295819168300959340602426394554D+00 + pw(29)= 0.43606594548816172375527529406447798D+00 + pw(30)= 0.52074538635856855968547701789163874D+00 + pw(31)= 0.62198649880275266624161896479324475D+00 + pw(32)= 0.74323973657036909817079545894769618D+00 + pw(33)= 0.88885572273739267168333040860537136D+00 + pw(34)= 0.10644536078580082753759483481056876D+01 + pw(35)= 0.12775289785980491630656102334631630D+01 + pw(36)= 0.15385165702010908217004674883905600D+01 + pw(37)= 0.18627791456676687262611182457669180D+01 + pw(38)= 0.22746636192228746631618234661193503D+01 + pw(39)= 0.28167854835143406837432208820997977D+01 + pw(40)= 0.35751353025579259567249078677450433D+01 + pw(41)= 0.47676889791677097956471776553934610D+01 + pw(42)= 0.72714575121908501417344894592801099D+01 +endif +if(kn == 43) then + px( 1)= 0.33096860786757750154898847120917057D-03 + px( 2)= 0.17499208566123461094898550046151874D-02 + px( 3)= 0.43278154096847331355188904178071447D-02 + px( 4)= 0.81096003646317825364067625754120221D-02 + px( 5)= 0.13162623744349233672062735003036974D-01 + px( 6)= 0.19580760979979784765554480104376286D-01 + px( 7)= 0.27489609951663532195103068673087354D-01 + px( 8)= 0.37053342869799653592283933396820960D-01 + px( 9)= 0.48483444452707627033448163720522761D-01 + px(10)= 0.62049459953072389488156028383179189D-01 + px(11)= 0.78091737934971368286739663026054961D-01 + px(12)= 0.97036017163386886705197231591959380D-01 + px(13)= 0.11940967168918372175774848212559756D+00 + px(14)= 0.14585958712300992040134315459436264D+00 + px(15)= 0.17717200022543900754299287107723618D+00 + px(16)= 0.21429507577429224752942159606670995D+00 + px(17)= 0.25836535107930525415060613221934697D+00 + px(18)= 0.31073935543877479582966549813156744D+00 + px(19)= 0.37303174367650550564362643090028699D+00 + px(20)= 0.44716127946900452807387231860775343D+00 + px(21)= 0.53540606537110705599704958284246183D+00 + px(22)= 0.64046959170961720285075028881968045D+00 + px(23)= 0.76555947611245457114222548768598476D+00 + px(24)= 0.91448119032995475750346963701721081D+00 + px(25)= 0.10917496366295753151128433644125688D+01 + px(26)= 0.13027221869155879323859021531153006D+01 + px(27)= 0.15537578204695570075088633454278321D+01 + px(28)= 0.18524084405877676482171967152697008D+01 + px(29)= 0.22076505684206197863173711190960219D+01 + px(30)= 0.26301688303884504911030778880813732D+01 + px(31)= 0.31327077074207879423269172652003364D+01 + px(32)= 0.37305161917200709143270025763858640D+01 + px(33)= 0.44419236396092482776254163193454757D+01 + px(34)= 0.52891086092067566005114585507151512D+01 + px(35)= 0.62991644484541209348967251319712091D+01 + px(36)= 0.75056437278338836321053009023954332D+01 + px(37)= 0.89509179672327616775374703555124487D+01 + px(38)= 0.10690015208487653327362214867738207D+02 + px(39)= 0.12797352129263264861734948864857745D+02 + px(40)= 0.15379744394428552764554128123427935D+02 + px(41)= 0.18605142714959248273487455172506479D+02 + px(42)= 0.22780586028957730000926904102977113D+02 + px(43)= 0.28662366237232832381924388811320297D+02 + pw( 1)= 0.85006145870848429334757469982573891D-03 + pw( 2)= 0.19925838109657703878469708707415329D-02 + pw( 3)= 0.31706135365278565227115398483457554D-02 + pw( 4)= 0.44041172343279662518790702958553625D-02 + pw( 5)= 0.57174896648731729427179253475841935D-02 + pw( 6)= 0.71396041280184287136596813041547263D-02 + pw( 7)= 0.87053105293648014281590926123488040D-02 + pw( 8)= 0.10457223593582426133616067442291247D-01 + pw( 9)= 0.12447691003362071183889633075575652D-01 + pw(10)= 0.14740821116468595319728157346456377D-01 + pw(11)= 0.17414420402114891406970230872875573D-01 + pw(12)= 0.20561736545282396913149978083028889D-01 + pw(13)= 0.24293057998247351709738591334952020D-01 + pw(14)= 0.28737438822145949516145087089517893D-01 + pw(15)= 0.34044977279791239841839057487094614D-01 + pw(16)= 0.40390072912737209513481740353650117D-01 + pw(17)= 0.47975933504865389303614731289416436D-01 + pw(18)= 0.57040421240269648291641111667509080D-01 + pw(19)= 0.67863232374253637764436196090399013D-01 + pw(20)= 0.80774426563059878523617995686503505D-01 + pw(21)= 0.96164420051293883535914595526514204D-01 + pw(22)= 0.11449568019439733156432157598398321D+00 + pw(23)= 0.13631648279559252800439663886965342D+00 + pw(24)= 0.16227722290947194423527738296713650D+00 + pw(25)= 0.19314992693280818601029240961091576D+00 + pw(26)= 0.22985183401280394837726875217480564D+00 + pw(27)= 0.27347424857196858115127321375763169D+00 + pw(28)= 0.32531839177175415639431272795218179D+00 + pw(29)= 0.38694082912221267186717847996208730D+00 + pw(30)= 0.46021245154573169215949688893911839D+00 + pw(31)= 0.54739734403912358955407316606691874D+00 + pw(32)= 0.65126193729202891760603881973382504D+00 + pw(33)= 0.77523203175189474974013051456241330D+00 + pw(34)= 0.92362846689464044743151918155531626D+00 + pw(35)= 0.11020374135362457925484267398369432D+01 + pw(36)= 0.13179220978816197700516342241847467D+01 + pw(37)= 0.15816923933252917714931130477654841D+01 + pw(38)= 0.19087060424955494066311220499293247D+01 + pw(39)= 0.23233483559535813743753279936284135D+01 + pw(40)= 0.28683678978287494020672145638838434D+01 + pw(41)= 0.36301191889236335565768841054222539D+01 + pw(42)= 0.48276614242022139042477288493084378D+01 + pw(43)= 0.73428236438762783138460481364952627D+01 +endif +if(kn == 44) then + px( 1)= 0.32331350836957546603744084419849788D-03 + px( 2)= 0.17091703169716826666769331280363544D-02 + px( 3)= 0.42257875064315455206561743770986376D-02 + px( 4)= 0.79149690832372160827916911872302865D-02 + px( 5)= 0.12839179023186853233140061142661062D-01 + px( 6)= 0.19085232241568949577899855509474977D-01 + px( 7)= 0.26768883186791699077493305776558926D-01 + px( 8)= 0.36040864557027892650839059295103373D-01 + px( 9)= 0.47094585778667144264845412823636449D-01 + px(10)= 0.60175615761544264401796703227341314D-01 + px(11)= 0.75592961225388113581704029754538741D-01 + px(12)= 0.93732029387412788080736336661325883D-01 + px(13)= 0.11506910593622805003749038841255850D+00 + px(14)= 0.14018727035639085866142640394142696D+00 + px(15)= 0.16979393946354437271616691990993614D+00 + px(16)= 0.20474060430292067802523043906109989D+00 + px(17)= 0.24604566740448100845633933054720796D+00 + px(18)= 0.29492149096685690036119497895105667D+00 + px(19)= 0.35280682708785925696562700896642017D+00 + px(20)= 0.42140579664772257270434756016372204D+00 + px(21)= 0.50273460611520704557595850856713180D+00 + px(22)= 0.59917729885865279633574477547906320D+00 + px(23)= 0.71355204803416102949757204396579206D+00 + px(24)= 0.84918981260514039761248477195520438D+00 + px(25)= 0.10100276016887074982125673033831345D+01 + px(26)= 0.12007191488994777606413935958511916D+01 + px(27)= 0.14267665395522811775994265815554724D+01 + px(28)= 0.16946773518359189852395063278947950D+01 + px(29)= 0.20121533231038550963171865171844820D+01 + px(30)= 0.23883186915697571717337880438736199D+01 + px(31)= 0.28339996248002878664983052972434529D+01 + px(32)= 0.33620712713198530053650295556681017D+01 + px(33)= 0.39878972622760453246577762221089257D+01 + px(34)= 0.47299002992789324698627794306541718D+01 + px(35)= 0.56103262027415485179449762966436919D+01 + px(36)= 0.66563061139504917392538125507749794D+01 + px(37)= 0.79014003881557845126238132373008625D+01 + px(38)= 0.93879629282953731177093056973506411D+01 + px(39)= 0.11170992458953013913700288894894255D+02 + px(40)= 0.13324895111520577881664368628734089D+02 + px(41)= 0.15956559624176004069472090284905753D+02 + px(42)= 0.19234241988629565032886618545011883D+02 + px(43)= 0.23465930980103016989495364982244146D+02 + px(44)= 0.29411210662212999597346209126697707D+02 + pw( 1)= 0.83036884987403513181911166534208663D-03 + pw( 2)= 0.19457941864115460112729277032771097D-02 + pw( 3)= 0.30943299472937825975912487677647078D-02 + pw( 4)= 0.42943871620956718877444401513682917D-02 + pw( 5)= 0.55684261348013693314562032089920088D-02 + pw( 6)= 0.69428725530123470368722128932621244D-02 + pw( 7)= 0.84494234604398957532432243635753483D-02 + pw( 8)= 0.10126616172976027067397699017267216D-01 + pw( 9)= 0.12021566160747622141057949671782224D-01 + pw(10)= 0.14191778414242213809164508501045497D-01 + pw(11)= 0.16706905485358088359523445571590018D-01 + pw(12)= 0.19650346938023694904494456157357191D-01 + pw(13)= 0.23120695042785977245044097938079732D-01 + pw(14)= 0.27233209875082837015975056631389154D-01 + pw(15)= 0.32121665600246232435212867479076854D-01 + pw(16)= 0.37940949845194110884289250317530407D-01 + pw(17)= 0.44870698984096406199701289089460328D-01 + pw(18)= 0.53120093303752446444612781898982047D-01 + pw(19)= 0.62933823516449817737731500047201570D-01 + pw(20)= 0.74599224373678150636698930396457382D-01 + pw(21)= 0.88454634536330462816566048592135618D-01 + pw(22)= 0.10489914083790778708121373941299966D+00 + pw(23)= 0.12440396953470249784020277878699461D+00 + pw(24)= 0.14752589155074737609776847913675882D+00 + pw(25)= 0.17492312579420798336376479913219788D+00 + pw(26)= 0.20737437787760038519638439806427540D+00 + pw(27)= 0.24580187433567244121381237836878940D+00 + pw(28)= 0.29129959389673368311222620480570831D+00 + pw(29)= 0.34516843605203008284533614069195425D+00 + pw(30)= 0.40896093500380134048288051036096437D+00 + pw(31)= 0.48453955284019804235911876681118743D+00 + pw(32)= 0.57415497731524515579134334664918167D+00 + pw(33)= 0.68055495684587130214889340452349758D+00 + pw(34)= 0.80714146055673592921708826105860249D+00 + pw(35)= 0.95820721891454345652447310233062356D+00 + pw(36)= 0.11393080348437704220650007990276423D+01 + pw(37)= 0.13578783180672521841897407574039451D+01 + pw(38)= 0.16243074162684344243952826001273292D+01 + pw(39)= 0.19539528530618256474917072512577991D+01 + pw(40)= 0.23712430126501098428281754997758214D+01 + pw(41)= 0.29190630527561174415810627420409994D+01 + pw(42)= 0.36841316877299013922727992267156054D+01 + pw(43)= 0.48865818259133309055220603437140743D+01 + pw(44)= 0.74130021614058319715789731577046943D+01 +endif +if(kn == 45) then + px( 1)= 0.31600628834973779292559127048190037D-03 + px( 2)= 0.16702900106190273328935895866739893D-02 + px( 3)= 0.41285257114804008864642830706317964D-02 + px( 4)= 0.77296636135218507544800673352591724D-02 + px( 5)= 0.12531750213154611452085965505028540D-01 + px( 6)= 0.18615241131540077290889348326202186D-01 + px( 7)= 0.26087066631843252886061485477515189D-01 + px( 8)= 0.35085969566222178513934460224332887D-01 + px( 9)= 0.45789306577033888813174278261897663D-01 + px(10)= 0.58421434435784388362459047968056087D-01 + px(11)= 0.73263711622882621260808485115770070D-01 + px(12)= 0.90666037430716786669379135384670844D-01 + px(13)= 0.11105978210403381887459852100321712D+00 + px(14)= 0.13497200418547898709664745719407710D+00 + px(15)= 0.16304104546216750230118904547804422D+00 + px(16)= 0.19603390202336562386932779699909816D+00 + px(17)= 0.23486608544820513725469302847128384D+00 + px(18)= 0.28062490631545969684155812688765467D+00 + px(19)= 0.33459720050887095268667406972580638D+00 + px(20)= 0.39830252448023971951147973257838648D+00 + px(21)= 0.47353284765622731594152923198004578D+00 + px(22)= 0.56239982933240531713761978532829046D+00 + px(23)= 0.66739091019498584233201402129369733D+00 + px(24)= 0.79143567909811363215071199148744656D+00 + px(25)= 0.93798429449898007763809445265874043D+00 + px(26)= 0.11111001581385252565298438994097319D+01 + px(27)= 0.13155695863345333806714751926528375D+01 + px(28)= 0.15570319587074322621285835341493748D+01 + px(29)= 0.18421348417902183390620002381054988D+01 + px(30)= 0.21787200444979152172206336828110265D+01 + px(31)= 0.25760487257631498506605264603704116D+01 + px(32)= 0.30450769803228709332081128619189764D+01 + px(33)= 0.35987985256158263799485414495379126D+01 + px(34)= 0.42526795117768400994862312123212388D+01 + px(35)= 0.50252244422171176606674140586086445D+01 + px(36)= 0.59387361521365530542536690567593329D+01 + px(37)= 0.70203754298618509877391417240792276D+01 + px(38)= 0.83037051949719291418317245580593492D+01 + px(39)= 0.98310601809174348734008913125923897D+01 + px(40)= 0.11657412426662143324540788749464127D+02 + px(41)= 0.13857164426266931434200566791289955D+02 + px(42)= 0.16537288540540952756080764303901777D+02 + px(43)= 0.19866358206258941542582357564294186D+02 + px(44)= 0.24153320185396072162240284411839110D+02 + px(45)= 0.30161032818176703094230836042654050D+02 + pw( 1)= 0.81157322026241886094392714295783692D-03 + pw( 2)= 0.19011777475163676304804330211208542D-02 + pw( 3)= 0.30217126568442239903479316417011601D-02 + pw( 4)= 0.41901870013204838998208477401165665D-02 + pw( 5)= 0.54273283828323581210343431353844446D-02 + pw( 6)= 0.67573866388465515363650925512746062D-02 + pw( 7)= 0.82092751225427288897814574011554460D-02 + pw( 8)= 0.98179504955536258107199257981433439D-02 + pw( 9)= 0.11625949429528062575538287604998941D-01 + pw(10)= 0.13685006654390859174410475886206370D-01 + pw(11)= 0.16057648452306267238525538418198228D-01 + pw(12)= 0.18818661412340581662760322741958838D-01 + pw(13)= 0.22056412054981119160700553625749748D-01 + pw(14)= 0.25874134633155885709619608590472202D-01 + pw(15)= 0.30391451859719384524106580753360271D-01 + pw(16)= 0.35746460932048448724214073937351505D-01 + pw(17)= 0.42098664685115079575098628197326142D-01 + pw(18)= 0.49632898864791727296990224145235518D-01 + pw(19)= 0.58564289944594589064829987997677920D-01 + pw(20)= 0.69144234514984419908558444457124034D-01 + pw(21)= 0.81667423637721859042077146566843517D-01 + pw(22)= 0.96480011247988074883978780802066204D-01 + pw(23)= 0.11398911344507944971212279753122991D+00 + pw(24)= 0.13467391267458107409324694436955753D+00 + pw(25)= 0.15909873207292332703499649578337212D+00 + pw(26)= 0.18792855598692739186974920544448162D+00 + pw(27)= 0.22194762553095863671304473597844931D+00 + pw(28)= 0.26208196494751982936477406183051153D+00 + pw(29)= 0.30942704406840347450420829122426871D+00 + pw(30)= 0.36528233311686115136869183803961988D+00 + pw(31)= 0.43119539125683582249214467187083528D+00 + pw(32)= 0.50901957973734783536801637831942790D+00 + pw(33)= 0.60099191357850333533977421143095798D+00 + pw(34)= 0.70984171451926250865508439176773176D+00 + pw(35)= 0.83894803839540918949226578045417203D+00 + pw(36)= 0.99257719538081268693583754948673040D+00 + pw(37)= 0.11762571380368294152931731126534612D+01 + pw(38)= 0.13973967807100820386062004887130792D+01 + pw(39)= 0.16663689232723581345802345799199851D+01 + pw(40)= 0.19985351917633034604206124546990186D+01 + pw(41)= 0.24183711599931949498296065604711404D+01 + pw(42)= 0.29689015304223315699189706042837967D+01 + pw(43)= 0.37372089827137538865937716176457237D+01 + pw(44)= 0.49444904398530770204722137069339350D+01 + pw(45)= 0.74820372303276097896128738491191685D+01 +endif +if(kn == 46) then + px( 1)= 0.30902377399120893788354424898855426D-03 + px( 2)= 0.16331537506671398843215268321368443D-02 + px( 3)= 0.40357010980175320995569579449408174D-02 + px( 4)= 0.75530195763806797812195436379199476D-02 + px( 5)= 0.12239149854724502952182976196758722D-01 + px( 6)= 0.18168803621919060471207612598191755D-01 + px( 7)= 0.25440976363694145320552734001486569D-01 + px( 8)= 0.34183679422904259346408703316308100D-01 + px( 9)= 0.44559964643964573543760075913259475D-01 + px(10)= 0.56775358517699538859278511856645348D-01 + px(11)= 0.71086748757462020933356688085151754D-01 + px(12)= 0.87812673145710441814424785984121246D-01 + px(13)= 0.10734488854150171164189046430233515D+00 + px(14)= 0.13016110718036063404346759877348097D+00 + px(15)= 0.15683892196176944176774935350653314D+00 + px(16)= 0.18807118924767432734960867652772029D+00 + px(17)= 0.22468341948583423108691031411123462D+00 + px(18)= 0.26765394743983501400088905846999460D+00 + px(19)= 0.31813776531045013135926232571664330D+00 + px(20)= 0.37749492399880062738311332663588965D+00 + px(21)= 0.44732440247018853385136813722855067D+00 + px(22)= 0.52950437210775343555603255060398518D+00 + px(23)= 0.62623987488687805361419457851831370D+00 + px(24)= 0.74011910006308993968762196795007188D+00 + px(25)= 0.87417968361959824522374584708126947D+00 + px(26)= 0.10319867719667534605241559659266134D+01 + px(27)= 0.12177250028776661981781648200867157D+01 + px(28)= 0.14363070973204841479279696750353849D+01 + px(29)= 0.16935024864806789035793857077605729D+01 + px(30)= 0.19960904185444808460178685393801009D+01 + px(31)= 0.23520434618090888941520463681053941D+01 + px(32)= 0.27707495103418382337989450786653183D+01 + px(33)= 0.32632837492864188626066742354315931D+01 + px(34)= 0.38427473041855077266339137871017428D+01 + px(35)= 0.45246978009253960226440639549407685D+01 + px(36)= 0.53277111750428588477775437304742452D+01 + px(37)= 0.62741382355089597665586817773799499D+01 + px(38)= 0.73911624234084726194161817156844889D+01 + px(39)= 0.87123449938120379656808053147144537D+01 + px(40)= 0.10280000673796667050592943428615469D+02 + px(41)= 0.12149077777983097408939533602647387D+02 + px(42)= 0.14393981931423795298134131653931530D+02 + px(43)= 0.17121779022116518315203538273624565D+02 + px(44)= 0.20501370994989532793115106347627070D+02 + px(45)= 0.24842669385551363138030331359754804D+02 + px(46)= 0.30911787987864913738727989247192145D+02 + pw( 1)= 0.79361463897360195929686273302248409D-03 + pw( 2)= 0.18585856237700307630029880256700761D-02 + pw( 3)= 0.29524996535416808392477309033103772D-02 + pw( 4)= 0.40910988545579399083185141944050702D-02 + pw( 5)= 0.52935540515794390997929859327326122D-02 + pw( 6)= 0.65821742586378490181117941215141045D-02 + pw( 7)= 0.79834063288880762331149927032946792D-02 + pw( 8)= 0.95290513069482303557158918089126201D-02 + pw( 9)= 0.11257625314697109024742951848074159D-01 + pw(10)= 0.13215805246967286000173950239611213D-01 + pw(11)= 0.15459871723048351392169871195100160D-01 + pw(12)= 0.18057057593147761999943506400696980D-01 + pw(13)= 0.21086760440076034016469007977829453D-01 + pw(14)= 0.24641687279129405797304341158994351D-01 + pw(15)= 0.28829130387470289935818007648098355D-01 + pw(16)= 0.33772655230285352209194232463306431D-01 + pw(17)= 0.39614465986840122572670076622475638D-01 + pw(18)= 0.46518617394519992951670269787487787D-01 + pw(19)= 0.54675131186708511535131483491430821D-01 + pw(20)= 0.64305014007496806948186778346285377D-01 + pw(21)= 0.75666179654741774777744798242374952D-01 + pw(22)= 0.89060332032660676125924796117040007D-01 + pw(23)= 0.10484093752103114356647229133814935D+00 + pw(24)= 0.12342248951027967461470307235875341D+00 + pw(25)= 0.14529134199630383752180561542530020D+00 + pw(26)= 0.17101847250539549716018144547422270D+00 + pw(27)= 0.20127464277089058198410341836023683D+00 + pw(28)= 0.23684858001359149093932878615193101D+00 + pw(29)= 0.27866903348006793962100039913921415D+00 + pw(30)= 0.32783191849208153321924464180174769D+00 + pw(31)= 0.38563432289935890460881756854144066D+00 + pw(32)= 0.45361805217372703197887339399899876D+00 + pw(33)= 0.53362686058844350813392769068823926D+00 + pw(34)= 0.62788396771362463542546047416846800D+00 + pw(35)= 0.73910064578742344795091934329070699D+00 + pw(36)= 0.87063402416829911198578365788259413D+00 + pw(37)= 0.10267256741807495552845728772900802D+01 + pw(38)= 0.12128781106626951287021151112542470D+01 + pw(39)= 0.14364778560404412196974965116370614D+01 + pw(40)= 0.17078848068460459936312054922566995D+01 + pw(41)= 0.20424685786283419841913953641602057D+01 + pw(42)= 0.24647555529206493569854377641616308D+01 + pw(43)= 0.30179124308683067944200720815335092D+01 + pw(44)= 0.37893852356394974569134386395867514D+01 + pw(45)= 0.50014251355936200780723745695031871D+01 + pw(46)= 0.75499704094498894150628414754712103D+01 +endif +if(kn == 47) then + px( 1)= 0.30234480684949870042066556848012510D-03 + px( 2)= 0.15976464953770958135599778441796202D-02 + px( 3)= 0.39470145814685269106194100213451950D-02 + px( 4)= 0.73844351214347704596731706115099534D-02 + px( 5)= 0.11960307325670953208852468939751834D-01 + px( 6)= 0.17744140854528926114461559850667081D-01 + px( 7)= 0.24827775421109866384949286855042231D-01 + px( 8)= 0.33329587337597891764863922994014555D-01 + px( 9)= 0.43399840203304717839217570161011672D-01 + px(10)= 0.55227290263964079065971805634237348D-01 + px(11)= 0.69047097585245923081163450963219921D-01 + px(12)= 0.85150014954361349896311981444336255D-01 + px(13)= 0.10389275615680304450176721539326430D+00 + px(14)= 0.12570943253264044220914087598374078D+00 + px(15)= 0.15112403488297752197534393801499475D+00 + px(16)= 0.18076413027130191024242456354592003D+00 + px(17)= 0.21537618776652269918043941270387835D+00 + px(18)= 0.25584316244558279667676325421037561D+00 + px(19)= 0.30320509530959575930710602679134976D+00 + px(20)= 0.35868352704264273864602902528848421D+00 + px(21)= 0.42371052026058185226325968835995444D+00 + px(22)= 0.49996309228509504682760172037312828D+00 + px(23)= 0.58940391513730639900316144249394630D+00 + px(24)= 0.69432925544472178824662119719828653D+00 + px(25)= 0.81742530558526965352748082134155990D+00 + px(26)= 0.96183429918308909086642705501460506D+00 + px(27)= 0.11312321171355587417637105018144043D+01 + px(28)= 0.13299194954977380580964549469885521D+01 + px(29)= 0.15629294821033111247460192707385455D+01 + px(30)= 0.18361545182421012128782235750936341D+01 + px(31)= 0.21564975473013180685304927966921050D+01 + px(32)= 0.25320530389913426066138968271032273D+01 + px(33)= 0.29723260351325192884401956439321237D+01 + px(34)= 0.34885007189662000039152029330111157D+01 + px(35)= 0.40937753452277820469237089959367401D+01 + px(36)= 0.48037889682999426543864264911219419D+01 + px(37)= 0.56371796552402367761125198516216112D+01 + px(38)= 0.66163382295353580459275111274902267D+01 + px(39)= 0.77684648059929853861219213698259439D+01 + px(40)= 0.91271156012149482363243828361549091D+01 + px(41)= 0.10734585093601096152552856517558275D+02 + px(42)= 0.12645801076306643342528896213826369D+02 + px(43)= 0.14935178894323617316649392464504258D+02 + px(44)= 0.17709887244612807742256277065719222D+02 + px(45)= 0.21139166627448714331281414920298006D+02 + px(46)= 0.25533898913637103231788330664255958D+02 + px(47)= 0.31663433742152343555689779493983534D+02 + pw( 1)= 0.77643840612392469558233568878288438D-03 + pw( 2)= 0.18178823313702654997790171434491737D-02 + pw( 3)= 0.28864536542127938374932216849800024D-02 + pw( 4)= 0.39967467153526294501560789189634526D-02 + pw( 5)= 0.51665295391631236792192347882343893D-02 + pw( 6)= 0.64163743838733321438814922251353746D-02 + pw( 7)= 0.77705352297994246418018494260403994D-02 + pw( 8)= 0.92580225788462023682293420241402079D-02 + pw( 9)= 0.10913810605238600296521875414785753D-01 + pw(10)= 0.12780129454852165115348470540659794D-01 + pw(11)= 0.14907770873253776058864543333587366D-01 + pw(12)= 0.17357325452550337474212111231031977D-01 + pw(13)= 0.20200301191038969448754000899427036D-01 + pw(14)= 0.23520156080274576647610305543693418D-01 + pw(15)= 0.27413389239659681777619137570103249D-01 + pw(16)= 0.31990921824324695951826704036400849D-01 + pw(17)= 0.37380010931288310349162200884914999D-01 + pw(18)= 0.43726873583833196750460928410582440D-01 + pw(19)= 0.51200100443073974036149329120700021D-01 + pw(20)= 0.59994868260574886373334055836155630D-01 + pw(21)= 0.70337945020122803930065727247244319D-01 + pw(22)= 0.82493514750755835638795021587104324D-01 + pw(23)= 0.96769906495425836064670059167481399D-01 + pw(24)= 0.11352737495114584133685628457043583D+00 + pw(25)= 0.13318714247714292913023639085868125D+00 + pw(26)= 0.15624197763113426788311768418626933D+00 + pw(27)= 0.18326866459826594224170442595536027D+00 + pw(28)= 0.21494282570862350034472593398023850D+00 + pw(29)= 0.25205671631617380517032575867290262D+00 + pw(30)= 0.29554084830882541265073842592331588D+00 + pw(31)= 0.34649066398539118260401510223685881D+00 + pw(32)= 0.40620005573936491411597998731411544D+00 + pw(33)= 0.47620444337408185641294547642135994D+00 + pw(34)= 0.55833761203594537949491272468091006D+00 + pw(35)= 0.65480899160066358211037008014529884D+00 + pw(36)= 0.76831227871509708311671525382157320D+00 + pw(37)= 0.90218370924525609794334519875989345D+00 + pw(38)= 0.10606417731219318086869090064772246D+01 + pw(39)= 0.12491658485426170682375819102086492D+01 + pw(40)= 0.14751229591934938816623767074938966D+01 + pw(41)= 0.17488634335620615325269065049873221D+01 + pw(42)= 0.20857683710668100441568568919092486D+01 + pw(43)= 0.25104181373755596577440533667319287D+01 + pw(44)= 0.30661234529278624717819692903450825D+01 + pw(45)= 0.38406927297086597567375854792414220D+01 + pw(46)= 0.50574215821071962594952640986635070D+01 + pw(47)= 0.76168408553950413361259182112809766D+01 +endif +if(kn == 48) then + px( 1)= 0.29595002899720089598048509729240858D-03 + px( 2)= 0.15636631385928177658857961799981344D-02 + px( 3)= 0.38621935810659565095543543710661065D-02 + px( 4)= 0.72233636458828631437195788212396685D-02 + px( 5)= 0.11694254587023519966120947669484770D-01 + px( 6)= 0.17339652834546167382576155326885531D-01 + px( 7)= 0.24244927054495278219738066751504960D-01 + px( 8)= 0.32519777490587042844934209629601974D-01 + px( 9)= 0.42303000131579013083618262933841541D-01 + px(10)= 0.53768369083049019518863347968872785D-01 + px(11)= 0.67131693007024900369022557250738329D-01 + px(12)= 0.82659034800333661590098251027713411D-01 + px(13)= 0.10067601794995718748717592252167950D+00 + px(14)= 0.12157811670283109994038391671204829D+00 + px(15)= 0.14584188025331271469532975454293184D+00 + px(16)= 0.17403718708094337911584686617663455D+00 + px(17)= 0.20684083223970121872293135877750441D+00 + px(18)= 0.24505195226053213810882336066380088D+00 + px(19)= 0.28960993053758356990356801037005909D+00 + px(20)= 0.34161548376800915367159074480126869D+00 + px(21)= 0.40235563464585336049157109682672131D+00 + px(22)= 0.47333327369938541669100296412222937D+00 + px(23)= 0.55630204160312520623134123151678925D+00 + px(24)= 0.65330734108265942786541154430622162D+00 + px(25)= 0.76673441875356785236785315155850152D+00 + px(26)= 0.89936464142274902321479563230619771D+00 + px(27)= 0.10544413316663370284059399235987637D+01 + px(28)= 0.12357468356208411453551987100600846D+01 + px(29)= 0.14476928958143963682846712437006035D+01 + px(30)= 0.16954269342764845689137922115223447D+01 + px(31)= 0.19849575816646326604339901913933094D+01 + px(32)= 0.23233038211583159605827592291675721D+01 + px(33)= 0.27186736189401563011050998256085975D+01 + px(34)= 0.31806801589007531007613766645355501D+01 + px(35)= 0.37206072396727045282926190726802162D+01 + px(36)= 0.43517407927213647930655693816847034D+01 + px(37)= 0.50897921727561576322594218577457258D+01 + px(38)= 0.59534532497729963738598299894774892D+01 + px(39)= 0.69651478712196520721782791126144665D+01 + px(40)= 0.81520877402662049285349974267562675D+01 + px(41)= 0.95478213967711421151781315680800985D+01 + px(42)= 0.11194623319536797531467272595329506D+02 + px(43)= 0.13147404152263011059682758275653972D+02 + px(44)= 0.15480595341123512509584325553694590D+02 + px(45)= 0.18301477075836510534702330167968349D+02 + px(46)= 0.21779637522565706043537218918652790D+02 + px(47)= 0.26226933347488070796043946213342574D+02 + px(48)= 0.32415929770269557720899017070958329D+02 + pw( 1)= 0.75999449282242525498888775227070420D-03 + pw( 2)= 0.17789442907920029044278935841370033D-02 + pw( 3)= 0.28233592185581488393945265516117315D-02 + pw( 4)= 0.39067912705549311303324714399927545D-02 + pw( 5)= 0.50457409167121635114256894882489535D-02 + pw( 6)= 0.62592215195201857161143217251824231D-02 + pw( 7)= 0.75695311456923674293727515403732119D-02 + pw( 8)= 0.90032044940945145010068536558863271D-02 + pw( 9)= 0.10592085506839418968886878624905230D-01 + pw(10)= 0.12374483314193827446279695880913988D-01 + pw(11)= 0.14396352965559528046529852650738501D-01 + pw(12)= 0.16712429540159363311802064676696312D-01 + pw(13)= 0.19387262265658075348794679025267961D-01 + pw(14)= 0.22496156590284673178523069618335189D-01 + pw(15)= 0.26126125032895265676414993536188666D-01 + pw(16)= 0.30377032282566849853328475248140570D-01 + pw(17)= 0.35363150724079551172748521328984452D-01 + pw(18)= 0.41215303456842402904128482278539693D-01 + pw(19)= 0.48083691328284411416338444917462566D-01 + pw(20)= 0.56141427938196717060539102662208221D-01 + pw(21)= 0.65588776105316823321919603829156998D-01 + pw(22)= 0.76658094133956830129535839504567249D-01 + pw(23)= 0.89619543419632664042116217247700679D-01 + pw(24)= 0.10478766192717970201342332439078370D+00 + pw(25)= 0.12252896123742271255427206161487662D+00 + pw(26)= 0.14327075828486752574672706211131092D+00 + pw(27)= 0.16751151313821774235835757278858330D+00 + pw(28)= 0.19583302163349966411912965960570620D+00 + pw(29)= 0.22891492049551029451481007469340092D+00 + pw(30)= 0.26755212283797206211340817029488241D+00 + pw(31)= 0.31267604416833112127254011348613261D+00 + pw(32)= 0.36538085205037423661137648346933768D+00 + pw(33)= 0.42695655668634922335366944118783047D+00 + pw(34)= 0.49893168999297620297105696955480497D+00 + pw(35)= 0.58312983017364951950857474996168896D+00 + pw(36)= 0.68174673032193870559875685439839375D+00 + pw(37)= 0.79745906091752647870294820280336694D+00 + pw(38)= 0.93358322488211662322908533451921218D+00 + pw(39)= 0.10943162513760950011430094815688344D+01 + pw(40)= 0.12851165678797307366335354072388698D+01 + pw(41)= 0.15133343910778260993594764587330245D+01 + pw(42)= 0.17893135308834332452949764559455513D+01 + pw(43)= 0.21284497101212601640922819767089646D+01 + pw(44)= 0.25553800629427160180922388763367066D+01 + pw(45)= 0.31135609716998794233456050246781466D+01 + pw(46)= 0.38911620012948672120134104867773046D+01 + pw(47)= 0.51125134158119019180657428081205730D+01 + pw(48)= 0.76826855076385203411408423144047970D+01 +endif +if(kn == 49) then + px( 1)= 0.28982169661609259925974979296199715D-03 + px( 2)= 0.15311074628945562334530690364685912D-02 + px( 3)= 0.37809891427061498988417968394892015D-02 + px( 4)= 0.70693075551075721251308809528074262D-02 + px( 5)= 0.11440114051043380640580809174873546D-01 + px( 6)= 0.16953896205505568651585337372141638D-01 + px( 7)= 0.23690155524198397880316141824091359D-01 + px( 8)= 0.31750757826730201808237900276821105D-01 + px( 9)= 0.41264185607363735111209847472395680D-01 + px(10)= 0.52390788296171869395123759706096667D-01 + px(11)= 0.65329088415812943007951010158851832D-01 + px(12)= 0.80323145971104464698417160245071838D-01 + px(13)= 0.97670923692398180646763841694521778D-01 + px(14)= 0.11773356193272553739176999855177642D+00 + px(15)= 0.14094549927124676527452589226795674D+00 + px(16)= 0.16782548211787615810300942831732404D+00 + px(17)= 0.19898867709139685594599685847717735D+00 + px(18)= 0.23516028351061490409755316887724481D+00 + px(19)= 0.27719118480638011914316315022611666D+00 + px(20)= 0.32607625000818758221137748238031822D+00 + px(21)= 0.38297591208948031388506643958671483D+00 + px(22)= 0.44924164526126338307000295982995931D+00 + px(23)= 0.52644597452520216232192817378809657D+00 + px(24)= 0.61641769981161764547143482902113440D+00 + px(25)= 0.72128311119138160917726410382682506D+00 + px(26)= 0.84351411110478227605310059636301640D+00 + px(27)= 0.98598434482611919288728229535556820D+00 + px(28)= 0.11520346774611530676609320335187635D+01 + px(29)= 0.13455496592189661370306885171963069D+01 + px(30)= 0.15710470166859281003411527674326210D+01 + px(31)= 0.18337827389082834345983893278310805D+01 + px(32)= 0.21398750605157030327208430120547271D+01 + px(33)= 0.24964516861943417767039670584804599D+01 + px(34)= 0.29118261213763337159423664681275174D+01 + px(35)= 0.33957112485642962686719816570055312D+01 + px(36)= 0.39594817726282308325228740791813504D+01 + px(37)= 0.46165026215025079679688539374253711D+01 + px(38)= 0.53825491684099448048962771604007379D+01 + px(39)= 0.62763596401674826758369077814162276D+01 + px(40)= 0.73203847840971806343130509762279760D+01 + px(41)= 0.85418435921307030922239809237588315D+01 + px(42)= 0.99742749209607261981040189301013931D+01 + px(43)= 0.11659933902343682924575082774292738D+02 + px(44)= 0.13653717512175632633212031175648009D+02 + px(45)= 0.16030079451319704520212844485652403D+02 + px(46)= 0.18896419514536338819621757969794054D+02 + px(47)= 0.22422681781072229358731374027395797D+02 + px(48)= 0.26921701181159762662101522272189023D+02 + px(49)= 0.33169237709310133472812163437610434D+02 + pw( 1)= 0.74423705557468321912970974468971512D-03 + pw( 2)= 0.17416585473194369192981311717828790D-02 + pw( 3)= 0.27630202767273380717880972293549723D-02 + pw( 4)= 0.38209254857123832686825908868042634D-02 + pw( 5)= 0.49307262850810818969143173604709699D-02 + pw( 6)= 0.61100327216620297105479543777143453D-02 + pw( 7)= 0.73793929142489472766538930130422414D-02 + pw( 8)= 0.87631380820224889920133086400922484D-02 + pw( 9)= 0.10290337290113942038466358200627605D-01 + pw(10)= 0.11995832288470777205381700382547014D-01 + pw(11)= 0.13921304955008894938778083468431898D-01 + pw(12)= 0.16116315842092009170407309532189491D-01 + pw(13)= 0.18639261057732707055179140040607199D-01 + pw(14)= 0.21558238862705300435120252889679016D-01 + pw(15)= 0.24951892641527327511221421263371520D-01 + pw(16)= 0.28910375260277562929269397616107752D-01 + pw(17)= 0.33536621862509534402237961505810400D-01 + pw(18)= 0.38948101356497020534071631819477111D-01 + pw(19)= 0.45279154574446597766189875771988559D-01 + pw(20)= 0.52683958194920854977905259654150493D-01 + pw(21)= 0.61340113116439272718820743217394536D-01 + pw(22)= 0.71452855489322219691816819653732876D-01 + pw(23)= 0.83259918269799951973839847297358939D-01 + pw(24)= 0.97037114546374637815371486877102608D-01 + pw(25)= 0.11310475960635753107742085931930780D+00 + pw(26)= 0.13183509368952202468340313538628788D+00 + pw(27)= 0.15366091498274177979540995552638069D+00 + pw(28)= 0.17908568983953526314195097583061572D+00 + pw(29)= 0.20869548441677582027963091462587493D+00 + pw(30)= 0.24317317246706239890716235938537564D+00 + pw(31)= 0.28331553768501980083448486564952605D+00 + pw(32)= 0.33005413638688135108429388605654924D+00 + pw(33)= 0.38448116653084696134266394851745611D+00 + pw(34)= 0.44788218293156059076871251650381853D+00 + pw(35)= 0.52177844169743715495966266074704591D+00 + pw(36)= 0.60798318304777180959943540325649013D+00 + pw(37)= 0.70867868475109945308012661652305988D+00 + pw(38)= 0.82652519802949145510247997374762292D+00 + pw(39)= 0.96482036576439726113478189858839344D+00 + pw(40)= 0.11277413304728328935012105843530012D+01 + pw(41)= 0.13207276384535346696169033036111123D+01 + pw(42)= 0.15511151994361137315814825047961978D+01 + pw(43)= 0.18292440906220512275179521250816934D+01 + pw(44)= 0.21705274773423284009902208179302011D+01 + pw(45)= 0.25996616983900981131162894691162348D+01 + pw(46)= 0.31602501106700710441030906881720498D+01 + pw(47)= 0.39408219589198821554199569326106068D+01 + pw(48)= 0.51667323905056736314902652284285907D+01 + pw(49)= 0.77475392524477876553320371372226975D+01 +endif +if(kn == 50) then + px( 1)= 0.28394351112953297748994559783102511D-03 + px( 2)= 0.14998911954688725774856401484033629D-02 + px( 3)= 0.37031733634109224832474380329849766D-02 + px( 4)= 0.69218127312350228406485064567062889D-02 + px( 5)= 0.11197087943784824700478952929697149D-01 + px( 6)= 0.16585564985245590131106268275448670D-01 + px( 7)= 0.23161412500517665834595664639261402D-01 + px( 8)= 0.31019403043677038055911010779170235D-01 + px( 9)= 0.40278717647655643037391307728658037D-01 + px(10)= 0.51087642190675989042234002017798487D-01 + px(11)= 0.63629213809160599368347959049041399D-01 + px(12)= 0.78127829475675329032296849094039453D-01 + px(13)= 0.94856775739499734452627500076424815D-01 + px(14)= 0.11414660148335876745918230546444594D+00 + px(15)= 0.13639426440178507981483041317230735D+00 + px(16)= 0.16207305783142124363194134470205347D+00 + px(17)= 0.19174346191330080395615598185222217D+00 + px(18)= 0.22606522557261662878988725676839652D+00 + px(19)= 0.26581112483702385355663297234020894D+00 + px(20)= 0.31188292620679003206265405632135533D+00 + px(21)= 0.36533011131301634013423764015010912D+00 + px(22)= 0.42737191711071555589396273204369958D+00 + px(23)= 0.49942324705020639374541286536052638D+00 + px(24)= 0.58312503667951418472684968751768941D+00 + px(25)= 0.68037972242344487980438298325566643D+00 + px(26)= 0.79339256658260940896953648094143840D+00 + px(27)= 0.92471973446804368904394005417532194D+00 + px(28)= 0.10773242035057135225415445929049064D+01 + px(29)= 0.12546408175407001854792111537007726D+01 + px(30)= 0.14606520990918797361653158063467201D+01 + px(31)= 0.16999768257560608388590285583482666D+01 + px(32)= 0.19779739082773843717702150700803880D+01 + px(33)= 0.23008648456607864889580088643288349D+01 + px(34)= 0.26758790850855905290663568936243531D+01 + px(35)= 0.31114281531421848676256035865658428D+01 + px(36)= 0.36173167280945029265548597355573382D+01 + px(37)= 0.42050023489136640359196188798116726D+01 + px(38)= 0.48879209793922610146974780441291671D+01 + px(39)= 0.56819045073984933891246336517741761D+01 + px(40)= 0.66057308705566818707545720531514149D+01 + px(41)= 0.76818723662188017987959388336177329D+01 + px(42)= 0.89375516656298253147474053069187414D+01 + px(43)= 0.10406296477480503239729452647809424D+02 + px(44)= 0.12130343567869783578305872825995901D+02 + px(45)= 0.14164579787764828196349642136898031D+02 + px(46)= 0.16583487005376427804066458589286026D+02 + px(47)= 0.19494592188944424125485472332517264D+02 + px(48)= 0.23068202782732093092557399540639023D+02 + px(49)= 0.27618134557703623179690912010041751D+02 + px(50)= 0.33923321036009768333728618541703920D+02 + pw( 1)= 0.72912399684308635017096115410335056D-03 + pw( 2)= 0.17059216228519449314704147393870548D-02 + pw( 3)= 0.27052579385463033654518158126252911D-02 + pw( 4)= 0.37388707495255014610097719618737042D-02 + pw( 5)= 0.48210691945239681765906911252349642D-02 + pw( 6)= 0.59681965658636724574936823069437344D-02 + pw( 7)= 0.71992306962236770905536136519431856D-02 + pw( 8)= 0.85365357689533034566484478187242343D-02 + pw( 9)= 0.10006713677868850439161417859632759D-01 + pw(10)= 0.11641531351319812896113767691847514D-01 + pw(11)= 0.13478885680498147705267761047018866D-01 + pw(12)= 0.15563753671054168683182913698539199D-01 + pw(13)= 0.17949077884564351780048811341841315D-01 + pw(14)= 0.20696568103487745734831198958981091D-01 + pw(15)= 0.23877459843864173187990416648243479D-01 + pw(16)= 0.27573340091582106763900700105376174D-01 + pw(17)= 0.31877198204074685512995246247087960D-01 + pw(18)= 0.36894860399462428730109186000241340D-01 + pw(19)= 0.42746921785947037355880124311792745D-01 + pw(20)= 0.49571228532988571977201451960147272D-01 + pw(21)= 0.57525917359503163899312028414966321D-01 + pw(22)= 0.66793006913272852902897497204087823D-01 + pw(23)= 0.77582552855288975335680322806095329D-01 + pw(24)= 0.90137412508095941124740779645748160D-01 + pw(25)= 0.10473870396294021438411474581832967D+00 + pw(26)= 0.12171208319009365070220694175921390D+00 + pw(27)= 0.14143500171709279543618029697282644D+00 + pw(28)= 0.16434515151851001863886624876712371D+00 + pw(29)= 0.19095035999365251682074028234920175D+00 + pw(30)= 0.22184027575178191861381490980142531D+00 + pw(31)= 0.25770029856713080526288855613179455D+00 + pw(32)= 0.29932837398793730342885176518781178D+00 + pw(33)= 0.34765552544247195237609202772613096D+00 + pw(34)= 0.40377138372726537143521552364635440D+00 + pw(35)= 0.46895657668751681273934682099459395D+00 + pw(36)= 0.54472479692767961940426388856919793D+00 + pw(37)= 0.63287890678130081055700714208328112D+00 + pw(38)= 0.73558798250487807944749198073740843D+00 + pw(39)= 0.85549650522124124655227075039375323D+00 + pw(40)= 0.99588443073362582872294183483256617D+00 + pw(41)= 0.11609105355219986209989146786182259D+01 + pw(42)= 0.13559974384949635725484334023159422D+01 + pw(43)= 0.15884690610914334611993355370110858D+01 + pw(44)= 0.18686642910659719608475187905997900D+01 + pw(45)= 0.22120162660997500537540215058314098D+01 + pw(46)= 0.26432826564193229543032457728289448D+01 + pw(47)= 0.32062148179068205963629824250927723D+01 + pw(48)= 0.39897000025706165355011208836568943D+01 + pw(49)= 0.52201085267504392582124308457577559D+01 + pw(50)= 0.78114350904888610285435916161990246D+01 +endif +if(kn == 51) then + px( 1)= 0.27830048331694745146626670725003848D-03 + px( 2)= 0.14699332467589495012214296420135614D-02 + px( 3)= 0.36285373076700958382889506232860957D-02 + px( 4)= 0.67804640421568805932750563900953938D-02 + px( 5)= 0.10964449641475153555650317586921338D-01 + px( 6)= 0.16233474841656748209131399424045480D-01 + px( 7)= 0.22656849606742837535759660703189982D-01 + px( 8)= 0.30322907994226545082257492807954794D-01 + px( 9)= 0.39342419929775285730416663052397862D-01 + px(10)= 0.49852801135275356633188650023787799D-01 + px(11)= 0.62023178388698049878991488778075058D-01 + px(12)= 0.76060329272036397821127225875497380D-01 + px(13)= 0.92215469045402426815676149451909456D-01 + px(14)= 0.11079181963288772179472869891025245D+00 + px(15)= 0.13215289192145648970655359123013550D+00 + px(16)= 0.15673146375949390138182262372756365D+00 + px(17)= 0.18503934411172685168403222212810987D+00 + px(18)= 0.21767815425667758980667415659377157D+00 + px(19)= 0.25535148902919646272821292167480645D+00 + px(20)= 0.29887891101338956958304222194317216D+00 + px(21)= 0.34921226930606420176060402127989888D+00 + px(22)= 0.40745483836472093899131085922052085D+00 + px(23)= 0.47488376920818361679977841816170633D+00 + px(24)= 0.55297635865326739663234570169538373D+00 + px(25)= 0.64344068535303042620990947995897496D+00 + px(26)= 0.74825123790612149826635700308884120D+00 + px(27)= 0.86969026995696913003053787424248180D+00 + px(28)= 0.10103957605313364799613553988071921D+01 + px(29)= 0.11734170393027801618266450011528511D+01 + px(30)= 0.13622793663980858168640978903164547D+01 + px(31)= 0.15810590528845442660473438585783950D+01 + px(32)= 0.18344711002533369402909515294518176D+01 + px(33)= 0.21279718700902070387593931590109607D+01 + px(34)= 0.24678800386227544163678584032999521D+01 + px(35)= 0.28615201544104645116093866929518600D+01 + px(36)= 0.33173946748043709269495501699195293D+01 + px(37)= 0.38453926882082167185869634333011899D+01 + px(38)= 0.44570470958112522799784043488271277D+01 + px(39)= 0.51658576054751924442339769813379872D+01 + px(40)= 0.59877058293844537789530450556075720D+01 + px(41)= 0.69414034941300311714983901016833412D+01 + px(42)= 0.80494397879808445218602940428136405D+01 + px(43)= 0.93390380570666107887290025527994856D+01 + px(44)= 0.10843713859839632308899371210210092D+02 + px(45)= 0.12605686843057786008572981971452711D+02 + px(46)= 0.14679837296994134584444426535907765D+02 + px(47)= 0.17140680920657630892046419784075754D+02 + px(48)= 0.20095878907672599896988947474029035D+02 + px(49)= 0.23716108785958653467610019526488283D+02 + px(50)= 0.28316168939449414612178766745269557D+02 + px(51)= 0.34678144812502309409979036489309933D+02 + pw( 1)= 0.71461661125073618577714141168068574D-03 + pw( 2)= 0.16716385878603382275615092392545966D-02 + pw( 3)= 0.26499087137809206936489894716951881D-02 + pw( 4)= 0.36603737280291944817450300201489801D-02 + pw( 5)= 0.47163932617577667464231453856574222D-02 + pw( 6)= 0.58331641163766286245746932185477944D-02 + pw( 7)= 0.70282510906775725331369422805168084D-02 + pw( 8)= 0.83222573157223186581625603590243717D-02 + pw( 9)= 0.97395848045625290059070106665033066D-02 + pw(10)= 0.11309266338983814689829245251610443D-01 + pw(11)= 0.13065837815189701273276681449362080D-01 + pw(12)= 0.15050206824622500427636027066505286D-01 + pw(13)= 0.17310471604431563174843365788224820D-01 + pw(14)= 0.19902665286202140668402395991957144D-01 + pw(15)= 0.22891446750016473901306940204721219D-01 + pw(16)= 0.26350819616702910168573443501795867D-01 + pw(17)= 0.30365009714194435950502154476602598D-01 + pw(18)= 0.35029644330122852911457703936925401D-01 + pw(19)= 0.40453348181743022646044235704229779D-01 + pw(20)= 0.46759819486523129479791042090399526D-01 + pw(21)= 0.54090403179921190709407057449782309D-01 + pw(22)= 0.62607156912964304588823654347725686D-01 + pw(23)= 0.72496411785130069944054386511386143D-01 + pw(24)= 0.83972854815470973907373202544753383D-01 + pw(25)= 0.97284192841846608857881098157165865D-01 + pw(26)= 0.11271649103721755058596361082284142D+00 + pw(27)= 0.13060031219247571052268358514865611D+00 + pw(28)= 0.15131781797811823631871970343701060D+00 + pw(29)= 0.17531103556726408969705872673699634D+00 + pw(30)= 0.20309154904134215752202092778295388D+00 + pw(31)= 0.23525195397891395078569762059044835D+00 + pw(32)= 0.27247952854009146289412622775860388D+00 + pw(33)= 0.31557274490479512420214647476118810D+00 + pw(34)= 0.36546150205090068791751615370418030D+00 + pw(35)= 0.42323235431744446559543345018447882D+00 + pw(36)= 0.49016062156217930068156275755723687D+00 + pw(37)= 0.56775223265564612092706940030068368D+00 + pw(38)= 0.65779970939872780221136201150769308D+00 + pw(39)= 0.76245925865498574135551621597788489D+00 + pw(40)= 0.88436027001142532199927186614862192D+00 + pw(41)= 0.10267660753266581074365019471728486D+01 + pw(42)= 0.11938185471963720076518527335025432D+01 + pw(43)= 0.13909252171844327541246452342529830D+01 + pw(44)= 0.16254001662180292675140925291992103D+01 + pw(45)= 0.19075834129441296076092154965785291D+01 + pw(46)= 0.22529303361525593567401491175421270D+01 + pw(47)= 0.26862617892200971765670039600369766D+01 + pw(48)= 0.32514778994133779406256785815727141D+01 + pw(49)= 0.40378220858095016239167419409137174D+01 + pw(50)= 0.52726701882528895779137444037618021D+01 + pw(51)= 0.78744042074131644922750515876542347D+01 +endif +if(kn == 52) then + px( 1)= 0.27287878963778979318704099764976979D-03 + px( 2)= 0.14411589143135589309910830756029883D-02 + px( 3)= 0.35568888680936775757132750276695479D-02 + px( 4)= 0.66448808369926751816655531142947976D-02 + px( 5)= 0.10741535203421646235306609286779033D-01 + px( 6)= 0.15896548124765745985567116234974430D-01 + px( 7)= 0.22174792928607557748215031029525379D-01 + px( 8)= 0.29658745389012720604963104746455902D-01 + px( 9)= 0.38451550078535438960644609304592703D-01 + px(10)= 0.48680802154773981897502480211717432D-01 + px(11)= 0.60503099784552369897160088385145451D-01 + px(12)= 0.74109391214600302911242894598461603D-01 + px(13)= 0.89731100249233358933686871661989877D-01 + px(14)= 0.10764697753584156876969725074282997D+00 + px(15)= 0.12819061301398605430851761152295284D+00 + px(16)= 0.15175857698395808054368327545045674D+00 + px(17)= 0.17881924018609623916197948354619579D+00 + px(18)= 0.20992244196465392763256291141326856D+00 + px(19)= 0.24571029757551810503161176844891960D+00 + px(20)= 0.28692952833595684927481466342699473D+00 + px(21)= 0.33444574647578471028054731992057917D+00 + px(22)= 0.38926013793139080772351691924522347D+00 + px(23)= 0.45252898276941636881394275842894023D+00 + px(24)= 0.52558645699160000314972991962565110D+00 + px(25)= 0.60997118585865879765197048254341330D+00 + px(26)= 0.70745707342364955349644867464416164D+00 + px(27)= 0.82008901627275511415852311384357046D+00 + px(28)= 0.95022422122473475017786104441461093D+00 + px(29)= 0.11005799888944176702824780847485411D+01 + px(30)= 0.12742890037078181125625630187828027D+01 + px(31)= 0.14749633979156047676958663456164965D+01 + px(32)= 0.17067691516542664863528589999065341D+01 + px(33)= 0.19745127831074488312044849464771686D+01 + px(34)= 0.22837428183447160072986023000468876D+01 + px(35)= 0.26408692804764055817543358579323038D+01 + px(36)= 0.30533055131576702465080896333143573D+01 + px(37)= 0.35296382295469342100204889068765045D+01 + px(38)= 0.40798340368281393790208425276818839D+01 + px(39)= 0.47154942919829600315975357986809138D+01 + px(40)= 0.54501757769677042873771351203731296D+01 + px(41)= 0.62998036929575447450649046764701569D+01 + px(42)= 0.72832182898545812637798931719272845D+01 + px(43)= 0.84229216047310103819935078476256484D+01 + px(44)= 0.97461351851146406033204591955381092D+01 + px(45)= 0.11286361832018459964955857919026558D+02 + px(46)= 0.13085805530780086242892657357654051D+02 + px(47)= 0.15199343565107891341827069072222156D+02 + px(48)= 0.17701530875675334700619075035054618D+02 + px(49)= 0.20700169445006682515691632852528091D+02 + px(50)= 0.24366312928914848142376439425799257D+02 + px(51)= 0.29015743374923417716784066471530138D+02 + px(52)= 0.35433676271462821390664880501380081D+02 + pw( 1)= 0.70067921248512656450750262380470989D-03 + pw( 2)= 0.16387221028734144344416061643064582D-02 + pw( 3)= 0.25968227302892772688118502909161922D-02 + pw( 4)= 0.35852033221618217540588075672036645D-02 + pw( 5)= 0.46163571334057459414651619546435755D-02 + pw( 6)= 0.57044407177542779516540853577376776D-02 + pw( 7)= 0.68657439258466701284209801337979038D-02 + pw( 8)= 0.81192888642852760066695573485214022D-02 + pw( 9)= 0.94875105621027290016976460295608743D-02 + pw(10)= 0.10997004127430955685924352255220372D-01 + pw(11)= 0.12679313613778005716132640549939369D-01 + pw(12)= 0.14571725519850453422967526162847044D-01 + pw(13)= 0.16718025683719221072849622604121732D-01 + pw(14)= 0.19169191632602931509237238888017571D-01 + pw(15)= 0.21984027773632325146538486813698425D-01 + pw(16)= 0.25229801526425832500489421276402268D-01 + pw(17)= 0.28982985579859711073310312330962579D-01 + pw(18)= 0.33330232727989722055601631534296414D-01 + pw(19)= 0.38369695026986305327273292677722862D-01 + pw(20)= 0.44212758195453754037273486501473222D-01 + pw(21)= 0.50986218113528798819802588773272107D-01 + pw(22)= 0.58834899219859082186151202608595314D-01 + pw(23)= 0.67924711805560266218170048520749527D-01 + pw(24)= 0.78446161793212774417972991998090425D-01 + pw(25)= 0.90618353089338093281832862102708959D-01 + pw(26)= 0.10469355154807580861182066828267783D+00 + pw(27)= 0.12096240801949377693281668581524884D+00 + pw(28)= 0.13975996681192581185584791335037507D+00 + pw(29)= 0.16147261854647111561641477797041919D+00 + pw(30)= 0.18654619771836799217214159212739546D+00 + pw(31)= 0.21549548172910307786548214775429476D+00 + pw(32)= 0.24891542853693721010435838752243091D+00 + pw(33)= 0.28749460731095023155833868404490821D+00 + pw(34)= 0.33203145035164386811290281590906319D+00 + pw(35)= 0.38345421620808062938143753495150014D+00 + pw(36)= 0.44284595340052667020006768746408026D+00 + pw(37)= 0.51147637334768443075077758376770258D+00 + pw(38)= 0.59084351705391666304914198628651331D+00 + pw(39)= 0.68272966822652633355509618616923460D+00 + pw(40)= 0.78927854268555272214405511831212374D+00 + pw(41)= 0.91310513570381533776392939171880649D+00 + pw(42)= 0.10574572006171471782817226512789618D+01 + pw(43)= 0.12264611065174215544931924431769419D+01 + pw(44)= 0.14255110268002246494143307711209540D+01 + pw(45)= 0.16619131890762190850734830449377183D+01 + pw(46)= 0.19460108584649183568553387986228161D+01 + pw(47)= 0.22932836878593317486878267058034769D+01 + pw(48)= 0.27286173208169332405445956554845995D+01 + pw(49)= 0.32960612083291505532244371231002347D+01 + pw(50)= 0.40852129572893008083223241182607456D+01 + pw(51)= 0.53244443718708726283429813785502501D+01 + pw(52)= 0.79364763274033460235909669710486787D+01 +endif +if(kn == 53) then + px( 1)= 0.26766570337770727528170071246520492D-03 + px( 2)= 0.14134994888717316196512141359660423D-02 + px( 3)= 0.34880516519107403970758408932728686D-02 + px( 4)= 0.65147144546990128201893001657184119D-02 + px( 5)= 0.10527738385818098548917273507385571D-01 + px( 6)= 0.15573804518451021912970981414950172D-01 + px( 7)= 0.21713726252830234938777297490158659D-01 + px( 8)= 0.29024636763636674603190500526652300D-01 + px( 9)= 0.37602750842526664184644761079770188D-01 + px(10)= 0.47566769031392334409003814047662918D-01 + px(11)= 0.59061976687480610814082596844534784D-01 + px(12)= 0.72265065129161553717681820608077034D-01 + px(13)= 0.87389667514901234333023139285043850D-01 + px(14)= 0.10469256811716735138855696367380459D+00 + px(15)= 0.12448052659100388617653071564391793D+00 + px(16)= 0.14711767656647036665309334227739246D+00 + px(17)= 0.17303351979129595018460716414084922D+00 + px(18)= 0.20273163525968242510069482779745799D+00 + px(19)= 0.23679933260205764383581082765297944D+00 + px(20)= 0.27591857089423386003627193488624766D+00 + px(21)= 0.32087851933017891678519245554349762D+00 + px(22)= 0.37259015544714682612264560035248998D+00 + px(23)= 0.43210329558213462840685308411772863D+00 + px(24)= 0.50062645115169427006331566594320087D+00 + px(25)= 0.57954991863929172590111454477122563D+00 + px(26)= 0.67047254853929904297053555520671948D+00 + px(27)= 0.77523270078396767247020251586833006D+00 + px(28)= 0.89594398099521297070364552635590329D+00 + px(29)= 0.10350364637633983911134434941276054D+01 + px(30)= 0.11953042494029295972874450876803876D+01 + px(31)= 0.13799603766985451495079757379930050D+01 + px(32)= 0.15927003388200029769046949954648636D+01 + px(33)= 0.18377757429440148519213952318668567D+01 + px(34)= 0.21200800469036983170430724963400520D+01 + px(35)= 0.24452488452376621255574183769872919D+01 + px(36)= 0.28197779343346462148849901776278631D+01 + px(37)= 0.32511634739679835132078134711319616D+01 + px(38)= 0.37480701572008120067601493824866792D+01 + px(39)= 0.43205356864545827578744429205984365D+01 + px(40)= 0.49802234963192223418120333779580923D+01 + px(41)= 0.57407413466949074338019856932754499D+01 + px(42)= 0.66180524896462577279982649502155498D+01 + px(43)= 0.76310210235736203134564527928377033D+01 + px(44)= 0.88021583399572758584848067436385530D+01 + px(45)= 0.10158682176971824690390966321630474D+02 + px(46)= 0.11734082305033337839638441179290729D+02 + px(47)= 0.13570548670500422261838718291627263D+02 + px(48)= 0.15722959067717953266995872699981198D+02 + px(49)= 0.18265912835593835999181449267925846D+02 + px(50)= 0.21307358846087134932506642776378880D+02 + px(51)= 0.25018732302848078195180809407087663D+02 + px(52)= 0.29716799310767845023829953456809187D+02 + px(53)= 0.36189883296712405656605145166567620D+02 + pw( 1)= 0.68727895311546056672417632556060663D-03 + pw( 2)= 0.16070919274300689247610198780862261D-02 + pw( 3)= 0.25458627429238886643700982329323737D-02 + pw( 4)= 0.35131488282427671750845547282299960D-02 + pw( 5)= 0.45206512132016346669133873690489887D-02 + pw( 6)= 0.55815803493477684696883612785461534D-02 + pw( 7)= 0.67110727810055987477008256080221629D-02 + pw( 8)= 0.79267274337364857488746447630284210D-02 + pw( 9)= 0.92492158987355566092832344937099689D-02 + pw(10)= 0.10702954335651349887056984538693118D-01 + pw(11)= 0.12316817159979657159477748940914048D-01 + pw(12)= 0.14124861608422353998695630535116428D-01 + pw(13)= 0.16167026688242225669124227980203198D-01 + pw(14)= 0.18489777850309322975434696546792003D-01 + pw(15)= 0.21146695079329887928180449259296579D-01 + pw(16)= 0.24199043898015304232347701717858443D-01 + pw(17)= 0.27716412274949780256699383093693372D-01 + pw(18)= 0.31777522531875989191876025347365721D-01 + pw(19)= 0.36471323604726302437811675512366006D-01 + pw(20)= 0.41898438814168148316223242028468085D-01 + pw(21)= 0.48173004736710585958446082731843080D-01 + pw(22)= 0.55424906910282799399495351801946886D-01 + pw(23)= 0.63802408146079784845031445360740918D-01 + pw(24)= 0.73475174123291445512755683573657914D-01 + pw(25)= 0.84637721415891914195614185274215305D-01 + pw(26)= 0.97513337772431529356043555523731762D-01 + pw(27)= 0.11235854926743224508497153739859653D+00 + pw(28)= 0.12946823338997306958433565807904794D+00 + pw(29)= 0.14918150326264123120255847449750618D+00 + pw(30)= 0.17188851951158054500285261747356087D+00 + pw(31)= 0.19803842749441616169319688491554627D+00 + pw(32)= 0.22814867480320173542334621460640035D+00 + pw(33)= 0.26281604587933027973782593098613157D+00 + pw(34)= 0.30272987012943215567370818850475517D+00 + pw(35)= 0.34868803701893003485156344297085159D+00 + pw(36)= 0.40161671758475143724478782201274920D+00 + pw(37)= 0.46259509691660897469854774936577795D+00 + pw(38)= 0.53288704858998218510998379047232847D+00 + pw(39)= 0.61398266720080789199956346065784397D+00 + pw(40)= 0.70765415497287477021582999321525042D+00 + pw(41)= 0.81603315079286754570846341816085723D+00 + pw(42)= 0.94172096259934976721215747985393842D+00 + pw(43)= 0.10879507869255648374416119995114567D+01 + pw(44)= 0.12588348263164235013060998002290594D+01 + pw(45)= 0.14597555182997943253538598754860232D+01 + pw(46)= 0.16980130743144824162948847512548702D+01 + pw(47)= 0.19839559340047588104729344306844787D+01 + pw(48)= 0.23330898446405588037915551380406672D+01 + pw(49)= 0.27703666290115585782188239169095417D+01 + pw(50)= 0.33399854209196729287026099030910255D+01 + pw(51)= 0.41318959182038353492816091595673578D+01 + pw(52)= 0.53754564214506164841297568531690314D+01 + pw(53)= 0.79976793171584998097855897868836215D+01 +endif +if(kn == 54) then + px( 1)= 0.26264930117348429139290996151281804D-03 + px( 2)= 0.13868906738912660879236605780496720D-02 + px( 3)= 0.34218609389365101881877496771598208D-02 + px( 4)= 0.63896402666404078488978957960735342D-02 + px( 5)= 0.10322496845387460828947292469844905D-01 + px( 6)= 0.15264338738352449380895430335814458D-01 + px( 7)= 0.21272256455056810596723516130341971D-01 + px( 8)= 0.28418500058997028295178690126035519D-01 + px( 9)= 0.36792971921502915811121828397263107D-01 + px(10)= 0.46506297013822420204383294802715589D-01 + px(11)= 0.57693520466708188814704660142883650D-01 + px(12)= 0.70518461365129452425223403139712676D-01 + px(13)= 0.85178722402024364366080809274553805D-01 + px(14)= 0.10191132396313916367597436543864246D+00 + px(15)= 0.12099891148146303511007328280200670D+00 + px(16)= 0.14277649208960370764126858545635037D+00 + px(17)= 0.16763870128060588007507633768412165D+00 + px(18)= 0.19604767989373694212165892917330544D+00 + px(19)= 0.22854173844903711798992185084177464D+00 + px(20)= 0.26574507414867220998786541508194950D+00 + px(21)= 0.30837886575038189865518112878881341D+00 + px(22)= 0.35727409810156122358477142059464743D+00 + px(23)= 0.41338647141242328011007195602712556D+00 + px(24)= 0.47781374735909608618161123128437916D+00 + px(25)= 0.55181589013188334829869817262449587D+00 + px(26)= 0.63683838468354892203676383796234712D+00 + px(27)= 0.73453915990617498625433330235628987D+00 + px(28)= 0.84681961134902351472977037719948889D+00 + px(29)= 0.97586030619425103922072317930864173D+00 + px(30)= 0.11241620640819289778534857523108689D+01 + px(31)= 0.12945932455227004001780815602771556D+01 + px(32)= 0.14904442534576446040956072788546865D+01 + px(33)= 0.17154904765677712078499252639933687D+01 + px(34)= 0.19740651959466252026766816200100743D+01 + px(35)= 0.22711443710983615908928413364407126D+01 + px(36)= 0.26124457644974208978288838251819154D+01 + px(37)= 0.30045456290378285221329470833486270D+01 + px(38)= 0.34550172827009750892780305838794194D+01 + px(39)= 0.39725975078115629937328151923773107D+01 + px(40)= 0.45673891239659119753724152909459628D+01 + px(41)= 0.52511117619579365605498903719507942D+01 + px(42)= 0.60374185967841005869825918871706947D+01 + px(43)= 0.69423059418930191122711841795147301D+01 + px(44)= 0.79846576055279832356713682899398131D+01 + px(45)= 0.91869913294368492995853064476380612D+01 + px(46)= 0.10576519438659101773107722114056823D+02 + px(47)= 0.12186718685254033866362337537527451D+02 + px(48)= 0.14059766727898227283287996455084238D+02 + px(49)= 0.16250545334714196738451761057164415D+02 + px(50)= 0.18833703150856317701270861469808901D+02 + px(51)= 0.21917341606064316865260155485126217D+02 + px(52)= 0.25673282302478359060862571209297198D+02 + px(53)= 0.30419275213506007788817412536110986D+02 + px(54)= 0.36946729449797287732704130194371723D+02 + pw( 1)= 0.67438506760335604670255898997892204D-03 + pw( 2)= 0.15766730795758068292367252096595575D-02 + pw( 3)= 0.24969010052497357618191188751223522D-02 + pw( 4)= 0.34440151585437669876044394575278031D-02 + pw( 5)= 0.44289906416819842278218395665719176D-02 + pw( 6)= 0.54641754472827296169646638159218540D-02 + pw( 7)= 0.65636602656678083773300472655876417D-02 + pw( 8)= 0.77437596216345299454674361542457447D-02 + pw( 9)= 0.90235600454359303552602178356724465D-02 + pw(10)= 0.10425525098028000202217706397525802D-01 + pw(11)= 0.11976141415278903324803398081755125D-01 + pw(12)= 0.13706580129463991816022845593243222D-01 + pw(13)= 0.15653341765432086164765180782288484D-01 + pw(14)= 0.17858856637993859978963097726835278D-01 + pw(15)= 0.20372031993532375434976430093094102D-01 + pw(16)= 0.23248770947110602095264163700609205D-01 + pw(17)= 0.26552527146765384924391807859062013D-01 + pw(18)= 0.30354987338193146683949319082938468D-01 + pw(19)= 0.34736978572221172926784909895879575D-01 + pw(20)= 0.39789676392153673565656352708108709D-01 + pw(21)= 0.45616156632160377923066059864692116D-01 + pw(22)= 0.52333303181858472447283586460111768D-01 + pw(23)= 0.60074068936523280823710913114576385D-01 + pw(24)= 0.68990089360969502777474092821797731D-01 + pw(25)= 0.79254662793149992222714102849062817D-01 + pw(26)= 0.91066132125499050058268572358948574D-01 + pw(27)= 0.10465172413589772775444532528313942D+00 + pw(28)= 0.12027192389960804141840562034167157D+00 + pw(29)= 0.13822548329913942877038198233392259D+00 + pw(30)= 0.15885518710020893698686679081438881D+00 + pw(31)= 0.18255453080015552034691247423767820D+00 + pw(32)= 0.20977550590982571093431208426850228D+00 + pw(33)= 0.24103774649612254567565390373246475D+00 + pw(34)= 0.27693937431815337409878200394680401D+00 + pw(35)= 0.31817000170668241587144774061721975D+00 + pw(36)= 0.36552653149467718606906358503261169D+00 + pw(37)= 0.41993266332639492829188554992634159D+00 + pw(38)= 0.48246342603227571148223509952223773D+00 + pw(39)= 0.55437668888757955166034857478955110D+00 + pw(40)= 0.63715459820483206603034925190427611D+00 + pw(41)= 0.73255947628580611457525194256778522D+00 + pw(42)= 0.84271132620425470830602701627255624D+00 + pw(43)= 0.97019847802255398271823872088809738D+00 + pw(44)= 0.11182405648635278467279403261340421D+01 + pw(45)= 0.12909368955309583178279432436240182D+01 + pw(46)= 0.14936596913948188663729129981816013D+01 + pw(47)= 0.17337048448489904526596452227676677D+01 + pw(48)= 0.20214277264670434357889736680445585D+01 + pw(49)= 0.23723618064324051170579931680799604D+01 + pw(50)= 0.28115262822357354520836275890201874D+01 + pw(51)= 0.33832701615264074051804266553052254D+01 + pw(52)= 0.41778930411553237976334045242367218D+01 + pw(53)= 0.54257303561036728308093456793849096D+01 + pw(54)= 0.80580396891998440461864612470334815D+01 +endif +if(kn == 55) then + px( 1)= 0.25781894164194012434309377930977151D-03 + px( 2)= 0.13612751031594850206775668181403743D-02 + px( 3)= 0.33581698458852455723214904600493989D-02 + px( 4)= 0.62693690413467873882583513720977616D-02 + px( 5)= 0.10125310136665687971355854015542415D-01 + px( 6)= 0.14967346351298576942771412456996075D-01 + px( 7)= 0.20849147888131304033053596047200679D-01 + px( 8)= 0.27838492572990899694403556585128016D-01 + px( 9)= 0.36019520279673976527480271121267550D-01 + px(10)= 0.45495507499439646885138144349015201D-01 + px(11)= 0.56392208694907062019327988961512485D-01 + px(12)= 0.68861794220674272839206633722494427D-01 + px(13)= 0.83087389784370585294971502938757785D-01 + px(14)= 0.99288194535557030324060332733237417D-01 + px(15)= 0.11772513411433556959001042015562167D+00 + px(16)= 0.13870700468575135671370372276360989D+00 + px(17)= 0.16259709487800222734179448321576442D+00 + px(18)= 0.18982033572772041366926851284912359D+00 + px(19)= 0.22087111215348614429940107012361562D+00 + px(20)= 0.25632195206452177680013231303976781D+00 + px(21)= 0.29683337125272468206259039405023979D+00 + px(22)= 0.34316518501958180617486163242847902D+00 + px(23)= 0.39618960592837583486273888758703435D+00 + px(24)= 0.45690644457913117767731277056887464D+00 + px(25)= 0.52646073119585672648955063711186170D+00 + px(26)= 0.60616309005755762712921556869139875D+00 + px(27)= 0.69751323092341496562260118366147217D+00 + px(28)= 0.80222697243582393516449526286850739D+00 + px(29)= 0.92226728168578278287496843309443837D+00 + px(30)= 0.10598799021605760545446954338407357D+01 + px(31)= 0.12176342516911872309482334694729854D+01 + px(32)= 0.13984704081792695653834769855727069D+01 + px(33)= 0.16057531729310064020228982054108760D+01 + px(34)= 0.18433344234846220454903450952844757D+01 + px(35)= 0.21156252611660201772094982169604363D+01 + px(36)= 0.24276798551755632926179359953601148D+01 + px(37)= 0.27852934330452205314765679671405731D+01 + px(38)= 0.31951176405057028717258322130425570D+01 + px(39)= 0.36647976062397870566073557102018905D+01 + px(40)= 0.42031366779907243753664331178612278D+01 + px(41)= 0.48202972331234718143106561639280279D+01 + px(42)= 0.55280496786915488679928146699422870D+01 + px(43)= 0.63400875319084531166198158934082316D+01 + px(44)= 0.72724356766190935880980645516629891D+01 + px(45)= 0.83439939737765991151533907320407321D+01 + px(46)= 0.95772839454414406682247983833388953D+01 + px(47)= 0.10999511253155353900722704602341888D+02 + px(48)= 0.12644139881765944224874013113714956D+02 + px(49)= 0.14553337047736742937007375682160668D+02 + px(50)= 0.16781991883653111960448246164544552D+02 + px(51)= 0.19404807001908978674636447631480543D+02 + px(52)= 0.22530041649957455873120088574563768D+02 + px(53)= 0.26329908156600549662558562174411129D+02 + px(54)= 0.31123139662609753954221361691834122D+02 + px(55)= 0.37704208661211514525413812039019937D+02 + pw( 1)= 0.66197010017150176035001943503791612D-03 + pw( 2)= 0.15473986838973462236228078334406893D-02 + pw( 3)= 0.24498237063923531340555158367628164D-02 + pw( 4)= 0.33776287859319922812719456819175282D-02 + pw( 5)= 0.43411225775038708068974684768107364D-02 + pw( 6)= 0.53518651954260805354105863317050609D-02 + pw( 7)= 0.64229967432330231179415055321225939D-02 + pw( 8)= 0.75696698103592431759756449888418760D-02 + pw( 9)= 0.88095427367590507023412664390713554D-02 + pw(10)= 0.10163325161348951569439668089332658D-01 + pw(11)= 0.11655363240567140813674097334911855D-01 + pw(12)= 0.13314243348515058027067949270881268D-01 + pw(13)= 0.15173386635166955733192695121243376D-01 + pw(14)= 0.17271608028279645041513857207256865D-01 + pw(15)= 0.19653626901657341159725115198646941D-01 + pw(16)= 0.22370543498465587594423534082963911D-01 + pw(17)= 0.25480329057857957803698239091828550D-01 + pw(18)= 0.29048405984491612079844523910580431D-01 + pw(19)= 0.33148404854392620654285014862228099D-01 + pw(20)= 0.37863173170688566168679168381831541D-01 + pw(21)= 0.43286083546221674152033551748574709D-01 + pw(22)= 0.49522660152701866580552473522264330D-01 + pw(23)= 0.56692523854953065740425656620598762D-01 + pw(24)= 0.64931653102898776070657891627893174D-01 + pw(25)= 0.74394966967471873327666467149784361D-01 + pw(26)= 0.85259253137910454276787766290602372D-01 + pw(27)= 0.97726482395605256876361711073457773D-01 + pw(28)= 0.11202756962445724523518796251371951D+00 + pw(29)= 0.12842665975742201914453661139337216D+00 + pw(30)= 0.14722603673926230968313687924515801D+00 + pw(31)= 0.16877177710790063461389831494800560D+00 + pw(32)= 0.19346030041623174522759333491248744D+00 + pw(33)= 0.22174601068744206398277053899636196D+00 + pw(34)= 0.25415028231904386989193251104458640D+00 + pw(35)= 0.29127212894662705701804000206750184D+00 + pw(36)= 0.33380101778748297671825708760982179D+00 + pw(37)= 0.38253247505964083905642476699352139D+00 + pw(38)= 0.43838740189609651989190850364739389D+00 + pw(39)= 0.50243643541811971842045067743697266D+00 + pw(40)= 0.57593132895181935261643585397322053D+00 + pw(41)= 0.66034632680141149455034526996759720D+00 + pw(42)= 0.75743410948623930877746056875550536D+00 + pw(43)= 0.86930350487407763483137649785144665D+00 + pw(44)= 0.99853057127864209173395623916848905D+00 + pw(45)= 0.11483223403291556759761625679375804D+01 + pw(46)= 0.13227664361633706905774477225292859D+01 + pw(47)= 0.15272262858299299746420952422140745D+01 + pw(48)= 0.17689950292216856169402049903686857D+01 + pw(49)= 0.20584365667376688423615789562990280D+01 + pw(50)= 0.24111135465716154769717216299933567D+01 + pw(51)= 0.28521135641444982391089026540840319D+01 + pw(52)= 0.34259355468040848187361378226527105D+01 + pw(53)= 0.42232267265008794416077388243344182D+01 + pw(54)= 0.54752904412621487070917591943555763D+01 + pw(55)= 0.81175842524558588852537391429064074D+01 +endif +if(kn == 56) then + px( 1)= 0.25316296361047986979222704481695825D-03 + px( 2)= 0.13365901075979378376059799633963547D-02 + px( 3)= 0.32968187867088652972746128579161298D-02 + px( 4)= 0.61535889280423947494589016534393146D-02 + px( 5)= 0.99356438145931069871255487366443905D-02 + px( 6)= 0.14681977810167156277573501598213788D-01 + px( 7)= 0.20443111678193472909091734497338465D-01 + px( 8)= 0.27282717458052044439797443722558727D-01 + px( 9)= 0.35279661237909244138393562138044293D-01 + px(10)= 0.44530515113621224893066466054989604D-01 + px(11)= 0.55152581833421451048846493819641517D-01 + px(12)= 0.67287461982589797279652325494947566D-01 + px(13)= 0.81105172698463510127775615113455155D-01 + px(14)= 0.96808802192712951405093425956382214D-01 + px(15)= 0.11463966370426778674305108921559686D+00 + px(16)= 0.13488290707039924587391549901601733D+00 + px(17)= 0.15787356616502069558695735577341045D+00 + px(18)= 0.18400306948311470201893098688372094D+00 + px(19)= 0.21372631172205091315476055948506776D+00 + px(20)= 0.24756945964825417876144186941921348D+00 + px(21)= 0.28613872754179262008712244069824825D+00 + px(22)= 0.33013039508573925156714087895320715D+00 + px(23)= 0.38034235430595768392206349401494352D+00 + px(24)= 0.43768747182717257038476928803614522D+00 + px(25)= 0.50320905100551354990170632643254963D+00 + px(26)= 0.57809868566859133942855647933251305D+00 + px(27)= 0.66371681885395609603081291900385610D+00 + px(28)= 0.76161635782113158520389067408690172D+00 + px(29)= 0.87356975058892174416448481275277481D+00 + px(30)= 0.10015999992219932485646137689436440D+01 + px(31)= 0.11480161722880751230648197166792158D+01 + px(32)= 0.13154540866711925408169656894558134D+01 + px(33)= 0.15069229633419957830339656064577451D+01 + px(34)= 0.17258590324121035302982461790011709D+01 + px(35)= 0.19761872844199233411044622815807802D+01 + px(36)= 0.22623928590392104330658026022941003D+01 + px(37)= 0.25896039617088162412729887555211206D+01 + px(38)= 0.29636887519045083121995707405502705D+01 + px(39)= 0.33913694283389338916129064504517107D+01 + px(40)= 0.38803578612346352983211862453492671D+01 + px(41)= 0.44395187703404766581147350250334258D+01 + px(42)= 0.50790689079212912344545291525845662D+01 + px(43)= 0.58108244499131239038620507188960897D+01 + px(44)= 0.66485146168191270558987216454001789D+01 + px(45)= 0.76081888077726672589265324613407006D+01 + px(46)= 0.87087596930040374091019857045701857D+01 + px(47)= 0.99727503684543062151028368853863376D+01 + px(48)= 0.11427358864753300284154140246256655D+02 + px(49)= 0.13106036822460464759217266858454354D+02 + px(50)= 0.15050942893772414274408031537252686D+02 + px(51)= 0.17316976836533264840463009058352564D+02 + px(52)= 0.19978899582721245916009884455806908D+02 + px(53)= 0.23145132959066649402957025145184967D+02 + px(54)= 0.26988283646585129742188842153188599D+02 + px(55)= 0.31828066291287360025547176837209814D+02 + px(56)= 0.38461992963555384952401394913962473D+02 + pw( 1)= 0.65000398579028612344523107765589882D-03 + pw( 2)= 0.15191959531332206241117313092588598D-02 + pw( 3)= 0.24045082453807306790230685497814259D-02 + pw( 4)= 0.33138053120687689905848031561566419D-02 + pw( 5)= 0.42567825727886822265204218213429885D-02 + pw( 6)= 0.52442786039205566768969009014580993D-02 + pw( 7)= 0.62885672143814305898041140071920990D-02 + pw( 8)= 0.74037469337422375540169840316881048D-02 + pw( 9)= 0.86061856387838724696966695518776445D-02 + pw(10)= 0.99150131408851259539215604893253718D-02 + pw(11)= 0.11352651712245621514495559004367745D-01 + pw(12)= 0.12945367107597103270871461076597819D-01 + pw(13)= 0.14723816350026995714384635716386299D-01 + pw(14)= 0.16723567921281172025119870508991203D-01 + pw(15)= 0.18985579339702223096072943965050960D-01 + pw(16)= 0.21556638092713088509079450535846802D-01 + pw(17)= 0.24489800737865776493594940653820355D-01 + pw(18)= 0.27844892154300886517220254607831059D-01 + pw(19)= 0.31689141187091977270960955143508341D-01 + pw(20)= 0.36098024102012043921594821940947720D-01 + pw(21)= 0.41156366531808391430233301214909888D-01 + pw(22)= 0.46959728529751547723837911521797776D-01 + pw(23)= 0.53616077334483042468992867566071648D-01 + pw(24)= 0.61247744777900280040241146591130434D-01 + pw(25)= 0.69993670734405775857440933512557351D-01 + pw(26)= 0.80011946473114843210018552446024989D-01 + pw(27)= 0.91482687623385083454637087474649108D-01 + pw(28)= 0.10461128275712219242263696233465467D+00 + pw(29)= 0.11963207950030344698937570198730592D+00 + pw(30)= 0.13681258639855890430946376814155933D+00 + pw(31)= 0.15645828725394505928296678986188780D+00 + pw(32)= 0.17891818774495993299205732107037433D+00 + pw(33)= 0.20459124493705478925339465025762632D+00 + pw(34)= 0.23393387292296502681950858384393993D+00 + pw(35)= 0.26746877815837016930288816174815801D+00 + pw(36)= 0.30579546472966641872754674677137451D+00 + pw(37)= 0.34960287592590148622876766322786034D+00 + pw(38)= 0.39968482435244330897498051519252302D+00 + pw(39)= 0.45695914023983362115797429164759424D+00 + pw(40)= 0.52249188739719558018523893289646747D+00 + pw(41)= 0.59752864128661802876559766037133258D+00 + pw(42)= 0.68353583227497725169589493456556878D+00 + pw(43)= 0.78225676673770165676669761907089189D+00 + pw(44)= 0.89578957040518890914540096033762699D+00 + pw(45)= 0.10266987265790718339050961200134187D+01 + pw(46)= 0.11781795916828648269313771182072094D+01 + pw(47)= 0.13543092491251640404184331203718613D+01 + pw(48)= 0.15604436574176829660651085498995850D+01 + pw(49)= 0.18038746446191042006169647129755746D+01 + pw(50)= 0.20949760879837258079028998605170623D+01 + pw(51)= 0.24493410971001376006947638889013500D+01 + pw(52)= 0.28921265037563651103311910556760302D+01 + pw(53)= 0.34679810059374950896183058369361721D+01 + pw(54)= 0.42678969880522956032021408252168488D+01 + pw(55)= 0.55241361350341697933563283440847313D+01 + pw(56)= 0.81763098024027352046055737635386992D+01 +endif +if(kn == 57) then + px( 1)= 0.24868815335455860789772463829278776D-03 + px( 2)= 0.13128708050967756872359979837581267D-02 + px( 3)= 0.32378911411275754242453920014506796D-02 + px( 4)= 0.60424464389102377765134145508034116D-02 + px( 5)= 0.97537140744139523885808398942622953D-02 + px( 6)= 0.14408512776948249945597359956767419D-01 + px( 7)= 0.20054465803924087173957630667856565D-01 + px( 8)= 0.26751478412571506944557766303264093D-01 + px( 9)= 0.34573592603908669936189665703461548D-01 + px(10)= 0.43611265799596486187161513111221049D-01 + px(11)= 0.53974113832253153451922084032312655D-01 + px(12)= 0.65794147604388044446693612420778637D-01 + px(13)= 0.79229515286929499370440341287450519D-01 + px(14)= 0.94468740236461490024906772241244278D-01 + px(15)= 0.11173542497728950196695294189711635D+00 + px(16)= 0.13129338283356472153949413479135588D+00 + px(17)= 0.15345217055234222351399145423334369D+00 + px(18)= 0.17857303240016059517363555848698504D+00 + px(19)= 0.20707532489301037480311227730726369D+00 + px(20)= 0.23944355888568364571611377084388653D+00 + px(21)= 0.27623525598764641501713800314236035D+00 + px(22)= 0.31808985711331926597193028676339348D+00 + px(23)= 0.36573893965362420393418300064924317D+00 + px(24)= 0.42001800250816678509315665005653842D+00 + px(25)= 0.48188007571772045658000477338572683D+00 + px(26)= 0.55241141366947723712332135298933352D+00 + px(27)= 0.63284954422789175567060868091083275D+00 + px(28)= 0.72460397335389008193055638126809504D+00 + px(29)= 0.82927988618893103652768340546531032D+00 + px(30)= 0.94870524097232991512519332944115950D+00 + px(31)= 0.10849617219948828718866932170405878D+01 + px(32)= 0.12404201040143688064542721435857076D+01 + px(33)= 0.14177806871234317057617629235375845D+01 + px(34)= 0.16201195944991234133183611776590787D+01 + px(35)= 0.18509418957597958737177594237457576D+01 + px(36)= 0.21142427406807205051181553190404025D+01 + px(37)= 0.24145779838341929681853920899859986D+01 + px(38)= 0.27571461831926801287738694538112624D+01 + px(39)= 0.31478844142641227801141212544520951D+01 + px(40)= 0.35935811310102033845744816463442230D+01 + px(41)= 0.41020104413877221957804467425939289D+01 + px(42)= 0.46820938302391253474628239056559849D+01 + px(43)= 0.53440978451440606526835982001965101D+01 + px(44)= 0.60998800348958771477889866745643881D+01 + px(45)= 0.69632012890426281929489833882602384D+01 + px(46)= 0.79501320435284778078392913107490947D+01 + px(47)= 0.90795950558462101790938546852706470D+01 + px(48)= 0.10374113225206633587054123726355172D+02 + px(49)= 0.11860876304821553920803819021800250D+02 + px(50)= 0.13573324197835825157192895761042269D+02 + px(51)= 0.15553608911071130591094968547049722D+02 + px(52)= 0.17856644009398398334027651945364308D+02 + px(53)= 0.20557252972621646881571546529989799D+02 + px(54)= 0.23764024975150934121434317734353805D+02 + px(55)= 0.27649965226105594436613117812938989D+02 + px(56)= 0.32535770450738589606101398766605691D+02 + px(57)= 0.39221977092927801982893352173282336D+02 + pw( 1)= 0.63850406831505099738052476325581961D-03 + pw( 2)= 0.14921037986204782494894229786059731D-02 + pw( 3)= 0.23610114029305975624758426308210372D-02 + pw( 4)= 0.32526128541318438561754430100405078D-02 + pw( 5)= 0.41760397770836242676248541207424580D-02 + pw( 6)= 0.51414703399848581702756105177606384D-02 + pw( 7)= 0.61603891216042467763280301070012439D-02 + pw( 8)= 0.72459385496994110627368996655782062D-02 + pw( 9)= 0.84133204261124725154997593044037002D-02 + pw(10)= 0.96802413566072434418561861828501851D-02 + pw(11)= 0.11067394131957919015201993364016383D-01 + pw(12)= 0.12598960733898038415484720369114255D-01 + pw(13)= 0.14303116690973784728070109566032249D-01 + pw(14)= 0.16212514694032192842344075317239116D-01 + pw(15)= 0.18364731480288355645565506966903663D-01 + pw(16)= 0.20802678116001007719911702716244623D-01 + pw(17)= 0.23574997949263392825181094032890825D-01 + pw(18)= 0.26736501608313518430082020410358951D-01 + pw(19)= 0.30348704821972667356360038317766333D-01 + pw(20)= 0.34480535573107543829081934087773551D-01 + pw(21)= 0.39209262449594068155056630028607624D-01 + pw(22)= 0.44621673558809258382775767008026743D-01 + pw(23)= 0.50815515123079612189194336850658426D-01 + pw(24)= 0.57901188015516540741960287519003280D-01 + pw(25)= 0.66003700690181420273674403867109883D-01 + pw(26)= 0.75264885586780504125481386653660311D-01 + pw(27)= 0.85845899110248805481309245561278484D-01 + pw(28)= 0.97930039537017286368934956545788095D-01 + pw(29)= 0.11172593126446497910307506265827343D+00 + pw(30)= 0.12747113783580932628750351291841410D+00 + pw(31)= 0.14543628129357976189998302298577329D+00 + pw(32)= 0.16592976340385132601117576239709225D+00 + pw(33)= 0.18930320741347600799686592812399379D+00 + pw(34)= 0.21595777019635163082254656264589623D+00 + pw(35)= 0.24635151796681218166031807821910603D+00 + pw(36)= 0.28100812005137097745126867432418044D+00 + pw(37)= 0.32052720324236309080623061527301095D+00 + pw(38)= 0.36559683724134186969654741198617812D+00 + pw(39)= 0.41700881002095348086483260374822063D+00 + pw(40)= 0.47567763253365164936264867132129384D+00 + pw(41)= 0.54266463607013432598384544728159736D+00 + pw(42)= 0.61920917583183104118650977432015768D+00 + pw(43)= 0.70676996963437778493319526328106689D+00 + pw(44)= 0.80708121914722438188398403384545040D+00 + pw(45)= 0.92223080476089922545257576125601390D+00 + pw(46)= 0.10547722906765838946076024117162519D+01 + pw(47)= 0.12078902142342952742295563718452734D+01 + pw(48)= 0.13856521611695241681852077225258712D+01 + pw(49)= 0.15934078194682780100598346033527563D+01 + pw(50)= 0.18384490024798631769423787694389017D+01 + pw(51)= 0.21311608735825934018647767200886737D+01 + pw(52)= 0.24871681932345592025950623054739285D+01 + pw(53)= 0.29316978875123326489119117358318389D+01 + pw(54)= 0.35095485536224900059615141531182325D+01 + pw(55)= 0.43120561559297553662108510075135951D+01 + pw(56)= 0.55724340409598520094151224510004002D+01 + pw(57)= 0.82344141971751154156256216589492573D+01 +endif +if(kn == 58) then + px( 1)= 0.24434033394451184831332088274972614D-03 + px( 2)= 0.12898297855758084794990735012824795D-02 + px( 3)= 0.31806717757901164181366233077269143D-02 + px( 4)= 0.59345896357739671411939250656689172D-02 + px( 5)= 0.95773007935647589740131510133326933D-02 + px( 6)= 0.14143599916031750336288464659818307D-01 + px( 7)= 0.19678421471099466334328004427638463D-01 + px( 8)= 0.26238185206866063929609421458766724D-01 + px( 9)= 0.33892482730395244265993385655932016D-01 + px(10)= 0.42726147636423146495787112568669502D-01 + px(11)= 0.52841746547136186264664886791106024D-01 + px(12)= 0.64362519550464307152961734809637400D-01 + px(13)= 0.77435783944099717432445083945677444D-01 + px(14)= 0.92236796334736021064525541096205779D-01 + px(15)= 0.10897304960833762497368263739776640D+00 + px(16)= 0.12788897058127535161532867316020021D+00 + px(17)= 0.14927098966869470910429970787632708D+00 + px(18)= 0.17345298118495822795560109857844673D+00 + px(19)= 0.20082212068477187733619503336787526D+00 + px(20)= 0.23182526492157509727427115110610957D+00 + px(21)= 0.26697601677385238610408934208835716D+00 + px(22)= 0.30686267976111180750915583977482233D+00 + px(23)= 0.35215732947079012938113786105805915D+00 + px(24)= 0.40362623554087235036623549904310772D+00 + px(25)= 0.46214186627722416843651050017300999D+00 + px(26)= 0.52869670779772405928275015906498196D+00 + px(27)= 0.60441913758500942132872353014906915D+00 + px(28)= 0.69059161190173186977923080505129435D+00 + px(29)= 0.78867145852021566336079167775472318D+00 + px(30)= 0.90031461037842010940804732183197484D+00 + px(31)= 0.10274026720013124888351576363117054D+01 + px(32)= 0.11720737797358867139870870772361403D+01 + px(33)= 0.13367578014779413237552856365102921D+01 + px(34)= 0.15242165260123579138727083350791305D+01 + px(35)= 0.17375896232753198202855353912673167D+01 + px(36)= 0.19804473251817493601350778629165516D+01 + px(37)= 0.22568509975250307614430520806868566D+01 + px(38)= 0.25714230695306034625372310347575688D+01 + px(39)= 0.29294281925088078526353597436284922D+01 + px(40)= 0.33368680628505624786084197643705525D+01 + px(41)= 0.38005931431891197468681483744664603D+01 + px(42)= 0.43284356654318541562597981524559705D+01 + px(43)= 0.49293699827199698643746537542247355D+01 + px(44)= 0.56137088447382837752405108085311518D+01 + px(45)= 0.63933479763797142778167198658680473D+01 + px(46)= 0.72820772388702724076103924229296585D+01 + px(47)= 0.82959860217191960273479806666773572D+01 + px(48)= 0.94540058246887833757920831704609262D+01 + px(49)= 0.10778658868884060967669218732823142D+02 + px(50)= 0.12297127123681394967235226888868496D+02 + px(51)= 0.14042840296906789602475373898648457D+02 + px(52)= 0.16057946341424185599637733181299734D+02 + px(53)= 0.18397376144609008660129440553886667D+02 + px(54)= 0.21136019829441359432294268771943368D+02 + px(55)= 0.24382637830465314053970048093781607D+02 + px(56)= 0.28310635883229105824662724848457888D+02 + px(57)= 0.33241688156491882652244516947157465D+02 + px(58)= 0.39979324854730350560380760310991567D+02 + pw( 1)= 0.62733109048409588703595816867276199D-03 + pw( 2)= 0.14657936036557848053358560746138226D-02 + pw( 3)= 0.23188039412803108240913072658971970D-02 + pw( 4)= 0.31933033503497732665912080929284740D-02 + pw( 5)= 0.40979006451946182384357721499095629D-02 + pw( 6)= 0.50421643688026695837303498584333168D-02 + pw( 7)= 0.60368540232012649771110859344923619D-02 + pw( 8)= 0.70942376382161779284891095459836907D-02 + pw( 9)= 0.82284548942576416691892667668446138D-02 + pw(10)= 0.94559203825706345816982076964964040D-02 + pw(11)= 0.10795760712764688238988523701079555D-01 + pw(12)= 0.12270273749594030379848145803872545D-01 + pw(13)= 0.13905392767905254997074541953428596D-01 + pw(14)= 0.15731135789435791324224040917880572D-01 + pw(15)= 0.17782024053941809054144110879593548D-01 + pw(16)= 0.20097465728382493095345225347477278D-01 + pw(17)= 0.22722120795621834323838288686748639D-01 + pw(18)= 0.25706285556264310174964040630928441D-01 + pw(19)= 0.29106352315788903005352971189456581D-01 + pw(20)= 0.32985404610440883148326461811537831D-01 + pw(21)= 0.37413999049710482757623335110546207D-01 + pw(22)= 0.42471166374810272031277917315265662D-01 + pw(23)= 0.48245645122530704885901911499992297D-01 + pw(24)= 0.54837348649234653699497897577026759D-01 + pw(25)= 0.62359063229977042722185054531332040D-01 + pw(26)= 0.70938380290334218998361325437647613D-01 + pw(27)= 0.80719876243549163356586986213036934D-01 + pw(28)= 0.91867565718926819665093909060870324D-01 + pw(29)= 0.10456766632576442199326583277715739D+00 + pw(30)= 0.11903172509910440103985964305294291D+00 + pw(31)= 0.13550016907993941758980848856857065D+00 + pw(32)= 0.15424635639536902570179058036488656D+00 + pw(33)= 0.17558122145590420153365262455970893D+00 + pw(34)= 0.19985863067967063227542616526431877D+00 + pw(35)= 0.22748159641691842637396351126311924D+00 + pw(36)= 0.25890954059680923306019616384762021D+00 + pw(37)= 0.29466686202428234212246962384893466D+00 + pw(38)= 0.33535315105334115324523894344163479D+00 + pw(39)= 0.38165552584862205474730474843490408D+00 + pw(40)= 0.43436375602075320325783351993460999D+00 + pw(41)= 0.49438912403001643077208212506747590D+00 + pw(42)= 0.56278840340808053436660241948913705D+00 + pw(43)= 0.64079498868817746739885701660482042D+00 + pw(44)= 0.72986023375748618090030528388862540D+00 + pw(45)= 0.83170968157393847753518524949795654D+00 + pw(46)= 0.94842152201009860199494152865552426D+00 + pw(47)= 0.10825390752513919957077425880259437D+01 + pw(48)= 0.12372368612987946539691608858506276D+01 + pw(49)= 0.14165738915071239610049524678978730D+01 + pw(50)= 0.16258946038408501735019290717589660D+01 + pw(51)= 0.18724918616166796018089411475069718D+01 + pw(52)= 0.21667631278657422335640585222239716D+01 + pw(53)= 0.25243656135025940850141001255242091D+01 + pw(54)= 0.29705966338517197699939016107637128D+01 + pw(55)= 0.35504038802739003258026682341550974D+01 + pw(56)= 0.43554634619204172726228503521716960D+01 + pw(57)= 0.56199288682781632030749398177962085D+01 + pw(58)= 0.82915991164603216241594990786246181D+01 +endif +if(kn == 59) then + px( 1)= 0.24017044279779292814629839617243450D-03 + px( 2)= 0.12677357311318245204340430050679257D-02 + px( 3)= 0.31258222629100185940076720172210997D-02 + px( 4)= 0.58312499497336295459920293987796347D-02 + px( 5)= 0.94083841957770971773603088185375410D-02 + px( 6)= 0.13890148671899779318807930205580027D-01 + px( 7)= 0.19318997801708323654202530335822085D-01 + px( 8)= 0.25748144263471581820366574897844328D-01 + px( 9)= 0.33243095406506599413902998194831696D-01 + px(10)= 0.41883537206821979634762367168491752D-01 + px(11)= 0.51765600230040784734413248495245234D-01 + px(12)= 0.63004534953194280528857501029401263D-01 + px(13)= 0.75737811688489614503254126944665394D-01 + px(14)= 0.90128644081599828866050645306435530D-01 + px(15)= 0.10636991817245456656640674998882425D+00 + px(16)= 0.12468849734796975917708256298381635D+00 + px(17)= 0.14534987449428730815870543082646967D+00 + px(18)= 0.16866316194781867947963273417414352D+00 + px(19)= 0.19498644807014926777480289949553731D+00 + px(20)= 0.22473260025151679743882657221256944D+00 + px(21)= 0.25837564665252656619516749971741362D+00 + px(22)= 0.29645791158221640514428954276611043D+00 + px(23)= 0.33959810551825596456611237045554791D+00 + px(24)= 0.38850058094912455808801515111809716D+00 + px(25)= 0.44396596557949377138507728624091020D+00 + px(26)= 0.50690338304849533824426898328468191D+00 + px(27)= 0.57834447492359249583873987633512907D+00 + px(28)= 0.65945945037149605268093152551109157D+00 + px(29)= 0.75157541311753698549281538653947915D+00 + px(30)= 0.85619724923675556880411715666490139D+00 + px(31)= 0.97503140385009140556694003085244755D+00 + px(32)= 0.11100129303358947930559321811557219D+01 + px(33)= 0.12633362636366230834317390982154572D+01 + px(34)= 0.14374902524376435821063430688457459D+01 + px(35)= 0.16352980880451859683554892839580029D+01 + px(36)= 0.18599628978969427131632789868222316D+01 + px(37)= 0.21151199396574527695286927569279424D+01 + px(38)= 0.24048965537486799801854034104807364D+01 + px(39)= 0.27339813311614543030519151016136162D+01 + px(40)= 0.31077043634013766751037593199231006D+01 + px(41)= 0.35321310127538594381872851430342547D+01 + px(42)= 0.40141724497033117837380156117688880D+01 + px(43)= 0.45617173671065324231220714157123560D+01 + px(44)= 0.51837909802502262421170872012101186D+01 + px(45)= 0.58907499495757681122761284245660513D+01 + px(46)= 0.66945256944244027786731357744463874D+01 + px(47)= 0.76089345000234897625689957165319158D+01 + px(48)= 0.86500822381865882984046085645261005D+01 + px(49)= 0.98369069052844445818188741380057179D+01 + px(50)= 0.11191928172141132463081500916511436D+02 + px(51)= 0.12742318869317979750127012420299866D+02 + px(52)= 0.14521497812382557030759980660881216D+02 + px(53)= 0.16571609002403235432729979108495388D+02 + px(54)= 0.18947601696583022289134197021834890D+02 + px(55)= 0.21724434819141199485567406823338980D+02 + px(56)= 0.25011043569653595241704228425344735D+02 + px(57)= 0.28981239539754668238717062448027987D+02 + px(58)= 0.33957682861442574052880069059391550D+02 + px(59)= 0.40746915059614258028591599305878563D+02 + pw( 1)= 0.61661580843002818474199774713820435D-03 + pw( 2)= 0.14405704070514471179034636856714158D-02 + pw( 3)= 0.22783669060606481294489644369246025D-02 + pw( 4)= 0.31365358110962922351769249715961996D-02 + pw( 5)= 0.40232040175153947001508790877931979D-02 + pw( 6)= 0.49473803193663214737587320055863858D-02 + pw( 7)= 0.59191612038299517714653169073172258D-02 + pw( 8)= 0.69500182956008661963671339567494311D-02 + pw( 9)= 0.80531272876872073780988243944927837D-02 + pw(10)= 0.92437335233207542919233267694815444D-02 + pw(11)= 0.10539549268603537847804050413706850D-01 + pw(12)= 0.11961173126551657822377551429583253D-01 + pw(13)= 0.13532517058206043238763114996607351D-01 + pw(14)= 0.15281223576152454708908655408040014D-01 + pw(15)= 0.17239057603729693052163344051819092D-01 + pw(16)= 0.19442266458961476989325558184455495D-01 + pw(17)= 0.21931917469546401944382556564398834D-01 + pw(18)= 0.24754242433746641427431268228835038D-01 + pw(19)= 0.27961035103797151420897993567315053D-01 + pw(20)= 0.31610155514745908523448651363558848D-01 + pw(21)= 0.35766190345276654301560678957468317D-01 + pw(22)= 0.40501304167252803397704165063681030D-01 + pw(23)= 0.45896299064297235092911388363815978D-01 + pw(24)= 0.52041886574768681501627601462919698D-01 + pw(25)= 0.59040170127350183777203679746281117D-01 + pw(26)= 0.67006338167425270345951219198202241D-01 + pw(27)= 0.76070575665505304542628473598135760D-01 + pw(28)= 0.86380211831507007368019890251542642D-01 + pw(29)= 0.98102132636666032695755579145966437D-01 + pw(30)= 0.11142549737101106673605436812360518D+00 + pw(31)= 0.12656480912918894407887994146661412D+00 + pw(32)= 0.14376340063800550321586640229070301D+00 + pw(33)= 0.16329741041010875411766070944591531D+00 + pw(34)= 0.18548034139204283612129505343431894D+00 + pw(35)= 0.21066831722133520869183148142544999D+00 + pw(36)= 0.23926618297652114852711313447455029D+00 + pw(37)= 0.27173464220061857023595757633130234D+00 + pw(38)= 0.30859868602838502541227655874922030D+00 + pw(39)= 0.35045766213824028933896241045941680D+00 + pw(40)= 0.39799746405898883695366965337609207D+00 + pw(41)= 0.45200551538659797325171404962194440D+00 + pw(42)= 0.51338951072222608042425293298227461D+00 + pw(43)= 0.58320130681121704145811176462102830D+00 + pw(44)= 0.66266801682533587367291708503855157D+00 + pw(45)= 0.75323338727944523148159051268884834D+00 + pw(46)= 0.85661417022141803759749168951905898D+00 + pw(47)= 0.97487886818484736512715176832767718D+00 + pw(48)= 0.11105607080548591676445817081025167D+01 + pw(49)= 0.12668244926674778382406916781590943D+01 + pw(50)= 0.14477211050240886139248358867709201D+01 + pw(51)= 0.16585903209293941982874152551002654D+01 + pw(52)= 0.19067267819120553533052269878705101D+01 + pw(53)= 0.22025411213234530205031685493395576D+01 + pw(54)= 0.25617240229328119031730477599209231D+01 + pw(55)= 0.30096444005352699081587070335073080D+01 + pw(56)= 0.35914005587611010508966597641663763D+01 + pw(57)= 0.43990110029151586972435889347654638D+01 + pw(58)= 0.56675740145564914236518411943414860D+01 + pw(59)= 0.83489730609712306624049954354151761D+01 +endif +end subroutine wts500 +end MODULE WTS500_MOD diff --git a/src/trans/gpu/external/dir_trans.F90 b/src/trans/gpu/external/dir_trans.F90 new file mode 100755 index 0000000..faa9868 --- /dev/null +++ b/src/trans/gpu/external/dir_trans.F90 @@ -0,0 +1,536 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& LDLATLON,KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *DIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! LDLATLON - indicating if regular lat-lon is the input data +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, & + & NF_SC2, NF_SC3A, NF_SC3B, & + & NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +USE TPM_FIELDS ,ONLY : IF_FS_DIR,IF_FS_DIR0,NFLEV,NFLEV0,DTDZBA,DTDZBS,DTDZCA,DTDZCS +USE TPM_FLT ,ONLY : S +USE TPM_GEOMETRY ,ONLY : G +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIR_TRANS_CTL_MOD,ONLY : DIR_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL, INTENT(IN) :: LDLATLON + +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +INTEGER(KIND=JPIM) :: JMLOC, IF_PP + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DIR_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(440,0) +CALL GSTATS(1808,0) +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +! This is for use in TRGTOL which is shared with adjoint inverse transform +LSCDERS=.FALSE. +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. +LATLON=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDLATLON)) THEN + LATLON = LDLATLON +ENDIF + +! Compute derived variables + + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS +!D%IADJUST_D=0 +!IF(MOD(IF_FS,2)==1) THEN +! IF_FS = IF_FS + 1 +! D%IADJUST_D=1 +!ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! add additional post-processing requirements +! (copied from setup_trans.F90. Or does this need to be different here than in setup_trans.F90?) +!IF_PP = 2*NFLEV +!IF_PP = 0 + +! How do I get the current number of levels? For now I use: (Andreas) +!NFLEV = NFLEV0 + +! set currently used array sizes for the GPU arrays: +IF_FS_DIR=2*IF_FS+2!2*(2*IF_UV+NFLEV+2+IF_PP) +!PRINT*,"dir_trans: IF_FS_DIR=",IF_FS_DIR," IF_FS_DIR0=",IF_FS_DIR0 + +DTDZBA=IF_FS_DIR +DTDZBS=IF_FS_DIR +DTDZCA=IF_FS_DIR +DTDZCS=IF_FS_DIR + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANS : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1808,1) + +! ------------------------------------------------------------------ + +CALL DIR_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + + IF (LHOOK) CALL DR_HOOK('DIR_TRANS',1,ZHOOK_HANDLE) +CALL GSTATS(440,1) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE DIR_TRANS + diff --git a/src/trans/gpu/external/dir_transad.F90 b/src/trans/gpu/external/dir_transad.F90 new file mode 100755 index 0000000..6256408 --- /dev/null +++ b/src/trans/gpu/external/dir_transad.F90 @@ -0,0 +1,506 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *DIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL DIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split +! +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIR_TRANS_CTLAD - control routine +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NGPBLKS, NF_SC2, NF_SC3A, NF_SC3B, NPROMA +USE TPM_DISTR ,ONLY : D, MYSETV, NPRTRV + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIR_TRANS_CTLAD_MOD ,ONLY : DIR_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('DIR_TRANSAD',0,ZHOOK_HANDLE) + +CALL GSTATS(1810,0) +! Set current resolution + +CALL SET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +LSCDERS=.FALSE. ! This is for use in TRLTOG which is shared with inverse transform +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETUV CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1810,1) + +! Perform transform + +CALL DIR_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +IF (LHOOK) CALL DR_HOOK('DIR_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE DIR_TRANSAD + + diff --git a/src/trans/gpu/external/dist_grid.F90 b/src/trans/gpu/external/dist_grid.F90 new file mode 100755 index 0000000..05f34ee --- /dev/null +++ b/src/trans/gpu/external/dist_grid.F90 @@ -0,0 +1,147 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *DIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! KSORT (:) - Re-order fields on output +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! P.Marguinaud : 10-10-14 Add KSORT + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIST_GRID_CTL_MOD ,ONLY : DIST_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('DIST_GRID',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_GRID: KFROM TOO SHORT!') +ENDIF +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'DIST_GRID:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('DIST_GRID:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'DIST_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'DIST_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('DIST_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'DIST_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('DIST_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('DIST_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('DIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (UBOUND (KSORT, 1) /= UBOUND (PGP, 2)) THEN + CALL ABORT_TRANS('DIST_GRID: DIMENSION MISMATCH KSORT, PGP') + ENDIF +ENDIF + +CALL DIST_GRID_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP,KSORT) + +IF (LHOOK) CALL DR_HOOK('DIST_GRID',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID + diff --git a/src/trans/gpu/external/dist_grid_32.F90 b/src/trans/gpu/external/dist_grid_32.F90 new file mode 100755 index 0000000..ab3d3f0 --- /dev/null +++ b/src/trans/gpu/external/dist_grid_32.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIST_GRID_32(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP) + +!**** *DIST_GRID_32* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID_32(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_GRID_32_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN +USE TPM_DIM +USE TPM_DISTR + +USE SET_RESOL_MOD +USE DIST_GRID_32_CTL_MOD +USE ABORT_TRANS_MOD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRM) , INTENT(OUT) :: PGP(:,:,:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('DIST_GRID_32',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_GRID_32: KFROM TOO SHORT!') +ENDIF + +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'DIST_GRID_32:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('DIST_GRID_32:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'DIST_GRID_32:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('DIST_GRID_32:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'DIST_GRID_32:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('DIST_GRID_32:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'DIST_GRID_32:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('DIST_GRID_32:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('DIST_GRID_32:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('DIST_GRID_32:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('DIST_GRID_32:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + + +CALL DIST_GRID_32_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP) + +IF (LHOOK) CALL DR_HOOK('DIST_GRID_32',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID_32 + diff --git a/src/trans/gpu/external/dist_spec.F90 b/src/trans/gpu/external/dist_spec.F90 new file mode 100755 index 0000000..084c747 --- /dev/null +++ b/src/trans/gpu/external/dist_spec.F90 @@ -0,0 +1,201 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE DIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSMAX,KSORT) + +!**** *DIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL DIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! KSORT (:) - Re-order fields on output +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! P.Marguinaud : 10-10-14 Add KSORT + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DIST_SPEC_CONTROL_MOD ,ONLY : DIST_SPEC_CONTROL +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFDISTG) +INTEGER(KIND=JPIM) :: IFSEND,IFRECV,J,IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('DIST_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_SPEC: KFROM TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +IF(ISMAX /= R%NSMAX) THEN + CALL SUWAVEDI(ISMAX,ISMAX,NPRTRW,MYSETW,KPOSSP=IPOSSP,KSPEC2=ISPEC2,& + & KDIM0G=IDIM0G) + ISPEC2_G = (ISMAX+1)*(ISMAX+2) +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) +ENDIF +IFSEND = 0 +IFRECV = 0 + +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'DIST_SPEC:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('DIST_SPEC:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('DIST_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFSEND) THEN + WRITE(NERR,*) 'DIST_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFSEND + CALL ABORT_TRANS('DIST_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'DIST_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('DIST_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFDISTG) THEN + CALL ABORT_TRANS('DIST_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFDISTG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'DIST_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('DIST_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFRECV = IFRECV+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFDISTG) +ELSE + IFRECV = KFDISTG + IVSET(:) = MYSETV +ENDIF + +IF(IFRECV > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('DIST_SPEC: FIELDS TO RECEIVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFRECV) THEN + CALL ABORT_TRANS('DIST_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('DIST_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (.NOT. PRESENT (PSPEC)) THEN + CALL ABORT_TRANS('DIST_SPEC: KSORT REQUIRES PSPEC') + ENDIF + IF (UBOUND (KSORT, 1) /= UBOUND (PSPEC, IFLD)) THEN + CALL ABORT_TRANS('DIST_SPEC: DIMENSION MISMATCH KSORT, PSPEC') + ENDIF +ENDIF + +CALL DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,IVSET,PSPEC,LLDIM1_IS_FLD,& + & ISMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,KSORT) + +DEALLOCATE(IDIM0G) + +IF (LHOOK) CALL DR_HOOK('DIST_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_SPEC + diff --git a/src/trans/gpu/external/gath_grid.F90 b/src/trans/gpu/external/gath_grid.F90 new file mode 100755 index 0000000..8cfcc40 --- /dev/null +++ b/src/trans/gpu/external/gath_grid.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *GATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL GATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE GATH_GRID_CTL_MOD ,ONLY : GATH_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(IN) :: PGP(:,:,:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('GATH_GRID',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) + +IF (LHOOK) CALL DR_HOOK('GATH_GRID',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID + diff --git a/src/trans/gpu/external/gath_grid_32.F90 b/src/trans/gpu/external/gath_grid_32.F90 new file mode 100755 index 0000000..052552f --- /dev/null +++ b/src/trans/gpu/external/gath_grid_32.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GATH_GRID_32(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *GATH_GRID_32* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL GATH_GRID_32(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_32_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRM + +!ifndef INTERFACE + +USE TPM_GEN +USE TPM_DIM +USE TPM_DISTR + +USE SET_RESOL_MOD +USE GATH_GRID_32_CTL_MOD +USE ABORT_TRANS_MOD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRM) , INTENT(IN) :: PGP(:,:,:) + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('GATH_GRID_32',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID_32: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID_32:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID_32:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID_32:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID_32:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID_32:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID_32:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID_32:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID_32:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID_32:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID_32:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID_32:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_32_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) + +IF (LHOOK) CALL DR_HOOK('GATH_GRID_32',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID_32 + diff --git a/src/trans/gpu/external/gath_spec.F90 b/src/trans/gpu/external/gath_spec.F90 new file mode 100755 index 0000000..9af1f81 --- /dev/null +++ b/src/trans/gpu/external/gath_spec.F90 @@ -0,0 +1,194 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LDZA0IP) + +!**** *GATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL GATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero imaginary part of first coefficients +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Modified 03-09-30 Y. Seity, bug correction IFSEND=0 +! Modified 13-10-10 P. Marguinaud add LDZA0IP option +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE GATH_SPEC_CONTROL_MOD ,ONLY : GATH_SPEC_CONTROL +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFGATHG) +INTEGER(KIND=JPIM) :: IFRECV,IFSEND,J +INTEGER(KIND=JPIM) :: IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('GATH_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL SET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_SPEC: KTO TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +IF(ISMAX /= R%NSMAX) THEN + CALL SUWAVEDI(ISMAX,ISMAX,NPRTRW,MYSETW,KPOSSP=IPOSSP,KSPEC2=ISPEC2,& + & KDIM0G=IDIM0G) + ISPEC2_G = (ISMAX+1)*(ISMAX+2) +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) +ENDIF + +IFSEND = 0 +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_SPEC:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_SPEC:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('GATH_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFRECV) THEN + WRITE(NERR,*) 'GATH_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFRECV + CALL ABORT_TRANS('GATH_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'GATH_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('GATH_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFGATHG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'GATH_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('GATH_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFSEND = IFSEND+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFGATHG) +ELSEIF(NPRTRV > 1) THEN + WRITE(NERR,*) 'GATH_SPEC:KVSET MISSING, NPRTRV ',NPRTRV + CALL ABORT_TRANS('GATH_SPEC:KVSET MISSING, NPRTRV > 1') +ELSE + IFSEND = KFGATHG + IVSET(:) = 1 +ENDIF + +IF(IFSEND > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('GATH_SPEC: FIELDS TO RECIEVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFSEND) THEN + CALL ABORT_TRANS('GATH_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('GATH_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,IVSET,PSPEC,LLDIM1_IS_FLD,& + & ISMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,LDZA0IP) +DEALLOCATE(IDIM0G) + +IF (LHOOK) CALL DR_HOOK('GATH_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_SPEC + diff --git a/src/trans/gpu/external/get_current.F90 b/src/trans/gpu/external/get_current.F90 new file mode 100755 index 0000000..802701a --- /dev/null +++ b/src/trans/gpu/external/get_current.F90 @@ -0,0 +1,67 @@ +! (C) Copyright 2012- Meteo-France. +! (C) Copyright 2012- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GET_CURRENT(KRESOL,LDLAM) + +!**** *GET_CURRENT* - Extract current information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting current information from the T.P. + +!** Interface. +! ---------- +! CALL GET_CURRENT(...) + +! Explicit arguments : (all optional) +! -------------------- +! KRESOL - Current resolution +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Ryad El Khatib *Meteo-France* + +! Modifications. +! -------------- +! Original : 24-Aug-2012 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE TPM_GEN +USE TPM_GEOMETRY + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: KRESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM + +!ifndef INTERFACE + +! Get current resolution +IF (PRESENT(KRESOL)) KRESOL= NCUR_RESOL +IF (PRESENT(LDLAM)) LDLAM = G%LAM + + +!endif INTERFACE + +END SUBROUTINE GET_CURRENT diff --git a/src/trans/gpu/external/gpnorm_trans.F90 b/src/trans/gpu/external/gpnorm_trans.F90 new file mode 100755 index 0000000..38aea70 --- /dev/null +++ b/src/trans/gpu/external/gpnorm_trans.F90 @@ -0,0 +1,522 @@ +! (C) Copyright 2008- ECMWF. +! (C) Copyright 2008- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *GPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms using a 2 stage (NPRTRV,NPRTRW) communication rather +! than an approach using a more expensive global gather collective communication + +!** Interface. +! ---------- +! CALL GPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! George Mozdzynski *ECMWF* + +! Modifications. +! -------------- +! Original : 19th Sept 2008 +! R. El Khatib 07-08-2009 Optimisation directive for NEC + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB , JPRD +USE PARKIND_ECTRANS ,ONLY : JPRBT + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LGPNORM, NGPBLKS, NPROMA, ZGTF, ZAVE, ZMINGL, ZMAXGL, ZMINGPN, ZMAXGPN +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF,D_NPTRLS, MYPROC +USE TPM_GEOMETRY ,ONLY : G,G_NLOEN,G_NLOEN_MAX +USE TPM_FIELDS ,ONLY : F_RW +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE TRGTOL_MOD ,ONLY : TRGTOL +USE SET2PE_MOD ,ONLY : SET2PE +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL ,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KRESOL + +!ifndef INTERFACE + +! Local variables +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +INTEGER(KIND=JPIM) :: IUBOUND(4) +INTEGER(KIND=JPIM) :: IVSET(KFIELDS) +INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IVSETG(:,:) +!GPU +!REAL(KIND=JPRBT),ALLOCATABLE :: ZGTF(:,:) +!REAL(KIND=JPRD),ALLOCATABLE :: ZAVE(:,:) +!REAL(KIND=JPRBT),ALLOCATABLE :: ZMINGL(:,:) +!REAL(KIND=JPRBT),ALLOCATABLE :: ZMAXGL(:,:) +!REAL(KIND=JPRBT),ALLOCATABLE :: ZMIN(:) +!REAL(KIND=JPRBT),ALLOCATABLE :: ZMAX(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZAVEG(:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMING(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZMAXG(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZSND(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZRCV(:) +INTEGER(KIND=JPIM) :: J,JGL,IGL,JL,JF,IF_GP,IF_SCALARS_G,IF_FS,JSETV,JSETW,IWLATS,JMAX +INTEGER(KIND=JPIM) :: IPROC,ITAG,ILEN,ILENR,IBEG,IEND,IND +!INTEGER(KIND=JPIM) :: iunit + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS',0,ZHOOK_HANDLE) + +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults + +NPROMA = KPROMA +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +! Consistency checks + +IUBOUND(1:3)=UBOUND(PGP) +IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'GPNORM_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('GPNORM_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFIELDS) THEN + WRITE(NOUT,*)'GPNORM_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFIELDS + CALL ABORT_TRANS('GPNORM_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'GPNORM_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('GPNORM_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + + +IF_GP=KFIELDS +IF_SCALARS_G=0 + +IF_FS=0 +DO J=1,KFIELDS + IVSET(J)=MOD(J-1,NPRTRV)+1 + IF(IVSET(J)==MYSETV)THEN + IF_FS=IF_FS+1 + ENDIF +ENDDO + +ALLOCATE(IVSETS(NPRTRV)) +IVSETS(:)=0 +DO J=1,KFIELDS + IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 +ENDDO +ALLOCATE(IVSETG(NPRTRV,MAXVAL(IVSETS(:)))) +IVSETG(:,:)=0 +IVSETS(:)=0 +DO J=1,KFIELDS + IVSETS(IVSET(J))=IVSETS(IVSET(J))+1 + IVSETG(IVSET(J),IVSETS(IVSET(J)))=J +ENDDO + + +! done in setup_trans +LGPNORM=.TRUE. +CALL TRGTOL(ZGTF,IF_FS,IF_GP,IF_SCALARS_G,IVSET,PGP=PGP) +LGPNORM=.FALSE. + +! ZGTF is now on GPU + +IBEG=1 +IEND=D%NDGL_FS + +CALL GSTATS(1429,0) +IF( IF_FS > 0 )THEN + +#ifdef ACCGPU + !$ACC DATA & + !$ACC& COPY(F_RW) & + !$ACC& COPY(D,D_NSTAGTF,D_NPTRLS,G_NLOEN,G_NLOEN_MAX) & + !$ACC& PRESENT(ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:F_RW,D,D_NSTAGTF,D_NPTRLS,G_NLOEN,G_NLOEN_MAX) & + !$OMP& MAP(PRESENT,ALLOC:ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC KERNELS +#endif + DO JF=1,IF_FS + ZMINGL(JF,IBEG:IEND)=ZGTF(JF,D_NSTAGTF(1)+1) + ZMAXGL(JF,IBEG:IEND)=ZGTF(JF,D_NSTAGTF(1)+1) + ENDDO +#ifdef ACCGPU + !$ACC END KERNELS +#endif + +! FIRST DO SUMS IN EACH FULL LATITUDE + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC KERNELS +#endif + DO JGL=IBEG,IEND + IGL = D_NPTRLS(MYSETW) + JGL - 1 + DO JF=1,IF_FS + ZAVE(JF,JGL)=0.0_JPRBT +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JL=1,G_NLOEN(IGL) + ZAVE(JF,JGL)=ZAVE(JF,JGL)+ZGTF(JF,D_NSTAGTF(JGL)+JL) + ZMINGL(JF,JGL)=MIN(ZMINGL(JF,JGL),ZGTF(JF,D_NSTAGTF(JGL)+JL)) + ZMAXGL(JF,JGL)=MAX(ZMAXGL(JF,JGL),ZGTF(JF,D_NSTAGTF(JGL)+JL)) + ENDDO + ENDDO + ENDDO +#ifdef ACCGPU + !$ACC END KERNELS +#endif + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC KERNELS +#endif + DO JF=1,IF_FS + ZMINGPN(JF)=MINVAL(ZMINGL(JF,IBEG:IEND)) + ZMAXGPN(JF)=MAXVAL(ZMAXGL(JF,IBEG:IEND)) + ENDDO +#ifdef ACCGPU + !$ACC END KERNELS +#endif + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC KERNELS +#endif + DO JGL=IBEG,IEND + IGL = D_NPTRLS(MYSETW) + JGL - 1 + DO JF=1,IF_FS + ZAVE(JF,JGL)=ZAVE(JF,JGL)*F_RW(IGL)/G_NLOEN(IGL) + !write(iunit,*) 'aver inside ',JF,IF_FS,IGL,ZAVE(JF,JGL), F_RW(IGL), G_NLOEN(IGL),ZMINGPN(JF),ZMAXGPN(JF) + ENDDO + ENDDO +#ifdef ACCGPU + !$ACC END KERNELS +#endif + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +#ifdef ACCGPU +!$ACC UPDATE HOST(ZAVE) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE FROM(ZAVE) +#endif +#ifdef ACCGPU +!$ACC UPDATE HOST(ZMINGPN) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE FROM(ZMINGPN) +#endif +#ifdef ACCGPU +!$ACC UPDATE HOST(ZMAXGPN) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE FROM(ZMAXGPN) +#endif +#ifdef ACCGPU +!$ACC WAIT +#endif +#ifdef OMPGPU +!$OMP BARRIER +#endif + +ENDIF +CALL GSTATS(1429,1) + +! from here rest on CPU + +! IT IS IMPORTANT THAT SUMS ARE NOW DONE IN LATITUDE ORDER +ALLOCATE(ZAVEG(R%NDGL,KFIELDS)) +ALLOCATE(ZMING(KFIELDS)) +ALLOCATE(ZMAXG(KFIELDS)) + +ZAVEG(:,:)=0.0_JPRD +DO JF=1,IF_FS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + ZAVEG(IGL,IVSETG(MYSETV,JF))=ZAVEG(IGL,IVSETG(MYSETV,JF))+ZAVE(JF,JGL) + ENDDO +ENDDO + +IF(LDAVE_ONLY)THEN + ZMING(:)=PMIN(:) + ZMAXG(:)=PMAX(:) +ELSE + DO JF=1,IF_FS + ZMING(IVSETG(MYSETV,JF))=ZMINGPN(JF) + ZMAXG(IVSETG(MYSETV,JF))=ZMAXGPN(JF) + ENDDO +ENDIF + +! RECEIVE ABOVE FROM OTHER NPRTRV SETS FOR SAME LATS BUT DIFFERENT FIELDS +ITAG=123 + +CALL GSTATS(815,0) + +IF( MYSETV==1 )THEN + + DO JSETV=2,NPRTRV + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*IVSETS(JSETV)+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*IVSETS(JSETV) + ENDIF + IF(ILEN > 0)THEN + ALLOCATE(ZRCV(ILEN)) + CALL SET2PE(IPROC,0,0,MYSETW,JSETV) + CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='GPNORM_TRANS:V') + IF(ILENR /= ILEN)THEN + CALL ABOR1('GPNORM_TRANS:ILENR /= ILEN') + ENDIF + IND=0 + DO JF=1,IVSETS(JSETV) + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZAVEG(IGL,IVSETG(JSETV,JF))=ZRCV(IND) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZMING(IVSETG(JSETV,JF))=ZRCV(IND) + IND=IND+1 + ZMAXG(IVSETG(JSETV,JF))=ZRCV(IND) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),REAL(ZRCV(IND),JPRB)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),REAL(ZRCV(IND),JPRB)) + ENDDO + ENDIF + DEALLOCATE(ZRCV) + ENDIF + ENDDO + +ELSE + + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*IVSETS(MYSETV)+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*IVSETS(MYSETV) + ENDIF + IF(ILEN > 0)THEN + CALL SET2PE(IPROC,0,0,MYSETW,1) + ALLOCATE(ZSND(ILEN)) + IND=0 + DO JF=1,IF_FS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZSND(IND)=ZAVEG(IGL,IVSETG(MYSETV,JF)) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZSND(IND)=ZMING(IVSETG(MYSETV,JF)) + IND=IND+1 + ZSND(IND)=ZMAXG(IVSETG(MYSETV,JF)) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZSND(IND)=PMIN(JF) + IND=IND+1 + ZSND(IND)=PMAX(JF) + ENDDO + ENDIF + CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='GPNORM_TRANS:V') + DEALLOCATE(ZSND) + ENDIF + +ENDIF + +! FINALLY RECEIVE CONTRIBUTIONS FROM OTHER NPRTRW SETS + +IF( MYSETV == 1 )THEN + + IF( MYSETW == 1 )THEN + + DO JSETW=2,NPRTRW + IWLATS=D%NULTPP(JSETW) + IBEG=1 + IEND=IWLATS + IF(LDAVE_ONLY)THEN + ILEN=IWLATS*KFIELDS+2*KFIELDS + ELSE + ILEN=(IWLATS+2)*KFIELDS + ENDIF + IF(ILEN > 0 )THEN + ALLOCATE(ZRCV(ILEN)) + CALL SET2PE(IPROC,0,0,JSETW,1) + CALL MPL_RECV(ZRCV(:),KSOURCE=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='GPNORM_TRANS:W') + IF(ILENR /= ILEN)THEN + CALL ABOR1('GPNORM_TRANS:ILENR /= ILEN') + ENDIF + IND=0 + DO JF=1,KFIELDS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(JSETW) + JGL - 1 + IND=IND+1 + ZAVEG(IGL,JF)=ZRCV(IND) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + IND=IND+1 + ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ENDDO + ENDIF + DEALLOCATE(ZRCV) + ENDIF + ENDDO + + ELSE + + IF(LDAVE_ONLY)THEN + ILEN=D%NDGL_FS*KFIELDS+2*KFIELDS + ELSE + ILEN=(D%NDGL_FS+2)*KFIELDS + ENDIF + IF(ILEN > 0)THEN + CALL SET2PE(IPROC,0,0,1,1) + ALLOCATE(ZSND(ILEN)) + IND=0 + DO JF=1,KFIELDS + DO JGL=IBEG,IEND + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IND=IND+1 + ZSND(IND)=ZAVEG(IGL,JF) + ENDDO + IF(.NOT.LDAVE_ONLY)THEN + IND=IND+1 + ZSND(IND)=ZMING(JF) + IND=IND+1 + ZSND(IND)=ZMAXG(JF) + ENDIF + ENDDO + IF(LDAVE_ONLY)THEN + DO JF=1,KFIELDS + IND=IND+1 + ZSND(IND)=ZMING(JF) + IND=IND+1 + ZSND(IND)=ZMAXG(JF) + ENDDO + ENDIF + CALL MPL_SEND(ZSND(:),KDEST=NPRCIDS(IPROC),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='GPNORM_TRANS:V') + DEALLOCATE(ZSND) + ENDIF + + ENDIF + +ENDIF + +CALL GSTATS(815,1) + +IF( MYSETW == 1 .AND. MYSETV == 1 )THEN + + PAVE(:)=0.0_JPRB + DO JGL=1,R%NDGL + PAVE(:)=PAVE(:)+REAL(ZAVEG(JGL,:),JPRB) + ENDDO + + PMIN(:)=ZMING(:) + PMAX(:)=ZMAXG(:) + +ENDIF + +!DEALLOCATE(ZGTF) +!DEALLOCATE(ZAVE) +!DEALLOCATE(ZMIN) +!DEALLOCATE(ZMAX) +DEALLOCATE(ZAVEG) +DEALLOCATE(ZMING) +DEALLOCATE(ZMAXG) +DEALLOCATE(IVSETS) +DEALLOCATE(IVSETG) + +IF (LHOOK) CALL DR_HOOK('GPNORM_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + + +END SUBROUTINE GPNORM_TRANS diff --git a/src/trans/gpu/external/ini_spec_dist.F90 b/src/trans/gpu/external/ini_spec_dist.F90 new file mode 100755 index 0000000..6f28134 --- /dev/null +++ b/src/trans/gpu/external/ini_spec_dist.F90 @@ -0,0 +1,90 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE INI_SPEC_DIST(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& + &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS) + + +!**** *INI_SPEC_DIST* - Initialize spectral wave distribution + +! Purpose. +! -------- +! Initialize arrays controlling spectral wave distribution + +!** Interface. +! ---------- +! CALL INI_SPEC_DIST(...) + +! Explicit arguments : +! -------------------- +! KSMAX - spectral truncation required +! KTMAX - Overtruncation for KSMAX (input) +! KPRTRW - Number of processors in A-direction (input) +! KMYSETW - A-set for present processor (input) +! KASM0 - Offsets for spectral waves (output) +! KSPOLEGL - Local version of NSPOLEG (output) +! KPROCM - Where a certain spectral wave belongs (output) +! KUMPP - Number of spectral waves on this PE (output) +! KSPEC - Local version on NSPEC (output) +! KSPEC2 - Local version on NSPEC2 (output) +! KSPEC2MX - Maximum KSPEC2 across PEs (output) +! KPOSSP - Global spectral fields partitioning (output) +! KMYMS - This PEs spectral zonal wavenumbers (output) +! Implicit arguments : NONE +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. SUWAVEDI +! ---------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +!ifndef INTERFACE +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KPRTRW +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETW +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPOLEGL + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KASM0(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPROCM(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KUMPP(KPRTRW) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KMYMS(KSMAX+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPOSSP(KPRTRW+1) + +!ifndef INTERFACE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('INI_SPEC_DIST',0,ZHOOK_HANDLE) + +CALL SUWAVEDI(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& + &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS) + +IF (LHOOK) CALL DR_HOOK('INI_SPEC_DIST',1,ZHOOK_HANDLE) + +!endif INTERFACE + +END SUBROUTINE INI_SPEC_DIST diff --git a/src/trans/gpu/external/inv_trans.F90 b/src/trans/gpu/external/inv_trans.F90 new file mode 100755 index 0000000..a442f37 --- /dev/null +++ b/src/trans/gpu/external/inv_trans.F90 @@ -0,0 +1,661 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE INV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,LDLATLON,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! LDLATLON - indicating if regular lat-lon output requested +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA + +USE TPM_FIELDS ,ONLY : IF_FS_INV,IF_FS_INV0,ITDZBA,ITDZBS,ITDZCA,ITDZCS +USE TPM_FLT ,ONLY : S +USE TPM_GEOMETRY ,ONLY : G +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE INV_TRANS_CTL_MOD ,ONLY : INV_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +#ifdef _OPENACC +USE OPENACC +!USE ACCEL_LIB !only for NVIDIA GPUs +#endif + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +LOGICAL ,OPTIONAL, INTENT(IN) :: LDLATLON +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +INTEGER(KIND=JPIM) :: JMLOC +INTEGER(KIND=JPIM) :: UNIT_NO,IDEVTYPE,NUMDEVS,MYGPU,MYNUM +! ------------------------------------------------------------------ + +UNIT_NO=300+MYPROC +CALL FLUSH(UNIT_NO) + + +IF (LHOOK) CALL DR_HOOK('INV_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(441,0) +CALL GSTATS(1807,0) +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +LATLON =.FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF(.NOT. PRESENT(PSPSCALAR) ) THEN + CALL ABORT_TRANS('INV_TRANS : KVSETSC PRESENT BUT PSPSCALAR MISSING') + ENDIF + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G + IF_SC2_G + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + &('INV_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS_G > 0 ) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +IF(PRESENT(LDLATLON)) THEN + LATLON = LDLATLON +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF +! importance related to cuFFT, needed Nils ? +!D%IADJUST_I=0 +!IF(MOD(IF_FS,2)==1) THEN +! IF_FS = IF_FS + 1 +! D%ADJUST_I=1 +!ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! set currently used array sizes for the GPU arrays: +!IF_FS_INV= 8*IF_UV + 2*IF_SCALARS + 2*IF_SCDERS +!Andreas: we were using the previous line in setup_trans but this doesn't consider derivatives. Better: +IF_FS_INV=2*IF_OUT_LT +PRINT*,"inv_trans: IF_FS_INV=",IF_FS_INV," IF_FS_INV0=",IF_FS_INV0 + +ITDZBA=IF_FS_INV +ITDZBS=IF_FS_INV +ITDZCA=IF_FS_INV +ITDZCS=IF_FS_INV + +#ifdef ACCGPU +!$ACC ENTER DATA COPYIN(ITDZBA, ITDZBS, ITDZCA, ITDZCS) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(TO:ITDZBA, ITDZBS, ITDZCA, ITDZCS) +#endif + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPDIV TOO SHORT') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANS:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1807,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL INV_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + + IF (LHOOK) CALL DR_HOOK('INV_TRANS',1,ZHOOK_HANDLE) +CALL GSTATS(441,1) +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE INV_TRANS + diff --git a/src/trans/gpu/external/inv_transad.F90 b/src/trans/gpu/external/inv_transad.F90 new file mode 100755 index 0000000..ad1fabe --- /dev/null +++ b/src/trans/gpu/external/inv_transad.F90 @@ -0,0 +1,619 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE INV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL INV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. +! +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTLAD - control of Legendre transform +! FTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, & + & NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE INV_TRANS_CTLAD_MOD ,ONLY : INV_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) + +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('INV_TRANSAD',0,ZHOOK_HANDLE) +CALL GSTATS(1809,0) +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + &('INV_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + + + +! Compute derived variables + + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS("INV_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING") + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS("INV_TRANSAD : PSPVOR TOO SHORT") + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS("INV_TRANSAD : IF_UV > 0 BUT PSPDIV MISSING") + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS("INV_TRANSAD : PSPDIV TOO SHORT") + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + &NPRTRV + CALL ABORT_TRANS('INV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANSAD:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1809,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL INV_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +IF (LHOOK) CALL DR_HOOK('INV_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE INV_TRANSAD + diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 new file mode 100755 index 0000000..f0bc0fa --- /dev/null +++ b/src/trans/gpu/external/setup_trans.F90 @@ -0,0 +1,988 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& + & KFLEV,KTMAX,KRESOL,PWEIGHT,LDGRIDONLY,LDUSERPNM,LDKEEPRPNM,LDUSEFLT,& + & LDSPSETUPONLY,LDPNMONLY,LDUSEFFTW,& + & LDLL,LDSHIFTLL,CDIO_LEGPOL,CDLEGPOLFNAME,KLEGPOLPTR,KLEGPOLPTR_LEN) + +!**** *SETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL SETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KDLON - number of points on each Gaussian latitude [2*KDGL] +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! PWEIGHT - the weight per grid-point (for a weighted distribution) +! LDGRIDONLY - true if only grid space is required + +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space + +! LDSPLIT describe the distribution among processors of grid-point data and +! has no relevance if you are using a single processor + +! PSTRET - stretching factor - for the case the Legendre polynomials are +! computed on the stretched sphere - works with LSOUTHPNM +! LDUSEFLT - use Fast Legandre Transform (Butterfly algorithm) +! LDUSERPNM - Use Belusov algorithm to compute legendre pol. (else new alg.) +! LDKEEPRPNM - Keep Legendre Polynomials (only applicable when using +! FLT, otherwise always kept) +! LDPNMONLY - Compute the Legendre polynomials only, not the FFTs. +! LDUSEFFTW - Use FFTW for FFTs +! LDLL - Setup second set of input/output latitudes +! the number of input/output latitudes to transform is equal KDGL +! or KDGL+2 in the case that includes poles + equator +! the number of input/output longitudes to transform is 2*KDGL +! LDSHIFTLL - Shift output lon/lat data by 0.5*dx and 0.5*dy +! CDIO_LEGPOL - IO option on Legendre polinomials : N.B. Only works for NPROC=1 +! Options: +! 'READF' - read Leg.Pol. from file CDLEGPOLFNAME +! 'WRITEF' - write Leg.Pol. to file CDLEGPOLFNAME +! 'MEMBUF' - Leg. Pol provided in shared memory segment pointed to by KLEGPOLPTR of +! length KLEGPOLPTR_LEN +! CDLEGPOLFNAME - file name for Leg.Pol. IO +! KLEGPOLPTR - pointer to Legendre polynomials memory segment +! KLEGPOLPTR_LEN - length of Legendre polynomials memory segment + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- SETUP_DIMS - setup distribution independent dimensions +! SUMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! SUMP_TRANS - Second part of setup of distributed environment +! SUFFT - setup for FFT +! SHAREDMEM_CREATE - create memory buffer for Leg.pol. + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Daan Degrauwe : Mar 2012 E'-zone dimensions +! R. El Khatib 09-Aug-2012 %LAM in GEOM_TYPE +! R. El Khatib 14-Jun-2013 PSTRET, LDPNMONLY, LENABLED +! G. Mozdzynski : Oct 2014 Support f +! N. Wedi : Apr 2015 Support dual set of lat/lon +! G. Mozdzynski : Jun 2015 Support alternative FFTs to FFTW +! M.Hamrud/W.Deconinck : July 2015 IO options for Legenndre polynomials +! R. El Khatib 07-Mar-2016 Better flexibility for Legendre polynomials computation in stretched mode +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB , JPRD +USE PARKIND_ECTRANS ,ONLY : JPRBT +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT,C_ASSOCIATED,C_SIZE_T + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NDEF_RESOL, & + & NMAX_RESOL, NPRINTLEV, LENABLED, NERR +USE TPM_DIM ,ONLY : R, DIM_RESOL, R_NSMAX,R_NTMAX, R_NDGNH, R_NDGL, R_NNOEXTZL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL,NPROC,nprtrv, D_NUMP,D_MYMS,D_NSTAGT0B,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1, D_NASM0, & +& D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,mysetv,mysetw, MYPROC +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX +USE TPM_FIELDS ,ONLY : FIELDS_RESOL, F,F_RW, F_RN, F_RLAPIN, F_RACTHE, ZIA,ZEPSNM,ZSOA1,ZAOA1,ISTAN,ISTAS,ZAIA,ZOA1,ZOA2, & +& ZAA,ZAS,LDZAA,LDZAS,TDZAA,TDZAS,& +& IZBS,ILDZBA,ILDZBS,ITDZBA0,ITDZBS0,& +& IZCA,IZCS,IZCST,ILDZCA,ILDZCS,ITDZCA0,ITDZCS0,& +& DZBAT,DZBST,DLDZBA,DLDZBS,DTDZBA0,DTDZBS0,& +& DZCAT,DZCST,DLDZCA,DLDZCS,DTDZCA0,DTDZCS0,& +& IF_FS_INV0,IF_FS_DIR0,NFLEV0,ZAA0,DZBST0,DZCAT0,& +& ZAS0,DZCST0,KMLOC0 +! IZBA,IZCAT +USE TPM_FFT ,ONLY : T, FFT_RESOL +USE TPM_FFTH ,ONLY : TC, FFTH_RESOL +USE TPM_FLT +#ifndef REDUCED_MEM +USE TPM_TRANS ,ONLY : FOUBUF_IN, FOUBUF, ZGTF, ZAVE, ZMINGL, ZMAXGL, ZMINGPN, ZMAXGPN +#else +USE TPM_TRANS ,ONLY : FOUBUF_IN, FOUBUF, ZGTF, ZGTFTMP, ZAVE, ZMINGL, ZMAXGL, ZMINGPN, ZMAXGPN +#endif +USE TPM_CTL + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE SETUP_DIMS_MOD ,ONLY : SETUP_DIMS +USE SUMP_TRANS_MOD ,ONLY : SUMP_TRANS +USE SUMP_TRANS_PRELEG_MOD ,ONLY : SUMP_TRANS_PRELEG +USE SULEG_MOD ,ONLY : SULEG +USE PRE_SULEG_MOD ,ONLY : PRE_SULEG +USE SUFFT_MOD ,ONLY : SUFFT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SHAREDMEM_MOD ,ONLY : SHAREDMEM_CREATE +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +!USE CUDA_DEVICE_MOD ! only for NVIDIA GPUs +USE PREPSNM_MOD ,ONLY : PREPSNM +#ifdef _OPENACC +USE OPENACC +#endif +USE OMP_LIB + +!endif INTERFACE + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KSMAX,KDGL +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KDLON +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT):: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSTRET +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KFLEV +LOGICAL ,OPTIONAL,INTENT(IN):: LDGRIDONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFLT +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSERPNM +LOGICAL ,OPTIONAL,INTENT(IN):: LDKEEPRPNM +LOGICAL ,OPTIONAL,INTENT(IN):: LDSPSETUPONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDPNMONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFFTW +LOGICAL ,OPTIONAL,INTENT(IN):: LDLL +LOGICAL ,OPTIONAL,INTENT(IN):: LDSHIFTLL +CHARACTER(LEN=*),OPTIONAL,INTENT(IN):: CDIO_LEGPOL +CHARACTER(LEN=*),OPTIONAL,INTENT(IN):: CDLEGPOLFNAME +TYPE(C_PTR) ,OPTIONAL,INTENT(IN) :: KLEGPOLPTR +INTEGER(C_SIZE_T) ,OPTIONAL,INTENT(IN) :: KLEGPOLPTR_LEN + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM),PARAMETER :: IMAXFLD=240 +INTEGER(KIND=JPIM) :: JGL,JRES,IDEF_RESOL +INTEGER(KIND=JPIM) :: NFLEVL, JMLOC, KM, ILA, ILS, KMLOC, KDGLU, JK, i, J, IF_FS, IF_OUT_LT, IF_UV, IF_SCALARS +INTEGER(KIND=JPIM) :: IPPNUM, IF_PP, IF_FOUBUF + +LOGICAL :: LLP1,LLP2, LLSPSETUPONLY +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +#ifdef _OPENACC +INTEGER(ACC_DEVICE_KIND) :: IDEVTYPE +#endif +INTEGER :: INUMDEVS, IUNIT, ISTAT, IDEV, MYGPU + +#include "user_clock.intfb.h" +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SETUP_TRANS',0,ZHOOK_HANDLE) + +IF(MSETUP0 == 0) THEN + CALL ABORT_TRANS('SETUP_TRANS: SETUP_TRANS0 HAS TO BE CALLED BEFORE SETUP_TRANS') +ENDIF +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS ===' + +! Allocate resolution dependent structures +IF(.NOT. ALLOCATED(DIM_RESOL)) THEN + IDEF_RESOL = 1 + ALLOCATE(DIM_RESOL(NMAX_RESOL)) + ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(GEOM_RESOL(NMAX_RESOL)) + ALLOCATE(DISTR_RESOL(NMAX_RESOL)) + ALLOCATE(FFT_RESOL(NMAX_RESOL)) + ALLOCATE(FFTH_RESOL(NMAX_RESOL)) + ALLOCATE(FLT_RESOL(NMAX_RESOL)) + ALLOCATE(CTL_RESOL(NMAX_RESOL)) + GEOM_RESOL(:)%LAM=.FALSE. + ALLOCATE(LENABLED(NMAX_RESOL)) + LENABLED(:)=.FALSE. +ELSE + IDEF_RESOL = NMAX_RESOL+1 + DO JRES=1,NMAX_RESOL + IF(.NOT.LENABLED(JRES)) THEN + IDEF_RESOL = JRES + EXIT + ENDIF + ENDDO + IF(IDEF_RESOL > NMAX_RESOL) THEN + CALL ABORT_TRANS('SETUP_TRANS:IDEF_RESOL > NMAX_RESOL') + ENDIF +ENDIF + +IF (PRESENT(KRESOL)) THEN + KRESOL=IDEF_RESOL +ENDIF + +! Point at structures due to be initialized +CALL SET_RESOL(IDEF_RESOL,LDSETUP=.TRUE.) + +IF(LLP1) WRITE(NOUT,*) '=== DEFINING RESOLUTION ',NCUR_RESOL + + + +! Defaults for optional arguments + + +G%LREDUCED_GRID = .FALSE. +G%RSTRET=1.0_JPRBT +D%LGRIDONLY = .FALSE. +D%LSPLIT = .FALSE. +D%LCPNMONLY=.FALSE. +S%LUSE_BELUSOV=.TRUE. ! use Belusov algorithm to compute RPNM array instead of per m +S%LKEEPRPNM=.FALSE. ! Keep Legendre polonomials (RPNM) +S%LUSEFLT=.FALSE. ! Use fast legendre transforms +LLSPSETUPONLY = .FALSE. ! Only create distributed spectral setup +S%LDLL = .FALSE. ! use mapping to/from second set of latitudes +S%LSHIFTLL = .FALSE. ! shift output lat-lon by 0.5dx, 0.5dy +C%LREAD_LEGPOL = .FALSE. +C%LWRITE_LEGPOL = .FALSE. + + +! NON-OPTIONAL ARGUMENTS +R%NSMAX = KSMAX +R%NDGL = KDGL +! E'-defaults +R%NNOEXTZL=0 +R%NNOEXTZG=0 + +! IMPLICIT argument : +G%LAM = .FALSE. + +IF(PRESENT(KDLON)) THEN + R%NDLON = KDLON +ELSE + R%NDLON = 2*R%NDGL +ENDIF + +IF(PRESENT(LDLL)) THEN + S%LDLL=LDLL + IF( LDLL ) THEN + S%NDLON=R%NDLON + ! account for pole + equator + R%NDGL=R%NDGL+2 + IF(PRESENT(LDSHIFTLL)) THEN + S%LSHIFTLL = LDSHIFTLL + ! geophysical (shifted) lat-lon without pole and equator + IF(S%LSHIFTLL) R%NDGL=R%NDGL-2 + ENDIF + S%NDGL=R%NDGL + ENDIF +ENDIF + +IF (R%NDGL <= 0 .OR. MOD(R%NDGL,2) /= 0) THEN + CALL ABORT_TRANS ('SETUP_TRANS: KDGL IS NOT A POSITIVE, EVEN NUMBER') +ENDIF + +! Optional arguments + +ALLOCATE(G%NLOEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'NLOEN ',SIZE(G%NLOEN ),SHAPE(G%NLOEN ) +IF(PRESENT(KLOEN)) THEN + IF( MINVAL(KLOEN(:)) <= 0 )THEN + CALL ABORT_TRANS ('SETUP_TRANS: KLOEN INVALID (ONE or MORE POINTS <= 0)') + ENDIF + R%NDLON=MAXVAL(KLOEN(:)) + DO JGL=1,R%NDGL + IF(KLOEN(JGL) /= R%NDLON) THEN + G%LREDUCED_GRID = .TRUE. + EXIT + ENDIF + ENDDO +ENDIF + +IF (G%LREDUCED_GRID) THEN + G%NLOEN(:) = KLOEN(1:R%NDGL) +ELSE + G%NLOEN(:) = R%NDLON +ENDIF + +IF(PRESENT(LDSPLIT)) THEN + D%LSPLIT = LDSPLIT +ENDIF + +IF(PRESENT(KTMAX)) THEN + R%NTMAX = KTMAX +ELSE + R%NTMAX = R%NSMAX +ENDIF + +IF(PRESENT(PWEIGHT)) THEN + D%LWEIGHTED_DISTR = .TRUE. + IF( D%LWEIGHTED_DISTR .AND. .NOT.D%LSPLIT )THEN + CALL ABORT_TRANS('SETUP_TRANS: LWEIGHTED_DISTR=T AND LSPLIT=F NOT SUPPORTED') + ENDIF + IF(SIZE(PWEIGHT) /= SUM(G%NLOEN(:)) )THEN + CALL ABORT_TRANS('SETUP_TRANS:SIZE(PWEIGHT) /= SUM(G%NLOEN(:))') + ENDIF + IF( MINVAL(PWEIGHT(:)) < 0.0_JPRBT )THEN + CALL ABORT_TRANS('SETUP_TRANS: INVALID WEIGHTS') + ENDIF + ALLOCATE(D%RWEIGHT(SIZE(PWEIGHT))) + D%RWEIGHT(:)=PWEIGHT(:) +ELSE + D%LWEIGHTED_DISTR = .FALSE. +ENDIF + +IF(PRESENT(LDGRIDONLY)) THEN + D%LGRIDONLY=LDGRIDONLY +ENDIF + +IF(PRESENT(LDSPSETUPONLY)) THEN + LLSPSETUPONLY=LDSPSETUPONLY +ENDIF + +IF(PRESENT(LDPNMONLY)) THEN + D%LCPNMONLY=LDPNMONLY +ENDIF + + +S%LSOUTHPNM=.FALSE. +IF(PRESENT(PSTRET)) THEN + IF (ABS(PSTRET-1.0_JPRBT)>100._JPRBT*EPSILON(1._JPRBT)) THEN + G%RSTRET=PSTRET + S%LSOUTHPNM=.TRUE. + ENDIF +ENDIF + +IF(PRESENT(CDIO_LEGPOL)) THEN + IF(NPROC > 1) CALL ABORT_TRANS('SETUP_TRANS:CDIO_LEGPOL OPTIONS ONLY FOR NPROC=1 ') + IF(R%NSMAX > 511 ) S%LUSEFLT = .TRUE. !To save IO and memory + IF(TRIM(CDIO_LEGPOL) == 'readf' .OR. TRIM(CDIO_LEGPOL) == 'READF' ) THEN + IF(.NOT.PRESENT(CDLEGPOLFNAME)) CALL ABORT_TRANS('SETUP_TRANS: CDLEGPOLFNAME ARGUMENT MISSING') + C%LREAD_LEGPOL = .TRUE. + C%CLEGPOLFNAME = TRIM(CDLEGPOLFNAME) + C%CIO_TYPE='file' + ELSEIF(TRIM(CDIO_LEGPOL) == 'writef' .OR. TRIM(CDIO_LEGPOL) == 'WRITEF') THEN + IF(.NOT.PRESENT(CDLEGPOLFNAME)) CALL ABORT_TRANS('SETUP_TRANS: CDLEGPOLFNAME ARGUMENT MISSING') + C%LWRITE_LEGPOL = .TRUE. + C%CLEGPOLFNAME = TRIM(CDLEGPOLFNAME) + C%CIO_TYPE='file' + ELSEIF(TRIM(CDIO_LEGPOL) == 'membuf' .OR. TRIM(CDIO_LEGPOL) == 'MEMBUF') THEN + IF(.NOT.PRESENT(KLEGPOLPTR)) CALL ABORT_TRANS('SETUP_TRANS: KLEGPOLPTR ARGUMENT MISSING') + IF(.NOT.C_ASSOCIATED(KLEGPOLPTR)) CALL ABORT_TRANS('SETUP_TRANS: KLEGPOLPTR NULL POINTER') + IF(.NOT.PRESENT(KLEGPOLPTR_LEN)) CALL ABORT_TRANS('SETUP_TRANS: KLEGPOLPTR_LEN ARGUMENT MISSING') + C%LREAD_LEGPOL = .TRUE. + C%CIO_TYPE='mbuf' + CALL SHAREDMEM_CREATE( C%STORAGE,KLEGPOLPTR,KLEGPOLPTR_LEN) + ELSE + WRITE(NERR,*) 'CDIO_LEGPOL ', TRIM(CDIO_LEGPOL) + CALL ABORT_TRANS('SETUP_TRANS:CDIO_LEGPOL UNKNOWN METHOD ') + ENDIF +ENDIF + +IF(PRESENT(LDUSEFLT)) THEN + S%LUSEFLT=LDUSEFLT +ENDIF +IF(PRESENT(LDUSERPNM)) THEN + S%LUSE_BELUSOV=LDUSERPNM +ENDIF +IF(PRESENT(LDKEEPRPNM)) THEN + IF(S%LUSEFLT) THEN + IF(LDKEEPRPNM.AND..NOT.LDUSERPNM) THEN + CALL ABORT_TRANS('SETUP_TRANS: LDKEEPRPNM=true with LDUSERPNM=false') + ENDIF + ENDIF + S%LKEEPRPNM=LDKEEPRPNM +ENDIF +! Setup resolution dependent structures +! ------------------------------------- + +! Setup distribution independent dimensions +CALL SETUP_DIMS + +! First part of setup of distributed environment +CALL SUMP_TRANS_PRELEG + +IF( .NOT.LLSPSETUPONLY ) THEN + +! Compute Legendre polonomial and Gaussian Latitudes and Weights + CALL SULEG + +! Second part of setup of distributed environment + CALL SUMP_TRANS + CALL GSTATS(1802,0) + +! Initialize Fast Fourier Transform package + IF (.NOT.D%LCPNMONLY) CALL SUFFT + CALL GSTATS(1802,1) +ELSE + CALL PRE_SULEG +ENDIF + +! Signal the current resolution is active +LENABLED(IDEF_RESOL)=.TRUE. +NDEF_RESOL = COUNT(LENABLED) + +IF (LHOOK) CALL DR_HOOK('SETUP_TRANS',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +IF( .NOT.D%LGRIDONLY ) THEN + +!allocating arrays for the GPU: +IF(PRESENT(KFLEV)) THEN + NFLEV0 = KFLEV +! NFLEVL = NFLEV0/NPRTRV +ELSE + NFLEV0 = ceiling(REAL(IMAXFLD)/NPRTRV) +ENDIF + +! need to get local rank to be able to set device (1GPU == 1 MPI-rank) +!ilocal_rank = 0 +!call GETENV("OMPI_COMM_WORLD_LOCAL_RANK",comm_local_rank) +!read(comm_local_rank,'(I2)') ilocal_rank + +IUNIT=300+MYPROC + +#ifdef _OPENACC +!!IDEVTYPE=ACC_DEVICE_NVIDIA +IDEVTYPE=ACC_GET_DEVICE_TYPE() +INUMDEVS = ACC_GET_NUM_DEVICES(IDEVTYPE) +MYGPU = MOD(MYPROC-1,INUMDEVS) +CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) +MYGPU = ACC_GET_DEVICE_NUM(IDEVTYPE) +!ISTAT = CUDA_GETDEVICE(IDEV) +#endif + +!dimensions of matrices for Legendre Transforms for RAPS ? +!IF_OUT_LT = 5*NFLEV0+2 +!IF_FS = 6*NFLEV0+3 + +! add additional post-processing requirements +!IF_PP = 2*NFLEV0 +IF_PP = 0 + +! u/v + scalars 3d + scalars 2d +IF_UV = NFLEV0 +! SCALARS INCLUDING DERIVATIVES +IF_SCALARS = NFLEV0 + 2*NFLEV0 + 1 + 2 +IF_OUT_LT = 4*IF_UV+3*NFLEV0+3 +!IF_OUT_LT = 4*IF_UV+3*NFLEV0+3 +!8*KF_UV+2*KF_SCALARS +!ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IF_FS_INV0=8*IF_UV+2*IF_SCALARS + +! fields in Fourier space for inv trans the same +!IF_FS=4*IF_UV+1*NFLEV0+2 +IF_FS=4*IF_UV+1*NFLEV0+2 +! for derivatives u/v add +!IF_FS=IFS_FS+2*(2*NFLEV0) +! for each 3d scalar derivative add +IF_FS=IF_FS+2*NFLEV0 ! temperature +! for each 2d scalar derivative add +IF_FS=IF_FS+2 ! sfc pressure +IF_FS=IF_FS+IF_PP + +! u/v + scalars for direct transforms +! plus postprocessing buffer +!ippnum=NFLEV0 +IF_FS_DIR0=2*(2*IF_UV+NFLEV0+2+IF_PP) +!QUESTION: Why do we have NFLEV0 here? (Andreas) + +! fields in Fourier space for dir trans +!IF_FS = 2*IF_UV + IF_SCALARS +! plus add 2*scalar_derivatives + add vorg/divg + 2*IF_UV for u/v zonal derivatives + +WRITE(NOUT,*)'setup_trans: if_uv=',IF_UV,' if_out_lt=',IF_OUT_LT,' IF_FS_DIR0=',IF_FS_DIR0,'IF_FS_INV0= ',IF_FS_INV0, ' IF_PP= ',IF_PP +IF(MOD(IF_FS,2)==1) IF_FS = IF_FS + 1 + +!leading and trailing dimensions of A for symmetric and antisymmetric cases +! (same for ltinv and ltdir) +LDZAA=R%NDGNH +LDZAS=R%NDGNH +TDZAA=(R%NTMAX+2)/2 +TDZAS=(R%NTMAX+3)/2 +print*,'R%NTMAX=',R%NTMAX +print*,'R%NSMAX=',R%NSMAX +!similarly for B (ltinv) +ILDZBA=(R%NSMAX+2)/2 +ILDZBS=(R%NSMAX+3)/2 +ITDZBA0=IF_FS_INV0 +ITDZBS0=IF_FS_INV0 + +!similarly for C (ltinv) +ILDZCA=R%NDGNH +ILDZCS=R%NDGNH +ITDZCA0=IF_FS_INV0 +ITDZCS0=IF_FS_INV0 + +!similarly for B (ltdir) +DLDZBA=R%NDGNH +DLDZBS=R%NDGNH +DTDZBA0=IF_FS_DIR0 +DTDZBS0=IF_FS_DIR0 + +!similarly for C (ltdir) +DLDZCA=(R%NTMAX+2)/2 +DLDZCS=(R%NTMAX+3)/2 +DTDZCA0=IF_FS_DIR0 +DTDZCS0=IF_FS_DIR0 + +! competition: NPRTRV ... larger == NUMP ... larger == NSMAX/NPRTRW +! setting NPRTRV=20 ... leads to 7GB ZAA since NUMP==55 + +!allocate matrices for matrix multiplications +ALLOCATE(IZBS(IF_FS_INV0*TDZAS*D%NUMP)) +PRINT*,"New: allocating IZBS as a 1D array!" +! just use IZBS +!IZBA=>IZBS(:,1:TDZAA,:) + +ALLOCATE(ZAA(R%NDGNH,TDZAA,D%NUMP)) +ALLOCATE(ZAS(R%NDGNH,TDZAS,D%NUMP)) + +! Allocate matrices for rescaling to allow half-precision Legendre transforms +!ALLOCATE(ZAMAX(IF_FS_INV0,D%NUMP)) +!ALLOCATE(ZSMAX(IF_FS_INV0,D%NUMP)) + +! transpose of C (for better memory access patterns) +!ALLOCATE(IZCAT(IF_FS_INV0,R%NDGNH,D%NUMP)) +ALLOCATE(IZCST(IF_FS_INV0*R%NDGNH*D%NUMP)) + +!ALLOCATE(DZBAT(IF_FS_DIR0,R%NDGNH,D%NUMP)) +ALLOCATE(DZBST(IF_FS_DIR0*R%NDGNH*D%NUMP)) + +! transpose of C (for better memory access patterns) +ALLOCATE(DZCAT(IF_FS_DIR0*TDZAA*D%NUMP)) +ALLOCATE(DZCST(IF_FS_DIR0*TDZAS*D%NUMP)) +!DZCAT=>DZCST(:,1:TDZAA,:) + +WRITE(NOUT,*)'setup_trans: sizes1 NUMP=',D%NUMP +WRITE(NOUT,*)'ZAS:',SIZE(ZAS) +WRITE(NOUT,*)'IZBS :',SIZE(IZBS ) +WRITE(NOUT,*)'IZCST:',SIZE(IZCST) +WRITE(NOUT,*)'DZBST:',SIZE(DZBST) +WRITE(NOUT,*)'DZCST:',SIZE(DZCST) +WRITE(NOUT,*)'DZCAT:',SIZE(DZCAT) +#ifdef ACCGPU +!$ACC ENTER DATA CREATE(ZAA,ZAS,IZBS,IZCST,DZBST,DZCST,DZCAT) & +!$ACC& COPYIN(F,S,D,R,G,TDZAA,ILDZCA,ILDZBS, ILDZCS) & +!!$ACC& COPYIN(D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,G%NMEN,D%NPROCM,D%NPTRLS) & +!!$ACC& COPYIN(F%RACTHE,F%RN,F%RLAPIN,G,G%NLOEN,D%MSTABF,R%NNOEXTZL) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(ALLOC:ZAA,ZAS,IZBS,IZCST,DZBST,DZCST,DZCAT) +!$OMP TARGET ENTER DATA MAP(TO:F,S,D,D_NUMP,D_MYMS,R,R_NDGNH,R_NSMAX,G,G_NDGLU) +!$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB0,D_NPNTGTB1,D_NSTAGT0B,D_NSTAGT1B,D_NSTAGTF,G_NMEN,D_NPROCM,D_NPTRLS,G,G_NLOEN,D_MSTABF) +#endif + +! Initialize A arrays + +IZCST(:) = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(IZCST) +#endif +DZCAT(:) = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(DZCAT) +#endif +DZCST(:) = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(DZCST) +#endif + +IZBS = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(IZBS) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(IZBS) +#endif +DZBST = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(DZBST) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(DZBST) +#endif + +! Do this on the host +ZAA(:,:,:) = 0._JPRBT +ZAS(:,:,:) = 0._JPRBT + +DO JMLOC=1,D%NUMP + KM = D%MYMS(JMLOC) + KDGLU = MIN(R%NDGNH,G%NDGLU(KM)) + + ILA = (R%NSMAX-KM+2)/2 + DO JK=1,KDGLU + DO J=1,ILA + ZAA(JK,J,JMLOC)=S%FA(JMLOC)%RPNMA(JK,J) + ENDDO + ENDDO +ENDDO + +DO JMLOC=1,D%NUMP + KM = D%MYMS(JMLOC) + KDGLU = MIN(R%NDGNH,G%NDGLU(KM)) + + ILS = (R%NSMAX-KM+3)/2 + DO JK=1,KDGLU + DO J=1,ILS + ZAS(JK,J,JMLOC)=S%FA(JMLOC)%RPNMS(JK,J) + ENDDO + ENDDO +ENDDO + +! permanent copy of Legendre polynomials into device + +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZAA) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZAA) +#endif +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZAS) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZAS) +#endif + +IF_FOUBUF=MAX(IF_OUT_LT,IF_FS) +ALLOCATE(FOUBUF_IN(MAX(1,D%NLENGT0B*2*IF_FOUBUF)))! +ALLOCATE(FOUBUF(MAX(1,D%NLENGT0B*2*IF_FOUBUF))) +! memory save + +ALLOCATE(ZGTF(2*IF_FS,D%NLENGTF)) +WRITE(NOUT,*)'ZGTF :',SIZE(ZGTF) +#ifdef ACCGPU +!$ACC ENTER DATA CREATE(ZGTF) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(PRESENT,ALLOC:ZGTF) +#endif +#ifdef REDUCED_MEM +ALLOCATE(ZGTFTMP(2*IF_FS,D%NLENGTF)) +WRITE(NOUT,*)'ZGTFTMP :',SIZE(ZGTF) +#ifdef ACCGPU +!$ACC ENTER DATA CREATE(ZGTFTMP) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(PRESENT,ALLOC:ZGTFTMP) +#endif +#endif + +ALLOCATE(ZIA(IF_FS_INV0,R%NLEI1,D%NUMP)) +ALLOCATE(ZEPSNM(D%NUMP,0:R%NTMAX+2)) +ALLOCATE(ZSOA1(2*IF_OUT_LT,R%NLEI3,D%NUMP)) +ALLOCATE(ZAOA1(2*IF_OUT_LT,R%NLEI3,D%NUMP)) +ALLOCATE(ISTAN(D%NUMP,R%NDGNH)) +ALLOCATE(ISTAS(D%NUMP,R%NDGNH)) +!ALLOCATE(ZSIA(IF_FS_INV0,R%NDGNH,D%NUMP)) +ALLOCATE(ZAIA(IF_FS_INV0,R%NDGNH,D%NUMP)) +ALLOCATE(ZOA1(4*IF_FS_DIR0,R%NLED4,D%NUMP)) +ALLOCATE(ZOA2(MAX(4*IF_UV,1),R%NLED4,D%NUMP)) +WRITE(NOUT,*)'ZIA :',size(ZIA ) +WRITE(NOUT,*)'ZSOA1:',size(ZSOA1) +WRITE(NOUT,*)'ZAOA1:',size(ZAOA1) +WRITE(NOUT,*)'ZAIA :',size(ZAIA ) +WRITE(NOUT,*)'ZOA1 :',size(ZOA1 ) +WRITE(NOUT,*)'ZOA2 :',size(ZOA2 ) +#ifdef ACCGPU +!$ACC ENTER DATA CREATE(ZIA,ZEPSNM,ZSOA1,ZAOA1,ZAIA,ZOA1,ZOA2) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(PRESENT,ALLOC:ZIA,ZEPSNM,ZSOA1,ZAOA1,ZAIA,ZOA1,ZOA2) +#endif + +ZEPSNM = 0._JPRBT +! on the host +CALL PREPSNM +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZEPSNM) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZEPSNM) +#endif +ZGTF = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZGTF) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZGTF) +#endif +ZIA = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZIA) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZIA) +#endif +!ZSIA = 0._JPRBT +!!!$ACC UPDATE DEVICE(ZSIA) +!!!$OMP TARGET UPDATE TO(ZSIA) +ZAIA = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZAIA) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZAIA) +#endif +ZOA1 = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZOA1) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZOA1) +#endif +ZOA2 = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZOA2) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZOA2) +#endif +ZAOA1 = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZAOA1) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZAOA1) +#endif +ZSOA1 = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZSOA1) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZSOA1) +#endif + +! add arrays for GPNORM1 +ALLOCATE(ZAVE(IF_FS,R%NDGL)) +ALLOCATE(ZMINGL(IF_FS,R%NDGL)) +ALLOCATE(ZMAXGL(IF_FS,R%NDGL)) +ALLOCATE(ZMINGPN(IF_FS)) +ALLOCATE(ZMAXGPN(IF_FS)) +#ifdef ACCGPU +!$ACC ENTER DATA CREATE(ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(PRESENT,ALLOC:ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) +#endif + +ZAVE = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZAVE) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZAVE) +#endif +ZMINGL = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZMINGL) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZMINGL) +#endif +ZMAXGL = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZMAXGL) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZMAXGL) +#endif +ZMINGPN = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZMINGPN) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZMINGPN) +#endif +ZMAXGPN = 0._JPRBT +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZMAXGPN) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZMAXGPN) +#endif + +!set up flat copies of constant data +R_NSMAX=R%NSMAX +R_NTMAX=R%NTMAX +R_NDGNH=R%NDGNH +R_NDGL=R%NDGL +R_NNOEXTZL=R%NNOEXTZL + + +ALLOCATE(D_NSTAGT0B(SIZE(D%NSTAGT0B))) +ALLOCATE(D_NSTAGT1B(SIZE(D%NSTAGT1B))) +ALLOCATE(D_NPNTGTB0(0:SIZE(D%NPNTGTB0,1)-1,SIZE(D%NPNTGTB0,2))) +ALLOCATE(D_NPNTGTB1(SIZE(D%NPNTGTB1,1),SIZE(D%NPNTGTB1,2))) +ALLOCATE(D_MYMS(SIZE(D%MYMS))) +ALLOCATE(D_NPROCL(SIZE(D%NPROCL))) +ALLOCATE(D_NASM0(0:SIZE(D%NASM0)-1)) +ALLOCATE(D_NSTAGTF(SIZE(D%NSTAGTF))) +ALLOCATE(D_MSTABF(SIZE(D%MSTABF))) +ALLOCATE(D_NPROCM(0:SIZE(D%NPROCM)-1)) +ALLOCATE(D_NPTRLS(SIZE(D%NPTRLS))) + +ALLOCATE(G_NDGLU(0:SIZE(G%NDGLU)-1)) +ALLOCATE(G_NMEN(SIZE(G%NMEN))) +ALLOCATE(G_NLOEN(SIZE(G%NLOEN))) + +ALLOCATE(F_RW(SIZE(F%RW))) +ALLOCATE(F_RN(SIZE(F%RN))) +ALLOCATE(F_RLAPIN(SIZE(F%RLAPIN))) +ALLOCATE(F_RACTHE(SIZE(F%RACTHE))) + + +DO I=0,SIZE(G%NDGLU)-1 + G_NDGLU(I)=G%NDGLU(I) +END DO + +G_NMEN_MAX=0 +DO I=1,SIZE(G%NMEN) + G_NMEN(I)=G%NMEN(I) + IF (G_NMEN(I) .GT. G_NMEN_MAX) G_NMEN_MAX=G_NMEN(I) +END DO + +G_NLOEN_MAX=0 +DO I=1,SIZE(G%NLOEN) + G_NLOEN(I)=G%NLOEN(I) + IF (G_NLOEN(I) .GT. G_NLOEN_MAX) G_NLOEN_MAX=G_NLOEN(I) +END DO + +DO I=1,SIZE(D%NSTAGT0B) + D_NSTAGT0B(I)=D%NSTAGT0B(I) +END DO + +DO I=1,SIZE(D%NSTAGT1B) + D_NSTAGT1B(I)=D%NSTAGT1B(I) +END DO + +DO I=1,SIZE(D%NPROCL) + D_NPROCL(I)=D%NPROCL(I) +END DO + +DO I=0,SIZE(D%NASM0)-1 + D_NASM0(I)=D%NASM0(I) +END DO + +DO I=1,SIZE(D%NSTAGTF) + D_NSTAGTF(I)=D%NSTAGTF(I) +END DO + +DO I=1,SIZE(D%MSTABF) + D_MSTABF(I)=D%MSTABF(I) +END DO + +DO I=0,SIZE(D%NPROCM)-1 + D_NPROCM(I)=D%NPROCM(I) +END DO + +DO I=1,SIZE(D%NPTRLS) + D_NPTRLS(I)=D%NPTRLS(I) +END DO + +DO I=1,SIZE(D%NPNTGTB0,2) + DO J=0,SIZE(D%NPNTGTB0,1)-1 + D_NPNTGTB0(J,I)=D%NPNTGTB0(J,I) + END DO +END DO + +DO I=1,SIZE(D%NPNTGTB1,2) + DO J=1,SIZE(D%NPNTGTB1,1) + D_NPNTGTB1(J,I)=D%NPNTGTB1(J,I) + END DO +END DO + +D_NUMP=D%NUMP + +KMLOC0 = -1 +DO I=1,SIZE(D%MYMS) + D_MYMS(I)=D%MYMS(I) + IF(D_MYMS(I) == 0) KMLOC0 = I +end DO + +! arrays for m=0 in ledir_mod: +IF(KMLOC0 >= 0) THEN + ALLOCATE(ZAA0(R%NDGNH,TDZAA)) + ALLOCATE(ZAS0(R%NDGNH,TDZAS)) + ALLOCATE(DZBST0(IF_FS_DIR0*R%NDGNH)) + ALLOCATE(DZCAT0(IF_FS_DIR0*TDZAA)) + ALLOCATE(DZCST0(IF_FS_DIR0*TDZAS)) + DZCAT0(:) = 0._JPRD + DZCST0(:) = 0._JPRD + ZAA0 = ZAA(:,:,KMLOC0) + ZAS0 = ZAS(:,:,KMLOC0) + DZBST0 = 0._JPRD +#ifdef ACCGPU + !$ACC ENTER DATA COPYIN(ZAA0,DZBST0,DZCAT0,ZAS0,DZCST0,KMLOC0) +#endif +#ifdef OMPGPU + !$OMP TARGET ENTER DATA MAP(TO:ZAA0,DZBST0,DZCAT0,ZAS0,DZCST0) +#endif + WRITE(NOUT,*) 'GPU arrays for m=0 successfully allocated' +#ifdef ACCGPU + WRITE(NOUT,*) 'Using OpenACC' +#endif +#ifdef OMPGPU + WRITE(NOUT,*) 'Using OpenMP offloading' +#endif +ENDIF + +DO I=1,SIZE(F%RW) + F_RW(I)=F%RW(I) +END DO +DO I=1,SIZE(F%RLAPIN) + F_RLAPIN(I)=F%RLAPIN(I) +END DO +DO I=1,SIZE(F%RLAPIN) + F_RACTHE(I)=F%RACTHE(I) +END DO +DO I=1,SIZE(F%RN) + F_RN(I)=F%RN(I) +END DO + +#ifdef ACCGPU +!$ACC ENTER DATA COPYIN(R_NSMAX,R_NTMAX,R_NDGL,R_NNOEXTZL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,& +!$ACC& D_NPNTGTB1,D_NPROCL,D_NUMP,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF,& +!$ACC& D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,& +!$ACC& G_NLOEN_MAX,F_RW,F_RLAPIN,F_RN,F_RACTHE) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(TO:R_NSMAX,R_NTMAX,R_NDGL,R_NNOEXTZL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B) +!$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB1,D_NPROCL,D_NUMP,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF) +!$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN) +!$OMP TARGET ENTER DATA MAP(TO:G_NLOEN_MAX,F_RW,F_RLAPIN,F_RN,F_RACTHE) +#endif + +WRITE(NOUT,*) '===GPU arrays successfully allocated' +#ifdef ACCGPU +!$ACC wait +#endif +#ifdef OMPGPU +!$OMP BARRIER +#endif + +! free memory +!DO JMLOC=1,D%NUMP +! DEALLOCATE(S%FA(JMLOC)%RPNMA) +! DEALLOCATE(S%FA(JMLOC)%RPNMS) +!ENDDO + +!endif INTERFACE + +ENDIF + +END SUBROUTINE SETUP_TRANS diff --git a/src/trans/gpu/external/setup_trans0.F90 b/src/trans/gpu/external/setup_trans0.F90 new file mode 100755 index 0000000..24acc8c --- /dev/null +++ b/src/trans/gpu/external/setup_trans0.F90 @@ -0,0 +1,257 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& +& KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN,& +& LDMPOFF,LDSYNC_TRANS,KTRANS_SYNC_LEVEL,& +& LDEQ_REGIONS,K_REGIONS_NS,K_REGIONS_EW,K_REGIONS,& +& PRAD,LDALLOPERM) + +!**** *SETUP_TRANS0* - General setup routine for transform package + +! Purpose. +! -------- +! Resolution independent part of setup of transform package +! Has to be called BEFORE SETUP_TRANS + +!** Interface. +! ---------- +! CALL SETUP_TRANS0(...) + +! Explicit arguments : All arguments are optional, [..] default value +! ------------------- +! KOUT - Unit number for listing output [6] +! KERR - Unit number for error messages [0] +! KPRINTLEV - level of output to KOUT, 0->no output,1->normal,2->debug [0] +! KMAX_RESOL - maximum number of different resolutions for this run [1] +! KPRGPNS - splitting level in N-S direction in grid-point space [1] +! KPRGPEW - splitting level in E-W direction in grid-point space [1] +! KPRTRW - splitting level in wave direction in spectral space [1] +! KCOMBFLEN - Size of communication buffer [1800000 (*8bytes) ] +! LDMPOFF - switch off message passing [false] +! LDSYNC_TRANS - switch to activate barriers in trmtol trltom [false] +! KTRANS_SYNC_LEVEL - use of synchronization/blocking [0] +! LDEQ_REGIONS - true if new eq_regions partitioning [false] +! K_REGIONS - Number of regions (1D or 2D partitioning) +! K_REGIONS_NS - Maximum number of NS partitions +! K_REGIONS_EW - Maximum number of EW partitions +! PRAD - Radius of the planet +! LDALLOPERM - Allocate certain arrays permanently +! The total number of (MPI)-processors has to be equal to KPRGPNS*KPRGPEW + +! Method. +! ------- + +! Externals. SUMP_TRANS0 - initial setup routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! R. El Khatib 03-01-24 LDMPOFF +! G. Mozdzynski 2006-09-13 LDEQ_REGIONS +! N. Wedi 2009-11-30 add radius + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, LMPOFF, LSYNC_TRANS, NTRANS_SYNC_LEVEL, MSETUP0, & + & NMAX_RESOL, NPRINTLEV, NPROMATR, LALLOPERM +USE TPM_DISTR ,ONLY : LEQ_REGIONS, NCOMBFLEN, NPRGPEW,NPRGPNS, NPRTRW, NPRTRV, MYSETV +USE TPM_CONSTANTS ,ONLY : RA +USE MPL_MODULE + +USE SUMP_TRANS0_MOD ,ONLY : SUMP_TRANS0 +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +USE EC_ENV_MOD ,ONLY : EC_GETENV +#ifdef _OPENACC +USE OPENACC +#endif + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN +LOGICAL ,OPTIONAL,INTENT(IN) :: LDMPOFF +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSYNC_TRANS +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KTRANS_SYNC_LEVEL +LOGICAL ,OPTIONAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDALLOPERM +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PRAD +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS(:) +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_NS +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_EW + +INTEGER(KIND=JPIM) :: MYPROC +INTEGER :: IDEVICE_NUM, IPROC_PERNODE +#ifdef _OPENACC +INTEGER(ACC_DEVICE_KIND) :: IDEVTYPE, IDEVICE_TYPE +#endif +INTEGER :: NUMDEVS, IERROR, MYGPU +CHARACTER(LEN=2) :: CL_NPROC_PERNODE + +!ifndef INTERFACE + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + +MYPROC = MPL_MYRANK() + +#ifdef _OPENACC +IDEVTYPE=ACC_GET_DEVICE_TYPE() +NUMDEVS = ACC_GET_NUM_DEVICES(IDEVTYPE) +MYGPU = MOD(MYPROC-1,NUMDEVS) +CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) +MYGPU = ACC_GET_DEVICE_NUM(IDEVTYPE) +!WRITE(*,*) 'MYPROC:',MYPROC, 'GPU:', MYGPU, 'of ', NUMDEVS + +CL_NPROC_PERNODE=' ' +CALL EC_GETENV('NPROC_PERNODE',CL_NPROC_PERNODE) +IF( CL_NPROC_PERNODE /= ' ')THEN + READ(CL_NPROC_PERNODE,*) IPROC_PERNODE + IDEVICE_NUM=MOD(MYPROC-1,IPROC_PERNODE) + WRITE(0,'("TRANSFORM TEST: MYPROC=",I8," CL_NPROC_PERNODE=",A," IPROC_PERNODE=",I2,& + & " IDEVICE_NUM=",I2)') MYPROC,CL_NPROC_PERNODE,IPROC_PERNODE,IDEVICE_NUM + IDEVICE_TYPE=0 + !!CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,ACC_DEVICE_NVIDIA) + CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,IDEVTYPE) + !!CALL ACC_INIT(ACC_DEVICE_NVIDIA) + CALL ACC_INIT(IDEVTYPE) + !$OMP PARALLEL + !!CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,ACC_DEVICE_NVIDIA) + CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,IDEVTYPE) + !!CALL ACC_INIT(ACC_DEVICE_NVIDIA) + CALL ACC_INIT(IDEVTYPE) +!$OMP END PARALLEL +ENDIF +#endif + +IF(MSETUP0 /= 0) THEN +!gr CALL ABORT_TRANS('SETUP_TRANS0: SETUP_TRANS0 MAY ONLY BE CALLED ONCE') +ENDIF + +! Default values + +NOUT = 6 +NERR = 0 +NPRINTLEV = 0 +NMAX_RESOL = 1 +NPRGPNS = 1 +NPRGPEW = 1 +NPRTRW = 1 +N_REGIONS_NS=1 +N_REGIONS_EW=1 +NPROMATR = 0 +NCOMBFLEN = 1800000 +LMPOFF = .FALSE. +LSYNC_TRANS=.FALSE. +NTRANS_SYNC_LEVEL=0 +LEQ_REGIONS=.FALSE. +RA=6371229._JPRB +LALLOPERM=.FALSE. + +! Optional arguments + +IF(PRESENT(KOUT)) THEN + NOUT = KOUT +ENDIF +IF(PRESENT(KERR)) THEN + NERR = KERR +ENDIF +IF(PRESENT(KPRINTLEV)) THEN + NPRINTLEV = KPRINTLEV +ENDIF + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS0 ===' + +IF(PRESENT(KMAX_RESOL))THEN + NMAX_RESOL = KMAX_RESOL +ENDIF +IF(PRESENT(KPROMATR))THEN + IF(MOD(KPROMATR,2) /= 0) THEN + CALL ABORT_TRANS('SETUP_TRANS0: KPROMATR HAS TO BE MULTIPLE OF 2') + ENDIF + NPROMATR = KPROMATR +ENDIF +IF(PRESENT(KPRGPNS)) THEN + NPRGPNS = KPRGPNS +ENDIF +IF(PRESENT(KPRGPEW)) THEN + NPRGPEW = KPRGPEW +ENDIF +IF(PRESENT(KPRTRW)) THEN + NPRTRW = KPRTRW +ENDIF +IF(PRESENT(KCOMBFLEN)) THEN + NCOMBFLEN = KCOMBFLEN +ENDIF +IF(PRESENT(LDMPOFF)) THEN + LMPOFF = LDMPOFF +ENDIF +IF(PRESENT(LDSYNC_TRANS)) THEN + LSYNC_TRANS = LDSYNC_TRANS +ENDIF +IF(PRESENT(KTRANS_SYNC_LEVEL)) THEN + NTRANS_SYNC_LEVEL = KTRANS_SYNC_LEVEL +ENDIF +IF(PRESENT(LDEQ_REGIONS)) THEN + LEQ_REGIONS = LDEQ_REGIONS +ENDIF + +! Initial setup +CALL SUMP_TRANS0 + +IF(PRESENT(K_REGIONS_NS)) THEN + K_REGIONS_NS = N_REGIONS_NS +ENDIF + +IF(PRESENT(K_REGIONS_EW)) THEN + K_REGIONS_EW = N_REGIONS_EW +ENDIF + +IF(PRESENT(K_REGIONS)) THEN + IF(UBOUND(K_REGIONS,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('SETUP_TRANS0: K_REGIONS TOO SMALL') + ELSE + K_REGIONS(1:N_REGIONS_NS)=N_REGIONS(1:N_REGIONS_NS) + ENDIF +ENDIF + +IF(PRESENT(PRAD)) THEN + RA=PRAD +ENDIF + +IF(PRESENT(LDALLOPERM)) THEN + LALLOPERM=LDALLOPERM +ENDIF + +! Setup level 0 complete +MSETUP0 = 1 + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE SETUP_TRANS0 + + diff --git a/src/trans/gpu/external/specnorm.F90 b/src/trans/gpu/external/specnorm.F90 new file mode 100755 index 0000000..50f8a4a --- /dev/null +++ b/src/trans/gpu/external/specnorm.F90 @@ -0,0 +1,144 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *SPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL SPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- SPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE SPNORM_CTL_MOD ,ONLY : SPNORM_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IMASTER,IFLD,IFLD_G,J + +! ------------------------------------------------------------------ + +! Set current resolution +CALL SET_RESOL(KRESOL) + +! Set defaults +IMASTER = 1 +IFLD = 0 + + +IF(PRESENT(KMASTER)) THEN + IMASTER = KMASTER +ENDIF + +IF(PRESENT(KVSET)) THEN + IFLD_G = UBOUND(KVSET,1) + DO J=1,IFLD_G + IF(KVSET(J) > NPRTRV) THEN + WRITE(NERR,*) 'SPECNORM:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('SPECNORM:KVSET TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFLD = IFLD+1 + ENDIF + ENDDO +ELSE + IF(PRESENT(PSPEC)) THEN + IFLD = UBOUND(PSPEC,1) + ENDIF + IFLD_G = IFLD +ENDIF + +IF(NPRTRV >1) THEN + IF(IFLD > 0 .AND. .NOT. PRESENT(KVSET)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IFLD > 0 AND NOT PRESENT(KVSET)',& + &NPRTRV,IFLD + CALL ABORT_TRANS('SPECNORM: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF +IF(MYPROC == IMASTER) THEN + IF(.NOT. PRESENT(PNORM)) THEN + CALL ABORT_TRANS('SPECNORM: PNORM NOT PRESENT') + ENDIF + IF(UBOUND(PNORM,1) < IFLD_G) THEN + CALL ABORT_TRANS('SPECNORM: PNORM TOO SMALL') + ENDIF +ENDIF +IF(IFLD > 0 ) THEN + IF(.NOT. PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('SPECNORM: PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,1) < IFLD) THEN + CALL ABORT_TRANS('SPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,2) < D%NSPEC2) THEN + CALL ABORT_TRANS('SPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL SPNORM_CTL(PSPEC,IFLD,IFLD_G,KVSET,IMASTER,PMET,PNORM) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE SPECNORM + diff --git a/src/trans/gpu/external/sugawc.F90 b/src/trans/gpu/external/sugawc.F90 new file mode 100755 index 0000000..7f90637 --- /dev/null +++ b/src/trans/gpu/external/sugawc.F90 @@ -0,0 +1,102 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE SUGAWC(KDGLG,PMU,PW) + +!**** *SUGAWC* - Compute Gaussian latitudes and weights + +! Purpose. +! -------- +! Compute Gaussian latitudes and weights. + +!** Interface. +! ---------- +! CALL SUGAWC(...) + +! Explicit arguments : +! -------------------- +! INPUT: +! KDGLG - number of latitudes. + +! OUTPUT: +! PMU - sine of Gaussian latitudes. +! PW - Gaussian weights. + +! Method. +! ------- + +! Externals. SUGAW +! ---------- + +! Author. +! ------- +! K. Yessad, from SUGAWA and SULEG (trans) +! Original : May 2012 + +! Modifications. +! -------------- +! F. Vana 05-Mar-2015 Support for single precision + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +!ifndef INTERFACE + +USE SUGAW_MOD + +!endif INTERFACE + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KDGLG +REAL(KIND=JPRD) ,INTENT(OUT) :: PMU(:) +REAL(KIND=JPRD) ,INTENT(OUT) :: PW(:) + +!ifndef INTERFACE + +REAL(KIND=JPRD) :: ZANM +INTEGER(KIND=JPIM) :: ISTART,IK,IODD,JN,JGL +REAL(KIND=JPRD) :: ZFN(0:KDGLG,0:KDGLG) +REAL(KIND=JPRD) :: ZFNN + +! ------------------------------------------------------------------ + +! * preliminary calculations to compute input quantities ZANM and ZFN +! (k.y.: coded after what I found in tfl/module/suleg_mod.F90). +ISTART=1 +! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) +! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 +ZFN(0,0)=2._JPRD +DO JN=ISTART,KDGLG + ZFNN=ZFN(0,0) + DO JGL=1,JN + ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) + ENDDO + IODD=MOD(JN,2) + ZFN(JN,JN)=ZFNN + DO JGL=2,JN-IODD,2 + ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) + ENDDO +ENDDO + +ZANM=SQRT(REAL(2*KDGLG+1,JPRD)*REAL(KDGLG**2,JPRD)/REAL(2*KDGLG-1,JPRD)) + +! * call to SUGAW (output: PW, PMU): +CALL SUGAW(KDGLG,0,KDGLG,PMU,PW,ZANM,ZFN) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE SUGAWC + diff --git a/src/trans/gpu/external/trans_end.F90 b/src/trans/gpu/external/trans_end.F90 new file mode 100755 index 0000000..776aa8c --- /dev/null +++ b/src/trans/gpu/external/trans_end.F90 @@ -0,0 +1,203 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_END(CDMODE) + +!**** *TRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL TRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: 19-03-2009: intermediate end of transf to allow to switch to mono-task transforms +! R. El Khatib 09-Jul-2013 LENABLED + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : MSETUP0, NCUR_RESOL, NMAX_RESOL, LENABLED,NDEF_RESOL +USE TPM_DIM ,ONLY : R, DIM_RESOL, R_NSMAX,R_NTMAX, R_NDGNH, R_NDGL, R_NNOEXTZL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL, NPRCIDS,D_NUMP,D_MYMS,D_NSTAGT0B,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1, D_NASM0, & +& D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL,F_RW, ZIA,ZEPSNM,ZSOA1,ZAOA1,ISTAN,ISTAS,ZSIA,ZAIA,ZOA1,ZOA2, & +& ZAA,ZAS,LDZAA,LDZAS,TDZAA,TDZAS,& +& IZBS,ILDZBA,ILDZBS,ITDZBA,ITDZBS,& +& IZCA,IZCS,IZCST,ILDZCA,ILDZCS,ITDZCA,ITDZCS,& +& DZBAT,DZBST,DLDZBA,DLDZBS,DTDZBA,DTDZBS,& +& DZCA,DZCS,DZCAT,DZCST,DLDZCA,DLDZCS,DTDZCA,DTDZCS +USE TPM_FFT ,ONLY : T, FFT_RESOL +USE TPM_CTL ,ONLY : C, CTL_RESOL +USE TPM_FLT +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN, ZGTF + +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE DEALLOC_RESOL_MOD ,ONLY : DEALLOC_RESOL +! + +IMPLICIT NONE +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE +! Local variables +INTEGER(KIND=JPIM) :: JRES +CHARACTER*5 :: CLMODE +! ------------------------------------------------------------------ +CLMODE='FINAL' +IF (PRESENT(CDMODE)) CLMODE=CDMODE +IF (CLMODE == 'FINAL') THEN + + DEALLOCATE(ZAA) + DEALLOCATE(ZAS) + + !DEALLOCATE(IZBA) + DEALLOCATE(IZBS) + !DEALLOCATE(IZCA) + !DEALLOCATE(IZCS) + !DEALLOCATE(IZCAT) + DEALLOCATE(IZCST) + + + !DEALLOCATE(DZBA) + !DEALLOCATE(DZBS) + DEALLOCATE(DZBAT) + DEALLOCATE(DZBST) + !DEALLOCATE(DZCA) + !DEALLOCATE(DZCS) + DEALLOCATE(DZCAT) + DEALLOCATE(DZCST) + +#ifdef ACCGPU + !$ACC EXIT DATA DELETE(ZAA,ZAS,IZBS,IZCST,DZBAT,DZBST,DZCAT,DZCST) +#endif +#ifdef OMPGPU + !$OMP TARGET EXIT DATA MAP(DELETE:ZAA,ZAS,IZBS,IZCST,DZBAT,DZBST,DZCAT,DZCST) +#endif + + !memory save + DEALLOCATE(FOUBUF_IN) + DEALLOCATE(FOUBUF) + + +#ifdef ACCGPU + !$ACC EXIT DATA DELETE(ZGTF) +#endif +#ifdef OMPGPU + !$OMP TARGET EXIT DATA MAP(DELETE:ZGTF) +#endif + DEALLOCATE(ZGTF) + + DEALLOCATE(ZIA) + DEALLOCATE(ZEPSNM) + DEALLOCATE(ZSOA1) + DEALLOCATE(ZAOA1) + DEALLOCATE(ISTAN) + DEALLOCATE(ISTAS) + DEALLOCATE(ZSIA) + DEALLOCATE(ZAIA) + DEALLOCATE(ZOA1) + !DEALLOCATE(ZOA2) +#ifdef ACCGPU + !$ACC EXIT DATA DELETE(ZIA,ZEPSNM,ZSOA1,ZAOA1,ISTAN,ISTAS,ZSIA,ZAIA,ZOA1) +#endif +#ifdef OMPGPU + !$OMP TARGET EXIT DATA MAP(DELETE:ZIA,ZEPSNM,ZSOA1,ZAOA1,ISTAN,ISTAS,ZSIA,ZAIA,ZOA1) +#endif + + DEALLOCATE(D_NSTAGT0B,D_NSTAGT1B,D_NPNTGTB1,D_MYMS,D_NPROCL,D_NASM0,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NLOEN,F_RW) +#ifdef ACCGPU + !$ACC EXIT DATA DELETE(R_NSMAX,R_NTMAX,R_NDGL,R_NNOEXTZL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,D_NPNTGTB1,D_NPROCL,D_NUMP, & + !$ACC& D_MYMS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,D_NASM0,F_RW) +#endif +#ifdef OMPGPU + !$OMP TARGET EXIT DATA MAP(DELETE:R_NSMAX,R_NTMAX,R_NDGL,R_NNOEXTZL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,D_NPNTGTB1,D_NPROCL,D_NUMP, & + !$OMP& D_MYMS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,D_NASM0,F_RW) +#endif + !CALL HIP_DGEMM_BATCHED_FINALIZE() + + IF( ALLOCATED( LENABLED ) ) THEN + DO JRES=1,NMAX_RESOL + IF(LENABLED(JRES)) THEN + CALL DEALLOC_RESOL(JRES) + ENDIF + ENDDO + DEALLOCATE(LENABLED) + ENDIF + + NULLIFY(R) + IF( ALLOCATED(DIM_RESOL) ) DEALLOCATE(DIM_RESOL) + + NULLIFY(D) + IF( ALLOCATED(DISTR_RESOL) ) DEALLOCATE(DISTR_RESOL) + + !TPM_FFT + NULLIFY(T) + IF( ALLOCATED(FFT_RESOL) ) DEALLOCATE(FFT_RESOL) + + !TPM_FLT + NULLIFY(S) + IF( ALLOCATED(FLT_RESOL) ) DEALLOCATE(FLT_RESOL) + + !TPM_CTL + NULLIFY(C) + IF( ALLOCATED(CTL_RESOL) ) DEALLOCATE(CTL_RESOL) + + !TPM_FIELDS + NULLIFY(F) + IF( ALLOCATED(FIELDS_RESOL) ) DEALLOCATE(FIELDS_RESOL) + + + !TPM_GEOMETRY + NULLIFY(G) + IF( ALLOCATED(GEOM_RESOL) ) DEALLOCATE(GEOM_RESOL) + + !TPM_TRANS + IF(ALLOCATED(FOUBUF_IN)) DEALLOCATE(FOUBUF_IN) + IF(ALLOCATED(FOUBUF)) DEALLOCATE(FOUBUF) + + MSETUP0 = 0 + NMAX_RESOL = 0 + NCUR_RESOL = 0 + NDEF_RESOL = 0 +ENDIF +IF (CLMODE == 'FINAL' .OR. CLMODE == 'INTER') THEN + !EQ_REGIONS + IF( ASSOCIATED(N_REGIONS) ) DEALLOCATE(N_REGIONS) + !TPM_DISTR + IF( ALLOCATED(NPRCIDS) ) DEALLOCATE(NPRCIDS) +ENDIF + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE TRANS_END diff --git a/src/trans/gpu/external/trans_inq.F90 b/src/trans/gpu/external/trans_inq.F90 new file mode 100755 index 0000000..b0c1904 --- /dev/null +++ b/src/trans/gpu/external/trans_inq.F90 @@ -0,0 +1,529 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + &KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + &KMYMS,KASM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + &KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + &KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + &KULTPP,KPTRLS,KNMENG,& + &KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + &LDSPLITLAT,& + &KSMAX,PLAPIN,KNVALUE,KDEF_RESOL,LDLAM,& + &PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KDGLU) + +!**** *TRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL TRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KASM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation +! KNVALUE - n value for each KSPEC2 spectral coeffient + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations +! KNMENG - associated (with NLOENG) cut-off zonal wavenumber + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLAPIN - Eigen-values of the inverse Laplace operator +! KDGLU - Number of active points in an hemisphere for a given wavenumber "m" + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M. Hortal : 2001-03-05 Dimensions of the Legendre polynomials +! R. El Khatib 08-Aug-2012 KSMAX,PLAPIN,KNVALUE,LDLAM,KDEF_RESOL + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NDEF_RESOL +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPRTRW, MYSETV, MYSETW, NPRTRV +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FLT + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS_EW, N_REGIONS_NS + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2G +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNUMP +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTG +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTMX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRFLOFF + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KASM0(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KUMPP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPOSSP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KALLMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSTA(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KONL(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL, INTENT(OUT) :: LDSPLITLAT(:) + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KULTPP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNMENG(:) + +REAL(KIND=JPRD) ,OPTIONAL, INTENT(OUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGW(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLEI3 +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPOLEGL +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPMS(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDGLU(0:) + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PLAPIN(-1:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1,IU2 +INTEGER(KIND=JPIM) :: IC, JN, JMLOC +INTEGER(KIND=JPIM) :: IPRTRV,JSETV,IMLOC,IM,ISL,IA,ILA,IS,ILS,IDGLU,J,I +! ------------------------------------------------------------------ + + +! Set current resolution +CALL SET_RESOL(KRESOL) + +IF(PRESENT(KSPEC)) KSPEC = D%NSPEC +IF(PRESENT(KSPEC2)) KSPEC2 = D%NSPEC2 +IF(PRESENT(KSPEC2G)) KSPEC2G = R%NSPEC2_G +IF(PRESENT(KSPEC2MX)) KSPEC2MX = D%NSPEC2MX +IF(PRESENT(KNUMP)) KNUMP = D%NUMP +IF(PRESENT(KGPTOT)) KGPTOT = D%NGPTOT +IF(PRESENT(KGPTOTG)) KGPTOTG = D%NGPTOTG +IF(PRESENT(KGPTOTMX)) KGPTOTMX = D%NGPTOTMX +IF(PRESENT(KFRSTLOFF)) KFRSTLOFF = D%NFRSTLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPRTRW)) KPRTRW = NPRTRW +IF(PRESENT(KMYSETW)) KMYSETW = MYSETW +IF(PRESENT(KMYSETV)) KMYSETV = MYSETV +IF(PRESENT(KMY_REGION_NS)) KMY_REGION_NS = MY_REGION_NS +IF(PRESENT(KMY_REGION_EW)) KMY_REGION_EW = MY_REGION_EW +IF(PRESENT(LDLAM)) LDLAM = G%LAM +IF(PRESENT(KDEF_RESOL)) KDEF_RESOL = NDEF_RESOL + +IF(PRESENT(KGPTOTL)) THEN + IF(UBOUND(KGPTOTL,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KGPTOTL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KGPTOTL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('TRANS_INQ: KGPTOTL DIM 2 TOO SMALL') + ELSE + KGPTOTL(1:N_REGIONS_NS,1:N_REGIONS_EW) = D%NGPTOTL(:,:) + ENDIF +ENDIF + +IF(PRESENT(KMYMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KMYMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KMYMS,1) < D%NUMP) THEN + CALL ABORT_TRANS('TRANS_INQ: KMYMS TOO SMALL') + ELSE + KMYMS(1:D%NUMP) = D%MYMS(:) + ENDIF +ENDIF + +IF(PRESENT(KASM0)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KASM0 REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KASM0,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KASM0 TOO SMALL') + ELSE + KASM0(0:R%NSMAX) = D%NASM0(:) + ENDIF +ENDIF + +IF(PRESENT(KUMPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KUMPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KUMPP,1) < NPRTRW) THEN + CALL ABORT_TRANS('TRANS_INQ: KUMPP TOO SMALL') + ELSE + KUMPP(1:NPRTRW) = D%NUMPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPOSSP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPOSSP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPOSSP,1) < NPRTRW+1) THEN + CALL ABORT_TRANS('TRANS_INQ: KPOSSP TOO SMALL') + ELSE + KPOSSP(1:NPRTRW+1) = D%NPOSSP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRMS,1) < NPRTRW) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRMS TOO SMALL') + ELSE + KPTRMS(1:NPRTRW) = D%NPTRMS(:) + ENDIF +ENDIF + +IF(PRESENT(KALLMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KALLMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KALLMS,1) < R%NSMAX+1) THEN + CALL ABORT_TRANS('TRANS_INQ: KALLMS TOO SMALL') + ELSE + KALLMS(1:R%NSMAX+1) = D%NALLMS(:) + ENDIF +ENDIF + +IF(PRESENT(KDIM0G)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KDIM0G REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KDIM0G,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KDIM0G TOO SMALL') + ELSE + KDIM0G(0:R%NSMAX) = D%NDIM0G(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KFRSTLAT)) THEN + IF(UBOUND(KFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KFRSTLAT TOO SMALL') + ELSE + KFRSTLAT(1:N_REGIONS_NS) = D%NFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KLSTLAT)) THEN + IF(UBOUND(KLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KLSTLAT TOO SMALL') + ELSE + KLSTLAT(1:N_REGIONS_NS) = D%NLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLAT)) THEN + IF(UBOUND(KPTRLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLAT TOO SMALL') + ELSE + KPTRLAT(1:R%NDGL) = D%NPTRLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRFRSTLAT)) THEN + IF(UBOUND(KPTRFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRFRSTLAT TOO SMALL') + ELSE + KPTRFRSTLAT(1:N_REGIONS_NS) = D%NPTRFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLSTLAT)) THEN + IF(UBOUND(KPTRLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLSTLAT TOO SMALL') + ELSE + KPTRLSTLAT(1:N_REGIONS_NS) = D%NPTRLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KSTA)) THEN + IF(UBOUND(KSTA,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('TRANS_INQ: KSTA DIM 1 TOO SMALL') + ELSEIF(UBOUND(KSTA,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('TRANS_INQ: KSTA DIM 2 TOO SMALL') + ELSE + KSTA(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NSTA(:,:) + ENDIF +ENDIF + +IF(PRESENT(KONL)) THEN + IF(UBOUND(KONL,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('TRANS_INQ: KONL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KONL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('TRANS_INQ: KONL DIM 2 TOO SMALL') + ELSE + KONL(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NONL(:,:) + ENDIF +ENDIF + +IF(PRESENT(LDSPLITLAT)) THEN + IF(UBOUND(LDSPLITLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: LDSPLITLAT TOO SMALL') + ELSE + LDSPLITLAT(1:R%NDGL) = D%LSPLITLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KULTPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KULTPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KULTPP,1) < NPRTRNS) THEN + CALL ABORT_TRANS('TRANS_INQ: KULTPP TOO SMALL') + ELSE + KULTPP(1:NPRTRNS) = D%NULTPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRLS,1) < NPRTRNS) THEN + CALL ABORT_TRANS('TRANS_INQ: KPTRLS TOO SMALL') + ELSE + KPTRLS(1:NPRTRNS) = D%NPTRLS(:) + ENDIF +ENDIF + +IF(PRESENT(KNMENG)) THEN + IF(UBOUND(KNMENG,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: KNMENG TOO SMALL') + ELSE + KNMENG(1:R%NDGL) = G%NMEN(1:R%NDGL) + ENDIF +ENDIF + +IF(PRESENT(PMU)) THEN + IF(UBOUND(PMU,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: PMU TOO SMALL') + ELSE + PMU(1:R%NDGL) = F%RMU + ENDIF +ENDIF + +IF(PRESENT(PGW)) THEN + IF(UBOUND(PGW,1) < R%NDGL) THEN + CALL ABORT_TRANS('TRANS_INQ: PGW TOO SMALL') + ELSE + PGW(1:R%NDGL) = REAl(F%RW,JPRB) + ENDIF +ENDIF + +IF(PRESENT(PRPNM)) THEN + + IF( .NOT. S%LKEEPRPNM ) THEN + CALL ABORT_TRANS('TRANS_INQ: PRPNM REQUIRED BUT S%LKEEPRPNM=F') + ENDIF + + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF + IU1 = UBOUND(PRPNM,1) + IU2 = UBOUND(PRPNM,2) + IF(IU1 < R%NDGNH) THEN + CALL ABORT_TRANS('TRANS_INQ:FIRST DIM. OF PRNM TOO SMALL') + ELSE +! IU1 = MIN(IU1,R%NLEI3) +! IU2 = MIN(IU2,D%NSPOLEGL) +! PRPNM(1:IU1,1:IU2) = F%RPNM(1:IU1,1:IU2) + + DO JMLOC=1,D%NUMP,NPRTRV + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + DO J=1,ILA + DO I=1,IDGLU + PRPNM(ISL+I-1,D%NPMS(IM)+IA+(J-1)*2) = REAL(S%FA(IMLOC)%RPNMA(I,J),JPRB) + ENDDO + ENDDO + DO J=1,ILS + DO I=1,IDGLU + PRPNM(ISL+I-1,D%NPMS(IM)+IS+(J-1)*2) = REAL(S%FA(IMLOC)%RPNMS(I,J),JPRB) + ENDDO + ENDDO + ENDDO + ENDDO + ENDIF +ENDIF +IF(PRESENT(KLEI3)) THEN + KLEI3=R%NLEI3 +ENDIF +IF(PRESENT(KSPOLEGL)) THEN + KSPOLEGL=D%NSPOLEGL +ENDIF +IF(PRESENT(KPMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KPMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPMS,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KPMS TOO SMALL') + ELSE + KPMS(0:R%NSMAX) = D%NPMS(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KSMAX)) KSMAX = R%NSMAX +IF(PRESENT(PLAPIN)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: PLAPIN REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(PLAPIN,1) < R%NSMAX+2) THEN + CALL ABORT_TRANS('TRANS_INQ: PLAPIN TOO SMALL') + ELSEIF (LBOUND(PLAPIN,1) /= -1) THEN + CALL ABORT_TRANS('TRANS_INQ: LOWER BOUND OF PLAPIN SHOULD BE -1') + ELSE + PLAPIN(-1:R%NSMAX+2) = REAL(F%RLAPIN(:),JPRB) + ENDIF +ENDIF +IF(PRESENT(KNVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KNVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('TRANS_INQ: KNVALUE TOO SMALL') + ELSE + IC=1 + DO JMLOC=1,D%NUMP + DO JN=D%MYMS(JMLOC),R%NSMAX + KNVALUE(IC )=JN + KNVALUE(IC+1)=JN + IC=IC+2 + ENDDO + ENDDO + ENDIF +ENDIF + + +IF(PRESENT(KDGLU)) THEN + IF(UBOUND(KDGLU,1) < R%NSMAX) THEN + CALL ABORT_TRANS('TRANS_INQ: KDGLU TOO SMALL') + ELSE + KDGLU(0:R%NSMAX) = G%NDGLU(0:R%NSMAX) + ENDIF +ENDIF +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE TRANS_INQ diff --git a/src/trans/gpu/external/trans_pnm.F90 b/src/trans/gpu/external/trans_pnm.F90 new file mode 100755 index 0000000..c8ccd0c --- /dev/null +++ b/src/trans/gpu/external/trans_pnm.F90 @@ -0,0 +1,200 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_PNM(KRESOL,KM,PRPNM,LDTRANSPOSE,LDCHEAP) + +!**** *TRANS_PNM* - Compute Legendre polynomials for a given wavenember + +! Purpose. +! -------- +! Interface routine for computing Legendre polynomials for a given wavenember + +!** Interface. +! ---------- +! CALL TRANS_PNM(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) +! KM - wave number +! PRPNM - Legendre polynomials +! LDTRANSPOSE - Legendre polynomials array is transposed +! LDCHEAP - cheapest but less accurate computation + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 22-Jan-2016 from G. Mozdzynski's getpnm + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM +USE PARKIND_ECTRANS,ONLY : JPRBT + +!ifndef INTERFACE + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FLT ,ONLY : S + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TPM_POL +USE SUPOLF_MOD + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM) ,INTENT(IN) :: KM +REAL(KIND=JPRBT) ,OPTIONAL, INTENT(OUT) :: PRPNM(:,:) +LOGICAL, OPTIONAL, INTENT(IN) :: LDTRANSPOSE +LOGICAL, OPTIONAL, INTENT(IN) :: LDCHEAP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1, IU2, IMAXN, INMAX, ICHEAP_SYM, ICHEAP_ANTISYM +INTEGER(KIND=JPIM) :: IC, JN, JMLOC, JGL, JI +INTEGER(KIND=JPIM) :: IA, IS, IDGLU, ILA, ILS, ISL +REAL(KIND=JPRD), ALLOCATABLE :: ZLPOL(:) +LOGICAL :: LLTRANSPOSE, LLCHEAP +! ------------------------------------------------------------------ + +! Set current resolution +IF (PRESENT(KRESOL)) THEN + CALL SET_RESOL(KRESOL) +ENDIF + +IF (PRESENT(LDTRANSPOSE)) THEN + LLTRANSPOSE=LDTRANSPOSE +ELSE + LLTRANSPOSE=.FALSE. +ENDIF + +IF (PRESENT(LDCHEAP)) THEN + LLCHEAP=LDCHEAP +ELSE + LLCHEAP=.FALSE. +ENDIF +IF (LLCHEAP) THEN + ICHEAP_SYM =2 + ICHEAP_ANTISYM=3 +ELSE + ICHEAP_SYM =1 + ICHEAP_ANTISYM=1 +ENDIF + +IF (PRESENT(PRPNM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('TRANS_PNM: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF +ENDIF + +IU1 = UBOUND(PRPNM,1) +IU2 = UBOUND(PRPNM,2) + +IF (LLTRANSPOSE) THEN + + IF(IU2 < R%NLEI3) THEN + CALL ABORT_TRANS('TRANS_PNM : FIRST DIM. OF PRPNM TOO SMALL') + ENDIF + IF(IU1 < R%NTMAX-KM+3) THEN + CALL ABORT_TRANS('TRANS_PNM : SECOND DIM. OF PRPNM TOO SMALL') + ENDIF + + IF (IU2 >= R%NLEI3) THEN + PRPNM(:,R%NLEI3) = 0.0_JPRBT + ENDIF + +ELSE + + IF(IU1 < R%NLEI3) THEN + CALL ABORT_TRANS('TRANS_PNM : FIRST DIM. OF PRPNM TOO SMALL') + ENDIF + IF(IU2 < R%NTMAX-KM+3) THEN + CALL ABORT_TRANS('TRANS_PNM : SECOND DIM. OF PRPNM TOO SMALL') + ENDIF + + IF (IU1 >= R%NLEI3) THEN + PRPNM(R%NLEI3,:) = 0.0_JPRBT + ENDIF + +ENDIF + +ILA = (R%NTMAX-KM+2)/2 +ILS = (R%NTMAX-KM+3)/2 + +CALL INI_POL(R%NTMAX+2,LDFAST=.TRUE.) + +IMAXN=R%NTMAX+1 + +IA = 1+MOD(R%NTMAX-KM+2,2) +IS = 1+MOD(R%NTMAX-KM+1,2) + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IF (S%LSOUTHPNM) THEN + IDGLU = 2*MIN(R%NDGNH,G%NDGLU(KM)) +ELSE + IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) +ENDIF + +IF(MOD(IMAXN-KM,2) == 0) THEN + INMAX=IMAXN+1 +ELSE + INMAX=IMAXN +ENDIF + +ALLOCATE(ZLPOL(0:R%NTMAX+2)) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) +DO JGL=1,IDGLU + CALL SUPOLF(KM,INMAX,REAL (F%RMU(ISL+JGL-1), JPRD),ZLPOL(0:INMAX),KCHEAP=ICHEAP_ANTISYM) + IF (LLTRANSPOSE) THEN + DO JI=1,ILA + PRPNM(IA+(JI-1)*2,ISL+JGL-1) = ZLPOL(KM+2*(ILA-JI)+1) + ENDDO + ELSE + DO JI=1,ILA + PRPNM(ISL+JGL-1,IA+(JI-1)*2) = ZLPOL(KM+2*(ILA-JI)+1) + ENDDO + ENDIF + CALL SUPOLF(KM,INMAX,REAL (F%RMU(ISL+JGL-1), JPRD),ZLPOL(0:INMAX),KCHEAP=ICHEAP_SYM) + IF (LLTRANSPOSE) THEN + DO JI=1,ILS + PRPNM(IS+(JI-1)*2,ISL+JGL-1) = ZLPOL(KM+2*(ILS-JI)) + ENDDO + ELSE + DO JI=1,ILS + PRPNM(ISL+JGL-1,IS+(JI-1)*2) = ZLPOL(KM+2*(ILS-JI)) + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO + +CALL END_POL + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE TRANS_PNM diff --git a/src/trans/gpu/external/trans_release.F90 b/src/trans/gpu/external/trans_release.F90 new file mode 100755 index 0000000..ea97b3c --- /dev/null +++ b/src/trans/gpu/external/trans_release.F90 @@ -0,0 +1,61 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE TRANS_RELEASE(KRESOL) + +!**** *TRANS_RELEASE* - release a spectral resolution + +! Purpose. +! -------- +! Release all arrays related to a given resolution tag + +!** Interface. +! ---------- +! CALL TRANS_RELEASE + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE DEALLOC_RESOL_MOD ,ONLY : DEALLOC_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL + +!endif INTERFACE + +! ------------------------------------------------------------------ + +CALL DEALLOC_RESOL(KRESOL) + +! ------------------------------------------------------------------ + +END SUBROUTINE TRANS_RELEASE diff --git a/src/trans/gpu/external/vordiv_to_uv.F90 b/src/trans/gpu/external/vordiv_to_uv.F90 new file mode 100755 index 0000000..7aa8342 --- /dev/null +++ b/src/trans/gpu/external/vordiv_to_uv.F90 @@ -0,0 +1,179 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE VORDIV_TO_UV(PSPVOR,PSPDIV,PSPU,PSPV,KSMAX,KVSETUV) + +!**** *VORDIV_TO_UV* - Convert spectral vorticity and divergence to spectral U (u*cos(theta)) and V (v*cos(theta). + +! Purpose. +! -------- +! Interface routine for Convert spectral vorticity and divergence to spectral U and V + +!** Interface. +! ---------- +! CALL VORDIV_TO_UV(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPU(:,:) - spectral U (u*cos(theta) (output) +! PSPV(:,:) - spectral V (v*cos(theta) (output) +! KSMAX - spectral resolution (input) +! KVSETUV(:) - Optionally indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- VD2UV_CTL - control vordiv to uv + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 15-06-15 + + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT,MSETUP0 +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE VD2UV_CTL_MOD ,ONLY : VD2UV_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB), INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PSPU(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PSPV(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) + +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IRESOL,IDGL +LOGICAL :: LTMP_SETUP0 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +#include "setup_trans0.h" +#include "setup_trans.h" +#include "trans_release.h" +#include "trans_end.h" + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('VORDIV_TO_UV',0,ZHOOK_HANDLE) + +!CALL GSTATS(XXXX,0) + +IF(MSETUP0 == 0) THEN + CALL SETUP_TRANS0() + LTMP_SETUP0 = .TRUE. +ELSE + LTMP_SETUP0 = .FALSE. +ENDIF +IDGL = 2 ! It doesn't matter as long as it's a positive even number +CALL SETUP_TRANS(KSMAX,IDGL,LDSPSETUPONLY=.TRUE.,KRESOL=IRESOL) +CALL SET_RESOL(IRESOL) + + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'VORDIV_TO_UV:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('VORDIV_TO_UV:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSE + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPVOR TOO SHORT') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPDIV TOO SHORT') + ENDIF + IF(UBOUND(PSPU,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPU,1) < IF_UV ',UBOUND(PSPU,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPU TOO SHORT') + ENDIF + IF(UBOUND(PSPV,1) < IF_UV) THEN + WRITE(NERR,*)'VORDIV_TO_UV : UBOUND(PSPV,1) < IF_UV ',UBOUND(PSPV,1),IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV : PSPV TOO SHORT') + ENDIF +ENDIF + + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + &NPRTRV,IF_UV + CALL ABORT_TRANS('VORDIV_TO_UV: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +!CALL GSTATS(XXXX,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL VD2UV_CTL(IF_UV,PSPVOR,PSPDIV,PSPU,PSPV) + +CALL TRANS_RELEASE(IRESOL) +IF (LTMP_SETUP0) THEN + CALL TRANS_END() +ENDIF + +IF (LHOOK) CALL DR_HOOK('VORDIV_TO_UV',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE VORDIV_TO_UV + diff --git a/src/trans/gpu/internal/abort_trans_mod.F90 b/src/trans/gpu/internal/abort_trans_mod.F90 new file mode 100755 index 0000000..b92131d --- /dev/null +++ b/src/trans/gpu/internal/abort_trans_mod.F90 @@ -0,0 +1,39 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ABORT_TRANS_MOD +CONTAINS +SUBROUTINE ABORT_TRANS(CDTEXT) + +USE TPM_GEN ,ONLY : NOUT,NERR +USE TPM_DISTR ,ONLY : NPROC,MYPROC +USE MPL_MODULE ,ONLY : MPL_ABORT +USE SDL_MOD ,ONLY : SDL_TRACEBACK, SDL_SRLABORT + +IMPLICIT NONE + + +CHARACTER(LEN=*),INTENT(IN) :: CDTEXT + +WRITE(NOUT,'(1X,A)') 'ABORT_TRANS CALLED' + +WRITE(NOUT,'(1X,A)') CDTEXT +WRITE(NERR,'(1X,A,1X,I3,1X,A)') 'ABORT! ',MYPROC,CDTEXT +CLOSE(NOUT) +IF (NPROC > 1) THEN + CALL MPL_ABORT(CDTEXT) +ELSE + CALL SDL_TRACEBACK + CALL FLUSH(0) + CALL SDL_SRLABORT +ENDIF + +END SUBROUTINE ABORT_TRANS +END MODULE ABORT_TRANS_MOD diff --git a/src/trans/gpu/internal/asre1_mod.F90 b/src/trans/gpu/internal/asre1_mod.F90 new file mode 100755 index 0000000..02b3a75 --- /dev/null +++ b/src/trans/gpu/internal/asre1_mod.F90 @@ -0,0 +1,98 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1_MOD +CONTAINS +SUBROUTINE ASRE1(KM,KMLOC,KF_OUT_LT,PAOA1,PSOA1) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE TPM_DIM ,ONLY : R + +!USE TPM_TRANS + +USE ASRE1B_MOD ,ONLY : ASRE1B + + +!**** *ASRE1* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. ASRE1B - basic recombination routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1 in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_OUT_LT + +REAL(KIND=JPRBT) , INTENT(IN) :: PSOA1(:,:), PAOA1(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFLDS + +! WORK ARRAYS FOR ASREL1B +INTEGER(KIND=JPIM) :: ISTAN(R%NDGNH),ISTAS(R%NDGNH) + +stop 'Error: this code path is not (yet) supported in GPU version' + + +! ------------------------------------------------------------------ + +IFLDS = KF_OUT_LT + +!CALL ASRE1B(IFLDS,KM,KMLOC,PAOA1,PSOA1) + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1 +END MODULE ASRE1_MOD diff --git a/src/trans/gpu/internal/asre1ad_mod.F90 b/src/trans/gpu/internal/asre1ad_mod.F90 new file mode 100755 index 0000000..7c7118c --- /dev/null +++ b/src/trans/gpu/internal/asre1ad_mod.F90 @@ -0,0 +1,92 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1AD_MOD +CONTAINS +SUBROUTINE ASRE1AD(KM,KMLOC,KF_OUT_LT,PAOA1,PSOA1) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +!USE TPM_TRANS + +USE ASRE1BAD_MOD ,ONLY : ASRE1BAD + + +!**** *ASRE1AD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1AD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. ASRE1BAD - basic recombination routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1AD in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_OUT_LT + +REAL(KIND=JPRBT) , INTENT(OUT) :: PSOA1(:,:), PAOA1(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFLDS + + +! ------------------------------------------------------------------ + +IFLDS = KF_OUT_LT + +CALL ASRE1BAD(IFLDS,KM,KMLOC,PAOA1,PSOA1) + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1AD +END MODULE ASRE1AD_MOD diff --git a/src/trans/gpu/internal/asre1b_mod.F90 b/src/trans/gpu/internal/asre1b_mod.F90 new file mode 100755 index 0000000..c8138eb --- /dev/null +++ b/src/trans/gpu/internal/asre1b_mod.F90 @@ -0,0 +1,125 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1B_MOD +CONTAINS +SUBROUTINE ASRE1B(KFIELD,PAOA,PSOA) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R, R_NDGNH, R_NDGL +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G, G_NDGLU +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NSTAGT0B,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1 +USE TPM_GEN ,ONLY : NOUT + +!**** *ASRE1B* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1B in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +INTEGER(KIND=JPIM) :: KM,KMLOC +REAL(KIND=JPRBT), INTENT(IN) :: PSOA(:,:,:) +REAL(KIND=JPRBT), INTENT(IN) :: PAOA(:,:,:) +!INTEGER(KIND=JPIM), INTENT(OUT) :: ISTAN(:,:) +!INTEGER(KIND=JPIM), INTENT(OUT) :: ISTAS(:,:) + +! LOCAL INTEGERS +INTEGER(KIND=JPIM) :: ISL, IGLS, JFLD, JGL ,IPROC, IPROCS, IDGNH, ISTAN, ISTAS + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- +#ifdef OMPGPU +!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! +!$OMP TARGET DATA MAP(ALLOC:PAOA,PSOA,D_MYMS,D_NPROCL,D_NSTAGT0B,D_NPNTGTB1,G_NDGLU,FOUBUF_IN) +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(KM,ISL,IPROC,ISTAN,IGLS,IPROCS,ISTAS) & +!$OMP& SHARED(D_NUMP,D_MYMS,R_NDGNH,G_NDGLU,D_NPROCL,D_NSTAGT0B,D_NPNTGTB1,KFIELD,R_NDGL,FOUBUF_IN,PAOA,PSOA) +#endif +#ifdef ACCGPU +!$ACC DATA PRESENT(PAOA,PSOA,D_MYMS,D_NPROCL,D_NSTAGT0B,D_NPNTGTB1,G_NDGLU,FOUBUF_IN,D_NUMP,& +!$ACC& R_NDGNH,R_NDGL) +!$ACC PARALLEL LOOP COLLAPSE(2) DEFAULT(NONE) PRIVATE(KM,ISL,IPROC,ISTAN,IGLS,IPROCS,ISTAS) & +!$ACC& COPYIN(KFIELD) +#endif +DO KMLOC=1,D_NUMP + DO JFLD=1,2*KFIELD + KM = D_MYMS(KMLOC) + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) +#ifdef ACCGPU + !$ACC LOOP SEQ +#endif + DO JGL=ISL, R_NDGNH + IPROC = D_NPROCL(JGL) + ISTAN = (D_NSTAGT0B(IPROC) + D_NPNTGTB1(KMLOC,JGL))*2*KFIELD + IGLS = R_NDGL+1-JGL + IPROCS = D_NPROCL(IGLS) + ISTAS = (D_NSTAGT0B(IPROCS) + D_NPNTGTB1(KMLOC,IGLS))*2*KFIELD + + FOUBUF_IN(ISTAN+JFLD) = PAOA(JFLD,JGL,KMLOC)+PSOA(JFLD,JGL,KMLOC) + FOUBUF_IN(ISTAS+JFLD) = PSOA(JFLD,JGL,KMLOC)-PAOA(JFLD,JGL,KMLOC) + ENDDO + ENDDO +ENDDO +#ifdef ACCGPU +!$ACC END DATA +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1B +END MODULE ASRE1B_MOD diff --git a/src/trans/gpu/internal/asre1bad_mod.F90 b/src/trans/gpu/internal/asre1bad_mod.F90 new file mode 100755 index 0000000..bba75bc --- /dev/null +++ b/src/trans/gpu/internal/asre1bad_mod.F90 @@ -0,0 +1,108 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1BAD_MOD +CONTAINS +SUBROUTINE ASRE1BAD(KFIELD,KM,KMLOC,PAOA,PSOA) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D + + +!**** *ASRE1BAD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1BAD in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRBT), INTENT(OUT) :: PSOA(:,:) +REAL(KIND=JPRBT), INTENT(OUT) :: PAOA(:,:) + +! LOCAL INTEGERS +INTEGER(KIND=JPIM) :: ISL, IGLS, JFLD, JGL ,IPROC, IPROCS, IDGNH +INTEGER(KIND=JPIM) :: ISTAN(R%NDGNH),ISTAS(R%NDGNH) + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IDGNH = R%NDGNH + +!* 1.2 RECOMBINE + +DO JGL=ISL,IDGNH + IPROC = D%NPROCL(JGL) + ISTAN(JGL) = (D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*2*KFIELD + IGLS = R%NDGL+1-JGL + IPROCS = D%NPROCL(IGLS) + ISTAS(JGL) = (D%NSTAGT0B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*2*KFIELD +ENDDO + +DO JGL=ISL,IDGNH +!OCL NOVREC + DO JFLD=1,2*KFIELD + PSOA(JFLD,JGL) = FOUBUF_IN(ISTAN(JGL)+JFLD)+FOUBUF_IN(ISTAS(JGL)+JFLD) + PAOA(JFLD,JGL) = FOUBUF_IN(ISTAN(JGL)+JFLD)-FOUBUF_IN(ISTAS(JGL)+JFLD) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1BAD +END MODULE ASRE1BAD_MOD + diff --git a/src/trans/gpu/internal/cdmap_mod.F90 b/src/trans/gpu/internal/cdmap_mod.F90 new file mode 100755 index 0000000..10648a0 --- /dev/null +++ b/src/trans/gpu/internal/cdmap_mod.F90 @@ -0,0 +1,178 @@ +! (C) Copyright 2014- ECMWF. +! (C) Copyright 2014- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE CDMAP_MOD +CONTAINS +SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& +& KFIELDS, PCOEFA, PCOEFS) + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_FLT +USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D +USE TPM_TRANS ,ONLY : FOUBUF_IN, FOUBUF +USE SEEFMM_MIX + +!**** *CDMAP* - REMAP ROOTS +! +! Purpose. +! -------- +! remap from one set of roots to another using Christoffel-Darboux formula, see Chien + Alpert, 1997. + +!** Interface. +! ---------- +! *CALL* *CDMAP(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! Chien + Alpert, 1997. + +! Author. +! ------- +! Nils Wedi *ECMWF* + +! Modifications. +! -------------- +! Original : 14-05-14 +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KSL +INTEGER(KIND=JPIM), INTENT(IN) :: KSLO +REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM +INTEGER(KIND=JPIM), INTENT(IN) :: KDIR ! direction of map +INTEGER(KIND=JPIM), INTENT(IN) :: KDGNH +INTEGER(KIND=JPIM), INTENT(IN) :: KDGNHD +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELDS +REAL(KIND=JPRBT), INTENT(INOUT) :: PCOEFA(:,:) +REAL(KIND=JPRBT), INTENT(INOUT) :: PCOEFS(:,:) + +INTEGER(KIND=JPIM) :: JGL, IGL, JF +REAL(KIND=JPRBT), ALLOCATABLE :: ZALL(:,:), ZQX(:,:) +REAL(KIND=JPRBT), ALLOCATABLE :: ZALL1(:,:), ZQY(:,:) +INTEGER(KIND=JPIM) :: ISTN(KDGNH), ISTS(KDGNH) + +INTEGER(KIND=JPIM) :: IGLS, IPROC, IPROCS, IEND, IENDO + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('CDMAP_MOD',0,ZHOOK_HANDLE) + +IF( KDIR == -1 ) THEN + ! inverse map from internal (gg) roots to post-processing roots + + IENDO = 2*KDGNHD - KSLO + 1 + IEND = 2*KDGNH - KSL + 1 + + !!!!! fourier buffer setup in output latitudes, may not work if different from input !!!! + DO IGL=KSLO, KDGNHD + IPROC = D%NPROCL(IGL) + ISTN(IGL) = (D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,IGL))*KFIELDS + IGLS = 2*KDGNH+1-IGL + IPROCS = D%NPROCL(IGLS) + ISTS(IGL) = (D%NSTAGT0B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*KFIELDS + ENDDO + + ALLOCATE(ZALL(KFIELDS, 2*KDGNHD)) + ALLOCATE(ZALL1(KFIELDS, 2*KDGNHD)) + ALLOCATE(ZQX(KFIELDS, 2*KDGNH)) + ALLOCATE(ZQY(KFIELDS, 2*KDGNH)) + ZQX(:,1:KSL) = 0._JPRBT + ZQX(:,IEND:2*KDGNH) = 0._JPRBT + ZQY(:,1:KSL) = 0._JPRBT + ZQY(:,IEND:2*KDGNH) = 0._JPRBT + DO JGL=KSL, IEND + ZQX(1:KFIELDS,JGL)=S%FA(KMLOC)%RPNMWI(JGL-KSL+1,1)*PCOEFA(1:KFIELDS,JGL) + ZQY(1:KFIELDS,JGL)=S%FA(KMLOC)%RPNMWI(JGL-KSL+1,2)*PCOEFA(1:KFIELDS,JGL) + ENDDO + CALL SEEFMM_MULM(S%FMM_INTI,KFIELDS,1_JPIM,.TRUE.,ZQX,ZALL1) + CALL SEEFMM_MULM(S%FMM_INTI,KFIELDS,1_JPIM,.TRUE.,ZQY,ZALL) + DEALLOCATE(ZQX) + DEALLOCATE(ZQY) + ! minus sign comes from FMM ?! + ! fill buffer + DO IGL=KSLO,KDGNHD + IGLS = 2*KDGNHD+1-IGL + DO JF=1,KFIELDS + FOUBUF_IN(ISTN(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,1)*ZALL1(JF,IGL) & + & - S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,2)*ZALL(JF,IGL) + FOUBUF_IN(ISTS(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,1)*ZALL1(JF,IGLS) & + & - S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,2)*ZALL(JF,IGLS) + ENDDO + ENDDO + DEALLOCATE(ZALL1) + DEALLOCATE(ZALL) + +ELSE +! direct map from post-processing/input field roots to internal (gg) roots +! this assumes essentially a nearest neighbour interpolation in latitude +! a more accurate approach may be +! a local gridpoint interpolation of the input field to the target latitudes prior to the transforms + + IENDO = 2*KDGNHD - KSLO + 1 + IEND = 2*KDGNH - KSL + 1 + + !!!!! fourier buffer setup in input data latitudes, may not work if different from output !!!! + DO JGL=KSLO, KDGNHD + IPROC = D%NPROCL(JGL) + ISTN(JGL) = (D%NSTAGT1B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*KFIELDS + IGLS = 2*KDGNHD+1-JGL + IPROCS = D%NPROCL(IGLS) + ISTS(JGL) = (D%NSTAGT1B(IPROCS) + D%NPNTGTB1(KMLOC,IGLS))*KFIELDS + ENDDO + + ALLOCATE( ZQX( KFIELDS, 2*KDGNHD)) + ZQX(:,1:KSLO) = 0._JPRBT + ZQX(:,IENDO:2*KDGNHD) = 0._JPRBT + DO JGL=KSLO, KDGNHD + IGLS = 2*KDGNHD+1-JGL + DO JF=1,KFIELDS + ZQX(JF,JGL)=FOUBUF(ISTN(JGL)+JF) + ZQX(JF,IGLS)=FOUBUF(ISTS(JGL)+JF) + ENDDO + ENDDO + + ! split into symmetric / antisymmetric + DO IGL=KSL,KDGNH + IGLS = 2*KDGNH+1-IGL + PCOEFS(1:KFIELDS,IGL) = ZQX(1:KFIELDS,IGL) + ZQX(1:KFIELDS,IGLS) + PCOEFA(1:KFIELDS,IGL) = ZQX(1:KFIELDS,IGL) - ZQX(1:KFIELDS,IGLS) + ENDDO + + DEALLOCATE(ZQX) + +ENDIF + +IF (LHOOK) CALL DR_HOOK('CDMAP_MOD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE CDMAP +END MODULE CDMAP_MOD diff --git a/src/trans/gpu/internal/cpledn_mod.F90 b/src/trans/gpu/internal/cpledn_mod.F90 new file mode 100755 index 0000000..9b60b18 --- /dev/null +++ b/src/trans/gpu/internal/cpledn_mod.F90 @@ -0,0 +1,134 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE CPLEDN_MOD +CONTAINS +SUBROUTINE CPLEDN(KN,KODD,PFN,PX,KFLAG,PW,PXN,PXMOD) + +!**** *CPLEDN* - Routine to perform a single Newton iteration step to find +! the zero of the ordinary Legendre polynomial of degree N + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *CPLEDN(KN,KDBLE,PX,KFLAG,PW,PXN,PXMOD)* + +! Explicit arguments : +! -------------------- +! KN : Degree of the Legendre polynomial (in) +! KODD : odd or even number of latitudes (in) +! PFN : Fourier coefficients of series expansion (in) +! for the ordinary Legendre polynomials +! PX : abcissa where the computations are performed (in) +! KFLAG : When KFLAG.EQ.1 computes the weights (in) +! PW : Weight of the quadrature at PXN (out) +! PXN : new abscissa (Newton iteration) (out) +! PXMOD : PXN-PX (out) + +! Implicit arguments : +! -------------------- +! None + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- +! None + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! Michel Rochas, 90-08-30 (Lobatto+cleaning) +! K. Yessad (Sep 2008): cleaning, improve comments. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KN +INTEGER(KIND=JPIM),INTENT(IN) :: KODD +REAL(KIND=JPRD),INTENT(IN) :: PFN(0:KN/2) +REAL(KIND=JPRD),INTENT(IN) :: PX +INTEGER(KIND=JPIM),INTENT(IN) :: KFLAG +REAL(KIND=JPRD),INTENT(OUT) :: PW +REAL(KIND=JPRD),INTENT(INOUT) :: PXN +REAL(KIND=JPRD),INTENT(OUT) :: PXMOD + +! ------------------------------------------------------------------ + +REAL(KIND=JPRD) :: ZDLX,ZDLK,ZDLLDN,ZDLXN,ZDLMOD + +INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(PX) + +INTEGER(KIND=JPIM) :: JN, IK + +! ----------------------------------------------------------------- + +!* 1. NEWTON ITERATION STEP. +! ---------------------- + +ZDLX = PX + +ZDLK = 0.0_JPRD +IF( KODD==0 ) ZDLK=0.5_JPRD*PFN(0) +ZDLXN = 0.0_JPRD +ZDLLDN = 0.0_JPRD +IK=1 + +IF(KFLAG == 0)THEN + DO JN=2-KODD,KN,2 + ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 + ZDLK = ZDLK + PFN(IK)*COS(REAL(JN,JPKD)*ZDLX) + ! normalised derivative == d/d\theta(\overbar{P_n}^0) + ZDLLDN = ZDLLDN - PFN(IK)*REAL(JN,JPKD)*SIN(REAL(JN,JPKD)*ZDLX) + IK=IK+1 + ENDDO + ! Newton method + ZDLMOD = -ZDLK/ZDLLDN + ZDLXN = ZDLX+ZDLMOD + PXN = ZDLXN + PXMOD = ZDLMOD +ENDIF + +! ------------------------------------------------------------------ + +!* 2. Computes weight. +! ---------------- + +IF(KFLAG == 1)THEN + DO JN=2-KODD,KN,2 + ! normalised derivative + ZDLLDN = ZDLLDN - PFN(IK)*REAL(JN,JPKD)*SIN(REAL(JN,JPKD)*ZDLX) + IK=IK+1 + ENDDO + PW = REAL(2*KN+1,JPKD)/ZDLLDN**2 +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE CPLEDN +END MODULE CPLEDN_MOD diff --git a/src/trans/gpu/internal/cuda_gemm_batched_mod.F90 b/src/trans/gpu/internal/cuda_gemm_batched_mod.F90 new file mode 100755 index 0000000..04db087 --- /dev/null +++ b/src/trans/gpu/internal/cuda_gemm_batched_mod.F90 @@ -0,0 +1,248 @@ +MODULE CUDA_GEMM_BATCHED_MOD + USE HIPBLAS_MOD + USE PARKIND1, ONLY: JPRD, JPRM, JPIM, JPIB + + IMPLICIT NONE + +!! PRIVATE + PUBLIC CUDA_GEMM_BATCHED, CUDA_DGEMM_BATCHED_OVERLOAD, CUDA_DGEMM_BATCHED_1D_OVERLOAD + + INTERFACE CUDA_GEMM_BATCHED + MODULE PROCEDURE CUDA_DGEMM_BATCHED_OVERLOAD + MODULE PROCEDURE CUDA_SGEMM_BATCHED_OVERLOAD + MODULE PROCEDURE CUDA_SGEMM_STRIDED_BATCHED_OVERLOAD + MODULE PROCEDURE CUDA_DGEMM_BATCHED_1D_OVERLOAD + MODULE PROCEDURE CUDA_SGEMM_BATCHED_1D_OVERLOAD + MODULE PROCEDURE CUDA_SGEMM_STRIDED_BATCHED_1D_OVERLOAD + END INTERFACE CUDA_GEMM_BATCHED + +CONTAINS +SUBROUTINE CUDA_DGEMM_BATCHED_OVERLOAD( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) +CHARACTER, INTENT(IN) :: TRANSA +CHARACTER, INTENT(IN) :: TRANSB +INTEGER(KIND=JPIM) :: M +INTEGER(KIND=JPIM) :: N +INTEGER(KIND=JPIM) :: K +REAL(KIND=JPRD) :: ALPHA +REAL(KIND=JPRD), DIMENSION(:,:,:) :: AARRAY +INTEGER(KIND=JPIM) :: LDA +INTEGER(KIND=JPIM) :: STRIDEA +REAL(KIND=JPRD), DIMENSION(:,:,:) :: BARRAY +INTEGER(KIND=JPIM) :: LDB +INTEGER(KIND=JPIM) :: STRIDEB +REAL(KIND=JPRD) :: BETA +REAL(KIND=JPRD), DIMENSION(:,:,:) :: CARRAY +INTEGER(KIND=JPIM) :: LDC +INTEGER(KIND=JPIM) :: STRIDEC +INTEGER(KIND=JPIM) :: BATCHCOUNT + +CALL HIP_DGEMM_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) +END SUBROUTINE CUDA_DGEMM_BATCHED_OVERLOAD + +SUBROUTINE CUDA_SGEMM_BATCHED_OVERLOAD( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) +CHARACTER, INTENT(IN) :: TRANSA +CHARACTER, INTENT(IN) :: TRANSB +INTEGER(KIND=JPIM) :: M +INTEGER(KIND=JPIM) :: N +INTEGER(KIND=JPIM) :: K +REAL(KIND=JPRM) :: ALPHA +REAL(KIND=JPRM), DIMENSION(:,:,:) :: AARRAY +INTEGER(KIND=JPIM) :: LDA +INTEGER(KIND=JPIM) :: STRIDEA +REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY +INTEGER(KIND=JPIM) :: LDB +INTEGER(KIND=JPIM) :: STRIDEB +REAL(KIND=JPRM) :: BETA +REAL(KIND=JPRM), DIMENSION(:,:,:) :: CARRAY +INTEGER(KIND=JPIM) :: LDC +INTEGER(KIND=JPIM) :: STRIDEC +INTEGER(KIND=JPIM) :: BATCHCOUNT + +CALL HIP_SGEMM_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) +END SUBROUTINE CUDA_SGEMM_BATCHED_OVERLOAD + +SUBROUTINE CUDA_SGEMM_STRIDED_BATCHED_OVERLOAD( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) + CHARACTER, INTENT(IN) :: TRANSA + CHARACTER, INTENT(IN) :: TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: K + REAL(KIND=JPRM) :: ALPHA + REAL(KIND=JPRM), DIMENSION(:,:,:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIB) :: STRIDEA + REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIB) :: STRIDEB + REAL(KIND=JPRM) :: BETA + REAL(KIND=JPRM), DIMENSION(:,:,:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIB) :: STRIDEC + INTEGER(KIND=JPIM) :: BATCHCOUNT + + CALL HIP_SGEMM_STRIDED_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) +END SUBROUTINE CUDA_SGEMM_STRIDED_BATCHED_OVERLOAD + +SUBROUTINE CUDA_DGEMM_BATCHED_1D_OVERLOAD( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) + CHARACTER, INTENT(IN) :: TRANSA + CHARACTER, INTENT(IN) :: TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: K + REAL(KIND=JPRD) :: ALPHA + REAL(KIND=JPRD), DIMENSION(:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIM) :: STRIDEA + REAL(KIND=JPRD), DIMENSION(:,:,:) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIM) :: STRIDEB + REAL(KIND=JPRD) :: BETA + REAL(KIND=JPRD), DIMENSION(:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIM) :: STRIDEC + INTEGER(KIND=JPIM) :: BATCHCOUNT + + CALL HIP_DGEMM_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) + END SUBROUTINE CUDA_DGEMM_BATCHED_1D_OVERLOAD + + SUBROUTINE CUDA_SGEMM_BATCHED_1D_OVERLOAD( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) + CHARACTER, INTENT(IN) :: TRANSA + CHARACTER, INTENT(IN) :: TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: K + REAL(KIND=JPRM) :: ALPHA + REAL(KIND=JPRM), DIMENSION(:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIM) :: STRIDEA + REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIM) :: STRIDEB + REAL(KIND=JPRM) :: BETA + REAL(KIND=JPRM), DIMENSION(:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIM) :: STRIDEC + INTEGER(KIND=JPIM) :: BATCHCOUNT + + CALL HIP_SGEMM_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) + END SUBROUTINE CUDA_SGEMM_BATCHED_1D_OVERLOAD + + SUBROUTINE CUDA_SGEMM_STRIDED_BATCHED_1D_OVERLOAD( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) + CHARACTER, INTENT(IN) :: TRANSA + CHARACTER, INTENT(IN) :: TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: K + REAL(KIND=JPRM) :: ALPHA + REAL(KIND=JPRM), DIMENSION(:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIB) :: STRIDEA + REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIB) :: STRIDEB + REAL(KIND=JPRM) :: BETA + REAL(KIND=JPRM), DIMENSION(:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIB) :: STRIDEC + INTEGER(KIND=JPIM) :: BATCHCOUNT + + CALL HIP_SGEMM_STRIDED_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT) + END SUBROUTINE CUDA_SGEMM_STRIDED_BATCHED_1D_OVERLOAD + +END MODULE CUDA_GEMM_BATCHED_MOD diff --git a/src/trans/gpu/internal/dealloc_resol_mod.F90 b/src/trans/gpu/internal/dealloc_resol_mod.F90 new file mode 100755 index 0000000..001b046 --- /dev/null +++ b/src/trans/gpu/internal/dealloc_resol_mod.F90 @@ -0,0 +1,192 @@ +! (C) Copyright 2013- ECMWF. +! (C) Copyright 2013- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DEALLOC_RESOL_MOD +CONTAINS +SUBROUTINE DEALLOC_RESOL(KRESOL) + +!**** *DEALLOC_RESOL* - Deallocations of a resolution + +! Purpose. +! -------- +! Release allocated arrays for a given resolution + +!** Interface. +! ---------- +! CALL DEALLOC_RESOL + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 from trans_end + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +USE TPM_DIM ,ONLY : R +USE TPM_GEN ,ONLY : LENABLED, NOUT,NDEF_RESOL +USE TPM_DISTR ,ONLY : D,NPRTRV +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_FFT ,ONLY : T +USE TPM_FLT ,ONLY : S +USE TPM_CTL ,ONLY : C +USE SEEFMM_MIX + +USE SET_RESOL_MOD ,ONLY : SET_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM) :: JMLOC,IPRTRV,JSETV,IMLOC,IM,ILA,ILS, JRESOL + +! ------------------------------------------------------------------ + +IF (.NOT.LENABLED(KRESOL)) THEN + + WRITE(UNIT=NOUT,FMT='('' DEALLOC_RESOL WARNING : KRESOL = '',I3,'' ALREADY DISABLED '')') KRESOL + +ELSE + + CALL SET_RESOL(KRESOL) + + !TPM_FLT + IF( ALLOCATED(S%FA) ) THEN + DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IF( S%LUSEFLT .AND. ILA > S%ITHRESHOLD) THEN + ELSE + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMA)) DEALLOCATE(S%FA(IMLOC)%RPNMA) + ENDIF + IF( S%LUSEFLT .AND. ILS > S%ITHRESHOLD) THEN + ELSE + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMS)) DEALLOCATE(S%FA(IMLOC)%RPNMS) + ENDIF + IF(S%LDLL) THEN + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMWI)) DEALLOCATE(S%FA(IMLOC)%RPNMWI) + IF(.NOT.C%CIO_TYPE == 'mbuf' .AND. ASSOCIATED(S%FA(IMLOC)%RPNMWO)) DEALLOCATE(S%FA(IMLOC)%RPNMWO) + ENDIF + ENDDO + ENDDO + DEALLOCATE(S%FA) + ENDIF + IF(S%LDLL) THEN + CALL FREE_SEEFMM(S%FMM_INTI) + IF(ASSOCIATED(S%FMM_INTI)) DEALLOCATE(S%FMM_INTI) + ENDIF + + !TPM_DISTR + IF(ALLOCATED(D%NFRSTLAT)) DEALLOCATE(D%NFRSTLAT) + IF(ALLOCATED(D%NLSTLAT)) DEALLOCATE(D%NLSTLAT) + IF(ALLOCATED(D%NPTRLAT)) DEALLOCATE(D%NPTRLAT) + IF(ALLOCATED(D%NPTRFRSTLAT)) DEALLOCATE(D%NPTRFRSTLAT) + IF(ALLOCATED(D%NPTRLSTLAT)) DEALLOCATE(D%NPTRLSTLAT) + IF(ALLOCATED(D%LSPLITLAT)) DEALLOCATE(D%LSPLITLAT) + IF(ALLOCATED(D%NSTA)) DEALLOCATE(D%NSTA) + IF(ALLOCATED(D%NONL)) DEALLOCATE(D%NONL) + IF(ALLOCATED(D%NGPTOTL)) DEALLOCATE(D%NGPTOTL) + IF(ALLOCATED(D%NPROCA_GP)) DEALLOCATE(D%NPROCA_GP) + + IF(D%LWEIGHTED_DISTR) THEN + IF(ALLOCATED(D%RWEIGHT)) DEALLOCATE(D%RWEIGHT) + ENDIF + + IF(ALLOCATED(D%MYMS)) DEALLOCATE(D%MYMS) + IF(ALLOCATED(D%NUMPP)) DEALLOCATE(D%NUMPP) + IF(ALLOCATED(D%NPOSSP)) DEALLOCATE(D%NPOSSP) + IF(ALLOCATED(D%NPROCM)) DEALLOCATE(D%NPROCM) + IF(ALLOCATED(D%NDIM0G)) DEALLOCATE(D%NDIM0G) + IF(ALLOCATED(D%NASM0)) DEALLOCATE(D%NASM0) + IF(ALLOCATED(D%NATM0)) DEALLOCATE(D%NATM0) + IF(ALLOCATED(D%NLATLS)) DEALLOCATE(D%NLATLS) + IF(ALLOCATED(D%NLATLE)) DEALLOCATE(D%NLATLE) + IF(ALLOCATED(D%NPMT)) DEALLOCATE(D%NPMT) + IF(ALLOCATED(D%NPMS)) DEALLOCATE(D%NPMS) + IF(ALLOCATED(D%NPMG)) DEALLOCATE(D%NPMG) + IF(ALLOCATED(D%NULTPP)) DEALLOCATE(D%NULTPP) + IF(ALLOCATED(D%NPROCL)) DEALLOCATE(D%NPROCL) + IF(ALLOCATED(D%NPTRLS)) DEALLOCATE(D%NPTRLS) + IF(ALLOCATED(D%NALLMS)) DEALLOCATE(D%NALLMS) + IF(ALLOCATED(D%NPTRMS)) DEALLOCATE(D%NPTRMS) + IF(ALLOCATED(D%NSTAGT0B)) DEALLOCATE(D%NSTAGT0B) + IF(ALLOCATED(D%NSTAGT1B)) DEALLOCATE(D%NSTAGT1B) + IF(ALLOCATED(D%NPNTGTB0)) DEALLOCATE(D%NPNTGTB0) + IF(ALLOCATED(D%NPNTGTB1)) DEALLOCATE(D%NPNTGTB1) + IF(ALLOCATED(D%NLTSFTB)) DEALLOCATE(D%NLTSFTB) + IF(ALLOCATED(D%NLTSGTB)) DEALLOCATE(D%NLTSGTB) + IF(ALLOCATED(D%MSTABF)) DEALLOCATE(D%MSTABF) + IF(ALLOCATED(D%NSTAGTF)) DEALLOCATE(D%NSTAGTF) + + !TPM_FFT + IF (.NOT.D%LCPNMONLY) THEN + IF( ASSOCIATED(T) ) THEN + IF( ALLOCATED(T%TRIGS) ) DEALLOCATE(T%TRIGS) + IF( ALLOCATED(T%NFAX) ) DEALLOCATE(T%NFAX) +!! IF( ALLOCATED(T%LUSEFFT992)) DEALLOCATE(T%LUSEFFT992) + ENDIF + ENDIF + + + !TPM_FIELDS + IF(ALLOCATED(F%RMU)) DEALLOCATE(F%RMU) + IF(ALLOCATED(F%RW)) DEALLOCATE(F%RW) + IF(ALLOCATED(F%R1MU2)) DEALLOCATE(F%R1MU2) + IF(ALLOCATED(F%RACTHE)) DEALLOCATE(F%RACTHE) + IF(ALLOCATED(F%REPSNM)) DEALLOCATE(F%REPSNM) + IF(ALLOCATED(F%RN)) DEALLOCATE(F%RN) + IF(ALLOCATED(F%RLAPIN)) DEALLOCATE(F%RLAPIN) + IF(ALLOCATED(F%NLTN)) DEALLOCATE(F%NLTN) + IF( S%LKEEPRPNM ) THEN + IF(ALLOCATED(F%RPNM)) DEALLOCATE(F%RPNM) + ENDIF + IF( S%LDLL ) THEN + IF(ALLOCATED(F%RMU2)) DEALLOCATE(F%RMU2) + IF(ALLOCATED(F%RACTHE2)) DEALLOCATE(F%RACTHE2) + ENDIF + + !TPM_GEOMETRY + IF(ALLOCATED(G%NMEN)) DEALLOCATE(G%NMEN) + IF(ALLOCATED(G%NDGLU)) DEALLOCATE(G%NDGLU) + IF(ALLOCATED(G%NLOEN)) DEALLOCATE(G%NLOEN) + + LENABLED(KRESOL)=.FALSE. + NDEF_RESOL = COUNT(LENABLED) + ! Do not stay on a disabled resolution + DO JRESOL=1,SIZE(LENABLED) + IF (LENABLED(JRESOL)) THEN + CALL SET_RESOL(JRESOL) + EXIT + ENDIF + ENDDO + +ENDIF +! ------------------------------------------------------------------ + +END SUBROUTINE DEALLOC_RESOL +END MODULE DEALLOC_RESOL_MOD diff --git a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 new file mode 100755 index 0000000..c862b6c --- /dev/null +++ b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 @@ -0,0 +1,307 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIR_TRANS_CTL_MOD +CONTAINS +SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *DIR_TRANS_CTL* - Control routine for direct spectral transform. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : FOUBUF_IN, NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTDIR_CTL_MOD ,ONLY : LTDIR_CTL +USE FTDIR_CTL_MOD ,ONLY : FTDIR_CTL +USE TPM_TRANS ,ONLY : ZGTF +!USE NVTX +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB + + +! ------------------------------------------------------------------ + +! Perform transform + +! just removed this, why needed ? Nils +!#ifdef OMPGPU +!!$OMP TARGET +!#endif +!#ifdef ACCGPU +!!$ACC KERNELS +!#endif + +!!ZGTF(:,:) = 0._JPRBT +!!$ACC UPDATE DEVICE(ZGTF) + +!#ifdef ACCGPU +!!$ACC END KERNELS +!#endif +!#ifdef OMPGPU +!!$OMP END TARGET +!#endif + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ENDIF +#ifdef ACCGPU + !$ACC DATA COPYIN(FOUBUF_IN) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:FOUBUF_IN) +#endif + CALL LTDIR_CTL(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + ENDDO +ELSE + + ! No splitting of fields, transform done in one go + !call nvtxStartRange("DIRTRANS_nodata") + +#ifdef ACCGPU + !$ACC DATA CREATE(FOUBUF_IN) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(ALLOC:FOUBUF_IN) +#endif + !call nvtxStartRange("FTDIR") + CALL FTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + !call nvtxEndRange + + !call nvtxStartRange("LTDIR") +#ifdef ACCGPU + !$ACC DATA COPYOUT(PSPVOR,PSPDIV) IF(KF_UV > 0) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(FROM:PSPVOR,PSPDIV) IF(KF_UV > 0) +#endif +#ifdef ACCGPU + !$ACC DATA COPYOUT(PSPSCALAR) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSCALAR)) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(FROM:PSPSCALAR) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSCALAR)) +#endif +#ifdef ACCGPU + !$ACC DATA COPYOUT(PSPSC2) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC2) .AND. NF_SC2 > 0) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(FROM:PSPSC2) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC2) .AND. NF_SC2 > 0) +#endif +#ifdef ACCGPU + !$ACC DATA COPYOUT(PSPSC3A) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3A) .AND. NF_SC3A > 0) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(FROM:PSPSC3A) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3A) .AND. NF_SC3A > 0) +#endif +#ifdef ACCGPU + !$ACC DATA COPYOUT(PSPSC3B) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3B) .AND. NF_SC3B > 0) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(FROM:PSPSC3B) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3B) .AND. NF_SC3B > 0) +#endif + CALL LTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2) + !call nvtxEndRange + CALL GSTATS(430,0) +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif + CALL GSTATS(430,1) + !call nvtxEndRange + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIR_TRANS_CTL +END MODULE DIR_TRANS_CTL_MOD diff --git a/src/trans/gpu/internal/dir_trans_ctlad_mod.F90 b/src/trans/gpu/internal/dir_trans_ctlad_mod.F90 new file mode 100755 index 0000000..d19fcb1 --- /dev/null +++ b/src/trans/gpu/internal/dir_trans_ctlad_mod.F90 @@ -0,0 +1,194 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIR_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE DIR_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *DIR_TRANS_CTLAD* - Control routine for direct spectral transform-adj. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTLAD - control of Legendre transform +! FTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTDIR_CTLAD_MOD ,ONLY : LTDIR_CTLAD +USE FTDIR_CTLAD_MOD ,ONLY : FTDIR_CTLAD +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB + + +! ------------------------------------------------------------------ + +! Perform transform + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL LTDIR_CTLAD(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC) + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL LTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + &PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + &PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2) + + CALL FTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIR_TRANS_CTLAD +END MODULE DIR_TRANS_CTLAD_MOD diff --git a/src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 b/src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 new file mode 100755 index 0000000..792d989 --- /dev/null +++ b/src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 @@ -0,0 +1,258 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIST_GRID_32_CTL_MOD +CONTAINS +SUBROUTINE DIST_GRID_32_CTL(PGPG,KFDISTG,KPROMA,KFROM,PGP) + +!**** *DIST_GRID_32_CTL* - Distributing global gridpoint array to processors + +! Purpose. +! -------- +! Routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID_32_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint output +! KFROM(:) - Processor responsible for distributing each field +! PGP(:,:,:) - Local spectral array + +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRM +USE MPL_MODULE + +USE TPM_DISTR +USE TPM_GEOMETRY + +USE SET2PE_MOD +USE ABORT_TRANS_MOD +USE EQ_REGIONS_MOD + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +REAL(KIND=JPRM) , INTENT(OUT) :: PGP(:,:,:) + +! Declaration of local variables + +REAL(KIND=JPRM) :: ZDUM(D%NGPTOTMX) +REAL(KIND=JPRM),ALLOCATABLE :: ZBUF(:,:,:),ZRCV2(:,:) +REAL(KIND=JPRM) :: ZRCV(D%NGPTOTMX,KFDISTG) +INTEGER(KIND=JPIM) :: JFLD,JB,JA,IGLOFF,IGL1,IGL2,IOFF,ILAST,ILOFF,ILENR +INTEGER(KIND=JPIM) :: JGL,JLON,ISND,ITAG,J,IRCV +INTEGER(KIND=JPIM) :: JKGLO,IEND,JROF,IBL,JROC +INTEGER(KIND=JPIM) :: ISENDREQ(NPROC,KFDISTG),ILEN(NPROC,KFDISTG) +INTEGER(KIND=JPIM) :: IFROM,IMYFIELDS,IFLD,IFLDSFROM(NPROC) +LOGICAL :: LLSAME + +! ------------------------------------------------------------------ + +! Copy for single PE + +IF(NPROC == 1) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,JFLD,IBL) = PGPG(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + +ELSEIF(KFDISTG>0) THEN + +! test if values in KFROM are all the same + LLSAME=.TRUE. + IFROM=KFROM(1) + DO JFLD=2,KFDISTG + IF(KFROM(JFLD) /= IFROM) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO + + IMYFIELDS = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + CALL GSTATS(1663,0) + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX,IMYFIELDS,NPROC)) + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!$OMP&PRIVATE(JFLD,JA,JB,ISND,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!$OMP&ILOFF,JGL,JLON) + DO JFLD=1,IMYFIELDS + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(ISND,JA,JB,0,0) + + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN(ISND,JFLD) = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + ZBUF(ILEN(ISND,JFLD)+JLON,JFLD,ISND) = & + & PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) + ENDDO + ILEN(ISND,JFLD) = ILEN(ISND,JFLD) + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1663,1) + + ! Message passing + CALL GSTATS_BARRIER(791) + CALL GSTATS(811,0) + ! Send + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + ITAG = MTAGDISTGP + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(:,:,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,1),& + &CDSTRING='DIST_GRID_32_CTL') + ENDDO + ENDIF + ELSE + IF(IMYFIELDS > 0) THEN + ITAG = MTAGDISTGP + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(:,:,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,1),& + &CDSTRING='DIST_GRID_32_CTL') + ENDDO + ENDIF + ENDIF + + ! Receive + + IF( LLSAME )THEN + IRCV = KFROM(1) + ITAG = MTAGDISTGP + CALL MPL_RECV(ZRCV,KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_32_CTL:') + IF( ILENR /= D%NGPTOTMX*KFDISTG )THEN + CALL ABORT_TRANS(' DIST_GRID_32_CTL: INVALID RECEIVE MESSAGE LENGTH 1') + ENDIF + ELSE + IFLDSFROM(:)=0 + DO JFLD=1,KFDISTG + IFLDSFROM(KFROM(JFLD)) = IFLDSFROM(KFROM(JFLD))+1 + ENDDO + ITAG = MTAGDISTGP + DO JROC=1,NPROC + IF(IFLDSFROM(JROC) > 0 ) THEN + IRCV = JROC + ALLOCATE(ZRCV2(D%NGPTOTMX,IFLDSFROM(JROC))) + CALL MPL_RECV(ZRCV2,KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_32_CTL:') + IF( ILENR /= D%NGPTOTMX*IFLDSFROM(JROC) )THEN + CALL ABORT_TRANS(' DIST_GRID_32_CTL: INVALID RECEIVE MESSAGE LENGTH 2') + ENDIF + IFLD = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == JROC) THEN + IFLD = IFLD+1 + ZRCV(1:D%NGPTOT,JFLD) = ZRCV2(1:D%NGPTOT,IFLD) + ENDIF + ENDDO + DEALLOCATE(ZRCV2) + ENDIF + ENDDO + ENDIF + +! Wait for send to complete + + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,1), & + & CDSTRING='DIST_GRID_32_CTL: WAIT 1') + ENDIF + ELSEIF(IMYFIELDS > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,1), & + & CDSTRING='DIST_GRID_32_CTL: WAIT 2') + ENDIF + CALL GSTATS(811,1) + CALL GSTATS_BARRIER2(791) + + CALL GSTATS(1663,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,JFLD,IBL) = ZRCV(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1663,1) + !Synchronize processors + CALL GSTATS(786,0) + CALL MPL_BARRIER(CDSTRING='DIST_GRID_32_CTL:') + CALL GSTATS(786,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID_32_CTL +END MODULE DIST_GRID_32_CTL_MOD diff --git a/src/trans/gpu/internal/dist_grid_ctl_mod.F90 b/src/trans/gpu/internal/dist_grid_ctl_mod.F90 new file mode 100755 index 0000000..4cd0835 --- /dev/null +++ b/src/trans/gpu/internal/dist_grid_ctl_mod.F90 @@ -0,0 +1,275 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIST_GRID_CTL_MOD +CONTAINS +SUBROUTINE DIST_GRID_CTL(PGPG,KFDISTG,KPROMA,KFROM,PGP,KSORT) + +!**** *DIST_GRID_CTL* - Distributing global gridpoint array to processors + +! Purpose. +! -------- +! Routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint output +! KFROM(:) - Processor responsible for distributing each field +! PGP(:,:,:) - Local spectral array +! KSORT(:) - Add KSORT + +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 +! P.Marguinaud : 2014-10-10 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +USE TPM_DISTR ,ONLY : D, MTAGDISTGP, NPRCIDS, MYPROC, NPROC +USE TPM_GEOMETRY ,ONLY : G + +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +! + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN), TARGET :: KSORT (:) + +! Declaration of local variables + +! SS/2018: Removed stack hogs + +REAL(KIND=JPRB),ALLOCATABLE :: ZBUF(:,:,:) +REAL(KIND=JPRB),ALLOCATABLE :: ZRCV(:,:) ! (D%NGPTOTMX,KFDISTG) +INTEGER(KIND=JPIM) :: JFLD,JB,JA,IGLOFF,IGL1,IGL2,IOFF,ILAST,ILOFF,ILENR +INTEGER(KIND=JPIM) :: JGL,JLON,ISND,ITAG,J,IRCV +INTEGER(KIND=JPIM) :: JKGLO,IEND,JROF,IBL,JROC +INTEGER(KIND=JPIM) :: ISENDREQ(NPROC,KFDISTG),ILEN(NPROC,KFDISTG) +INTEGER(KIND=JPIM) :: IFROM,IMYFIELDS,IFLD +INTEGER(KIND=JPIM), POINTER :: ISORT (:) +LOGICAL :: LLSAME + +! ------------------------------------------------------------------ + +IF (PRESENT (KSORT)) THEN + ISORT => KSORT +ELSE + ALLOCATE (ISORT (KFDISTG)) + DO JFLD = 1, KFDISTG + ISORT (JFLD) = JFLD + ENDDO +ENDIF + +! Copy for single PE + +IF(NPROC == 1) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,ISORT(JFLD),IBL) = PGPG(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + +ELSEIF(KFDISTG>0) THEN + +! test if values in KFROM are all the same + LLSAME=.TRUE. + IFROM=KFROM(1) + DO JFLD=2,KFDISTG + IF(KFROM(JFLD) /= IFROM) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO + + IMYFIELDS = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + CALL GSTATS(1663,0) + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX,IMYFIELDS,NPROC)) + +!WARNING: COMMENTING OPENMP OUT AS TEMPORARY WORKAROUND FOR AMD-COMPILER +!!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!!$OMP&PRIVATE(JFLD,JA,JB,ISND,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!!$OMP&ILOFF,JGL,JLON) + DO JFLD=1,IMYFIELDS + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(ISND,JA,JB,0,0) + + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN(ISND,JFLD) = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + ZBUF(ILEN(ISND,JFLD)+JLON,JFLD,ISND) = & + & PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) + ENDDO + ILEN(ISND,JFLD) = ILEN(ISND,JFLD) + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1663,1) + + ! Message passing + CALL GSTATS_BARRIER(791) + CALL GSTATS(811,0) + ! Send + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + ITAG = MTAGDISTGP + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(:,:,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,1),& + &CDSTRING='DIST_GRID_CTL') + ENDDO + ENDIF + ELSE + IFLD = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + IFLD = IFLD+1 + ITAG = MTAGDISTGP+JFLD + DO JROC=1,NPROC + CALL MPL_SEND(ZBUF(1:ILEN(JROC,IFLD),IFLD,JROC),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JROC,JFLD),& + &CDSTRING='DIST_GRID_CTL') + ENDDO + ENDIF + ENDDO + ENDIF + + ! Receive + + ALLOCATE(ZRCV(D%NGPTOTMX,KFDISTG)) + + IF( LLSAME )THEN + IRCV = KFROM(1) + ITAG = MTAGDISTGP + CALL MPL_RECV(ZRCV,KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_CTL:') + IF( ILENR /= D%NGPTOTMX*KFDISTG )THEN + CALL ABORT_TRANS(' DIST_GRID_CTL: INVALID RECEIVE MESSAGE LENGTH 1') + ENDIF + ELSE + DO JFLD=1,KFDISTG + IRCV = KFROM(JFLD) + ITAG = MTAGDISTGP+JFLD + CALL MPL_RECV(ZRCV(:,JFLD),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR,CDSTRING='DIST_GRID_CTL:') + IF( ILENR /= D%NGPTOT )THEN + CALL ABORT_TRANS(' DIST_GRID_CTL: INVALID RECEIVE MESSAGE LENGTH 2') + ENDIF + ENDDO + ENDIF + +! Wait for send to complete + + IF( LLSAME )THEN + IF(KFROM(1) == MYPROC) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,1), & + & CDSTRING='DIST_GRID_CTL: WAIT 1') + ENDIF + ELSE + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(:,JFLD), & + & CDSTRING='DIST_GRID_CTL: WAIT 2') + ENDIF + ENDDO + ENDIF + CALL GSTATS(811,1) + CALL GSTATS_BARRIER2(791) + + CALL GSTATS(1663,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFDISTG + DO JROF=1,IEND + PGP(JROF,ISORT(JFLD),IBL) = ZRCV(IOFF+JROF,JFLD) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1663,1) + DEALLOCATE(ZRCV) + !Synchronize processors + CALL GSTATS(786,0) + CALL MPL_BARRIER(CDSTRING='DIST_GRID_CTL:') + CALL GSTATS(786,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +IF (.NOT. PRESENT (KSORT)) THEN + DEALLOCATE (ISORT) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID_CTL +END MODULE DIST_GRID_CTL_MOD diff --git a/src/trans/gpu/internal/dist_spec_control_mod.F90 b/src/trans/gpu/internal/dist_spec_control_mod.F90 new file mode 100755 index 0000000..449889a --- /dev/null +++ b/src/trans/gpu/internal/dist_spec_control_mod.F90 @@ -0,0 +1,233 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIST_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,KVSET,PSPEC,LDIM1_IS_FLD,& + & KSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,KSORT) + +!**** *DIST_SPEC_CONTROL* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Routine for distributing spectral array + +!** Interface. +! ---------- +! CALL DIST_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array +! KSORT(:) - Re-order fields on output + +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 +! P.Marguinaud : 2014-10-10 + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_NON_BLOCKING_STANDARD + +!USE TPM_GEN +!USE TPM_DIM +USE TPM_DISTR ,ONLY : MTAGDISTSP, MYSETV, NPRCIDS, NPRTRW, MYPROC, NPROC + +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN), TARGET :: KSORT (:) + +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G) +REAL(KIND=JPRB) :: ZFLD(KSPEC2) +REAL(KIND=JPRB),ALLOCATABLE :: ZBUF(:,:) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,JNM,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ISTA,ISTP,ILENR,ISENDREQ(NPRTRW*KFDISTG) +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, IPOS0,ISENT +INTEGER(KIND=JPIM), POINTER :: ISORT (:) + +! ------------------------------------------------------------------ + + +! Compute help array for distribution + +IF (PRESENT (KSORT)) THEN + ISORT => KSORT +ELSE + ALLOCATE (ISORT (KFDISTG)) + DO JFLD = 1, KFDISTG + ISORT (JFLD) = JFLD + ENDDO +ENDIF + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JNM,JFLD) + DO JNM=1,KSPEC2_G + DO JFLD=1,KFDISTG + PSPEC(ISORT (JFLD),JNM) = PSPECG(JFLD,JNM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JNM,JFLD) + DO JFLD=1,KFDISTG + DO JNM=1,KSPEC2_G + PSPEC(JNM,ISORT (JFLD)) = PSPECG(JNM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KSMAX + DO JN=JM,KSMAX + IDIST(II+1) = KDIM0G(JM)+(JN-JM)*2 + IDIST(II+2) = KDIM0G(JM)+(JN-JM)*2+1 + II = II+2 + ENDDO + ENDDO + CALL GSTATS(1804,1) + +!Distribute spectral array + + IFLDS = 0 + DO JFLD=1,KFDISTG + IF(KFROM(JFLD) == MYPROC) THEN + IFLDS = IFLDS+1 + ENDIF + ENDDO + ALLOCATE(ZBUF(KSPEC2_G,IFLDS)) + + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JNM,JFLD) + DO JFLD=1,IFLDS + IF(LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + ZBUF(IDIST(JNM),JFLD) = PSPECG(JFLD,JNM) + ENDDO + ELSE + DO JNM=1,KSPEC2_G + ZBUF(IDIST(JNM),JFLD) = PSPECG(JNM,JFLD) + ENDDO + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + + IFLDR = 0 + IFLDS = 0 + ISENT = 0 + + CALL GSTATS_BARRIER(790) + CALL GSTATS(812,0) + DO JFLD=1,KFDISTG + + ! Send + IF(KFROM(JFLD) == MYPROC) THEN + IFLDS = IFLDS+1 + IBSET = KVSET(JFLD) + ITAG = MTAGDISTSP+JFLD + + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(ISND,0,0,JA,IBSET) + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + ISENT = ISENT+1 + CALL MPL_SEND(ZBUF(ISTA:ISTP,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISENT),& + &CDSTRING='DIST_SPEC_CONTROL:') + ENDIF + ENDDO + ENDIF + ENDDO + + !Receive + DO JFLD=1,KFDISTG + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + ITAG = MTAGDISTSP+JFLD + IF( KSPEC2 > 0 )THEN + IRCV = KFROM(JFLD) + IFLDR = IFLDR+1 + IF(LDIM1_IS_FLD) THEN + CALL MPL_RECV(ZFLD,KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KOUNT=ILENR,CDSTRING='DIST_SPEC_CONTROL:') + PSPEC(ISORT (IFLDR),1:KSPEC2) = ZFLD(:) + ELSE + CALL MPL_RECV(PSPEC(:,ISORT (IFLDR)),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KOUNT=ILENR,CDSTRING='DIST_SPEC_CONTROL:') + ENDIF + IF( ILENR /= KSPEC2 )THEN + CALL ABORT_TRANS('DIST_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') + ENDIF + ENDIF + ENDIF + ENDDO + + DO JA=1,ISENT + CALL MPL_WAIT(KREQUEST=ISENDREQ(JA), & + & CDSTRING='DIST_SPEC_CTL: WAIT') + ENDDO + + CALL GSTATS(812,1) + CALL GSTATS_BARRIER2(790) + +!Synchronize processors + CALL GSTATS(787,0) + IF( NPROC > 1 )THEN + CALL MPL_BARRIER(CDSTRING='DIST_SPEC_CONTROL:') + ENDIF + CALL GSTATS(787,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +IF (.NOT. PRESENT (KSORT)) THEN + DEALLOCATE (ISORT) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_SPEC_CONTROL +END MODULE DIST_SPEC_CONTROL_MOD diff --git a/src/trans/gpu/internal/eq_regions_mod.F90 b/src/trans/gpu/internal/eq_regions_mod.F90 new file mode 100755 index 0000000..5888c10 --- /dev/null +++ b/src/trans/gpu/internal/eq_regions_mod.F90 @@ -0,0 +1,443 @@ +! (C) Copyright 2006- ECMWF. +! (C) Copyright 2006- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE eq_regions_mod +! +! Purpose. +! -------- +! eq_regions_mod provides the code to perform a high level +! partitioning of the surface of a sphere into regions of +! equal area and small diameter. +! the type. +! +! Background. +! ----------- +! This Fortran version of eq_regions is a much cut down version of the +! "Recursive Zonal Equal Area (EQ) Sphere Partitioning Toolbox" of the +! same name developed by Paul Leopardi at the University of New South Wales. +! This version has been coded specifically for the case of partitioning the +! surface of a sphere or S^dim (where dim=2) as denoted in the original code. +! Only a subset of the original eq_regions package has been coded to determine +! the high level distribution of regions on a sphere, as the detailed +! distribution of grid points to each region is left to IFS software. +! This is required to take into account the spatial distribution of grid +! points in an IFS gaussian grid and provide an optimal (i.e. exact) +! distribution of grid points over regions. +! +! The following copyright notice for the eq_regions package is included from +! the original MatLab release. +! +! +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! + Release 1.10 2005-06-26 + +! + + +! + Copyright (c) 2004, 2005, University of New South Wales + +! + + +! + Permission is hereby granted, free of charge, to any person obtaining + +! + a copy of this software and associated documentation files (the + +! + "Software"), to deal in the Software without restriction, including + +! + without limitation the rights to use, copy, modify, merge, publish, + +! + distribute, sublicense, and/or sell copies of the Software, and to + +! + permit persons to whom the Software is furnished to do so, subject to + +! + the following conditions: + +! + + +! + The above copyright notice and this permission notice shall be included + +! + in all copies or substantial portions of the Software. + +! + + +! + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + +! + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + +! + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + +! + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + +! + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + +! + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + +! + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +! + + +! +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +! Author. +! ------- +! George Mozdzynski *ECMWF* +! +! Modifications. +! -------------- +! Original : 2006-04-15 +! +!-------------------------------------------------------------------------------- +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT + +IMPLICIT NONE + +SAVE + +PRIVATE + +PUBLIC eq_regions,l_regions_debug,n_regions_ns,n_regions_ew,n_regions,my_region_ns,my_region_ew +PUBLIC eq_regions_t, eq_regions_save, eq_regions_load, eq_regions_free + +real(kind=JPRBT) :: pi + +type eq_regions_t +logical :: l_regions_debug=.false. +integer(kind=jpim) :: n_regions_ns +integer(kind=jpim) :: n_regions_ew +integer(kind=jpim) :: my_region_ns +integer(kind=jpim) :: my_region_ew +integer(kind=jpim),pointer :: n_regions(:) => null () +end type eq_regions_t + +logical :: l_regions_debug=.false. +integer(kind=jpim) :: n_regions_ns +integer(kind=jpim) :: n_regions_ew +integer(kind=jpim) :: my_region_ns +integer(kind=jpim) :: my_region_ew +integer(kind=jpim),pointer :: n_regions(:) => null () + +CONTAINS + +subroutine eq_regions_save (yder) +type (eq_regions_t), intent (inout) :: yder + +yder%l_regions_debug = l_regions_debug +yder%n_regions_ns = n_regions_ns +yder%n_regions_ew = n_regions_ew +yder%my_region_ns = my_region_ns +yder%my_region_ew = my_region_ew +yder%n_regions => n_regions + +nullify (n_regions) + +end subroutine + +subroutine eq_regions_load (yder) +type (eq_regions_t), intent (inout) :: yder + +l_regions_debug = yder%l_regions_debug +n_regions_ns = yder%n_regions_ns +n_regions_ew = yder%n_regions_ew +my_region_ns = yder%my_region_ns +my_region_ew = yder%my_region_ew +n_regions => yder%n_regions + +nullify (yder%n_regions) + +end subroutine + +subroutine eq_regions_free (yder) +type (eq_regions_t), intent (inout) :: yder + +if (associated (yder%n_regions)) then + deallocate (yder%n_regions) + nullify (yder%n_regions) +endif + +end subroutine + +subroutine eq_regions(N) +! +! eq_regions uses the zonal equal area sphere partitioning algorithm to partition +! the surface of a sphere into N regions of equal area and small diameter. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +integer(kind=jpim) :: n_collars,j +real(kind=JPRBT),allocatable :: r_regions(:) +real(kind=JPRBT) :: c_polar + +pi=2.0_JPRBT*asin(1.0_JPRBT) + +n_regions(:)=0 + +if( N == 1 )then + + ! + ! We have only one region, which must be the whole sphere. + ! + n_regions(1)=1 + n_regions_ns=1 + +else + + ! + ! Given N, determine c_polar + ! the colatitude of the North polar spherical cap. + ! + c_polar = polar_colat(N) + ! + ! Given N, determine the ideal angle for spherical collars. + ! Based on N, this ideal angle, and c_polar, + ! determine n_collars, the number of collars between the polar caps. + ! + n_collars = num_collars(N,c_polar,ideal_collar_angle(N)) + n_regions_ns=n_collars+2 + ! + ! Given N, c_polar and n_collars, determine r_regions, + ! a list of the ideal real number of regions in each collar, + ! plus the polar caps. + ! The number of elements is n_collars+2. + ! r_regions[1] is 1. + ! r_regions[n_collars+2] is 1. + ! The sum of r_regions is N. + allocate(r_regions(n_collars+2)) + call ideal_region_list(N,c_polar,n_collars,r_regions) + ! + ! Given N and r_regions, determine n_regions, a list of the natural number + ! of regions in each collar and the polar caps. + ! This list is as close as possible to r_regions. + ! The number of elements is n_collars+2. + ! n_regions[1] is 1. + ! n_regions[n_collars+2] is 1. + ! The sum of n_regions is N. + ! + call round_to_naturals(N,n_collars,r_regions) + deallocate(r_regions) + if( N /= sum(n_regions(:)) )then + write(*,'("eq_regions: N=",I10," sum(n_regions(:))=",I10)')N,sum(n_regions(:)) + call abor1('eq_regions: N /= sum(n_regions)') + endif + +endif + +if( l_regions_debug )then + write(*,'("eq_regions: N=",I6," n_regions_ns=",I4)') N,n_regions_ns + do j=1,n_regions_ns + write(*,'("eq_regions: n_regions(",I4,")=",I4)') j,n_regions(j) + enddo +endif +n_regions_ew=maxval(n_regions(:)) + +return +end subroutine eq_regions + +function num_collars(N,c_polar,a_ideal) result(num_c) +! +!NUM_COLLARS The number of collars between the polar caps +! +! Given N, an ideal angle, and c_polar, +! determine n_collars, the number of collars between the polar caps. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=JPRBT),intent(in) :: a_ideal,c_polar +integer(kind=jpim) :: num_c +logical enough +enough = (N > 2) .and. (a_ideal > 0) +if( enough )then + num_c = max(1,nint((pi-2.*c_polar)/a_ideal)) +else + num_c = 0 +endif +return +end function num_collars + +subroutine ideal_region_list(N,c_polar,n_collars,r_regions) +! +!IDEAL_REGION_LIST The ideal real number of regions in each zone +! +! List the ideal real number of regions in each collar, plus the polar caps. +! +! Given N, c_polar and n_collars, determine r_regions, a list of the ideal real +! number of regions in each collar, plus the polar caps. +! The number of elements is n_collars+2. +! r_regions[1] is 1. +! r_regions[n_collars+2] is 1. +! The sum of r_regions is N. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N,n_collars +real(kind=JPRBT),intent(in) :: c_polar +real(kind=JPRBT),intent(out) :: r_regions(n_collars+2) +integer(kind=jpim) :: collar_n +real(kind=JPRBT) :: ideal_region_area,ideal_collar_area +real(kind=JPRBT) :: a_fitting +r_regions(:)=0.0_JPRBT +r_regions(1) = 1.0_JPRBT +if( n_collars > 0 )then + ! + ! Based on n_collars and c_polar, determine a_fitting, + ! the collar angle such that n_collars collars fit between the polar caps. + ! + a_fitting = (pi-2.0_JPRBT*c_polar)/float(n_collars) + ideal_region_area = area_of_ideal_region(N) + do collar_n=1,n_collars + ideal_collar_area = area_of_collar(c_polar+(collar_n-1)*a_fitting, & + & c_polar+collar_n*a_fitting) + r_regions(1+collar_n) = ideal_collar_area / ideal_region_area + enddo +endif +r_regions(2+n_collars) = 1. +return +end subroutine ideal_region_list + +function ideal_collar_angle(N) result(ideal) +! +! IDEAL_COLLAR_ANGLE The ideal angle for spherical collars of an EQ partition +! +! IDEAL_COLLAR_ANGLE(N) sets ANGLE to the ideal angle for the +! spherical collars of an EQ partition of the unit sphere S^2 into N regions. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=JPRBT) :: ideal +ideal = area_of_ideal_region(N)**(0.5_JPRBT) +return +end function ideal_collar_angle + +subroutine round_to_naturals(N,n_collars,r_regions) +! +! ROUND_TO_NATURALS Round off a given list of numbers of regions +! +! Given N and r_regions, determine n_regions, a list of the natural number +! of regions in each collar and the polar caps. +! This list is as close as possible to r_regions, using rounding. +! The number of elements is n_collars+2. +! n_regions[1] is 1. +! n_regions[n_collars+2] is 1. +! The sum of n_regions is N. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N,n_collars +real(kind=JPRBT),intent(in) :: r_regions(n_collars+2) +integer(kind=jpim) :: zone_n +real(kind=JPRBT) :: discrepancy +n_regions(1:n_collars+2) = r_regions(:) +discrepancy = 0.0_JPRBT +do zone_n = 1,n_collars+2 + n_regions(zone_n) = nint(r_regions(zone_n)+discrepancy); + discrepancy = discrepancy+r_regions(zone_n)-float(n_regions(zone_n)); +enddo +return +end subroutine round_to_naturals + +function polar_colat(N) result(polar_c) +! +! Given N, determine the colatitude of the North polar spherical cap. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=JPRBT) :: area +real(kind=JPRBT) :: polar_c +if( N == 1 ) polar_c=pi +if( N == 2 ) polar_c=pi/2.0_JPRBT +if( N > 2 )then + area=area_of_ideal_region(N) + polar_c=sradius_of_cap(area) +endif +return +end function polar_colat + +function area_of_ideal_region(N) result(area) +! +! AREA_OF_IDEAL_REGION(N) sets AREA to be the area of one of N equal +! area regions on S^2, that is 1/N times AREA_OF_SPHERE. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +integer(kind=jpim),intent(in) :: N +real(kind=JPRBT) :: area_of_sphere +real(kind=JPRBT) :: area +area_of_sphere = (2.0_JPRBT*pi**1.5_JPRBT/gamma(1.5_JPRBT)) +area = area_of_sphere/float(N) +return +end function area_of_ideal_region + +function sradius_of_cap(area) result(sradius) +! +! SRADIUS_OF_CAP(AREA) returns the spherical radius of +! an S^2 spherical cap of area AREA. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +real(kind=JPRBT),intent(in) :: area +real(kind=JPRBT) :: sradius +sradius = 2.0_JPRBT*asin(sqrt(area/pi)/2.0_JPRBT) +return +end function sradius_of_cap + +function area_of_collar(a_top, a_bot) result(area) +! +! AREA_OF_COLLAR Area of spherical collar +! +! AREA_OF_COLLAR(A_TOP, A_BOT) sets AREA to be the area of an S^2 spherical +! collar specified by A_TOP, A_BOT, where A_TOP is top (smaller) spherical radius, +! A_BOT is bottom (larger) spherical radius. +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +real(kind=JPRBT),intent(in) :: a_top,a_bot +real(kind=JPRBT) area +area = area_of_cap(a_bot) - area_of_cap(a_top) +return +end function area_of_collar + +function area_of_cap(s_cap) result(area) +! +! AREA_OF_CAP Area of spherical cap +! +! AREA_OF_CAP(S_CAP) sets AREA to be the area of an S^2 spherical +! cap of spherical radius S_CAP. +! +real(kind=JPRBT),intent(in) :: s_cap +real(kind=JPRBT) area +area = 4.0_JPRBT*pi * sin(s_cap/2.0_JPRBT)**2 +return +end function area_of_cap + +function gamma(x) result(gamma_res) +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +IMPLICIT NONE +real(kind=JPRBT),intent(in) :: x +real(kind=JPRBT) :: gamma_res +real(kind=JPRBT) :: p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13 +real(kind=JPRBT) :: w,y +integer(kind=jpim) :: k,n +parameter (& +& p0 = 0.999999999999999990e+00_JPRBT,& +& p1 = -0.422784335098466784e+00_JPRBT,& +& p2 = -0.233093736421782878e+00_JPRBT,& +& p3 = 0.191091101387638410e+00_JPRBT,& +& p4 = -0.024552490005641278e+00_JPRBT,& +& p5 = -0.017645244547851414e+00_JPRBT,& +& p6 = 0.008023273027855346e+00_JPRBT) +parameter (& +& p7 = -0.000804329819255744e+00_JPRBT,& +& p8 = -0.000360837876648255e+00_JPRBT,& +& p9 = 0.000145596568617526e+00_JPRBT,& +& p10 = -0.000017545539395205e+00_JPRBT,& +& p11 = -0.000002591225267689e+00_JPRBT,& +& p12 = 0.000001337767384067e+00_JPRBT,& +& p13 = -0.000000199542863674e+00_JPRBT) +n = nint(x - 2) +w = x - (n + 2) +y = ((((((((((((p13 * w + p12) * w + p11) * w + p10) *& +& w + p9) * w + p8) * w + p7) * w + p6) * w + p5) *& +& w + p4) * w + p3) * w + p2) * w + p1) * w + p0 +if (n .gt. 0) then + w = x - 1 + do k = 2, n + w = w * (x - k) + end do +else + w = 1 + do k = 0, -n - 1 + y = y * (x + k) + end do +end if +gamma_res = w / y +return +end function gamma + +END MODULE eq_regions_mod diff --git a/src/trans/gpu/internal/field_split_mod.F90 b/src/trans/gpu/internal/field_split_mod.F90 new file mode 100755 index 0000000..20719e6 --- /dev/null +++ b/src/trans/gpu/internal/field_split_mod.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FIELD_SPLIT_MOD +CONTAINS +SUBROUTINE FIELD_SPLIT(KBLK,KF_GP,KKF_UV_G,KVSETUV,KVSETSC,& + & KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G,& + & KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS) + +!**** *FIELD_SPLIT* - Split fields + +! Purpose. +! -------- +! Split fields + +!** Interface. +! ---------- +! CALL FIELD_SPLIT(...) + +! Explicit arguments : +! -------------------- +! KBLK - block number +! KF_GP - total number of output gridpoint fields +! KKF_UV_G - global number of spectral u-v fields +! KVSETUV - IVSETUV from SHUFFLE +! KVSETSC - IVSETUV from SHUFFLE + +! All the following output arguments are quantities for THIS packet. +! KSTUV_G - +! KENUV_G - +! KF_UV_G - +! KSTSC_G - +! KENSC_G - +! KF_SCALARS_G - +! KSTUV - +! KENUV - +! KF_UV - +! KSTSC - +! KENSC - +! KF_SCALARS - + +! Externals. NONE +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : MYSETV, NPRTRV +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KBLK,KF_GP,KKF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KVSETUV(:),KVSETSC(:) +INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS + +! Local variables + +INTEGER(KIND=JPIM) :: ISTF,IENF,J + +! ------------------------------------------------------------------ + +ISTF = (KBLK-1)*NPROMATR+1 +IENF = MIN(KBLK*NPROMATR,KF_GP) + +KSTUV_G = (KBLK-1)*NPROMATR/2+1 +KENUV_G = MIN(KBLK*NPROMATR/2,KKF_UV_G) +IF(ISTF > 2*KKF_UV_G) KSTUV_G = KENUV_G+1 +KF_UV_G = KENUV_G-KSTUV_G+1 +KSTSC_G = MAX(ISTF-2*KKF_UV_G,1) +KENSC_G = MAX(IENF-2*KKF_UV_G,0) +KF_SCALARS_G = KENSC_G-KSTSC_G+1 + +! Spectral fields distributed over fields + +IF(NPRTRV > 1) THEN + KF_UV = 0 + KSTUV = 1 + DO J=1,KSTUV_G-1 + IF(KVSETUV(J) == MYSETV) THEN + KSTUV = KSTUV+1 + ENDIF + ENDDO + KENUV = KSTUV-1 + DO J=KSTUV_G,KENUV_G + IF(KVSETUV(J) == MYSETV) THEN + KF_UV = KF_UV+1 + KENUV = KENUV+1 + ENDIF + ENDDO + KF_SCALARS = 0 + KSTSC = 1 + DO J=1,KSTSC_G-1 + IF(KVSETSC(J) == MYSETV) THEN + KSTSC =KSTSC+1 + ENDIF + ENDDO + KENSC = KSTSC-1 + DO J=KSTSC_G,KENSC_G + IF(KVSETSC(J) == MYSETV) THEN + KF_SCALARS = KF_SCALARS+1 + KENSC = KENSC+1 + ENDIF + ENDDO +ELSE + + ! Spectral fields not distributed over fields + + KF_UV = KF_UV_G + KSTUV = KSTUV_G + KENUV = KENUV_G + KF_SCALARS = KF_SCALARS_G + KSTSC = KSTSC_G + KENSC = KENSC_G +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE FIELD_SPLIT +END MODULE FIELD_SPLIT_MOD diff --git a/src/trans/gpu/internal/fourier_in_mod.F90 b/src/trans/gpu/internal/fourier_in_mod.F90 new file mode 100755 index 0000000..52b888c --- /dev/null +++ b/src/trans/gpu/internal/fourier_in_mod.F90 @@ -0,0 +1,118 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_IN_MOD +CONTAINS +SUBROUTINE FOURIER_IN(PREEL,KFIELDS) + +!**** *FOURIER_IN* - Copy fourier data from buffer to local array + +! Purpose. +! -------- +! Routine for copying fourier data from buffer to local array + +!** Interface. +! ---------- +! CALL FOURIER_IN(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, D_NSTAGTF,D_MSTABF,D_NSTAGT0B,D_NPNTGTB0,D_NPROCM,D_NPTRLS +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G, G_NMEN,G_NMEN_MAX +USE TPM_GEN ,ONLY : NOUT +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS + +INTEGER(KIND=JPIM) :: KGL + +REAL(KIND=JPRBT), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC,iimax1,iimax2,iimax3,iunit + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +#ifdef ACCGPU +!$ACC DATA PRESENT(D_NPTRLS,G_NMEN,D_NPROCM,D_NSTAGT0B,D_MSTABF,D_NPNTGTB0,FOUBUF,PREEL,D_NSTAGTF) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP(PRESENT,ALLOC:D_NPTRLS,G_NMEN,D_NPROCM,D_NSTAGT0B,D_MSTABF,D_NPNTGTB0,FOUBUF,PREEL,D_NSTAGTF) +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IGLG,IPROC,ISTA) DEFAULT(NONE) & +!$OMP& SHARED(IBEG,IEND,IINC,G_NMEN_MAX,KFIELDS,D_NPTRLS,MYSETW,G_NMEN, & +!$OMP& D_NPROCM,D_NSTAGT0B,D_MSTABF,D_NPNTGTB0,PREEL,D_NSTAGTF,FOUBUF) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IGLG,IPROC,ISTA) DEFAULT(NONE) & +!$ACC& COPYIN(IBEG,IEND,IINC,KFIELDS,MYSETW) & +!$ACC& PRESENT(G_NMEN_MAX,D_NPTRLS,G_NMEN,D_NPROCM,D_NSTAGT0B, & +!$ACC& D_MSTABF,D_NPNTGTB0,PREEL,D_NSTAGTF,FOUBUF) +#endif +DO KGL=IBEG,IEND,IINC + DO JM=0,G_NMEN_MAX + DO JF=1,KFIELDS + + IGLG = D_NPTRLS(MYSETW)+KGL-1 + + IF ( JM .LE. G_NMEN(IGLG)) THEN + + IPROC = D_NPROCM(JM) + ISTA = (D_NSTAGT0B(D_MSTABF(IPROC))+D_NPNTGTB0(JM,KGL))*2*KFIELDS + + PREEL(2*JF-1,2*JM+1+D_NSTAGTF(KGL)) = FOUBUF(ISTA+2*JF-1) + PREEL(2*JF, 2*JM+1+D_NSTAGTF(KGL)) = FOUBUF(ISTA+2*JF ) + !TODO (Andreas): should be able to remove the factor 2 in the second dimension (in front of jm) + !and reduce the size of the array. Will need to adapt fsc_mod accordingly! This is actually more + !difficult: d_nstagtf(kgl) is not necessarily even! + + END IF + ENDDO + ENDDO +ENDDO +#ifdef ACCGPU +!$ACC END DATA +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_IN +END MODULE FOURIER_IN_MOD diff --git a/src/trans/gpu/internal/fourier_inad_mod.F90 b/src/trans/gpu/internal/fourier_inad_mod.F90 new file mode 100755 index 0000000..b36f4c5 --- /dev/null +++ b/src/trans/gpu/internal/fourier_inad_mod.F90 @@ -0,0 +1,74 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_INAD_MOD +CONTAINS +SUBROUTINE FOURIER_INAD(PREEL,KFIELDS,KGL) + +!**** *FOURIER_INAD* - Copy fourier data from buffer to local array - adjoint + +! Purpose. +! -------- +! Routine for copying fourier data from buffer to local array + +!** Interface. +! ---------- +! CALL FOURIER_INAD(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL + +REAL(KIND=JPRBT), INTENT(IN) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +DO JM=0,G%NMEN(IGLG) + IPROC = D%NPROCM(JM) + IR = 2*JM+1+D%NSTAGTF(KGL) + II = 2*JM+2+D%NSTAGTF(KGL) + ISTA = (D%NSTAGT0B(D%MSTABF(IPROC))+D%NPNTGTB0(JM,KGL))*2*KFIELDS + DO JF=1,KFIELDS + FOUBUF(ISTA+2*JF-1) = PREEL(JF,IR) + FOUBUF(ISTA+2*JF ) = PREEL(JF,II) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_INAD +END MODULE FOURIER_INAD_MOD + diff --git a/src/trans/gpu/internal/fourier_out_mod.F90 b/src/trans/gpu/internal/fourier_out_mod.F90 new file mode 100755 index 0000000..d6858c7 --- /dev/null +++ b/src/trans/gpu/internal/fourier_out_mod.F90 @@ -0,0 +1,119 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_OUT_MOD +CONTAINS +SUBROUTINE FOURIER_OUT(KFIELDS) + +!**** *FOURIER_OUT* - Copy fourier data from local array to buffer + +! Purpose. +! -------- +! Routine for copying fourier data from local array to buffer + +!** Interface. +! ---------- +! CALL FOURIER_OUT(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, D_NPTRLS,D_NSTAGTF,D_MSTABF,D_NSTAGT1B,D_NPNTGTB0,D_NPROCM, D_NPROCL +USE TPM_TRANS ,ONLY : FOUBUF_IN, ZGTF +USE TPM_GEOMETRY ,ONLY : G, G_NMEN,G_NMEN_MAX +! + +IMPLICIT NONE + +!REAL(KIND=JPRBT), INTENT(IN) :: PREEL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM) :: KGL + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA, ISTA1,JMMAX, iunit + +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC, IOFF,iimax1,iimax2,iimax3 + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +#ifdef ACCGPU +!$ACC DATA PRESENT(FOUBUF_IN,ZGTF,D_NPTRLS,G_NMEN,D_NPROCM,D_NSTAGT1B,D_MSTABF,D_NPNTGTB0,D_NSTAGTF) & +!$ACC& COPYIN(IBEG,IEND,IINC) +#endif +#ifdef OMPGPU +!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! +!$OMP TARGET DATA MAP(ALLOC:FOUBUF_IN,ZGTF, D_NPTRLS,G_NMEN,D_NPROCM,D_NSTAGT1B,D_MSTABF,D_NPNTGTB0,D_NSTAGTF) & +!$OMP& MAP(TO:IBEG,IEND,IINC) + +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) COLLAPSE(3) PRIVATE(IGLG,JMMAX,IPROC,ISTA,IOFF) & +!$OMP& SHARED(IBEG,IEND,IINC,G_NMEN_MAX,KFIELDS,D_NPTRLS,MYSETW,G_NMEN, & +!$OMP& D_NPROCM,D_NSTAGT1B,D_MSTABF,D_NPNTGTB0,D_NSTAGTF,FOUBUF_IN,ZGTF) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(IGLG,JMMAX,IPROC,ISTA,IOFF) & +!$ACC & COPYIN(IBEG,IEND,IINC,KFIELDS,MYSETW) & +!$ACC & PRESENT(G_NMEN_MAX,D_NPTRLS,G_NMEN,D_NPROCM,D_NSTAGT1B,D_MSTABF,D_NPNTGTB0,D_NSTAGTF,FOUBUF_IN,ZGTF) +#endif +DO KGL=IBEG,IEND,IINC + DO JM=0,G_NMEN_MAX + DO JF=1,KFIELDS + + IGLG = D_NPTRLS(MYSETW)+KGL-1 + JMMAX = G_NMEN(IGLG) + IF (JM .LE. JMMAX) THEN + + IPROC = D_NPROCM(JM) + ISTA = (D_NSTAGT1B(D_MSTABF(IPROC))+D_NPNTGTB0(JM,KGL))*2*KFIELDS + IOFF = 1+D_NSTAGTF(KGL) + + ! imaginary may be not JM+1 but JM+G_NMEN(IGLG)+1 + FOUBUF_IN(ISTA+2*JF-1) = ZGTF(2*JF-1, 2*JM+IOFF) + FOUBUF_IN(ISTA+2*JF ) = ZGTF(2*JF , 2*JM+IOFF) + + END IF + ENDDO + ENDDO +END DO +#ifdef ACCGPU +!$ACC END DATA +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_OUT +END MODULE FOURIER_OUT_MOD + diff --git a/src/trans/gpu/internal/fourier_outad_mod.F90 b/src/trans/gpu/internal/fourier_outad_mod.F90 new file mode 100755 index 0000000..8bc73f8 --- /dev/null +++ b/src/trans/gpu/internal/fourier_outad_mod.F90 @@ -0,0 +1,73 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_OUTAD_MOD +CONTAINS +SUBROUTINE FOURIER_OUTAD(PREEL,KFIELDS,KGL) + +!**** *FOURIER_OUTAD* - Copy fourier data from local array to buffer - adjoint + +! Purpose. +! -------- +! Routine for copying fourier data from local array to buffer + +!** Interface. +! ---------- +! CALL FOURIER_OUTAD(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +REAL(KIND=JPRBT), INTENT(OUT) :: PREEL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +DO JM=0,G%NMEN(IGLG) + IPROC = D%NPROCM(JM) + IR = 2*JM+1+D%NSTAGTF(KGL) + II = 2*JM+2+D%NSTAGTF(KGL) + ISTA = (D%NSTAGT1B(D%MSTABF(IPROC))+D%NPNTGTB0(JM,KGL))*2*KFIELDS + DO JF=1,KFIELDS + PREEL(JF,IR) = FOUBUF_IN(ISTA+2*JF-1) + PREEL(JF,II) = FOUBUF_IN(ISTA+2*JF ) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_OUTAD +END MODULE FOURIER_OUTAD_MOD + diff --git a/src/trans/gpu/internal/fsc_mod.F90 b/src/trans/gpu/internal/fsc_mod.F90 new file mode 100755 index 0000000..6cb1efe --- /dev/null +++ b/src/trans/gpu/internal/fsc_mod.F90 @@ -0,0 +1,226 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FSC_MOD +CONTAINS +SUBROUTINE FSC(KF_UV,KF_SCALARS,KF_SCDERS,& + & KST_UV,KST_SC,KST_NSDERS,KST_EWDERS,KST_UVDERS) + +!**** *FSC - Division by a*cos(theta), east-west derivatives + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSC(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_TRANS ,ONLY : LUVDER, LATLON, ZGTF +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, D_NUMP, D_NPTRLS, D_NSTAGTF +USE TPM_FIELDS ,ONLY : F +USE TPM_GEOMETRY ,ONLY : G, G_NMEN +USE TPM_FLT ,ONLY : S +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +! + +IMPLICIT NONE +INTEGER(KIND=JPIM) :: KGL +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_UV,KF_SCALARS,KF_SCDERS +INTEGER(KIND=JPIM) , INTENT(IN) :: KST_UV, KST_SC, KST_NSDERS, KST_EWDERS, KST_UVDERS + +REAL(KIND=JPRBT) , POINTER :: PUV(:,:) +REAL(KIND=JPRBT) , POINTER :: PSCALAR(:,:) +REAL(KIND=JPRBT) , POINTER :: PNSDERS(:,:) +REAL(KIND=JPRBT) , POINTER :: PEWDERS(:,:) +REAL(KIND=JPRBT) , POINTER :: PUVDERS(:,:) + +REAL(KIND=JPRBT) :: ZACHTE(R%NDGL),ZMUL, ZACHTE2, ZSHIFT, ZPI +REAL(KIND=JPRBT) :: ZAMP, ZPHASE +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + + +INTEGER(KIND=JPIM) :: JLON,JF,IGLG,II,IR,JM + +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC +!DEBUGGING: +integer :: i,J,maxi,maxj +real :: maxv + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +IF( KF_UV > 0 ) THEN + PUV => ZGTF(2*KST_UV-1:2*(KST_UV+2*KF_UV-1),:) +ENDIF +PSCALAR => ZGTF(2*KST_SC-1:2*(KST_SC+KF_SCALARS-1),:) +IF( KF_SCDERS > 0 ) THEN + PNSDERS => ZGTF(2*KST_nsders-1:2*(KST_nsders+KF_SCDERS-1),:) + PEWDERS => ZGTF(2*KST_ewders-1:2*(KST_ewders+KF_SCDERS-1),:) +ENDIF +IF (LUVDER) THEN + PUVDERS => ZGTF(2*KST_uvders-1:2*(KST_uvders+2*KF_UV-1),:) +ENDIF + +ZACHTE(:) = F%RACTHE(:) + +#ifdef ACCGPU +!$ACC DATA & +!$ACC& COPYIN(ZACHTE,IBEG,IEND,IINC,KF_SCALARS,KF_UV,KF_SCDERS,MYSETW) & +!$ACC& PRESENT(ZGTF,D_NPTRLS,MYSETW,G_NMEN,D_NSTAGTF) & +!$ACC& PRESENT(PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP(PRESENT,ALLOC:ZGTF) & +!$OMP& MAP(ALLOC:PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) +#endif + +DO KGL=IBEG,IEND,IINC + +IGLG = D_NPTRLS(MYSETW)+KGL-1 +IMEN = G_NMEN(IGLG) +ISTAGTF = D_NSTAGTF(KGL) +ZACHTE2 = ZACHTE(IGLG) + +! ------------------------------------------------------------------ + +!* 1. DIVIDE U V AND N-S DERIVATIVES BY A*COS(THETA) +! ---------------------------------------------- + + +!* 1.1 U AND V. + +IF(KF_UV > 0) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(ISTAGTF,IMEN,KF_UV,PUV,ZACHTE2) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) & + !$ACC& COPYIN(KF_UV,IMEN,ISTAGTF,ZACHTE2) & + !$ACC& PRESENT(PUV) +#endif + DO JLON=ISTAGTF+1,ISTAGTF+2*IMEN+1 + DO JF=1,2*KF_UV + PUV(2*JF-1,JLON) = PUV(2*JF-1,JLON)*ZACHTE2 + PUV(2*JF, JLON) = PUV(2*JF ,JLON)*ZACHTE2 + ENDDO + ENDDO +ENDIF + +!* 1.2 N-S DERIVATIVES + +IF(KF_SCDERS > 0)THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(ISTAGTF,IMEN,KF_SCALARS,PNSDERS,ZACHTE2) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) & + !$ACC& COPYIN(KF_SCALARS,IMEN,ISTAGTF,ZACHTE2) & + !$ACC& PRESENT(PNSDERS) +#endif + DO JLON=ISTAGTF+1,ISTAGTF+2*IMEN+1 + DO JF=1,KF_SCALARS + PNSDERS(2*JF-1,JLON) = PNSDERS(2*JF-1,JLON)*ZACHTE2 + PNSDERS(2*JF, JLON) = PNSDERS(2*JF, JLON)*ZACHTE2 + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IR) SHARED(ISTAGTF,IMEN,KF_UV,PUVDERS,ZACHTE2,PUV) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IR) DEFAULT(NONE) & + !$ACC& COPYIN(KF_UV,IMEN,ISTAGTF,ZACHTE2) & + !$ACC& PRESENT(PUV,PUVDERS) +#endif + DO JM=0,IMEN + DO JF=1,2*KF_UV + IR = ISTAGTF+2*JM+1 + PUVDERS(2*JF-1,IR) = -PUV(2*JF,IR)*ZACHTE2*REAL(JM,JPRBT) + PUVDERS(2*JF, IR) = PUV(2*JF-1,IR)*ZACHTE2*REAL(JM,JPRBT) + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IR) SHARED(ISTAGTF,IMEN,KF_SCALARS,PEWDERS,ZACHTE2,PSCALAR) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IR) DEFAULT(NONE) & + !$ACC& COPYIN(KF_SCALARS,IMEN,ISTAGTF,ZACHTE2) & + !$ACC& PRESENT(PEWDERS,PSCALAR) +#endif + DO JM=0,IMEN + DO JF=1,KF_SCALARS + IR = ISTAGTF+2*JM+1 + PEWDERS(2*JF-1,IR) = -PSCALAR(2*JF,IR)*ZACHTE2*REAL(JM,JPRBT) + PEWDERS(2*JF, IR) = PSCALAR(2*JF-1,IR)*ZACHTE2*REAL(JM,JPRBT) + ENDDO + ENDDO +ENDIF + +enddo +#ifdef ACCGPU +!$ACC END DATA +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +! ------------------------------------------------------------------ + +END SUBROUTINE FSC +END MODULE FSC_MOD diff --git a/src/trans/gpu/internal/fscad_mod.F90 b/src/trans/gpu/internal/fscad_mod.F90 new file mode 100755 index 0000000..66d67b1 --- /dev/null +++ b/src/trans/gpu/internal/fscad_mod.F90 @@ -0,0 +1,146 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FSCAD_MOD +CONTAINS +SUBROUTINE FSCAD(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *FSCAD - Division by a*cos(theta), east-west derivatives - adjoint + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSCAD(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_FIELDS ,ONLY : F +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRBT) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRBT) , INTENT(INOUT) :: PSCALAR(:,:) +REAL(KIND=JPRBT) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRBT) , INTENT(INOUT) :: PEWDERS(:,:) +REAL(KIND=JPRBT) , INTENT(INOUT) :: PUVDERS(:,:) + +REAL(KIND=JPRBT) :: ZACHTE,ZMUL +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + + +INTEGER(KIND=JPIM) :: JLON,JF,IGLG,II,IR,JM + +! ------------------------------------------------------------------ + +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ZACHTE = F%RACTHE(IGLG) +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) + + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ZMUL = ZACHTE*JM + DO JF=1,2*KF_UV + PUV(JF,II) = PUV(JF,II) - PUVDERS(JF,IR)*ZMUL + PUV(JF,IR) = PUV(JF,IR) + PUVDERS(JF,II)*ZMUL +! PUVDERS(JF,IR) = _ZERO_ +! PUVDERS(JF,II) = _ZERO_ + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + IR = ISTAGTF+2*JM+1 + II = IR+1 + ZMUL = ZACHTE*JM + DO JF=1,KF_SCALARS + PSCALAR(JF,II) = PSCALAR(JF,II) - PEWDERS(JF,IR)*ZMUL + PSCALAR(JF,IR) = PSCALAR(JF,IR) + PEWDERS(JF,II)*ZMUL +! PEWDERS(JF,IR) = _ZERO_ +! PEWDERS(JF,II) = _ZERO_ + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +!* 1. DIVIDE U V AND N-S DERIVATIVES BY A*COS(THETA) +! ---------------------------------------------- + + +!* 1.1 U AND V. + +IF(KF_UV > 0) THEN + DO JLON=ISTAGTF+1,ISTAGTF+2*(IMEN+1) + DO JF=1,2*KF_UV + PUV(JF,JLON) = PUV(JF,JLON)*ZACHTE + ENDDO + ENDDO +ENDIF + +!* 1.2 N-S DERIVATIVES + +IF(KF_SCDERS > 0)THEN + DO JLON=ISTAGTF+1,ISTAGTF+2*(IMEN+1) + DO JF=1,KF_SCALARS + PNSDERS(JF,JLON) = PNSDERS(JF,JLON)*ZACHTE + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE FSCAD +END MODULE FSCAD_MOD diff --git a/src/trans/gpu/internal/fspgl_int_mod.F90 b/src/trans/gpu/internal/fspgl_int_mod.F90 new file mode 100755 index 0000000..457eb38 --- /dev/null +++ b/src/trans/gpu/internal/fspgl_int_mod.F90 @@ -0,0 +1,157 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FSPGL_INT_MOD +CONTAINS +SUBROUTINE FSPGL_INT(KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,& + & FSPGL_PROC,KFLDPTRUV,KFLDPTRSC) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN, LDIVGP, LVORGP +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NASM0,D_NPROCL,D_NSTAGT0B,D_NPNTGTB0,D_NPNTGTB1 +USE TPM_FIELDS ,ONLY : F +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: KM, KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT +EXTERNAL FSPGL_PROC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +! +! ZFIELD 2nd dimension is extended from 0 to R%NDGL+1, while only 1 to R%NDGL +! is given from the north/south transforms, and only 1 to R%NDGL rows will be +! passed to the east/west transforms. +! the 2 extra rows are used inside the model Fourier space computations +! (outside the transform package - see FSPGLH in Arpege/IFS). +! +REAL(KIND=JPRBT) :: ZFIELD(2*KF_OUT_LT,0:R%NDGL+1) + + +INTEGER(KIND=JPIM) :: ISL, IGLS, JFLD, JGL ,IPROC, IPROCS +INTEGER(KIND=JPIM) :: IPTRU,IST,J +INTEGER(KIND=JPIM) :: IDGNH,IDGL +INTEGER(KIND=JPIM) :: ISTAN(R%NDGNH),ISTAS(R%NDGNH) +INTEGER(KIND=JPIM) :: IFLDPTRUV(KF_UV),IFLDPTRSC(KF_SCALARS) +! ------------------------------------------------------------------ +#ifdef ACCGPU +!$ACC DATA IF(PRESENT(KFLDPTRUV)) COPYIN(KFLDPTRUV,KFLDPTRSC) +!$ACC DATA CREATE(IFLDPTRUV,IFLDPTRSC,ISTAN,ISTAS,ZFIELD) & +!$ACC& PRESENT(D_MYMS,D%NSTAGT0B,D%NPNTGTB1,D%NPROCL,FOUBUF_IN) +#endif +#ifdef OMPGPU +!WARNING: the last ALLOC statement should be PRESENT,ALLOC but cause issues with AMD compiler! +!$OMP TARGET DATA IF(PRESENT(KFLDPTRUV)) MAP(TO:KFLDPTRUV,KFLDPTRSC) +!$OMP TARGET DATA MAP(ALLOC:IFLDPTRUV,IFLDPTRSC,ISTAN,ISTAS,ZFIELD) & +!$OMP& MAP(ALLOC:D_MYMS,D%NSTAGT0B,D%NPNTGTB1,D%NPROCL,FOUBUF_IN) +#endif +IF(PRESENT(KFLDPTRUV)) THEN + IFLDPTRUV(:) = KFLDPTRUV(1:KF_UV) + IFLDPTRSC(:) = KFLDPTRSC(1:KF_SCALARS) +ELSE + DO J=1,KF_UV + IFLDPTRUV(J) = J + ENDDO + DO J=1,KF_SCALARS + IFLDPTRSC(J) = J + ENDDO +ENDIF + +!loop over wavenumber +DO KMLOC=1,D_NUMP + KM = D_MYMS(KMLOC) + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IDGNH = R%NDGNH +IDGL = R%NDGL +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP +#endif +DO JGL=ISL,IDGNH + IPROC = D_NPROCL(JGL) + ISTAN(JGL) = (D_NSTAGT0B(IPROC) + D_NPNTGTB1(KMLOC,JGL))*2*KF_OUT_LT + IGLS = IDGL+1-JGL + IPROCS = D_NPROCL(IGLS) + ISTAS(JGL) = (D_NSTAGT0B(IPROCS) + D_NPNTGTB1(KMLOC,IGLS))*2*KF_OUT_LT +ENDDO + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(2) +#endif +DO JGL=ISL,IDGNH + DO JFLD=1,2*KF_OUT_LT + IGLS = IDGL+1-JGL + ZFIELD(JFLD,JGL) = FOUBUF_IN(ISTAN(JGL)+JFLD) + ZFIELD(JFLD,IGLS) = FOUBUF_IN(ISTAS(JGL)+JFLD) + ENDDO +ENDDO + +IST = 1 +IF(LVORGP) THEN + IST = IST+2*KF_UV +ENDIF +IF(LDIVGP) THEN + IST = IST+2*KF_UV +ENDIF +IPTRU = IST + + + + +CALL FSPGL_PROC(KM,ISL,IDGL,KF_OUT_LT,F%R1MU2,ZFIELD,& + & IPTRU,KF_UV,KF_SCALARS,& + & IFLDPTRUV) + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(2) +#endif + DO JGL=ISL,IDGNH + DO JFLD=1,2*KF_OUT_LT + IGLS = IDGL+1-JGL + !OCL NOVREC + FOUBUF_IN(ISTAN(JGL)+JFLD) = ZFIELD(JFLD,JGL) + FOUBUF_IN(ISTAS(JGL)+JFLD) = ZFIELD(JFLD,IGLS) + ENDDO +ENDDO + +!end loop over wavenumber +END DO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif +! ------------------------------------------------------------------ + +END SUBROUTINE FSPGL_INT +END MODULE FSPGL_INT_MOD diff --git a/src/trans/gpu/internal/ftdir_ctl_mod.F90 b/src/trans/gpu/internal/ftdir_ctl_mod.F90 new file mode 100755 index 0000000..6c98972 --- /dev/null +++ b/src/trans/gpu/internal/ftdir_ctl_mod.F90 @@ -0,0 +1,266 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_CTL_MOD +CONTAINS +SUBROUTINE FTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTDIR_CTL - Direct Fourier transform control + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTL(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_GEN ,ONLY : NOUT +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : ZGTF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE TRGTOL_MOD ,ONLY : TRGTOL, TRGTOL_CUDAAWARE +USE FOURIER_OUT_MOD ,ONLY : FOURIER_OUT +USE FTDIR_MOD ,ONLY : FTDIR +use ieee_arithmetic +! + +IMPLICIT NONE + + +INTERFACE + SUBROUTINE cudaProfilerStart() BIND(C,name='cudaProfilerStart') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStart +END INTERFACE + +INTERFACE + SUBROUTINE cudaProfilerStop() BIND(C,name='cudaProfilerStop') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStop +END INTERFACE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! Local variables +!REAL(KIND=JPRBT),ALLOCATABLE :: ZGTF(:,:) + +INTEGER(KIND=JPIM) :: IST,JGL,IGL,JF_FS +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC +INTEGER(KIND=JPIM) :: ISIZE,IFIELDS,ICHUNK,ICHUNKS,JK + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +!call cudaProfilerStart() + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +! Transposition + +CALL GSTATS(158,0) + +! needed ??? JF_FS=KF_FS-D%IADJUST_D +#ifdef USE_CUDA_AWARE_MPI_FT +CALL GSTATS(430,0) +#ifdef ACCGPU +!$ACC DATA IF(PRESENT(PGP)) COPYIN(PGP) +!$ACC DATA IF(PRESENT(PGPUV)) COPYIN(PGPUV) +!$ACC DATA IF(PRESENT(PGP2)) COPYIN(PGP2) +!$ACC DATA IF(PRESENT(PGP3A)) COPYIN(PGP3A) +!$ACC DATA IF(PRESENT(PGP3B)) COPYIN(PGP3B) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA IF(PRESENT(PGP)) MAP(TO:PGP) +!$OMP TARGET DATA IF(PRESENT(PGPUV)) MAP(TO:PGPUV) +!$OMP TARGET DATA IF(PRESENT(PGP2)) MAP(TO:PGP2) +!$OMP TARGET DATA IF(PRESENT(PGP3A)) MAP(TO:PGP3A) +!$OMP TARGET DATA IF(PRESENT(PGP3B)) MAP(TO:PGP3B) +#endif +CALL GSTATS(430,1) +CALL TRGTOL_CUDAAWARE(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) +#ifdef OMPGPU +!$OMP END TARGET DATA +!$OMP END TARGET DATA +!$OMP END TARGET DATA +!$OMP END TARGET DATA +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +!$ACC END DATA +!$ACC END DATA +!$ACC END DATA +!$ACC END DATA +#endif +#else +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZGTF) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZGTF) +#endif +#endif + +CALL GSTATS(158,1) +CALL GSTATS(106,0) + +! Fourier transform + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + + +CALL GSTATS(1640,0) +!!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +IF(KF_FS>0) THEN + ! TRY THIS IN CHUNKS, ISIZE is even, need equal and even chunks too + ISIZE=size(zgtf,1) + !ICHUNKS=2 + !ICHUNK=ISIZE/ICHUNKS + !ICHUNK=ICHUNK+MOD(ICHUNK,2) + !DO JK=ICHUNKS,1,-1 + ! repeat some fields to have constant chunk size + !IOFF=MAX(1,ISIZE-(ICHUNKS-JK+1)*ICHUNK+1) + IOFF=1 + !ICHUNK=2*KF_FS+2 + ICHUNK=ISIZE + CALL FTDIR(ICHUNK) + !ENDDO +ENDIF + +! Save Fourier data in FOUBUF_IN + + CALL FOURIER_OUT(KF_FS) +#ifndef USE_CUDA_AWARE_MPI_FT +#ifdef ACCGPU + !$ACC UPDATE HOST(FOUBUF_IN) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(FOUBUF_IN) +#endif +#endif + +CALL GSTATS(1640,1) +!DEALLOCATE(ZGTF) +CALL GSTATS(106,1) +! ------------------------------------------------------------------ +!call cudaProfilerStop() +END SUBROUTINE FTDIR_CTL +END MODULE FTDIR_CTL_MOD + diff --git a/src/trans/gpu/internal/ftdir_ctlad_mod.F90 b/src/trans/gpu/internal/ftdir_ctlad_mod.F90 new file mode 100755 index 0000000..6716ad8 --- /dev/null +++ b/src/trans/gpu/internal/ftdir_ctlad_mod.F90 @@ -0,0 +1,187 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE FTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTDIR_CTLAD - Direct Fourier transform control - adjoint + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTLAD(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE TRLTOG_MOD ,ONLY : TRLTOG +USE FOURIER_OUTAD_MOD ,ONLY : FOURIER_OUTAD +USE FTDIRAD_MOD ,ONLY : FTDIRAD +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP2(:,:,:) + +! Local variables +REAL(KIND=JPRBT) :: ZGTF(KF_FS,D%NLENGTF) + + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: JGL,IGL +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +CALL GSTATS(133,0) + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1642,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=IBEG,IEND,IINC + IGL = JGL + CALL FOURIER_OUTAD(ZGTF,KF_FS,IGL) + +! Fourier transform + + IF(KF_FS>0) THEN + CALL FTDIRAD(ZGTF,KF_FS,IGL) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1642,1) +CALL GSTATS(133,1) + +! Transposition + +CALL GSTATS(183,0) +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) + +CALL GSTATS(183,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIR_CTLAD +END MODULE FTDIR_CTLAD_MOD + + + diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 new file mode 100755 index 0000000..4e5ff3d --- /dev/null +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -0,0 +1,180 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_MOD +CONTAINS +SUBROUTINE FTDIR(KFIELDS) + + +!**** *FTDIR - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPIB, JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC,D_NSTAGTF,D_NPTRLS +USE TPM_TRANS ,ONLY : ZGTF +USE TPM_GEOMETRY ,ONLY : G,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX +USE TPM_FFT ,ONLY : T +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, EXECUTE_PLAN_FFT +USE TPM_DIM ,ONLY : R,R_NNOEXTZL +USE HIP_DEVICE_MOD +USE ISO_C_BINDING +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM) :: KGL +!!!REAL(KIND=JPRBT), INTENT(INOUT) :: PREEL(KFIELDS,D%NLENGTF) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,IST1 +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE +TYPE(C_PTR) :: IPLAN_R2C +INTEGER(KIND=JPIM) :: JMAX +REAL(KIND=JPRBT) :: SCAL +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC,ISCAL +INTEGER(KIND=JPIM) :: OFFSET_VAR, IUNIT, ISIZE, II, IMAX, IDIM1, IDIM2, I, J +integer :: istat, idev +REAL(KIND=JPRBT), ALLOCATABLE :: ZGTF2(:,:) + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +OFFSET_VAR=D_NPTRLS(MYSETW) + +IMAX = G_NLOEN_MAX + 2 + R_NNOEXTZL + +IDIM1=size(zgtf,1) +IDIM2=size(zgtf,2) +ALLOCATE(ZGTF2(IDIM1,IDIM2)) +#ifdef ACCGPU +!$ACC ENTER DATA CREATE(ZGTF2) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP(ALLOC:ZGTF2) +#endif + + +DO KGL=IBEG,IEND,IINC + IOFF=D_NSTAGTF(KGL)+1 + IGLG = D_NPTRLS(MYSETW)+KGL-1 + + CALL CREATE_PLAN_FFT(IPLAN_R2C,-1,G_NLOEN(IGLG),KFIELDS) + CALL EXECUTE_PLAN_FFT(-1,G_NLOEN(IGLG),ZGTF(1,IOFF),ZGTF2(1,IOFF),IPLAN_R2C) +END DO + +ISTAT = HIP_SYNCHRONIZE() + +! need a faster way for this, in place transforms ? Nils +#ifdef ACCGPU +!$ACC DATA +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(IGLG,JJ) DEFAULT(NONE) & +!$OMP& SHARED(ZGTF,ZGTF2,IDIM1,IDIM2) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(JF,JJ) DEFAULT(NONE) & +!$ACC& COPYIN(IDIM1,IDIM2) & +!$ACC& PRESENT(ZGTF,ZGTF2) +#endif +DO JJ=1, IDIM2 + DO JF=1,IDIM1 + ZGTF(JF,JJ) = ZGTF2(JF, JJ) + ENDDO +ENDDO + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(JMAX,KGL,IOFF,SCAL,IST) DEFAULT(NONE) & +!$OMP& SHARED(IBEG,OFFSET_VAR,IEND,IINC,IMAX,KFIELDS,G_NLOEN,G_NMEN,D_NSTAGTF,ZGTF,R_NNOEXTZL) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(JMAX,KGL,IOFF,SCAL,IST) DEFAULT(NONE) & +!$ACC& COPYIN(IBEG,IEND,IINC,OFFSET_VAR,KFIELDS,IMAX) & +!$ACC& PRESENT(ZGTF,G_NLOEN,G_NMEN,D_NSTAGTF,R_NNOEXTZL) +#endif +DO IGLG=IBEG+OFFSET_VAR-1,IEND+OFFSET_VAR-1,IINC + DO JJ=1, IMAX + DO JF=1,KFIELDS + JMAX = G_NLOEN(IGLG) + IST = 2*(G_NMEN(IGLG)+1) + IF (JJ .LE. JMAX) THEN + KGL=IGLG-OFFSET_VAR+1 + IOFF=D_NSTAGTF(KGL)+1 + SCAL = 1._JPRBT/REAL(G_NLOEN(IGLG),JPRBT) + ZGTF(JF,IOFF+JJ-1)= SCAL * ZGTF(JF, IOFF+JJ-1) + END IF + + ! case JJ>0 + IF( JJ .LE. (JMAX+R_NNOEXTZL+2-IST)) ZGTF(JF,IST+IOFF+JJ-1) = 0.0_JPRBT + ! case JJ=0 + IF (G_NLOEN(IGLG)==1) ZGTF(JF,IST+IOFF-1) = 0.0_JPRBT + ENDDO + ENDDO +ENDDO + +#ifdef ACCGPU +!$ACC END DATA +#endif + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif + +#ifdef ACCGPU +!$ACC EXIT DATA DELETE(ZGTF2) +#endif +DEALLOCATE(ZGTF2) +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIR +END MODULE FTDIR_MOD diff --git a/src/trans/gpu/internal/ftdirad_mod.F90 b/src/trans/gpu/internal/ftdirad_mod.F90 new file mode 100755 index 0000000..d3bd886 --- /dev/null +++ b/src/trans/gpu/internal/ftdirad_mod.F90 @@ -0,0 +1,94 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIRAD_MOD +CONTAINS +SUBROUTINE FTDIRAD(PREEL,KFIELDS,KGL) + + +!**** *FTDIRAD - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform - adjoint +! -------- + +!** Interface. +! ---------- +! CALL FTDIRAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T +USE TPM_DIM ,ONLY : R + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRBT), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +REAL(KIND=JPRBT) :: ZMUL +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +! ------------------------------------------------------------------ + +ITYPE = 1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IST = 2*(G%NMEN(IGLG)+1)+1 +ILOEN = G%NLOEN(IGLG)+R%NNOEXTZL +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 +IRLEN = ILOEN +ICLEN = (IRLEN/2+1)*2 + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRBT + ENDDO +ENDDO + + ! Change of metric (not in forward routine) + +ZMUL = 1.0_JPRBT/ILOEN +DO JJ=1,ILOEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = PREEL(JF,IOFF-1+JJ)*ZMUL + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIRAD +END MODULE FTDIRAD_MOD diff --git a/src/trans/gpu/internal/ftinv_ctl_mod.F90 b/src/trans/gpu/internal/ftinv_ctl_mod.F90 new file mode 100755 index 0000000..c2482e3 --- /dev/null +++ b/src/trans/gpu/internal/ftinv_ctl_mod.F90 @@ -0,0 +1,274 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_CTL_MOD +CONTAINS +SUBROUTINE FTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTINV_CTL - Inverse Fourier transform control + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV_CTL(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP,LATLON, ZGTF +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC +USE TPM_FLT ,ONLY : S +USE FOURIER_IN_MOD ,ONLY : FOURIER_IN +USE FSC_MOD ,ONLY : FSC +USE FTINV_MOD ,ONLY : FTINV +USE TRLTOG_MOD ,ONLY : TRLTOG, TRLTOG_CUDAAWARE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE IEEE_ARITHMETIC +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +INTEGER(KIND=JPIM) :: IST_UV, IST_SC, IST_NSDERS, IST_UVDERS, IST_EWDERS, JF_FS + +IST_UV = 1 +IST_SC = 1 +IST_NSDERS = 1 +IST_UVDERS = 1 +IST_EWDERS = 1 + +! ------------------------------------------------------------------ + +! 1. Copy Fourier data to local array + +CALL GSTATS(107,0) + +IF (KF_UV > 0 .OR. KF_SCDERS > 0 .OR. (LATLON.AND.S%LDLL) ) THEN + IST = 1 + IF (LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF (LDIVGP) THEN + IST = IST+KF_UV + ENDIF + IST_UV = IST + IST = IST+2*KF_UV + IST_SC = IST + IST = IST+KF_SCALARS + IST_NSDERS = IST + IST = IST+KF_SCDERS + IF (LUVDER) THEN + IST_UVDERS = IST + IST = IST+2*KF_UV + ENDIF + IF (KF_SCDERS > 0) THEN + IST_EWDERS = IST + ENDIF +ENDIF +IF (MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1639,0) +! from FOUBUF to ZGTF +CALL FOURIER_IN(ZGTF,KF_OUT_LT) + +! 2. Fourier space computations + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL FSC(KF_UV,KF_SCALARS,KF_SCDERS,IST_UV,IST_SC,IST_NSDERS,IST_EWDERS,IST_UVDERS) +ENDIF + +! 3. Fourier transform +IF(KF_FS > 0) THEN + ! from ZGTF to ZGTF + CALL FTINV(ZGTF,size(zgtf,1)) +ENDIF + +CALL GSTATS(1639,1) + +CALL GSTATS(107,1) + +! 4. Transposition + +IF (PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:)=-1 +IF (PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF (PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF (PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF (LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF (PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF (LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF (IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTL_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF (KF_UV_G > 0) THEN + IF (LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF ( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF (KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF (LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF (KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF (KF_SCALARS_G > 0) THEN + IF (LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(157,0) +! needed ? JF_FS=KF_FS-D%IADJUST_I +#ifdef USE_CUDA_AWARE_MPI_FT +WRITE(NOUT,*) 'ftinv_ctl:TRLTOG_CUDAAWARE' +CALL TRLTOG_CUDAAWARE(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +#else +!WRITE(NOUT,*) 'ftinv_ctl:TRLTOG' +#ifdef ACCGPU +!$ACC UPDATE HOST(ZGTF) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE FROM(ZGTF) +#endif +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +#endif +CALL GSTATS(157,1) +! ------------------------------------------------------------------ + +!DEALLOCATE(ZGTF) + +END SUBROUTINE FTINV_CTL +END MODULE FTINV_CTL_MOD diff --git a/src/trans/gpu/internal/ftinv_ctlad_mod.F90 b/src/trans/gpu/internal/ftinv_ctlad_mod.F90 new file mode 100755 index 0000000..0219cca --- /dev/null +++ b/src/trans/gpu/internal/ftinv_ctlad_mod.F90 @@ -0,0 +1,293 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_CTLAD_MOD +CONTAINS +SUBROUTINE FTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTINV_CTLAD - Inverse Fourier transform control - adjoint + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV_CTLAD(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE FOURIER_INAD_MOD,ONLY : FOURIER_INAD +USE FSCAD_MOD ,ONLY : FSCAD +USE FTINVAD_MOD ,ONLY : FTINVAD +USE TRGTOL_MOD ,ONLY : TRGTOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! ------------------------------------------------------------------ + +REAL(KIND=JPRBT),TARGET :: ZGTF(KF_FS,D%NLENGTF) +REAL(KIND=JPRBT),TARGET :: ZDUM(1,D%NLENGTF) +REAL(KIND=JPRBT),POINTER :: ZUV(:,:) +REAL(KIND=JPRBT),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRBT),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRBT),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRBT),POINTER :: ZUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IST,IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +! ------------------------------------------------------------------ + +! 4. Transposition + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF + +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV_CTLAD:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTLAD_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(182,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(182,1) + +! 3. Fourier transform + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + IF(KF_UV>0)ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + IF(KF_SCALARS>0)ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + IST = IST+KF_SCALARS + IF(KF_SCDERS>0)ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(1:1,:) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZEWDERS => ZDUM(1:1,:) + ENDIF +ENDIF + +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF + +CALL GSTATS(132,0) + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1641,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=IBEG,IEND,IINC + IGL = JGL + IF(KF_FS > 0) THEN + CALL FTINVAD(ZGTF,KF_FS,IGL) + ENDIF + +! 2. Fourier space computations + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL FSCAD(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 1. Copy Fourier data to local array + + CALL FOURIER_INAD(ZGTF,KF_OUT_LT,IGL) + +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1641,1) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF + +CALL GSTATS(132,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE FTINV_CTLAD +END MODULE FTINV_CTLAD_MOD + + + diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 new file mode 100755 index 0000000..76cc0f6 --- /dev/null +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -0,0 +1,176 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_MOD +CONTAINS +SUBROUTINE FTINV(PREEL,KFIELDS) + +!**** *FTINV - Inverse Fourier transform + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 : 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT + +USE TPM_DISTR ,ONLY : D,D_NSTAGTF,D_NPTRLS, MYSETW, MYPROC, NPROC +USE TPM_GEOMETRY ,ONLY : G, G_NLOEN, G_NMEN +USE TPM_GEN ,ONLY : NOUT +USE TPM_FFT ,ONLY : T +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, EXECUTE_PLAN_FFT +USE TPM_DIM ,ONLY : R, R_NNOEXTZL +USE HIP_DEVICE_MOD +USE ISO_C_BINDING + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM) :: KGL +REAL(KIND=JPRBT), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,IST1 +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +TYPE(C_PTR) :: IPLAN_C2R +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC,ISIZE, IDIM2 +integer :: istat,idev, iunit +INTEGER :: I, J + +REAL(KIND=JPRBT), allocatable :: ZREEL2(:,:) + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +ISIZE=size(PREEL,1) +#ifdef ACCGPU +!$ACC DATA & +!$ACC& PRESENT(PREEL) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP(PRESENT,ALLOC:PREEL) +#endif + +#ifdef ACCGPU +!$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IOFF,IGLG,IST,ILEN,IST1) & +!$ACC& COPYIN(IBEG,IEND,IINC,KFIELDS,MYSETW) & +!$ACC& PRESENT(D_NSTAGTF,D_NPTRLS,G_NMEN,G_NLOEN,R,R_NNOEXTZL,PREEL) +#endif +#ifdef OMPGPU +!WARNING: in OpenACC we have nested ACC loops. Can't get this to work in OpenMP with AMD compiler +!!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IOFF,IGLG,IST,ILEN,IST1) & +!!$OMP& SHARED(IBEG,IEND,IINC,MYSETW,KFIELDS,PREEL,D_NSTAGTF,D_NPTRLS,G_NMEN,G_NLOEN,R,R_NNOEXTZL) +#endif +DO KGL=IBEG,IEND,IINC + + IOFF = D_NSTAGTF(KGL)+1 + IGLG = D_NPTRLS(MYSETW)+KGL-1 + IST = 2*(G_NMEN(IGLG)+1) + ILEN = G_NLOEN(IGLG)+R_NNOEXTZL+2-IST + IST1=1 + IF (G_NLOEN(IGLG)==1) IST1=0 + +#ifdef ACCGPU + !$ACC LOOP COLLAPSE(2) +#endif +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(JJ,JF) SHARED(ILEN,KFIELDS,PREEL,IST1,IST,IOFF) +#endif + DO JJ=IST1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF+JJ-1) = 0.0_JPRBT + ENDDO + ENDDO + +END DO +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +IDIM2=size(PREEL,2) +ALLOCATE(ZREEL2(ISIZE,IDIM2)) +#ifdef ACCGPU +!$ACC ENTER DATA CREATE(ZREEL2) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP(ALLOC:ZREEL2,PREEL) +#endif + +DO KGL=IBEG,IEND,IINC + IOFF=D_NSTAGTF(KGL)+1 + IGLG = D_NPTRLS(MYSETW)+KGL-1 + CALL CREATE_PLAN_FFT(IPLAN_C2R,1,G_NLOEN(IGLG),KFIELDS) + CALL EXECUTE_PLAN_FFT(1,G_NLOEN(IGLG),PREEL(1, ioff),ZREEL2(1, ioff),IPLAN_C2R) +END DO +ISTAT = HIP_SYNCHRONIZE() + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(IGLG,JJ) DEFAULT(NONE) & +!$OMP& SHARED(ZREEL2,PREEL,IDIM2,ISIZE) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(JF,JJ) DEFAULT(NONE) & +!$ACC& COPYIN(ISIZE,IDIM2) & +!$ACC& PRESENT(ZREEL2,PREEL) +#endif +DO JJ=1, IDIM2 + DO JF=1,ISIZE + PREEL(JF,JJ) = ZREEL2(JF, JJ) + ENDDO +ENDDO +! ------------------------------------------------------------------ + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC EXIT DATA DELETE(ZREEL2) +#endif +DEALLOCATE(ZREEL2) + +END SUBROUTINE FTINV +END MODULE FTINV_MOD diff --git a/src/trans/gpu/internal/ftinvad_mod.F90 b/src/trans/gpu/internal/ftinvad_mod.F90 new file mode 100755 index 0000000..338d7f7 --- /dev/null +++ b/src/trans/gpu/internal/ftinvad_mod.F90 @@ -0,0 +1,94 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINVAD_MOD +CONTAINS +SUBROUTINE FTINVAD(PREEL,KFIELDS,KGL) + + +!**** *FTINVAD - Inverse Fourier transform - adjoint + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL FTINVAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPIB, JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FFT ,ONLY : T +USE TPM_DIM ,ONLY : R +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRBT), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +! ------------------------------------------------------------------ + +ITYPE =-1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG)+R%NNOEXTZL +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 +IRLEN = ILOEN +ICLEN = (IRLEN/2+1)*2 + + ! Change of metric (not in forward routine) +DO JJ=1,ILOEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = PREEL(JF,IOFF-1+JJ)*ILOEN + ENDDO +ENDDO + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRBT + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE FTINVAD +END MODULE FTINVAD_MOD diff --git a/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 b/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 new file mode 100755 index 0000000..9af5b01 --- /dev/null +++ b/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 @@ -0,0 +1,277 @@ +! (C) Copyright 2000- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GATH_GRID_32_CTL_MOD +CONTAINS +SUBROUTINE GATH_GRID_32_CTL(PGPG,KFGATHG,KPROMA,KTO,PGP) + +!**** *GATH_GRID_32_CTL* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Routine for gathering gridpoint array + +!** Interface. +! ---------- +! CALL GATH_GRID_32_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! PGP(:,:,:) - Local spectral array +! +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRM +USE MPL_MODULE + +USE TPM_GEN +USE TPM_DIM +USE TPM_GEOMETRY +USE TPM_DISTR + +USE SET2PE_MOD +USE EQ_REGIONS_MOD + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +REAL(KIND=JPRM) , INTENT(IN) :: PGP(:,:,:) + +! Declaration of local variables + +REAL(KIND=JPRM) :: ZFLD(D%NGPTOTMX*KFGATHG),ZDUM(D%NGPTOTMX) +REAL(KIND=JPRM),ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM) :: IFLDR,JFLD,ITAG,ILEN,JA,JB,ISND,JGL,JLON,ILOFF,ILENB,IST +INTEGER(KIND=JPIM) :: IRCV,IOFF,ILAST,IGL1,IGL2,IGLOFF +INTEGER(KIND=JPIM) :: JKGLO,JROF,IEND,J,IBL,IPROC,JROC,IMYFIELDS,ILRECV +INTEGER(KIND=JPIM) :: ISENDREQ(NPROC),IOUNT,ITO +INTEGER(KIND=JPIM) :: ILENS(NPROC),IOFFS(NPROC),ILENR(NPROC),IOFFR(NPROC) +INTEGER(KIND=JPIM) :: IFLDL,IFLDS +LOGICAL :: LLSAME +! ------------------------------------------------------------------ + + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFGATHG + DO JROF=1,IEND + PGPG(IOFF+JROF,JFLD) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1643,1) + +ELSE +! test if values in KTO are all the same + LLSAME=.TRUE. + ITO=KTO(1) + DO JFLD=2,KFGATHG + IF(KTO(JFLD) /= ITO) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO + IFLDL=D%NGPTOTMX + IF(LLSAME) THEN + CALL GSTATS(1643,0) + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JFLD=1,KFGATHG + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+(JFLD-1)*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1643,1) + ELSE + ILENS(:)=0 + IOFFS(:)=0 + ILENR(:)=0 + IOFFR(:)=0 + DO JFLD=1,KFGATHG + ILENS(KTO(JFLD))=ILENS(KTO(JFLD))+IFLDL + IF(KTO(JFLD) == MYPROC) THEN + ILENR(:)=ILENR(:)+IFLDL + ENDIF + ENDDO + DO JROC=2,NPROC + IOFFR(JROC)=IOFFR(JROC-1)+ ILENR(JROC-1) + IOFFS(JROC)=IOFFS(JROC-1)+ ILENS(JROC-1) + ENDDO + IFLDS=0 + DO JROC=1,NPROC + IF(ILENS(JROC) > 0) THEN + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == JROC) THEN + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+IFLDS*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + IFLDS=IFLDS+1 + ENDIF + ENDDO + ENDIF + ENDDO + ENDIF + + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX*IMYFIELDS*NPROC)) + ELSE + ALLOCATE(ZBUF(1)) + ENDIF + IFLDR = 0 + CALL GSTATS_BARRIER(789) + CALL GSTATS(809,0) + + IF( LLSAME )THEN + !Send + ISND = KTO(1) + ITAG = MTAGDISTSP+1+17 + CALL MPL_SEND(ZFLD,KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(1),& + &CDSTRING='GATH_GRID_32_CTL:') + + ! RECIEVE + IF(KTO(1) == MYPROC) THEN + IFLDR = KFGATHG + DO JROC=1,NPROC + ITAG = MTAGDISTSP+1+17 + IRCV = JROC + IOFF=IFLDL*KFGATHG*(JROC-1) + CALL MPL_RECV(ZBUF(IOFF+1:IOFF+IFLDL*KFGATHG),KSOURCE=NPRCIDS(IRCV),& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILRECV,& + &KTAG=ITAG,CDSTRING='GATH_GRID_32_CTL:') + ENDDO + ENDIF + CALL MPL_WAIT(KREQUEST=ISENDREQ(1), & + & CDSTRING='GATH_GRID_32_CTL: WAIT') + ELSE + IFLDR=IMYFIELDS + CALL MPL_ALLTOALLV(PSENDBUF=ZFLD,KSENDCOUNTS=ILENS,& + & PRECVBUF=ZBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & CDSTRING='GATH_GRID_32_CTL:') +!!$ ITAG = MTAGDISTSP+1+17 +!!$ DO JROC=1,NPROC +!!$ ISND=JROC +!!$ IOFF=IOFFS(JROC) +!!$ ILEN=ILENS(JROC) +!!$ IF(ILEN > 0 ) THEN +!!$ CALL MPL_SEND(ZFLD(IOFF+1:IOFF+ILEN),KDEST=NPRCIDS(ISND),KTAG=ITAG,& +!!$ &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISND),& +!!$ &CDSTRING='GATH_GRID_32_CTL:') +!!$ ENDIF +!!$ ENDDO +!!$ DO JROC=1,NPROC +!!$ IRCV = JROC +!!$ IOFF = IOFFR(JROC) +!!$ ILEN = ILENR(JROC) +!!$ IF(ILEN > 0 ) THEN +!!$ CALL MPL_RECV(ZBUF(IOFF+1:IOFF+ILEN),KSOURCE=NPRCIDS(IRCV),& +!!$ &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILRECV,& +!!$ &KTAG=ITAG,CDSTRING='GATH_GRID_32_CTL:') +!!$ ENDIF +!!$ ENDDO +!!$ DO JROC=1,NPROC +!!$ ISND=JROC +!!$ ILEN=ILENS(JROC) +!!$ IF(ILEN > 0 ) THEN +!!$ CALL MPL_WAIT(KREQUEST=ISENDREQ(JROC), & +!!$ & CDSTRING='GATH_GRID_32_CTL: WAIT') +!!$ ENDIF +!!$ ENDDO + ENDIF + + CALL GSTATS(809,1) + CALL GSTATS_BARRIER2(789) + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!$OMP&PRIVATE(JA,JB,IPROC,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!$OMP&ILEN,ILOFF,JGL,JLON,JFLD) + DO JFLD=1,IFLDR + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IPROC,JA,JB,0,0) + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) = & + & ZBUF(ILEN+JLON+(JFLD-1)*IFLDL+(IPROC-1)*IFLDL*IMYFIELDS) + ENDDO + ILEN = ILEN + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + + CALL GSTATS(1643,1) +! Synhronize processors +! Should not be necessary +!!$ CALL GSTATS(784,0) +!!$ CALL MPL_BARRIER(CDSTRING='GATH_GRID_32_CTL:') +!!$ CALL GSTATS(784,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID_32_CTL +END MODULE GATH_GRID_32_CTL_MOD + + diff --git a/src/trans/gpu/internal/gath_grid_ctl_mod.F90 b/src/trans/gpu/internal/gath_grid_ctl_mod.F90 new file mode 100755 index 0000000..c8f5a3d --- /dev/null +++ b/src/trans/gpu/internal/gath_grid_ctl_mod.F90 @@ -0,0 +1,290 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GATH_GRID_CTL_MOD +CONTAINS +SUBROUTINE GATH_GRID_CTL(PGPG,KFGATHG,KPROMA,KTO,PGP) + +!**** *GATH_GRID_CTL* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Routine for gathering gridpoint array + +!** Interface. +! ---------- +! CALL GATH_GRID_CTL(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! PGP(:,:,:) - Local gridpoint array +! +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_RECV, MPL_SEND, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +!USE TPM_GEN +!USE TPM_DIM +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, MTAGDISTSP, NPRCIDS, MYPROC, NPROC + +USE SET2PE_MOD ,ONLY : SET2PE +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +REAL(KIND=JPRB) , INTENT(IN) :: PGP(:,:,:) + +! Declaration of local variables + +REAL(KIND=JPRB) :: ZFLD(D%NGPTOTMX*KFGATHG),ZDUM(D%NGPTOTMX) +REAL(KIND=JPRB),ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IREQ(:) +INTEGER(KIND=JPIM) :: IFLDR,JFLD,ITAG,ILEN,JA,JB,ISND,JGL,JLON,ILOFF,ILENB +INTEGER(KIND=JPIM) :: IRCV,IOFF,ILAST,IGL1,IGL2,IGLOFF,IR +INTEGER(KIND=JPIM) :: JKGLO,JROF,IEND,J,IBL,IPROC,JROC,IMYFIELDS,ILRECV +INTEGER(KIND=JPIM) :: ISENDREQ(KFGATHG),ITO +INTEGER(KIND=JPIM) :: ILENS(NPROC),IOFFS(NPROC),ILENR(NPROC),IOFFR(NPROC) +INTEGER(KIND=JPIM) :: IFLDL,IFLDS +LOGICAL :: LLSAME +! ------------------------------------------------------------------ + + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JFLD=1,KFGATHG + DO JROF=1,IEND + PGPG(IOFF+JROF,JFLD) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1643,1) + +ELSE +! test if values in KTO are all the same + LLSAME=.TRUE. + ITO=KTO(1) + DO JFLD=2,KFGATHG + IF(KTO(JFLD) /= ITO) THEN + LLSAME=.FALSE. + EXIT + ENDIF + ENDDO + + IFLDL=D%NGPTOTMX + IF(LLSAME) THEN + CALL GSTATS(1643,0) + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JKGLO,IEND,IOFF,IBL,JFLD,JROF) + DO JFLD=1,KFGATHG + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+(JFLD-1)*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1643,1) + ELSE + ILENS(:)=0 + IOFFS(:)=0 + ILENR(:)=0 + IOFFR(:)=0 + DO JFLD=1,KFGATHG + ILENS(KTO(JFLD))=ILENS(KTO(JFLD))+IFLDL + IF(KTO(JFLD) == MYPROC) THEN + ILENR(:)=ILENR(:)+IFLDL + ENDIF + ENDDO + DO JROC=2,NPROC + IOFFR(JROC)=IOFFR(JROC-1)+ ILENR(JROC-1) + IOFFS(JROC)=IOFFS(JROC-1)+ ILENS(JROC-1) + ENDDO + IFLDS=0 + DO JROC=1,NPROC + IF(ILENS(JROC) > 0) THEN + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == JROC) THEN + DO JKGLO=1,D%NGPTOT,KPROMA + IEND = MIN(KPROMA,D%NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/KPROMA+1 + DO JROF=1,IEND + ZFLD(IOFF+JROF+IFLDS*IFLDL) = PGP(JROF,JFLD,IBL) + ENDDO + ENDDO + IFLDS=IFLDS+1 + ENDIF + ENDDO + ENDIF + ENDDO + ENDIF + + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + + IF(IMYFIELDS > 0) THEN + ALLOCATE(ZBUF(D%NGPTOTMX*IMYFIELDS*NPROC)) + ELSE + ALLOCATE(ZBUF(1)) + ENDIF + IFLDR = 0 + CALL GSTATS_BARRIER(789) + CALL GSTATS(809,0) + + IF( LLSAME )THEN + !Send + ISND = KTO(1) + ITAG = MTAGDISTSP+1+17 + CALL MPL_SEND(ZFLD,KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(1),& + &CDSTRING='GATH_GRID_CTL:') + ! RECIEVE + IF(KTO(1) == MYPROC) THEN + IFLDR = KFGATHG + DO JROC=1,NPROC + ITAG = MTAGDISTSP+1+17 + IRCV = JROC + IOFF=IFLDL*KFGATHG*(JROC-1) + CALL MPL_RECV(ZBUF(IOFF+1:IOFF+IFLDL*KFGATHG),KSOURCE=NPRCIDS(IRCV),& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILRECV,& + &KTAG=ITAG,CDSTRING='GATH_GRID_CTL:') + ENDDO + ENDIF + CALL MPL_WAIT(KREQUEST=ISENDREQ(1), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ELSE + IFLDR=IMYFIELDS + +! ALLTOALLV performance is really slow when number of fields (KFGATHG) is << NPROC +! This was for IBM - and RECV/SEND alternative causes problems for large number of MPI tasks. + +! IF( KFGATHG >= NPROC/8 )THEN + IF( .TRUE. )THEN + CALL MPL_ALLTOALLV(PSENDBUF=ZFLD,KSENDCOUNTS=ILENS,& + & PRECVBUF=ZBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & CDSTRING='GATH_GRID_CTL:') + ELSE + IR=0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IR=IR+NPROC + ENDIF + ENDDO + IR=IR+KFGATHG + ALLOCATE(IREQ(IR)) + IR=0 + ITAG = MTAGDISTSP+1+17 + DO JROC=1,NPROC + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IRCV = JROC + IR=IR+1 + CALL MPL_RECV(ZBUF(1+IOFFR(IRCV):IOFFR(IRCV)+ILENR(IRCV)),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR),& + &CDSTRING='GATH_GRID_CTL:') + ENDIF + ENDDO + ENDDO + DO JFLD=1,KFGATHG + ISND = KTO(JFLD) + IR=IR+1 + CALL MPL_SEND(ZFLD(1+IOFFS(ISND):IOFFS(ISND)+ILENS(ISND)),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR),& + &CDSTRING='GATH_GRID_CTL:') + ENDDO + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='GATH_GRID_CTL: WAIT') + DEALLOCATE(IREQ) + ENDIF + ENDIF + + CALL GSTATS(809,1) + CALL GSTATS_BARRIER2(789) + CALL GSTATS(1643,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1)& +!$OMP&PRIVATE(JA,JB,IPROC,IGLOFF,IGL1,IGL2,IOFF,ILAST,J,& +!$OMP&ILEN,ILOFF,JGL,JLON,JFLD) + DO JFLD=1,IFLDR + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IPROC,JA,JB,0,0) + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IOFF = 0 + IF(JA > 1) THEN + IF( D%NLSTLAT(JA-1) == D%NFRSTLAT(JA) )THEN + ILAST = D%NLSTLAT(JA-1)-1 + ELSE + ILAST = D%NLSTLAT(JA-1) + ENDIF + DO J=D%NFRSTLAT(1),ILAST + IOFF = IOFF+G%NLOEN(J) + ENDDO + ENDIF + + ILEN = 0 + ILOFF = 0 + DO JGL=IGL1,IGL2 + DO JLON=1,D%NONL(IGLOFF+JGL-IGL1,JB) + PGPG(IOFF+ILOFF+D%NSTA(IGLOFF+JGL-IGL1,JB)+JLON-1,JFLD) = & + & ZBUF(ILEN+JLON+(JFLD-1)*IFLDL+(IPROC-1)*IFLDL*IMYFIELDS) + ENDDO + ILEN = ILEN + D%NONL(IGLOFF+JGL-IGL1,JB) + ILOFF = ILOFF + G%NLOEN(JGL) + ENDDO + ENDDO + ENDDO + ENDDO +!$OMP END PARALLEL DO + + CALL GSTATS(1643,1) +! Synhronize processors +! Should not be necessary +!!$ CALL GSTATS(784,0) +!!$ CALL MPL_BARRIER(CDSTRING='GATH_GRID_CTL:') +!!$ CALL GSTATS(784,1) + IF(ALLOCATED(ZBUF)) DEALLOCATE(ZBUF) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID_CTL +END MODULE GATH_GRID_CTL_MOD + + diff --git a/src/trans/gpu/internal/gath_spec_control_mod.F90 b/src/trans/gpu/internal/gath_spec_control_mod.F90 new file mode 100755 index 0000000..88f7d21 --- /dev/null +++ b/src/trans/gpu/internal/gath_spec_control_mod.F90 @@ -0,0 +1,233 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GATH_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& + & KSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,LDZA0IP) + +!**** *GATH_SPEC_CONTROL* - Gather global spectral array from processors + +! Purpose. +! -------- +! Routine for gathering spectral array + +!** Interface. +! ---------- +! CALL GATH_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be distributed +! KTO(:) - Processor responsible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set first coefficients (imaginary part) to zero + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +!USE TPM_GEN +!USE TPM_DIM +USE TPM_DISTR ,ONLY : MTAGDISTSP, NPRCIDS, NPRTRW, & + & MYSETV, MYSETW, MYPROC, NPROC +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE SET2PE_MOD ,ONLY : SET2PE +!USE SUWAVEDI_MOD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +REAL(KIND=JPRB) :: ZFLD(KSPEC2,KFGATHG),ZDUM(KSPEC2) +REAL(KIND=JPRB),ALLOCATABLE :: ZRECV(:,:) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ISP,ILENR,ISTA,ISTP,ISENDREQ(KFGATHG),IPOS0,JNM +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G),IMYFIELDS +LOGICAL :: LLZA0IP + +! ------------------------------------------------------------------ + +LLZA0IP=.TRUE. +IF (PRESENT (LDZA0IP)) LLZA0IP=LDZA0IP + +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JM=1,KSPEC2_G + DO JFLD=1,KFGATHG + PSPECG(JFLD,JM) =PSPEC(JFLD,JM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JFLD=1,KFGATHG + DO JM=1,KSPEC2_G + PSPECG(JM,JFLD) =PSPEC(JM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + IF(IMYFIELDS>0) THEN + ALLOCATE(ZRECV(KSPEC2_G,IMYFIELDS)) + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KSMAX + DO JN=JM,KSMAX + IDIST(II+1) = KDIM0G(JM)+(JN-JM)*2 + IDIST(II+2) = KDIM0G(JM)+(JN-JM)*2+1 + II = II+2 + ENDDO + ENDDO + CALL GSTATS(1804,1) + ENDIF + + CALL GSTATS_BARRIER(788) + + !Send + CALL GSTATS(810,0) + IFLDS = 0 + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + + IFLDS = IFLDS+1 + ISND = KTO(JFLD) + ITAG = MTAGDISTSP+JFLD+17 + IF(LDIM1_IS_FLD) THEN + ZFLD(1:KSPEC2,IFLDS)=PSPEC(IFLDS,1:KSPEC2) + CALL MPL_SEND(ZFLD(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ELSE + CALL MPL_SEND(PSPEC(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ENDIF + ENDIF + ENDDO + ENDIF + + ! Recieve + IFLDR = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IBSET = KVSET(JFLD) + IFLDR = IFLDR+1 + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(IRCV,0,0,JA,IBSET) + ITAG = MTAGDISTSP+JFLD+17 + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + CALL MPL_RECV(ZRECV(ISTA:ISTP,IFLDR),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & + &CDSTRING='GATH_SPEC_CONTROL') + IF( ILENR /= ILEN )THEN + WRITE(0,'("GATH_SPEC_CONTROL: JFLD=",I4," JA=",I4," ILEN=",I10," ILENR=",I10)')& + &JFLD,JA,ILEN,ILENR + CALL ABORT_TRANS('GATH_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + + ! Check for completion of sends + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JFLD), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ENDIF + ENDDO + ENDIF + CALL GSTATS(810,1) + CALL GSTATS_BARRIER2(788) + + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JNM,II,JN,ISP) + DO JFLD=1,IMYFIELDS + IF(LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + PSPECG(JFLD,JNM) = ZRECV(IDIST(JNM),JFLD) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + ISP = KDIM0G(0)+JN*2+1 + II = II+2 + PSPECG(JFLD,II) = 0.0_JPRB + ENDDO + ENDIF + ELSE + DO JNM=1,KSPEC2_G + PSPECG(JNM,JFLD) = ZRECV(IDIST(JNM),JFLD) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + ISP = KDIM0G(0)+JN*2+1 + II = II+2 + PSPECG(II,JFLD) = 0.0_JPRB + ENDDO + ENDIF + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + IF(ALLOCATED(ZRECV)) DEALLOCATE(ZRECV) + + !Synchronize processors + CALL GSTATS(785,0) + CALL MPL_BARRIER(CDSTRING='GATH_SPEC_CONTROL:') + CALL GSTATS(785,1) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_SPEC_CONTROL +END MODULE GATH_SPEC_CONTROL_MOD + + diff --git a/src/trans/gpu/internal/gawl_mod.F90 b/src/trans/gpu/internal/gawl_mod.F90 new file mode 100755 index 0000000..c188e59 --- /dev/null +++ b/src/trans/gpu/internal/gawl_mod.F90 @@ -0,0 +1,118 @@ +! (C) Copyright 1992- ECMWF. +! (C) Copyright 1992- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE GAWL_MOD +CONTAINS +SUBROUTINE GAWL(PFN,PL,PW,PEPS,KN,KITER,PMOD) + +!**** *GAWL * - Routine to perform the Newton loop + +! Purpose. +! -------- +! Find 0 of Legendre polynomial with Newton loop +!** Interface. +! ---------- +! *CALL* *GAWL(PFN,PL,PW,PEPS,KN,KITER,PMOD) + +! Explicit arguments : +! -------------------- +! PFN Fourier coefficients of series expansion +! for the ordinary Legendre polynomials (in) +! PL Gaussian latitude (inout) +! PW Gaussian weight (out) +! PEPS 0 of the machine (in) +! KN Truncation (in) +! KITER Number of iterations (out) +! PMOD Last modification (inout) + +! Implicit arguments : +! -------------------- +! None + +! Method. +! ------- +! Newton Loop. + +! Externals. +! ---------- +! CPLEDN + +! Reference. +! ---------- + +! ARPEGE Documentation vol.2, ch3. + +! Author. +! ------- +! Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 92-12-18 +! K. Yessad (Sep 2008): cleaning, improve comments. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +USE CPLEDN_MOD ,ONLY : CPLEDN + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KN +REAL(KIND=JPRD),INTENT(IN) :: PFN(0:KN/2) +REAL(KIND=JPRD),INTENT(INOUT) :: PL +REAL(KIND=JPRD),INTENT(OUT) :: PW +REAL(KIND=JPRD),INTENT(IN) :: PEPS +INTEGER(KIND=JPIM),INTENT(OUT) :: KITER +REAL(KIND=JPRD),INTENT(INOUT) :: PMOD + +! ------------------------------------------------------------------ + + +INTEGER(KIND=JPIM) :: IFLAG, ITEMAX, JTER, IODD +REAL(KIND=JPRD) :: ZW, ZX, ZXN + +! ------------------------------------------------------------------ + +!* 1. Initialization. +! --------------- + +ITEMAX = 20 +ZX = PL +IFLAG = 0 +IODD=MOD(KN,2) + +! ------------------------------------------------------------------ + +!* 2. Newton iteration. +! ----------------- + +DO JTER=1,ITEMAX+1 + KITER = JTER + CALL CPLEDN(KN,IODD,PFN,ZX,IFLAG,ZW,ZXN,PMOD) + ZX = ZXN + + IF(IFLAG == 1) EXIT + IF(ABS(PMOD) <= PEPS*1000._JPRD) IFLAG = 1 +ENDDO + +PL = ZXN +PW = ZW + +! ------------------------------------------------------------------ + +END SUBROUTINE GAWL +END MODULE GAWL_MOD + + diff --git a/src/trans/gpu/internal/gstats_label_ifs.F90 b/src/trans/gpu/internal/gstats_label_ifs.F90 new file mode 100644 index 0000000..247d7fc --- /dev/null +++ b/src/trans/gpu/internal/gstats_label_ifs.F90 @@ -0,0 +1,955 @@ +! (C) Copyright 2000- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE GSTATS_LABEL_IFS + +!**** *GSTATS_LABEL_IFS* - Set up GSTATS labels for IFS + +! PURPOSE. +! -------- +! Set up GSTATS labels for IFS + +!** INTERFACE. +! ---------- +! *CALL* *GSTATS_LABEL_IFS* + +! EXPLICIT ARGUMENTS None +! -------------------- + +! IMPLICIT ARGUMENTS +! -------------------- + +! METHOD. +! ------- + +! EXTERNALS. +! ---------- + +! REFERENCE. +! ---------- +! ECMWF Research Department documentation of the IFS + +! AUTHOR. +! ------- +! Mats Hamrud ECMWF + +! MODIFICATIONS. +! -------------- +! ORIGINAL : 98-11-15 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! M.Hamrud 01-Dec-2003 CY28R1 Cleaning +! K.Yessad (dec 2011): clean references to no longer existing routines. +! R. El Khatib 04-Aug-2014 Pruning of the conf. 927/928 +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE YOMGSTATS +IMPLICIT NONE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('GSTATS_LABEL_IFS',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +! counters 0 to 50 : IFS computations +CALL GSTATS_LABEL(0 ,' ','CNT0 - COMPLETE EXECUTION') +CALL GSTATS_LABEL(1 ,' ','CNT4 - FORWARD INTEGRATION') +CALL GSTATS_LABEL(8 ,' ','SCAN2M - GRID-POINT DYNAMICS') +CALL GSTATS_LABEL(9 ,' ','SPCM - SPECTRAL COMP.') +CALL GSTATS_LABEL(10,' ','SCAN2M - PHYSICS') +CALL GSTATS_LABEL(11,' ','IOPACK - OUTPUT P.P. RESULTS') +CALL GSTATS_LABEL(12,' ','SPNORM - SPECTRAL NORM COMP.') +CALL GSTATS_LABEL(13,' ','SCAN2M - RADIATION CALC.') +CALL GSTATS_LABEL(14,' ','SUINIF ') +CALL GSTATS_LABEL(15,' ','GET_TRAJ_SPEC ') +CALL GSTATS_LABEL(16,' ','GET_TRAJ_GRID ') +CALL GSTATS_LABEL(17,' ','GRIDFPOS IN CNT4 ') +CALL GSTATS_LABEL(18,' ','SUSPECG ') +CALL GSTATS_LABEL(19,' ','SUSPEC ') +CALL GSTATS_LABEL(20,' ','CVA1 - CONTROL OF MINIM.') +CALL GSTATS_LABEL(21,' ','SIM4D - FULL COST FUNCTION') +CALL GSTATS_LABEL(22,' ','CNT4TL - TANGENT LINEAR INT.') +CALL GSTATS_LABEL(23,' ','CNT4AD - ADJOINT INT.') +CALL GSTATS_LABEL(24,' ','SUGRIDU ') +CALL GSTATS_LABEL(25,' ','SPECRT ') +CALL GSTATS_LABEL(26,' ','SUGRIDF ') +CALL GSTATS_LABEL(27,' ','RESTART FILES - WRITING') +CALL GSTATS_LABEL(28,' ','RESTART FILES - READING') +CALL GSTATS_LABEL(29,' ','SU4FPOS IN CNT4 ') +CALL GSTATS_LABEL(30,' ','DYNFPOS IN CNT4 ') +CALL GSTATS_LABEL(31,' ','POSDDH IN STEPO ') +CALL GSTATS_LABEL(32,' ','NEMO - COST OF OCEAN MODEL') +CALL GSTATS_LABEL(33,' ','NEMO - IFS ACCUM. OF DATA') +CALL GSTATS_LABEL(34,' ','SCAN2MTL - T.L. GRID-POINT COMP.') +CALL GSTATS_LABEL(35,' ','SCAN2MAD - ADJ. GRID-POINT COMP.') +CALL GSTATS_LABEL(36,' ','SPCMAD - ADJOINT SPECTRAL COMP.') +CALL GSTATS_LABEL(37,' ','CPGLAG - SL COMPUTATIONS') +CALL GSTATS_LABEL(38,' ','WAM - TOTAL COST OF WAVE MODEL') +CALL GSTATS_LABEL(39,' ','SU0YOMB ') +CALL GSTATS_LABEL(40,' ','OBSV - OBSERVATION CALC.') +CALL GSTATS_LABEL(41,' ','OBSVTL - T.L. OBSERVATION CALC.') +CALL GSTATS_LABEL(42,' ','OBSVAD - ADJ. OBSERVATION CALC.') +CALL GSTATS_LABEL(43,' ','OBSHOR - OBS. EQUIV. COMP.') +CALL GSTATS_LABEL(44,' ','OBSHORAD - ADJ. OBS. EQUIV. COMP.') +CALL GSTATS_LABEL(45,' ','CNT1 - SCREENING') +CALL GSTATS_LABEL(46,' ','CNT1 - OBSERVATION WRITING') +CALL GSTATS_LABEL(47,' ','CVA1 - OBSERVATION WRITING') +CALL GSTATS_LABEL(48,' ','SIM4D - COST FUNCTION EVAL.') +CALL GSTATS_LABEL(49,' ','SIM4D - JB CALCULATION') +CALL GSTATS_LABEL(50,' ','SIM4D - CHANGE OF VAR.') + +! counters 51 to 100 : message passing + +CALL GSTATS_LABEL(51,' ','SCAN2M - SL COMM. PART 1') +CALL GSTATS_LABEL(54,' ','SPCM - M TO S/S TO M TRANSP.') +CALL GSTATS_LABEL(55,' ','SPCIMPF - S TO M/M TO S TRANSP.') +CALL GSTATS_LABEL(56,' ','SPNORM - SPECTRAL NORM COMM.') +CALL GSTATS_LABEL(59,' ','SLINT - SL COMM.') +CALL GSTATS_LABEL(63,' ','SCAN2MAD - SL COMM. PART 1') +CALL GSTATS_LABEL(64,' ','SCAN2MAD - SL COMM. PART 2') +CALL GSTATS_LABEL(65,' ','RADINTG - SL COMM. OUTPUT') +CALL GSTATS_LABEL(66,' ','RADINTG - SL COMM. INPUT') +CALL GSTATS_LABEL(67,' ','SLCOMM COAPHY-1 ') +CALL GSTATS_LABEL(68,' ','SLCOMM COAPHY-2 ') + +CALL GSTATS_LABEL(79,' ','THINN - DECIS/OBS.THINNING (AIRS ONLY)') + +CALL GSTATS_LABEL(84,' ','SCAN2MTL - SL COMM.') +CALL GSTATS_LABEL(85,' ','SCAN2MAD - INC SL COMM.') +CALL GSTATS_LABEL(86,' ','SPCMAD - M TO S/S TO M TRANSP.') +CALL GSTATS_LABEL(87,' ','SPCIMPFAD- S TO M/M TO S TRANSP.') +CALL GSTATS_LABEL(88,' ','SCAN2MTL - SL COMM. PART 2') +CALL GSTATS_LABEL(89,' ','SCAN2M - SL COMM. PART 2') +CALL GSTATS_LABEL(90,' ','OBSV - COMM.') +CALL GSTATS_LABEL(91,' ','OBSVTL - COMM.') +CALL GSTATS_LABEL(93,' ','OBSHOR - COMM. OF OBS == ') +CALL GSTATS_LABEL(94,' ','OBSHORAD - COMM. OF OBS == ') + +! counters 101 to 200 : trans package +CALL GSTATS_LABEL(102,'TRS','LTINV_CTL - INVERSE LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(103,'TRS','LTDIR_CTL - DIRECT LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(104,'TRS','LTINV_CTLAD - ADJ. INVERSE LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(105,'TRS','LTDIR_CTLAD - ADJ. DIRECT LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(106,'TRS','FTDIR_CTL - DIRECT FOURIER TRANSFORM') +CALL GSTATS_LABEL(107,'TRS','FTINV_CTL - INVERSE FOURIER TRANSFORM') +CALL GSTATS_LABEL(108,'TRS','OMP in GPC - TEST_ADJOINT ') +CALL GSTATS_LABEL(110,'TRS','OMP in SCALPRODSP - TEST_ADJOINT ') +CALL GSTATS_LABEL(132,'TRS','FTINV_CTLAD - ADJ. INVERSE FOURIER TRANSFORM') +CALL GSTATS_LABEL(133,'TRS','FTDIR_CTLAD - ADJ. DIRECT FOURIER TRANSFORM') +CALL GSTATS_LABEL(135,'TRS','OMP in GPCAD - TEST_ADJOINT ') +CALL GSTATS_LABEL(140,'TRS','SULEG - COMP. OF LEGENDRE POL.') +CALL GSTATS_LABEL(152,'TRS','LTINV_CTL - M TO L TRANSPOSITION') +CALL GSTATS_LABEL(153,'TRS','LTDIR_CTL - L TO M TRANSPOSITION') +CALL GSTATS_LABEL(157,'TRS','FTINV_CTL - L TO G TRANSPOSITION') +CALL GSTATS_LABEL(158,'TRS','FTDIR_CTL - G TO L TRANSPOSITION') +CALL GSTATS_LABEL(180,'TRS','LTINV_CTLAD - L TO M TRANSPOSITION') +CALL GSTATS_LABEL(181,'TRS','LTDIR_CTLAD - M TO L TRANSPOSITION') +CALL GSTATS_LABEL(182,'TRS','FTINV_CTLAD - G TO L TRANSPOSITION') +CALL GSTATS_LABEL(183,'TRS','FTDIR_CTLAD - L TO G TRANSPOSITION') +CALL GSTATS_LABEL(190,'TRS','SUTRLE - COMMUNICATE LEG.POL.') + +! to be added + +! counters 201 to 250 : additional computations +CALL GSTATS_LABEL(201,'JBW','JBWAVGEN - INPUT + Control Vector Computations') +CALL GSTATS_LABEL(202,'JBW','JBWAVGEN - GRIDPOINT STDEV Computation') +CALL GSTATS_LABEL(203,'JBW','JBWAVGEN - Hor-Ver. CORRELATION Computation') +CALL GSTATS_LABEL(204,'JBW','JBWAVGEN - NORMALIZATION of CORRELATIONs') +CALL GSTATS_LABEL(205,'JBW','JBWAVGEN - MERGE and EIGENDECOMP.') +CALL GSTATS_LABEL(206,'JBW','JBWAVGEN - WRITE OUT SUJBWAVWRI') +CALL GSTATS_LABEL(207,'JBW','JBWAVVC - COST OF SUJBGPTOMAT') +CALL GSTATS_LABEL(208,'JBW','JBWAVVC - COST OF COVARIANCE COMPUTATION') +CALL GSTATS_LABEL(209,'JBW','JBWAVGEN - COST OF NORMALIZATION BY STD') +CALL GSTATS_LABEL(210,'JBW','JBWAVGEN - COST OF POWER SPECTRUM') +CALL GSTATS_LABEL(211,'JBW','JBWAVGEN - COST OF WAVEL-SPEC TRANSF') +CALL GSTATS_LABEL(212,'JBW','JBWAVGEN - COST OF WAVEL-GRID TRANSF') +CALL GSTATS_LABEL(213,'JBW','JBWAVGEN - COST OF GATHER POWER SPECTRUM') +CALL GSTATS_LABEL(214,'JBW','JBWAVGEN - READ/TRUNC SPECTRAL STATES') +CALL GSTATS_LABEL(215,'JBW','JBWAVGEN - READ/INT GRIDPOINT STATES') +CALL GSTATS_LABEL(216,'JBW','JBWAVGEN - READ/INT PS GRIDPOINT STATES') +CALL GSTATS_LABEL(217,'JBW','JBWAVGEN - READ/INT UA GRIDPOINT STATES') +CALL GSTATS_LABEL(218,'JBW','JBWAVGEN - GRIDPOINT BALANCE') +CALL GSTATS_LABEL(219,'JBW','JBWAVGEN - SPECTRAL BALANCE') +CALL GSTATS_LABEL(220,'ACV','HYBRID B - LINEAR') +CALL GSTATS_LABEL(221,'ACV','HYBRID B - ADJOINT') +CALL GSTATS_LABEL(222,'ACV','HYBRID B - SUINEP') +CALL GSTATS_LABEL(223,'ACV','HYBRID B - SUINEP READING') +CALL GSTATS_LABEL(224,'ACV','HYBRID B - SUINEP INTERPOLATING') + +! counters 251 to 300 : additional message passing + +CALL GSTATS_LABEL(251,'MP-','ALLGATHER CONTROL VECTORS') +CALL GSTATS_LABEL(252,'MP-','CVSECTION CONTROL VECTORS') +CALL GSTATS_LABEL(253,'MP-','GATHER CONTROL VECTORS') +CALL GSTATS_LABEL(254,'MP-','SCATTER CONTROL VECTORS') +CALL GSTATS_LABEL(255,'MP-','DOT PRODUCT CONTROL VECTORS') +CALL GSTATS_LABEL(256,'MP-','REDUCE CONTROL VECTORS') +CALL GSTATS_LABEL(257,'MP-','MULTISCATTER CONTROL VECTORS') +CALL GSTATS_LABEL(258,'MP-','MULTIGATHER CONTROL VECTORS') + +! couplo4 call - intefrace to oasis4 +CALL GSTATS_LABEL(260,' ','COULPLO4_DEFINITIONS in CNT4') +CALL GSTATS_LABEL(261,' ','PRISM_DEF in COULPLO4_DEFINITIONS ') +CALL GSTATS_LABEL(262,' ','COULPLO4_EXCHANGE in GP_MODEL ') +CALL GSTATS_LABEL(263,' ','PRISM_GET 3D-GP in COULPLO4_EXCHANGE ') +CALL GSTATS_LABEL(264,' ','PRISM_PUT 3D-GP in COULPLO4_EXCHANGE ') +CALL GSTATS_LABEL(265,' ','PRISM_PUT 2D-GP in COULPLO4_EXCHANGE ') +CALL GSTATS_LABEL(266,' ','PRISM_PUT 3D_SP in COULPLO4_EXCHANGE ') +CALL GSTATS_LABEL(267,' ','PRISM_PUT 2D_SP in COULPLO4_EXCHANGE ') + +CALL GSTATS_LABEL(301,' ','INTERP_OBSAD ') +CALL GSTATS_LABEL(302,' ','INTERP_OBSAD_1 ') +CALL GSTATS_LABEL(303,' ','OPK_OBSCOR ') +CALL GSTATS_LABEL(304,' ','INTERP_OBSAD_2 ') +CALL GSTATS_LABEL(305,' ','INTERP_OBSAD_3 ') +CALL GSTATS_LABEL(306,' ','SUJBWAVALLO ') +CALL GSTATS_LABEL(307,' ','INTERP_OBS ') +CALL GSTATS_LABEL(308,'MP-','VARBC_PRED ') +CALL GSTATS_LABEL(309,'MP-','VARBC_SETUP ') + +CALL GSTATS_LABEL(400,' ','GSTATS ') +CALL GSTATS_LABEL(401,' ','GSTATS HOOK') + +CALL GSTATS_LABEL(410,' ','MPI - TRMTOL') +CALL GSTATS_LABEL(411,' ','MPI - TRLTOM') +CALL GSTATS_LABEL(412,' ','MPI - TRLTOG') +CALL GSTATS_LABEL(413,' ','MPI - TRGTOL') +CALL GSTATS_LABEL(420,' ','TRLTOM Barrier') +CALL GSTATS_LABEL(421,' ','TRMTOL Barrier') +CALL GSTATS_LABEL(422,' ','TRLTOG Barrier') +CALL GSTATS_LABEL(423,' ','TRGTOL Barrier') +CALL GSTATS_LABEL(430,' ','DIR COPIES') +CALL GSTATS_LABEL(431,' ','INV COPIES') +CALL GSTATS_LABEL(440,' ','FULL DIRTRANS') +CALL GSTATS_LABEL(441,' ','FULL INVTRANS') + +! counters 500 to 2000 +CALL GSTATS_LABEL(501,'MPL','SLCOMM2_COMMS PART1') +CALL GSTATS_LABEL(502,'MPL','SLCOMM2A_COMMS PART1') +CALL GSTATS_LABEL(507,'MPL','TRSTOM_COMMS') +CALL GSTATS_LABEL(508,'MPL','TRMTOS_COMMS') +CALL GSTATS_LABEL(509,'MPL','SLCOMM1_COMMS') +CALL GSTATS_LABEL(511,'MPL','SLCOMM2_COMMS PART2') +CALL GSTATS_LABEL(512,'MPL','SLCOMM2A_COMMS PART2') +CALL GSTATS_LABEL(513,'MPL','GOM_MESSAGE_PASSING_COMMS') +CALL GSTATS_LABEL(514,'MPL','GOM_MESSAGE_PASSING_AD_COMMS') +CALL GSTATS_LABEL(515,'MPL','STATPRED') +CALL GSTATS_LABEL(516,'MPL','SUVARBC') +CALL GSTATS_LABEL(517,'MPL','CVARBCAD') +CALL GSTATS_LABEL(518,'MPL','CVARBCINAD') +CALL GSTATS_LABEL(520,'MPL','DRESDDH ') +CALL GSTATS_LABEL(521,'MPL','STEPO ALLREDUCE ') +CALL GSTATS_LABEL(523,'MPL','GATHEREIGMD ') +CALL GSTATS_LABEL(524,'MPL','SCATTER_CTLVEC ') +CALL GSTATS_LABEL(525,'MPL','MULTISCATTER_CTLVEC ') +CALL GSTATS_LABEL(528,'MPL','GATHER_CTLVEC ') +CALL GSTATS_LABEL(529,'MPL','ALLGATHER_CTLVEC ') +CALL GSTATS_LABEL(530,'MPL','ODBMP-BCAST') +CALL GSTATS_LABEL(531,'MPL','ODBMP-ALLREDUCE') +CALL GSTATS_LABEL(532,'MPL','ODBMP-SEND') +CALL GSTATS_LABEL(533,'MPL','ODBMP-RECV') +CALL GSTATS_LABEL(534,'MPL','ODBMP-ALLGATHERV') +CALL GSTATS_LABEL(535,'MPL','SHUFFLEDB') +CALL GSTATS_LABEL(536,'MPL','MULTIGATHER_CTLVEC ') +CALL GSTATS_LABEL(537,'MPL','ALLGATHER_CVSECTION') +CALL GSTATS_LABEL(538,'MPL','HALO_EXCHANGE GP_DERS') +CALL GSTATS_LABEL(602,'MPL','BRPTOB ') +CALL GSTATS_LABEL(604,'MPL','ALLGATHER IN GATHERGPF ') +CALL GSTATS_LABEL(605,'MPL','GATHERTC GATH') +CALL GSTATS_LABEL(606,'MPL','GATHERTC SCAT') +CALL GSTATS_LABEL(607,'MPL','COMMSPNORM GATH') +CALL GSTATS_LABEL(608,'MPL','COMMSPNORM1 GATH') +CALL GSTATS_LABEL(609,'MPL','GATHERBDY GATH ') +CALL GSTATS_LABEL(610,'MPL','GATHERCOSTO ') +CALL GSTATS_LABEL(611,'MPL','GATHERCOST2 ') +CALL GSTATS_LABEL(615,'MPL','BROADCAST IN ALLGATHER_CVSECTION ') +CALL GSTATS_LABEL(616,'MPL','ALLGATHERV IN MPBCASTSCFLD ') +CALL GSTATS_LABEL(617,'MPL','BROADCAST IN MPBCASTINTFLD WAM ') +CALL GSTATS_LABEL(618,'MPL','GRFIELD WAM ') +CALL GSTATS_LABEL(619,'MPL','BROADCAST IN INMARS IN WAM ') +CALL GSTATS_LABEL(620,'MPL','BROADCAST IN MPBCASTGRDFLD WAM ') +CALL GSTATS_LABEL(621,'MPL','BROADCAST IN GETSTRESS WAM ') +CALL GSTATS_LABEL(622,'MPL','SEND/RECV IN READWIND WAM ') +CALL GSTATS_LABEL(623,'MPL','SEND/RECV IN GETSPEC WAM ') +CALL GSTATS_LABEL(624,'MPL','SEND/RECV IN MPDISTRIBSCFLD WAM ') +CALL GSTATS_LABEL(625,'MPL','SCAQC REDUCE ') +CALL GSTATS_LABEL(626,'MPL','BROADCAST GETMINI ') +CALL GSTATS_LABEL(627,'MPL','ALLGATHER GETMINI2') +CALL GSTATS_LABEL(628,'MPL','BROADCAST IOSTREAM INQUIRE ') +CALL GSTATS_LABEL(629,'MPL','GATHER CLOSE IOSTREAM ') +CALL GSTATS_LABEL(630,'MPL','GATHER IOSTREAM_STATS ') +CALL GSTATS_LABEL(631,'MPL','BROADCAST IOSTREAM SPEC_IN') +CALL GSTATS_LABEL(632,'MPL','BROADCAST IOSTREAM GRID_IN') +CALL GSTATS_LABEL(633,'MPL','IOSTREAM GATH_GRID_FP') +CALL GSTATS_LABEL(634,'MPL','BROADCAST IOSTREAM GRID_IN') +CALL GSTATS_LABEL(635,'MPL','SUJBWAVELET') +CALL GSTATS_LABEL(636,'MPL','SUJBWAVALLO MATRICES') +CALL GSTATS_LABEL(637,'MPL','BROADCAST SUJBBAL') +CALL GSTATS_LABEL(638,'MPL','ECSET') +CALL GSTATS_LABEL(639,'MPL','SUJBWAVALLO BCASTS') +CALL GSTATS_LABEL(640,'MPL','SUJBWAVALLO INDXL2G') +CALL GSTATS_LABEL(647,'MPL','IFSTOWAM WAM') +CALL GSTATS_LABEL(648,'MPL','IOSTREAM READ_RECORD 2 ') +CALL GSTATS_LABEL(649,'MPL','IOSTREAM WRITE_RECORD ') +CALL GSTATS_LABEL(650,'MPL','IOSTREAM READ_RECORD ') +CALL GSTATS_LABEL(651,'MPL','DVSECTION ALLGATH ') +CALL GSTATS_LABEL(652,'MPL','DOT_PROD ALLGATH ') +CALL GSTATS_LABEL(653,'MPL','SCAT_VECT BROAD ') +CALL GSTATS_LABEL(654,'MPL','GATH_VECT GATH ') +CALL GSTATS_LABEL(655,'MPL','BROADCAST IN SUARG ') +CALL GSTATS_LABEL(656,'MPL','ALLGATHERV IN DOT_PRODUCT_CTLVEC') +CALL GSTATS_LABEL(657,'MPL','ALLGATHERV IN DOT_PRODUCT_CTLVEC') +CALL GSTATS_LABEL(658,'MPL','SIGCHECK ') +CALL GSTATS_LABEL(659,'MPL','ALLTOALL IN WRITE_SPEC ') +CALL GSTATS_LABEL(660,'MPL','BROADCAST IN INIFGER ') +CALL GSTATS_LABEL(663,'MPL','BROADCAST IN COMMJBDAT ') +CALL GSTATS_LABEL(664,'MPL','BROADCAST IN COMMJBBAL ') +CALL GSTATS_LABEL(665,'MPL','BROADCAST IN COMMFCE2 ') +CALL GSTATS_LABEL(666,'MPL','SEND and RECV IN GOM_PRINT_RMS ') +CALL GSTATS_LABEL(667,'MPL','BROADCAST IN SUECRAD ') +CALL GSTATS_LABEL(668,'MPL','GATHERCOST1 ') +CALL GSTATS_LABEL(669,'MPL','ALLREDUCE IN SUPRFFCE ') +CALL GSTATS_LABEL(670,'MPL','SCATTERV IN READ_SPEC ') +CALL GSTATS_LABEL(671,'MPL','DOT PRODUCT SPECTRAL FIELDS') +CALL GSTATS_LABEL(672,'MPL','WAMODEL MPGATHERGRDFLD ') +CALL GSTATS_LABEL(673,'MPL','WAMODEL MPGATHERSPP ') +CALL GSTATS_LABEL(674,'MPL','WAMODEL MPGATHERSCFLD ') +CALL GSTATS_LABEL(675,'MPL','WAMODEL WAMNORM ') +CALL GSTATS_LABEL(676,'MPL','WAMODEL MPEXCHNG ') +CALL GSTATS_LABEL(677,'MPL','WAMODEL MPGATHERFL ') +CALL GSTATS_LABEL(678,'MPL','WVXF2GB ') +CALL GSTATS_LABEL(679,'MPL','SUSPECG SEND 2 ') +CALL GSTATS_LABEL(680,'MPL','SUSPECG SEND 3 ') +CALL GSTATS_LABEL(681,'MPL','SUSPECG 4 ') +CALL GSTATS_LABEL(682,'MPL','SUGRIDG SEND ') +CALL GSTATS_LABEL(683,'MPL','SUGRIDG RECV ') +CALL GSTATS_LABEL(685,'MPL','SUGRIDUG 1 ') +CALL GSTATS_LABEL(686,'MPL','WAM MPFLDTOIFS ') +CALL GSTATS_LABEL(687,'MPL','SUGRIDUG 2 ') +CALL GSTATS_LABEL(688,'MPL','WROUTGPGB ') +CALL GSTATS_LABEL(689,'MPL','WROUTSPGB ') +CALL GSTATS_LABEL(690,'MPL','ALLTOALLV IN READ_SPEC ') +CALL GSTATS_LABEL(691,'MPL','MKGLOBSTAB ') +CALL GSTATS_LABEL(692,'MPL','OUTWSPEC WAM') +CALL GSTATS_LABEL(693,'MPL','OUTGRID WAM') +CALL GSTATS_LABEL(694,'MPL','MPDECOMP WAM ') +CALL GSTATS_LABEL(695,'MPL','SARINVERT GATHER WAM ') +CALL GSTATS_LABEL(696,'MPL','SARINVERT BCAST WAM') + +! barrier counter 700 ---> 800 + +CALL GSTATS_LABEL(701,'GBR','GBAR IN TRMTOS ') +CALL GSTATS_LABEL(702,'GBR','GBAR TO TIME BARRIER CALL') +CALL GSTATS_LABEL(703,'GBR','GBAR IN TRSTOM ') +CALL GSTATS_LABEL(704,'BAR','BARRIER IN GP_MODEL ') +CALL GSTATS_LABEL(705,'GBR','GBAR IN MULTISCATTER_CTLVEC') +CALL GSTATS_LABEL(706,'BAR','BARRIER IN COMMJBDAT ') +CALL GSTATS_LABEL(707,'GBR','GBAR IN IOSTREAM_MIX:IO_INQUIRE ') +CALL GSTATS_LABEL(708,'GBR','GBAR IN RDVARBC ') +CALL GSTATS_LABEL(709,'GBR','GBAR IN GOM_MESSAGE_PASSING') +CALL GSTATS_LABEL(710,'GBR','GBAR IN GOM_MESSAGE_PASING_AD') +CALL GSTATS_LABEL(711,'GBR','GBAR IOSTREAM SPEC_OUT ') +CALL GSTATS_LABEL(712,'GBR','GBAR IN GATHERCOSTO ') +CALL GSTATS_LABEL(713,'GBR','GBAR IN SUGRIDG ') +CALL GSTATS_LABEL(714,'GBR','GBAR IN SCATTER_CTLVEC') +CALL GSTATS_LABEL(715,'GBR','GBAR IN SUSPECG ') +CALL GSTATS_LABEL(716,'GBR','GBAR IN GATHER_CTLVEC') +CALL GSTATS_LABEL(718,'BAR','BARRIER IN SIGCHECK ') +CALL GSTATS_LABEL(719,'BAR','BARRIER IN OUTBS ') +CALL GSTATS_LABEL(720,'GBR','GBAR IN ALLGATHER_CTLVEC') +CALL GSTATS_LABEL(721,'BAR','BARRIER IN SUJBSTD ') +CALL GSTATS_LABEL(722,'GBR','GBAR IN ALLGATHER_CVSECTION') +CALL GSTATS_LABEL(723,'GBR','GBAR IN SUJBWAVALLO') +CALL GSTATS_LABEL(724,'GBR','GBAR IN MULTIGATHER_CTLVEC') +CALL GSTATS_LABEL(725,'GBR','GBAR IN EVCOST ') +CALL GSTATS_LABEL(726,'GBR','GBAR IN IOSTREAM_MIX:SPEC_IN') +CALL GSTATS_LABEL(727,'GBR','GBAR IN IOSTREAM_MIX:GRID_IN') +CALL GSTATS_LABEL(728,'GBR','GBAR IN IOSTREAM_MIX:GATH_GRID_FP') +CALL GSTATS_LABEL(729,'GBR','GBAR IN IOSTREAM_MIX:IOSTREAM_STATS') +CALL GSTATS_LABEL(730,'GBR','BARRIER IN EC_PHYS ') +CALL GSTATS_LABEL(731,'GBR','BARRIER IN EC_PHYS_TL ') +CALL GSTATS_LABEL(732,'GBR','BARRIER IN EC_PHYS_AD ') +CALL GSTATS_LABEL(733,'GBR','BARRIER IN WVXF2GB ') +CALL GSTATS_LABEL(734,'GBR','BARRIER IN MPFLDTOIFS ') +CALL GSTATS_LABEL(736,'GBR','BARRIER IN MPEXCHNG ') +CALL GSTATS_LABEL(737,'BAR','BARRIER IN CNT4 ') +CALL GSTATS_LABEL(738,'GBR','GBAR IN IOSTREAM WRITE_RECORD ') +CALL GSTATS_LABEL(739,'GBR','GBAR IN IOSTREAM READ_RECORD ') +CALL GSTATS_LABEL(740,'GBR','GBAR IN WROUTGPGB ') +CALL GSTATS_LABEL(741,'GBR','GBAR IN WROUTSPGB ') +CALL GSTATS_LABEL(742,'GBR','GBAR IN SUJBWAVALLO MAT') +CALL GSTATS_LABEL(745,'GBR','GBAR IN IOSTREAM CLOSE ') +CALL GSTATS_LABEL(746,'GBR','GBAR IN DOT_PRODUCT_CTLVEC ') +CALL GSTATS_LABEL(747,'GBR','GBAR IN DOT_PRODUCT_CTLVEC ') +CALL GSTATS_LABEL(748,'GBR','GBAR IN SLCOMM2 PART2 ') +CALL GSTATS_LABEL(749,'GBR','GBAR IN SLCOMM2A PART2 ') +CALL GSTATS_LABEL(750,'BAR','BARRIER IN OUTGRID WAM ') +CALL GSTATS_LABEL(752,'BAR','BARRIER IN WAMASSI ') +CALL GSTATS_LABEL(753,'BAR','BARRIER IN WAMODEL ') +CALL GSTATS_LABEL(754,'GBR','GBAR IN TASKOB ') +CALL GSTATS_LABEL(755,'GBR','GBAR IN TASKOBTL ') +CALL GSTATS_LABEL(756,'GBR','GBAR IN TASKOBAD ') +CALL GSTATS_LABEL(757,'GBR','GBAR IN SLCOMM1 ') +CALL GSTATS_LABEL(758,'GBR','GBAR IN SLCOMM2A ') +CALL GSTATS_LABEL(759,'GBR','GBAR IN SLCOMM2 PART1 ') +CALL GSTATS_LABEL(760,'GBR','GBAR IN SLCOMM ') +CALL GSTATS_LABEL(761,'GBR','GBAR IN TRGTOL ') +CALL GSTATS_LABEL(762,'GBR','GBAR IN TRLTOG ') +CALL GSTATS_LABEL(763,'GBR','GBAR IN TRLTOM ') +CALL GSTATS_LABEL(764,'GBR','GBAR IN TRMTOL ') +CALL GSTATS_LABEL(765,'GBR','GBAR IN TRMTOS ') +CALL GSTATS_LABEL(766,'GBR','GBAR IN TRSTOM ') +CALL GSTATS_LABEL(767,'GBR','GBAR IN GOM_MESSAGE_PASSING') +CALL GSTATS_LABEL(768,'GBR','GBAR IN GOM_MESSAGE_PASSING_AD') +CALL GSTATS_LABEL(769,'GBR','GBAR IN GATHERGPF ') +CALL GSTATS_LABEL(770,'GBR','GBAR IN GATHERTC ') +CALL GSTATS_LABEL(771,'GBR','GBAR IN GATHERBDY ') +CALL GSTATS_LABEL(773,'GBR','GBAR IN COMMSPNORM ') +CALL GSTATS_LABEL(774,'GBR','GBAR IN COMMJBDAT ') +CALL GSTATS_LABEL(775,'GBR','GBAR IN COMMJBBAL ') +CALL GSTATS_LABEL(776,'GBR','GBAR IN COMMFCE2 ') +CALL GSTATS_LABEL(777,'GBR','GBAR IN GATHERCOST1 ') +CALL GSTATS_LABEL(778,'GBR','GBAR IN GATHERCOST2 ') +CALL GSTATS_LABEL(779,'GBR','GBAR IN MKGLOBSTAB ') +CALL GSTATS_LABEL(780,'GBR','GBAR IN BRPTOB ') +CALL GSTATS_LABEL(781,'GBR','GBAR IN DOT_PRODUCT_CTLVEC') +CALL GSTATS_LABEL(782,'GBR','GBAR IN ODBMP-ALLREDUCE') +CALL GSTATS_LABEL(783,'BAR','BARRIER IN SUTRLE') +CALL GSTATS_LABEL(784,'BAR','BARRIER IN GATH_GRID_CTL') +CALL GSTATS_LABEL(785,'BAR','BARRIER IN GATH_SPEC_CONTROL') +CALL GSTATS_LABEL(786,'BAR','BARRIER IN DIST_GRID_CTL') +CALL GSTATS_LABEL(787,'BAR','BARRIER IN DIST_SPEC_CONTROL') +CALL GSTATS_LABEL(788,'GBR','GBAR IN GATH_SPEC_CONTROL') +CALL GSTATS_LABEL(789,'GBR','GBAR IN GATH_GRID_CTL') +CALL GSTATS_LABEL(790,'GBR','GBAR IN DIST_SPEC_CONTROL') +CALL GSTATS_LABEL(791,'GBR','GBAR IN DIST_GRID_CTL') +CALL GSTATS_LABEL(792,'BAR','BARRIER IN WRMLFP') +CALL GSTATS_LABEL(794,'BAR','BARRIER IN DRESDDH') +CALL GSTATS_LABEL(795,'GBR','BARRIER IN SUSTAONL') +CALL GSTATS_LABEL(796,'GBR','BARRIER IN IFSTOWAM') +CALL GSTATS_LABEL(797,'GBR','GBAR IN OBSHORAD') +CALL GSTATS_LABEL(798,'BAR','BARRIER IN SULEG') +CALL GSTATS_LABEL(799,'BAR','BARRIER IN POSDDH') + +! counters 800 to 900 : trans package + +CALL GSTATS_LABEL(801,'MPL','MPI IN SUTRLE_MOD ') +CALL GSTATS_LABEL(803,'MPL','TRGTOL_COMMS') +CALL GSTATS_LABEL(804,'MPL','TRGTOL_COMMS (GPNORM)') +CALL GSTATS_LABEL(805,'MPL','TRLTOG_COMMS') +CALL GSTATS_LABEL(806,'MPL','TRLTOM_COMMS') +CALL GSTATS_LABEL(807,'MPL','TRMTOL_COMMS') +CALL GSTATS_LABEL(809,'MPL','GATH_GRID_CTL_COMMS') +CALL GSTATS_LABEL(810,'MPL','GATH_SPEC_CONTROL_COMMS') +CALL GSTATS_LABEL(811,'MPL','DIST_GRID_CTL_COMMS') +CALL GSTATS_LABEL(812,'MPL','DIST_SPEC_CONTROL_COMMS') +CALL GSTATS_LABEL(813,'MPL','EVCOST') +CALL GSTATS_LABEL(814,'MPL','TRANS SUSTAONL') +CALL GSTATS_LABEL(815,'MPL','GPNORM_TRANS') +CALL GSTATS_LABEL(816,'MPL','GPNORM_TRANS') +CALL GSTATS_LABEL(817,'MPL','SUOBSCOR') +CALL GSTATS_LABEL(851,'MPL','SULEG - SUPOLF') +CALL GSTATS_LABEL(852,'MPL','SULEG - CONSTRUCT_BUTTERFLY') + +CALL GSTATS_LABEL(901,'MPL','MWAVE_PUT_TL') +CALL GSTATS_LABEL(902,'MPL','MWAVE_NEAREST') +CALL GSTATS_LABEL(903,'MPL','MWAVE_GP2OBS') +CALL GSTATS_LABEL(904,'MPL','MWAVE_OBS2GP') +CALL GSTATS_LABEL(905,'MPL','MWAVE_IGP2OBS') +CALL GSTATS_LABEL(906,'MPL','MWAVE_IOBS2GP') +CALL GSTATS_LABEL(907,'MPL','MWAVE_PUT') +CALL GSTATS_LABEL(908,'BAR','BARRIER IN MWAVE_NEAREST') +CALL GSTATS_LABEL(909,'BAR','BARRIER IN MWAVE_GP2OBS') +CALL GSTATS_LABEL(910,'BAR','BARRIER IN MWAVE_OBS2GP') +CALL GSTATS_LABEL(911,'BAR','BARRIER IN MWAVE_IGP2OBS') +CALL GSTATS_LABEL(912,'BAR','BARRIER IN MWAVE_IOBS2GP') + +CALL GSTATS_LABEL(920,'MPL','BCAST IN BCASTCOV') +CALL GSTATS_LABEL(921,'GBR','GBAR IN BCASTCOV') + +! OMP section +! oct-03 the following OMP gstats statements has not been checked +! 1014, 1015, 1045, 1053, 1072, 1078, 1080, 1082, 1134, 1170 +! 1201, 1214, 1215, 1219, 1412, 1424 +! 1607, 1608, 1610, 1611, 1543, 1644, 1651 + +CALL GSTATS_LABEL(1001,'OMP','PHYSICS ') +CALL GSTATS_LABEL(1002,'OMP','PHYSICS CLDPP T/S') +CALL GSTATS_LABEL(1004,'OMP','CALL_SL 1') +CALL GSTATS_LABEL(1005,'OMP','CALL_SL 2') +CALL GSTATS_LABEL(1006,'OMP','GP_MODEL_TL 1') +CALL GSTATS_LABEL(1007,'OMP','GP_MODEL_TL 2') +CALL GSTATS_LABEL(1008,'OMP','GP_MODEL_TL 3') +CALL GSTATS_LABEL(1009,'OMP','CALL_SL_TL 1') +CALL GSTATS_LABEL(1010,'OMP','CALL_SL_TL 2') +CALL GSTATS_LABEL(1011,'OMP','GP_MODEL_AD 1') +CALL GSTATS_LABEL(1012,'OMP','GP_MODEL_AD 2') +CALL GSTATS_LABEL(1013,'OMP','EC_PHYS_DRV_AD 3') +CALL GSTATS_LABEL(1014,'OMP','GP_MODEL_AD 4') +CALL GSTATS_LABEL(1015,'OMP','GP_MODEL_AD 5') +CALL GSTATS_LABEL(1016,'OMP','GP_MODEL_AD 6') +CALL GSTATS_LABEL(1017,'OMP','CALL_SL_AD 1') +CALL GSTATS_LABEL(1018,'OMP','CALL_SL_AD 2') +CALL GSTATS_LABEL(1019,'OMP','MKGLOBSTAB ') +CALL GSTATS_LABEL(1020,'OMP','SLINT LAID*IOBS ') +CALL GSTATS_LABEL(1021,'OMP','LAIDLIC ') +CALL GSTATS_LABEL(1022,'OMP','SPCM 1') +CALL GSTATS_LABEL(1023,'OMP','SPCM 2') +CALL GSTATS_LABEL(1024,'OMP','SPCM 3') +CALL GSTATS_LABEL(1025,'OMP','CPG ') +CALL GSTATS_LABEL(1028,'OMP','SPCIMPFSOLVE ') +CALL GSTATS_LABEL(1029,'OMP','SPCMAD 1') +CALL GSTATS_LABEL(1030,'OMP','SPCMAD 2') +CALL GSTATS_LABEL(1031,'OMP','SPCMAD 3') +CALL GSTATS_LABEL(1032,'OMP','SCAN2M GFL ') +CALL GSTATS_LABEL(1033,'OMP','SCAN2M 2') +CALL GSTATS_LABEL(1034,'OMP','SCAN2M COBS ') +CALL GSTATS_LABEL(1035,'OMP','SPCHORAD ') +CALL GSTATS_LABEL(1036,'OMP','TRANSDIR_MDL ') +CALL GSTATS_LABEL(1037,'OMP','SPCHOR ') +CALL GSTATS_LABEL(1038,'OMP','SPECRT GPRCP') +CALL GSTATS_LABEL(1039,'OMP','SPCIMPFSOLVEAD') +CALL GSTATS_LABEL(1040,'OMP','GP_MODEL CPGLAG') +CALL GSTATS_LABEL(1041,'OMP','SCAN2MTL 1') +CALL GSTATS_LABEL(1042,'OMP','SCAN2MAD 1') +CALL GSTATS_LABEL(1043,'OMP','SCAN2MAD 2') +CALL GSTATS_LABEL(1044,'OMP','SCAN2MAD 3') +CALL GSTATS_LABEL(1045,'OMP','SCAN2MAD 4') +CALL GSTATS_LABEL(1046,'OMP','SCAN2MTL 2') +CALL GSTATS_LABEL(1047,'OMP','SCAN2MAD 5') +CALL GSTATS_LABEL(1048,'OMP','SPNORMBM 1') +CALL GSTATS_LABEL(1049,'OMP','CVARGPAD ') +CALL GSTATS_LABEL(1050,'OMP','TRAJ_MAIN_MOD 1') +CALL GSTATS_LABEL(1051,'OMP','TRAJ_MAIN_MOD 2') +CALL GSTATS_LABEL(1052,'OMP','TRAJ_MAIN_MOD 3') +CALL GSTATS_LABEL(1053,'OMP','TRAJ_MAIN_MOD 4') +CALL GSTATS_LABEL(1054,'OMP','PACK_LOC_MS 1') +CALL GSTATS_LABEL(1055,'OMP','READ_GRID_TRAJ 1') +CALL GSTATS_LABEL(1056,'OMP','READ_GRID_TRAJ 2') +CALL GSTATS_LABEL(1057,'OMP','READ_TRAJ_SPEC 1') +CALL GSTATS_LABEL(1058,'OMP','READ_TRAJ_SPEC 2') +CALL GSTATS_LABEL(1059,'OMP','READ_TRAJ_SPEC 3') +CALL GSTATS_LABEL(1060,'OMP','TASKOB 1') +CALL GSTATS_LABEL(1061,'OMP','TASKOBTL 1') +CALL GSTATS_LABEL(1062,'OMP','TASKOBAD 1') +CALL GSTATS_LABEL(1063,'OMP','GPMKTEND ') +CALL GSTATS_LABEL(1064,'OMP','CPQTUV ') +CALL GSTATS_LABEL(1065,'OMP','IN-COAPHY ') +CALL GSTATS_LABEL(1066,'OMP','OUT-COAPHY ') +CALL GSTATS_LABEL(1067,'OMP','TASKOBAD INIT') +CALL GSTATS_LABEL(1068,'OMP','TASKOB INIT') +CALL GSTATS_LABEL(1082,'OMP','SCREEN ') +CALL GSTATS_LABEL(1085,'OMP','LAIDLIOBSAD ') +CALL GSTATS_LABEL(1090,'OMP','BALVERT ') +CALL GSTATS_LABEL(1092,'OMP','BALVERTAD ') +CALL GSTATS_LABEL(1094,'OMP','DIGFIL ') +CALL GSTATS_LABEL(1096,'OMP','JGHCORI ') +CALL GSTATS_LABEL(1097,'OMP','JGVCORI ') +CALL GSTATS_LABEL(1098,'OMP','NABLA GP_DERS') +CALL GSTATS_LABEL(1099,'OMP','COPY 1 GP_DERS') +CALL GSTATS_LABEL(1100,'OMP','COPY 2 GP_DERS') +CALL GSTATS_LABEL(1109,'OMP','TRSTOM 1') +CALL GSTATS_LABEL(1110,'OMP','TRSTOM 2') +CALL GSTATS_LABEL(1111,'OMP','TRSTOM 3') +CALL GSTATS_LABEL(1112,'OMP','TRMTOS 1') +CALL GSTATS_LABEL(1113,'OMP','TRMTOS 2') +CALL GSTATS_LABEL(1114,'OMP','TRMTOS 3') +CALL GSTATS_LABEL(1115,'OMP','SLCOMM 1') +CALL GSTATS_LABEL(1116,'OMP','SLCOMM 2') +CALL GSTATS_LABEL(1118,'OMP','SLCOMM2a 1') +CALL GSTATS_LABEL(1119,'OMP','SLCOMM1 1') +CALL GSTATS_LABEL(1120,'OMP','SLCOMM1 2') +CALL GSTATS_LABEL(1121,'OMP','SLCOMM2a 2') +CALL GSTATS_LABEL(1132,'OMP','SLEXTPOLAD ') +CALL GSTATS_LABEL(1133,'OMP','SLEXTPOL ') +CALL GSTATS_LABEL(1135,'OMP','GOM_MSG_PASS_AD') +CALL GSTATS_LABEL(1136,'OMP','GOM_CONSTRUCT_OBS') +CALL GSTATS_LABEL(1140,'OMP','DRESDDH ') +CALL GSTATS_LABEL(1141,'OMP','SUTRLE ') +CALL GSTATS_LABEL(1170,'OMP','DIST_VECTOR 1') + +CALL GSTATS_LABEL(1201,'OMP','RADDRV 1') +CALL GSTATS_LABEL(1202,'OMP','RADDRV 2') +CALL GSTATS_LABEL(1203,'OMP','RADDRV 3') +CALL GSTATS_LABEL(1205,'OMP','RADINTG-INPUT LOOP1') +CALL GSTATS_LABEL(1206,'OMP','RADINTG-INPUT LOOP2') +CALL GSTATS_LABEL(1207,'OMP','RADINTG-INPUT LOOP3') +CALL GSTATS_LABEL(1208,'OMP','RADINTG-INPUT LOOP4') +CALL GSTATS_LABEL(1209,'OMP','RADINTG-INPUT LOOP5') +CALL GSTATS_LABEL(1210,'OMP','RADINTG-RADLSW') +CALL GSTATS_LABEL(1211,'OMP','RADINTG-OUTPUT LOOP1') +CALL GSTATS_LABEL(1212,'OMP','RADINTG-OUTPUT LOOP2') +CALL GSTATS_LABEL(1213,'OMP','RADINTG-OUTPUT LOOP3') +CALL GSTATS_LABEL(1214,'OMP','COPY_SPEC_2SPA 1') +CALL GSTATS_LABEL(1215,'OMP','COPY_SPEC_2SPA 2') +CALL GSTATS_LABEL(1216,'OMP','GPNORM2') +CALL GSTATS_LABEL(1217,'OMP','SUALSPA 1') +CALL GSTATS_LABEL(1219,'OMP','SUALSPA 3') +CALL GSTATS_LABEL(1220,'OMP','SL2_PACK') +CALL GSTATS_LABEL(1221,'OMP','SL2_UNPACK') +CALL GSTATS_LABEL(1222,'OMP','GP_MODEL_AD INIT ') +CALL GSTATS_LABEL(1223,'OMP','GPNORM3') +CALL GSTATS_LABEL(1224,'OMP','COPYGOM5T0 ') +CALL GSTATS_LABEL(1225,'OMP','BALNONLINTL ') +CALL GSTATS_LABEL(1226,'OMP','BALNONLINAD ') +CALL GSTATS_LABEL(1227,'OMP','GP_MODEL_TL INIT ') +CALL GSTATS_LABEL(1228,'OMP','GP_MODEL INIT ') +CALL GSTATS_LABEL(1230,'OMP','PRECOND ') +CALL GSTATS_LABEL(1231,'OMP','CONGRAD OMP') +CALL GSTATS_LABEL(1232,'OMP','DOT_PRODUCT_CTLVEC OMP 1') +CALL GSTATS_LABEL(1233,'OMP','CONGRAD OMP 1') +CALL GSTATS_LABEL(1234,'OMP','DOT_PRODUCT_CTLVEC OMP 2') +CALL GSTATS_LABEL(1235,'OMP','PREPPCM ') +CALL GSTATS_LABEL(1236,'OMP','WAM OMP ') +CALL GSTATS_LABEL(1237,'OMP','MKCMARPL OMP') +CALL GSTATS_LABEL(1238,'OMP','XFORMEV') +CALL GSTATS_LABEL(1239,'OMP','WREVECS') + +CALL GSTATS_LABEL(1251,'OMP','SULEG - SUPOLF') +CALL GSTATS_LABEL(1252,'OMP','SULEG - CONSTRUCT_BUTTERFLY') +CALL GSTATS_LABEL(1253,'OMP','CONSTRUCT BUTTERFLY ') + +CALL GSTATS_LABEL(1401,'OMP','SETUP GAWLM') +CALL GSTATS_LABEL(1403,'OMP','SETUP MODGRIN 2') +CALL GSTATS_LABEL(1404,'OMP','SETUP MODGRIN 3') +CALL GSTATS_LABEL(1405,'OMP','SETUP SUGEM2 1') +CALL GSTATS_LABEL(1406,'OMP','SETUP SUGEM2 2') +CALL GSTATS_LABEL(1407,'OMP','SETUP SUGRIDF 1') +CALL GSTATS_LABEL(1408,'OMP','SETUP SUGRIDF 2') +CALL GSTATS_LABEL(1409,'OMP','SETUP SUGRIDG X') +CALL GSTATS_LABEL(1411,'OMP','SETUP SUGRIDUG 2') +CALL GSTATS_LABEL(1412,'OMP','SETUP SURAND2') +CALL GSTATS_LABEL(1424,'OMP','SETUP SUSPECG 2') +CALL GSTATS_LABEL(1425,'OMP','WVXF2GB ') +CALL GSTATS_LABEL(1426,'OMP','WVRG2XF ') +CALL GSTATS_LABEL(1427,'OMP','GPNORM1 ') +CALL GSTATS_LABEL(1428,'OMP','GATHERGPF ') +CALL GSTATS_LABEL(1429,'OMP','GPNORM_TRANS ') +CALL GSTATS_LABEL(1430,'OMP','WAMODEL 1') +CALL GSTATS_LABEL(1431,'OMP','WAMODEL 2') +CALL GSTATS_LABEL(1432,'OMP','WAMODEL 3') +CALL GSTATS_LABEL(1433,'OMP','SPEC2FDB WAM ') +CALL GSTATS_LABEL(1434,'OMP','SCAN2M HPOS ') +CALL GSTATS_LABEL(1435,'OMP','SCAN2M VPOS ') +CALL GSTATS_LABEL(1436,'OMP','IFSTOWAM 1 ') +CALL GSTATS_LABEL(1437,'OMP','IFSTOWAM 2 ') +CALL GSTATS_LABEL(1438,'OMP','WRITESD ') +CALL GSTATS_LABEL(1439,'OMP','WAMODEL 4') +CALL GSTATS_LABEL(1440,'OMP','GP_NEAREST') +CALL GSTATS_LABEL(1441,'OMP','JBVCOR_WAVELETIN ') +CALL GSTATS_LABEL(1442,'OMP','JBVCOR_WAVELETINAD ') +CALL GSTATS_LABEL(1443,'OMP','WAVXFORM ') +CALL GSTATS_LABEL(1444,'OMP','BGVECS ') +CALL GSTATS_LABEL(1445,'OMP','CVAR2IN ') +CALL GSTATS_LABEL(1446,'OMP','CVARBCIN ') +CALL GSTATS_LABEL(1447,'OMP','CVARBCINAD ') +CALL GSTATS_LABEL(1448,'OMP','CVAR2INAD ') +CALL GSTATS_LABEL(1450,'OMP','TRANSDIR_WAVELET ') +CALL GSTATS_LABEL(1451,'OMP','TRANSDIR_WAVELETAD ') +CALL GSTATS_LABEL(1452,'OMP','TRANSINV_WAVELET ') +CALL GSTATS_LABEL(1453,'OMP','TRANSINV_WAVELETAD ') +CALL GSTATS_LABEL(1454,'OMP','JBTOMODEL ') +CALL GSTATS_LABEL(1455,'OMP','JBTOMODELAD ') +CALL GSTATS_LABEL(1456,'OMP','SUJQCOR ') +CALL GSTATS_LABEL(1460,'OMP','assign_cv_cv ') +CALL GSTATS_LABEL(1461,'OMP','CVARGPTL ') +CALL GSTATS_LABEL(1465,'OMP','STORE_TRAJ_MAIN') +CALL GSTATS_LABEL(1470,'OMP','GRID_BICONSERV ') +CALL GSTATS_LABEL(1471,'OMP','GRID_BICUBIC ') +CALL GSTATS_LABEL(1472,'OMP','GRID_BILINEAR ') +CALL GSTATS_LABEL(1475,'OMP','READ_SPEC ') +CALL GSTATS_LABEL(1476,'OMP','WRITE_SPEC ') +CALL GSTATS_LABEL(1477,'OMP','IOSTREAM_MIX: COPYGP_GLOB2LOC ') +CALL GSTATS_LABEL(1478,'OMP','IOSTREAM_MIX: COPYGP_LOC2GLOB ') +CALL GSTATS_LABEL(1479,'OMP','IOSTREAM_MIX: SPEC_OUT ') +CALL GSTATS_LABEL(1480,'OMP','IOSTREAM_MIX: SPEC_IN ') +CALL GSTATS_LABEL(1481,'OMP','IOSTREAM_MIX: GRID_IN ') +CALL GSTATS_LABEL(1482,'OMP','IOSTREAM_MIX: GATH_GRID_FP ') +CALL GSTATS_LABEL(1483,'OMP','IOSTREAM_MIX: WRITE_RECORD ') +CALL GSTATS_LABEL(1484,'OMP','IOSTREAM_MIX: GRIB_API ENC') +CALL GSTATS_LABEL(1490,'OMP','MWAVE_NEAREST') +CALL GSTATS_LABEL(1491,'OMP','WAM CLOSEND') +CALL GSTATS_LABEL(1492,'OMP','WAM NEWWIND') +CALL GSTATS_LABEL(1493,'OMP','WAM NOTIM') +CALL GSTATS_LABEL(1494,'OMP','WAM TIMIN') +CALL GSTATS_LABEL(1495,'OMP','WAM WGRIBOUT') +CALL GSTATS_LABEL(1496,'OMP','WAM OUTWSPEC') +CALL GSTATS_LABEL(1497,'OMP','WAM MPDECOMP') +CALL GSTATS_LABEL(1501,'OMP','WAM INIT_FIELDG') +CALL GSTATS_LABEL(1502,'OMP','WAM OUTBS') +CALL GSTATS_LABEL(1503,'OMP','WAM MPFLDTOIFS') + +! counters 1600 to 1700 : trans package for OMP + +CALL GSTATS_LABEL(1601,'OMP','TRGTOL LOCAL ') +CALL GSTATS_LABEL(1602,'OMP','TRGTOL PACK ') +CALL GSTATS_LABEL(1603,'OMP','TRGTOL UNPACK') +CALL GSTATS_LABEL(1604,'OMP','TRLTOG LOCAL ') +CALL GSTATS_LABEL(1605,'OMP','TRLTOG PACK ') +CALL GSTATS_LABEL(1606,'OMP','TRLTOG UNPACK') +CALL GSTATS_LABEL(1607,'OMP','TRLTOM ') +CALL GSTATS_LABEL(1608,'OMP','TRMTOL ') +CALL GSTATS_LABEL(1639,'OMP','FTINV_CTL ') +CALL GSTATS_LABEL(1640,'OMP','FTDIR_CTL ') +CALL GSTATS_LABEL(1641,'OMP','FTINV_CTLAD ') +CALL GSTATS_LABEL(1642,'OMP','FTDIR_CTLAD ') +CALL GSTATS_LABEL(1643,'OMP','GATH_GRID_CTL ') +CALL GSTATS_LABEL(1644,'OMP','GATH_SPEC_CONTROL ') +CALL GSTATS_LABEL(1645,'OMP','LTDIR_CTL - DIRECT LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(1646,'OMP','LTDIR_CTLAD - ADJ. DIRECT LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(1647,'OMP','LTINV_CTL - INVERSE LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(1648,'OMP','LTINV_CTLAD - ADJ. INVERSE LEGENDRE TRANSFORM') +CALL GSTATS_LABEL(1650,'OMP','SUGAW_MOD ') +CALL GSTATS_LABEL(1651,'OMP','SPNORMD ') +CALL GSTATS_LABEL(1655,'OMP','SPCSI LDONEM=F SIGAM') +CALL GSTATS_LABEL(1656,'OMP','SPCSI LDONEM=F') +CALL GSTATS_LABEL(1657,'OMP','SPCSI LDONEM=F SITNU') +CALL GSTATS_LABEL(1658,'OMP','SPCSIAD LDONEM=F SIGAMAD') +CALL GSTATS_LABEL(1659,'OMP','SPCSIAD LDONEM=F') +CALL GSTATS_LABEL(1660,'OMP','SPCSI LDONEM=F MXMAOP') +CALL GSTATS_LABEL(1661,'OMP','WVCOUPLE Triple loop') +CALL GSTATS_LABEL(1662,'OMP','SPCSIAD LDONEM=F SITNUAD') +CALL GSTATS_LABEL(1663,'OMP','DIST_GRID_CTL ') +CALL GSTATS_LABEL(1664,'OMP','SPCSIAD LDONEM=F MXMAOP') + +!SERIAL PART + +! IO counter 1700 ---> 1800 +CALL GSTATS_LABEL(1705,'IO-','SUJBDAT ') +CALL GSTATS_LABEL(1706,'IO-','SUJBBAL ') +CALL GSTATS_LABEL(1709,'IO-','WGRIBOUT WAM') +CALL GSTATS_LABEL(1710,'IO-','CNT4 IFLUSHFDB') +CALL GSTATS_LABEL(1711,'IO-','WRVARBC') +CALL GSTATS_LABEL(1712,'IO-','RDVARBC') +CALL GSTATS_LABEL(1713,'IO-','WRITESD FDB ') +CALL GSTATS_LABEL(1714,'SER','WVXF2GB') +CALL GSTATS_LABEL(1737,'IO-','SUOBS GET_RS_T_BIAS') +CALL GSTATS_LABEL(1738,'IO-','CNT4TL PPCLOSE ') +CALL GSTATS_LABEL(1739,'IO-','DB IN OBSHOR ') +CALL GSTATS_LABEL(1740,'IO-','READWIND WAM ') +CALL GSTATS_LABEL(1741,'IO-','DB IN SUOBSCOR ') +CALL GSTATS_LABEL(1742,'SER','PREGPFPOS ') +CALL GSTATS_LABEL(1752,'IO-','OPEN IN IOSTREAM ') +CALL GSTATS_LABEL(1753,'IO-','CLOSE IN IOSTREAM ') +CALL GSTATS_LABEL(1761,'SER','ECSET ') +CALL GSTATS_LABEL(1764,'IO-','WRITE IN IOSTREAM ') +CALL GSTATS_LABEL(1765,'IO-','READ IN IOSTREAM ') +CALL GSTATS_LABEL(1766,'IO-','RTSETUP I/O') +CALL GSTATS_LABEL(1767,'IO-','DB IN RD_OBS_BOXES ') +CALL GSTATS_LABEL(1770,'IO-','READT WAM I/O ') +CALL GSTATS_LABEL(1771,'IO-','MPDECOMP WAM I/O ') +CALL GSTATS_LABEL(1772,'IO-','PBGRIB IN GETSPEC ') +CALL GSTATS_LABEL(1773,'IO-','SHUFFLE_ODB I/O ') +CALL GSTATS_LABEL(1774,'IO-','READOBA distribute_odb ') +CALL GSTATS_LABEL(1775,'IO-','WRITEOBA revmatchupdb ') +CALL GSTATS_LABEL(1776,'IO-','PRESPFPOS ') +CALL GSTATS_LABEL(1777,'IO-','READOBA matchupdb ') +CALL GSTATS_LABEL(1778,'IO-','READOBA grid_nearest ') +CALL GSTATS_LABEL(1783,'IO-','DB IN MKGLOBSTAB ') +CALL GSTATS_LABEL(1784,'IO-','DB IN SURAD ') +CALL GSTATS_LABEL(1785,'IO-','DB IN SUAMV ') +CALL GSTATS_LABEL(1786,'IO-','SARINVERT WRITE ') +CALL GSTATS_LABEL(1787,'IO-','ICLOSEFDB in WAM ') +CALL GSTATS_LABEL(1788,'IO-','DB IN ECSET ') +CALL GSTATS_LABEL(1789,'IO-','DB IN SCREEN ') +CALL GSTATS_LABEL(1790,'IO-','DB IN SCREEN ') +CALL GSTATS_LABEL(1791,'IO-','DB in READOBA ') +CALL GSTATS_LABEL(1792,'IO-','DB in WRITEOBA ') +CALL GSTATS_LABEL(1793,'IO-','DB in OBATABS ') +CALL GSTATS_LABEL(1796,'IO-','DB in SUREO3 ') +CALL GSTATS_LABEL(1797,'IO-','DB in SULIMB ') +CALL GSTATS_LABEL(1798,'IO-','SUJQDATA ') +CALL GSTATS_LABEL(1799,'IO-','DB in SUOBAREA ') +! counters 1800 to 1810 : trans package for serial part +CALL GSTATS_LABEL(1801,'SER','SULEG ') +CALL GSTATS_LABEL(1802,'SER','SETUP_TRANS ') +CALL GSTATS_LABEL(1803,'SER','SUTRLE ') +CALL GSTATS_LABEL(1804,'SER','DIST_SPEC_CONTROL_SERIAL') +CALL GSTATS_LABEL(1805,'SER','TRGTOL init') +CALL GSTATS_LABEL(1806,'SER','TRLTOG init') +CALL GSTATS_LABEL(1807,'SER','INV_TRANS init') +CALL GSTATS_LABEL(1808,'SER','DIR_TRANS init') +CALL GSTATS_LABEL(1809,'SER','INV_TRANSAD init') +CALL GSTATS_LABEL(1810,'SER','DIR_TRANSAD init') +CALL GSTATS_LABEL(1811,'SER','SUPHEC ') +CALL GSTATS_LABEL(1812,'SER','EVJCDFI array ') +CALL GSTATS_LABEL(1813,'SER','SLCOMM2a ') +CALL GSTATS_LABEL(1815,'SER','SLEXTPOLAD array ') +CALL GSTATS_LABEL(1817,'SER','SUVARBC ') +CALL GSTATS_LABEL(1818,'SER','SUECRAD ') +CALL GSTATS_LABEL(1819,'SER','COMMJBDAT array ') +CALL GSTATS_LABEL(1820,'SER','COMMJBDAT zbuf ') +CALL GSTATS_LABEL(1821,'SER','COMMJBBAL zbuf ') +CALL GSTATS_LABEL(1822,'SER','COMMJBBAL array ') +CALL GSTATS_LABEL(1823,'SER','TRANSDIR_MDLAD array ') +CALL GSTATS_LABEL(1824,'SER','COMTC doloops ') +CALL GSTATS_LABEL(1825,'SER','GATHERTC array ') +CALL GSTATS_LABEL(1826,'SER','GATHERCOSTO array ') +CALL GSTATS_LABEL(1827,'SER','TRANSINV_MD 1') +CALL GSTATS_LABEL(1828,'SER','TRANSINV_MD 2') +CALL GSTATS_LABEL(1829,'SER','PRE_PRSTA in SCREEN ') +CALL GSTATS_LABEL(1830,'SER','SUSCAL ') +CALL GSTATS_LABEL(1831,'SER','COMTC array ') +CALL GSTATS_LABEL(1832,'SER','CNT4AD OBSPREP ') +CALL GSTATS_LABEL(1833,'SER','SETUP SUSPECG ') +CALL GSTATS_LABEL(1834,'SER','SLEXTPOL1 doloop ') +CALL GSTATS_LABEL(1835,'SER','BALOMEGATL zeroing ') +CALL GSTATS_LABEL(1836,'SER','BALOMEGATL do loop ') +CALL GSTATS_LABEL(1837,'SER','BALOMEGATL array ') +CALL GSTATS_LABEL(1838,'SER','CAIN ') +CALL GSTATS_LABEL(1839,'SER','GETMINI array ') +CALL GSTATS_LABEL(1840,'SER','BALOMEGATL arrays ') +CALL GSTATS_LABEL(1841,'SER','BALOMEGAAD arrays 1 ') +CALL GSTATS_LABEL(1842,'SER','BALOMEGAAD arrays 2 ') +CALL GSTATS_LABEL(1843,'SER','WRITE_SPEC ') +CALL GSTATS_LABEL(1844,'SER','CVAR2IN ADDBGS ') +CALL GSTATS_LABEL(1845,'SER','CNT4 SIGMASTER ') +CALL GSTATS_LABEL(1846,'SER','SCATTER_CTLVEC pack') +CALL GSTATS_LABEL(1847,'SER','MULTISCATTER_CTLVEC pack') +CALL GSTATS_LABEL(1849,'SER','THIN_RED_PRESORT ') +CALL GSTATS_LABEL(1850,'SER','COPY_SPEC_2SPA arrays ') +CALL GSTATS_LABEL(1852,'SER','NEW_THINNER_NO_SQ doloop ') +CALL GSTATS_LABEL(1853,'SER','NEW_THINNER_NO_SQ KEYSORT ') +CALL GSTATS_LABEL(1856,'SER','SLINT setup ') +CALL GSTATS_LABEL(1857,'SER','GATHER_CTLVEC unpack ') +CALL GSTATS_LABEL(1858,'SER','READGRIB array ') +CALL GSTATS_LABEL(1859,'SER','MULTIGATHER_CTLVEC unpack ') +CALL GSTATS_LABEL(1860,'SER','SCAN2MTL Gaussian dist. ') +CALL GSTATS_LABEL(1861,'SER','OBSHORAD ') +CALL GSTATS_LABEL(1863,'SER','SBSFGS array ') +CALL GSTATS_LABEL(1864,'SER','GATHERTC doloop ') +CALL GSTATS_LABEL(1866,'SER','PRECH doloop') +CALL GSTATS_LABEL(1868,'SER','SUHIFCEAD') +CALL GSTATS_LABEL(1869,'SER','SLINTAD ') +CALL GSTATS_LABEL(1871,'SER','ESTSIG array ') +CALL GSTATS_LABEL(1872,'SER','TRANSDIR_MDLAD ') +CALL GSTATS_LABEL(1873,'SER','BALNONLINAD DOLOOP') +CALL GSTATS_LABEL(1875,'SER','SUGRIDUG array ') +CALL GSTATS_LABEL(1876,'SER','BCASTCOV PACK/UNPACK ') +CALL GSTATS_LABEL(1877,'SER','MKGLOBSTAB') +CALL GSTATS_LABEL(1878,'SER','IOSTREAM GRIB_API ENC') +CALL GSTATS_LABEL(1879,'SER','IOSTREAM GRIB_API DEC') +CALL GSTATS_LABEL(1880,'SER','SUOBS STAGE 1') +CALL GSTATS_LABEL(1881,'SER','ORCVGPF doloop ') +CALL GSTATS_LABEL(1883,'SER','SARAS WAM ') +CALL GSTATS_LABEL(1884,'SER','SARINVERT WAM ') +CALL GSTATS_LABEL(1886,'SER','GETSPEC WAM ') +CALL GSTATS_LABEL(1887,'SER','ESTSIG doloop ') +CALL GSTATS_LABEL(1888,'SER','GETSPEC WAM INQUIRE ') +CALL GSTATS_LABEL(1889,'SER','stepotl user_clock ') +CALL GSTATS_LABEL(1890,'SER','BRPTOB array1 ') +CALL GSTATS_LABEL(1891,'SER','BRPTOB array2 ') +CALL GSTATS_LABEL(1901,'SER','CONSTRUCT BUTTERFLY ') + +!CALL GSTATS_LABEL(1903,'SER','CNT4 IFLUSHFDB') +CALL GSTATS_LABEL(1904,'SER','UPDTIM setup ') +CALL GSTATS_LABEL(1905,'SER','UPDTIM update ') +CALL GSTATS_LABEL(1906,'SER','RADDIAG in RADDRV ') +CALL GSTATS_LABEL(1909,'SER','WAMODEL ') +CALL GSTATS_LABEL(1911,'SER','FPOSHOR doloop ') +CALL GSTATS_LABEL(1912,'SER','PPSYDH ') +CALL GSTATS_LABEL(1913,'SER','PPEDDHEC ') +CALL GSTATS_LABEL(1916,'SER','WROUTGPGB reorder ') +CALL GSTATS_LABEL(1917,'SER','WRPLFP ') +CALL GSTATS_LABEL(1918,'SER','CONGRAD ') +CALL GSTATS_LABEL(1924,'SER','WVCOUPLE WVALLOC ') +CALL GSTATS_LABEL(1925,'SER','GPNORM1 doloop ') +CALL GSTATS_LABEL(1928,'SER','SUJBCOR 2 ') +CALL GSTATS_LABEL(1929,'SER','SUJBCOR 3 ') +CALL GSTATS_LABEL(1930,'SER','OPDIS ') +CALL GSTATS_LABEL(1931,'SER','COMMSPNORM DOLOOP ') +CALL GSTATS_LABEL(1933,'SER','DOT_PRODUCT ') +CALL GSTATS_LABEL(1934,'SER','SU0YOMA ') +CALL GSTATS_LABEL(1935,'SER','OBADAT ') +CALL GSTATS_LABEL(1936,'SER','SUVAZX VARBC ') +CALL GSTATS_LABEL(1937,'SER','LAID*IOBS IN SLINT ') +CALL GSTATS_LABEL(1938,'SER','STORE_TRAJ_MAIN ') +CALL GSTATS_LABEL(1939,'SER','SUSC2B') +CALL GSTATS_LABEL(1940,'SER','CNT4 SMS ') +CALL GSTATS_LABEL(1941,'SER','CNT4 file') +CALL GSTATS_LABEL(1942,'SER','pack_loc_tovs ') +CALL GSTATS_LABEL(1943,'SER','GRID_BICONSERV ') +CALL GSTATS_LABEL(1944,'SER','GRID_BICUBIC ') +CALL GSTATS_LABEL(1945,'SER','GRID_BILINEAR ') +CALL GSTATS_LABEL(1946,'SER','WRITE_GRID_GRIB doloop ') +CALL GSTATS_LABEL(1948,'SER','ECSET SETTC') +CALL GSTATS_LABEL(1949,'SER','ECSET ') +CALL GSTATS_LABEL(1950,'SER','SPNORMAVE array') +CALL GSTATS_LABEL(1952,'SER','WRMLFP do1') +CALL GSTATS_LABEL(1953,'SER','WRMLFP do2') +CALL GSTATS_LABEL(1954,'SER','SURAD SUADVAR ') +CALL GSTATS_LABEL(1956,'SER','copy_spa_spec ') +CALL GSTATS_LABEL(1957,'SER','MKGLOBSTAB KEYSORT ') +CALL GSTATS_LABEL(1958,'SER','BGVECS + Gaussian dist') +CALL GSTATS_LABEL(1959,'SER','PREPPCM ') +CALL GSTATS_LABEL(1960,'SER','DOT_PRODUCT_CTLVEC BROADCREAL ') +CALL GSTATS_LABEL(1961,'SER','EVCOST SUM ') +CALL GSTATS_LABEL(1962,'SER','DOT_PRODUCT_CTLVEC 1 ') +CALL GSTATS_LABEL(1963,'SER','DOT_PRODUCT_CTLVEC 2 ') +CALL GSTATS_LABEL(1965,'SER','DECIS IN LOOP3 ') +CALL GSTATS_LABEL(1966,'SER','DECIS DUPLI ') +CALL GSTATS_LABEL(1967,'SER','REDUN 1 ') +CALL GSTATS_LABEL(1969,'SER','REDUN 4 ') +CALL GSTATS_LABEL(1973,'SER','CNT2 ') +CALL GSTATS_LABEL(1974,'SER','SUOBS SU_ERRORS ') +CALL GSTATS_LABEL(1976,'SER','IFLUSHFDBSUBS WAM ') +CALL GSTATS_LABEL(1977,'SER','SU_GRIB_API ') +CALL GSTATS_LABEL(1978,'SER','VARBC_SETUP_TRAJ') +CALL GSTATS_LABEL(1979,'SER','VARBC_SETUP_MIN') +CALL GSTATS_LABEL(1980,'SER','STEPO ') +CALL GSTATS_LABEL(1984,'SER','GETCURR WAM ') +CALL GSTATS_LABEL(1985,'SER','in GP_MODEL_AD 7 ') +CALL GSTATS_LABEL(1987,'SER','SPEC2FDB WAM ') + +!section serial but possible to put OMP +! timing < 1% +CALL GSTATS_LABEL(1892,'SER','MPEXCHNG WAM doloop1 ') +CALL GSTATS_LABEL(1893,'SER','MPEXCHNG WAM doloop2 ') +CALL GSTATS_LABEL(1894,'SER','TRANSINV_MDL') +CALL GSTATS_LABEL(1895,'SER','TRAJ_MAIN_MOD 3') +CALL GSTATS_LABEL(1896,'SER','SCAN2MTL 3') +CALL GSTATS_LABEL(1898,'SER','DIGFILAD ') +CALL GSTATS_LABEL(1899,'SER','BALOMEGAAD do ') +CALL GSTATS_LABEL(1986,'SER','CVARGPTL ') +CALL GSTATS_LABEL(1988,'SER','GEFGER ') +CALL GSTATS_LABEL(1989,'SER','GRID_BILINEAR 2 ') +CALL GSTATS_LABEL(1991,'SER','SUJBCOSU ') +CALL GSTATS_LABEL(1992,'SER','TRANSDIR_FP ') +CALL GSTATS_LABEL(1993,'SER','WVWG2RG ') +CALL GSTATS_LABEL(1994,'SER','WVCOUPLE 3') +CALL GSTATS_LABEL(1995,'SER','ESTSIG ') +CALL GSTATS_LABEL(1996,'SER','MAKEGRID WAM ') +CALL GSTATS_LABEL(1997,'SER','WROUTSPGB ') +CALL GSTATS_LABEL(1998,'SER','SUSPECG array ') +CALL GSTATS_LABEL(1999,'SER','GOM_PRINT_RMS') + +!more SER counters +CALL GSTATS_LABEL(2000,'SER','WAVXFORM ') +CALL GSTATS_LABEL(2001,'SER','SUBFPOS ') +CALL GSTATS_LABEL(2002,'SER','PPFIDH ') +CALL GSTATS_LABEL(2003,'SER','WRPGR ') +CALL GSTATS_LABEL(2010,'SER','MWAVE_PUT_TL') +CALL GSTATS_LABEL(2011,'SER','MWAVE_PUT_AD') +CALL GSTATS_LABEL(2012,'SER','MWAVE_NEAREST') +CALL GSTATS_LABEL(2013,'SER','MWAVE_PUT') +CALL GSTATS_LABEL(2014,'SER','MWAVE_GET') +CALL GSTATS_LABEL(2015,'SER','MWAVE_GET_TL') +CALL GSTATS_LABEL(2016,'SER','MWAVE_OBS2GP') +CALL GSTATS_LABEL(2017,'SER','MWAVE_IOBS2GP') +CALL GSTATS_LABEL(2023,'SER','GOM_MAKE_POST_INTERP') + +! MXD counters - for when we don't know how to categorise + +LGSTATS_LABEL=.TRUE. +IF (LHOOK) CALL DR_HOOK('GSTATS_LABEL_IFS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE GSTATS_LABEL_IFS diff --git a/src/trans/gpu/internal/inigptr_mod.F90 b/src/trans/gpu/internal/inigptr_mod.F90 new file mode 100755 index 0000000..83f2ab8 --- /dev/null +++ b/src/trans/gpu/internal/inigptr_mod.F90 @@ -0,0 +1,92 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INIGPTR_MOD +CONTAINS +SUBROUTINE INIGPTR(KGPTRSEND,KGPTRRECV) + +! Compute tables to assist GP to/from Fourier space transpositions + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DISTR ,ONLY : D, NPRTRNS +USE TPM_TRANS ,ONLY : NGPBLKS, NPROMA +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(OUT) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) +INTEGER(KIND=JPIM),INTENT(OUT) :: KGPTRRECV(NPRTRNS) + +INTEGER(KIND=JPIM) :: IBLOCK,IROF,IBFIRST,IPROCLAST,IPROC,IFIRST,ILAST,IBLAST +INTEGER(KIND=JPIM) :: JGL,JBL,JPRTRNS,JBLKS +! Compute tables to assist GP to/from Fourier space transpositions + + +KGPTRSEND(:,:,:)=0 +IBLOCK=1 +IROF=1 +IBFIRST=1 +IPROCLAST=D%NPROCL(D%NFRSTLOFF+1) +DO JGL=1,D%NDGL_GP + ! Find processor which deals with this latitude in Fourier distribution + IPROC=D%NPROCL(D%NFRSTLOFF+JGL) + IF(IPROC > NPRTRNS) THEN + WRITE(NOUT,'(A,I8)')& + &' INIGPTR ERROR : exceeding processor limit ',NPRTRNS + CALL ABORT_TRANS(' INIGPTR ERROR : exceeding processor limit ') + ENDIF + + ! for each latitude on this processor, find first and last points + ! for each NPROMA chunk, for each destination processor + IF(IPROC /= IPROCLAST) THEN + IF(IROF > 1) THEN + KGPTRSEND(1,IBLOCK,IPROCLAST)=IBFIRST + KGPTRSEND(2,IBLOCK,IPROCLAST)=IROF-1 + ENDIF + IF(IROF <= NPROMA) IBFIRST=IROF + IPROCLAST=IPROC + ENDIF + IFIRST=D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + ILAST =IFIRST + D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) -1 + DO JBL=IFIRST,ILAST + IF(IROF == NPROMA) THEN + IBLAST=IROF + KGPTRSEND(1,IBLOCK,IPROC)=IBFIRST + KGPTRSEND(2,IBLOCK,IPROC)=IBLAST + IF(IBLOCK < NGPBLKS) IBLOCK=IBLOCK+1 + IROF=0 + IBFIRST=1 + ENDIF + IROF=IROF+1 + ENDDO +ENDDO +IF(IROF /= 1.AND.IROF /= IBFIRST) THEN +! non-empty residual block after last latitude line + IBLAST=IROF-1 + KGPTRSEND(1,IBLOCK,IPROC)=IBFIRST + KGPTRSEND(2,IBLOCK,IPROC)=IBLAST +ENDIF +! sum up over blocks +KGPTRRECV(:)=0 +DO JPRTRNS=1,NPRTRNS + DO JBLKS=1,NGPBLKS + IF(KGPTRSEND(1,JBLKS,JPRTRNS) > 0) THEN + KGPTRRECV(JPRTRNS)=KGPTRRECV(JPRTRNS)+& + &KGPTRSEND(2,JBLKS,JPRTRNS)-KGPTRSEND(1,JBLKS,JPRTRNS)+1 + ENDIF + ENDDO +ENDDO + +END SUBROUTINE INIGPTR +END MODULE INIGPTR_MOD diff --git a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 new file mode 100755 index 0000000..e9e0d9f --- /dev/null +++ b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 @@ -0,0 +1,320 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INV_TRANS_CTL_MOD +CONTAINS +SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS_CTL* - Control routine for inverse spectral transform. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, FOUBUF +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTINV_CTL_MOD ,ONLY : LTINV_CTL +USE FTINV_CTL_MOD ,ONLY : FTINV_CTL +!use nvtx +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB + +! ------------------------------------------------------------------ + +! Perform transform + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL LTINV_CTL(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,FSPGL_PROC=FSPGL_PROC) + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + !call nvtxStartRange("INVTRANS") + +#ifdef ACCGPU + !$ACC DATA CREATE(FOUBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(ALLOC:FOUBUF) +#endif + ! No splitting of fields, transform done in one go + ! from PSPXXX to FOUBUF + !call nvtxStartRange("LTINV") + CALL LTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + &PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + &PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + &FSPGL_PROC=FSPGL_PROC) + !call nvtxEndRange + + ! from FOUBUF to PGPXXX + !call nvtxStartRange("FTINV") + CALL FTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + !call nvtxEndRange + + !call nvtxEndRange + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE INV_TRANS_CTL +END MODULE INV_TRANS_CTL_MOD diff --git a/src/trans/gpu/internal/inv_trans_ctlad_mod.F90 b/src/trans/gpu/internal/inv_trans_ctlad_mod.F90 new file mode 100755 index 0000000..bef3906 --- /dev/null +++ b/src/trans/gpu/internal/inv_trans_ctlad_mod.F90 @@ -0,0 +1,296 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INV_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE INV_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS_CTLAD* - Control routine for inverse spectral transform adj. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields +! +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTLAD - control of Legendre transform +! FTINV_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTINV_CTLAD_MOD ,ONLY : LTINV_CTLAD +USE FTINV_CTLAD_MOD ,ONLY : FTINV_CTLAD +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB + +! ------------------------------------------------------------------ + +! Perform transform + + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + CALL LTINV_CTLAD(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC) + + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL FTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + + CALL LTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE INV_TRANS_CTLAD +END MODULE INV_TRANS_CTLAD_MOD diff --git a/src/trans/gpu/internal/ldfou2_mod.F90 b/src/trans/gpu/internal/ldfou2_mod.F90 new file mode 100755 index 0000000..d7d2ccf --- /dev/null +++ b/src/trans/gpu/internal/ldfou2_mod.F90 @@ -0,0 +1,135 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LDFOU2_MOD +CONTAINS +SUBROUTINE LDFOU2(KF_UV,PAIA) + +!**** *LDFOU2* - Division by a*cos(theta) of u and v + +! Purpose. +! -------- +! In Fourier space divide u and v by a*cos(theta). + +!** Interface. +! ---------- +! CALL LDFOU2(KM,PAIA,PSIA) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! PAIA - antisymmetric fourier fields +! PSIA - symmetric fourierfields + +! Implicit arguments : RACTHE - 1./(a*cos(theta)) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Message Passing option added +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_FIELDS ,ONLY : F_RACTHE +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS +USE TPM_DIM ,ONLY : R, R_NDGNH, R_NDGL +USE TPM_GEOMETRY ,ONLY : G, G_NDGLU + +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) :: KM,KMLOC + +REAL(KIND=JPRBT) ,INTENT(INOUT) :: PAIA(:,:,:) +!REAL(KIND=JPRBT) ,INTENT(INOUT) :: PSIA(:,:,:), PAIA(:,:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: J, JGL ,IFLD ,ISL, IGLS + +! ------------------------------------------------------------------ + +!* 1. DIVIDE U V BY A*COS(THETA) +! -------------------------- + +IFLD = 4*KF_UV +IF( IFLD > 0 ) THEN + +#ifdef ACCGPU +!$ACC DATA & +!$ACC& COPYIN(KF_UV,F_RACTHE) & +!$ACC& PRESENT(D_NUMP,D_MYMS,R_NDGNH,R_NDGL,G_NDGLU) & +!$ACC& PRESENT(PAIA) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA & +!$OMP& MAP(TO:F_RACTHE,D,D_NUMP,D_MYMS,R_NDGNH,R_NDGL,G_NDGLU) & +!$OMP& MAP(TOFROM:PAIA) +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) +!! DEFAULT(NONE) PRIVATE(KM,ISL,IGLS) & +!!$OMP& SHARED(D_NUMP,R_NDGNH,KF_UV,D_MYMS,R_NDGNH,G_NDGLU,R_NDGL,PAIA,F_RACTHE) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,ISL,IGLS) DEFAULT(NONE) & +!$ACC& PRESENT(D_NUMP,R_NDGNH,KF_UV,D_MYMS,R_NDGNH,G_NDGLU,R_NDGL,PAIA,F_RACTHE) +#endif +DO KMLOC=1,D_NUMP + DO JGL=1,R_NDGNH + DO J=1,4*KF_UV + KM = D_MYMS(KMLOC) + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) +!* 1.1 U AND V + if (JGL .ge. ISL) then + IGLS = R_NDGL+1-JGL + PAIA(J,JGL,KMLOC) = PAIA(J,JGL,KMLOC)*F_RACTHE(JGL) +! PSIA(J,JGL,KMLOC) = PSIA(J,JGL,KMLOC)*F_RACTHE(JGL) + endif + ENDDO + ENDDO +ENDDO +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE LDFOU2 +END MODULE LDFOU2_MOD diff --git a/src/trans/gpu/internal/ldfou2ad_mod.F90 b/src/trans/gpu/internal/ldfou2ad_mod.F90 new file mode 100755 index 0000000..a1ba6b0 --- /dev/null +++ b/src/trans/gpu/internal/ldfou2ad_mod.F90 @@ -0,0 +1,97 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LDFOU2AD_MOD +CONTAINS +SUBROUTINE LDFOU2AD(KM,KF_UV,PAIA,PSIA) + +!**** *LDFOU2AD* - Division by a*cos(theta) of u and v + +! Purpose. +! -------- +! In Fourier space divide u and v by a*cos(theta). + +!** Interface. +! ---------- +! CALL LDFOU2AD(KM,PAIA,PSIA) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! PAIA - antisymmetric fourier fields +! PSIA - symmetric fourierfields + +! Implicit arguments : RACTHE - 1./(a*cos(theta)) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Message Passing option added +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV + +REAL(KIND=JPRBT) ,INTENT(INOUT) :: PSIA(:,:), PAIA(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: J, JGL ,IFLD ,ISL + + +! ------------------------------------------------------------------ + +!* 1. DIVIDE U V BY A*COS(THETA) +! -------------------------- + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IFLD = 4*KF_UV + +!* 1.1 U AND V + +DO JGL=ISL,R%NDGNH + DO J=1,IFLD + PAIA(J,JGL) = PAIA(J,JGL)*F%RACTHE(JGL) + PSIA(J,JGL) = PSIA(J,JGL)*F%RACTHE(JGL) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE LDFOU2AD +END MODULE LDFOU2AD_MOD diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 new file mode 100755 index 0000000..e00d54b --- /dev/null +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -0,0 +1,501 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEDIR_MOD +CONTAINS +SUBROUTINE LEDIR(KF_FS,KLED2,PAIA,POA1,KMODE) + +!**** *LEDIR* - Direct Legendre transform. + +! Purpose. +! -------- +! Direct Legendre tranform of state variables. + +!** Interface. +! ---------- +! CALL LEDIR(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- use butterfly or dgemm + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Nils Wedi + Mats Hamrud + George Modzynski + +! Modifications. +! -------------- +! J.Hague : Oct 2012 DR_HOOK round calls to DGEMM: +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPIB ,JPRB, JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_DIM ,ONLY : R_NDGNH,R_NSMAX,R_NTMAX +USE TPM_GEOMETRY ,ONLY : G_NDGLU +USE TPM_FIELDS ,ONLY : F_RW, & + & ZAA,ZAS,LDZAA,LDZAS,TDZAA,TDZAS,& + & DZBST,DLDZBA,DLDZBS,DTDZBA,DTDZBS,& + & DZCST,DZCAT,DLDZCA,DLDZCS,DTDZCA,DTDZCS,& + & ZAMAX, ZSMAX,& + & IF_FS_DIR,ZAA0,DZBST0,DZCAT0,ZAS0,DZCST0,KMLOC0 +USE TPM_DISTR +USE TPM_GEN ,ONLY : NOUT +USE TPM_FLT +USE BUTTERFLY_ALG_MOD +USE HIPBLAS_MOD ,ONLY : HIP_DGEMM_BATCHED, HIP_SGEMM_BATCHED +!USE HIP_GEMM_BATCHED_MOD!!, ONLY: HIP_TCGEMM_BATCHED, HIP_GEMM_BATCHED +USE, INTRINSIC :: ISO_C_BINDING +USE IEEE_ARITHMETIC + +IMPLICIT NONE + + +! DUMMY ARGUMENTS +INTEGER(KIND=JPIM) :: KM +INTEGER(KIND=JPIM) :: KMLOC +INTEGER(KIND=JPIM) :: KFC +INTEGER(KIND=JPIM) :: KIFC +INTEGER(KIND=JPIM) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 +INTEGER(KIND=JPIM), INTENT(IN) :: KMODE + +REAL(KIND=JPRBT), INTENT(IN) :: PAIA(:,:,:) +!REAL(KIND=JPRBT), INTENT(IN) :: PSIA(:,:,:), PAIA(:,:,:) +REAL(KIND=JPRBT), INTENT(OUT) :: POA1(:,:,:) + +! LOCAL VARIABLES +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, IF, J, JK, IRET +INTEGER(KIND=JPIM) :: ITHRESHOLD +REAL(KIND=JPRB) :: RRPELTMDIR = 100.0_JPRB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +INTEGER :: ISTAT + +IF (LHOOK) CALL DR_HOOK('LE_DGEMM',0,ZHOOK_HANDLE) + +KFC = 2*KF_FS +KIFC = KFC + +#ifdef ACCGPU +!$ACC DATA & +!$ACC& PRESENT(F_RW) & +!$ACC& PRESENT(D_NUMP,D_MYMS,R_NDGNH,G_NDGLU,R_NSMAX,R_NTMAX) & +!$ACC& PRESENT(PAIA) & +!$ACC& PRESENT(ZAA,ZAS,DZBST,DZCST,DZCAT) & +!$ACC& PRESENT(POA1,dzbst0,dzcat0,dzbst0,dzcst0) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA & +!$OMP& MAP(TO:F_RW) & +!$OMP& MAP(TO:D_NUMP,D_MYMS,R_NDGNH,G_NDGLU,R_NSMAX,R_NTMAX) & +!$OMP& MAP(PRESENT,ALLOC:PAIA) & +!$OMP& MAP(PRESENT,ALLOC:ZAA,ZAS,DZBST,DZCST,DZCAT) & +!$OMP& MAP(PRESENT,ALLOC:POA1,dzbst0,dzcat0,dzbst0,dzcst0) +#endif + + +!! Initialize rescaling arrays to zero +!!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) +!!$ACC PARALLEL LOOP COLLAPSE(2) +!DO KMLOC=1,SIZE(ZAMAX,2) +! DO JK=1,SIZE(ZAMAX,1) +! ZAMAX(JK,KMLOC) = 0.0_JPRBT +! ZSMAX(JK,KMLOC) = 0.0_JPRBT +! ENDDO +!ENDDO + + +! anti-symmetric + +IF ( KMODE == -1 ) THEN + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,KDGLU,ISL,ISKIP) DEFAULT(NONE) & +!$OMP& SHARED(D_NUMP,R_NDGNH,KFC,D_MYMS,G_NDGLU,DZBST,DLDZBA,DTDZBA,PAIA,F_RW) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,KDGLU,ISL,ISKIP) DEFAULT(NONE) & +!$ACC& COPYIN(KFC,DZBST,DLDZBA,DTDZBA) & +!$ACC& PRESENT(D_NUMP,R_NDGNH,D_MYMS,G_NDGLU,PAIA,F_RW) +#endif +DO KMLOC=1,D_NUMP + DO J=1,R_NDGNH + DO JK=1,KFC + + KM = D_MYMS(KMLOC) + KDGLU = MIN(R_NDGNH,G_NDGLU(KM)) + IF (J .LE. KDGLU) THEN + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + DZBST((JK-1)/ISKIP+1+(J-1+(KMLOC-1)*DLDZBA)*DTDZBA)=PAIA(JK,ISL+J-1,KMLOC)*F_RW(ISL+J-1) + END IF + END IF + ENDDO + ENDDO +END DO + + +! Get C in transpose format to get better memory access patterns later +!C=A*B => +! C^T=B^T*A^T +#ifdef OMPGPU +!$OMP TARGET DATA USE_DEVICE_PTR(ZAA,DZBST,DZCAT) +#endif +#ifdef ACCGPU +!$ACC HOST_DATA USE_DEVICE(ZAA,DZBST,DZCAT) +#endif +#ifdef PARKINDTRANS_SINGLE +CALL HIP_SGEMM_BATCHED( & + & 'N', 'N', & + & DTDZBA, TDZAA, DLDZBA, & + & 1.0_JPRBT, & + & DZBST, DTDZBA, DLDZBA, & + & ZAA, LDZAA, TDZAA, & + & 0._JPRBT, & + & DZCAT, DTDZCA, DLDZCA, & + & D_NUMP) +#else +CALL HIP_DGEMM_BATCHED( & + & 'N', 'N', & + & DTDZBA, TDZAA, DLDZBA, & + & 1.0_JPRBT, & + & DZBST, DTDZBA, DLDZBA, & + & ZAA, LDZAA, TDZAA, & + & 0._JPRBT, & + & DZCAT, DTDZCA, DLDZCA, & + & D_NUMP) +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END HOST_DATA +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ISKIP,ILA,IA,ILS) DEFAULT(NONE) & +!$OMP& SHARED(D_NUMP,R_NTMAX,KFC,D_MYMS,POA1,DZCAT,DLDZCA,DTDZCA) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,ISKIP,ILA,IA,ILS) DEFAULT(NONE) & +!$ACC& COPYIN(KFC,DLDZCA,DTDZCA) & +!$ACC& PRESENT(D_NUMP,R_NTMAX,D_MYMS,POA1,DZCAT) +#endif +DO KMLOC=1,D_NUMP + DO J=1,(R_NTMAX+2)/2 + DO JK=1,KFC + + KM = D_MYMS(KMLOC) + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILA = (R_NTMAX-KM+2)/2 + IA = 1+MOD(R_NTMAX-KM+2,2) + IF (J .LE. ILA) THEN + POA1(JK,IA+(J-1)*2,KMLOC) = DZCAT((JK-1)/ISKIP+1+(J-1+(KMLOC-1)*DLDZCA)*DTDZCA) + END IF + END IF + ENDDO + ENDDO +ENDDO + +! compute m=0 in double precision: +IF(KMLOC0 > 0) THEN + PRINT*,'computing m=0 in double precision' + ISKIP = 2 + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KDGLU,ISL) DEFAULT(NONE) & + !$OMP& SHARED(R_NDGNH,KFC,G_NDGLU,DZBST0,DTDZBA,PAIA,KMLOC0,F_RW,ISKIP) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KDGLU,ISL) DEFAULT(NONE) & + !$ACC& COPYIN(KFC,DTDZBA,KMLOC0,ISKIP) & + !$ACC& PRESENT(R_NDGNH,G_NDGLU,DZBST0,PAIA,F_RW) +#endif + DO J=1,R_NDGNH + DO JK=1,KFC + + KDGLU = MIN(R_NDGNH,G_NDGLU(0)) + IF (J .LE. KDGLU) THEN + ISL = MAX(R_NDGNH-G_NDGLU(0)+1,1) + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + DZBST0((JK-1)/ISKIP+1+(J-1)*DTDZBA)=PAIA(JK,ISL+J-1,KMLOC0)*F_RW(ISL+J-1) + END IF + END IF + ENDDO + ENDDO + + + ! Get C in transpose format to get better memory access patterns later + !C=A*B => + ! C^T=B^T*A^T + +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(ZAA0,DZBST0,DZCAT0) +#endif +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(ZAA0,DZBST0,DZCAT0) +#endif + CALL HIP_DGEMM_BATCHED( & + & 'N','N', & + & DTDZBA, TDZAA, DLDZBA, & + & 1.0_JPRD, & + & DZBST0,DTDZBA, DLDZBA,& + & ZAA0,LDZAA, TDZAA, & + & 0._JPRD, & + & DZCAT0,DTDZCA, DLDZCA, & + & 1) + !CALL HIP_DGEMM('N','N',DTDZBA,TDZAA,DLDZBA,1.0_JPRD,DZBST0,DTDZBA,& + ! &ZAA0,LDZAA,0._JPRD,DZCAT0,DTDZCA) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ILA,IA,ILS) DEFAULT(NONE) & + !$OMP& SHARED(R_NTMAX,KFC,POA1,DZCAT0,DTDZCA,KMLOC0,ISKIP) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(ILA,IA,ILS) DEFAULT(NONE) & + !$ACC& COPYIN(KFC,DTDZCA,KMLOC0,ISKIP) & + !$ACC& PRESENT(R_NTMAX,POA1,DZCAT0) +#endif + DO J=1,(R_NTMAX+2)/2 + DO JK=1,KFC + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILA = (R_NTMAX+2)/2 + IA = 1+MOD(R_NTMAX+2,2) + IF (J .LE. ILA) THEN + POA1(JK,IA+(J-1)*2,KMLOC0) = DZCAT0((JK-1)/ISKIP+1+(J-1)*DTDZCA) + END IF + END IF + ENDDO +ENDDO +ENDIF + +ELSE + +! symmetric + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,KDGLU,ISL,ISKIP) DEFAULT(NONE) & +!$OMP& SHARED(D_NUMP,R_NDGNH,KFC,D_MYMS,G_NDGLU,DZBST,DLDZBS,DTDZBS,PAIA,F_RW) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,KDGLU,ISL,ISKIP) DEFAULT(NONE) & +!$ACC & COPYIN(KFC,DLDZBS,DTDZBS) & +!$ACC & PRESENT(D_NUMP,R_NDGNH,D_MYMS,G_NDGLU,DZBST,PAIA,F_RW) +#endif +DO KMLOC=1,D_NUMP + DO J=1,R_NDGNH + DO JK=1,KFC + KM = D_MYMS(KMLOC) + KDGLU = MIN(R_NDGNH,G_NDGLU(KM)) + IF (J .LE. KDGLU) THEN + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + DZBST((JK-1)/ISKIP+1+(J-1+(KMLOC-1)*DLDZBS)*DTDZBS)=PAIA(JK,ISL+J-1,KMLOC)*F_RW(ISL+J-1) + END IF + END IF + ENDDO + ENDDO +END DO + +! Get C in transpose format to get better memory access patterns later +!C=A*B => +! C^T=B^T*A^T +#ifdef OMPGPU +!$OMP TARGET DATA USE_DEVICE_PTR(ZAS,DZBST,DZCST) +#endif +#ifdef ACCGPU +!$ACC HOST_DATA USE_DEVICE(ZAS,DZBST,DZCST) +#endif +#ifdef PARKINDTRANS_SINGLE +CALL HIP_SGEMM_BATCHED( & + & 'N', 'N', & + & DTDZBS, TDZAS, DLDZBS, & + & 1.0_JPRBT, & + & DZBST, DTDZBS, DLDZBS, & + & ZAS, LDZAS, TDZAS, & + & 0._JPRBT, & + & DZCST, DTDZCS, DLDZCS, & + & D_NUMP) +#else +CALL HIP_DGEMM_BATCHED( & + & 'N', 'N', & + & DTDZBS, TDZAS, DLDZBS, & + & 1.0_JPRBT, & + & DZBST, DTDZBS, DLDZBS, & + & ZAS, LDZAS, TDZAS, & + & 0._JPRBT, & + & DZCST, DTDZCS, DLDZCS, & + & D_NUMP) +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END HOST_DATA +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ISKIP,ILA,IS,ILS) DEFAULT(NONE) & +!$OMP& SHARED(D_NUMP,R_NTMAX,KFC,D_MYMS,POA1,DZCST,DLDZCS,DTDZCS) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,ISKIP,ILA,ILS,IS) DEFAULT(NONE) & +!$ACC& COPYIN(KFC,DLDZCS,DTDZCS) & +!$ACC& PRESENT(D_NUMP,R_NTMAX,D_MYMS,POA1,DZCST) +#endif + DO KMLOC=1,D_NUMP + DO J=1,(R_NTMAX+3)/2 + DO JK=1,KFC + + KM = D_MYMS(KMLOC) + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILS = (R_NTMAX-KM+3)/2 + IF (J .LE. ILS) THEN + IS = 1+MOD(R_NTMAX-KM+1,2) + POA1(JK,IS+(J-1)*2,KMLOC) = DZCST((JK-1)/ISKIP+1+(J-1+(KMLOC-1)*DLDZCS)*DTDZCS) + END IF + END IF + ENDDO + ENDDO +ENDDO + +IF(KMLOC0 > 0) THEN + ISKIP=2 +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KDGLU,ISL) DEFAULT(NONE) & + !$OMP& SHARED(R_NDGNH,KFC,G_NDGLU,DZBST0,DTDZBS,PAIA,KMLOC0,F_RW,ISKIP) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KDGLU,ISL) DEFAULT(NONE) & + !$ACC& COPYIN(KFC,DTDZBS,KMLOC0,ISKIP) & + !$ACC& PRESENT(R_NDGNH,G_NDGLU,DZBST0,PAIA,F_RW) +#endif + DO J=1,R_NDGNH + DO JK=1,KFC + KDGLU = MIN(R_NDGNH,G_NDGLU(0)) + IF (J .LE. KDGLU) THEN + ISL = MAX(R_NDGNH-G_NDGLU(0)+1,1) + IF (MOD((JK-1),ISKIP) .eq. 0) THEN + DZBST0((JK-1)/ISKIP+1+(J-1)*DTDZBS)=PAIA(JK,ISL+J-1,KMLOC0)*F_RW(ISL+J-1) + END IF + END IF + ENDDO + ENDDO + + ! Get C in transpose format to get better memory access patterns later + !C=A*B => + ! C^T=B^T*A^T + +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(ZAS0,DZBST0,DZCST0) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(ZAS0,DZBST0,DZCST0) +#endif + CALL HIP_DGEMM_BATCHED('N','N',& + & DTDZBS,TDZAS,DLDZBS,& + & 1.0_JPRD,DZBST0,DTDZBS,DLDZBS,& + & ZAS0,LDZAS,TDZAS,& + & 0._JPRD,DZCST0,DTDZCS,DLDZCS,1) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ILA,ILS,IS) DEFAULT(NONE) & + !$OMP& SHARED(R_NTMAX,KFC,POA1,DZCST0,DTDZCS,KMLOC0,ISKIP) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(ILA,ILS,IS) DEFAULT(NONE) & + !$ACC& COPYIN(KFC,DTDZCS,KMLOC0,ISKIP) & + !$ACC& PRESENT(R_NTMAX,POA1,DZCST0) +#endif + DO J=1,(R_NTMAX+3)/2 + DO JK=1,KFC + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILS = (R_NTMAX+3)/2 + IF (J .LE. ILS) THEN + IS = 1+MOD(R_NTMAX+1,2) + POA1(JK,IS+(J-1)*2,KMLOC0) = DZCST0((JK-1)/ISKIP+1+(J-1)*DTDZCS) + end if + end if + ENDDO + ENDDO + +ENDIF + +ENDIF + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + + +IF (LHOOK) CALL DR_HOOK('LE_DGEMM',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE LEDIR +END MODULE LEDIR_MOD diff --git a/src/trans/gpu/internal/ledirad_mod.F90 b/src/trans/gpu/internal/ledirad_mod.F90 new file mode 100755 index 0000000..4d8fab9 --- /dev/null +++ b/src/trans/gpu/internal/ledirad_mod.F90 @@ -0,0 +1,207 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEDIRAD_MOD +CONTAINS +SUBROUTINE LEDIRAD(KM,KMLOC,KFC,KIFC,KDGLU,KLED2,PAIA,PSIA,POA1) + +!**** *LEDIRAD* - Direct Legendre transform. + +! Purpose. +! -------- +! Direct Legendre tranform of state variables. + +!** Interface. +! ---------- +! CALL LEDIRAD(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - matrix multiply +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-01-28 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - NTMAX instead of NSMAX +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! Modified ! 16/10/12 J.Hague : DR_HOOK round calls to DGEMM: +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT ,JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +!USE TPM_TRANS +! +USE TPM_FLT +USE TPM_FIELDS +USE TPM_DISTR +USE BUTTERFLY_ALG_MOD, ONLY : MULT_BUTM + +IMPLICIT NONE + + +! DUMMY ARGUMENTS +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KIFC +INTEGER(KIND=JPIM), INTENT(IN) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 + +REAL(KIND=JPRBT), INTENT(OUT) :: PSIA(:,:), PAIA(:,:) +REAL(KIND=JPRBT), INTENT(IN) :: POA1(:,:) + +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, J, JK,JGL,J1 +INTEGER(KIND=JPIM) :: IF,ITHRESHOLD +REAL(KIND=JPRBT) :: ZB(KDGLU,KIFC), ZCA((R%NTMAX-KM+2)/2,KIFC), ZCS((R%NTMAX-KM+3)/2,KIFC) +LOGICAL, PARAMETER :: LLDOUBLE = (JPRD == JPRBT) +CHARACTER(LEN=1) :: CLX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. + +CLX = 'S' +IF (LLDOUBLE) CLX = 'D' + +IA = 1+MOD(R%NTMAX-KM+2,2) +IS = 1+MOD(R%NTMAX-KM+1,2) +ILA = (R%NTMAX-KM+2)/2 +ILS = (R%NTMAX-KM+3)/2 +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + +IF(KM == 0)THEN + ISKIP = 2 + DO JGL=ISL,R%NDGNH + DO J1=2,KFC,2 + PSIA(J1,JGL)=0.0_JPRBT + PAIA(J1,JGL)=0.0_JPRBT + ENDDO + ENDDO +ELSE + ISKIP = 1 +ENDIF + + +IF (KIFC > 0 .AND. KDGLU > 0 ) THEN + + ITHRESHOLD=S%ITHRESHOLD + +!* 1. ANTISYMMETRIC PART. + + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO J=1,ILA + ZCA(J,IF) = POA1(IA+(J-1)*2,JK) + ENDDO + ENDDO + + IF(ILA <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('N','N',KDGLU,KIFC,ILA,1.0_JPRBT,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZCA,ILA,0._JPRBT,ZB,KDGLU) + ELSE + CALL SGEMM('N','N',KDGLU,KIFC,ILA,1.0_JPRBT,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZCA,ILA,0._JPRBT,ZB,KDGLU) + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('N',S%FA(KMLOC)%YBUT_STRUCT_A,KIFC,ZCA,ZB) + + ENDIF + + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO J=1,KDGLU + PAIA(JK,ISL+J-1) = ZB(J,IF)*F%RW(ISL+J-1) + ENDDO + ENDDO + + +!* 1.3 SYMMETRIC PART. + + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO J=1,ILS + ZCS(J,IF) = POA1(IS+(J-1)*2,JK) + ENDDO + ENDDO + + + IF(ILS <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('N','N',KDGLU,KIFC,ILS,1.0_JPRBT,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZCS,ILS,0._JPRBT,ZB,KDGLU) + ELSE + CALL SGEMM('N','N',KDGLU,KIFC,ILS,1.0_JPRBT,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZCS,ILS,0._JPRBT,ZB,KDGLU) + + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('N',S%FA(KMLOC)%YBUT_STRUCT_S,KIFC,ZCS,ZB) + + ENDIF + + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO J=1,KDGLU + PSIA(JK,ISL+J-1) = ZB(J,IF)*F%RW(ISL+J-1) + ENDDO + ENDDO + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE LEDIRAD +END MODULE LEDIRAD_MOD diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 new file mode 100755 index 0000000..f6bd218 --- /dev/null +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -0,0 +1,353 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEINV_MOD +CONTAINS +SUBROUTINE LEINV(KFC,KSTA,KF_OUT_LT,PAOA1,PSOA1) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) + +! Implicit arguments : None. +! -------------------- + +! Method. use butterfly or dgemm +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Nils Wedi + Mats Hamrud + George Modzynski +! +! Modifications. +! -------------- +! J.Hague : Oct 2012 DR_HOOK round calls to DGEMM: +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_DIM ,ONLY : R, R_NDGNH,R_NSMAX +USE TPM_GEOMETRY ,ONLY : G, G_NDGLU +USE TPM_FIELDS ,ONLY : F, ZIA, & + & ZAA,ZAS,LDZAA,LDZAS,TDZAA,TDZAS,& + & IZBS,ILDZBA,ILDZBS,ITDZBA,ITDZBS,& + & IZCS,IZCST,ILDZCA,ILDZCS,ITDZCA,ITDZCS,& + & TDZAS, IF_FS_INV, ZAMAX, ZSMAX +USE TPM_DISTR ,ONLY : D_NUMP,D_MYMS, MYPROC +USE TPM_GEN ,ONLY : NOUT +USE TPM_FLT +USE HIPBLAS_MOD ,ONLY : HIP_SGEMM_BATCHED, HIP_DGEMM_BATCHED +! issue ? address error +!USE HIP_GEMM_BATCHED_MOD +USE, INTRINSIC :: ISO_C_BINDING +USE IEEE_ARITHMETIC + +IMPLICIT NONE + + +! DUMMY ARGUMENTS +INTEGER(KIND=JPIM) :: KM +INTEGER(KIND=JPIM) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KSTA +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM) :: KIFC +INTEGER(KIND=JPIM) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +!REAL(KIND=JPRB), INTENT(IN) :: PIA(:,:,:) +REAL(KIND=JPRBT), INTENT(OUT) :: PSOA1(:,:,:) +REAL(KIND=JPRBT), INTENT(OUT) :: PAOA1(:,:,:) + +! LOCAL +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, J1, IF, JGL,JK, J,JI, IRET +INTEGER(KIND=JPIM) :: ITHRESHOLD + +INTEGER(KIND=JPIM) :: ISTAT + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +!* 1.1 PREPARATIONS. +IF (LHOOK) CALL DR_HOOK('LE_DGEMM',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. +#ifdef ACCGPU +!$ACC DATA COPYIN (S,S%ITHRESHOLD,S%LUSEFLT) & +!$ACC& COPYIN (D_MYMS,G_NDGLU,D_NUMP,R_NDGNH,R_NSMAX, KFC, KSTA) & +!$ACC& COPYIN (IF_FS_INV) & +!$ACC& PRESENT (ZIA,ZAA,ZAS) & +!$ACC& PRESENT (IZCST,IZBS) & +!!$ACC& COPYIN (PIA) & +!$ACC& COPYOUT (PSOA1,PAOA1) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP(TO:S,S%ITHRESHOLD,S%LUSEFLT,D_MYMS,G_NDGLU,D_NUMP,R_NDGNH,R_NSMAX) & +!$OMP& MAP(PRESENT,ALLOC:ZAA,ZAS,IZCST,ZIA,PSOA1,PAOA1,IZBS) +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM) & +!$OMP& SHARED(D_MYMS,PSOA1,PAOA1,KFC,R_NDGNH,D_NUMP) DEFAULT(NONE) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,KMLOC,JGL) DEFAULT(NONE) & +!$ACC& PRESENT(D_MYMS,PSOA1,PAOA1,KFC,KSTA,R_NDGNH,D_NUMP) +#endif +DO KMLOC=1,D_NUMP + DO JGL=1,R_NDGNH + DO J1=2,KFC,2 + + KM = D_MYMS(KMLOC) + IF(KM == 0)THEN + PSOA1(J1,JGL,KMLOC) = 0.0_JPRBT + PAOA1(J1,JGL,KMLOC) = 0.0_JPRBT + END IF + ENDDO + ENDDO + !end loop over wavenumber +END DO + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ISKIP,ILA,ILS,IA) & +!$OMP& SHARED(D_NUMP,R_NSMAX,KFC,KSTA,D_MYMS,IZBS,TDZAA,IF_FS_INV,ZIA) DEFAULT(NONE) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KMLOC,J,JK,KM,ISKIP,ILA,ILS,IA) DEFAULT(NONE) & +!$ACC& PRESENT(D_NUMP,R_NSMAX,KFC,KSTA,D_MYMS,IZBS,IF_FS_INV,ZIA,TDZAA) +#endif +DO KMLOC=1,D_NUMP + DO J=1,(R_NSMAX+2)/2 + DO JK=1,KFC + + KM = D_MYMS(KMLOC) + IF (KM == 0) THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILA = (R_NSMAX-KM+2)/2 + IF (J .LE. ILA) THEN + IA = 1+MOD(R_NSMAX-KM+2,2) + IZBS((JK-1)/ISKIP+1+(J-1+(KMLOC-1)*TDZAA)*IF_FS_INV)=ZIA(KSTA+JK-1,IA+1+(J-1)*2,KMLOC) + ENDIF + ENDIF + ENDDO + ENDDO +ENDDO + +ITHRESHOLD=S%ITHRESHOLD + +! operate on full arrays, where non-relavent entries have been set to zero +! CALL HIP_DGEMM_BATCHED('N','N',LDZAA,TDZBA,TDZAA,1.0_JPRB,ZAA,LDZAA,TDZAA,ZBA,LDZBA,TDZBA,0._JPRB,ZCA,LDZCA,TDZCA,D_NUMP) +! Get C in transpose format to get better memory access patterns later +!C=A*B => +! C^T=B^T*A^T + +! OVERLOADED FOR SINGLE AND DOUBLE PRECISION +#ifdef ACCGPU +!$ACC HOST_DATA USE_DEVICE(ZAA,IZBS,IZCST) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA USE_DEVICE_PTR(ZAA,IZBS,IZCST) +#endif +#ifdef PARKINDTRANS_SINGLE +CALL HIP_SGEMM_BATCHED( & + & 'N', 'T', & + & ITDZCA, ILDZCA, ILDZBA, & + & 1.0_JPRBT, & + & IZBS, ITDZBA, ILDZBA,& + & ZAA, LDZAA, TDZAA, & + & 0._JPRBT, & + & IZCST, ITDZCA, ILDZCA, & + & D_NUMP) +#else +CALL HIP_DGEMM_BATCHED( & + & 'N', 'T', & + & ITDZCA, ILDZCA, ILDZBA, & + & 1.0_JPRBT, & + & IZBS, ITDZBA, ILDZBA,& + & ZAA, LDZAA, TDZAA, & + & 0._JPRBT, & + & IZCST, ITDZCA, ILDZCA, & + & D_NUMP) +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END HOST_DATA +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) & +!$OMP& SHARED(D_NUMP,R_NDGNH,KFC,D_MYMS,G_NDGLU,PAOA1,IZCST,ITDZCA,ILDZCA) DEFAULT(NONE) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) DEFAULT(NONE) & +!$ACC& PRESENT(D_NUMP,R_NDGNH,KFC,D_MYMS,G_NDGLU,PAOA1,IZCST,ITDZCA,ILDZCA) +#endif +DO KMLOC=1,D_NUMP + DO JI=1,R_NDGNH + DO JK=1,KFC + KM = D_MYMS(KMLOC) + KDGLU = MIN(R_NDGNH,G_NDGLU(KM)) + IF (JI .LE. KDGLU) then + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + END IF + + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + PAOA1(JK,ISL+JI-1,KMLOC) = IZCST((JK-1)/ISKIP+1+(JI-1)*ITDZCA+(KMLOC-1)*ILDZCA*ITDZCA) + END IF + END IF + ENDDO + ENDDO +END DO + +! 2. +++++++++++++ symmetric + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ISKIP,ILS,IS) & +!$OMP& SHARED(D_NUMP,R_NSMAX,KFC,KSTA,D_MYMS,IZBS,ITDZBS,ILDZBS,ZIA) DEFAULT(NONE) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,ISKIP,ILS,IS) DEFAULT(NONE) & +!$ACC& PRESENT(D_NUMP,R_NSMAX,KFC,KSTA,D_MYMS,IZBS,ITDZBS,ILDZBS,ZIA) +#endif +DO KMLOC=1,D_NUMP + DO J=1,(R_NSMAX+3)/2 + DO JK=1,KFC + KM = D_MYMS(KMLOC) + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILS = (R_NSMAX-KM+3)/2 + IF (J .LE. ILS) THEN + IS = 1+MOD(R_NSMAX-KM+1,2) + IZBS((JK-1)/ISKIP+1+(J-1)*ITDZBS+(KMLOC-1)*ILDZBS*ITDZBS)=ZIA(KSTA+JK-1,IS+1+(J-1)*2,KMLOC) + END IF + END IF + ENDDO + ENDDO +ENDDO + +!C=A*B => +! C^T=B^T*A^T + +#ifdef OMPGPU +!$OMP TARGET DATA USE_DEVICE_PTR(ZAS,IZBS,IZCST) +#endif +#ifdef ACCGPU +!$ACC HOST_DATA USE_DEVICE(ZAS,IZBS,IZCST) +#endif +#ifdef PARKINDTRANS_SINGLE +CALL HIP_SGEMM_BATCHED( & + & 'N', 'T', & + & ITDZCS, ILDZCS, ILDZBS, & + & 1.0_JPRBT, & + & IZBS, ITDZBS, ILDZBS, & + & ZAS, LDZAS, TDZAS, & + & 0._JPRBT, & + & IZCST, ITDZCS, ILDZCS, & + & D_NUMP) +#else +CALL HIP_DGEMM_BATCHED( & + & 'N', 'T', & + & ITDZCS, ILDZCS, ILDZBS, & + & 1.0_JPRBT, & + & IZBS, ITDZBS, ILDZBS, & + & ZAS, LDZAS, TDZAS, & + & 0._JPRBT, & + & IZCST, ITDZCS, ILDZCS, & + & D_NUMP) +#endif +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END HOST_DATA +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) & +!$OMP& SHARED(D_NUMP,R_NDGNH,KFC,D_MYMS,G_NDGLU,PSOA1,IZCST,ITDZCS,ILDZCS) DEFAULT(NONE) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) DEFAULT(NONE) & +!$ACC& PRESENT(D_NUMP,R_NDGNH,KFC,D_MYMS,G_NDGLU,PSOA1,IZCST,ITDZCS,ILDZCS) +#endif +DO KMLOC=1,D_NUMP + DO JI=1,R_NDGNH + DO JK=1,KFC + KM = D_MYMS(KMLOC) + KDGLU = MIN(R_NDGNH,G_NDGLU(KM)) + IF (JI .LE. KDGLU) then + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + END IF + + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + PSOA1(JK,ISL+JI-1,KMLOC) = IZCST((JK-1)/ISKIP+1+(JI-1)*ITDZCS+(KMLOC-1)*ITDZCS*ILDZCS) + END IF + END IF + ENDDO + ENDDO +END DO +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +!* 1. PERFORM LEGENDRE TRANFORM. + +IF (LHOOK) CALL DR_HOOK('LE_DGEMM',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE LEINV +END MODULE LEINV_MOD diff --git a/src/trans/gpu/internal/leinvad_mod.F90 b/src/trans/gpu/internal/leinvad_mod.F90 new file mode 100755 index 0000000..0b8de57 --- /dev/null +++ b/src/trans/gpu/internal/leinvad_mod.F90 @@ -0,0 +1,197 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEINVAD_MOD +CONTAINS +SUBROUTINE LEINVAD(KM,KMLOC,KFC,KIFC,KF_OUT_LT,KDGLU,PIA,PAOA1,PSOA1) + +!**** *LEINVAD* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINVAD(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINVAD in IFS CY22R1 +! Modified ! 16/10/12 J.Hague : DR_HOOK round calls to DGEMM: +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT, JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D +! +USE TPM_FLT +USE BUTTERFLY_ALG_MOD, ONLY : MULT_BUTM + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KIFC +INTEGER(KIND=JPIM), INTENT(IN) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(OUT) :: PIA(:,:) +REAL(KIND=JPRBT), INTENT(INOUT) :: PSOA1(:,:) +REAL(KIND=JPRBT), INTENT(INOUT) :: PAOA1(:,:) + +! LOCAL VARIABLES +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, IOAD1, JK,JI +INTEGER(KIND=JPIM) :: IF,ITHRESHOLD +REAL(KIND=JPRBT) :: ZBA((R%NSMAX-KM+2)/2,KIFC), ZBS((R%NSMAX-KM+3)/2,KIFC), ZC(KDGLU,KIFC) +LOGICAL, PARAMETER :: LLDOUBLE = (JPRD == JPRBT) +CHARACTER(LEN=1) :: CLX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. + +CLX = 'S' +IF (LLDOUBLE) CLX = 'D' + +IA = 1+MOD(R%NSMAX-KM+2,2) +IS = 1+MOD(R%NSMAX-KM+1,2) +ILA = (R%NSMAX-KM+2)/2 +ILS = (R%NSMAX-KM+3)/2 +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) +IOAD1 = 2*KF_OUT_LT + +IF(KM == 0)THEN + ISKIP = 2 +ELSE + ISKIP = 1 +ENDIF + +IF( KDGLU > 0 ) THEN + + ITHRESHOLD=S%ITHRESHOLD + + +! 1. +++++++++++++ anti-symmetric + + ! we need the transpose of C + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO JI=1,KDGLU + ZC(JI,IF) = PAOA1(JK,ISL+JI-1) + ENDDO + ENDDO + + IF(ILA <= ITHRESHOLD .OR. .NOT.S%LUSEFLT) THEN + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('T','N',ILA,KIFC,KDGLU,1.0_JPRBT,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZC,KDGLU,0._JPRBT,ZBA,ILA) + ELSE + CALL SGEMM('T','N',ILA,KIFC,KDGLU,1.0_JPRBT,S%FA(KMLOC)%RPNMA,KDGLU,& + &ZC,KDGLU,0._JPRBT,ZBA,ILA) + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_1',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('T',S%FA(KMLOC)%YBUT_STRUCT_A,KIFC,ZC,ZBA) + + ENDIF + + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO JI=1,ILA + PIA(IA+1+(JI-1)*2,JK) = ZBA(JI,IF) + ENDDO + ENDDO + +! 2. +++++++++++++ symmetric + + ! we need the transpose of C + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO JI=1,KDGLU + ZC(JI,IF) = PSOA1(JK,ISL+JI-1) + ENDDO + ENDDO + + IF(ILS <= ITHRESHOLD .OR. .NOT.S%LUSEFLT ) THEN + + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',0,ZHOOK_HANDLE) + IF(LLDOUBLE)THEN + CALL DGEMM('T','N',ILS,KIFC,KDGLU,1.0_JPRBT,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZC,KDGLU,0._JPRBT,ZBS,ILS) + ELSE + CALL SGEMM('T','N',ILS,KIFC,KDGLU,1.0_JPRBT,S%FA(KMLOC)%RPNMS,KDGLU,& + &ZC,KDGLU,0._JPRBT,ZBS,ILS) + END IF + IF (LHOOK) CALL DR_HOOK('LE_'//CLX//'GEMM_2',1,ZHOOK_HANDLE) + + ELSE + + CALL MULT_BUTM('T',S%FA(KMLOC)%YBUT_STRUCT_S,KIFC,ZC,ZBS) + + ENDIF + + IF=0 + DO JK=1,KFC,ISKIP + IF=IF+1 + DO JI=1,ILS + PIA(IS+1+(JI-1)*2,JK) = ZBS(JI,IF) + ENDDO + ENDDO + + +ENDIF +! +! ------------------------------------------------------------------ + + +END SUBROUTINE LEINVAD +END MODULE LEINVAD_MOD diff --git a/src/trans/gpu/internal/ltdir_ctl_mod.F90 b/src/trans/gpu/internal/ltdir_ctl_mod.F90 new file mode 100755 index 0000000..8e4492f --- /dev/null +++ b/src/trans/gpu/internal/ltdir_ctl_mod.F90 @@ -0,0 +1,122 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_CTL_MOD + CONTAINS + SUBROUTINE LTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC) + + !**** *LTDIR_CTL* - Control routine for direct Legendre transform + + ! Purpose. + ! -------- + ! Direct Legendre transform + + !** Interface. + ! ---------- + ! CALL LTDIR_CTL(...) + + ! Explicit arguments : + ! -------------------- + ! KF_FS - number of fields in Fourier space + ! KF_UV - local number of spectral u-v fields + ! KF_SCALARS - local number of scalar spectral fields + ! PSPVOR(:,:) - spectral vorticity (output) + ! PSPDIV(:,:) - spectral divergence (output) + ! PSPSCALAR(:,:) - spectral scalarvalued fields (output) + ! KFLDPTRUV(:) - field pointer for vorticity and divergence (input) + ! KFLDPTRSC(:) - field pointer for scalarvalued fields (input) + + ! ------------------------------------------------------------------ + + USE PARKIND1 ,ONLY : JPIM ,JPRB + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DIM ,ONLY : R + USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN + USE TPM_DISTR ,ONLY : D + USE TPM_GEOMETRY ,ONLY : G + USE TPM_FIELDS ,ONLY : F + + + USE LTDIR_MOD ,ONLY : LTDIR + USE TRLTOM_MOD ,ONLY : TRLTOM, TRLTOM_CUDAAWARE + + USE TPM_FIELDS ,ONLY : ZSIA,ZAIA,ZOA1,ZEPSNM + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 + +#ifdef ACCGPU + !$ACC DATA PRESENT(FOUBUF_IN) CREATE(FOUBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(ALLOC:FOUBUF_IN,FOUBUF) +#endif + + ! Transposition from Fourier space distribution to spectral space distribution + ! requires currently both on the host !!! + + IBLEN = D%NLENGT0B*2*KF_FS + CALL GSTATS(153,0) +#ifdef USE_CUDA_AWARE_MPI_FT + !WRITE(NOUT,*) 'ltdir_ctl:TRLTOM_CUDAAWARE' + CALL TRLTOM_CUDAAWARE(FOUBUF_IN,FOUBUF,2*KF_FS) +#else + CALL TRLTOM(FOUBUF_IN,FOUBUF,2*KF_FS) +#ifdef ACCGPU + !$ACC UPDATE DEVICE(FOUBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE TO(FOUBUF) +#endif +#endif + CALL GSTATS(153,1) + + ! Direct Legendre transform + + CALL GSTATS(103,0) + ILED2 = 2*KF_FS + CALL GSTATS(1645,0) + IF(KF_FS>0) THEN + + CALL LTDIR(KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + + ENDIF +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + CALL GSTATS(1645,1) + + CALL GSTATS(103,1) + + ! ----------------------------------------------------------------- + + END SUBROUTINE LTDIR_CTL + END MODULE LTDIR_CTL_MOD diff --git a/src/trans/gpu/internal/ltdir_ctlad_mod.F90 b/src/trans/gpu/internal/ltdir_ctlad_mod.F90 new file mode 100755 index 0000000..28b7746 --- /dev/null +++ b/src/trans/gpu/internal/ltdir_ctlad_mod.F90 @@ -0,0 +1,110 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE LTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC) + +!**** *LTDIR_CTLAD* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL LTDIR_CTLAD(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE LTDIRAD_MOD ,ONLY : LTDIRAD +USE TRMTOL_MOD ,ONLY : TRMTOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 + +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +CALL GSTATS(105,0) +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +ENDIF +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1646,0) +IF(KF_FS > 0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL LTDIRAD(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1646,1) + +CALL GSTATS(105,1) + +CALL GSTATS(181,0) +CALL TRMTOL(FOUBUF,FOUBUF_IN,2*KF_FS) +CALL GSTATS(181,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +! ------------------------------------------------------------------ + +END SUBROUTINE LTDIR_CTLAD +END MODULE LTDIR_CTLAD_MOD diff --git a/src/trans/gpu/internal/ltdir_mod.F90 b/src/trans/gpu/internal/ltdir_mod.F90 new file mode 100755 index 0000000..5e4995d --- /dev/null +++ b/src/trans/gpu/internal/ltdir_mod.F90 @@ -0,0 +1,210 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_MOD + CONTAINS + SUBROUTINE LTDIR(KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC) + + + USE PARKIND1 ,ONLY : JPIM ,JPRB + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE TPM_DIM ,ONLY : R + USE TPM_DISTR ,ONLY : D, myproc + USE TPM_GEOMETRY + + USE PREPSNM_MOD ,ONLY : PREPSNM + USE PRFI2B_MOD ,ONLY : PRFI2B + USE LDFOU2_MOD ,ONLY : LDFOU2 + USE LEDIR_MOD ,ONLY : LEDIR + USE UVTVD_MOD + USE UPDSP_MOD ,ONLY : UPDSP + + USE TPM_FIELDS ,ONLY : ZAIA,ZOA1,ZOA2,ZEPSNM + + !**** *LTDIR* - Control of Direct Legendre transform step + + ! Purpose. + ! -------- + ! Tranform from Fourier space to spectral space, compute + ! vorticity and divergence. + + !** Interface. + ! ---------- + ! *CALL* *LTDIR(...)* + + ! Explicit arguments : + ! -------------------- KM - zonal wavenumber + ! KMLOC - local zonal wavenumber + + ! Implicit arguments : None + ! -------------------- + + ! Method. + ! ------- + + ! Externals. + ! ---------- + ! PREPSNM - prepare REPSNM for wavenumber KM + ! PRFI2 - prepares the Fourier work arrays for model variables. + ! LDFOU2 - computations in Fourier space + ! LEDIR - direct Legendre transform + ! UVTVD - + ! UPDSP - updating of spectral arrays (fields) + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 87-11-24 + ! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite + ! for uv formulation + ! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies + ! Modified 93-11-18 M. Hamrud - use only one Fourier buffer + ! Modified 94-04-06 R. El khatib Full-POS implementation + ! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div + ! instead of u,v->vor,div + ! MPP Group : 95-10-01 Support for Distributed Memory version + ! K. YESSAD (AUGUST 1996): + ! - Legendre transforms for transmission coefficients. + ! Modified : 04/06/99 D.Salmond : change order of AIA and SIA + ! R. El Khatib 12-Jul-2012 LDSPC2 replaced by UVTVD + ! ------------------------------------------------------------------ + + IMPLICIT NONE + + INTERFACE + SUBROUTINE cudaProfilerStart() BIND(C,name='cudaProfilerStart') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStart + END INTERFACE + + INTERFACE + SUBROUTINE cudaProfilerStop() BIND(C,name='cudaProfilerStop') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStop + END INTERFACE + + + + ! DUMMY INTEGER SCALARS + INTEGER(KIND=JPIM) :: KM + INTEGER(KIND=JPIM) :: KMLOC + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IFC, IIFC, IDGLU + INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + + !call cudaProfilerStart + + ! ------------------------------------------------------------------ + IF (LHOOK) CALL DR_HOOK('LTDIR_MOD',0,ZHOOK_HANDLE) + + ! ------------------------------------------------------------------ + + !* 1. PREPARE LEGENDRE POLONOMIALS AND EPSNM + ! -------------------------------------- + + + ! ------------------------------------------------------------------ + + !* 2. PREPARE WORK ARRAYS. + ! -------------------- + + ! serial to save memory, Nils + + ! anti-symmetric + + CALL PRFI2B(KF_FS,ZAIA,-1) + CALL LDFOU2(KF_UV,ZAIA) + CALL LEDIR(KF_FS,KLED2,ZAIA,ZOA1,-1) + + ! symmetric + + CALL PRFI2B(KF_FS,ZAIA,1) + CALL LDFOU2(KF_UV,ZAIA) + CALL LEDIR(KF_FS,KLED2,ZAIA,ZOA1,1) + + ! ------------------------------------------------------------------ + + !* 5. COMPUTE VORTICITY AND DIVERGENCE. + ! --------------------------------- + + IF( KF_UV > 0 ) THEN + !stop 'Error: code path not (yet) supported in GPU version' + + !!CALL PREPSNM + + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL UVTVD(KF_UV) + ! CALL UVTVD(KF_UV,ZEPSNM,ZOA1(IUS:IUE,:,:),ZOA1(IVS:IVE,:,:),& +! & ZOA2(IVORS:IVORE,:,:),ZOA2(IDIVS:IDIVE,:,:)) + ENDIF + ! ------------------------------------------------------------------ + + !* 6. UPDATE SPECTRAL ARRAYS. + ! ----------------------- + + !end loop over wavenumber + + !END DO + + !loop over wavenumber + !DO KMLOC=1,D%NUMP + ! KM = D%MYMS(KMLOC) + + ! this is on the host, so need to cp from device, Nils + CALL UPDSP(KF_UV,KF_SCALARS,ZOA1,ZOA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + + ! ------------------------------------------------------------------ + + IF (LHOOK) CALL DR_HOOK('LTDIR_MOD',1,ZHOOK_HANDLE) + + !end loop over wavenumber + !END DO + + + !call cudaProfilerStop + END SUBROUTINE LTDIR + END MODULE LTDIR_MOD diff --git a/src/trans/gpu/internal/ltdirad_mod.F90 b/src/trans/gpu/internal/ltdirad_mod.F90 new file mode 100755 index 0000000..52ae821 --- /dev/null +++ b/src/trans/gpu/internal/ltdirad_mod.F90 @@ -0,0 +1,189 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIRAD_MOD +CONTAINS +SUBROUTINE LTDIRAD(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY + +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI2AD_MOD ,ONLY : PRFI2AD +USE LDFOU2AD_MOD ,ONLY : LDFOU2AD +USE LEDIRAD_MOD ,ONLY : LEDIRAD +USE UVTVDAD_MOD +USE UPDSPAD_MOD ,ONLY : UPDSPAD + + +!**** *LTDIRAD* - Control of Direct Legendre transform step - adjoint + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *LTDIRAD(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI2AD - prepares the Fourier work arrays for model variables. +! LDFOU2AD - computations in Fourier space +! LEDIRAD - direct Legendre transform +! UVTVDAD - +! UPDSPAD - updating of spectral arrays (fields) + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! R. El Khatib 12-Jul-2012 LDSPC2AD replaced by UVTVDAD +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFC, IIFC, IDGLU +INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE + +! LOCAL REALS +REAL(KIND=JPRBT) :: ZSIA(KLED2,R%NDGNH), ZAIA(KLED2,R%NDGNH) +REAL(KIND=JPRBT) :: ZEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRBT) :: ZOA1(R%NLED4,KLED2), ZOA2(R%NLED4,MAX(4*KF_UV,1)) + + +! ------------------------------------------------------------------ + +!* 1. PREPARE LEGENDRE POLONOMIALS AND EPSNM +! -------------------------------------- + + + + +! ------------------------------------------------------------------ + +!* 6. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL UPDSPAD(KM,KF_UV,KF_SCALARS,ZOA1,ZOA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +! ------------------------------------------------------------------ + +!* 5. COMPUTE VORTICITY AND DIVERGENCE. +! --------------------------------- + +IF( KF_UV > 0 ) THEN + stop 'Error: code path not (yet) supported in GPU version' + !CALL PREPSNM(KM,KMLOC,ZEPSNM) + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV +! SET PART OF ZOA1 CONTAINING U AND V TO 0. + ZOA1(:,IUS:IVE) = 0.0_JPRB + CALL UVTVDAD(KM,KF_UV,ZEPSNM,ZOA1(:,IUS:IUE),ZOA1(:,IVS:IVE),& + & ZOA2(:,IVORS:IVORE),ZOA2(:,IDIVS:IDIVE)) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. DIRECT LEGENDRE TRANSFORM. +! -------------------------- +IFC = 2*KF_FS +IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) +IIFC = IFC +IF(KM == 0)THEN + IIFC = IFC/2 +ENDIF +CALL LEDIRAD(KM,KMLOC,IFC,IIFC,IDGLU,KLED2,ZAIA,ZSIA,ZOA1) + +! ------------------------------------------------------------------ + +!* 3. FOURIER SPACE COMPUTATIONS. +! --------------------------- + +CALL LDFOU2AD(KM,KF_UV,ZAIA,ZSIA) + +! ------------------------------------------------------------------ + +!* 2. PREPARE WORK ARRAYS. +! -------------------- + +CALL PRFI2AD(KM,KMLOC,KF_FS,ZAIA,ZSIA) + + +! ------------------------------------------------------------------ + +END SUBROUTINE LTDIRAD +END MODULE LTDIRAD_MOD + diff --git a/src/trans/gpu/internal/ltinv_ctl_mod.F90 b/src/trans/gpu/internal/ltinv_ctl_mod.F90 new file mode 100755 index 0000000..eefe9a5 --- /dev/null +++ b/src/trans/gpu/internal/ltinv_ctl_mod.F90 @@ -0,0 +1,141 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_CTL_MOD + CONTAINS + SUBROUTINE LTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + + !**** *LTINV_CTL* - Control routine for inverse Legandre transform. + + ! Purpose. + ! -------- + ! Control routine for the inverse LEGENDRE transform + + !** Interface. + ! ---------- + ! CALL INV_TRANS_CTL(...) + ! KF_OUT_LT - number of fields coming out from inverse LT + ! KF_UV - local number of spectral u-v fields + ! KF_SCALARS - local number of scalar spectral fields + ! KF_SCDERS - local number of derivatives of scalar spectral fields + ! PSPVOR(:,:) - spectral vorticity (input) + ! PSPDIV(:,:) - spectral divergence (input) + ! PSPSCALAR(:,:) - spectral scalarvalued fields (input) + ! KFLDPTRUV(:) - field pointer array for vor./div. + ! KFLDPTRSC(:) - field pointer array for PSPSCALAR + ! FSPGL_PROC - external procedure to be executed in fourier space + ! before transposition + + ! Method. + ! ------- + + ! Externals. + ! ---------- + ! + + ! Author. + ! ------- + ! Mats Hamrud *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 00-06-03 + + ! ------------------------------------------------------------------ + + USE PARKIND1 ,ONLY : JPIM ,JPRB + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DIM ,ONLY : R + USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN + USE TPM_DISTR ,ONLY : D + USE TPM_GEOMETRY ,ONLY : G + + USE TPM_FLT + + USE LTINV_MOD ,ONLY : LTINV + USE TRMTOL_MOD ,ONLY : TRMTOL, TRMTOL_CUDAAWARE + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + EXTERNAL FSPGL_PROC + OPTIONAL FSPGL_PROC + + INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1, i, j + +#ifdef ACCGPU + !$ACC DATA CREATE(FOUBUF_IN) PRESENT(FOUBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(ALLOC:FOUBUF_IN,FOUBUF) +#endif + + CALL GSTATS(102,0) + ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS + IDIM1 = 2*KF_OUT_LT + IBLEN = D%NLENGT0B*2*KF_OUT_LT + + IF(KF_OUT_LT > 0) THEN + CALL GSTATS(1647,0) + + ! from PSPXXX to FOUBUF_IN + CALL LTINV(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + + CALL GSTATS(1647,1) + ENDIF + CALL GSTATS(102,1) + + CALL GSTATS(152,0) + ! from FOUBUF_IN to FOUBUF +#ifdef USE_CUDA_AWARE_MPI_FT + !WRITE(NOUT,*) 'ltinv_ctl:TRMTOL_CUDAAWARE' + CALL TRMTOL_CUDAAWARE(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) +#else +#ifdef ACCGPU + !$ACC UPDATE HOST(FOUBUF_IN) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(FOUBUF_IN) +#endif + CALL TRMTOL(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) +#ifdef ACCGPU + !$ACC UPDATE DEVICE(FOUBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE TO(FOUBUF) +#endif +#endif + CALL GSTATS(152,1) +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif + + ! ------------------------------------------------------------------ + + END SUBROUTINE LTINV_CTL + END MODULE LTINV_CTL_MOD diff --git a/src/trans/gpu/internal/ltinv_ctlad_mod.F90 b/src/trans/gpu/internal/ltinv_ctlad_mod.F90 new file mode 100755 index 0000000..81ec805 --- /dev/null +++ b/src/trans/gpu/internal/ltinv_ctlad_mod.F90 @@ -0,0 +1,119 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_CTLAD_MOD +CONTAINS +SUBROUTINE LTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + +!**** *LTINV_CTLAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D +USE LTINVAD_MOD ,ONLY : LTINVAD +USE TRLTOM_MOD ,ONLY : TRLTOM + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1 + +! ------------------------------------------------------------------ + +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IDIM1 = 2*KF_OUT_LT +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) +ENDIF +CALL GSTATS(180,0) +CALL TRLTOM(FOUBUF,FOUBUF_IN,2*KF_OUT_LT) +CALL GSTATS(180,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) + +CALL GSTATS(104,0) +CALL GSTATS(1648,0) +IF(KF_OUT_LT > 0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL LTINVAD(IM,JM,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1648,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +CALL GSTATS(104,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE LTINV_CTLAD +END MODULE LTINV_CTLAD_MOD diff --git a/src/trans/gpu/internal/ltinv_mod.F90 b/src/trans/gpu/internal/ltinv_mod.F90 new file mode 100755 index 0000000..67e5088 --- /dev/null +++ b/src/trans/gpu/internal/ltinv_mod.F90 @@ -0,0 +1,368 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_MOD + CONTAINS + SUBROUTINE LTINV(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + + USE PARKIND1 ,ONLY : JPIM ,JPRB + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE TPM_DIM ,ONLY : R + USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B, foubuf_in + USE TPM_FLT + USE TPM_GEOMETRY + USE TPM_DISTR ,ONLY : D + USE TPM_GEN ,ONLY : NOUT + !USE PRLE1_MOD + USE PREPSNM_MOD ,ONLY : PREPSNM + USE PRFI1B_MOD ,ONLY : PRFI1B + USE VDTUV_MOD ,ONLY : VDTUV + USE SPNSDE_MOD ,ONLY : SPNSDE + USE LEINV_MOD ,ONLY : LEINV + USE ASRE1B_MOD ,ONLY : ASRE1B + USE FSPGL_INT_MOD ,ONLY : FSPGL_INT + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + USE IEEE_ARITHMETIC + !USE TPM_FIELDS ,ONLY : F,ZIA,ZSOA1,ZAOA1,ISTAN,ISTAS,ZEPSNM + USE TPM_FIELDS ,ONLY : F,ZIA,ZSOA1,ZAOA1,ZEPSNM + + + !**** *LTINV* - Inverse Legendre transform + ! + ! Purpose. + ! -------- + ! Tranform from Laplace space to Fourier space, compute U and V + ! and north/south derivatives of state variables. + + !** Interface. + ! ---------- + ! *CALL* *LTINV(...) + + ! Explicit arguments : + ! -------------------- + ! KM - zonal wavenumber + ! KMLOC - local zonal wavenumber + ! PSPVOR - spectral vorticity + ! PSPDIV - spectral divergence + ! PSPSCALAR - spectral scalar variables + + ! Implicit arguments : The Laplace arrays of the model. + ! -------------------- The values of the Legendre polynomials + ! The grid point arrays of the model + ! Method. + ! ------- + + ! Externals. + ! ---------- + + ! PREPSNM - prepare REPSNM for wavenumber KM + ! PRFI1B - prepares the spectral fields + ! VDTUV - compute u and v from vorticity and divergence + ! SPNSDE - compute north-south derivatives + ! LEINV - Inverse Legendre transform + ! ASRE1 - recombination of symmetric/antisymmetric part + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + ! Temperton, 1991, MWR 119 p1303 + + ! Author. + ! ------- + ! Mats Hamrud *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 00-02-01 From LTINV in IFS CY22R1 + ! ------------------------------------------------------------------ + + IMPLICIT NONE + + + INTERFACE + SUBROUTINE cudaProfilerStart() BIND(C,name='cudaProfilerStart') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStart + END INTERFACE + + INTERFACE + SUBROUTINE cudaProfilerStop() BIND(C,name='cudaProfilerStop') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStop + END INTERFACE + + + INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT + INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV + INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS + INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS + INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 + INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + + EXTERNAL FSPGL_PROC + OPTIONAL FSPGL_PROC + + !REAL(KIND=JPRBT) :: ZEPSNM(d%nump,0:R%NTMAX+2) + + INTEGER(KIND=JPIM) :: IFC, ISTA, IIFC, IDGLU + INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU + INTEGER(KIND=JPIM) :: IFIRST, ILAST, IDIM1,IDIM2,IDIM3,J3 + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + !CHARACTER(LEN=10) :: CLHOOK + + + INTEGER(KIND=JPIM) :: KM + INTEGER(KIND=JPIM) :: KMLOC + + + !call cudaProfilerStart + + + ! ------------------------------------------------------------------ + + !* 1. PERFORM LEGENDRE TRANFORM. + ! -------------------------- + + !WRITE(CLHOOK,FMT='(A,I4.4)') 'LTINV_',KM + IF (LHOOK) CALL DR_HOOK('LTINV_MOD',0,ZHOOK_HANDLE) + + ! ------------------------------------------------------------------ + + + !* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. + ! ---------------------------------------------- + + IFIRST = 1 + ILAST = 0 + + !* 1. PREPARE ZEPSNM. + ! --------------- + + !IF ( KF_UV > 0 .OR. KF_SCDERS > 0 ) THEN + ! CALL PREPSNM(ZEPSNM) + ! !$ACC update host(ZEPSNM) + ! !$OMP TARGET UPDATE FROM(ZEPSNM) + !ENDIF + +! COPY FROM PSPXXXX TO ZIA + + IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + + IDIM2=UBOUND(PSPVOR,2) + CALL GSTATS(431,0) +#ifdef ACCGPU + !$ACC DATA COPYIN(PSPVOR,PSPDIV) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:PSPVOR,PSPDIV) +#endif + CALL GSTATS(431,1) + CALL PRFI1B(IVORL,PSPVOR,KF_UV,IDIM2,KFLDPTRUV) + CALL PRFI1B(IDIVL,PSPDIV,KF_UV,IDIM2,KFLDPTRUV) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + ! ------------------------------------------------------------------ + + !CALL VDTUV(KF_UV,ZEPSNM,ZIA(IVORL:IVORU,:,:),ZIA(IDIVL:IDIVU,:,:),& + ! & ZIA(IUL:IUU,:,:),ZIA(IVL:IVU,:,:)) + CALL VDTUV(KF_UV) + ILAST = ILAST+8*KF_UV + + ENDIF + + IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + + IDIM2=UBOUND(PSPSCALAR,2) + CALL GSTATS(431,0) +#ifdef ACCGPU + !$ACC DATA COPYIN(PSPSCALAR) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:PSPSCALAR) +#endif + CALL GSTATS(431,1) + CALL PRFI1B(IFIRST,PSPSCALAR(:,:),KF_SCALARS,IDIM2,KFLDPTRSC) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + IDIM2=UBOUND(PSPSC2,2) + CALL GSTATS(431,0) +#ifdef ACCGPU + !$ACC DATA COPYIN(PSPSC2) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:PSPSC2) +#endif + CALL GSTATS(431,1) + !CALL PRFI1B(ZIA(IFIRST:ILAST,:,:),PSPSC2(:,:),NF_SC2,IDIM2) + CALL PRFI1B(IFIRST,PSPSC2(:,:),NF_SC2,IDIM2) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1*IDIM3 + IDIM2=UBOUND(PSPSC3A,2) + CALL GSTATS(431,0) +#ifdef ACCGPU + !$ACC DATA COPYIN(PSPSC3A) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:PSPSC3A) +#endif + CALL GSTATS(431,1) + DO J3=1,IDIM3 + CALL PRFI1B(IFIRST,PSPSC3A(:,:,J3),IDIM1,IDIM2) + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + IDIM2=UBOUND(PSPSC3B,2) + CALL GSTATS(431,0) +#ifdef ACCGPU + !$ACC DATA COPYIN(PSPSC3B) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:PSPSC3B) +#endif + CALL GSTATS(431,1) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + + CALL PRFI1B(IFIRST,PSPSC3B(:,:,J3),IDIM1,IDIM2) + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + ENDIF + ENDIF + + IF(ILAST /= 8*KF_UV+2*KF_SCALARS) THEN + WRITE(0,*) 'LTINV:KF_UV,KF_SCALARS,ILAST ',KF_UV,KF_SCALARS,ILAST + CALL ABORT_TRANS('LTINV_MOD:ILAST /= 8*KF_UV+2*KF_SCALARS') + ENDIF + ENDIF + + IF (KF_SCDERS > 0) THEN + ! stop 'Error: code path not (yet) supported in GPU version' + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + !CALL SPNSDE(KF_SCALARS,ZEPSNM,ZIA(ISL:ISU,:,:),ZIA(IDL:IDU,:,:)) + CALL SPNSDE(ISL,IDL,KF_SCALARS,ZEPSNM) + ENDIF + + ! ------------------------------------------------------------------ + + + !* 4. INVERSE LEGENDRE TRANSFORM. + ! --------------------------- + + ! FROM ZIA TO ZAOA1 and ZSOA1 + + ISTA = 1 + IFC = 2*KF_OUT_LT + IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV + ENDIF + IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV + ENDIF + + IF( KF_OUT_LT > 0 ) THEN + !call cudaProfilerStart + !CALL LEINV(IFC,ISTA,KF_OUT_LT,ZIA(ISTA:ISTA+IFC-1,:,:),ZAOA1,ZSOA1) + CALL LEINV(IFC,ISTA,KF_OUT_LT,ZAOA1,ZSOA1) + !call cudaProfilerStop + + ! ------------------------------------------------------------------ + + !* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. + ! -------------------------------------------- + + !FROM ZAOA1/ZSOA to FOUBUF_IN + + CALL ASRE1B(KF_OUT_LT,ZAOA1,ZSOA1) + ! ------------------------------------------------------------------ + + ! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + + IF(PRESENT(FSPGL_PROC)) THEN + stop 'Error: SPGL_PROC is not (yet) optimized in GPU version' + CALL FSPGL_INT(KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,FSPGL_PROC,& + & KFLDPTRUV,KFLDPTRSC) + ENDIF + + ENDIF + IF (LHOOK) CALL DR_HOOK('LTINV_MOD',1,ZHOOK_HANDLE) + ! ------------------------------------------------------------------ + + !call cudaProfilerStop + + END SUBROUTINE LTINV +END MODULE LTINV_MOD diff --git a/src/trans/gpu/internal/ltinvad_mod.F90 b/src/trans/gpu/internal/ltinvad_mod.F90 new file mode 100755 index 0000000..ed9c75f --- /dev/null +++ b/src/trans/gpu/internal/ltinvad_mod.F90 @@ -0,0 +1,236 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINVAD_MOD +CONTAINS +SUBROUTINE LTINVAD(KM,KMLOC,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPM_GEOMETRY + +!USE PRLE1AD_MOD +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI1BAD_MOD ,ONLY : PRFI1BAD +USE VDTUVAD_MOD ,ONLY : VDTUVAD +USE SPNSDEAD_MOD ,ONLY : SPNSDEAD +USE LEINVAD_MOD ,ONLY : LEINVAD +USE ASRE1BAD_MOD ,ONLY : ASRE1BAD +!USE FSPGL_INT_MOD + + +!**** *LTINVAD* - Inverse Legendre transform + +! Purpose. +! -------- +! Tranform from Laplace space to Fourier space, compute U and V +! and north/south derivatives of state variables. + +!** Interface. +! ---------- +! *CALL* *LTINVAD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : The Laplace arrays of the model. +! -------------------- The values of the Legendre polynomials +! The grid point arrays of the model +! Method. +! ------- + +! Externals. +! ---------- +! PRLE1AD - prepares the Legendre polonymials +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1AD - prepares the spectral fields +! VDTUVAD - compute u and v from vorticity and divergence +! SPNSDEAD- compute north-south derivatives +! LEINVAD - Inverse Legendre transform +! ASRE1AD - recombination of symmetric/antisymmetric part + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINVAD in IFS CY22R1 +! ------------------------------------------------------------------ + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 +INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(R%NLEI1,KLEI2) +REAL(KIND=JPRBT) :: ZEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRBT) :: ZSOA1(KDIM1,R%NLEI3),ZAOA1(KDIM1,R%NLEI3) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IFC, ISTA, IIFC, IDGLU +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: ILAST,IFIRST,IDIM1,IDIM3,J3 + +! LOCAL LOGICAL SCALARS + +! LOCAL REAL SCALARS + +! ------------------------------------------------------------------ + +!* 1. PREPARE AND ZEPSNM. +! ------------------- + +stop 'Error: code path not (yet) supported in GPU version' +!CALL PREPSNM(KM,KMLOC,ZEPSNM) + +! ------------------------------------------------------------------ +! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + +!IF(PRESENT(FSPGL_PROC)) THEN +! CALL FSPGL_INT(KM,KMLOC,FSPGL_PROC) +!ENDIF + +! ------------------------------------------------------------------ + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + +CALL ASRE1BAD(KF_OUT_LT,KM,KMLOC,ZAOA1,ZSOA1) + +! ------------------------------------------------------------------ + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + + +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF + +ZIA(:,ISTA:ISTA+IFC-1) = 0.0_JPRBT + +IDGLU = MIN(R%NDGNH,G%NDGLU(KM)) +IIFC=IFC +IF(KM == 0)THEN + IIFC=IFC/2 +ENDIF +CALL LEINVAD(KM,KMLOC,IFC,IIFC,KF_OUT_LT,IDGLU,ZIA(:,ISTA:ISTA+IFC-1),ZAOA1,ZSOA1) + +! ------------------------------------------------------------------ + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +ZIA(:,1:ISTA-1) = 0.0_JPRBT + +IFIRST = 1 +ILAST = 4*KF_UV +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL VDTUVAD(KM,KF_UV,ZEPSNM,ZIA(:,IVORL:IVORU),ZIA(:,IDIVL:IDIVU),& + & ZIA(:,IUL:IUU),ZIA(:,IVL:IVU)) + CALL PRFI1BAD(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV,KFLDPTRUV) + CALL PRFI1BAD(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL SPNSDEAD(KM,KF_SCALARS,ZEPSNM,ZIA(:,ISL:ISU),ZIA(:,IDL:IDU)) +ENDIF + +IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL PRFI1BAD(KM,ZIA(:,IFIRST:ILAST),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF +ENDIF + + +! ------------------------------------------------------------------ + + +END SUBROUTINE LTINVAD +END MODULE LTINVAD_MOD diff --git a/src/trans/gpu/internal/myrecvset_mod.F90 b/src/trans/gpu/internal/myrecvset_mod.F90 new file mode 100755 index 0000000..093323f --- /dev/null +++ b/src/trans/gpu/internal/myrecvset_mod.F90 @@ -0,0 +1,83 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE MYRECVSET_MOD +CONTAINS +FUNCTION MYRECVSET(KSETS,KMYSET,KSET) + + +!**** *MYRECVSET* RETURNS SET NUMBER TO SEND TO + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! ISENDSET = MYRECVSET(KSETS,KMYSET,KSET) + +! Explicit arguments : +! -------------------- +! input: KSETS + +! Implicit arguments : NONE +! -------------------- +! Method. +! ------- + +! + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE +INTEGER(KIND=JPIM) :: MYRECVSET +INTEGER(KIND=JPIM),INTENT(IN) :: KSETS,KMYSET,KSET + + +! ------------------------------------------------------------------ + +!* 1. Check input argument for validity +! --------------------------------- + +IF(KSETS < 1 .OR. KMYSET > KSETS .OR. KSET > KSETS-1) THEN + + CALL ABORT_TRANS(' MYRECVSET: INVALID ARGUMENT ') + +ELSE + +!* 2. Compute output parameters +! ------------------------- + + MYRECVSET = MOD(-KSET-1+KMYSET+KSETS,KSETS)+1 + +ENDIF + +END FUNCTION MYRECVSET +END MODULE MYRECVSET_MOD diff --git a/src/trans/gpu/internal/mysendset_mod.F90 b/src/trans/gpu/internal/mysendset_mod.F90 new file mode 100755 index 0000000..636025e --- /dev/null +++ b/src/trans/gpu/internal/mysendset_mod.F90 @@ -0,0 +1,80 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE MYSENDSET_MOD +CONTAINS +FUNCTION MYSENDSET(KSETS,KMYSET,KSET) + + +!**** *MYSENDSET* RETURNS SET NUMBER TO SEND TO + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! ISENDSET = MYSENDSET(KSETS,KMYSET,KSET) + +! Explicit arguments : +! -------------------- +! input: KSETS + +! Implicit arguments : NONE +! -------------------- +! Method. +! ------- + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE +INTEGER(KIND=JPIM) :: MYSENDSET +INTEGER(KIND=JPIM),INTENT(IN) :: KSETS,KMYSET,KSET + + +! ------------------------------------------------------------------ + +!* 1. Check input argument for validity +! --------------------------------- + +IF(KSETS < 1 .OR. KMYSET > KSETS .OR. KSET > KSETS-1) THEN + + CALL ABORT_TRANS(' MYSENDSET: INVALID ARGUMENT ') + +ELSE + +!* 2. Compute output parameters +! ------------------------- + + MYSENDSET = MOD(KMYSET+KSET-1,KSETS)+1 + +ENDIF + +END FUNCTION MYSENDSET +END MODULE MYSENDSET_MOD diff --git a/src/trans/gpu/internal/parkind_ectrans.F90 b/src/trans/gpu/internal/parkind_ectrans.F90 new file mode 100644 index 0000000..d2d8803 --- /dev/null +++ b/src/trans/gpu/internal/parkind_ectrans.F90 @@ -0,0 +1,38 @@ +! (C) Copyright 2021- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PARKIND_ECTRANS +! +! Re-export precision-related symbols defined in fiat / parkind1, +! and add ECTRANS-internal precision-related symbols + +USE PARKIND1 +! +IMPLICIT NONE +SAVE +! +! Real Kind of compile-time precision for internal trans use +! ---------------------------------------------------------- +! +#ifdef PARKINDTRANS_SINGLE +INTEGER, PARAMETER :: JPRBT = SELECTED_REAL_KIND(6,37) +#else +INTEGER, PARAMETER :: JPRBT = SELECTED_REAL_KIND(13,300) +#endif + + +! +! Half precision +! -------------- + +!!INTEGER, PARAMETER :: JPRL = 2 + + + +END MODULE PARKIND_ECTRANS diff --git a/src/trans/gpu/internal/pe2set_mod.F90 b/src/trans/gpu/internal/pe2set_mod.F90 new file mode 100755 index 0000000..9a8ce8d --- /dev/null +++ b/src/trans/gpu/internal/pe2set_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 1998- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PE2SET_MOD +CONTAINS +SUBROUTINE PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) + + +!**** *PE2SET* - Convert from PE number to set numbers + +! Purpose. +! -------- +! Convert from PE number to set numbers in both +! grid-point space and spectral space + +!** Interface. +! ---------- +! *CALL* *PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) + +! Explicit arguments : +! -------------------- +! input: KPE - integer processor number +! in the range 1 .. NPROC +! output: KPRGPNS - integer A set number in grid space +! in the range 1 .. NPRGPNS +! KPRGPEW - integer B set number in grid space +! in the range 1 .. NPRGPEW +! KPRTRW - integer A set number in spectral space +! in the range 1 .. NPRTRW +! KPRTRV - integer B set number in spectral space +! in the range 1 .. NPRTRV + +! Implicit arguments : YOMMP parameters +! NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NPROC + +! -------------------- +! Method. +! ------- + +! PE allocation order is row oriented (e.g. NPRGPNS or NPRTRW = 4): + +! 1 2 3 4 +! 5 6 7 8 +! 9 10 11 12 +! 13 14 15 16 +! . . . . + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! David Dent *ECMWF* + +! Modifications. +! -------------- +! Original : 98-08-19 +! Revision : 98-10-13 row ordering +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW, NPROC, NPRTRV +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE +INTEGER(KIND=JPIM),INTENT(IN) :: KPE +INTEGER(KIND=JPIM),INTENT(OUT) :: KPRGPNS,KPRGPEW,KPRTRW,KPRTRV + +INTEGER(KIND=JPIM) :: IPE,JA +! ------------------------------------------------------------------ + +!* 1. Check input argument for validity +! --------------------------------- + +IF(KPE <= 0.OR.KPE > NPROC) THEN + WRITE(*,'(A,2I8)') ' PE2SET INVALID ARGUMENT ',KPE,NPROC + CALL ABORT_TRANS(' PE2SET INVALID ARGUMENT ') + +ELSE + +!* 2. Compute output parameters +! ------------------------- + + IF( LEQ_REGIONS )THEN + KPRGPNS=1 + IPE=KPE + DO JA=1,N_REGIONS_NS + IF( IPE > N_REGIONS(JA) )THEN + IPE=IPE-N_REGIONS(JA) + KPRGPNS=KPRGPNS+1 + CYCLE + ENDIF + KPRGPEW=IPE + EXIT + ENDDO + ELSE + KPRGPEW=MOD(KPE-1,NPRGPEW)+1 + KPRGPNS=(KPE-1)/NPRGPEW+1 + ENDIF + KPRTRV =MOD(KPE-1,NPRTRV)+1 + KPRTRW =(KPE-1)/NPRTRV+1 + +ENDIF + +END SUBROUTINE PE2SET +END MODULE PE2SET_MOD diff --git a/src/trans/gpu/internal/pre_suleg_mod.F90 b/src/trans/gpu/internal/pre_suleg_mod.F90 new file mode 100755 index 0000000..d7f519e --- /dev/null +++ b/src/trans/gpu/internal/pre_suleg_mod.F90 @@ -0,0 +1,71 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRE_SULEG_MOD +CONTAINS +SUBROUTINE PRE_SULEG +USE PARKIND1 ,ONLY : JPRD, JPIM +USE PARKIND2 ,ONLY : JPRH +USE TPM_GEN +USE TPM_DIM +USE TPM_CONSTANTS +USE TPM_DISTR +USE TPM_FIELDS + +INTEGER(KIND=JPIM) :: INM, IM, ICOUNT,JMLOC,JN +LOGICAL :: LLP1,LLP2 + + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +ICOUNT = 0 +DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + DO JN=IM,R%NTMAX+2 + ICOUNT = ICOUNT+1 + ENDDO +ENDDO + +ALLOCATE(F%REPSNM(ICOUNT)) +IF (LLP2) WRITE(NOUT,9) 'F%REPSNM ',SIZE(F%REPSNM ),SHAPE(F%REPSNM ) +ALLOCATE(F%RN(-1:R%NTMAX+3)) +IF (LLP2) WRITE(NOUT,9) 'F%RN ',SIZE(F%RN ),SHAPE(F%RN ) +ALLOCATE(F%RLAPIN(-1:R%NSMAX+2)) +IF (LLP2) WRITE(NOUT,9) 'F%RLAPIN ',SIZE(F%RLAPIN ),SHAPE(F%RLAPIN ) +ALLOCATE(F%NLTN(-1:R%NTMAX+3)) +IF (LLP2) WRITE(NOUT,9) 'F%NLTN ',SIZE(F%NLTN ),SHAPE(F%NLTN ) + +ICOUNT = 0 +DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + DO JN=IM,R%NTMAX+2 + ICOUNT = ICOUNT+1 + F%REPSNM(ICOUNT) = SQRT(REAL(JN*JN-IM*IM,JPRD)/& + &REAL(4*JN*JN-1,JPRD)) + ENDDO +ENDDO + +DO JN=-1,R%NTMAX+3 + F%RN(JN) = REAL(JN,JPRD) + F%NLTN(JN) = R%NTMAX+2-JN +ENDDO +F%RLAPIN(:) = 0.0_JPRD +F%RLAPIN(0) = 0.0_JPRD +F%RLAPIN(-1) = 0.0_JPRD +DO JN=1,R%NSMAX+2 + F%RLAPIN(JN)=-(REAL(RA,JPRD)*REAL(RA,JPRD)/REAL(JN*(JN+1),JPRD)) +ENDDO + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE PRE_SULEG +END MODULE PRE_SULEG_MOD diff --git a/src/trans/gpu/internal/prepsnm_mod.F90 b/src/trans/gpu/internal/prepsnm_mod.F90 new file mode 100755 index 0000000..8ac16f1 --- /dev/null +++ b/src/trans/gpu/internal/prepsnm_mod.F90 @@ -0,0 +1,105 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PREPSNM_MOD + CONTAINS + SUBROUTINE PREPSNM + + + !**** *PREPSNM* - Prepare REPSNM for wavenumber KM + + ! Purpose. + ! -------- + ! Copy the REPSNM values for specific zonal wavenumber M + ! to work array + + !** Interface. + ! ---------- + ! CALL PREPSNM(...) + + ! Explicit arguments : KM - zonal wavenumber + ! ------------------- KMLOC - local zonal wavenumber + ! PEPSNM - REPSNM for zonal + ! wavenumber KM + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + + + ! Reference. + ! ---------- + + + ! Author. + ! ------- + ! Lars Isaksen *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 00-02-01 From LTINV in IFS CY22R1 + + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + + USE TPM_DIM ,ONLY : R + USE TPM_FIELDS ,ONLY : F, ZEPSNM + USE TPM_DISTR ,ONLY : D + USE TPM_GEN ,ONLY : NOUT + ! + + IMPLICIT NONE + + INTEGER(KIND=JPIM) :: KM,KMLOC + !!REAL(KIND=JPRB), INTENT(INOUT) :: PEPSNM(:,:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: JN + INTEGER(KIND=JPIM) :: R_NTMAX + + + ! ------------------------------------------------------------------ + + !* 1. COPY REPSNM. + ! ------------ + + + + + !!!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + !!!$ACC parallel loop + DO KMLOC=1,D%NUMP + KM = D%MYMS(KMLOC) + + IF (KM > 0) THEN +!#ifdef ACCGPU +! !$ACC loop +!#endif + DO JN=0,KM-1 + ZEPSNM(KMLOC,JN) = 0.0_JPRBT + ENDDO + ENDIF + + DO JN=KM,R%NTMAX+2 + ZEPSNM(KMLOC,JN) = F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN) + ENDDO + ! end loop over wavenumber + END DO + !!!!$OMP END TARGET DATA + !!!!$ACC end data + + ! ------------------------------------------------------------------ + + END SUBROUTINE PREPSNM + + END MODULE PREPSNM_MOD diff --git a/src/trans/gpu/internal/prfi1_mod.F90 b/src/trans/gpu/internal/prfi1_mod.F90 new file mode 100755 index 0000000..ca07a7f --- /dev/null +++ b/src/trans/gpu/internal/prfi1_mod.F90 @@ -0,0 +1,114 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1_MOD +CONTAINS +SUBROUTINE PRFI1(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_DISTR +!USE TPM_TRANS + +USE PRFI1B_MOD ,ONLY : PRFI1B + + +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1 in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR + +stop 'Error: prfi1 not (yet) supported in GPU version' + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +! IFIRST = 1 +! ILAST = 4*KF_UV + +! !* 1.1 VORTICITY AND DIVERGENCE. + +! IF(KF_UV > 0)THEN +! IVOR = 1 +! IDIV = 2*KF_UV+1 +! CALL PRFI1B(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) +! CALL PRFI1B(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) +! ILAST = ILAST+4*KF_UV +! ENDIF + +! !* 1.2 SCALAR VARIABLES. + +! IF(KF_SCALARS > 0)THEN +! IFIRST = ILAST+1 +! ILAST = IFIRST - 1 + 2*KF_SCALARS +! CALL PRFI1B(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +! ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI1 +END MODULE PRFI1_MOD diff --git a/src/trans/gpu/internal/prfi1ad_mod.F90 b/src/trans/gpu/internal/prfi1ad_mod.F90 new file mode 100755 index 0000000..f2347bb --- /dev/null +++ b/src/trans/gpu/internal/prfi1ad_mod.F90 @@ -0,0 +1,111 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1AD_MOD +CONTAINS +SUBROUTINE PRFI1AD(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!USE TPM_DISTR +!USE TPM_TRANS + +USE PRFI1BAD_MOD ,ONLY : PRFI1BAD + + +!**** *PRFI1AD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1AD(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1AD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL PRFI1BAD(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL PRFI1BAD(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL PRFI1BAD(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI1AD +END MODULE PRFI1AD_MOD diff --git a/src/trans/gpu/internal/prfi1b_mod.F90 b/src/trans/gpu/internal/prfi1b_mod.F90 new file mode 100755 index 0000000..7dcc02e --- /dev/null +++ b/src/trans/gpu/internal/prfi1b_mod.F90 @@ -0,0 +1,224 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1B_MOD + CONTAINS + SUBROUTINE PRFI1B(KFIRST,PSPEC,KFIELDS,KDIM,KFLDPTR) + + USE PARKIND1 ,ONLY : JPIM ,JPRB + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DIM ,ONLY : R,R_NSMAX + USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NASM0 + USE TPM_FIELDS ,ONLY : ZIA + USE IEEE_ARITHMETIC + + !**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + + ! Purpose. + ! -------- + ! To extract the spectral fields for a specific zonal wavenumber + ! and put them in an order suitable for the inverse Legendre . + ! tranforms.The ordering is from NSMAX to KM for better conditioning. + ! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing + ! u,v and derivatives in spectral space. + + !** Interface. + ! ---------- + ! *CALL* *PRFI1B(...)* + + ! Explicit arguments : KM - zonal wavenumber + ! ------------------ PIA - spectral components for transform + ! PSPEC - spectral array + ! KFIELDS - number of fields + + + ! Implicit arguments : None. + ! -------------------- + + ! Method. + ! ------- + + ! Externals. None. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 00-02-01 From PRFI1B in IFS CY22R1 + + ! ------------------------------------------------------------------ + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIRST + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS + INTEGER(KIND=JPIM) :: KM,KMLOC + REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) + !REAL(KIND=JPRB) ,INTENT(INOUT) :: PIA(:,:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KDIM + INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF,IFLD + + ! ------------------------------------------------------------------ + + !* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. + ! -------------------------------------------------- + +#ifdef ACCGPU + !$ACC DATA & + !$ACC PRESENT(D_NUMP,R_NSMAX,D_MYMS,D_NASM0) & + !$ACC COPYIN(PSPEC) & + !$ACC PRESENT(ZIA) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(PRESENT,ALLOC:D_NUMP,R_NSMAX,D_MYMS,D_NASM0,PSPEC) +#endif + +#ifdef ACCGPU + !$ACC DATA IF(PRESENT(KFLDPTR)) PRESENT(KFLDPTR) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(PRESENT(KFLDPTR)) MAP(PRESENT,ALLOC:KFLDPTR) +#endif + + + IF(PRESENT(KFLDPTR)) THEN + + + !loop over wavenumber +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ILCM,IFLD,IOFF,IR,II,INM) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,ILCM,IFLD,IOFF,IR,II,INM) DEFAULT(NONE) & + !$ACC& COPYIN(KFIELDS,KFIRST) & + !$ACC& PRESENT(D_NUMP,D_MYMS,D_NASM0,R_NSMAX,KFLDPTR,ZIA) +#endif + DO KMLOC=1,D_NUMP + DO J=1,R_NSMAX+1 + DO JFLD=1,KFIELDS + KM = D_MYMS(KMLOC) + ILCM = R_NSMAX+1-KM + IFLD = KFLDPTR(JFLD) + IF (J .LE. ILCM) THEN + IOFF = D_NASM0(KM) + INM = IOFF+(ILCM-J)*2 + IR = KFIRST+2*(JFLD-1) + II = IR+1 + ZIA(IR,J+2,KMLOC) = PSPEC(IFLD,INM ) + ZIA(II,J+2,KMLOC) = PSPEC(IFLD,INM+1) + END IF + ENDDO + ENDDO + + ! end loop over wavenumber + END DO +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KM,ILCM) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,ILCM) & + !$ACC& COPYIN(KFIRST,KFIELDS) & + !$ACC& PRESENT(D_NUMP,D_MYMS,R_NSMAX,ZIA) +#endif + DO KMLOC=1,D_NUMP + DO JFLD=KFIRST,2*KFIELDS+KFIRST-1 + KM = D_MYMS(KMLOC) + ILCM = R_NSMAX+1-KM + ZIA(JFLD,1,KMLOC) = 0.0_JPRB + ZIA(JFLD,2,KMLOC) = 0.0_JPRB + ZIA(JFLD,ILCM+3,KMLOC) = 0.0_JPRB + ENDDO + ! end loop over wavenumber + END DO + + ELSE + + !loop over wavenumber + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ILCM,IOFF,INM,IR,II) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KMLOC,J,JFLD,KM,ILCM,IOFF,INM,IR,II) DEFAULT(NONE) & + !$ACC& COPYIN(KFIRST,KFIELDS,KDIM) & + !$ACC& PRESENT(ZIA,PSPEC,D_NUMP,D_MYMS,D_NASM0,R_NSMAX) +#endif + DO KMLOC=1,D_NUMP + DO J=1,R_NSMAX+1 + DO JFLD=1,KFIELDS + KM = D_MYMS(KMLOC) + ILCM = R_NSMAX+1-KM + if (J .le. ILCM) then + IOFF = D_NASM0(KM) + INM = IOFF+(ILCM-J)*2 + IR = KFIRST+2*(JFLD-1) + II = IR+1 + IF( INM .LT. KDIM ) THEN + ZIA(IR,J+2,KMLOC) = PSPEC(JFLD,INM ) + ZIA(II,J+2,KMLOC) = PSPEC(JFLD,INM+1) + ENDIF + end if + ENDDO + ENDDO + + ! end loop over wavenumber + END DO + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KM,ILCM) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,KMLOC,JFLD,ILCM) DEFAULT(NONE) & + !$ACC& COPYIN(KFIELDS,KFIRST) & + !$ACC& PRESENT(ZIA,D_NUMP,D_MYMS,R_NSMAX) +#endif + DO KMLOC=1,D_NUMP + DO JFLD=KFIRST,2*KFIELDS+KFIRST-1 + KM = D_MYMS(KMLOC) + ILCM = R_NSMAX+1-KM + ZIA(JFLD,1,KMLOC) = 0.0_JPRB + ZIA(JFLD,2,KMLOC) = 0.0_JPRB + ZIA(JFLD,ILCM+3,KMLOC) = 0.0_JPRB + ENDDO + ! end loop over wavenumber + END DO + + END IF + +#ifdef ACCGPU +!!$ACC UPDATE HOST(ZIA) +!$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif + + + ! ------------------------------------------------------------------ + + END SUBROUTINE PRFI1B +END MODULE PRFI1B_MOD diff --git a/src/trans/gpu/internal/prfi1bad_mod.F90 b/src/trans/gpu/internal/prfi1bad_mod.F90 new file mode 100755 index 0000000..d529273 --- /dev/null +++ b/src/trans/gpu/internal/prfi1bad_mod.F90 @@ -0,0 +1,112 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI1BAD_MOD +CONTAINS +SUBROUTINE PRFI1BAD(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + + +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1BAD(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1BAD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF, IFLD + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + + +ILCM = R%NSMAX+1-KM +IOFF = D%NASM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 + PSPEC(IFLD,INM ) = PSPEC(IFLD,INM ) + PIA(J+2,IR) + PSPEC(IFLD,INM+1) = PSPEC(IFLD,INM+1) + PIA(J+2,II) + ENDDO + ENDDO +ELSE + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + PSPEC(JFLD,INM ) = PSPEC(JFLD,INM ) + PIA(J+2,IR) + PSPEC(JFLD,INM+1) = PSPEC(JFLD,INM+1) + PIA(J+2,II) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI1BAD +END MODULE PRFI1BAD_MOD diff --git a/src/trans/gpu/internal/prfi2_mod.F90 b/src/trans/gpu/internal/prfi2_mod.F90 new file mode 100755 index 0000000..24c93be --- /dev/null +++ b/src/trans/gpu/internal/prfi2_mod.F90 @@ -0,0 +1,100 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2_MOD + CONTAINS + SUBROUTINE PRFI2(KF_FS,PAIA,PSIA) + + !**** *PRFI2* - Prepare input work arrays for direct transform + + ! Purpose. + ! -------- + ! To extract the Fourier fields for a specific zonal wavenumber + ! and put them in an order suitable for the direct Legendre + ! tranforms, i.e. split into symmetric and anti-symmetric part. + + !** Interface. + ! ---------- + ! *CALL* *PRFI2(..) + + ! Explicit arguments : + ! -------------------- KM - zonal wavenumber + ! KMLOC - local zonal wavenumber + ! PAIA - antisymmetric part of Fourier + ! components for KM (output) + ! PSIA - symmetric part of Fourier + ! components for KM (output) + + ! Implicit arguments : The Grid point arrays of the model. + ! -------------------- + + ! Method. + ! ------- + + ! Externals. PRFI2B - basic copying routine + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 87-11-25 + ! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite + ! for uv formulation + ! Modified : 93-03-19 D. Giard - CDCONF='T' + ! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' + ! Modified : 93-05-13 D. Giard - correction of the previous bug + ! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer + ! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' + ! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div + ! instead of u,v->vor,div + ! MPP Group: 95-10-01 Support for Distributed Memory version + ! Modified : 04/06/99 D.Salmond : change order of AIA and SIA + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + + !USE TPM_TRANS + + USE PRFI2B_MOD ,ONLY : PRFI2B + ! + + IMPLICIT NONE + + + ! DUMMY INTEGER SCALARS + INTEGER(KIND=JPIM) :: KM + INTEGER(KIND=JPIM) :: KMLOC + INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + + + REAL(KIND=JPRBT) , INTENT(OUT) :: PSIA(:,:,:), PAIA(:,:,:) + + + ! LOCAL INTEGER SCALARS + + + ! ------------------------------------------------------------------ + + !* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. + ! ------------------------------------------- + +! CALL PRFI2B(KF_FS,PAIA,PSIA) + + ! ------------------------------------------------------------------ + + END SUBROUTINE PRFI2 +END MODULE PRFI2_MOD diff --git a/src/trans/gpu/internal/prfi2ad_mod.F90 b/src/trans/gpu/internal/prfi2ad_mod.F90 new file mode 100755 index 0000000..cce0139 --- /dev/null +++ b/src/trans/gpu/internal/prfi2ad_mod.F90 @@ -0,0 +1,91 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2AD_MOD +CONTAINS +SUBROUTINE PRFI2AD(KM,KMLOC,KF_FS,PAIA,PSIA) + +!**** *PRFI2AD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *PRFI2AD(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. PRFI2ADB - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE PRFI2BAD_MOD ,ONLY : PRFI2BAD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + +REAL(KIND=JPRBT) , INTENT(IN) :: PSIA(:,:), PAIA(:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL PRFI2BAD(KF_FS,KM,KMLOC,PAIA,PSIA) + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI2AD +END MODULE PRFI2AD_MOD diff --git a/src/trans/gpu/internal/prfi2b_mod.F90 b/src/trans/gpu/internal/prfi2b_mod.F90 new file mode 100755 index 0000000..11af2d1 --- /dev/null +++ b/src/trans/gpu/internal/prfi2b_mod.F90 @@ -0,0 +1,136 @@ +! (C) Copyright 1990- ECMWF. +! (C) Copyright 1990- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2B_MOD + CONTAINS + SUBROUTINE PRFI2B(KFIELD,PAIA,KMODE) + + !**** *PRFI2B* - Prepare input work arrays for direct transform + + ! Purpose. + ! -------- + ! To extract the Fourier fields for a specific zonal wavenumber + ! and put them in an order suitable for the direct Legendre + ! tranforms, i.e. split into symmetric and anti-symmetric part. + + !** Interface. + ! ---------- + ! *CALL* *PRFI2B(..) + + ! Explicit arguments : + ! ------------------- KFIELD - number of fields + ! KM - zonal wavenumber + ! KMLOC - local zonal wavenumber + ! PAOA - antisymmetric part of Fourier + ! fields for zonal wavenumber KM + ! PSOA - symmetric part of Fourier + ! fields for zonal wavenumber KM + + ! Implicit arguments : FOUBUF in TPM_TRANS + ! -------------------- + + ! Method. + ! ------- + + ! Externals. None. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 90-07-01 + ! MPP Group: 95-10-01 Support for Distributed Memory version + ! Modified : 04/06/99 D.Salmond : change order of AIA and SIA + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + + USE TPM_DIM ,ONLY : R, R_NDGNH, R_NDGL + USE TPM_TRANS ,ONLY : FOUBUF + USE TPM_GEOMETRY ,ONLY : G, G_NDGLU + USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1,MYPROC + ! + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD + INTEGER(KIND=JPIM),INTENT(IN) :: KMODE + INTEGER(KIND=JPIM) :: KM,KMLOC + REAL(KIND=JPRBT) , INTENT(OUT) :: PAIA(:,:,:) +!! REAL(KIND=JPRBT) , INTENT(OUT) :: PSIA(:,:,:), PAIA(:,:,:) + + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IGLS, ISL, JF, JGL, iunit + + INTEGER(KIND=JPIM) :: OFFSET1, OFFSET2 + + + ! ------------------------------------------------------------------ + + !* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. + ! ------------------------------------------------ + + +#ifdef ACCGPU +!$ACC DATA PRESENT(PAIA,FOUBUF, D_NPNTGTB1,D_NSTAGT1B,D_MYMS,R_NDGL,R_NDGNH,G_NDGLU,D_NPROCL) +#endif +#ifdef OMPGPU +!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! +!$OMP TARGET DATA MAP(ALLOC:PAIA,FOUBUF, D_NPNTGTB1,D_NSTAGT1B,D_MYMS,R_NDGL,R_NDGNH,G_NDGLU,D_NPROCL) +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,ISL,IGLS,OFFSET1,OFFSET2) & +!$OMP& SHARED(D_NUMP,R_NDGNH,KFIELD,D_MYMS,G_NDGLU,R_NDGL,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1,KMODE,PAIA,FOUBUF) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,ISL,IGLS,OFFSET1,OFFSET2) & +!$ACC& COPYIN(KFIELD,KMODE) & +!$ACC& PRESENT(D_NUMP,R_NDGNH,D_MYMS,G_NDGLU,R_NDGL,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1,PAIA,FOUBUF) +#endif +DO KMLOC=1,D_NUMP + DO JGL=1,R_NDGNH + DO JF=1,KFIELD*2 + KM = D_MYMS(KMLOC) + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + if (JGL .ge. ISL) then + IGLS = R_NDGL+1-JGL + OFFSET1 = (D_NSTAGT1B(D_NPROCL(JGL) )+D_NPNTGTB1(KMLOC,JGL ))*2*KFIELD + OFFSET2 = (D_NSTAGT1B(D_NPROCL(IGLS))+D_NPNTGTB1(KMLOC,IGLS))*2*KFIELD + IF( KMODE == -1 ) THEN + PAIA(JF,JGL,KMLOC) = FOUBUF(OFFSET1+JF)-FOUBUF(OFFSET2+JF) + ELSE + PAIA(JF,JGL,KMLOC) = FOUBUF(OFFSET1+JF)+FOUBUF(OFFSET2+JF) +! PSIA(JF,JGL,KMLOC) = FOUBUF(OFFSET1+JF)+FOUBUF(OFFSET2+JF) + ENDIF + end if + ENDDO + ENDDO +END DO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + + ! ------------------------------------------------------------------ + + END SUBROUTINE PRFI2B +END MODULE PRFI2B_MOD diff --git a/src/trans/gpu/internal/prfi2bad_mod.F90 b/src/trans/gpu/internal/prfi2bad_mod.F90 new file mode 100755 index 0000000..33d6080 --- /dev/null +++ b/src/trans/gpu/internal/prfi2bad_mod.F90 @@ -0,0 +1,99 @@ +! (C) Copyright 1990- ECMWF. +! (C) Copyright 1990- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2BAD_MOD +CONTAINS +SUBROUTINE PRFI2BAD(KFIELD,KM,KMLOC,PAIA,PSIA) + +!**** *PRFI2BAD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *PRFI2BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRBT) , INTENT(IN) :: PSIA(:,:), PAIA(:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IGLS, ISL, ISTAN, ISTAS, JF, JGL + + +! ------------------------------------------------------------------ + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ + +ISL = MAX(R%NDGNH-G%NDGLU(KM)+1,1) + +DO JGL=ISL,R%NDGNH + IGLS = R%NDGL+1-JGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + ISTAS = (D%NSTAGT1B(D%NPROCL(IGLS))+D%NPNTGTB1(KMLOC,IGLS))*2*KFIELD +!DIR$ IVDEP +!OCL NOVREC + DO JF=1,KFIELD*2 + FOUBUF(ISTAN+JF) = PSIA(JF,JGL)+PAIA(JF,JGL) + FOUBUF(ISTAS+JF) = PSIA(JF,JGL)-PAIA(JF,JGL) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE PRFI2BAD +END MODULE PRFI2BAD_MOD diff --git a/src/trans/gpu/internal/read_legpol_mod.F90 b/src/trans/gpu/internal/read_legpol_mod.F90 new file mode 100755 index 0000000..5d99ff9 --- /dev/null +++ b/src/trans/gpu/internal/read_legpol_mod.F90 @@ -0,0 +1,286 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE READ_LEGPOL_MOD +CONTAINS +SUBROUTINE READ_LEGPOL +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT ,JPRD +USE TPM_GEN +USE TPM_DISTR +USE TPM_DIM +USE TPM_GEOMETRY +USE TPM_FLT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TPM_CTL +USE BYTES_IO_MOD +USE BUTTERFLY_ALG_MOD +USE SHAREDMEM_MOD + +!**** *READ_LEGPOL * - read in Leg.Pol. and assocciated arrays from file or memory segment + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! *CALL* *READ_LEGPOL* + +! Explicit arguments : None +! -------------------- + +! Implicit arguments : +! -------------------- +! + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! + +! ------- +! Mats Hamrud and Willem Deconinck *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 + +IMPLICIT NONE + +INTEGER(KIND=JPIM),PARAMETER :: JPIBUFL=4 +INTEGER(KIND=JPIM) :: IRBYTES,IIBYTES,JMLOC,IPRTRV,IMLOC,IM,ILA,ILS +INTEGER(KIND=JPIM) :: IDGLU,ISIZE,IBYTES,IRET,IFILE,JSETV,IDUM,JGL,II,IDGLU2 +INTEGER(KIND=JPIM),POINTER :: IBUF(:) +REAL(KIND=JPRBT) ,ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM) ,POINTER :: IBUFA(:) +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 +TYPE(CLONE) :: YLCLONE +CHARACTER(LEN=8) :: CLABEL +CHARACTER(LEN=16) :: CLABEL_16 + +! ------------------------------------------------------------------ + +IRBYTES = 8 +IIBYTES = 4 +IDUM = 3141 + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_OPEN(IFILE,C%CLEGPOLFNAME,'R') + ALLOCATE(IBUF(JPIBUFL)) +ELSE + NULLIFY(IBUF) +ENDIF +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) +ENDIF +CLABEL = TRANSFER(IBUF(1:2),CLABEL) +IF( S%LUSEFLT .AND. CLABEL /= 'LEGPOLBF') THEN + WRITE(NERR,*) S%LUSEFLT,CLABEL + CALL ABORT_TRANS('READ_LEGPOL:WRONG LABEL') +ELSEIF(.NOT. S%LUSEFLT .AND. CLABEL /= 'LEGPOL ') THEN + WRITE(NERR,*) S%LUSEFLT,CLABEL + CALL ABORT_TRANS('READ_LEGPOL:WRONG LABEL') +ENDIF +IF(IBUF(3) /= R%NSMAX) CALL ABORT_TRANS('READ_LEGPOL:WRONG SPECTRAL TRUNCATION') +IF(IBUF(4) /= R%NDGNH) CALL ABORT_TRANS('READ_LEGPOL:WRONG NO OF GAUSSIAN LATITUDES') +IF(C%CIO_TYPE == 'file') THEN + ALLOCATE(IBUFA(2*R%NDGNH)) + CALL BYTES_IO_READ(IFILE,IBUFA,2*R%NDGNH*IIBYTES,IRET) +ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,2*R%NDGNH,IBUFA,ADVANCE=.TRUE.) +ENDIF +II = 0 +DO JGL=1,R%NDGNH + II = II+1 + IF(IBUFA(II) /= G%NLOEN(JGL)) THEN + WRITE(NERR,*) 'WRONG NUMBER OF LONGITUDE POINTS ', JGL,G%NLOEN(JGL),IBUFA(II) + CALL ABORT_TRANS('READ_LEGPOL:WRONG NLOEN') + ENDIF + II=II+1 + IF(IBUFA(II) /= G%NMEN(JGL)) THEN + WRITE(NERR,*) 'WRONG CUT-OFF WAVE NUMBER ', JGL,G%NMEN(JGL),IBUFA(II) + CALL ABORT_TRANS('READ_LEGPOL:WRONG NMEN') + ENDIF +ENDDO +IF(C%CIO_TYPE == 'file') THEN + DEALLOCATE(IBUFA) +ENDIF + +DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) +! Anti-symmetric + IF( S%LUSEFLT .AND. ILA > S%ITHRESHOLD) THEN + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + IF(IBUF(1) /= IDGLU .OR. IBUF(2) /= ILA ) THEN + WRITE(NERR,*) 'READ_LEGPOL ERROR ', IBUF,IDGLU,ILA + CALL ABORT_TRANS('READ_LEGPOL:WRONG MATRIX SIZE') + ENDIF + + ISIZE = IBUF(3) + IF(C%CIO_TYPE == 'file') THEN + ALLOCATE(YLCLONE%COMMSBUF(ISIZE)) + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_READ(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,YLCLONE) + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,YDMEMBUF=C%STORAGE) + ENDIF + ELSE + IF(C%CIO_TYPE == 'file') THEN + ISIZE = IDGLU*ILA + ALLOCATE(ZBUF(ISIZE)) + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(IMLOC)%RPNMA(IDGLU,ILA)) + S%FA(IMLOC)%RPNMA(:,:) = RESHAPE(ZBUF,(/IDGLU,ILA/)) + DEALLOCATE(ZBUF) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,IDGLU,ILA,S%FA(IMLOC)%RPNMA,ADVANCE=.TRUE.) + ENDIF + ENDIF +! Symmetric + IF( S%LUSEFLT .AND. ILS > S%ITHRESHOLD) THEN + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + IF(IBUF(1) /= IDGLU .OR. IBUF(2) /= ILS ) THEN + WRITE(NERR,*) 'READ_LEGPOL ERROR ', IBUF,IDGLU,ILA + CALL ABORT_TRANS('READ_LEGPOL:WRONG MATRIX ZIZE') + ENDIF + ISIZE = IBUF(3) + IF(C%CIO_TYPE == 'file') THEN + ALLOCATE(YLCLONE%COMMSBUF(ISIZE)) + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_READ(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,YLCLONE) + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,YDMEMBUF=C%STORAGE) + ENDIF + ELSE + IF(C%CIO_TYPE == 'file') THEN + ISIZE = IDGLU*ILS + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(IMLOC)%RPNMS(IDGLU,ILS)) + S%FA(IMLOC)%RPNMS(:,:) = RESHAPE(ZBUF,(/IDGLU,ILS/)) + DEALLOCATE(ZBUF) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,IDGLU,ILS,S%FA(IMLOC)%RPNMS,ADVANCE=.TRUE.) + ENDIF + ENDIF + ENDDO +ENDDO + +! Lat-lon grid +IF(S%LDLL) THEN + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + CLABEL_16 = TRANSFER(IBUF,CLABEL_16) + IF(CLABEL_16 /= 'LATLON---BEG-BEG')CALL ABORT_TRANS('READ_LEGPOL:WRONG LAT/LON LABEL') + + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IDGLU2 = S%NDGNHD + + IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) + ENDIF + IF(IBUF(1) /= IM .OR. IBUF(2) /= IDGLU .OR. IBUF(3) /= IDGLU2 ) THEN + WRITE(NERR,*) 'READ_LEGPOL ERROR ', IBUF,IM,IDGLU,IDGLU2 + CALL ABORT_TRANS('READ_LEGPOL:WRONG LAT-LON MATRIX SIZE') + ENDIF + + IF(C%CIO_TYPE == 'file') THEN + + ISIZE = 2*IDGLU*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(JMLOC)%RPNMWI(2*IDGLU,2)) + S%FA(JMLOC)%RPNMWI(:,:) = RESHAPE(ZBUF,(/2*IDGLU,2/)) + DEALLOCATE(ZBUF) + + ISIZE = 2*IDGLU2*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + CALL BYTES_IO_READ(IFILE,ZBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(NERR,*) 'BYTES_IO_READ ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('READ_LEGPOL:BYTES_IO_READ FAILED') + ENDIF + ALLOCATE(S%FA(JMLOC)%RPNMWO(2*IDGLU2,2)) + S%FA(JMLOC)%RPNMWO(:,:) = RESHAPE(ZBUF,(/2*IDGLU2,2/)) + DEALLOCATE(ZBUF) + + ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,2*IDGLU,2,S%FA(JMLOC)%RPNMWI,ADVANCE=.TRUE.) + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,2*IDGLU2,2,S%FA(JMLOC)%RPNMWO,ADVANCE=.TRUE.) + ENDIF + ENDDO +ENDIF + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_READ(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +ELSE + CALL SHAREDMEM_ASSOCIATE(C%STORAGE,JPIBUFL,IBUF,ADVANCE=.TRUE.) +ENDIF +CLABEL_16 = TRANSFER(IBUF,CLABEL_16) +IF(CLABEL_16 /= 'LEGPOL---EOF-EOF')CALL ABORT_TRANS('READ_LEGPOL:WRONG END LABEL') +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_CLOSE(IFILE) + DEALLOCATE(IBUF) +ENDIF + +END SUBROUTINE READ_LEGPOL +END MODULE READ_LEGPOL_MOD diff --git a/src/trans/gpu/internal/set2pe_mod.F90 b/src/trans/gpu/internal/set2pe_mod.F90 new file mode 100755 index 0000000..c7f69d3 --- /dev/null +++ b/src/trans/gpu/internal/set2pe_mod.F90 @@ -0,0 +1,131 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 1998- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SET2PE_MOD +CONTAINS +SUBROUTINE SET2PE(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) + + +!**** *SET2PE* - Convert from set numbers to PE number + +! Purpose. +! -------- +! Convert from set numbers in either grid-point space or spectral space +! to PE number + +!** Interface. +! ---------- +! *CALL* *SET2PE(KPRGPNS,KPRGPEW,KPRTRW,KPRTRV,KPE) + +! Explicit arguments : +! -------------------- + +! input : KPRGPNS - integer A set number in grid space +! in the range 1 .. NPRGPNS +! KPRGPEW - integer B set number in grid space +! in the range 1 .. NPRGPEW +! KPRTRW - integer A set number in spectral space +! in the range 1 .. NPRTRW +! KPRTRV - integer B set number in spectral space +! in the range 1 .. NPRTRV +! output: KPE - integer processor number +! in the range 1 .. NPROC + +! Normally, one pair of input set numbers will be set to zero +! SET2PE will compute KPE from the first pair if they are valid numbers. +! else from the other pair, + +! Implicit arguments : YOMMP parameters +! NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NPROC + +! -------------------- +! Method. +! ------- + +! Externals. +! ---------- +! NONE + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! David Dent *ECMWF* + +! Modifications. +! -------------- +! Original : 98-08-19 +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM + +USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW, NPRGPNS, NPRTRV, NPRTRW +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE +INTEGER(KIND=JPIM),INTENT(IN) :: KPRGPNS,KPRGPEW,KPRTRW,KPRTRV +INTEGER(KIND=JPIM),INTENT(OUT) :: KPE + +INTEGER(KIND=JPIM) :: IPE,JA +! ------------------------------------------------------------------ + +!* 1. Choose from input parameters +! ---------------------------- + +IF(KPRGPNS > 0.AND.KPRGPEW > 0) THEN + + IF( LEQ_REGIONS )THEN + IF( KPRGPNS > N_REGIONS_NS )THEN + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPNS,N_REGIONS_NS + CALL ABOR1(' SET2PE INVALID ARGUMENT ') + ENDIF + IF( KPRGPEW > N_REGIONS(KPRGPNS) )THEN + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPEW,N_REGIONS(KPRGPNS) + CALL ABOR1(' SET2PE INVALID ARGUMENT ') + ENDIF + KPE=0 + DO JA=1,KPRGPNS-1 + KPE=KPE+N_REGIONS(JA) + ENDDO + KPE=KPE+KPRGPEW + ELSE + IF(KPRGPNS <= NPRGPNS.AND.KPRGPEW <= NPRGPEW) THEN + +!* 2. Grid-space set values supplied +! ------------------------------ + + KPE=(KPRGPNS-1)*NPRGPEW + KPRGPEW + ELSE + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPNS,KPRGPEW + CALL ABORT_TRANS(' SET2PE INVALID ARGUMENT ') + ENDIF + ENDIF + +ELSE + +!* 3. Spectral space set values supplied +! ---------------------------------- + + IF(KPRTRW <= NPRTRW.AND.KPRTRV <= NPRTRV) THEN + KPE=(KPRTRW-1)*NPRTRV + KPRTRV + ELSE + WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRTRW,KPRTRV + CALL ABORT_TRANS(' SET2PE INVALID ARGUMENT ') + ENDIF + +ENDIF + +END SUBROUTINE SET2PE +END MODULE SET2PE_MOD diff --git a/src/trans/gpu/internal/set_resol_mod.F90 b/src/trans/gpu/internal/set_resol_mod.F90 new file mode 100755 index 0000000..6598ac3 --- /dev/null +++ b/src/trans/gpu/internal/set_resol_mod.F90 @@ -0,0 +1,72 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SET_RESOL_MOD +CONTAINS +SUBROUTINE SET_RESOL(KRESOL,LDSETUP) +USE PARKIND1 ,ONLY : JPIM + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL,LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FFT ,ONLY : T, FFT_RESOL +USE TPM_FFTH ,ONLY : TC, FFTH_RESOL +USE TPM_FLT +USE TPM_CTL ,ONLY : C, CTL_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSETUP + +! Local varaibles +INTEGER(KIND=JPIM) :: IRESOL +LOGICAL :: LLSETUP + +! ------------------------------------------------------------------ + +IF(MSETUP0 == 0) CALL ABORT_TRANS('SET_RESOL:TRANS NOT SETUP') +LLSETUP = .FALSE. +IF(PRESENT(LDSETUP)) LLSETUP = LDSETUP +IRESOL = 1 +IF(PRESENT(KRESOL)) THEN + IRESOL = KRESOL + IF(IRESOL < 1 .OR. IRESOL > NMAX_RESOL) THEN + WRITE(NOUT,*)'SET_RESOL: UNKNOWN RESOLUTION ',IRESOL,NMAX_RESOL + CALL ABORT_TRANS('SET_RESOL:IRESOL < 1 .OR. KRESOL > NMAX_RESOL') + ENDIF + IF(.NOT.LLSETUP) THEN + IF(.NOT.LENABLED(IRESOL)) THEN + WRITE(NOUT,*)'SET_RESOL: UNKNOWN RESOLUTION ',IRESOL,LENABLED + CALL ABORT_TRANS('SET_RESOL:IRESOL NOT ENABLED') + ENDIF + ENDIF +ENDIF +IF(IRESOL /= NCUR_RESOL) THEN + NCUR_RESOL = IRESOL + R => DIM_RESOL(NCUR_RESOL) + F => FIELDS_RESOL(NCUR_RESOL) + G => GEOM_RESOL(NCUR_RESOL) + D => DISTR_RESOL(NCUR_RESOL) + T => FFT_RESOL(NCUR_RESOL) + TC => FFTH_RESOL(NCUR_RESOL) + S => FLT_RESOL(NCUR_RESOL) + C => CTL_RESOL(NCUR_RESOL) +ENDIF + +END SUBROUTINE SET_RESOL +END MODULE SET_RESOL_MOD diff --git a/src/trans/gpu/internal/setup_dims_mod.F90 b/src/trans/gpu/internal/setup_dims_mod.F90 new file mode 100755 index 0000000..c0277d3 --- /dev/null +++ b/src/trans/gpu/internal/setup_dims_mod.F90 @@ -0,0 +1,50 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SETUP_DIMS_MOD +CONTAINS +SUBROUTINE SETUP_DIMS + +USE PARKIND1 ,ONLY : JPIM + +USE TPM_DIM ,ONLY : R +USE TPM_FLT ,ONLY : S +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JN,ISPOLEG + +! ------------------------------------------------------------------ + +ISPOLEG = 0 +DO JM=0,R%NSMAX + DO JN=JM,R%NTMAX+1 + ISPOLEG = ISPOLEG+1 + ENDDO +ENDDO +R%NSPOLEG = ISPOLEG + +R%NSPEC_G = (R%NSMAX+1)*(R%NSMAX+2)/2 +R%NSPEC2_G = R%NSPEC_G*2 + +R%NDGNH = (R%NDGL+1)/2 + +R%NLEI1 = R%NSMAX+4+MOD(R%NSMAX+4+1,2) +R%NLEI3 = R%NDGNH+MOD(R%NDGNH+2,2) +IF (S%LSOUTHPNM) R%NLEI3=2*R%NLEI3 + +R%NLED3 = R%NTMAX+2+MOD(R%NTMAX+3,2) +R%NLED4 = R%NTMAX+3+MOD(R%NTMAX+4,2) + +! ------------------------------------------------------------------ + +END SUBROUTINE SETUP_DIMS +END MODULE SETUP_DIMS_MOD diff --git a/src/trans/gpu/internal/setup_geom_mod.F90 b/src/trans/gpu/internal/setup_geom_mod.F90 new file mode 100755 index 0000000..9c68830 --- /dev/null +++ b/src/trans/gpu/internal/setup_geom_mod.F90 @@ -0,0 +1,110 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SETUP_GEOM_MOD +CONTAINS +SUBROUTINE SETUP_GEOM + +USE PARKIND1 ,ONLY : JPRD, JPIM + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPM_FIELDS ,ONLY : F +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +REAL(KIND=JPRD) :: ZSQM2(R%NDGL) +INTEGER(KIND=JPIM) :: IDGLU(0:R%NSMAX,R%NDGNH) +INTEGER(KIND=JPIM) :: JGL,JM,NSMAXLIN + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + +IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_GEOM ===' + + ALLOCATE (G%NMEN(R%NDGL)) + IF(LLP2)WRITE(NOUT,9) 'G%NMEN ',SIZE(G%NMEN ),SHAPE(G%NMEN ) + + NSMAXLIN = R%NDGL-1 + IF (R%NSMAX>=NSMAXLIN .OR. .NOT. G%LREDUCED_GRID) THEN + ! linear or full grid + DO JGL=1,R%NDGL + G%NMEN(JGL) = MIN(R%NSMAX,(G%NLOEN(JGL)-1)/2) + ENDDO + ELSEIF (R%NSMAX>=R%NDGL*2/3-1) THEN + ! quadratic grid + ZSQM2(:) = 3*(NSMAXLIN-R%NSMAX)/R%NDGL*F%R1MU2(:) + G%NMEN(1) = MIN(R%NSMAX,INT(REAL(G%NLOEN(1)-1,JPRD)/(2.0_JPRD+ZSQM2(1)))) + DO JGL=2,R%NDGNH + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL-1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL))))) + ENDDO + ! * SOUTHERN HEMISPHERE : + G%NMEN(R%NDGL) = MIN(R%NSMAX,INT(REAL(G%NLOEN(R%NDGL)-1,JPRD)/(2.0_JPRD+ZSQM2(R%NDGL)))) + DO JGL=R%NDGL-1, R%NDGNH+1, -1 + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL+1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL))))) + ENDDO + ELSE + ! cubic grid + ZSQM2(:) = F%R1MU2(:) + G%NMEN(1) = MIN(R%NSMAX,INT(REAL(G%NLOEN(1)-1,JPRD)/(2.0_JPRD+ZSQM2(1)))-1) + DO JGL=2,R%NDGNH + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL-1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL)))-1)) + ENDDO + ! * SOUTHERN HEMISPHERE : + G%NMEN(R%NDGL) = MIN(R%NSMAX,INT(REAL(G%NLOEN(R%NDGL)-1,JPRD)/(2.0_JPRD+ZSQM2(R%NDGL)))-1) + DO JGL=R%NDGL-1, R%NDGNH+1, -1 + G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL+1),& + &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL)))-1)) + ENDDO + ENDIF + IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JGL,G%NLOEN,G%NMEN) '')') + WRITE(NOUT,FMT='(8(1X,''('',I4,I4,I4,'')''))')& + &(JGL,G%NLOEN(JGL),G%NMEN(JGL),JGL=1,R%NDGL) + ENDIF + ALLOCATE(G%NDGLU(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'G%NDGLU ',SIZE(G%NDGLU ),SHAPE(G%NDGLU ) + IDGLU(:,:) = 0 + G%NDGLU(:) = 0 + DO JGL=1,R%NDGNH + DO JM=0,G%NMEN(JGL) + IDGLU(JM,JGL) = 1 + ENDDO + ENDDO + DO JM=0,R%NSMAX + DO JGL=1,R%NDGNH + G%NDGLU(JM) = G%NDGLU(JM)+IDGLU(JM,JGL) + ENDDO + ENDDO + IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JM,G%NDGLU) '')') + WRITE(NOUT,FMT='(10(1X,''('',I4,I4,'')''))')& + &(JM,G%NDGLU(JM),JM=0,R%NSMAX) + ENDIF + +ENDIF + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SETUP_GEOM +END MODULE SETUP_GEOM_MOD diff --git a/src/trans/gpu/internal/shuffle_mod.F90 b/src/trans/gpu/internal/shuffle_mod.F90 new file mode 100755 index 0000000..5cfd173 --- /dev/null +++ b/src/trans/gpu/internal/shuffle_mod.F90 @@ -0,0 +1,137 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SHUFFLE_MOD +CONTAINS +SUBROUTINE SHUFFLE(KF_UV_G,KF_SCALARS_G,KSHFUV_G,KIVSETUV,KSHFSC_G,KIVSETSC,& + & KVSETUV,KVSETSC) + +!**** *SHUFFLE* - Re-shuffle fields for load balancing + +! Purpose. +! -------- +! Re-shuffle fields for load balancing if NPRTRV>1. Note that the +! relative order of the local spectral fields has to maintained. + +!** Interface. +! ---------- +! CALL SHUFFLE(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KSHFUV_G - reshuffling index for uv fields +! KIVSETUV - reshuffled KVSETUV +! KSHFSC_G - reshuffling index for scalar fields +! KIVSETSC - reshuffled KVSETSC +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. + +! Externals. NONE +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM + +!USE TPM_GEN +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : NPRTRV +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G,KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(OUT) :: KSHFUV_G(:),KSHFSC_G(:) +INTEGER(KIND=JPIM), INTENT(OUT) :: KIVSETUV(:),KIVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) + +INTEGER(KIND=JPIM) :: IHELP(MAX(KF_UV_G,KF_SCALARS_G),NPRTRV),IHELPC(NPRTRV) +INTEGER(KIND=JPIM) :: IDW,J + +! ------------------------------------------------------------------ + +IF(NPRTRV > 1) THEN + IHELP(:,:) = 0 + IHELPC(:) = 0 + DO J=1,KF_UV_G + IHELPC(KVSETUV(J)) = IHELPC(KVSETUV(J))+1 + IHELP(IHELPC(KVSETUV(J)),KVSETUV(J)) = J + ENDDO + IDW = KF_UV_G+1 + DO + DO J=NPRTRV,1,-1 + IF(IHELPC(J) > 0) THEN + IDW = IDW-1 + KSHFUV_G(IDW) = IHELP(IHELPC(J),J) + IHELPC(J) =IHELPC(J)-1 + ENDIF + ENDDO + IF(IDW == 1) EXIT + ENDDO + + IHELP(:,:) = 0 + IHELPC(:) = 0 + DO J=1,KF_SCALARS_G + IHELPC(KVSETSC(J)) = IHELPC(KVSETSC(J))+1 + IHELP(IHELPC(KVSETSC(J)),KVSETSC(J)) = J + ENDDO + IDW = KF_SCALARS_G+1 + DO + DO J=NPRTRV,1,-1 + IF(IHELPC(J) > 0) THEN + IDW = IDW-1 + KSHFSC_G(IDW) = IHELP(IHELPC(J),J) + IHELPC(J) =IHELPC(J)-1 + ENDIF + ENDDO + IF(IDW == 1) EXIT + ENDDO + + DO J=1,KF_UV_G + KIVSETUV(J) = KVSETUV(KSHFUV_G(J)) + ENDDO + DO J=1,KF_SCALARS_G + KIVSETSC(J) = KVSETSC(KSHFSC_G(J)) + ENDDO +ELSE + DO J=1,KF_UV_G + KSHFUV_G(J) = J + KIVSETUV(J) = 1 + ENDDO + DO J=1,KF_SCALARS_G + KSHFSC_G(J) = J + KIVSETSC(J) = 1 + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SHUFFLE +END MODULE SHUFFLE_MOD diff --git a/src/trans/gpu/internal/spnorm_ctl_mod.F90 b/src/trans/gpu/internal/spnorm_ctl_mod.F90 new file mode 100755 index 0000000..9aa9f98 --- /dev/null +++ b/src/trans/gpu/internal/spnorm_ctl_mod.F90 @@ -0,0 +1,62 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNORM_CTL_MOD +CONTAINS +SUBROUTINE SPNORM_CTL(PSPEC,KFLD,KFLD_G,KVSET,KMASTER,PMET,PNORM) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MYPROC, MYSETV + +USE SPNORMD_MOD ,ONLY : SPNORMD +USE SPNORMC_MOD ,ONLY : SPNORMC +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFLD,KFLD_G +INTEGER(KIND=JPIM) :: IVSET(KFLD_G) +REAL(KIND=JPRBT) :: ZMET(0:R%NSMAX) +REAL(KIND=JPRBT) :: ZSM(KFLD,D%NUMP) +REAL(KIND=JPRBT) :: ZGM(KFLD_G,0:R%NSMAX) + +! ------------------------------------------------------------------ + +IF(PRESENT(KVSET)) THEN + IVSET(:) = KVSET(:) +ELSE + IVSET(:) = MYSETV +ENDIF + +IF(PRESENT(PMET)) THEN + ZMET(:) = PMET(:) +ELSE + ZMET(:) = 1.0_JPRBT +ENDIF + +CALL SPNORMD(PSPEC,KFLD,ZMET,ZSM) + +CALL SPNORMC(ZSM,KFLD_G,IVSET,KMASTER,ZGM) + +IF(MYPROC == KMASTER) THEN + PNORM(1:KFLD_G) = REAL(SQRT(SUM(ZGM(1:KFLD_G,:),DIM=2)) , KIND=JPRB) +! PNORM(1:KFLD_G) = SQRT(PNORM(1:KFLD_G)) +ENDIF +! ------------------------------------------------------------------ + +END SUBROUTINE SPNORM_CTL +END MODULE SPNORM_CTL_MOD diff --git a/src/trans/gpu/internal/spnormc_mod.F90 b/src/trans/gpu/internal/spnormc_mod.F90 new file mode 100755 index 0000000..edc469c --- /dev/null +++ b/src/trans/gpu/internal/spnormc_mod.F90 @@ -0,0 +1,90 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNORMC_MOD +CONTAINS +SUBROUTINE SPNORMC(PSM,KFLD_G,KVSET,KMASTER,PGM) + + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, MYPROC, NPROC + +USE PE2SET_MOD ,ONLY : PE2SET +! + +IMPLICIT NONE + +REAL(KIND=JPRBT) ,INTENT(IN) :: PSM(:,:) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KMASTER +REAL(KIND=JPRBT) ,INTENT(OUT) :: PGM(:,0:) + +REAL(KIND=JPRBT) :: ZRECVBUF(KFLD_G*(R%NSMAX+1)) +INTEGER(KIND=JPIM) :: IFLDR(NPRTRV) + +INTEGER(KIND=JPIM) :: ISTOTAL,JFLD,ITAG,JROC,IMSGLEN,IRECVID +INTEGER(KIND=JPIM) :: IRECVNUMP,IRECVFLD,IFLD,JMLOC,IM,IBUFLENR,IA,IB +INTEGER(KIND=JPIM) :: IRECVSETA,IRECVSETB +! ------------------------------------------------------------------ + +ISTOTAL = SIZE(PSM) +IBUFLENR = SIZE(ZRECVBUF) + +IFLDR(:) = 0 +DO JFLD=1,KFLD_G + IFLDR(KVSET(JFLD)) = IFLDR(KVSET(JFLD))+1 +ENDDO +ITAG = 100 + +IF (NPROC > 1.AND.MYPROC /= KMASTER) THEN + CALL MPL_SEND(PSM(:,:),KDEST=NPRCIDS(KMASTER),KTAG=ITAG,& + &CDSTRING='SPNORMC:') +ENDIF + +IF (MYPROC == KMASTER) THEN + DO JROC=1,NPROC + IF (JROC == KMASTER) THEN + ZRECVBUF(1:ISTOTAL) = RESHAPE(PSM,SHAPE(ZRECVBUF(1:ISTOTAL))) + IRECVID = MYPROC + IMSGLEN = ISTOTAL + ELSE + CALL MPL_RECV(ZRECVBUF(1:IBUFLENR),KTAG=ITAG,& + &KFROM=IRECVID,CDSTRING='SPNORMC :') + ENDIF + CALL PE2SET(IRECVID,IA,IB,IRECVSETA,IRECVSETB) + IRECVNUMP = D%NUMPP(IRECVSETA) + IRECVFLD = IFLDR(IRECVSETB) + IFLD = 0 + DO JFLD=1,KFLD_G + IF(KVSET(JFLD) == IRECVSETB) THEN + IFLD=IFLD+1 + DO JMLOC=1,IRECVNUMP + IM = D%NALLMS(D%NPTRMS(IRECVSETA)-1+JMLOC) + PGM(JFLD,IM) = ZRECVBUF((JMLOC-1)*IRECVFLD+IFLD) + ENDDO + ENDIF + ENDDO + ENDDO +ENDIF + +! Perform barrier synchronisation to guarantee all processors have +! completed communication + +IF( NPROC > 1 )THEN + CALL MPL_BARRIER(CDSTRING='SPNORMC') +ENDIF +! ------------------------------------------------------------------ + +END SUBROUTINE SPNORMC +END MODULE SPNORMC_MOD diff --git a/src/trans/gpu/internal/spnormd_mod.F90 b/src/trans/gpu/internal/spnormd_mod.F90 new file mode 100755 index 0000000..77aafd3 --- /dev/null +++ b/src/trans/gpu/internal/spnormd_mod.F90 @@ -0,0 +1,66 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNORMD_MOD +CONTAINS +SUBROUTINE SPNORMD(PSPEC,KFLD,PMET,PSM) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRBT) ,INTENT(IN) :: PMET(0:R%NSMAX) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD +REAL(KIND=JPRBT) ,INTENT(OUT) :: PSM(:,:) + +INTEGER(KIND=JPIM) :: JM ,JFLD ,JN ,IM ,ISP + +! ------------------------------------------------------------------ + + +CALL GSTATS(1651,0) +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JM,IM,JN,ISP,JFLD) +DO JM=1,D%NUMP + PSM(:,JM) = 0.0_JPRBT + IM = D%MYMS(JM) + IF(IM == 0)THEN + DO JN=0,R%NSMAX + ISP = D%NASM0(0)+JN*2 + DO JFLD=1,KFLD + PSM(JFLD,JM) = PSM(JFLD,JM)+PMET(JN)*PSPEC(JFLD,ISP)**2 + ENDDO + ENDDO + ELSE + DO JN=IM,R%NSMAX + ISP = D%NASM0(IM)+(JN-IM)*2 + DO JFLD=1,KFLD + PSM(JFLD,JM) = PSM(JFLD,JM)+2.0_JPRBT*PMET(JN)*& + &(PSPEC(JFLD,ISP)**2+PSPEC(JFLD,ISP+1)**2) + ENDDO + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1651,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE SPNORMD +END MODULE SPNORMD_MOD + + + + + diff --git a/src/trans/gpu/internal/spnsde_mod.F90 b/src/trans/gpu/internal/spnsde_mod.F90 new file mode 100755 index 0000000..8e6fcd9 --- /dev/null +++ b/src/trans/gpu/internal/spnsde_mod.F90 @@ -0,0 +1,211 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNSDE_MOD +CONTAINS +SUBROUTINE SPNSDE(KIN,KOUT,KF_SCALARS,PEPSNM) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +USE TPM_DISTR ,ONLY : D +USE TPM_FIELDS ,ONLY : ZIA +!USE TPM_TRANS + + +!**** *SPNSDE* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDE(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDE in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: KM, KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KIN +INTEGER(KIND=JPIM), INTENT(IN) :: KOUT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +!REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(1:D%NUMP,0:R%NTMAX+2) +!REAL(KIND=JPRB), INTENT(IN) :: PF(:,:,:) +!REAL(KIND=JPRB), INTENT(OUT) :: PNSD(:,:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IJ, ISKIP, J, JN,JI,ISMAX, IR, II +REAL(KIND=JPRBT) :: ZZEPSNM(-1:R%NSMAX+4) +REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+4) + +#ifdef ACCGPU +!$ACC DATA & +!$ACC CREATE (ZN,ZZEPSNM) & +!$ACC COPYIN (F,F%RN, KIN, KOUT) & +!$ACC PRESENT (PEPSNM, ZIA) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA & +!$OMP& MAP(PRESENT,ALLOC:ZN,ZZEPSNM) & +!$OMP& MAP(PRESENT,ALLOC:F,F%RN) & +!$OMP& MAP(PRESENT,ALLOC:PEPSNM, ZIA) +#endif + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + + +!* 1.1 COMPUTE + +ISMAX = R%NSMAX +!loop over wavenumber +DO KMLOC=1,D%NUMP + KM = D%MYMS(KMLOC) +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + !! DEFAULT(NONE) PRIVATE(IJ) & + !!$OMP& SHARED(KM,ISMAX,F,ZN,ZZEPSNM,PEPSNM,KMLOC) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IJ) & + !$ACC& COPYIN(KM,ISMAX,KMLOC) & + !$ACC& PRESENT(F,F%RN,ZN,ZZEPSNM,PEPSNM) +#endif + DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + ! kernels does not work, move here, Nils + ZN(0) = F%RN(ISMAX+3) + IF( JN >= 0 ) THEN + ZZEPSNM(IJ) = PEPSNM(KMLOC,JN) + ELSE + ZZEPSNM(IJ) = 0 + ENDIF + ENDDO +!#ifdef OMPGPU +!!$OMP TARGET +!#endif +!#ifdef ACCGPU + !!$ACC KERNELS DEFAULT(NONE) PRESENT(ISMAX) +!#endif +!#ifdef ACCGPU + !!$ACC END KERNELS +!#endif +!#ifdef OMPGPU +!!$OMP END TARGET +!#endif + + IF(KM == 0) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + !! DEFAULT(NONE) PRIVATE(IR) & + !!$OMP& SHARED(KF_SCALARS,ISMAX,KMLOC,ZN,ZZEPSNM,KIN,KOUT,ZIA) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IR) & + !$ACC& COPYIN(KF_SCALARS,ISMAX,KMLOC,KIN,KOUT) & + !$ACC& PRESENT(ZN,ZZEPSNM,ZIA) +#endif + DO J=1,KF_SCALARS + IR = 2*J-1 + DO JI=2,ISMAX+3 + ZIA(KOUT+IR-1,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*ZIA(KIN+IR-1,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZIA(KIN+IR-1,JI-1,KMLOC) + ENDDO + ENDDO + ELSE + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + !! DEFAULT(NONE) PRIVATE(IR,II) & + !!$OMP& SHARED(KF_SCALARS,ISMAX,KM,KMLOC,ZN,ZZEPSNM,KIN,KOUT,ZIA) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) DEFAULT(NONE) PRIVATE(IR,II) & + !$ACC& COPYIN(KF_SCALARS,ISMAX,KM,KMLOC,KIN,KOUT) & + !$ACC& PRESENT(ZN,ZZEPSNM,ZIA) +#endif + DO J=1,KF_SCALARS + DO JI=2,ISMAX+3-KM + IR = 2*J-1 + II = IR+1 + ZIA(KOUT+IR-1,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*ZIA(KIN+IR-1,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZIA(KIN+IR-1,JI-1,KMLOC) + ZIA(KOUT+II-1,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*ZIA(KIN+II-1,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZIA(KIN+II-1,JI-1,KMLOC) + !PNSD(IR,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*PF(IR,JI+1,KMLOC)+& + ! &ZN(JI-2)*ZZEPSNM(JI-1)*PF(IR,JI-1,KMLOC) + !PNSD(II,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*PF(II,JI+1,KMLOC)+& + ! &ZN(JI-2)*ZZEPSNM(JI-1)*PF(II,JI-1,KMLOC) + ENDDO + ENDDO + ENDIF + +!end loop over wavenumber +END DO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +! ------------------------------------------------------------------ + +END SUBROUTINE SPNSDE +END MODULE SPNSDE_MOD diff --git a/src/trans/gpu/internal/spnsdead_mod.F90 b/src/trans/gpu/internal/spnsdead_mod.F90 new file mode 100755 index 0000000..e2fa28a --- /dev/null +++ b/src/trans/gpu/internal/spnsdead_mod.F90 @@ -0,0 +1,119 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNSDEAD_MOD +CONTAINS +SUBROUTINE SPNSDEAD(KM,KF_SCALARS,PEPSNM,PF,PNSD) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +!USE TPM_GEN +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +!USE TPM_TRANS + +!**** *SPNSDEAD* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDEAD(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDEAD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(INOUT) :: PF(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PNSD(:,:) + +INTEGER(KIND=JPIM) :: IJ, ISKIP, J, JN,JI,ISMAX +REAL(KIND=JPRBT) :: ZEPSNM(-1:R%NSMAX+4) +REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+4) + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +ISMAX = R%NSMAX +DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) +ENDDO + +ZN(0) = F%RN(ISMAX+3) +IF(KM == 0) THEN + ISKIP = 2 +ELSE + ISKIP = 1 +ENDIF + +!cdir loopchg +!cdir select(vector) +DO J=1,2*KF_SCALARS,ISKIP + DO JI=2,ISMAX+3-KM + PF(JI+1,J) = PF(JI+1,J)-ZN(JI+1)*ZEPSNM(JI) *PNSD(JI,J) + PF(JI-1,J) = PF(JI-1,J)+ZN(JI-2)*ZEPSNM(JI-1)*PNSD(JI,J) + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE SPNSDEAD +END MODULE SPNSDEAD_MOD diff --git a/src/trans/gpu/internal/sufft_mod.F90 b/src/trans/gpu/internal/sufft_mod.F90 new file mode 100755 index 0000000..0267b46 --- /dev/null +++ b/src/trans/gpu/internal/sufft_mod.F90 @@ -0,0 +1,47 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUFFT_MOD + CONTAINS + SUBROUTINE SUFFT + + USE PARKIND1 ,ONLY : JPIM + + USE TPM_DIM ,ONLY : R + USE TPM_GEN ,ONLY : NOUT, NPRINTLEV + USE TPM_DISTR ,ONLY : D, MYSETW + USE TPM_GEOMETRY ,ONLY : G + USE TPM_FFT ,ONLY : T + USE TPM_FFTH ,ONLY : TC, INIT_PLANS_FFT + ! + + IMPLICIT NONE + + INTEGER(KIND=JPIM) :: JGL,IGLG + LOGICAL :: LLP1,LLP2 + + ! ------------------------------------------------------------------ + + IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUFFT ===' + + CALL INIT_PLANS_FFT(R%NDLON) + + ENDIF + + ! ------------------------------------------------------------------ + + 9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + + END SUBROUTINE SUFFT +END MODULE SUFFT_MOD diff --git a/src/trans/gpu/internal/sugaw_mod.F90 b/src/trans/gpu/internal/sugaw_mod.F90 new file mode 100755 index 0000000..ef9b892 --- /dev/null +++ b/src/trans/gpu/internal/sugaw_mod.F90 @@ -0,0 +1,431 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUGAW_MOD +CONTAINS +SUBROUTINE SUGAW(KDGL,KM,KN,PL,PW,PANM,PFN) + +USE PARKIND1 ,ONLY : JPRD, JPIM +USE PARKIND2 ,ONLY : JPRH + +USE TPM_CONSTANTS ,ONLY : RA + +USE TPM_GEN ,ONLY : NOUT +USE GAWL_MOD ,ONLY : GAWL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SUPOLF_MOD +USE TPM_POL + +!**** *SUGAW * - Routine to initialize the Gaussian +! abcissa and the associated weights + +! Purpose. +! -------- +! Initialize arrays PL, and PW (quadrature abscissas and weights) +!** Interface. +! ---------- +! *CALL* *SUGAW(KN,PFN,PL,PW) * + +! Explicit arguments : +! -------------------- +! INPUT: +! KDGL : Number of Gauss abscissas +! KM : Polynomial order m +! KN : Polynomial degree n +! PFN : Fourier coefficients of series expansion for +! the ordinary Legendre polynomials +! OUTPUT: +! PL (KN) : abscissas of Gauss +! PW (KN) : Weights of the Gaussian integration + +! PL (i) is the abscissa i starting from the northern pole, it is +! the cosine of the colatitude of the corresponding row of the collocation +! grid. + +! Implicit arguments : +! -------------------- +! None + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- + +! S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon Press (1962) +! P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre quadrature, +! SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! Michel Rochas : 90-08-30 +! Philippe Courtier : 92-12-19 Multitasking +! Ryad El Khatib : 94-04-20 Remove unused comdecks pardim and yomdim +! Mats Hamrud : 94-08-12 Printing level +! K. Yessad (Sep 2008): cleaning, improve comments. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KN + +REAL(KIND=JPRD) ,INTENT(IN) :: PANM + +REAL(KIND=JPRD),INTENT(OUT) :: PW(KDGL) +REAL(KIND=JPRD),INTENT(OUT) :: PL(KDGL) + +REAL(KIND=JPRD) ,OPTIONAL, INTENT(IN) :: PFN(0:KDGL,0:KDGL) + +! ------------------------------------------------------------------ + +REAL(KIND=JPRD) :: ZLI(KDGL),ZT(KDGL),ZFN(0:KDGL/2),ZL(KDGL) +REAL(KIND=JPRD) :: ZREG(KDGL),ZMOD(KDGL),ZM(KDGL),ZRR(KDGL) +INTEGER(KIND=JPIM) :: ITER(KDGL) + +INTEGER(KIND=JPIM) :: IALLOW, INS2, ISYM, JGL, IK, IODD, I, IMAX + +REAL(KIND=JPRD) :: Z, ZEPS, Z0, ZPI + +! computations in extended precision for alternative root finding +! which also works for associated polynomials (m>0) +REAL(KIND=JPRH) :: ZLK, ZLK1, ZLLDN, ZANM +REAL(KIND=JPRH) :: ZTHETA, ZTHETA0, ZX, ZX0, ZDX0, ZH, ZPIH, ZS0 +REAL(KIND=JPRH) :: ZK1, ZK2, ZK3, ZK4 +REAL(KIND=JPRH) :: ZF1, ZF2, ZF3 +REAL(KIND=JPRH) :: FP, FQ, FP1, FQ1 +REAL(KIND=JPRH) :: X, ZXOLD, ZBIG, ZEPSH + +INTEGER(KIND=JPIM) :: ISTEPMAX + +LOGICAL :: LLP2, LLREF, LLOLD + +REAL(KIND=JPRD) :: ZDDPOL(0:KN) + +INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(ZLK) + +FP(X) = 1._JPRH-X**2 +FQ(X) = REAL(KN*(KN+1),JPRH)-REAL(KM**2,JPRH)/(1._JPRH-X**2) +FP1(X) = -2._JPRH*X +FQ1(X) = -2._JPRH*X*REAL(KM**2,JPRH)/SQRT(1._JPRH-X**2) + +! ------------------------------------------------------------------ +! ------------------------------------------------------------------ +!* 1. Initialization + root + weight computation +! ------------------------------------------ + +LLP2 = .FALSE. +INS2 = KDGL/2 + +LLOLD=( KM == 0 .AND. KN == KDGL ).AND.PRESENT(PFN) + + +CALL GSTATS(1650,0) + +ZEPS = EPSILON(Z) +ZEPSH = EPSILON(X) + +ZBIG = SQRT(HUGE(X)) + +!* 1.1 Find the roots of the ordinary +! Legendre polynomial of degree KN using an analytical first guess +! and then refine to machine precision via Newton's method +! in double precision following Swarztrauber (2002) + +! Nils Comment: in principle the else case could also be used for this but +! this is slightly more accurate and consistent with the past + +IF( LLOLD ) THEN + + ZPI = 2.0_JPRD*ASIN(1.0_JPRD) + IODD=MOD(KDGL,2) + IK=IODD + DO JGL=IODD,KDGL,2 + ZFN(IK)=PFN(KDGL,JGL) + IK=IK+1 + ENDDO + + DO JGL=1,INS2 + Z = REAL(4*JGL-1,JPRD)*ZPI/REAL(4*KN+2,JPRD) + ! analytic initial guess for cos(theta) (same quality as RK below) + ! ZX = 1._JPRD-REAL(KN-1,JPRD)/REAL(8*KN*KN*KN,JPRD)-(1._JPRD/REAL(384*KN*KN*KN*KN))*(39._JPRD-28._JPRD/SIN(Z)**2) + ! PL(JGL) = ACOS(ZX*COS(Z)) + ZL(JGL) = Z+1.0_JPRD/(TAN(Z)*REAL(8*KN**2,JPRD)) + ZREG(JGL) = COS(Z) + ZLI(JGL) = COS(ZL(JGL)) + ENDDO + + ! refine PL here via Newton's method + + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JGL) + DO JGL=INS2,1,-1 + CALL GAWL(ZFN,ZL(JGL),PW(JGL),ZEPS,KN,ITER(JGL),ZMOD(JGL)) + ENDDO + !$OMP END PARALLEL DO + + ! convert to physical latitude space PMU + !DIR$ IVDEP + !OCL NOVREC + DO JGL=1,INS2 + PL(JGL) = COS(ZL(JGL)) + ENDDO + +ELSE + +!* 1.2 Find the roots of the associated +! Legendre polynomial of degree KN and the associated Gaussian weights +! using a Runge-Kutta 4 integration of the Pruefer transformed Sturm-Liouville problem +! (Tygert (J. Comput. Phys. 2008) and Glaser et al., SIAM J. SCI. COMPUT. Vol. 29 (4) 1420-1438) +! + + ISTEPMAX=10 + + ZANM = REAL(PANM, JPKD) + ZPIH = 2.0_JPRH*ASIN(1.0_JPRH) + + ZX0 = 0._JPRH + Z0 = 0._JPRD + + ! first guess starting point + IF( MOD(KN-KM,2) == 0 ) THEN + ! even, extremum at X == 0 + ZTHETA0 = 0._JPRH + ZH = -0.5_JPRH*ZPIH/REAL(ISTEPMAX,JPRH) + ELSE + ! odd, root at X == 0 + ZTHETA0 = 0.5_JPRH*ZPIH + ZX0 = 0._JPRH + ZH = -ZPIH/REAL(ISTEPMAX,JPRH) + ENDIF + + ZX = ZX0 + ZTHETA = ZTHETA0 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ! Formula (81) in Tygert + ZDX0=-1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + + ! loop over all roots + LLREF=.TRUE. + DO JGL=INS2,1,-1 + + ! runge-kutta + DGL:DO IK=1,ISTEPMAX + + ZK1 = ZDX0 + ZTHETA = ZTHETA + 0.5_JPRH*ZH + + ZX = ZX0 + 0.5_JPRH*ZH*ZK1 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZK2 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ZX = ZX0 + 0.5_JPRH*ZH*ZK2 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZK3 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ZTHETA = ZTHETA + 0.5_JPRH*ZH + ZX = ZX0 + ZH*ZK3 + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZK4 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ZX = ZX0 + (1._JPRH/6._JPRH)*ZH*(ZK1+2._JPRH*ZK2+2._JPRH*ZK3+ZK4) + ZXOLD = ZX0 + + ZX0 = ZX + + IF( .NOT.ZX==ZX ) THEN + WRITE(NOUT,*) 'invoke overflow ...ZX ',KM, KN, JGL + ZX = ZXOLD + ZX0 = ZXOLD + EXIT DGL + ENDIF + + ZF1 = SQRT(FQ(ZX)/FP(ZX)) + ZF2 = FQ1(ZX)/FQ(ZX) + ZF3 = FP1(ZX)/FP(ZX) + + ZDX0 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + + ENDDO DGL + +! ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! Everything from here until <> is to refine the +! root and compute the starting point for the next root search +! ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + ! should not happen, but does if loss of accuracy in supolf occurs (useful for debugging) + IF( JGL < INS2 ) LLREF = PW(JGL+1).GT.ZEPSH + + IF( LLREF ) THEN + + ! chosen for speed/accuracy compromise + IMAX=3 + LOOP: DO I=1,IMAX + ! supol fast + ZS0 = ACOS(ZX0) + CALL SUPOLF(KM,KN,REAL(ZX0,JPRD),ZDDPOL) + ZLK=REAL(ZDDPOL(KN),JPKD) + ZLK1= REAL(ZDDPOL(KN-1),JPKD) + ZLLDN= -(ZANM*ZLK1-DDI(KN)*COS(ZS0)*ZLK)/SIN(ZS0) + + IF( ABS(ZLLDN) > ZEPSH ) THEN + ! single Newton refinement in theta + ZS0 = ZS0 - ZLK/ZLLDN + ZX = COS(ZS0) + ELSE + ! do nothing + ZX = ZX0 + ENDIF + + IF( ABS(ZX-ZX0) > 1000._JPRD*ZEPS ) THEN + ZX0 = ZX + ELSE + EXIT LOOP + ENDIF + ENDDO LOOP + + ! recompute for accuracy weights + CALL SUPOLF(KM,KN,REAL(ZX,JPRD),ZDDPOL) + ! option f in Schwarztrauber to compute the weights + ZS0 = ACOS(ZX) + ZLK=REAL(ZDDPOL(KN),JPKD) + ZLK1= REAL(ZDDPOL(KN-1),JPKD) + ZLLDN= -(ZANM*ZLK1-DDI(KN)*COS(ZS0)*ZLK)/SIN(ZS0) + + PW(JGL) = REAL(REAL(2*KN+1,JPRH)/ZLLDN**2,JPRD) + + ! catch overflow, should never happen + IF( .NOT.(PW(JGL)==PW(JGL)) ) THEN + WRITE(NOUT,*) 'invoke overflow ...PW ',KM, KN, JGL + PW(JGL) = 0.0_JPRD + ENDIF + + ELSE + ! should never happen ... + WRITE(NOUT,*) 'Refinement not possible ... PW set to 0',KM, KN, JGL + PW(JGL) = 0.0_JPRD + ENDIF + + ZX0 = ZX + PL(JGL) = REAL(ZX0,JPRD) + + ! catch overflow, should never happen + IF( .NOT.(PW(JGL)==PW(JGL)) ) THEN + WRITE(NOUT,*) 'invoke overflow ...PW ',KM, KN, JGL + PW(JGL) = 0.0_JPRD + ENDIF + +! ++++++++++++++++++++++++++++++++++++++++++++++++ +! <<<< END REFINEMENT >>>> +! ++++++++++++++++++++++++++++++++++++++++++++++++ + + ZF1 = SQRT(FQ(ZX0)/FP(ZX0)) + ZF2 = FQ1(ZX0)/FQ(ZX0) + ZF3 = FP1(ZX0)/FP(ZX0) + + ! continue to next root with refined ZX,ZR as initial condition + ZH = -ZPIH/REAL(ISTEPMAX,JPRH) + ZTHETA = 0.5_JPRH*ZPIH + ZDX0 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) + ENDDO + +ENDIF + +CALL GSTATS(1650,1) +! ------------------------------------------------------------------ + +!DIR$ IVDEP +!OCL NOVREC +DO JGL=1,KDGL/2 + ISYM = KDGL-JGL+1 + PL(ISYM) = -PL(JGL) + PW(ISYM) = PW(JGL) +ENDDO + +! ------------------------------------------------------------------ + +!* 3. Diagnostics. +! ------------ + +IF( LLOLD ) THEN + + IF(LLP2)THEN + DO JGL=1,INS2 + ZM(JGL) = (ACOS(PL(JGL))-ACOS(ZLI(JGL)))*RA + ZRR(JGL) = (ACOS(PL(JGL))-ACOS(ZREG(JGL)))*RA + ZT(JGL) = ACOS(PL(JGL))*180._JPRD/ZPI + ENDDO + ENDIF + + IALLOW = 20 + DO JGL=1,INS2 + + IF(LLP2)THEN + WRITE(UNIT=NOUT,FMT=& + &'('' M ='',I4,'' ROW ='',I4,'' ITERATIONS='',I4,'' ROOT='',F30.20,& + &'' WEIGHT='',F30.20,'' MODIF :'',E8.2)')KM,JGL,ITER(JGL),PL(JGL)& + &,PW(JGL),PL(JGL)-ZLI(JGL) + WRITE(UNIT=NOUT,FMT=& + &'(10X,'' LAST INC. : '',E8.2,'' MODIF IN M : '',F10.3,& + &'' FROM THE REGULAR GRID : '',F10.3,'' COLAT '',F10.3)')& + &ZMOD(JGL),ZM(JGL),ZRR(JGL),ZT(JGL) + ENDIF + + IF(ITER(JGL) > IALLOW)THEN + WRITE(UNIT=NOUT,FMT='('' CONVERGENCE FAILED IN SUGAW '')') + WRITE(UNIT=NOUT,FMT='('' ALLOWED : '',I4,''& + &NECESSARY : '',& + &I4)')IALLOW,ITER(JGL) + CALL ABORT_TRANS(' FAILURE IN SUGAW ') + ENDIF + + ENDDO + +ELSE + + IF(LLP2)THEN + DO JGL=1,INS2 + WRITE(UNIT=NOUT,FMT=& + &'('' M ='',I4,'' ROW ='',I4,'' ITERATIONS='',I4,'' ROOT='',F30.20,& + &'' WEIGHT='',F30.20,'' COLAT '',F10.3)')KM,JGL,0,PL(JGL),PW(JGL),& + & ACOS(PL(JGL))*180._JPRD/ZPIH + ENDDO + ENDIF + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUGAW +END MODULE SUGAW_MOD diff --git a/src/trans/gpu/internal/suleg_mod.F90 b/src/trans/gpu/internal/suleg_mod.F90 new file mode 100755 index 0000000..2c53121 --- /dev/null +++ b/src/trans/gpu/internal/suleg_mod.F90 @@ -0,0 +1,1207 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SULEG_MOD +#ifdef __NEC__ +#define SIZEOF(x) STORAGE_SIZE(x)/KIND(x) +#endif +CONTAINS +SUBROUTINE SULEG +!DEC$ OPTIMIZE:1 + +USE PARKIND_ECTRANS ,ONLY : JPRD, JPIM, JPRBT +USE PARKIND2 ,ONLY : JPRH +USE MPL_MODULE + +USE TPM_GEN +USE TPM_DIM +USE TPM_CONSTANTS +USE TPM_DISTR +USE TPM_FIELDS ,ONLY : FIELDS_RESOL, F +USE TPM_FLT +USE TPM_GEOMETRY +USE TPM_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE PRE_SULEG_MOD +USE SUGAW_MOD +USE SUPOL_MOD +USE SUPOLF_MOD +USE TPM_POL +USE SUTRLE_MOD +USE SETUP_GEOM_MOD +USE BUTTERFLY_ALG_MOD +USE SEEFMM_MIX +USE SET2PE_MOD +USE ABORT_TRANS_MOD +USE PREPSNM_MOD ,ONLY : PREPSNM +USE WRITE_LEGPOL_MOD +USE READ_LEGPOL_MOD + +!**** *SULEG * - initialize the Legendre polynomials + +! Purpose. +! -------- +! Initialize COMMON YOMLEG + +!** Interface. +! ---------- +! *CALL* *SULEG* + +! Explicit arguments : +! -------------------- + +! Implicit arguments : +! -------------------- +! COMMON YOMLEG + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- +! SUGAW (Gaussian latitudes) +! SUPOLM (polynomials) +! LFI routines for external IO's +! Called by SUGEM. + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! +! S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon Press (1962) +! P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre quadrature, +! SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! MODIFICATION : 91-04 J.M. Piriou: +! - Read gaussian latitudes and PNM on LFI +! - If file missing, computes +! 91-04 M.Hamrud: +! - IO Scheme introduced +! MODIFICATION : 91-07-03 P.Courtier suppress derivatives +! MODIFICATION : 91-07-03 P.Courtier computes RATATH and RACTHE +! MODIFICATION : 91-07-03 P.Courtier change upper limit (NSMAX+1) +! MODIFICATION : 91-07-03 P.Courtier change ordering +! Order of the PNM in the file, as in the model : +! - increasing wave numbers m +! - for a given m, from n=NSMAX+1 to m +! MODIFICATION : 92-07-02 R. Bubnova: shift RATATH calculation +! to SUGEM1 +! MODIFICATION : 92-12-17 P.Courtier multitask computations +! Modified by R. EL Khatib : 93-04-02 Set-up defaults controled by LECMWF +! MODIFICATION : 93-03-19 D.Giard : n <= NTMAX +! K. YESSAD : 93-05-11 : DLMU --> global array DRMU(NDGSA:NDGEN). +! (not stored currently on LFI files). +! MODIFICATION : 94-02-03 R. El Khatib : subroutine SULEG2 to write out +! the Leg. polynomials on workfile or LFI file +! Modification : 94-08-31 M. Tolstykh: Setup for CUD interpolation +! Modified by K. YESSAD (MARCH 1995): Extra-latitudes computations +! according to value of NDGSUR and LRPOLE only. +! + change fancy loop numbering. +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision +! on NEC +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! G.Mozdzynski: March 2011 Support 2D (RW,RV) initialisation of legendre coeffs +! G.Mozdzynski: July 2012 distribute FLT initialisation over NPRTRV +! R. El Khatib 14-Jun-2013 optional computation on the stretched latitudes +! F. Vana 05-Mar-2015 Support for single precision +! Nils Wedi, 20-Apr-2015 Support dual latitude/longitude set +! T. Wilhelmsson, 22-Sep-2016 Support single precision for dual too +! ------------------------------------------------------------------ + +IMPLICIT NONE + +! LOCAL +! ------------------------------------------------------------------ +REAL(KIND=JPRD),ALLOCATABLE :: ZPNMG(:) +REAL(KIND=JPRD),ALLOCATABLE :: ZFN(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZLRMUZ2(:) +REAL(KIND=JPRBT) :: ZEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRD) :: ZLRMUZ(R%NDGL) +REAL(KIND=JPRD) :: ZW(R%NDGL) + +REAL(KIND=JPRD) :: ZANM +REAL(KIND=JPRD) :: ZFNN +REAL(KIND=JPRD) :: ZPI, ZINC, ZOFF, ZTEMP, ZORIG, ZTHETA, ZCOS + +REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFV(:),ZRCVBUFV(:,:) +REAL(KIND=JPRD), ALLOCATABLE :: ZPNMCDO(:,:),ZPNMCDD(:,:) +REAL(KIND=JPRBT), ALLOCATABLE :: ZRCVBUTFV(:,:) +REAL(KIND=JPRBT) :: ZDUM(2) +REAL(KIND=KIND(ZRCVBUTFV)) :: ZBYTES +INTEGER(KIND=JPIM) :: IBYTES +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRV) +INTEGER(KIND=JPIM) :: IRECVREQ(NPRTRV) +INTEGER(KIND=JPIM) :: IKOUNT(NPRTRV) +INTEGER(KIND=JPIM) :: IRECVLENMAXV(NPRTRV) + +INTEGER(KIND=JPIM) :: INM, IM, IRECV, ISEND, ISREQ, IRREQ, & + &JGL, JM, JMLOC, IMLOC, JN, JNM, IODD, INN, INMAX, JI, IMAXN, ITAG, ITAG1, & + &INX, ISL, ISTART, ITHRESHOLD, INSMAX, IMAXCOLS,ILATSMAX,JW,JV,J, & + &IDGLU, ILA, ILS, IA, IS, I, ILATS, ILOOP, IPRTRV, JSETV, JH, & + &IMAXRECVA, IMAXRECVS, IRECVLENMAX, ICLONELEN, IHEMIS, INNH, IGL, IGL1, IGL2, & + &IDGLU2, ISYM, INZ + +REAL(KIND=JPRD) :: ZEPS_INT_DEC +REAL(KIND=JPRD) :: ZEPS +REAL(KIND=JPRD),ALLOCATABLE :: ZLFPOL(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZLPOL(:) + +TYPE(CLONE),ALLOCATABLE :: ZCLONEA(:),ZCLONES(:) + +LOGICAL :: LLP1,LLP2 + +! For latitudes on the stretched geometry +REAL(KIND=JPRH) :: ZTAN +REAL(KIND=JPRH) :: ZSTRETMU(R%NDGL) + +! ------------------------------------------------------------------ + +!* 0. Some initializations. +! --------------------- + +IBYTES = MPL_BYTES(ZBYTES) + +ZEPS = 1000._JPRD*EPSILON(ZEPS) +!ZEPS_INT_DEC = EPSILON(ZEPS) +ZEPS_INT_DEC = 1.0E-7_JPRD +!ZEPS_INT_DEC = 1.0E-5_JPRD + +IHEMIS=1 +IF (S%LSOUTHPNM) IHEMIS=2 +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SULEG ===' + +IF( NPROC > 1 )THEN + CALL GSTATS(798,0) + CALL MPL_BARRIER(CDSTRING='SULEG:') + CALL GSTATS(798,1) +ENDIF + +CALL GSTATS(140,0) +CALL GSTATS(1801,0) + +IF(.NOT.D%LGRIDONLY) THEN + CALL PRE_SULEG +ENDIF + +ALLOCATE(F%RMU(R%NDGL)) +IF (LLP2) WRITE(NOUT,9) 'F%RMU ',SIZE(F%RMU ),SHAPE(F%RMU ) +ALLOCATE(F%RW(R%NDGL)) +IF (LLP2) WRITE(NOUT,9) 'F%RW ',SIZE(F%RW ),SHAPE(F%RW ) + + +!* 1.0 Initialize Fourier coefficients for ordinary Legendre polynomials +! ------------------------------------------------------------------------ + +ALLOCATE(ZFN(0:R%NDGL,0:R%NDGL)) +IF (LLP2) WRITE(NOUT,9) 'ZFN ',SIZE(ZFN ),SHAPE(ZFN ) + + + +! determines the number of stripes in butterfly NSMAX/IMAXCOLS +! IMAXCOLS = (R%NSMAX - 1)/4 + 1 +! IMAXCOLS=64 (min flops) +IMAXCOLS=64 + +! the threshold of efficiency +IF(NPROC == 1 .OR. R%NDGNH <= 2560) THEN + ITHRESHOLD = R%NDGNH/4 + DO + IF(ITHRESHOLD >= IMAXCOLS*4) EXIT + IMAXCOLS = IMAXCOLS/2 + ENDDO +ELSE + ITHRESHOLD = 900 +ENDIF + +ITHRESHOLD = MAX(ITHRESHOLD,IMAXCOLS+1) +S%ITHRESHOLD = ITHRESHOLD + +!* 3.1 Gaussian latitudes and weights +! --------------------------------------- + +!IF (S%LUSE_BELUSOV.OR.S%LUSEFLT) +CALL INI_POL(R%NTMAX+3) + +IF(.NOT.D%LGRIDONLY) THEN + ISTART=1 +ELSE + ISTART=R%NDGL +ENDIF + +INMAX=R%NDGL +! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) +! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 +ZFN(0,0)=2._JPRD +DO JN=ISTART,R%NDGL + ZFNN=ZFN(0,0) + DO JGL=1,JN + ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) + ENDDO + + IODD=MOD(JN,2) + ZFN(JN,JN)=ZFNN + DO JGL=2,JN-IODD,2 + ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) + ENDDO +ENDDO + +! compute latitudes and weights for original Gaussian latitudes +ZANM=SQRT(REAL(2*INMAX+1,JPRD)*REAL(INMAX**2,JPRD)/REAL(2*INMAX-1,JPRD)) +INN=R%NDGL +CALL GSTATS(1801,2) +CALL SUGAW(INN,0,INMAX,ZLRMUZ(1:INN),ZW(1:INN),ZANM,ZFN) +CALL GSTATS(1801,3) + +IF (ABS(G%RSTRET-1.0_JPRD)>100._JPRD*EPSILON(1._JPRD)) THEN + WRITE(NOUT,*) '=== SULEG: Change Gaussian latitudes to the transformed sphere ===' + INNH=(INN+1)/2 + ZTAN=(1.0_JPRD-G%RSTRET**2)/(1.0_JPRD+G%RSTRET**2) +! North hemisphere + DO JGL=1,INNH + ZSTRETMU(JGL)=(ZTAN+REAL(ZLRMUZ(JGL),JPRH))/(1.0_JPRD+ZTAN*REAL(ZLRMUZ(JGL),JPRH)) + ENDDO +! South hemisphere + DO JGL=1,INNH + IGL=2*INNH-JGL+1 + ZSTRETMU(IGL)=(ZTAN-REAL(ZLRMUZ(JGL),JPRH))/(1.0_JPRD-ZTAN*REAL(ZLRMUZ(JGL),JPRH)) + ENDDO + DO JGL=1,INN + ZLRMUZ(JGL)=REAL(ZSTRETMU(JGL),JPRD) + ENDDO +ENDIF + +DO JGL=1,R%NDGL + F%RW(JGL) = ZW(JGL) + F%RMU(JGL) = ZLRMUZ(JGL) +ENDDO + +IF (LLP1) WRITE(NOUT,*) '=== SULEG: Finished Gaussian latitudes ===' + +!* 3.1.1 specify a dual set of output (inv_trans) or input (dir_trans) latitudes / longitudes + +IF( S%LDLL ) THEN + + INMAX = S%NDGL + INN= S%NDGL + + S%NDGNHD = (INMAX+1)/2 + ALLOCATE(ZLRMUZ2(INN)) + + ! here we want to use the positions of the specified dual grid + ! accuracy requirement is ZLRMUZ2(JGL) < F%RMU(1) + ! so we use approximations for the remaining latitudes outside this range + ! we approximate the vicinity to the pole/equator + + ZPI = 2.0_JPRD*ASIN(1.0_JPRD) + + ZORIG = ASIN(F%RMU(1)) + IF( S%LSHIFTLL ) THEN + ZINC = ZPI/REAL(INN,JPRD) + ZOFF = 0.5_JPRD*ZINC + ZTEMP = ZOFF + ZINC*REAL(S%NDGNHD-1,JPRD) + ZLRMUZ2(1) = SIN(MIN(ZTEMP,ZORIG) - 0.5_JPRD*MAX(0._JPRD,ZTEMP - ZORIG)) + ZLRMUZ2(S%NDGNHD) = SIN(ZOFF) + ELSE + ZINC = ZPI/REAL(INN-2,JPRD) + ZOFF=-0.5_JPRD*ZINC + ZTEMP = ZOFF + ZINC*REAL(S%NDGNHD-1,JPRD) + ZLRMUZ2(1) = SIN(MIN(ZTEMP,ZORIG) - 0.5_JPRD*MAX(0._JPRD,ZTEMP - ZORIG)) + ZOFF=0.01_JPRD*ZINC + ZLRMUZ2(S%NDGNHD) = SIN(ZOFF) + ZOFF=0._JPRD + ENDIF + DO JGL=2, S%NDGNHD-1 + ZLRMUZ2(JGL) = SIN(ZOFF + ZINC*REAL(S%NDGNHD-JGL,JPRD)) + ENDDO + DO JGL=1, S%NDGNHD + ISYM = INN-JGL+1 + ZLRMUZ2(ISYM) = -ZLRMUZ2(JGL) + ENDDO + + IF( LLP2 ) THEN + WRITE(NOUT,*) 'dual latitudes' + DO JGL= 1, INN + WRITE(NOUT,*) 'dual JGL=',JGL,(180._JPRD/ZPI)*ZINC,(180._JPRD/ZPI)*ASIN(ZLRMUZ2(JGL)),(180._JPRD/ZPI)*ASIN(F%RMU(JGL)) + ENDDO + ENDIF + + ALLOCATE(F%RMU2(INMAX)) + IF (LLP2) WRITE(NOUT,9) 'F%RMU2 ',SIZE(F%RMU2 ),SHAPE(F%RMU2 ) + ALLOCATE(F%RACTHE2(INMAX)) + IF (LLP2) WRITE(NOUT,9) 'F%RACTHE2 ',SIZE(F%RACTHE2),SHAPE(F%RACTHE2 ) + DO JGL=1,INN + F%RMU2(JGL) = ZLRMUZ2(JGL) + F%RACTHE2(JGL) = 1.0_JPRD/(SQRT(1.0_JPRD-ZLRMUZ2(JGL)*ZLRMUZ2(JGL))+ZEPS)/REAL(RA,JPRD) + ENDDO + + IF (LLP1) WRITE(NOUT,*) '=== SULEG: Finished dual Gaussian latitudes ===' + + ! inverse + direct map for FMM + INX=2*R%NDGNH + INZ=2*S%NDGNHD + ALLOCATE(S%FMM_INTI) + CALL SETUP_SEEFMM(INX,F%RMU,INZ,F%RMU2,S%FMM_INTI) + +ENDIF + +!* 3.2 Computes related arrays + +IF(.NOT.D%LGRIDONLY) THEN + + ALLOCATE(S%FA(D%NUMP)) + + ALLOCATE(F%R1MU2(R%NDGL)) + IF (LLP2) WRITE(NOUT,9) 'F%R1MU2 ',SIZE(F%R1MU2),SHAPE(F%R1MU2 ) + ALLOCATE(F%RACTHE(R%NDGL)) + IF (LLP2) WRITE(NOUT,9) 'F%RACTHE ',SIZE(F%RACTHE),SHAPE(F%RACTHE ) + + IF( S%LUSE_BELUSOV) THEN + ALLOCATE(F%RPNM(R%NLEI3,D%NSPOLEGL)) + IF (LLP2) WRITE(NOUT,9) 'F%RPNM ',SIZE(F%RPNM),SHAPE(F%RPNM) + DO JNM=1,D%NSPOLEGL + F%RPNM(R%NLEI3,JNM) = 0.0_JPRD + ENDDO + ENDIF + +!* 3.2 Computes related arrays + + DO JGL=1,R%NDGL +! test cosine differently + ZTHETA = ASIN(ZLRMUZ(JGL)) + ZCOS = COS(ZTHETA) + F%R1MU2(JGL) = ZCOS**2 + F%RACTHE(JGL) = 1.0_JPRD/ZCOS/REAL(RA,JPRD) +! F%R1MU2(JGL) = 1.0_JPRD-ZLRMUZ(JGL)*ZLRMUZ(JGL) +! F%RACTHE(JGL) = 1.0_JPRD/SQRT(1.0_JPRD-ZLRMUZ(JGL)*ZLRMUZ(JGL))/REAL(RA,JPRD) + ENDDO + +!* 3.3 Working arrays + +! compute the Legendre polynomials as a function of the z_k (Gaussian Latitudes) +! this may be faster than calling supolf for each m but uses extra communication +! and the parallelism is more limited ? Nils + + IF( S%LUSE_BELUSOV .AND. .NOT. C%LREAD_LEGPOL ) THEN + + INSMAX = R%NTMAX+1 + + IF( INSMAX /= R%NDGL) THEN + DEALLOCATE(ZFN) + ALLOCATE(ZFN(0:INSMAX,0:INSMAX)) + ! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) + ! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 + ZFN(0,0)=2._JPRD + DO JN=1,INSMAX + ZFNN=ZFN(0,0) + DO JGL=1,JN + ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) + ENDDO + + IODD=MOD(JN,2) + ZFN(JN,JN)=ZFNN + DO JGL=2,JN-IODD,2 + ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) + ENDDO + ENDDO + ENDIF + + ALLOCATE(ZLFPOL(0:INSMAX,0:INSMAX)) + ALLOCATE(ZPNMG(R%NSPOLEG)) + + DO JH=1,IHEMIS + + IF (JH==1) THEN + IGL1=D%NLATLS(MYSETW,MYSETV) + IGL2=D%NLATLE(MYSETW,MYSETV) + ELSE + IGL1=R%NDGL-D%NLATLE(MYSETW,MYSETV)+1 + IGL2=R%NDGL-D%NLATLS(MYSETW,MYSETV)+1 + ENDIF + + ILOOP=0 + DO JGL=IGL1,IGL2 + + INM = 0 + CALL SUPOL(INSMAX,ZLRMUZ(JGL),ZFN,ZLFPOL) + DO JM=0,R%NSMAX + DO JN=INSMAX,JM,-1 + INM = INM+1 + ZPNMG(INM) = ZLFPOL(JM,JN) + ENDDO + ENDDO + + CALL GSTATS(1801,2) + ILOOP = JGL-IGL1+1 + CALL SUTRLE(ZPNMG,JGL,ILOOP) + CALL GSTATS(1801,3) + + ENDDO + + ILATSMAX=0 + DO JW=1,NPRTRW + DO JV=1,NPRTRV + ILATSMAX=MAX(ILATSMAX,D%NLATLE(JW,JV)-D%NLATLS(JW,JV)+1) + ENDDO + ENDDO + + ILATS=IGL2-IGL1+1 + IF (S%LSOUTHPNM .AND. IHEMIS==1 .AND. ILATSMAX-1 >= ILATS) THEN + ! I don't know what to do for south pole. But isn't this piece of code + ! a dead stuff for poles rows ? + CALL ABORT_TRANS('SULEG: WILL BE BROKEN FOR SOUTH HEMISPHERE') + ENDIF + + DO J=ILATS,ILATSMAX-1 + ILOOP=ILOOP+1 + CALL GSTATS(1801,2) + CALL SUTRLE(ZPNMG,-1,ILOOP) + CALL GSTATS(1801,3) + ENDDO + + ENDDO + + DEALLOCATE(ZLFPOL) + IF( ALLOCATED(ZFN) ) DEALLOCATE(ZFN) + + DEALLOCATE(ZPNMG) + + IF(LLP1) WRITE(NOUT,*) '=== SULEG: Finished RPNM ===' + + ENDIF + + CALL SETUP_GEOM + + IMAXN=R%NTMAX+1 + + ITAG=MTAGLETR + ITAG1=MTAGLETR+1 + + IMAXRECVA=0 + IMAXRECVS=0 + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IMAXRECVA = MAX(IDGLU*ILA,IMAXRECVA) + IMAXRECVS = MAX(IDGLU*ILS,IMAXRECVS) + + !find nearest starting latitude of the dual set + IF( S%LDLL ) THEN + + INMAX=MIN(R%NTMAX+1,S%NDGL) + IDGLU2=S%NDGNHD + S%FA(JMLOC)%ISLD = 1 + LLA:DO JGL=1,S%NDGNHD-1 + IF( (ZLRMUZ2(JGL) < ZLRMUZ(ISL)) ) THEN + S%FA(JMLOC)%ISLD = JGL + IDGLU2 = S%NDGNHD-S%FA(JMLOC)%ISLD+1 + EXIT LLA + ENDIF + ENDDO LLA + + IF( .NOT. C%LREAD_LEGPOL ) THEN + ! compute auxiliary quantities for the dual mapping + + ! output data latitudes + ALLOCATE(ZPNMCDO(2*IDGLU2,2)) + !$OMP PARALLEL PRIVATE(JGL,ZLPOL) + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + !$OMP DO SCHEDULE(DYNAMIC,1) + DO JGL=1,2*IDGLU2 + CALL SUPOLF(IM,INMAX,ZLRMUZ2(S%FA(JMLOC)%ISLD+JGL-1),ZLPOL(0:INMAX)) + ZPNMCDO(JGL,1)=ZLPOL(INMAX-1) + ZPNMCDO(JGL,2)=ZLPOL(INMAX) + ENDDO + !$OMP END DO + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + !$OMP END PARALLEL + + ! internal (gg-roots) latitudes + ALLOCATE(ZPNMCDD(2*IDGLU,2)) + !$OMP PARALLEL PRIVATE(JGL,ZLPOL,JI,JN) + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + !$OMP DO SCHEDULE(DYNAMIC,1) + DO JGL=1,2*IDGLU + CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX)) + ZPNMCDD(JGL,1)=ZLPOL(INMAX-1) + ZPNMCDD(JGL,2)=ZLPOL(INMAX) + ENDDO + !$OMP END DO + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + !$OMP END PARALLEL + + stop 'Error: code path not (yet) supported in GPU version' + !CALL PREPSNM(IM,JMLOC,ZEPSNM) + ALLOCATE(S%FA(JMLOC)%RPNMWI(2*IDGLU,1:2)) + DO JGL=1,2*IDGLU + ! inverse trafo + S%FA(JMLOC)%RPNMWI(JGL,1) = F%RW(ISL+JGL-1)*ZPNMCDD(JGL,1) + S%FA(JMLOC)%RPNMWI(JGL,2) = F%RW(ISL+JGL-1)*ZPNMCDD(JGL,2) + ! direct trafo needed if mapping to another set of gg roots + !S%FA(JMLOC)%RPNMWI(JGL,3) = -ZEPSNM(IMAXN)*ZPNMCDD(JGL,2) + !S%FA(JMLOC)%RPNMWI(JGL,4) = -ZEPSNM(IMAXN)*ZPNMCDD(JGL,1) + ENDDO + DEALLOCATE(ZPNMCDD) + ALLOCATE(S%FA(JMLOC)%RPNMWO(2*IDGLU2,1:2)) + DO JGL=1,2*IDGLU2 + ! inverse trafo + S%FA(JMLOC)%RPNMWO(JGL,1) = -ZEPSNM(IMAXN)*ZPNMCDO(JGL,2) + S%FA(JMLOC)%RPNMWO(JGL,2) = -ZEPSNM(IMAXN)*ZPNMCDO(JGL,1) + ! only needed in direct trafo, need if mapping to another set of roots + !S%FA(JMLOC)%RPNMWO(JGL,3) = F%RW2(S%FA(JMLOC)%ISLD+JGL-1)*ZPNMCDO(JGL,1) + !S%FA(JMLOC)%RPNMWO(JGL,4) = F%RW2(S%FA(JMLOC)%ISLD+JGL-1)*ZPNMCDO(JGL,2) + ENDDO + DEALLOCATE(ZPNMCDO) + ENDIF ! LREAD_LEGPOL + ENDIF ! LDLL + + ENDDO + + IF( S%LDLL ) THEN + DEALLOCATE(ZLRMUZ2) + ENDIF + +!!$ IF( S%LUSEFLT.AND.LMPOFF ) THEN +!!$ CALL ABORT_TRANS('SULEG: LUSEFLT=T and LMPOFF=T not supported') +!!$ ENDIF + CALL GSTATS(1801,2) + + IF(.NOT.C%LREAD_LEGPOL) THEN + IF( S%LUSEFLT )THEN + ALLOCATE(ZCLONEA(D%NUMP)) + ALLOCATE(ZCLONES(D%NUMP)) + ENDIF + +! not correct logic +! IF (S%LUSE_BELUSOV.OR.S%LUSEFLT) THEN + + DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP +++++++++++++++++++++++ +! IF( S%LUSEFLT )THEN +! ZCLONES(JMLOC)%COMMSBUF => NULL() +! ZCLONEA(JMLOC)%COMMSBUF => NULL() +! ENDIF + + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + + ! --------------------anti-symmetric----------------------- + ! --------------------anti-symmetric----------------------- + ! --------------------anti-symmetric----------------------- + + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + ALLOCATE(S%FA(IMLOC)%RPNMA(IDGLU,ILA)) + ENDDO + + IF( .NOT. S%LUSE_BELUSOV ) THEN + + ISREQ = 0 + IRREQ = 0 + + ALLOCATE (ZRCVBUFV(IMAXRECVA,IPRTRV)) + CALL GSTATS(851,0) + DO JSETV=1,IPRTRV + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + IRREQ = IRREQ+1 + CALL MPL_RECV(ZRCVBUFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + + ALLOCATE(ZSNDBUFV(IDGLU*ILA)) + + IF(MOD(IMAXN-IM,2) == 0) THEN + INMAX=IMAXN+1 + ELSE + INMAX=IMAXN + ENDIF + + CALL GSTATS(1251,0) + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) + DO JGL=1,IDGLU + CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=3) + DO JI=1,ILA + JN=IM+2*(JI-1)+1 + ZSNDBUFV((JGL-1)*ILA+JI)=ZLPOL(JN) + ENDDO + ENDDO + !$OMP END PARALLEL DO + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + CALL GSTATS(1251,1) + + CALL GSTATS(851,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZSNDBUFV(:),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + ENDIF + + CALL GSTATS(851,0) + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + + IF( NPROC==1.AND.LMPOFF )THEN + ZRCVBUFV(1:SIZE(ZSNDBUFV(:)),1)=ZSNDBUFV(:) + ENDIF + CALL GSTATS(851,1) + + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IA,ILA,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILA > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMA) + ALLOCATE(S%FA(IMLOC)%RPNMDA(IDGLU,ILA)) + DO JGL=1,IDGLU + DO JI=1,ILA + S%FA(IMLOC)%RPNMDA(JGL,ILA-JI+1)=ZRCVBUFV((JGL-1)*ILA+JI,JSETV) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JGL=1,IDGLU + DO JI=1,ILA + S%FA(IMLOC)%RPNMA(JGL,ILA-JI+1)=ZRCVBUFV((JGL-1)*ILA+JI,JSETV) + ENDDO + ENDDO + ENDIF + ELSE + DO JGL=1,IDGLU + DO JI=1,ILA + S%FA(IMLOC)%RPNMA(JGL,ILA-JI+1)=ZRCVBUFV((JGL-1)*ILA+JI,JSETV) + ENDDO + ENDDO + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + IF( ALLOCATED(ZSNDBUFV) ) DEALLOCATE(ZSNDBUFV) + IF( ALLOCATED(ZRCVBUFV) ) DEALLOCATE(ZRCVBUFV) + + ELSE + + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IA,ILA,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IA = 1+MOD(R%NSMAX-IM+2,2) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILA > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMA) + ALLOCATE(S%FA(IMLOC)%RPNMDA(IDGLU,ILA)) + DO JI=1,ILA + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMDA(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IA+(JI-1)*2) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JI=1,ILA + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMA(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IA+(JI-1)*2) + ENDDO + ENDDO + ENDIF + ELSE + DO JI=1,ILA + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMA(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IA+(JI-1)*2) + ENDDO + ENDDO + END IF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + ENDIF + + ! -------------------- anti-symmetric FLT iniitialisation ----------------------- + + IF( S%LUSEFLT) THEN + IRECVLENMAX=0 + ISREQ = 0 + IRREQ = 0 + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( ILA > ITHRESHOLD ) THEN + S%LSYM = .FALSE. + INX = IDGLU + CALL CONSTRUCT_BUTTERFLY(ZEPS_INT_DEC,IMAXCOLS,INX,ILA,S%FA(IMLOC)%RPNMDA,& + & S%FA(IMLOC)%YBUT_STRUCT_A) + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,ZCLONEA(IMLOC)) + IRECVLENMAX=SIZE(ZCLONEA(IMLOC)%COMMSBUF) + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZCLONEA(IMLOC)%COMMSBUF(:),KDEST=NPRCIDS(ISEND),& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ELSE + IRECVLENMAX=2 + ZDUM(:)=0.0_JPRBT + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZDUM(:),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ENDIF + ENDIF + IF(.NOT.LMPOFF) THEN + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + CALL MPL_SEND(IRECVLENMAX,KDEST=NPRCIDS(ISEND),KTAG=ITAG1,CDSTRING='SULEG:') + ENDDO + IRECVLENMAX=0 + DO JSETV=1,NPRTRV ! ? should this be IPRTRV ? + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(IRECVLENMAXV(JSETV),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG1,CDSTRING='SULEG:') + IRECVLENMAX=MAX(IRECVLENMAX,IRECVLENMAXV(JSETV)) + ENDDO + IF( MYPROC == 1 )THEN + IF( IRECVLENMAX > 2 )THEN + WRITE(NOUT,'("SULEG: ANTI-SYM MAX BUTTERFLY CLONE LEN=",I8)')IRECVLENMAX + ENDIF + ENDIF + IF( IRECVLENMAX == 0 )THEN + WRITE(NOUT,'("SULEG: ANTI-SYM WARNING CLONE LEN=",I8,I8)') MYPROC, IRECVLENMAX + ENDIF + IF( IRECVLENMAX > 0 )THEN + ALLOCATE (ZRCVBUTFV(IRECVLENMAX,IPRTRV)) + DO JSETV=1,IPRTRV + IRREQ = IRREQ+1 + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(ZRCVBUTFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDDO + END IF + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KBYTES=IBYTES,KOUNT=IKOUNT(1:IRREQ),KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + CALL GSTATS(852,1) + IF( IRECVLENMAX > 0 )THEN + CALL GSTATS(1252,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ILA,IDGLU,INX,ICLONELEN) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + IF( ILA > ITHRESHOLD ) THEN + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + INX=IDGLU + IF( .NOT.ALLOCATED(ZCLONEA(IMLOC)%COMMSBUF) )THEN + ICLONELEN=IKOUNT(JSETV) + ALLOCATE(ZCLONEA(IMLOC)%COMMSBUF(ICLONELEN)) + ZCLONEA(IMLOC)%COMMSBUF(1:ICLONELEN) = ZRCVBUTFV(1:ICLONELEN,JSETV) + CALL UNPACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,ZCLONEA(IMLOC)) + ENDIF + IF(ALLOCATED(ZCLONEA(IMLOC)%COMMSBUF) ) THEN + IF( SIZEOF(ZCLONEA(IMLOC)%COMMSBUF) > 0 ) DEALLOCATE(ZCLONEA(IMLOC)%COMMSBUF) + ! ZCLONEA(IMLOC)%COMMSBUF=>NULL() + ENDIF + IF( ASSOCIATED(S%FA(IMLOC)%RPNMA) .AND. .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMA) + IF( ASSOCIATED(S%FA(IMLOC)%RPNMDA) ) DEALLOCATE(S%FA(IMLOC)%RPNMDA) + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1252,1) + DEALLOCATE(ZRCVBUTFV) + ENDIF + ENDIF + ENDIF + + ! --------------------symmetric----------------------- + ! --------------------symmetric----------------------- + ! --------------------symmetric----------------------- + + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + ALLOCATE(S%FA(IMLOC)%RPNMS(IDGLU,ILS)) + ENDDO + + IF( .NOT. S%LUSE_BELUSOV ) THEN + + ISREQ = 0 + IRREQ = 0 + + ALLOCATE (ZRCVBUFV(IMAXRECVS,IPRTRV)) + CALL GSTATS(851,0) + DO JSETV=1,IPRTRV + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + IRREQ = IRREQ+1 + CALL MPL_RECV(ZRCVBUFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + + ALLOCATE(ZSNDBUFV(IDGLU*ILS)) + + IF(MOD(IMAXN-IM,2) == 0) THEN + INMAX=IMAXN + ELSE + INMAX=IMAXN+1 + ENDIF + + IF (.NOT.ALLOCATED(ZLPOL)) ALLOCATE(ZLPOL(0:INMAX)) + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) + DO JGL=1,IDGLU + CALL SUPOLF(IM,INMAX,ZLRMUZ(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=2) + DO JI=1,ILS + JN=IM+2*(JI-1) + ZSNDBUFV((JGL-1)*ILS+JI)=ZLPOL(JN) + ENDDO + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) + + CALL GSTATS(851,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF( .NOT.LMPOFF )THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZSNDBUFV(:),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(851,1) + + ENDIF + + CALL GSTATS(851,0) + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + IF( NPROC==1.AND.LMPOFF )THEN + ZRCVBUFV(1:SIZE(ZSNDBUFV(:)),1)=ZSNDBUFV(:) + ENDIF + CALL GSTATS(851,1) + + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IS,ILS,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILS > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMS) + ALLOCATE(S%FA(IMLOC)%RPNMDS(IDGLU,ILS)) + DO JGL=1,IDGLU + DO JI=1,ILS + S%FA(IMLOC)%RPNMDS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JGL=1,IDGLU + DO JI=1,ILS + S%FA(IMLOC)%RPNMS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) + ENDDO + ENDDO + ENDIF + ELSE + DO JGL=1,IDGLU + DO JI=1,ILS + S%FA(IMLOC)%RPNMS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) + ENDDO + ENDDO + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + IF( ALLOCATED(ZSNDBUFV) ) DEALLOCATE(ZSNDBUFV) + IF( ALLOCATED(ZRCVBUFV) ) DEALLOCATE(ZRCVBUFV) + + ELSE + CALL GSTATS(1251,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ISL,IS,ILS,IDGLU,JGL,JI) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ISL = MAX(R%NDGNH-G%NDGLU(IM)+1,1) + IS = 1+MOD(R%NSMAX-IM+1,2) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( S%LUSEFLT .AND. ILS > ITHRESHOLD ) THEN + IF( .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMS) + ALLOCATE(S%FA(IMLOC)%RPNMDS(IDGLU,ILS)) + DO JI=1,ILS + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMDS(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IS+(JI-1)*2) + ENDDO + ENDDO + IF( S%LKEEPRPNM ) THEN + DO JI=1,ILS + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMS(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IS+(JI-1)*2) + ENDDO + ENDDO + ENDIF + ELSE + DO JI=1,ILS + DO JGL=1,IDGLU + S%FA(IMLOC)%RPNMS(JGL,JI) = F%RPNM(ISL+JGL-1,D%NPMS(IM)+IS+(JI-1)*2) + ENDDO + ENDDO + END IF + END DO + !$OMP END PARALLEL DO + CALL GSTATS(1251,1) + + ENDIF + + ! -------------------- symmetric FLT iniitialisation ----------------------- + + IF( S%LUSEFLT) THEN + IRECVLENMAX=0 + ISREQ = 0 + IRREQ = 0 + IF( JMLOC+MYSETV-1 <= D%NUMP )THEN + IMLOC=JMLOC+MYSETV-1 + IM = D%MYMS(IMLOC) + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IF( ILS > ITHRESHOLD ) THEN + S%LSYM = .TRUE. + INX = IDGLU + CALL CONSTRUCT_BUTTERFLY(ZEPS_INT_DEC,IMAXCOLS,INX,ILS,S%FA(IMLOC)%RPNMDS,& + & S%FA(IMLOC)%YBUT_STRUCT_S) + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,ZCLONES(IMLOC)) + IRECVLENMAX=SIZE(ZCLONES(IMLOC)%COMMSBUF) + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZCLONES(IMLOC)%COMMSBUF(:),KDEST=NPRCIDS(ISEND),& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ELSE + IRECVLENMAX=2 + ZDUM(:)=0.0_JPRBT + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + IF(.NOT.LMPOFF) THEN + ISREQ = ISREQ+1 + CALL MPL_SEND(ZDUM(:),KDEST=NPRCIDS(ISEND), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDIF + ENDDO + CALL GSTATS(852,1) + ENDIF + ENDIF + IF(.NOT. LMPOFF) THEN + CALL GSTATS(852,0) + DO JSETV=1,NPRTRV + CALL SET2PE(ISEND,0,0,MYSETW,JSETV) + CALL MPL_SEND(IRECVLENMAX,KDEST=NPRCIDS(ISEND),KTAG=ITAG1,CDSTRING='SULEG:') + ENDDO + IRECVLENMAX=0 + DO JSETV=1,NPRTRV ! ? should this be IPRTRV ? + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(IRECVLENMAXV(JSETV),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG1,CDSTRING='SULEG:') + IRECVLENMAX=MAX(IRECVLENMAX,IRECVLENMAXV(JSETV)) + ENDDO + IF( MYPROC == 1 )THEN + IF( IRECVLENMAX > 2 )THEN + WRITE(NOUT,'("SULEG: SYM MAX BUTTERFLY CLONE LEN=",I8)')IRECVLENMAX + ENDIF + ENDIF + IF( IRECVLENMAX == 0 )THEN + WRITE(NOUT,'("SULEG: SYM WARNING CLONE LEN=",I8,I8)')MYPROC, IRECVLENMAX + ENDIF + IF( IRECVLENMAX > 0 )THEN + ALLOCATE (ZRCVBUTFV(IRECVLENMAX,IPRTRV)) + DO JSETV=1,IPRTRV + IRREQ = IRREQ+1 + CALL SET2PE(IRECV,0,0,MYSETW,JSETV) + CALL MPL_RECV(ZRCVBUTFV(:,JSETV),KSOURCE=NPRCIDS(IRECV), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SULEG:') + ENDDO + ENDIF + IF(ISREQ > 0) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + IF(IRREQ > 0) THEN + CALL MPL_WAIT(KBYTES=IBYTES,KOUNT=IKOUNT(1:IRREQ),KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: SULEG') + ENDIF + CALL GSTATS(852,1) + + + IF( IRECVLENMAX > 0 )THEN + CALL GSTATS(1252,0) + !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JSETV,IMLOC,IM,ILS,IDGLU,INX,ICLONELEN) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILS = (R%NSMAX-IM+3)/2 + IF( ILS > ITHRESHOLD ) THEN + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + INX=IDGLU + IF( .NOT.ALLOCATED(ZCLONES(IMLOC)%COMMSBUF) )THEN + ICLONELEN=IKOUNT(JSETV) + ALLOCATE(ZCLONES(IMLOC)%COMMSBUF(ICLONELEN)) + ZCLONES(IMLOC)%COMMSBUF(1:ICLONELEN) = ZRCVBUTFV(1:ICLONELEN,JSETV) + CALL UNPACK_BUTTERFLY_STRUCT( S%FA(IMLOC)%YBUT_STRUCT_S,ZCLONES(IMLOC)) + ENDIF + IF( ALLOCATED(ZCLONES(IMLOC)%COMMSBUF) ) THEN + IF( SIZEOF(ZCLONES(IMLOC)%COMMSBUF) > 0 ) DEALLOCATE(ZCLONES(IMLOC)%COMMSBUF) + ! ZCLONES(IMLOC)%COMMSBUF=>NULL() + ENDIF + IF( ASSOCIATED(S%FA(IMLOC)%RPNMS) .AND. .NOT. S%LKEEPRPNM ) DEALLOCATE(S%FA(IMLOC)%RPNMS) + IF( ASSOCIATED(S%FA(IMLOC)%RPNMDS) ) DEALLOCATE(S%FA(IMLOC)%RPNMDS) + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1252,1) + DEALLOCATE(ZRCVBUTFV) + ENDIF + ENDIF + ENDIF + + ENDDO ! +++++++++++++++++++++ END JMLOC LOOP +++++++++++++++++++++++ + +! ENDIF ! (S%LUSE_BELUSOV.OR.S%LUSEFLT) + + IF( S%LUSEFLT )THEN + DEALLOCATE(ZCLONEA) + DEALLOCATE(ZCLONES) + ENDIF + + IF( LLP1 .AND. S%LUSEFLT ) THEN + WRITE(NOUT,*) '=== SULEG: Finished SETUP_BUTTERFLY ===' + ENDIF + ENDIF + + CALL GSTATS(1801,3) + IF(S%LUSE_BELUSOV) DEALLOCATE(F%RPNM) + + IF(C%LWRITE_LEGPOL) CALL WRITE_LEGPOL + IF(C%LREAD_LEGPOL) CALL READ_LEGPOL + + +ENDIF +CALL GSTATS(1801,1) +CALL GSTATS(140,1) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +!IF (S%LUSE_BELUSOV.OR.S%LUSEFLT) +CALL END_POL + +END SUBROUTINE SULEG +END MODULE SULEG_MOD diff --git a/src/trans/gpu/internal/sump_trans0_mod.F90 b/src/trans/gpu/internal/sump_trans0_mod.F90 new file mode 100755 index 0000000..e61269e --- /dev/null +++ b/src/trans/gpu/internal/sump_trans0_mod.F90 @@ -0,0 +1,115 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMP_TRANS0_MOD +CONTAINS +SUBROUTINE SUMP_TRANS0 + +! Set up distributed environment for the transform package (part 0) + +USE PARKIND1 ,ONLY : JPIM +USE MPL_MODULE ,ONLY : MPL_GROUPS_CREATE, MPL_MYRANK, MPL_NPROC + +USE TPM_GEN ,ONLY : NOUT, LMPOFF, NPRINTLEV +USE TPM_DISTR ,ONLY : LEQ_REGIONS, MTAGDISTGP, MTAGDISTSP, MTAGGL, & + & MTAGLETR, MTAGLG, MTAGLM, MTAGML, MTAGPART, & + & MYSETV, MYSETW, NPRCIDS, & + & NPRGPEW, NPRGPNS, NPRTRNS, NPRTRV, NPRTRW, & + & MYPROC, NPROC + +USE EQ_REGIONS_MOD ,ONLY : EQ_REGIONS, MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +USE PE2SET_MOD ,ONLY : PE2SET +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +LOGICAL :: LLP1,LLP2 +INTEGER(KIND=JPIM) :: IPROC,JJ + +! ------------------------------------------------------------------ + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS0 ===' + + +NPROC = NPRGPNS*NPRGPEW +NPRTRNS = NPRTRW +IF(MOD(NPROC,NPRTRW) /= 0 .OR. NPRTRW > NPROC) THEN + CALL ABORT_TRANS('SUMP_TRANS0: NPROC INCONSISTENT WITH NPRTRW') +ENDIF +NPRTRV = NPROC/NPRTRW +IF(LLP1) WRITE(NOUT,*)'NPROC =',NPROC,' NPRGPNS=',NPRGPNS,' NPRGPEW=',& + & NPRGPEW,' NPRTRW=',NPRTRW,' NPRTRV=',NPRTRV + +IF(NPROC > 1 ) THEN + IPROC = MPL_NPROC() + IF(IPROC /= NPROC) THEN + WRITE(NOUT,*) 'SUMP_TRANS0: NPROC=',NPROC,' BUT MPL_NPROC RETURNS',& + & IPROC + CALL ABORT_TRANS('SUMP_TRANS0: NPROC INCONSISTENT WITH MPL_NPROC') + ENDIF + MYPROC = MPL_MYRANK() +ELSE + MYPROC = 1 +ENDIF + +IF (MYPROC > NPROC) THEN + CALL ABORT_TRANS('SUMP_TRANS0: INCONSISTENCY IN NUMBER OF PROCESSORS USED') +ENDIF + +IF( LEQ_REGIONS )THEN + ALLOCATE(N_REGIONS(NPROC+2)) + N_REGIONS(:)=0 + CALL EQ_REGIONS(NPROC) +ELSE + N_REGIONS_NS=NPRGPNS + ALLOCATE(N_REGIONS(N_REGIONS_NS)) + N_REGIONS(:)=NPRGPEW + N_REGIONS_EW=NPRGPEW +ENDIF +CALL PE2SET(MYPROC,MY_REGION_NS,MY_REGION_EW,MYSETW,MYSETV) +IF(LLP1) WRITE(NOUT,*)'MYPROC=',MYPROC,'MY_REGION_NS =',MY_REGION_NS,& + & ' MY_REGION_EW=',MY_REGION_EW,' MYSETW=',MYSETW,' MYSETV=',MYSETV + + +ALLOCATE(NPRCIDS(NPROC)) +IF(LLP2)WRITE(NOUT,9) 'NPRCIDS ',SIZE(NPRCIDS ),SHAPE(NPRCIDS ) +DO JJ=1,NPROC + NPRCIDS(JJ) = JJ +ENDDO + +! Message passing tags + +MTAGLETR = 18000 +MTAGML = 19000 +MTAGLG = 20000 +MTAGPART = 21000 +MTAGDISTSP = 22000 +MTAGGL = 23000 +MTAGLM = 24000 +MTAGDISTGP = 25000 + +! Create communicators for MPI groups + +IF (.NOT.LMPOFF) THEN + CALL MPL_GROUPS_CREATE(NPRTRW, NPRTRV) +ENDIF + +! Setup labels for timing package (gstats) + +! CF ifs/utility GSTATS_OUTPUT_IFS + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUMP_TRANS0 +END MODULE SUMP_TRANS0_MOD diff --git a/src/trans/gpu/internal/sump_trans_mod.F90 b/src/trans/gpu/internal/sump_trans_mod.F90 new file mode 100755 index 0000000..4175f94 --- /dev/null +++ b/src/trans/gpu/internal/sump_trans_mod.F90 @@ -0,0 +1,276 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMP_TRANS_MOD +CONTAINS +SUBROUTINE SUMP_TRANS + +! Set up distributed environment for the transform package (part 2) + +! Modifications : +! P.Marguinaud : 11-Sep-2012 : Fix twice allocated pointer + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT, JPRD + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MYSETW, NPRTRNS, NPRTRW, NPROC, MYPROC + +!USE SUWAVEDI_MOD +!USE PE2SET_MOD +USE SUMPLATF_MOD ,ONLY : SUMPLATF +USE SUMPLAT_MOD ,ONLY : SUMPLAT +USE SUSTAONL_MOD ,ONLY : SUSTAONL +USE MYSENDSET_MOD ,ONLY : MYSENDSET +USE MYRECVSET_MOD ,ONLY : MYRECVSET +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM +INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM +INTEGER(KIND=JPIM) :: I1,I2,I3,IAUX0,IAUX1,JA1 +INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF +INTEGER(KIND=JPIM),ALLOCATABLE :: IGPTOTL(:,:) + +REAL(KIND=JPRBT),ALLOCATABLE :: ZDUM(:) +REAL(KIND=JPRBT) :: ZMEDIAP +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS ===' + +IF(.NOT.D%LGRIDONLY) THEN + ALLOCATE(D%NULTPP(NPRTRNS)) + IF(LLP2)WRITE(NOUT,9) 'D%NULTPP ',SIZE(D%NULTPP ),SHAPE(D%NULTPP ) + ALLOCATE(D%NPTRLS(NPRTRNS)) + IF(LLP2)WRITE(NOUT,9) 'D%NPTRLS ',SIZE(D%NPTRLS ),SHAPE(D%NPTRLS ) + ALLOCATE(D%NPROCL(R%NDGL)) + IF(LLP2)WRITE(NOUT,9) 'D%NPROCL ',SIZE(D%NPROCL ),SHAPE(D%NPROCL ) + + CALL SUMPLATF(R%NDGL,NPRTRNS,MYSETW,D%NULTPP,D%NPROCL,D%NPTRLS) + D%NDGL_FS = D%NULTPP(MYSETW) + +! Help arrays for spectral to fourier space transposition + ALLOCATE(D%NLTSGTB (NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NLTSGTB ',SIZE(D%NLTSGTB),SHAPE(D%NLTSGTB) + ALLOCATE(D%NLTSFTB (NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NLTSFTB ',SIZE(D%NLTSFTB),SHAPE(D%NLTSFTB) + ALLOCATE(D%NSTAGT0B(NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT0B ',SIZE(D%NSTAGT0B),SHAPE(D%NSTAGT0B) + ALLOCATE(D%NSTAGT1B(NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT1B ',SIZE(D%NSTAGT1B),SHAPE(D%NSTAGT1B) + ALLOCATE(D%MSTABF (NPRTRNS+1)) + IF(LLP2)WRITE(NOUT,9) 'D%MSTABF ',SIZE(D%MSTABF),SHAPE(D%MSTABF) + + D%NLTSGTB(:) = 0 + DO JGL=1,D%NDGL_FS + IGL = D%NPTRLS(MYSETW)+JGL-1 + DO JM=0,G%NMEN(IGL) + D%NLTSGTB(D%NPROCM(JM)) = D%NLTSGTB(D%NPROCM(JM))+1 + ENDDO + ENDDO + DO JA=1,NPRTRW + IPLAT = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA)+JGL-1 + DO JM=1,D%NUMP + IF(IGL > R%NDGNH-G%NDGLU(D%MYMS(JM)) .AND. IGL <= R%NDGNH+G%NDGLU(D%MYMS(JM))) THEN + IPLAT = IPLAT + 1 + ENDIF + ENDDO + ENDDO + D%NLTSFTB(JA) = IPLAT + ENDDO + + DO JA=1,NPRTRW-1 + ISENDSET = MYSENDSET(NPRTRW,MYSETW,JA) + IRECVSET = MYRECVSET(NPRTRW,MYSETW,JA) + D%MSTABF(IRECVSET) = ISENDSET + ENDDO + D%MSTABF(MYSETW) = MYSETW + + ALLOCATE(D%NPNTGTB0(0:R%NSMAX,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB0 ',SIZE(D%NPNTGTB0 ),SHAPE(D%NPNTGTB0 ) + ALLOCATE(D%NPNTGTB1(D%NUMP,R%NDGL)) + IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB1 ',SIZE(D%NPNTGTB1 ),SHAPE(D%NPNTGTB1 ) + + DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(MYSETW) + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JML=D%NPTRMS(JA),D%NPTRMS(JA)+D%NUMPP(JA)-1 + IM = D%NALLMS(JML) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB0(IM,JGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB0(IM,JGL) = -99 + ENDIF + ENDDO + ENDDO + ENDDO + + DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA) + JGL - 1 + DO JM=1,D%NUMP + IM = D%MYMS(JM) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB1(JM,IGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB1(JM,IGL) = -99 + ENDIF + ENDDO + ENDDO + ENDDO + + IAUX0 = 0 + IAUX1 = 0 + DO JA=1,NPRTRNS-1 + I1 = MYSENDSET(NPRTRNS,MYSETW,JA) + I2 = MYRECVSET(NPRTRNS,MYSETW,JA) + I3 = -1 + DO JA1=1,NPRTRNS-1 + IF(MYSENDSET(NPRTRNS,MYSETW,JA1) == I2) I3 =MYRECVSET(NPRTRNS,MYSETW,JA1) + ENDDO + IAUX0 = MAX(D%NLTSFTB(I1),D%NLTSGTB(I2),IAUX0) + IAUX1 = MAX(D%NLTSGTB(I2),D%NLTSFTB(I3),IAUX1) + ENDDO + IAUX0 = MAX(D%NLTSGTB(MYSETW),IAUX0) + IAUX1 = MAX(D%NLTSGTB(MYSETW),IAUX1) + DO JA=1,NPRTRNS+1 + D%NSTAGT0B(JA) = (JA-1)*IAUX0 + D%NSTAGT1B(JA) = (JA-1)*IAUX1 + ENDDO + D%NLENGT0B = IAUX0*NPRTRNS + D%NLENGT1B = IAUX1*NPRTRNS +ENDIF + +! GRIDPOINT SPACE + +ALLOCATE(D%NFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NFRSTLAT ',SIZE(D%NFRSTLAT ),SHAPE(D%NFRSTLAT ) +ALLOCATE(D%NLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NLSTLAT ',SIZE(D%NLSTLAT ),SHAPE(D%NLSTLAT ) +ALLOCATE(D%NPTRLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLAT ',SIZE(D%NPTRLAT ),SHAPE(D%NPTRLAT ) +ALLOCATE(D%NPTRFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRFRSTLAT',SIZE(D%NPTRFRSTLAT),SHAPE(D%NPTRFRSTLAT) +ALLOCATE(D%NPTRLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9)'D%NPTRLSTLAT',SIZE(D%NPTRLSTLAT),SHAPE(D%NPTRLSTLAT) +ALLOCATE(D%LSPLITLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%LSPLITLAT',SIZE(D%LSPLITLAT),SHAPE(D%LSPLITLAT) +ALLOCATE(D%NPROCA_GP(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCA_GP',SIZE(D%NPROCA_GP),SHAPE(D%NPROCA_GP) + + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + ALLOCATE(ZDUM(1)) + CALL SUMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT,LEQ_REGIONS,& + &D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + &D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + &ZDUM,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + &IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN) +ELSE + CALL SUMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT,LEQ_REGIONS,& + &D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + &D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + &D%RWEIGHT,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + &IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN) +ENDIF +D%NDGL_GP = D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF + +IF (LLP1) THEN + IF(.NOT.D%LGRIDONLY) THEN + WRITE(NOUT,FMT='(/'' OUTPUT FROM ROUTINE SUMPLAT: ''/)') + WRITE(NOUT,FMT='('' D%NULTPP '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NULTPP(1:NPRTRNS) + WRITE(NOUT,FMT='('' D%NPROCL '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPROCL(1:R%NDGL) + ENDIF + WRITE(NOUT,FMT='('' D%NFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NFRSTLOFF D%NPTRFLOFF '')') + WRITE(NOUT,FMT='(2(1X,I6))') D%NFRSTLOFF, D%NPTRFLOFF + WRITE(NOUT,FMT='('' D%NPTRLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%LSPLITLAT '')') + WRITE(NOUT,FMT='(50(1X,L1))') D%LSPLITLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%NPTRFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NPTRLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='(/)') +ENDIF +ALLOCATE(D%NSTA(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTA ',SIZE(D%NSTA ),SHAPE(D%NSTA ) +ALLOCATE(D%NONL(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NONL ',SIZE(D%NONL ),SHAPE(D%NONL ) + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + CALL SUSTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,ZDUM,ZMEDIAP,D%NPROCA_GP) +ELSE + CALL SUSTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,D%RWEIGHT,ZMEDIAP,D%NPROCA_GP) +ENDIF +! IGPTOTL is the number of grid points in each individual processor +ALLOCATE(IGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IGPTOTL(:,:)=0 + +DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + IGPTOT = 0 + DO JGL=D%NPTRFRSTLAT(JA),D%NPTRLSTLAT(JA) + IGPTOT = IGPTOT+D%NONL(JGL,JB) + ENDDO + IGPTOTL(JA,JB) = IGPTOT + ENDDO +ENDDO +D%NGPTOT = IGPTOTL(MY_REGION_NS,MY_REGION_EW) +D%NGPTOTMX = MAXVAL(IGPTOTL) +D%NGPTOTG = SUM(IGPTOTL) +ALLOCATE(D%NGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NGPTOTL ',SIZE(D%NGPTOTL ),SHAPE(D%NGPTOTL ) +D%NGPTOTL(:,:) = IGPTOTL(:,:) + +IF(.NOT.D%LGRIDONLY) THEN + ALLOCATE(D%NSTAGTF(D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'D%NSTAGTF ',SIZE(D%NSTAGTF ),SHAPE(D%NSTAGTF ) + IOFF = 0 + DO JGL=1,D%NDGL_FS + D%NSTAGTF(JGL) = IOFF + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IOFF = IOFF + G%NLOEN(IGL)+3 + ENDDO + D%NLENGTF = IOFF +ENDIF + +IF(ALLOCATED(ZDUM)) DEALLOCATE(ZDUM) +DEALLOCATE(IGPTOTL) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUMP_TRANS +END MODULE SUMP_TRANS_MOD + diff --git a/src/trans/gpu/internal/sump_trans_preleg_mod.F90 b/src/trans/gpu/internal/sump_trans_preleg_mod.F90 new file mode 100755 index 0000000..78038f4 --- /dev/null +++ b/src/trans/gpu/internal/sump_trans_preleg_mod.F90 @@ -0,0 +1,149 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMP_TRANS_PRELEG_MOD +CONTAINS +SUBROUTINE SUMP_TRANS_PRELEG + +! Set up distributed environment for the transform package (part 1) + +USE PARKIND1 ,ONLY : JPIM + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRW, NPRTRV, MYSETW + +USE SUWAVEDI_MOD ,ONLY : SUWAVEDI +!USE ABORT_TRANS_MOD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JW,JV,JJ,JM,JMLOC,ILATPP,IRESTL,IMLOC,IDT,INM,ILAST + +INTEGER(KIND=JPIM) :: IMYMS(R%NSMAX+1),INUMTPP(NPRTRW) +INTEGER(KIND=JPIM) :: IDUMI1,IDUMI2,IDUMI3 +INTEGER(KIND=JPIM) :: IDUM2(0:R%NSMAX), IDUM3(NPRTRW+1), IDUM4(R%NSMAX+1) + +LOGICAL :: LLP1,LLP2 + +! ------------------------------------------------------------------ + +IF(.NOT.D%LGRIDONLY) THEN + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS_PRELEG ===' + +!* 1. Initialize partitioning of wave numbers to PEs ! +! ---------------------------------------------- + + ALLOCATE(D%NASM0(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NASM0 ',SIZE(D%NASM0 ),SHAPE(D%NASM0 ) + ALLOCATE(D%NATM0(0:R%NTMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NATM0 ',SIZE(D%NATM0 ),SHAPE(D%NATM0 ) + ALLOCATE(D%NUMPP(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NUMPP ',SIZE(D%NUMPP ),SHAPE(D%NUMPP ) + ALLOCATE(D%NPOSSP(NPRTRW+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NPOSSP',SIZE(D%NPOSSP ),SHAPE(D%NPOSSP ) + ALLOCATE(D%NPROCM(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPROCM',SIZE(D%NPROCM ),SHAPE(D%NPROCM ) + ALLOCATE(D%NPTRMS(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NPTRMS ',SIZE(D%NPTRMS ),SHAPE(D%NPTRMS ) + ALLOCATE(D%NALLMS(R%NSMAX+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NALLMS ',SIZE(D%NALLMS ),SHAPE(D%NALLMS ) + ALLOCATE(D%NDIM0G(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NDIM0G ',SIZE(D%NDIM0G ),SHAPE(D%NDIM0G ) + + CALL SUWAVEDI(R%NSMAX,R%NTMAX,NPRTRW,MYSETW,& + &D%NASM0,D%NSPOLEGL,D%NPROCM,D%NUMPP,& + &D%NSPEC,D%NSPEC2,D%NSPEC2MX,D%NPOSSP,IMYMS,& + &D%NPTRMS,D%NALLMS,D%NDIM0G) + CALL SUWAVEDI(R%NTMAX,R%NTMAX,NPRTRW,MYSETW,& + &KASM0=D%NATM0,KUMPP=INUMTPP,KSPEC2=D%NTPEC2) + + D%NUMP = D%NUMPP (MYSETW) + ALLOCATE(D%MYMS(D%NUMP)) + IF(LLP2)WRITE(NOUT,9) 'D%MYMS ',SIZE(D%MYMS ),SHAPE(D%MYMS ) + D%MYMS(:) = IMYMS(1:D%NUMP) + D%NUMTP = INUMTPP(MYSETW) + ALLOCATE(D%NLATLS(NPRTRW,NPRTRV)) + IF(LLP2)WRITE(NOUT,9) 'D%NLATLS',SIZE(D%NLATLS ),SHAPE(D%NLATLS ) + ALLOCATE(D%NLATLE(NPRTRW,NPRTRV)) + IF(LLP2)WRITE(NOUT,9) 'D%NLATLE',SIZE(D%NLATLE ),SHAPE(D%NLATLE ) + + D%NLATLS(:,:) = 999999 + D%NLATLE(:,:) = -1 + + ILATPP = R%NDGNH/NPRTRW + IRESTL = R%NDGNH-NPRTRW*ILATPP + DO JW=1,NPRTRW + IF (JW > IRESTL) THEN + D%NLATLS(JW,1) = IRESTL*(ILATPP+1)+(JW-IRESTL-1)*ILATPP+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP-1 + ELSE + D%NLATLS(JW,1) = (JW-1)*(ILATPP+1)+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP + ENDIF + ENDDO + ILAST=0 + DO JW=1,NPRTRW + ILATPP = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)/NPRTRV + IRESTL = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)-NPRTRV*ILATPP + DO JV=1,NPRTRV + IF (JV > IRESTL) THEN + D%NLATLS(JW,JV) = IRESTL*(ILATPP+1)+(JV-IRESTL-1)*ILATPP+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP-1 + ELSE + D%NLATLS(JW,JV) = (JV-1)*(ILATPP+1)+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP + ENDIF + ENDDO + ILAST=D%NLATLE(JW,NPRTRV) + ENDDO + + IF (LLP1) THEN + DO JW=1,NPRTRW + DO JV=1,NPRTRV + WRITE(NOUT,'(" JW=",I6," JV=",I6," D%NLATLS=",I6," D%NLATLE=",I6)')& + & JW,JV,D%NLATLS(JW,JV),D%NLATLE(JW,JV) + ENDDO + ENDDO + ENDIF + + ALLOCATE(D%NPMT(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMT ',SIZE(D%NPMT ),SHAPE(D%NPMT ) + ALLOCATE(D%NPMS(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMS ',SIZE(D%NPMS ),SHAPE(D%NPMS ) + ALLOCATE(D%NPMG(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPMG ',SIZE(D%NPMG ),SHAPE(D%NPMG ) + IDT = R%NTMAX-R%NSMAX + INM = 0 + DO JMLOC=1,D%NUMP + IMLOC = D%MYMS(JMLOC) + D%NPMT(IMLOC) = INM + D%NPMS(IMLOC) = INM+IDT + INM = INM+R%NTMAX+2-IMLOC + ENDDO + INM = 0 + DO JM=0,R%NSMAX + D%NPMG(JM) = INM + INM = INM+R%NTMAX+2-JM + ENDDO + + D%NLEI3D = (R%NLEI3-1)/NPRTRW+1 + +ENDIF + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUMP_TRANS_PRELEG +END MODULE SUMP_TRANS_PRELEG_MOD diff --git a/src/trans/gpu/internal/sumplat_mod.F90 b/src/trans/gpu/internal/sumplat_mod.F90 new file mode 100755 index 0000000..effffa2 --- /dev/null +++ b/src/trans/gpu/internal/sumplat_mod.F90 @@ -0,0 +1,256 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLAT_MOD +CONTAINS +SUBROUTINE SUMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,LDSPLITLAT,KMYPROC,KLOEN) + +!**** *SUMPLAT * - Initialize gridpoint distrbution in N-S direction + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLAT * + +! Explicit arguments - input : +! -------------------- +! KDGL -last latitude +! KPROC -total number of processors +! KPROCA -number of processors in A direction +! KMYSETA -process number in A direction +! LDSPLIT -true for latitudes shared between sets +! LDEQ_REGIONS -true if eq_regions partitioning +! PWEIGHT -weight per grid-point if weighted distribution +! LDWEIGHTED_DISTR -true if weighted distribution + +! Explicit arguments - output: +! -------------------- +! PMEDIAP -mean weight per PE if weighted distribution +! KMEDIAP -mean number of grid points per PE +! KPROCAGP -number of grid points per A set +! KRESTM -number of PEs with one extra point +! KFRSTLAT -first latitude row on processor +! KLSTLAT -last latitude row on processor +! KFRSTLOFF -offset for first latitude in set +! KPTRLAT -pointer to start of latitude +! KPTRFRSTLAT-pointer to first latitude +! KPTRLSTLAT -pointer to last latitude +! KPTRFLOFF -offset for pointer to first latitude +! LDSPLITLAT -true for latitudes which are split + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. SUMPLATB and SUEMPLATB. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! David Dent:97-06-02 parameters KFRSTLAT etc added +! JF. Estrade:97-11-13 Adaptation to ALADIN case +! J.Boutahar: 98-07-06 phasing with CY19 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings +! (correct computation of extrapolar latitudes for KPROCL). +! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. +! - merge old sumplat.F and suemplat.F +! - gather 'lelam' code and 'not lelam' code. +! - clean (useless duplication of variables, non doctor features). +! - remodularise according to lelam/not lelam +! -> lelam features in new routine suemplatb.F, +! not lelam features in new routine sumplatb.F +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : MYPROC + +USE SUMPLATB_MOD ,ONLY : SUMPLATB +USE SUMPLATBEQ_MOD ,ONLY : SUMPLATBEQ +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + + +! * DUMMY: +REAL(KIND=JPRBT),INTENT(OUT) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA +REAL(KIND=JPRBT),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL,INTENT(OUT) :: LDSPLITLAT(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KMYPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(KDGL) + +! * LOCAL: +! === END OF INTERFACE BLOCK === +INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IPTRLATITUDE, JA, JGL + +LOGICAL :: LLFOURIER +LOGICAL :: LLDEBUG=.FALSE. + +! ----------------------------------------------------------------- + +!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF +! KMEDIAP, KRESTM, INDIC, ILAST. +! ----------------------------------------- +INDIC(:)=0 +ILAST(:)=0 + +IF(LDWEIGHTED_DISTR.AND..NOT.LDEQ_REGIONS)THEN + CALL ABORT_TRANS ('SUMPLAT: LDWEIGHTED_DISTR=T AND LDEQ_REGIONS=F NOT SUPPORTED') +ENDIF + +IF( LDEQ_REGIONS )THEN + CALL SUMPLATBEQ(1,KDGL,KPROC,KPROCA,KLOEN,LDSPLIT,LDEQ_REGIONS,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,INDIC,ILAST) +ELSE + LLFOURIER=.FALSE. + CALL SUMPLATB(1,KDGL,KPROCA,KLOEN,LDSPLIT,LLFOURIER,& + &KMEDIAP,KRESTM,INDIC,ILAST) +ENDIF + +! ----------------------------------------------------------------- + +!* 2. CODE NOT DEPENDING ON 'LELAM': COMPUTATION OF +! KFRSTLAT TO LDSPLITLAT. +! --------------------------------------------- + + +! * Computation of first and last latitude of processor sets +! ----------- in grid-point-space ----------------------- + +IF(KMYPROC==1.AND.LLDEBUG)THEN + WRITE(0,'("")') + WRITE(0,'("SUMPLAT_MOD:LDWEIGHTED_DISTR=",L1)')LDWEIGHTED_DISTR + WRITE(0,'("")') + DO JA=1,KPROCA + WRITE(0,'("SUMPLAT_MOD: JA=",I5," ILAST=",I5," INDIC=",I5)')& + &JA,ILAST(JA),INDIC(JA) + ENDDO + WRITE(0,'("")') + IF( LDEQ_REGIONS .AND. LDSPLIT )THEN + DO JA=1,KPROCA + WRITE(0,'("SUMPLAT_MOD: JA=",I5," KPROCAGP=",I12)')& + &JA,KPROCAGP(JA) + ENDDO + WRITE(0,'("")') + ENDIF +ENDIF + +KFRSTLAT(1) = 1 +KLSTLAT(KPROCA) = KDGL +DO JA=1,KPROCA-1 + IF ((.NOT. LDSPLIT) .OR. INDIC(JA) == 0) THEN + KFRSTLAT(JA+1) = ILAST(JA) + 1 + KLSTLAT(JA) = ILAST(JA) + ELSE + KFRSTLAT(JA+1) = INDIC(JA) + KLSTLAT(JA) = INDIC(JA) + ENDIF +ENDDO +KFRSTLOFF=KFRSTLAT(KMYSETA)-1 + +! * Initialise following data structures:- +! NPTRLAT (pointer to the start of each latitude) +! LSPLITLAT (TRUE if latitude is split over two A sets) +! NPTRFRSTLAT (pointer to the first latitude of each A set) +! NPTRLSTLAT (pointer to the last latitude of each A set) + +DO JGL=1,KDGL + KPTRLAT (JGL)=-999 + LDSPLITLAT(JGL)=.FALSE. +ENDDO +IPTRLATITUDE=0 +DO JA=1,KPROCA + DO JGL=KFRSTLAT(JA),KLSTLAT(JA) + IPTRLATITUDE=IPTRLATITUDE+1 + LDSPLITLAT(JGL)=.TRUE. + IF( KPTRLAT(JGL) == -999 )THEN + KPTRLAT(JGL)=IPTRLATITUDE + LDSPLITLAT(JGL)=.FALSE. + ENDIF + ENDDO +ENDDO +DO JA=1,KPROCA + IF( LDSPLITLAT(KFRSTLAT(JA)) .AND. JA /= 1)THEN + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA))+1 + ELSE + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA)) + ENDIF + IF( LDSPLITLAT(KLSTLAT(JA)) .AND. JA == KPROCA)THEN + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA))+1 + ELSE + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA)) + ENDIF +ENDDO +KPTRFLOFF=KPTRFRSTLAT(KMYSETA)-1 + +IF(KMYPROC==1.AND.LLDEBUG)THEN + DO JGL=1,KDGL + WRITE(0,'("SUMPLAT_MOD: JGL=",I5," KPTRLAT=",I5," LDSPLITLAT=",L4)')& + & JGL,KPTRLAT(JGL),LDSPLITLAT(JGL) + ENDDO + DO JA=1,KPROCA + WRITE(0,'("SUMPLAT_MOD: JA=",I5," KFRSTLAT=",I5," KLSTLAT=",I5,& + & " KPTRFRSTLAT=",I5," KPTRLSTLAT=",I5," KLSTLAT-KFRSTLAT=",I5,& + & " SUM(G%NLOEN(KFRSTLAT:KLSTLAT))=",I10)')& + & JA,KFRSTLAT(JA),KLSTLAT(JA),KPTRFRSTLAT(JA),KPTRLSTLAT(JA),& + & KLSTLAT(JA)-KFRSTLAT(JA),SUM(G%NLOEN(KFRSTLAT(JA):KLSTLAT(JA))) + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUMPLAT +END MODULE SUMPLAT_MOD + + + diff --git a/src/trans/gpu/internal/sumplatb_mod.F90 b/src/trans/gpu/internal/sumplatb_mod.F90 new file mode 100755 index 0000000..fb5033a --- /dev/null +++ b/src/trans/gpu/internal/sumplatb_mod.F90 @@ -0,0 +1,226 @@ +! (C) Copyright 1998- ECMWF. +! (C) Copyright 1998- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLATB_MOD +CONTAINS +SUBROUTINE SUMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,LDFOURIER,& + & KMEDIAP,KRESTM,KINDIC,KLAST) + +!**** *SUMPLATB * - Routine to initialize parallel environment + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLATB * + +! Explicit arguments - input : +! -------------------- +! KDGSA -first latitude (grid-space) +! (may be different from NDGSAG) +! KDGL -last latitude +! KPROCA -number of processors in A direction +! KLOENG -actual number of longitudes per latitude. +! LDSPLIT -true for latitudes shared between sets +! LDFOURIER -true for fourier space partitioning + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KRESTM -number of PEs with one extra point +! KINDIC -intermediate quantity for 'sumplat' +! KLAST -intermediate quantity for 'sumplat' + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! K. YESSAD (after old version of sumplat.F). + +! Modifications. +! -------------- +! Original : 98-12-07 +! G. Mozdzynski (August 2012): rewrite of fourier latitude distribution +! ------------------------------------------------------------------ + + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPIB, JPRBT + +USE TPM_DISTR +USE ABORT_TRANS_MOD + +IMPLICIT NONE + + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDFOURIER +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) + +! * LOCAL: +INTEGER(KIND=JPIB) :: ICOST(KDGSA:KDGL) +INTEGER(KIND=JPIM) :: ILATS(KPROCA) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: ICOMP, IGL, JA, JGL, ILAST, IREST, IA +INTEGER(KIND=JPIM) :: ITOT_TOP, ITOT_BOT, IGL_TOP, IGL_BOT +INTEGER(KIND=JPIB) :: IMEDIA,ITOT +REAL(KIND=JPRBT) :: ZLG +LOGICAL :: LLDONE,LLSIMPLE + +! ----------------------------------------------------------------- + +!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. +! ---------------------------------------------- + +! * Computation of KMEDIAP and KRESTM. + +IF( LDFOURIER )THEN + +! DO JGL=1,KDGL +! ZLG=LOG(FLOAT(KLOENG(JGL))) +! ICOST(JGL)=KLOENG(JGL)*ZLG*SQRT(ZLG) +! ENDDO + + DO JGL=1,KDGL + ICOST(JGL)=KLOENG(JGL) + ENDDO + +ELSE + + DO JGL=1,KDGL + ICOST(JGL)=KLOENG(JGL) + ENDDO + +ENDIF + +IMEDIA = SUM(ICOST(KDGSA:KDGL)) +KMEDIAP = IMEDIA / KPROCA +KRESTM = IMEDIA - KMEDIAP * KPROCA +IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 + +! * Computation of intermediate quantities KINDIC and KLAST + +KINDIC(:)=0 +KLAST(:)=0 + +IF (LDSPLIT) THEN + + IREST = 0 + ILAST =0 + DO JA=1,KPROCA + IF (JA <= KRESTM .OR. KRESTM == 0) THEN + ICOMP = KMEDIAP + ELSE + ICOMP = KMEDIAP - 1 + ENDIF + ITOT = IREST + IGL = ILAST+1 + DO JGL=IGL,KDGL + ILAST = JGL + IF(ITOT+ICOST(JGL) < ICOMP) THEN + ITOT = ITOT+ICOST(JGL) + ELSEIF(ITOT+ICOST(JGL) == ICOMP) THEN + IREST = 0 + KLAST(JA) = JGL + KINDIC(JA) = 0 + EXIT + ELSE + IREST = ICOST(JGL) -(ICOMP-ITOT) + KLAST(JA) = JGL + KINDIC(JA) = JGL + EXIT + ENDIF + ENDDO + ENDDO + +ELSE + + ITOT_TOP=0 + ITOT_BOT=0 + IGL_TOP=1 + IGL_BOT=KDGL + DO JA=1,(KPROCA-1)/2+1 + IF( JA /= KPROCA/2+1 )THEN + LLDONE=.TRUE. + DO WHILE ( LLDONE ) + IF( ITOT_TOP+ICOST(IGL_TOP) < KMEDIAP )THEN + KLAST(JA)=IGL_TOP + ITOT_TOP=ITOT_TOP+ICOST(IGL_TOP) + IGL_TOP=IGL_TOP+1 + ELSE + ITOT_TOP=ITOT_TOP-KMEDIAP + LLDONE=.FALSE. + ENDIF + ENDDO + KLAST(KPROCA-JA+1)=IGL_BOT + LLDONE=.TRUE. + DO WHILE ( LLDONE ) + IF( ITOT_BOT+ICOST(IGL_BOT) < KMEDIAP )THEN + ITOT_BOT=ITOT_BOT+ICOST(IGL_BOT) + IGL_BOT=IGL_BOT-1 + ELSE + ITOT_BOT=ITOT_BOT-KMEDIAP + LLDONE=.FALSE. + ENDIF + ENDDO + ELSE + KLAST(JA)=IGL_BOT + ENDIF + ENDDO + + LLSIMPLE=.FALSE. + DO JA=1,KPROCA + IF( KLAST(JA)==0 )THEN + LLSIMPLE=.TRUE. + EXIT + ENDIF + ENDDO + IF( LLSIMPLE )THEN +! WRITE(0,'("SUMPLATB_MOD: REVERTING TO SIMPLE LATITUDE DISTRIBUTION")') + ILATS(:)=0 + IA=0 + DO JGL=1,KDGL + IA=IA+1 + ILATS(IA)=ILATS(IA)+1 + IF( IA==KPROCA ) IA=0 + ENDDO + KLAST(1)=ILATS(1) + DO JA=2,KPROCA + KLAST(JA)=KLAST(JA-1)+ILATS(JA) + ENDDO + ENDIF + +ENDIF + +END SUBROUTINE SUMPLATB +END MODULE SUMPLATB_MOD diff --git a/src/trans/gpu/internal/sumplatbeq_mod.F90 b/src/trans/gpu/internal/sumplatbeq_mod.F90 new file mode 100755 index 0000000..1738773 --- /dev/null +++ b/src/trans/gpu/internal/sumplatbeq_mod.F90 @@ -0,0 +1,289 @@ +! (C) Copyright 2006- ECMWF. +! (C) Copyright 2006- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLATBEQ_MOD +CONTAINS +SUBROUTINE SUMPLATBEQ(KDGSA,KDGL,KPROC,KPROCA,KLOENG,LDSPLIT,LDEQ_REGIONS,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,KINDIC,KLAST) + +!**** *SUMPLATBEQ * - Routine to initialize parallel environment +! (latitude partitioning for LEQ_REGIONS=T) + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLATBEQ * + +! Explicit arguments - input : +! -------------------- +! KDGSA -first latitude (grid-space) +! (may be different from NDGSAG) +! KDGL -last latitude +! KPROC -total number of processors +! KPROCA -number of processors in A direction +! KLOENG -actual number of longitudes per latitude. +! LDSPLIT -true for latitudes shared between sets +! LDEQ_REGIONS -true if eq_regions partitioning +! PWEIGHT -weight per grid-point if weighted distribution +! LDWEIGHTED_DISTR -true if weighted distribution + +! Explicit arguments - output: +! -------------------- +! PMEDIAP -mean weight per PE if weighted distribution +! KMEDIAP -mean number of grid points per PE +! KPROCAGP -number of grid points per A set +! KRESTM -number of PEs with one extra point +! KINDIC -intermediate quantity for 'sumplat' +! KLAST -intermediate quantity for 'sumplat' + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! G. Mozdzynski + +! Modifications. +! -------------- +! Original : April 2006 +! ------------------------------------------------------------------ + + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DISTR ,ONLY : MYPROC +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) +REAL(KIND=JPRBT), INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +REAL(KIND=JPRBT), INTENT(OUT) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) + +! * LOCAL: + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: ICOMP, IGL, IMAXI, IMEDIA, IMEDIAP, ITOT, JA, JB, IA, JGL,& + &ILAST,IREST,IPE,I2REGIONS,IGP +REAL(KIND=JPRBT) :: ZMEDIA, ZCOMP +LOGICAL :: LLDONE + +! ----------------------------------------------------------------- + +!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. +! ---------------------------------------------- +100 CONTINUE +! * Computation of KMEDIAP and KRESTM. + +IF (.NOT.LDWEIGHTED_DISTR) THEN + + IMEDIA = SUM(KLOENG(KDGSA:KDGL)) + KMEDIAP = IMEDIA / KPROC + + IF( KPROC > 1 )THEN +! test if KMEDIAP is too small and no more than 2 asets would be required +! for the first latitude + IF( LDSPLIT )THEN + I2REGIONS=N_REGIONS(1)+N_REGIONS(2) + IF( KMEDIAP < (KLOENG(KDGSA)-1)/I2REGIONS+1 )THEN + WRITE(0,'("SUMPLATBEQ: KMEDIAP=",I6," I2REGIONS=",I3," KLOENG(KDGSA)=",I4)')& + &KMEDIAP,I2REGIONS,KLOENG(KDGSA) + CALL ABORT_TRANS ('SUMPLATBEQ: NPROC TOO BIG FOR THIS RESOLUTION, LDSPLIT=T') + ENDIF + ELSE +! test for number asets too large for the number of latitudes + IF( KPROCA > KDGL )THEN + WRITE(0,'("SUMPLATBEQ: KMEDIAP=",I6," KPROCA=",I4," KDGL=",I4)')& + &KMEDIAP,KPROCA,KDGL + CALL ABORT_TRANS ('SUMPLATBEQ: NPROC TOO BIG FOR THIS RESOLUTION, LDSPLIT=F') + ENDIF + ENDIF + ENDIF + + KRESTM = IMEDIA - KMEDIAP * KPROC + IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 + +ELSE + + ZMEDIA = SUM(PWEIGHT(:)) + PMEDIAP = ZMEDIA / KPROC + +ENDIF + +! * Computation of intermediate quantities KINDIC and KLAST + +IF (LDSPLIT) THEN + + KPROCAGP(:)=0 + IREST = 0 + ILAST =0 + IPE=0 + ZCOMP=0 + IGP=0 + DO JA=1,KPROCA + ICOMP=0 + DO JB=1,N_REGIONS(JA) + IF( LDWEIGHTED_DISTR )THEN + DO WHILE ( ( JA == KPROCA .OR. ZCOMP < PMEDIAP ) .AND. IGP < SIZE(PWEIGHT) ) + IGP = IGP + 1 + ICOMP = ICOMP + 1 + ZCOMP = ZCOMP + PWEIGHT(IGP) + ENDDO + ZCOMP = ZCOMP - PMEDIAP + ELSE + IPE=IPE+1 + IF (IPE <= KRESTM .OR. KRESTM == 0) THEN + ICOMP = ICOMP + KMEDIAP + ELSE + ICOMP = ICOMP + (KMEDIAP-1) + ENDIF + ENDIF + ENDDO + KPROCAGP(JA)=ICOMP + ITOT = IREST + IGL = ILAST+1 + DO JGL=IGL,KDGL + ILAST = JGL + IF(ITOT+KLOENG(JGL) < ICOMP) THEN + ITOT = ITOT+KLOENG(JGL) + ELSEIF(ITOT+KLOENG(JGL) == ICOMP) THEN + IREST = 0 + KLAST(JA) = JGL + KINDIC(JA) = 0 + EXIT + ELSE + IREST = KLOENG(JGL) -(ICOMP-ITOT) + KLAST(JA) = JGL + KINDIC(JA) = JGL + EXIT + ENDIF + ENDDO + ENDDO + IF( LDWEIGHTED_DISTR )THEN + IF( KLAST(KPROCA) /= KDGL )THEN + DO JA=1,KPROCA + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ_MOD: JA=",I3," KLAST=",I3," KINDIC=",I3)')& + &JA,KLAST(JA),KINDIC(JA) + ENDIF + ENDDO + WRITE(0,'("SUMPLATBEQ: LWEIGHTED_DISTR=T FAILED TO PARTITION GRID, REVERTING TO ",& + & " LWEIGHTED_DISTR=F PARTITIONING")') + LDWEIGHTED_DISTR=.FALSE. + GOTO 100 + ENDIF + ENDIF + IF( SUM(KPROCAGP(:)) /= SUM(KLOENG(KDGSA:KDGL)) )THEN + IF( MYPROC == 1 )THEN + WRITE(0,'("SUM(KPROCAGP(:))=",I12)')SUM(KPROCAGP(:)) + WRITE(0,'("SUM(KLOENG(:))=",I12)')SUM(KLOENG(KDGSA:KDGL)) + ENDIF + CALL ABORT_TRANS ('SUMPLATBEQ: PROBLEM IN PARTITIONING ') + ENDIF + +ELSE + + IF( LDWEIGHTED_DISTR )THEN + CALL ABORT_TRANS ('SUMPLATBEQ: LSPLIT=F NOT SUPPORTED FOR WEIGHTED DISTRIBUTION ') + ENDIF + + KINDIC(:) = 0 + LLDONE=.FALSE. + IMEDIAP=KMEDIAP + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: IMEDIAP=",I6)')IMEDIAP + ENDIF + DO WHILE(.NOT.LLDONE) +! loop until a satisfactory distribution can be found + IA=1 + IMAXI=IMEDIAP*N_REGIONS(IA) + DO JGL=1,KDGL + KLAST(IA)=JGL + IMAXI=IMAXI-KLOENG(JGL) + IF( IA == KPROCA .AND. JGL == KDGL )THEN + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: EXIT 1")') + ENDIF + EXIT + ENDIF + IF( IA == KPROCA .AND. JGL < KDGL )THEN + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: EXIT 2")') + ENDIF + KLAST(KPROCA)=KDGL + EXIT + ENDIF + IF( IA < KPROCA .AND. JGL == KDGL )THEN + DO JA=KPROCA,IA+1,-1 + KLAST(JA)=KDGL+JA-KPROCA + ENDDO + DO JA=KPROCA,2,-1 + IF( KLAST(JA) <= KLAST(JA-1) )THEN + KLAST(JA-1)=KLAST(JA)-1 + ENDIF + ENDDO + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: EXIT 3")') + ENDIF + EXIT + ENDIF + IF( IMAXI <= 0 )THEN + IA=IA+1 + IMAXI=IMAXI+IMEDIAP*N_REGIONS(IA) + ENDIF + ENDDO + IF( KPROCA > 1 .AND. KLAST(KPROCA) == KLAST(KPROCA-1) )THEN + IMEDIAP=IMEDIAP-1 + IF( MYPROC == 1 )THEN + WRITE(0,'("SUMPLATBEQ: REDUCING IMEDIAP=",I6)')IMEDIAP + ENDIF + IF( IMEDIAP <= 0 )THEN + CALL ABORT_TRANS ('SUMPLATBEQ: PROBLEM PARTITIONING WITH LSPLIT=F, IMEDIAP <= 0') + ENDIF + ELSE + LLDONE=.TRUE. + ENDIF + ENDDO +ENDIF + +END SUBROUTINE SUMPLATBEQ +END MODULE SUMPLATBEQ_MOD diff --git a/src/trans/gpu/internal/sumplatf_mod.F90 b/src/trans/gpu/internal/sumplatf_mod.F90 new file mode 100755 index 0000000..7a5545f --- /dev/null +++ b/src/trans/gpu/internal/sumplatf_mod.F90 @@ -0,0 +1,150 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUMPLATF_MOD +CONTAINS +SUBROUTINE SUMPLATF(KDGL,KPROCA,KMYSETA,& + &KULTPP,KPROCL,KPTRLS) + +!**** *SUMPLATF * - Initialize fourier space distibution in N-S direction + +! Purpose. +! -------- + + +!** Interface. +! ---------- +! *CALL* *SUMPLATF * + +! Explicit arguments - input : +! -------------------- +! KDGL -last latitude +! KPROCA -number of processors in A direction +! KMYSETA -process number in A direction + +! Explicit arguments - output: +! -------------------- + +! KULTPP -number of latitudes in process +! (in Fourier space) +! KPROCL -process responsible for latitude +! (in Fourier space) +! KPTRLS -pointer to first global latitude +! of process (in Fourier space) + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. SUMPLATB and SUEMPLATB. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! David Dent:97-06-02 parameters KFRSTLAT etc added +! JF. Estrade:97-11-13 Adaptation to ALADIN case +! J.Boutahar: 98-07-06 phasing with CY19 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings +! (correct computation of extrapolar latitudes for KPROCL). +! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. +! - merge old sumplat.F and suemplat.F +! - gather 'lelam' code and 'not lelam' code. +! - clean (useless duplication of variables, non doctor features). +! - remodularise according to lelam/not lelam +! -> lelam features in new routine suemplatb.F, +! not lelam features in new routine sumplatb.F +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +USE TPM_GEOMETRY ,ONLY : G + +USE SUMPLATB_MOD ,ONLY : SUMPLATB +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA +INTEGER(KIND=JPIM),INTENT(OUT) :: KULTPP(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCL(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLS(:) + +! * LOCAL: +INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IA, ILAT, ISTART, IMEDIAP,IRESTM, JA, JLTLOC + +LOGICAL :: LLSPLIT,LLFOURIER + +! ----------------------------------------------------------------- + +!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF +! KMEDIAP, KRESTM, INDIC, ILAST. +! ----------------------------------------- + +LLSPLIT = .FALSE. +LLFOURIER = .TRUE. + +CALL SUMPLATB(1,KDGL,KPROCA,G%NLOEN,LLSPLIT,LLFOURIER,& + &IMEDIAP,IRESTM,INDIC,ILAST) + +! ----------------------------------------------------------------- + +!* 2. CODE NOT DEPENDING ON 'LELAM': +! ------------------------------ + + + +! * Definitions related to distribution of latitudes along sets +! ------------ in fourier-space ----------------------------- +ISTART = 0 +KULTPP(1) = ILAST(1) +DO JA=1,KPROCA + IF(JA > 1) THEN + IF(ILAST(JA) /= 0) THEN + KULTPP(JA) = ILAST(JA)-ILAST(JA-1) + ELSE + KULTPP(JA) = 0 + ENDIF + ENDIF + DO JLTLOC=1,KULTPP(JA) + ILAT = ISTART + JLTLOC + KPROCL(ILAT) = JA + ENDDO + ISTART = ISTART + KULTPP(JA) +ENDDO + +! * Computes KPTRLS. + +IA = KPROCL(1) +KPTRLS(IA) = 1 +DO JA=IA+1,KPROCA + KPTRLS(JA) = KPTRLS(JA-1) + KULTPP(JA-1) +ENDDO + +END SUBROUTINE SUMPLATF +END MODULE SUMPLATF_MOD diff --git a/src/trans/gpu/internal/supol_mod.F90 b/src/trans/gpu/internal/supol_mod.F90 new file mode 100755 index 0000000..da7fcbf --- /dev/null +++ b/src/trans/gpu/internal/supol_mod.F90 @@ -0,0 +1,173 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUPOL_MOD +CONTAINS +SUBROUTINE SUPOL(KNSMAX,PDDMU,PFN,PDDPOL) + +!**** *SUPOL * - Routine to compute the Legendre polynomials + +! Purpose. +! -------- +! For a given value of mu, computes the Legendre polynomials. + +!** Interface. +! ---------- +! *CALL* *SUPOL(...) + +! Explicit arguments : +! -------------------- +! KNSMAX : Truncation (triangular) [in] +! PDDMU : Abscissa at which the polynomials are computed (mu) [in] +! PFN : Fourier coefficients of series expansion +! for the ordinary Legendre polynomials [in] +! PDDPOL : Polynomials (the first index is m and the second n) [out] + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- +! See documentation about spectral transforms +! (doc (IDTS) by K. Yessad, appendix 3, or doc (NTA30) by M. Rochas) + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-10-15 +! K. YESSAD (MAY 1998): modification to avoid underflow. +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision +! on NEC +! K. YESSAD (NOV 2008): make consistent arp/SUPOLA and tfl/SUPOL. +! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 +! R. El Khatib 30-Apr-2013 Open-MP parallelization +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM +USE TPM_POL + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KNSMAX +REAL(KIND=JPRD) ,INTENT(IN) :: PDDMU +REAL(KIND=JPRD) ,INTENT(IN) :: PFN(0:KNSMAX,0:KNSMAX) + +REAL(KIND=JPRD) ,INTENT(OUT) :: PDDPOL(0:KNSMAX,0:KNSMAX) + +REAL(KIND=JPRD) :: ZDLX,ZDLX1,ZDLSITA,ZDL1SITA,ZDLS,ZDLK,ZDLLDN + +INTEGER(KIND=JPIM) :: JM, JN, JK +REAL(KIND=JPRD) :: Z +REAL(KIND=JPRD) :: DCL, DDL + +! ------------------------------------------------------------------ + +!* 1. First two columns. +! ------------------ + +ZDLX=PDDMU +ZDLX1=ACOS(ZDLX) +ZDLSITA=SQRT(1.0_JPRD-ZDLX*ZDLX) + +PDDPOL(0,0)=1._JPRD +ZDLLDN = 0.0_JPRD + +! IF WE ARE LESS THAN 1Meter FROM THE POLE, +IF(ABS(REAL(ZDLSITA,KIND(Z))) <= SQRT(EPSILON(Z)))THEN + ZDLX=1._JPRD + ZDLSITA=0._JPRD + ZDL1SITA=0._JPRD +ELSE + ZDL1SITA=1.0_JPRD/ZDLSITA +ENDIF + +!* ordinary Legendre polynomials from series expansion +! --------------------------------------------------- + +! even N +!$OMP PARALLEL DO PRIVATE(JN,ZDLK,ZDLLDN,JK) +DO JN=2,KNSMAX,2 + ZDLK = 0.5_JPRD*PFN(JN,0) + ZDLLDN = 0.0_JPRD + ! represented by only even k + DO JK=2,JN,2 + ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 + ZDLK = ZDLK + PFN(JN,JK)*COS(DDI(JK)*ZDLX1) + ! normalised associated Legendre polynomial == \overbar{P_n}^1 + ZDLLDN = ZDLLDN + DDA(JN)*PFN(JN,JK)*DDI(JK)*SIN(DDI(JK)*ZDLX1) + ENDDO + PDDPOL(0,JN) = ZDLK + PDDPOL(1,JN) = ZDLLDN +ENDDO +!$OMP END PARALLEL DO +! odd N +!$OMP PARALLEL DO PRIVATE(JN,ZDLK,ZDLLDN,JK) +DO JN=1,KNSMAX,2 + ZDLK = 0.0_JPRD + ZDLLDN = 0.0_JPRD + ! represented by only odd k + DO JK=1,JN,2 + ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 + ZDLK = ZDLK + PFN(JN,JK)*COS(DDI(JK)*ZDLX1) + ! normalised associated Legendre polynomial == \overbar{P_n}^1 + ZDLLDN = ZDLLDN + DDA(JN)*PFN(JN,JK)*DDI(JK)*SIN(DDI(JK)*ZDLX1) + ENDDO + PDDPOL(0,JN) = ZDLK + PDDPOL(1,JN) = ZDLLDN +ENDDO +!$OMP END PARALLEL DO + +! ------------------------------------------------------------------ + +!* 2. Diagonal (the terms 0,0 and 1,1 have already been computed) +! Belousov, equation (23) +! ----------------------------------------------------------- + +ZDLS=ZDL1SITA*TINY(ZDLS) + +#ifdef VPP +!OCL SCALAR +#endif +DO JN=2,KNSMAX + PDDPOL(JN,JN)=PDDPOL(JN-1,JN-1)*ZDLSITA*DDH(JN) + IF ( ABS(PDDPOL(JN,JN)) < ZDLS ) PDDPOL(JN,JN)=0.0_JPRD +ENDDO + +! ------------------------------------------------------------------ + +!* 3. General recurrence (Belousov, equation 17) +! ----------------------------------------- + +DO JN=3,KNSMAX +!DIR$ IVDEP +!OCL NOVREC + DO JM=2,JN-1 + PDDPOL(JM,JN)=DDC(JM,JN)*PDDPOL(JM-2,JN-2)& + &-DDD(JM,JN)*PDDPOL(JM-2,JN-1)*ZDLX & + &+DDE(JM,JN)*PDDPOL(JM ,JN-1)*ZDLX + ENDDO +ENDDO + +! ------------------------------------------------------------------ + +END SUBROUTINE SUPOL +END MODULE SUPOL_MOD diff --git a/src/trans/gpu/internal/supolf_mod.F90 b/src/trans/gpu/internal/supolf_mod.F90 new file mode 100755 index 0000000..06d599d --- /dev/null +++ b/src/trans/gpu/internal/supolf_mod.F90 @@ -0,0 +1,284 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUPOLF_MOD +CONTAINS +SUBROUTINE SUPOLF(KM,KNSMAX,DDMU,DDPOL,KCHEAP) + +!**** *SUPOL * - Routine to compute the Legendre polynomials + +! Purpose. +! -------- +! For a given value of mu and M, computes the Legendre +! polynomials upto KNSMAX + +!** Interface. +! ---------- +! *CALL* *SUPOLF(KM,KNSMAX,DDMU,DDPOL,KCHEAP) + +! Explicit arguments : +! -------------------- +! KM : zonal wavenumber M +! KNSMAX : Truncation (triangular) +! DDMU : Abscissa at which the polynomials are computed (mu) +! DDPOL : Polynomials (the first index is m and the second n) +! KCHEAP : odd/even saving switch + + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Nils Wedi + George Mozdzynski + Mats Hamrud + +! Modifications. +! -------------- +! Original : 87-10-15 +! K. YESSAD (MAY 1998): modification to avoid underflow. +! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision +! on NEC +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPRD, JPIM + +USE TPM_POL + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KNSMAX +REAL(KIND=JPRD) ,INTENT(IN) :: DDMU +REAL(KIND=JPRD) ,INTENT(OUT) :: DDPOL(0:KNSMAX) + +INTEGER(KIND=JPIM),INTENT(IN), OPTIONAL :: KCHEAP + +REAL(KIND=JPRD) :: DLX,DLX1,DLSITA,DLSITA2,DL1SITA,DLK,DL1, DLKM1, DLKM2 + +INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(DLX) + +INTEGER(KIND=JPIM) :: JN, KKL, ICHEAP, IC, IEND +REAL(KIND=JPRD) :: DCL, DDL + +REAL(KIND=JPRD) :: ZFAC, ZLSITA, ZFAC0, ZFAC1, ZMULT, ZEPS + +INTEGER(KIND=JPIM) :: JCORR, ICORR3, ICORR(KNSMAX) +REAL(KIND=JPRD) :: ZSCALE, ZISCALE + +DCL(KKL)=SQRT((REAL(KKL-KM+1,JPKD)*REAL(KKL-KM+2,JPKD)* & + & REAL(KKL+KM+1,JPKD)*REAL(KKL+KM+2,JPKD))/(REAL(2*KKL+1,JPKD)*REAL(2*KKL+3,JPKD)*& + & REAL(2*KKL+3,JPKD)*REAL(2*KKL+5,JPKD))) +DDL(KKL)=(2.0_JPKD*REAL(KKL,JPKD)*REAL(KKL+1,JPKD)-2.0_JPKD*REAL(KM**2,JPKD)-1.0_JPKD)/ & + & (REAL(2*KKL-1,JPKD)*REAL(2*KKL+3,JPKD)) + +! ------------------------------------------------------------------ + +!* 1. First two columns. +! ------------------ + +ZEPS = EPSILON(ZSCALE) +ICORR3=0 + +ICHEAP=1 +IF( PRESENT(KCHEAP) ) THEN + ICHEAP = KCHEAP +ENDIF + +DLX=DDMU +DLX1=ACOS(DLX) +DLSITA2=1.0_JPRD-DLX*DLX +DLSITA=SQRT(DLSITA2) + +!* ordinary Legendre polynomials from series expansion +! --------------------------------------------------- + +! this is supol_fast just using single KM +IF( ABS(REAL(DLSITA,JPRD)) <= ZEPS ) THEN + DLX=1._JPRD + DLSITA=0._JPRD + DL1SITA=0._JPRD + DLSITA2=0._JPRD +ELSE + DL1SITA=1.0_JPRD/DLSITA +ENDIF + +DLKM2=1._JPRD +DLKM1=DLX + +IF( KM == 0 ) THEN + DDPOL(0)=DLKM2 + DDPOL(1)=DLKM1*DFB(1)/DFA(1) + DO JN=2,KNSMAX + DLK=DFF(JN)*DLX*DLKM1-DFG(JN)*DLKM2 + DL1=DFI(JN)*(DLKM1-DLX*DLK)*DL1SITA + DDPOL(JN)=DLK*DFB(JN)/DFA(JN) + DLKM2=DLKM1 + DLKM1=DLK + ENDDO +ELSEIF( KM == 1 ) THEN + DDPOL(0)=0 + DDPOL(1)=DLSITA*DFB(1) + DO JN=2,KNSMAX + DLK=DFF(JN)*DLX*DLKM1-DFG(JN)*DLKM2 + DL1=DFI(JN)*(DLKM1-DLX*DLK)*DL1SITA + DDPOL(JN)=DL1*DFB(JN) + DLKM2=DLKM1 + DLKM1=DLK + ENDDO +ELSE + +! ------------------------------------------------------------------ +!* KM >= 2 +! ------------------------------------------------------------------ + +! ZSCALE=1._JPRD/ZEPS + ! Maintaining the consistency with the CY41R1 reference + ZSCALE=1.0E+100_JPRD + ZISCALE=1.0E-100_JPRD + ! General case + !ZSCALE = 10._JPRD**( MAXEXPONENT(ZSCALE)/10) + !ZISCALE = 10._JPRD**(-MAXEXPONENT(ZSCALE)/10) + + IEND=KM/2 + ZLSITA=1._JPRD +! WRITE(*,*) 'SUPOLF: DLSITA2=',DLSITA2,' DDMU=',DDMU,' DLX=',DLX + DO JN=1,IEND + ZLSITA=ZLSITA*DLSITA2 + IF( ABS(ZLSITA) < ZISCALE ) THEN + ZLSITA=ZLSITA*ZSCALE + ICORR3=ICORR3+1 + ENDIF + ENDDO + IF( MOD(KM,2) == 1 ) ZLSITA=ZLSITA*DLSITA +! WRITE(*,*) 'SUPOLF: ZSCALE=',ZSCALE,' ICORR3=',ICORR3,' KM=',KM,' ZLSITA=',ZLSITA + + ZFAC0=1._JPRD + ZFAC=1._JPRD + DO JN=1,KM-1 + ZFAC=ZFAC*SQRT(REAL(2*JN-1,JPRD)) + ZFAC=ZFAC/SQRT(REAL(2*JN,JPRD)) + ENDDO + ZFAC=ZFAC*SQRT(REAL(2*KM-1,JPRD)) +! WRITE(*,*) 'SUPOLF: ZSCALE=',ZSCALE,' ICORR3=',ICORR3,' ZFAC=',ZFAC + + ZFAC1=1._JPRD + DO IC=0,MIN(KNSMAX-KM,3) + + ! (2m+i)! + ZFAC0 = ZFAC0 * REAL(2*KM+IC,JPRD) + + SELECT CASE (IC) + CASE (0) + ZMULT=ZFAC + CASE (1) + ZFAC=ZFAC*REAL(2*KM+IC,JPRD) + ZMULT=ZFAC*DLX + CASE (2) + ZMULT=0.5_JPRD*ZFAC*(REAL(2*KM+3,JPRD)*DLX*DLX-1._JPRD) + CASE (3) + ZFAC=ZFAC*REAL(2*KM+IC,JPRD) + ZMULT=(1._JPRD/6._JPRD)*DLX*ZFAC*(REAL(2*KM+5,JPRD)*DLX*DLX-3._JPRD) + END SELECT + + DDPOL(KM+IC) = ZLSITA*ZMULT*SQRT(2._JPRD*(REAL(KM+IC,JPRD)+0.5_JPRD)*ZFAC1/ZFAC0) + + ZFAC1=ZFAC1*REAL(IC+1,JPRD) + + ENDDO + + ICORR(:)=ICORR3 + IF( ICHEAP == 2 ) THEN + ! symmetric case + DO JN=KM+2,KNSMAX-2,2 + + IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN + DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE + DDPOL(JN)=DDPOL(JN)/ZSCALE + ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 + ENDIF + + DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) + ENDDO + + DO JN=KM,KNSMAX,2 + DO JCORR=1,ICORR(JN) + DDPOL(JN)=DDPOL(JN)/ZSCALE + IF( DDPOL(JN) < ZEPS ) THEN + DDPOL(JN) = ZEPS + ENDIF + ENDDO + ENDDO + + ELSEIF( ICHEAP == 3 ) THEN + ! antisymmetric case + DO JN=KM+3,KNSMAX-2,2 + + IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN + DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE + DDPOL(JN)=DDPOL(JN)/ZSCALE + ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 + ENDIF + + DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) + ENDDO + + DO JN=KM+1,KNSMAX,2 + DO JCORR=1,ICORR(JN) + DDPOL(JN)=DDPOL(JN)/ZSCALE + IF( DDPOL(JN) < ZEPS ) THEN + DDPOL(JN) = ZEPS + ENDIF + ENDDO + ENDDO + + ELSE + DO JN=KM+2,KNSMAX-2 + + IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN + DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE + DDPOL(JN-1)=DDPOL(JN-1)/ZSCALE + DDPOL(JN)=DDPOL(JN)/ZSCALE + DDPOL(JN+1)=DDPOL(JN+1)/ZSCALE + ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 + ENDIF + + DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) + + ENDDO + + DO JN=KM,KNSMAX + DO JCORR=1,ICORR(JN) + DDPOL(JN)=DDPOL(JN)/ZSCALE + IF( DDPOL(JN) < ZEPS ) THEN + DDPOL(JN) = ZEPS + ENDIF + ENDDO + ENDDO + + ENDIF + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUPOLF +END MODULE SUPOLF_MOD diff --git a/src/trans/gpu/internal/sustaonl_mod.F90 b/src/trans/gpu/internal/sustaonl_mod.F90 new file mode 100755 index 0000000..9b01dae --- /dev/null +++ b/src/trans/gpu/internal/sustaonl_mod.F90 @@ -0,0 +1,457 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUSTAONL_MOD +CONTAINS +SUBROUTINE SUSTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) + +!**** *SUSTAONL * - Routine to initialize parallel environment + +! Purpose. +! -------- +! Initialize D%NSTA and D%NONL. +! Calculation of distribution of grid points to processors : +! Splitting of grid in B direction + +!** Interface. +! ---------- +! *CALL* *SUSTAONL * + +! Explicit arguments : +! -------------------- +! KMEDIAP - mean number of grid points per PE +! KRESTM - number of PEs with one extra point +! LDWEIGHTED_DISTR -true if weighted distribution +! PWEIGHT -weight per grid-point if weighted distribution +! PMEDIAP -mean weight per PE if weighted distribution +! KPROCAGP -number of grid points per A set + +! Implicit arguments : +! -------------------- + + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! Modified 98-12-04 C. Fischer: merge with SUESTAONL (Aladin) +! R. El Khatib 05-Apr-2007 Enable back vectorization on NEC +! R. El Khatib 30-Apr-2013 Optimization +! R. El Khatib 26-Apr-2018 vectorization +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT, JPRD +USE MPL_MODULE ,ONLY : MPL_ALLGATHERV, MPL_RECV, MPL_SEND + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC + +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +! DUMMY +INTEGER(KIND=JPIM),INTENT(IN) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KRESTM +REAL(KIND=JPRBT),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDWEIGHTED_DISTR +REAL(KIND=JPRBT),INTENT(IN) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(:) + +! LOCAL + +INTEGER(KIND=JPIM) :: IXPTLAT(R%NDGL), ILSTPTLAT(R%NDGL),ISENDREQ(NPROC) +INTEGER(KIND=JPIM) :: ICHK(R%NDLON,R%NDGL), ICOMBUF(R%NDGL*N_REGIONS_EW*2) +INTEGER(KIND=JPIM) :: I1, I2, IBUFLEN, IDGLG, IDWIDE,& + &IGL, IGL1, IGL2, IGLOFF, IGPTA, & + &IGPTPRSETS, IGPTS, IGPTSP, ILEN, ILRECV, & + &ILSEND, INPLAT, INXLAT, IPOS, & + &IPROCB, IPTSRE, IRECV, IPE, & + &IREST, ISEND, ITAG, JA, JB, JGL, JL, JNPTSRE, & + &ILAT, ILON, ILOEN +INTEGER(KIND=JPIM),ALLOCATABLE :: ICOMBUFG(:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZWEIGHT(:,:) +INTEGER(KIND=JPIM) :: JJ, ILENG(NPROC), IOFF(NPROC) +REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 + +LOGICAL :: LLABORT +LOGICAL :: LLP1,LLP2 + +REAL(KIND=JPRBT) :: ZCOMP,ZPI,ZLON +REAL(KIND=JPRBT) :: ZDIVID(R%NDGL) +INTEGER(KIND=JPIM) :: ILATMD,ILATMD1 + +! ----------------------------------------------------------------- + +ZPI = 2.0_JPRBT*ASIN(1.0_JPRBT) + +IXPTLAT (:)=999999 +ILSTPTLAT(:)=999999 + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IDWIDE = R%NDGL/2 +IBUFLEN = R%NDGL*N_REGIONS_EW*2 +IDGLG = R%NDGL + +I1 = MAX( 1,D%NFRSTLAT(MY_REGION_NS)-D%NFRSTLOFF) +I2 = MIN(IDGLG,D%NLSTLAT (MY_REGION_NS)-D%NFRSTLOFF) + +ILEN = D%NLSTLAT(MY_REGION_NS) - D%NFRSTLAT(MY_REGION_NS)+1 + +IGPTPRSETS = SUM(G%NLOEN(1:D%NFRSTLAT(MY_REGION_NS)-1)) + +IF (D%LSPLIT) THEN + IF( LEQ_REGIONS )THEN + IGPTA=0 + DO JA=1,MY_REGION_NS-1 + IGPTA = IGPTA + KPROCAGP(JA) + ENDDO + IGPTS = KPROCAGP(MY_REGION_NS) + ELSE + IF (MY_REGION_NS <= KRESTM.OR.KRESTM == 0) THEN + IGPTS = KMEDIAP + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ELSE + IGPTS = KMEDIAP-1 + IGPTA = KMEDIAP*KRESTM+IGPTS*(MY_REGION_NS-1-KRESTM) + ENDIF + ENDIF +ELSE + IGPTA = IGPTPRSETS + IGPTS = SUM(G%NLOEN(D%NFRSTLAT(MY_REGION_NS):D%NLSTLAT(MY_REGION_NS))) +ENDIF + +IGPTSP = IGPTS/N_REGIONS(MY_REGION_NS) +IREST = IGPTS-N_REGIONS(MY_REGION_NS)*IGPTSP +IXPTLAT(1) = IGPTA-IGPTPRSETS+1 +ILSTPTLAT(1) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)) +INPLAT = G%NLOEN(D%NFRSTLAT(MY_REGION_NS))-IXPTLAT(1)+1 +DO JGL=2,ILEN + IXPTLAT(JGL) = 1 + ILSTPTLAT(JGL) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) + INPLAT = INPLAT+G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) +ENDDO +ILSTPTLAT(ILEN) = G%NLOEN(D%NLSTLAT(MY_REGION_NS))-INPLAT+IGPTS + +DO JB=1,N_REGIONS_EW + DO JGL=1,R%NDGL+N_REGIONS_NS-1 + D%NSTA(JGL,JB) = 0 + D%NONL(JGL,JB) = 0 + ENDDO +ENDDO + + +! grid point decomposition +! --------------------------------------- +IF( NPROC > 1 )THEN + DO JGL=1,ILEN + ZDIVID(JGL) = 360000.0_JPRBT/REAL(G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1),JPRBT) + ENDDO + IF( LDWEIGHTED_DISTR )THEN + ALLOCATE(ZWEIGHT(G%NLOEN(R%NDGL/2),R%NDGL)) + IGL=0 + DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IGL=IGL+1 + ZWEIGHT(JL,JGL)=PWEIGHT(IGL) + ENDDO + ENDDO + ZCOMP=0 + IGPTS=0 + ENDIF + + DO JB=1,N_REGIONS(MY_REGION_NS) + + IF( .NOT.LDWEIGHTED_DISTR )THEN + + IF (JB <= IREST) THEN + IPTSRE = IGPTSP+1 + ELSE + IPTSRE = IGPTSP + ENDIF + DO JNPTSRE=1,IPTSRE + + ILATMD = 360000 !! 360*1000 + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + ILATMD1 = NINT(REAL(IXPTLAT(JGL)-1,JPRB)*ZDIVID(JGL)) + IF(ILATMD1 < ILATMD) THEN + ILATMD = ILATMD1 + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ENDDO + + ELSE + + DO WHILE ( (JB < N_REGIONS(MY_REGION_NS) .AND. ZCOMP < PMEDIAP) & + & .OR. (JB == N_REGIONS(MY_REGION_NS) .AND. IGPTS < KPROCAGP(MY_REGION_NS)) ) + + IGPTS = IGPTS + 1 + ILATMD = 360000 !! 360*1000 + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + ILATMD1 = NINT(REAL(IXPTLAT(JGL)-1,JPRB)*ZDIVID(JGL)) + IF(ILATMD1 < ILATMD) THEN + ILATMD = ILATMD1 + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + IF(IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1)THEN + CALL ABORT_TRANS(' SUSTAONL: IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1') + ENDIF + ILON=D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + ILAT=D%NFRSTLAT(MY_REGION_NS)+INXLAT-1 + ILOEN=G%NLOEN(ILAT) + IF(ILON<1.OR.ILON>ILOEN)THEN + CALL ABORT_TRANS(' SUSTAONL: ILON<1.OR.ILON>ILOEN') + ENDIF + ZCOMP = ZCOMP + ZWEIGHT(ILON,ILAT) + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ENDDO + + ZCOMP = ZCOMP - PMEDIAP + + ENDIF + + ENDDO + + IF( LDWEIGHTED_DISTR )THEN + DEALLOCATE(ZWEIGHT) + ENDIF + + ! Exchange local partitioning info to produce global view + ! + + CALL GSTATS_BARRIER(795) + CALL GSTATS(814,0) + IF( LEQ_REGIONS )THEN + + ITAG = MTAGPART + IPOS = 0 + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + ILENG(NPRCIDS(IRECV))=ILEN + ENDDO + ENDDO + IOFF(1)=0 + DO JJ=2,NPROC + IOFF(JJ)=IOFF(JJ-1)+ILENG(JJ-1) + ENDDO + ALLOCATE(ICOMBUFG(SUM(ILENG(:)))) + CALL MPL_ALLGATHERV(ICOMBUF(1:ILSEND),ICOMBUFG,ILENG,CDSTRING='SUSTAONL') + DO JA=1,N_REGIONS_NS + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + IPOS = IOFF(NPRCIDS(IRECV)) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUFG(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUFG(IPOS) + ENDDO + ENDIF + ENDDO + ENDDO + DEALLOCATE(ICOMBUFG) + + ELSE + + ITAG = MTAGPART + IPOS = 0 + DO JB=1,N_REGIONS(MY_REGION_NS) + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,JB) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,JB) + ENDDO + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + DO JA=1,N_REGIONS_NS + CALL SET2PE(ISEND,JA,MY_REGION_EW,0,0) + IF(ISEND /= MYPROC) THEN + CALL MPL_SEND(ICOMBUF(1:ILSEND),KDEST=NPRCIDS(ISEND),KTAG=ITAG, & + & CDSTRING='SUSTAONL:') + ENDIF + ENDDO + + DO JA=1,N_REGIONS_NS + CALL SET2PE(IRECV,JA,MY_REGION_EW,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*N_REGIONS(JA)*2 + CALL MPL_RECV(ICOMBUF(1:ILEN),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG, & + & KOUNT=ILRECV,CDSTRING='SUSTAONL:') + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IPOS = 0 + DO JB=1,N_REGIONS(JA) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUF(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUF(IPOS) + ENDDO + ENDDO + ENDIF + ENDDO + + ENDIF + CALL GSTATS(814,1) + CALL GSTATS_BARRIER2(795) +ELSE + DO JGL=1,R%NDGL + D%NSTA(JGL,1) = 1 + D%NONL(JGL,1) = G%NLOEN(JGL) + ENDDO +ENDIF + +! Confirm consistency of global partitioning, specifically testing for +! multiple assignments of same grid point and unassigned grid points + +LLABORT = .FALSE. +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + ICHK(JL,JGL) = 1 + ENDDO +ENDDO +DO JA=1,N_REGIONS_NS + IGLOFF = D%NPTRFRSTLAT(JA) + DO JB=1,N_REGIONS(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL = IGLOFF+JGL-IGL1 + DO JL=D%NSTA(IGL,JB),D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + IF( ICHK(JL,JGL) /= 1 )THEN + WRITE(NOUT,'(" SUSTAONL : seta=",i4," setb=",i4,& + &" row=",I4," sta=",I4," INVALID GRID POINT")')& + &JA,JB,JGL,JL + WRITE(0,'(" SUSTAONL : seta=",i4," setb=",i4,& + &" ROW=",I4," sta=",I4," INVALID GRID POINT")')& + &JA,JB,JGL,JL + LLABORT = .TRUE. + ENDIF + ICHK(JL,JGL) = 2 + ENDDO + ENDDO + ENDDO +ENDDO +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IF( ICHK(JL,JGL) /= 2 )THEN + WRITE(NOUT,'(" SUSTAONL : row=",i4," sta=",i4,& + &" GRID POINT NOT ASSIGNED")') JGL,JL + LLABORT = .TRUE. + ENDIF + ENDDO +ENDDO +IF( LLABORT )THEN + WRITE(NOUT,'(" SUSTAONL : inconsistent partitioning")') + CALL ABORT_TRANS(' SUSTAONL: inconsistent partitioning') +ENDIF + + +IF (LLP1) THEN + WRITE(UNIT=NOUT,FMT='('' OUTPUT FROM ROUTINE SUSTAONL '')') + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' PARTITIONING INFORMATION '')') + WRITE(UNIT=NOUT,FMT='('' '')') + IPROCB = MIN(32,N_REGIONS_EW) + WRITE(UNIT=NOUT,FMT='(17X," SETB=",32(1X,I5))') (JB,JB=1,IPROCB) + DO JA=1,N_REGIONS_NS + IPROCB = MIN(32,N_REGIONS(JA)) + WRITE(UNIT=NOUT,FMT='('' '')') + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL=IGLOFF+JGL-IGL1 + WRITE(UNIT=NOUT,FMT='(" SETA=",I5," LAT=",I5," NSTA=",& + &32(1X,I5))') JA,JGL,(D%NSTA(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='(" SETA=",I5," LAT=",I5," D%NONL=",& + &32(1X,I5))') JA,JGL,(D%NONL(IGL,JB),JB=1,IPROCB) + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' '')') +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE SUSTAONL +END MODULE SUSTAONL_MOD diff --git a/src/trans/gpu/internal/sutrle_mod.F90 b/src/trans/gpu/internal/sutrle_mod.F90 new file mode 100755 index 0000000..5f5b3c4 --- /dev/null +++ b/src/trans/gpu/internal/sutrle_mod.F90 @@ -0,0 +1,366 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUTRLE_MOD +CONTAINS +SUBROUTINE SUTRLE(PNM,KGL,KLOOP) + +!**** *sutrle * - transposition of Legendre polynomials during set-up + +! Purpose. +! -------- +! transposition of Legendre polynomials during set-up + +!** Interface. +! ---------- +! *call* *sutrle(pnm) + +! Explicit arguments : +! -------------------- + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! P.Towers : 10-01-12 Corrected over allocation of ZSNDBUF (XT4 fix) +! G.Mozdzynski: March 2011 Support 2D (RW,RV) initialisation of legendre coeffs +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPRD, JPIM +USE MPL_MODULE ,ONLY : MPL_ALLREDUCE, MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_NON_BLOCKING_STANDARD + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MTAGLETR, NCOMBFLEN, NPRCIDS, NPRTRW, NPRTRV, & + & MYSETV, MYSETW, NPROC +USE TPM_FIELDS ,ONLY : F +USE SET2PE_MOD ,ONLY : SET2PE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +REAL(KIND=JPRD),INTENT(IN) :: PNM(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KGL +INTEGER(KIND=JPIM),INTENT(IN) :: KLOOP + +! LOCAL + +REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFV(:),ZRCVBUFV(:,:) +REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFW(:,:),ZRCVBUFW(:,:) +INTEGER(KIND=JPIM) :: ILREC, IM, IPOS, & + & IRECVSET, IRECV, ISEND, ISENDSET, ITAG,ISENDSIZE, IRECVSIZE, & + & J, JM, JMLOC, JN, JV, JROC ,IOFFT, IOFFG, IGL, ISREQ, IRREQ +INTEGER(KIND=JPIM) :: ISENDREQ(MAX(NPRTRW,NPRTRV)) +INTEGER(KIND=JPIM) :: IRECVREQ(MAX(NPRTRW,NPRTRV)) +INTEGER(KIND=JPIM) :: IGLVS(NPRTRV) +INTEGER(KIND=JPIM) :: IGLVR(NPRTRV) +INTEGER(KIND=JPIM) :: IPOSW(NPRTRW) + +! ------------------------------------------------------------------ + +!* 0. Some initializations. +! --------------------- + +ITAG = MTAGLETR+KLOOP + +! Perform barrier synchronisation to guarantee all processors have +! completed all previous communication + +IF( NPROC > 1 .AND. KLOOP ==1)THEN + CALL GSTATS(783,0) + CALL MPL_BARRIER(CDSTRING='SUTRLE:') + CALL GSTATS(783,1) +ENDIF + +! +! First do communications in NPRTRV direction +! + +!* Calculate send buffer size + +IF(KGL > 0) THEN + ISENDSIZE = R%NSPOLEG+1 +ELSE + ISENDSIZE=1 +ENDIF + +ALLOCATE (ZSNDBUFV(ISENDSIZE)) +ALLOCATE (ZRCVBUFV(R%NSPOLEG+1,NPRTRV)) + +!* copy data to be sent into zsndbufv + +ZSNDBUFV(1) = KGL +IF(KGL > 0) THEN + CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(J) + DO J=1,R%NSPOLEG + ZSNDBUFV(J+1) = PNM(J) + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1141,1) +ENDIF + +ISREQ = 0 +DO JROC=1,NPRTRV-1 + ISEND = MYSETV-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRV + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,MYSETW,ISENDSET) + ISREQ = ISREQ+1 + CALL GSTATS(801,0) + CALL MPL_SEND(ZSNDBUFV(1:ISENDSIZE),KDEST=NPRCIDS(ISEND), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + + +IRREQ=0 +DO JROC=1,NPRTRV-1 + IRECV = MYSETV+JROC + IF (IRECV > NPRTRV) IRECV = IRECV-NPRTRV + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,MYSETW,IRECVSET) + IRREQ = IRREQ+1 + CALL GSTATS(801,0) + CALL MPL_RECV(ZRCVBUFV(:,IRECVSET),KSOURCE=NPRCIDS(IRECV), & + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + +IF(ISREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +IF(IRREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +!* copy data from buffer to f%rpnm +CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,IRECV,IRECVSET,IGL,JMLOC,IM,IOFFT,IOFFG,JN) +DO JROC=1,NPRTRV-1 + IRECV = MYSETV+JROC + IF (IRECV > NPRTRV) IRECV = IRECV-NPRTRV + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,MYSETW,IRECVSET) + IGL = ZRCVBUFV(1,IRECVSET) + IGLVS(IRECVSET)=IGL + IF( IGL > 0 )THEN + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + IOFFT = D%NPMT(IM) + IOFFG = D%NPMG(IM) + DO JN=1,R%NTMAX-IM+2 + F%RPNM(IGL,IOFFT+JN) = ZRCVBUFV(1+IOFFG+JN,IRECVSET) + ENDDO + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO + +DEALLOCATE (ZSNDBUFV) + +!* copy data from pnm to rpnm + +IGLVS(MYSETV)=KGL +IF(KGL > 0) THEN + ZRCVBUFV(1,MYSETV)=KGL + ZRCVBUFV(2:R%NSPOLEG+1,MYSETV)=PNM(1:R%NSPOLEG) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JMLOC,IM,IOFFT,IOFFG,JN) + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + IOFFT = D%NPMT(IM) + IOFFG = D%NPMG(IM) + DO JN=1,R%NTMAX-IM+2 + F%RPNM(KGL,IOFFT+JN) = PNM(IOFFG+JN) + ENDDO + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1141,1) + + +! +! Now do communications in the NPRTRW direction +! + +!* Calculate send buffer size + +ISENDSIZE=0 +DO JROC=1,NPRTRW-1 + ISEND = MYSETW-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRW + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV) + IPOS = 0 + DO JM=0,R%NSMAX + IF (ISENDSET == D%NPROCM(JM) ) IPOS = IPOS + R%NTMAX-JM+2 + ENDDO + ISENDSIZE = MAX(IPOS,ISENDSIZE) +ENDDO +ISENDSIZE=ISENDSIZE*NPRTRV+NPRTRV +IRECVSIZE=ISENDSIZE +IF( NPROC > 1 )THEN + CALL GSTATS(801,0) + CALL MPL_ALLREDUCE(IRECVSIZE,'MAX',CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDIF + +ALLOCATE (ZSNDBUFW(ISENDSIZE,NPRTRW)) +ALLOCATE (ZRCVBUFW(IRECVSIZE,NPRTRW)) + +CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,ISEND,ISENDSET,IPOS,JV,IGL,JM,JN) +DO JROC=1,NPRTRW-1 + ISEND = MYSETW-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRW + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV) +!* copy data to be sent into zsndbufw + IPOS=0 + DO JV=1,NPRTRV + IPOS=IPOS+1 + ZSNDBUFW(IPOS,ISENDSET) = IGLVS(JV) + ENDDO + DO JV=1,NPRTRV + IGL = IGLVS(JV) + IF( IGL > 0 )THEN + DO JM=0,R%NSMAX + IF (ISENDSET == D%NPROCM(JM) ) THEN + DO JN=1,R%NTMAX-JM+2 + IPOS = IPOS + 1 + ZSNDBUFW(IPOS,ISENDSET) = ZRCVBUFV(1+D%NPMG(JM)+JN,JV) + ENDDO + ENDIF + ENDDO + ENDIF + ENDDO + IPOSW(ISENDSET)=IPOS +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1141,1) + +ISREQ = 0 +DO JROC=1,NPRTRW-1 + ISEND = MYSETW-JROC + IF (ISEND <= 0) ISEND = ISEND+NPRTRW + ISENDSET = ISEND + CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV) + ISENDSIZE = IPOSW(ISENDSET) + ISREQ = ISREQ+1 + CALL GSTATS(801,0) + CALL MPL_SEND(ZSNDBUFW(1:ISENDSIZE,ISENDSET),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + + +IRREQ = 0 +DO JROC=1,NPRTRW-1 + + IRECV = MYSETW+JROC + IF (IRECV > NPRTRW) IRECV = IRECV-NPRTRW + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,IRECVSET,MYSETV) +!* receive message (if not empty) + + IRREQ = IRREQ+1 + CALL GSTATS(801,0) + CALL MPL_RECV(ZRCVBUFW(:,IRECVSET),KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),& + & KTAG=ITAG,CDSTRING='SUTRLE:') + CALL GSTATS(801,1) +ENDDO + +IF(ISREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +IF(IRREQ > 0) THEN + CALL GSTATS(801,0) + CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), & + & CDSTRING='SUTRLE: WAIT') + CALL GSTATS(801,1) +ENDIF + +CALL GSTATS(1141,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,IRECV,IRECVSET,IPOS,IGLVR,JV,IGL,JMLOC,IM,IOFFT,JN) +DO JROC=1,NPRTRW-1 + IRECV = MYSETW+JROC + IF (IRECV > NPRTRW) IRECV = IRECV-NPRTRW + IRECVSET = IRECV + CALL SET2PE(IRECV,0,0,IRECVSET,MYSETV) +!* copy data from buffer to f%rpnm + IPOS=0 + DO JV=1,NPRTRV + IPOS=IPOS+1 + IGLVR(JV)=ZRCVBUFW(IPOS,IRECVSET) + ENDDO + DO JV=1,NPRTRV + IGL = IGLVR(JV) + IF( IGL > 0 )THEN + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + IOFFT = D%NPMT(IM) + DO JN=1,R%NTMAX-IM+2 + IPOS = IPOS + 1 + F%RPNM(IGL,IOFFT+JN) = ZRCVBUFW(IPOS,IRECVSET) + ENDDO + ENDDO + ENDIF + ENDDO +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1141,1) + +DEALLOCATE (ZRCVBUFV) +DEALLOCATE (ZSNDBUFW) +DEALLOCATE (ZRCVBUFW) + +IF( NPROC > 1 .AND. KLOOP ==1)THEN + CALL GSTATS(783,0) + CALL MPL_BARRIER(CDSTRING='SUTRLE:') + CALL GSTATS(783,1) +ENDIF +END SUBROUTINE SUTRLE +END MODULE SUTRLE_MOD diff --git a/src/trans/gpu/internal/suwavedi_mod.F90 b/src/trans/gpu/internal/suwavedi_mod.F90 new file mode 100755 index 0000000..171f3b2 --- /dev/null +++ b/src/trans/gpu/internal/suwavedi_mod.F90 @@ -0,0 +1,183 @@ +! (C) Copyright 1996- ECMWF. +! (C) Copyright 1996- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SUWAVEDI_MOD +CONTAINS +SUBROUTINE SUWAVEDI(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& + &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS,& + &KPTRMS,KALLMS,KDIM0G) + +!**** *SUWAVEDI * - Routine to initialize spectral wave distribution + +! Purpose. +! -------- +! Initialize arrays controlling spectral wave distribution + +!** Interface. +! ---------- +! *CALL* *SUWAVEDI * + +! Explicit arguments : +! -------------------- +! KSMAX - Spectral truncation limit (input) +! KTMAX - Overtruncation for KSMAX (input) +! KPRTRW - Number of processors in A-direction (input) +! KMYSETW - A-set for present processor (input) +! KASM0 - Offsets for spectral waves (output) +! KSPOLEGL - Local version of NSPOLEG (output) +! KPROCM - Where a certain spectral wave belongs (output) +! KUMPP - Number of spectral waves on this PE (output) +! KSPEC - Local version on NSPEC (output) +! KSPEC2 - Local version on NSPEC2 (output) +! KSPEC2MX - Maximum KSPEC2 across PEs (output) +! KPOSSP - Global spectral fields partitioning (output) +! KMYMS - This PEs spectral zonal wavenumbers (output) + +! Implicit arguments : NONE +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 96-01-10 +! L.Isaksen: 96-02-02 - Calculation of KSPEC2MX added +! K.YESSAD : 97-02-18 - Add KTMAX, bug correction for KSPOLEGL. +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +IMPLICIT NONE + + +! DUMMY +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KPRTRW +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETW +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPOLEGL + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KASM0(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPROCM(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KUMPP(KPRTRW) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KMYMS(KSMAX+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPOSSP(KPRTRW+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPTRMS(KPRTRW) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KALLMS(KSMAX+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KDIM0G(0:KSMAX) + +! LOCAL +INTEGER(KIND=JPIM) :: IK, IL, IND, IPOS, ISPEC2P, JA, JM,JMLOC,IM +INTEGER(KIND=JPIM) :: ISPOLEGL,ISPEC2MX,IASM0(0:KSMAX),IPROCM(0:KSMAX) +INTEGER(KIND=JPIM) :: IUMPP(KPRTRW),IMYMS(KSMAX+1),IPOSSP(KPRTRW+1) +INTEGER(KIND=JPIM) :: IPTRMS(KPRTRW),IALLMS(KSMAX+1),IDIM0G(0:KSMAX) +INTEGER(KIND=JPIM) :: ISPEC(KPRTRW),IC(KPRTRW) + + +! ----------------------------------------------------------------- + +!* 1. Initialize partitioning of wave numbers to PEs +! ---------------------------------------------- + +ISPEC(:) = 0 + +IUMPP(:) = 0 +IASM0(:) = -99 +ISPOLEGL = 0 + +IL = 1 +IND = 1 +IK = 0 +IPOS = 1 +DO JM=0,KSMAX + IK = IK + IND + IF (IK > KPRTRW) THEN + IK = KPRTRW + IND = -1 + ELSEIF (IK < 1) THEN + IK = 1 + IND = 1 + ENDIF + IPROCM(JM) = IK + ISPEC(IK) = ISPEC(IK)+KSMAX-JM+1 + IUMPP(IK) = IUMPP(IK)+1 + IF (IK == KMYSETW) THEN + ISPOLEGL = ISPOLEGL +KTMAX+1-JM+1 + IMYMS(IL) = JM + IASM0(JM) = IPOS + IPOS = IPOS+(KSMAX-JM+1)*2 + IL = IL+1 + ENDIF +ENDDO + +IPOSSP(1) = 1 +ISPEC2P = 2*ISPEC(1) +ISPEC2MX = ISPEC2P +IPTRMS(1) = 1 +DO JA=2,KPRTRW + IPOSSP(JA) = IPOSSP(JA-1)+ISPEC2P + ISPEC2P = 2*ISPEC(JA) + ISPEC2MX = MAX(ISPEC2MX,ISPEC2P) +! pointer to the first wave number of a given wave-set in NALLMS array + IPTRMS(JA) = IPTRMS(JA-1)+IUMPP(JA-1) +ENDDO +IPOSSP(KPRTRW+1) = IPOSSP(KPRTRW)+ISPEC2P + +! IALLMS : wave numbers for all wave-set concatenated together to give all +! wave numbers in wave-set order. +IC(:) = 0 +DO JM=0,KSMAX + IALLMS(IC(IPROCM(JM))+IPTRMS(IPROCM(JM))) = JM + IC(IPROCM(JM)) = IC(IPROCM(JM))+1 +ENDDO + +IPOS = 1 +DO JA=1,KPRTRW + DO JMLOC=1,IUMPP(JA) + IM = IALLMS(IPTRMS(JA)+JMLOC-1) + IDIM0G(IM) = IPOS + IPOS = IPOS+(KSMAX+1-IM)*2 + ENDDO +ENDDO + +IF(PRESENT(KSPEC)) KSPEC = ISPEC(KMYSETW) +IF(PRESENT(KSPEC2)) KSPEC2 = 2*ISPEC(KMYSETW) +IF(PRESENT(KSPEC2MX)) KSPEC2MX = ISPEC2MX +IF(PRESENT(KSPOLEGL)) KSPOLEGL = ISPOLEGL + +IF(PRESENT(KASM0)) KASM0(:) = IASM0(:) +IF(PRESENT(KPROCM)) KPROCM(:) = IPROCM(:) +IF(PRESENT(KUMPP)) KUMPP(:) = IUMPP(:) +IF(PRESENT(KMYMS)) KMYMS(:) = IMYMS(:) +IF(PRESENT(KPOSSP)) KPOSSP(:) = IPOSSP(:) +IF(PRESENT(KPTRMS)) KPTRMS(:) = IPTRMS(:) +IF(PRESENT(KALLMS)) KALLMS(:) = IALLMS(:) +IF(PRESENT(KDIM0G)) KDIM0G(:) = IDIM0G(:) + +END SUBROUTINE SUWAVEDI +END MODULE SUWAVEDI_MOD + + diff --git a/src/trans/gpu/internal/tpm_constants.F90 b/src/trans/gpu/internal/tpm_constants.F90 new file mode 100755 index 0000000..6f8ab2b --- /dev/null +++ b/src/trans/gpu/internal/tpm_constants.F90 @@ -0,0 +1,20 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_CONSTANTS +USE PARKIND_ECTRANS ,ONLY : JPRBT + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRBT) :: RA ! Radius of Earth + +END MODULE TPM_CONSTANTS diff --git a/src/trans/gpu/internal/tpm_ctl.F90 b/src/trans/gpu/internal/tpm_ctl.F90 new file mode 100755 index 0000000..7b967ee --- /dev/null +++ b/src/trans/gpu/internal/tpm_ctl.F90 @@ -0,0 +1,43 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_CTL + +USE PARKIND1 ,ONLY : JPIM +USE, INTRINSIC :: iso_c_binding, ONLY: C_PTR, C_NULL_PTR +USE SHAREDMEM_MOD ,ONLY : SHAREDMEM +IMPLICIT NONE + +SAVE + + +TYPE CTL_TYPE + +LOGICAL :: LREAD_LEGPOL = .FALSE. +LOGICAL :: LWRITE_LEGPOL = .FALSE. +CHARACTER(LEN=256) :: CLEGPOLFNAME='legpol_file' +CHARACTER(LEN=4) :: CIO_TYPE='file' +TYPE(SHAREDMEM) :: STORAGE + +END TYPE CTL_TYPE + + +TYPE(CTL_TYPE),ALLOCATABLE,TARGET :: CTL_RESOL(:) +TYPE(CTL_TYPE),POINTER :: C + + +END MODULE TPM_CTL + + + + + + + diff --git a/src/trans/gpu/internal/tpm_dim.F90 b/src/trans/gpu/internal/tpm_dim.F90 new file mode 100755 index 0000000..0e93894 --- /dev/null +++ b/src/trans/gpu/internal/tpm_dim.F90 @@ -0,0 +1,58 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_DIM + +! Module for dimensions. + +USE PARKIND1 ,ONLY : JPIM + +IMPLICIT NONE + +SAVE + +TYPE DIM_TYPE +! SPECTRAL SPACE DIMENSIONS + + INTEGER(KIND=JPIM) :: NSMAX ! Truncation order + INTEGER(KIND=JPIM) :: NTMAX ! Truncation order for tendencies + INTEGER(KIND=JPIM) :: NSPOLEG ! Number of Legandre polynomials + INTEGER(KIND=JPIM) :: NSPEC_G ! Number of complex spectral coefficients (global) + INTEGER(KIND=JPIM) :: NSPEC2_G ! 2*NSPEC_G + +! COLLOCATION GRID DIMENSIONS + + INTEGER(KIND=JPIM) :: NDGL ! Number of rows of latitudes + INTEGER(KIND=JPIM) :: NDLON ! Maximum number of longitude points (near equator) + INTEGER(KIND=JPIM) :: NDGNH ! Number of rows in northern hemisphere + +! Legendre transform dimensions + INTEGER(KIND=JPIM) :: NLEI1 ! R%NSMAX+4+MOD(R%NSMAX+4+1,2) + INTEGER(KIND=JPIM) :: NLEI3 ! R%NDGNH+MOD(R%NDGNH+2,2) + INTEGER(KIND=JPIM) :: NLED3 ! R%NTMAX+2+MOD(R%NTMAX+3,2) + INTEGER(KIND=JPIM) :: NLED4 ! R%NTMAX+3+MOD(R%NTMAX+4,2) + +! Width of E'-zone + INTEGER(KIND=JPIM) :: NNOEXTZL ! Longitude direction + INTEGER(KIND=JPIM) :: NNOEXTZG ! Latitude direction + +END TYPE DIM_TYPE + +TYPE(DIM_TYPE),ALLOCATABLE,TARGET :: DIM_RESOL(:) +TYPE(DIM_TYPE),POINTER :: R + +! flat copies of above +INTEGER(KIND=JPIM) :: R_NSMAX ! Truncation order +INTEGER(KIND=JPIM) :: R_NTMAX ! Truncation order for tendencies +INTEGER(KIND=JPIM) :: R_NDGNH ! Number of rows in northern hemisphere +INTEGER(KIND=JPIM) :: R_NDGL ! Number of rows of latitudes +INTEGER(KIND=JPIM) :: R_NNOEXTZL ! Longitude direction + +END MODULE TPM_DIM diff --git a/src/trans/gpu/internal/tpm_distr.F90 b/src/trans/gpu/internal/tpm_distr.F90 new file mode 100755 index 0000000..3e5264c --- /dev/null +++ b/src/trans/gpu/internal/tpm_distr.F90 @@ -0,0 +1,189 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_DISTR + +! Module for distributed memory environment. + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +IMPLICIT NONE + +SAVE + +!* Variables describing distributed memory parallelization + +INTEGER(KIND=JPIM) :: NPROC ! Number of processors (NPRGPNS*NPRGPEW) +INTEGER(KIND=JPIM) :: NPRGPNS ! No. of sets in N-S direction (grid-point space) +INTEGER(KIND=JPIM) :: NPRGPEW ! No. of sets in E-W direction (grid-point space) +INTEGER(KIND=JPIM) :: NPRTRW ! No. of sets in wave direction (spectral space) +INTEGER(KIND=JPIM) :: NPRTRV ! NPROC/NPRTRW +INTEGER(KIND=JPIM) :: NPRTRNS ! No. of sets in N-S direction (Fourier space) + ! (always equal to NPRTRW) +LOGICAL :: LEQ_REGIONS ! TRUE - Use new eq_regions partitioning + ! FALSE- Use old NPRGPNS x NPRGPEW partitioning +INTEGER(KIND=JPIM) :: MYPROC ! My processor number +INTEGER(KIND=JPIM) :: MYSETW ! My set number in wave direction (spectral space) +INTEGER(KIND=JPIM) :: MYSETV ! My set number in field direction(S.S and F.S) +INTEGER(KIND=JPIM) :: NCOMBFLEN ! Size of communication buffer + +INTEGER(KIND=JPIM) :: MTAGLETR ! Tag +INTEGER(KIND=JPIM) :: MTAGML ! Tag +INTEGER(KIND=JPIM) :: MTAGLG ! Tag +INTEGER(KIND=JPIM) :: MTAGGL ! Tag +INTEGER(KIND=JPIM) :: MTAGPART ! Tag +INTEGER(KIND=JPIM) :: MTAGDISTSP ! Tag +INTEGER(KIND=JPIM) :: MTAGLM ! Tag +INTEGER(KIND=JPIM) :: MTAGDISTGP ! Tag + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPRCIDS(:) ! Array containing the process ids + +TYPE DISTR_TYPE +LOGICAL :: LGRIDONLY ! TRUE - only grid space structures are available +LOGICAL :: LWEIGHTED_DISTR ! TRUE - weighted distribution +LOGICAL :: LSPLIT ! TRUE - latitudes are shared between a-sets +LOGICAL :: LCPNMONLY ! TRUE - Compute Legendre polynomials only, not FFTs + +! SPECTRAL SPACE + +INTEGER(KIND=JPIM) :: NUMP ! No. of spectral waves handled by this processor +INTEGER(KIND=JPIM) :: NSPEC ! No. of complex spectral coefficients (on this PE) +INTEGER(KIND=JPIM) :: NSPEC2 ! 2*NSPEC +INTEGER(KIND=JPIM) :: NSPEC2MX ! maximun NSPEC2 among all PEs +INTEGER(KIND=JPIM) :: NTPEC2 ! cf. NSPEC2 but for truncation NTMAX +INTEGER(KIND=JPIM) :: NUMTP ! cf. NUMP but for truncation NTMAX + +INTEGER(KIND=JPIM) :: NSPOLEGL ! No. of legendre polynomials on this PE +INTEGER(KIND=JPIM) :: NLEI3D ! (NLEI3-1)/NPRTRW+1 + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: MYMS(:) ! Wave numbers handled by this PE +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NUMPP(:) ! No. of wave numbers each wave set is + ! responsible for +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPOSSP(:) ! Not needed in transform? +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCM(:) ! Process that does the calc. for certain + ! wavenumber M +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NDIM0G(:) ! Defines partitioning of global spectral + ! fields among PEs + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NASM0(:) ! Address in a spectral array of (m, n=m) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NATM0(:) ! Same as NASM0 but for NTMAX +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NALLMS(:) ! Wave numbers for all a-set concatenated + ! together to give all wave numbers in a-set + ! order. Used when global spectral norms + ! have to be gathered. +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRMS(:) ! Pointer to the first wave number of a given + ! a-set in nallms array. + + +! Legendre polynomials + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLATLS(:,:) ! First latitude for which each a-set,bset calcul. +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLATLE(:,:) ! Last latitude for which each a-set,bset calcul. + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMT(:) ! Adress for legendre polynomial for + ! given M (NTMAX) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMS(:) ! Adress for legendre polynomial for + ! given M (NSMAX) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMG(:) ! Global version of NPMS + +! FOURIER SPACE + +INTEGER(KIND=JPIM) :: NDGL_FS ! Number of rows of latitudes for which this process is + ! performing Fourier Space calculations + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGTF(:) ! Offset for specific latitude in + ! Fourier/gridpoint buffer +INTEGER(KIND=JPIM) :: NLENGTF ! Second dimension of Fourier/gridpoint buffer + ! (sum of (NLOEN+3) over local latitudes) + +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NULTPP(:) ! No of lats. for each wave_set (F.S) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCL(:) ! Process responsible for each lat. (F.S) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLS(:) ! Pointer to first lat. (F.S) + +! NSTAGT0B to NLENGT1B: help arrays for spectral to fourier space transposition +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGT0B(:) ! Start adresses for segments within buffer + ! (according to processors to whom data + ! is going to be sent) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGT1B(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPNTGTB0(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPNTGTB1(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTSFTB(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTSGTB(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: MSTABF(:) +INTEGER(KIND=JPIM) :: NLENGT0B ! dimension +INTEGER(KIND=JPIM) :: NLENGT1B ! dimension + +! GRIDPOINT SPACE + +INTEGER(KIND=JPIM) :: NDGL_GP ! D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NFRSTLAT(:) ! First lat of each a-set +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLSTLAT(:) ! Last lat of each a-set +INTEGER(KIND=JPIM) :: NFRSTLOFF ! Offset for first lat of own a-set + ! i.e. NFRSTLOFF=NFRSTLAT(MYSETA)-1 +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLAT(:) ! Pointer to start of latitude +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRFRSTLAT(:) ! Pointer to the first latitude of each + ! a-set in NSTA and NONL arrays +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLSTLAT(:) ! Pointer to the last latitude of each + ! a-set in NSTA and NONL arrays +INTEGER(KIND=JPIM) :: NPTRFLOFF ! Offset for pointer to the first latitude of own a-set + ! NSTA and NONL arrays, i.e. NPTRFRSTLAT(MYSETA)-1 +LOGICAL ,ALLOCATABLE :: LSPLITLAT(:) ! True if latitude is split over 2 a-sets + +! NSTA(R%NDGL+NPRGPNS-1,NPRGPEW) : Position of first grid column +! for the latitudes on a processor. The information is +! available for all processors. The b-sets are distinguished +! by the last dimension of NSTA(). The latitude band for +! each a-set is addressed by NPTRFRSTLAT(JASET), +! NPTRLSTLAT(JASET), and NPTRFLOFF=NPTRFRSTLAT(MYSETA) on +! this processors a-set. Each split latitude has two entries +! in NSTA(,:) which necessitates the rather complex +! addressing of NSTA(,:) and the overdimensioning of NSTA by +! NPRGPNS. +! NONL(R%NDGL+NPRGPNS-1,NPRGPEW) : Number of grid columns for +! the latitudes on a processor. Similar to NSTA() in data +! structure. +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTA(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NONL(:,:) + +INTEGER(KIND=JPIM) :: NGPTOT ! Total number of grid columns on this PE +INTEGER(KIND=JPIM) :: NGPTOTG ! Total number of grid columns on the Globe +INTEGER(KIND=JPIM) :: NGPTOTMX ! Maximum number of grid columns on any of the PEs +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NGPTOTL(:,:) ! Number of grid columns on each PE. + +REAL(KIND=JPRBT) ,ALLOCATABLE :: RWEIGHT(:) ! Weight per grid-point (if weighted distribution) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCA_GP(:) ! Number of grid-points per a-set + +INTEGER(KIND=JPIM) :: IADJUST_D +INTEGER(KIND=JPIM) :: IADJUST_I + +END TYPE DISTR_TYPE + +!flat versions of the above +INTEGER(KIND=JPIM) :: D_NUMP ! No. of spectral waves handled by this processor +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MYMS(:) ! Wave numbers handled by this PE +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT0B(:) ! Start adresses for segments within buffer + ! (according to processors to whom data + ! is going to be sent) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT1B(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCL(:) ! Process responsible for each lat. (F.S) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB1(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NASM0(:) ! Address in a spectral array of (m, n=m) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGTF(:) ! Offset for specific latitude in +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MSTABF(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB0(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCM(:) ! Process that does the calc. for certain +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPTRLS(:) ! Pointer to first lat. (F.S) + + +TYPE(DISTR_TYPE),ALLOCATABLE,TARGET :: DISTR_RESOL(:) +TYPE(DISTR_TYPE),POINTER :: D + +END MODULE TPM_DISTR + diff --git a/src/trans/gpu/internal/tpm_fft.F90 b/src/trans/gpu/internal/tpm_fft.F90 new file mode 100755 index 0000000..01594a8 --- /dev/null +++ b/src/trans/gpu/internal/tpm_fft.F90 @@ -0,0 +1,29 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FFT +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +! Module for Fourier transforms. + +IMPLICIT NONE + +SAVE + +TYPE FFT_TYPE + REAL(KIND=JPRBT) ,ALLOCATABLE :: TRIGS(:,:) ! list of trigonometric function values + INTEGER(KIND=JPIM),ALLOCATABLE :: NFAX(:,:) ! list of factors of truncation +END TYPE FFT_TYPE + +TYPE(FFT_TYPE),ALLOCATABLE,TARGET :: FFT_RESOL(:) +TYPE(FFT_TYPE),POINTER :: T + + +END MODULE TPM_FFT diff --git a/src/trans/gpu/internal/tpm_ffth.F90 b/src/trans/gpu/internal/tpm_ffth.F90 new file mode 100755 index 0000000..9801ea7 --- /dev/null +++ b/src/trans/gpu/internal/tpm_ffth.F90 @@ -0,0 +1,239 @@ +! (C) Copyright 2014- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FFTH + + ! Author. + ! ------- + ! George Mozdzynski + ! + ! Modifications. + ! -------------- + ! Original October 2014 + + USE, INTRINSIC :: ISO_C_BINDING + + USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT + !USE MPL_MODULE ,ONLY : MPL_MYRANK + + IMPLICIT NONE + + SAVE + + PRIVATE + PUBLIC CREATE_PLAN_FFT, DESTROY_PLAN_FFT, DESTROY_ALL_PLANS_FFT, INIT_PLANS_FFT, EXECUTE_PLAN_FFT, & + & FFTH_RESOL, TC + + TYPE FFTH_TYPE + INTEGER(KIND=C_INT),POINTER :: N_PLANS(:) + TYPE(FFTH_PLAN),POINTER :: FFTH_PLANS(:) + INTEGER(KIND=C_INT) :: N_MAX=0 + INTEGER(KIND=C_INT) :: N_MAX_PLANS=8 + END TYPE FFTH_TYPE + + + TYPE FFTH_PLAN + INTEGER(KIND=C_INT) :: NPLAN_ID=123456 + TYPE(C_PTR) :: NPLAN + INTEGER(KIND=C_INT) :: N + INTEGER(KIND=C_INT) :: NLOT + INTEGER(KIND=C_INT) :: NTYPE + LOGICAL :: LNONSTRIDED + TYPE(FFTH_PLAN),POINTER :: NEXT_PLAN => NULL() + END TYPE FFTH_PLAN + + TYPE(FFTH_TYPE),ALLOCATABLE,TARGET :: FFTH_RESOL(:) + TYPE(FFTH_TYPE),POINTER :: TC + + + + ! ------------------------------------------------------------------ + CONTAINS + ! ------------------------------------------------------------------ + + + SUBROUTINE INIT_PLANS_FFT(KDLON) + INTEGER(KIND=C_INT),INTENT(IN) :: KDLON + + TC%N_MAX=KDLON + ALLOCATE(TC%FFTH_PLANS(TC%N_MAX)) + ALLOCATE(TC%N_PLANS(TC%N_MAX)) + TC%N_PLANS(:)=0 + RETURN + END SUBROUTINE INIT_PLANS_FFT + + + SUBROUTINE CREATE_PLAN_FFT(KPLAN,KTYPE,KN,KLOT,LDNONSTRIDED) + TYPE(C_PTR),INTENT(OUT) :: KPLAN + INTEGER(KIND=C_INT),INTENT(IN) :: KTYPE,KN,KLOT + LOGICAL, OPTIONAL, INTENT(IN) :: LDNONSTRIDED + + TYPE(C_PTR) :: IPLAN + INTEGER(KIND=C_INT) :: IRANK, ISTRIDE + INTEGER(KIND=C_INT) :: JL, JN + INTEGER(KIND=C_INT) :: IRDIST,ICDIST,IN(1),IEMBED(1) + INTEGER(KIND=C_INT) :: JNONSTRIDED + LOGICAL :: LLNONSTRIDED + LOGICAL :: LLFOUND + LOGICAL :: LLRESTRICT_PLANS=.FALSE. + TYPE(FFTH_PLAN),POINTER :: CURR_FFTH_PLAN,START_FFTH_PLAN + INTERFACE + SUBROUTINE CREATE_PLAN_FFTH(KPLAN,KTYPE,KN,KLOT,KNONSTRIDED) BIND(C,NAME="create_plan_ffth_") + USE, INTRINSIC :: ISO_C_BINDING + TYPE(C_PTR) :: KPLAN + INTEGER(C_INT) :: KTYPE,KN,KLOT,KNONSTRIDED + END SUBROUTINE CREATE_PLAN_FFTH + END INTERFACE + + LLNONSTRIDED=.FALSE. ! default: strided + IF ( PRESENT(LDNONSTRIDED) ) LLNONSTRIDED=LDNONSTRIDED + JNONSTRIDED=0 + IF ( LLNONSTRIDED ) JNONSTRIDED=1 + + IF( KN > TC%N_MAX )THEN + stop 'CREATE_PLAN_FFT: KN > N_MAX THAT WAS INITIALISED IN INIT_PLANS_FFTH' + ENDIF + + ! daand note: these parameters aren't actually used. + IRANK=1 + ISTRIDE=1 + IN(1)=KN + IEMBED(1)=IN(1) + ICDIST=KN/2+1 + IRDIST=ICDIST*2 + + !!$OMP CRITICAL + LLFOUND=.FALSE. + !IF( TC%FFTH_PLANS(KN)%NPLAN_ID /= 123456 )THEN + ! WRITE(*,'("CREATE_PLAN_FFT.1: PLAN_ID=",I10)')TC%FFTH_PLANS(KN)%NPLAN_ID + ! stop 'CREATE_PLAN_FFT.1: NPLAN_ID /= 123456' + !ENDIF + CURR_FFTH_PLAN=>TC%FFTH_PLANS(KN) + IF( CURR_FFTH_PLAN%NPLAN_ID /= 123456 )THEN + WRITE(*,'("CREATE_PLAN_FFT.2: PLAN_ID=",I10)')CURR_FFTH_PLAN%NPLAN_ID + stop 'CREATE_PLAN_FFT.2: NPLAN_ID /= 123456' + ENDIF + ! search for plan in existing plans + + DO JL=1,TC%N_PLANS(KN) + IF( ( KLOT == CURR_FFTH_PLAN%NLOT ) & + & .AND. ( KTYPE == CURR_FFTH_PLAN%NTYPE ) .AND. ( LLNONSTRIDED .EQV. CURR_FFTH_PLAN%LNONSTRIDED ) )THEN + LLFOUND=.TRUE. + IPLAN=CURR_FFTH_PLAN%NPLAN + EXIT + ELSEIF( JL /= TC%N_PLANS(KN) )THEN + CURR_FFTH_PLAN=>CURR_FFTH_PLAN%NEXT_PLAN + IF( CURR_FFTH_PLAN%NPLAN_ID /= 123456 )THEN + WRITE(*,'("CREATE_PLAN_FFT.3: PLAN_ID=",I10)')CURR_FFTH_PLAN%NPLAN_ID + stop 'CREATE_PLAN_FFT.3: NPLAN_ID /= 123456' + ENDIF + ENDIF + ENDDO + IF( .NOT.LLFOUND )THEN + IF( LLRESTRICT_PLANS )THEN + IF( TC%N_PLANS(KN) == TC%N_MAX_PLANS )THEN + ! destroy the plan at the start of the list + ! WRITE(*,'("CREATE_PLAN_FFT: BEG: DESTROYING A PLAN AT THE START OF THE LIST")') + CALL DESTROY_PLAN_FFT(TC%FFTH_PLANS(KN)%NPLAN) + TC%FFTH_PLANS(KN)%NPLAN_ID=999999 + START_FFTH_PLAN=>TC%FFTH_PLANS(KN) + TC%FFTH_PLANS(KN)=TC%FFTH_PLANS(KN)%NEXT_PLAN + ! DEALLOCATE(START_FFTH_PLAN) + TC%N_PLANS(KN)=TC%N_PLANS(KN)-1 + ! WRITE(*,'("CREATE_PLAN_FFT: END: DESTROYING A PLAN AT THE START OF THE LIST")') + ENDIF + ENDIF + + CALL CREATE_PLAN_FFTH(IPLAN,KTYPE,KN,KLOT,JNONSTRIDED) + + KPLAN=IPLAN + TC%N_PLANS(KN)=TC%N_PLANS(KN)+1 + IF( TC%N_PLANS(KN) /= 1 )THEN + ALLOCATE(CURR_FFTH_PLAN%NEXT_PLAN) + CURR_FFTH_PLAN=>CURR_FFTH_PLAN%NEXT_PLAN + ENDIF + IF( CURR_FFTH_PLAN%NPLAN_ID /= 123456 )THEN + WRITE(*,'("CREATE_PLAN_FFT.4: PLAN_ID=",I10)')CURR_FFTH_PLAN%NPLAN_ID + stop 'CREATE_PLAN_FFT.4: NPLAN_ID /= 123456' + ENDIF + CURR_FFTH_PLAN%NPLAN=IPLAN + CURR_FFTH_PLAN%NLOT=KLOT + CURR_FFTH_PLAN%N=KN + CURR_FFTH_PLAN%NTYPE=KTYPE + CURR_FFTH_PLAN%LNONSTRIDED=LLNONSTRIDED + CURR_FFTH_PLAN%NEXT_PLAN=>NULL() + ! write(*,'("CREATE_PLAN_FFT: KN=",I5," NPLANS=",I3," KLOT=",I6," KTYPE=",I2,& + ! & " NEW IPLAN=",Z16)')KN,TC%N_PLANS(KN),KLOT,KTYPE,IPLAN + ELSE + KPLAN=IPLAN + ENDIF + !!$OMP END CRITICAL + + RETURN + + END SUBROUTINE CREATE_PLAN_FFT + + + SUBROUTINE DESTROY_PLAN_FFT(KPLAN) + TYPE(C_PTR),INTENT(IN) :: KPLAN + CALL DESTROY_PLAN_FFTH(KPLAN) + RETURN + END SUBROUTINE DESTROY_PLAN_FFT + + + SUBROUTINE DESTROY_ALL_PLANS_FFT + INTEGER(KIND=C_INT) :: JL, JN + TYPE(FFTH_PLAN),POINTER :: CURR_FFTH_PLAN + DO JN=1,TC%N_MAX + CURR_FFTH_PLAN=>TC%FFTH_PLANS(JN) + ENDDO + !WRITE(*,'("DESTROY_ALL_PLANS_FFTH: MPL_RANK=",I6," SUM(TC%N_PLANS(:))=",I10)')& + ! & MPL_MYRANK(), SUM(TC%N_PLANS(:)) + DEALLOCATE(TC%FFTH_PLANS) + DEALLOCATE(TC%N_PLANS) + RETURN + END SUBROUTINE DESTROY_ALL_PLANS_FFT + + SUBROUTINE EXECUTE_PLAN_FFT(KN,N,X_IN,X_OUT,PLAN_PTR) + TYPE(C_PTR) :: PLAN_PTR + INTEGER(KIND=C_INT) :: KN + INTEGER(KIND=C_INT) :: N + REAL(KIND=JPRBT), TARGET :: X_IN + REAL(KIND=JPRBT), TARGET :: X_OUT + + INTERFACE + SUBROUTINE EXECUTE_PLAN_FFTH_C (KN, N, X_IN_PTR, X_OUT_PTR, PLAN_PTR) & + & BIND(C,NAME="execute_plan_ffth_c_") + USE, INTRINSIC :: ISO_C_BINDING + TYPE(C_PTR), VALUE :: PLAN_PTR + INTEGER(KIND=C_INT), VALUE :: KN + INTEGER(KIND=C_INT), VALUE :: N + TYPE(C_PTR), VALUE :: X_IN_PTR, X_OUT_PTR + END SUBROUTINE EXECUTE_PLAN_FFTH_C + END INTERFACE + +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(X_IN,X_OUT) +#endif + +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(X_IN,X_OUT) +#endif + CALL EXECUTE_PLAN_FFTH_C(KN,N,C_LOC(X_IN),C_LOC(X_OUT),PLAN_PTR) +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif + + END SUBROUTINE EXECUTE_PLAN_FFT + + END MODULE TPM_FFTH + \ No newline at end of file diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 new file mode 100755 index 0000000..005a315 --- /dev/null +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -0,0 +1,123 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FIELDS + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPIB, JPRB, JPRBT, JPRD +USE ISO_C_BINDING + +IMPLICIT NONE + +SAVE + +TYPE FIELDS_TYPE +REAL(KIND=JPRD) ,ALLOCATABLE :: RPNM(:,:) ! Legendre polynomials +REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes +REAL(KIND=JPRBT) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature +REAL(KIND=JPRBT) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 +REAL(KIND=JPRBT) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) + +REAL(KIND=JPRBT) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms +REAL(KIND=JPRBT) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) +REAL(KIND=JPRBT) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN + +REAL(KIND=JPRBT) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes +REAL(KIND=JPRBT) ,ALLOCATABLE :: RACTHE2(:)! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes +END TYPE FIELDS_TYPE + +!flat copies of the above +REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature +REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RN(:) ! n (to avoid integer to real conversion) +REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RLAPIN(:) ! eigen-values of the inverse Laplace operator +REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator + +TYPE(FIELDS_TYPE),ALLOCATABLE,TARGET :: FIELDS_RESOL(:) +TYPE(FIELDS_TYPE),POINTER :: F + +! scratch arrays for ltinv and ltdir and associated dimension variables + +REAL(KIND=JPRBT),ALLOCATABLE :: ZAA(:,:,:) !! JPRL for 1/2 +REAL(KIND=JPRBT),ALLOCATABLE :: ZAS(:,:,:) !! JPRL for 1/2 + +REAL(KIND=JPRBT), POINTER :: IZBA(:,:,:) !! JPRL for 1/2 +!!origSam REAL(KIND=JPRBT),ALLOCATABLE,TARGET :: IZBS(:,:,:) !! JPRL for 1/2 +REAL(KIND=JPRBT),ALLOCATABLE :: IZBS(:) !! from working RAPS +REAL(KIND=JPRBT),ALLOCATABLE :: IZCA(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: IZCS(:,:,:) +!REAL(KIND=JPRBT),ALLOCATABLE :: IZCAT(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: IZCST(:) + +REAL(KIND=JPRBT),ALLOCATABLE :: DZBA(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: DZBS(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: DZBAT(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: DZBST(:) !! JPRL for 1/2 +REAL(KIND=JPRBT),ALLOCATABLE :: DZCA(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: DZCS(:,:,:) +!REAL(KIND=JPRBT),POINTER :: DZCAT(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: DZCAT(:) +REAL(KIND=JPRBT),ALLOCATABLE,TARGET :: DZCST(:) + +! Arrays used for rescaling to allow half-precision Legende transforms +REAL(KIND=JPRBT), ALLOCATABLE :: ZAMAX(:,:) +REAL(KIND=JPRBT), ALLOCATABLE :: ZSMAX(:,:) + +! for m=0 in ledir_mod: +REAL(KIND=JPRD),ALLOCATABLE :: ZAA0(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZAS0(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: DZBST0(:) +REAL(KIND=JPRD),ALLOCATABLE :: DZCAT0(:) +REAL(KIND=JPRD),ALLOCATABLE :: DZCST0(:) +INTEGER(KIND=JPIM) :: KMLOC0 + +INTEGER(KIND=JPIM) :: LDZAA +INTEGER(KIND=JPIM) :: LDZAS +INTEGER(KIND=JPIM) :: TDZAA +INTEGER(KIND=JPIM) :: TDZAS + +INTEGER(KIND=JPIM) :: ILDZBA +INTEGER(KIND=JPIM) :: ILDZBS +INTEGER(KIND=JPIM) :: ILDZCA +INTEGER(KIND=JPIM) :: ILDZCS + + + +INTEGER(KIND=JPIM) :: DLDZBA +INTEGER(KIND=JPIM) :: DLDZBS +INTEGER(KIND=JPIM) :: DLDZCA +INTEGER(KIND=JPIM) :: DLDZCS + +! enable calling setup_trans with a different set of fields than inv_trans and dir_trans: +! IF_FS_INV0: size used for the allocation in setup_trans +! IF_FS_INV: size used in inv_trans and dir_Trans, needs to be <= IF_FS_INV0 +INTEGER(KIND=JPIM) :: IF_FS_INV, IF_FS_INV0 +INTEGER(KIND=JPIM) :: IF_FS_DIR, IF_FS_DIR0 +INTEGER(KIND=JPIM) :: NFLEV, NFLEV0 +INTEGER(KIND=JPIM) :: ITDZBA, ITDZBA0 +INTEGER(KIND=JPIM) :: ITDZBS, ITDZBS0 +INTEGER(KIND=JPIM) :: DTDZBA, DTDZBA0 +INTEGER(KIND=JPIM) :: DTDZBS, DTDZBS0 +INTEGER(KIND=JPIM) :: DTDZCA, DTDZCA0 +INTEGER(KIND=JPIM) :: DTDZCS, DTDZCS0 +INTEGER(KIND=JPIM) :: ITDZCA, ITDZCA0 +INTEGER(KIND=JPIM) :: ITDZCS, ITDZCS0 + +REAL(KIND=JPRB),ALLOCATABLE, TARGET :: ZIA(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZEPSNM(:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZSOA1(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZAOA1(:,:,:) +INTEGER(KIND=JPIM),ALLOCATABLE :: ISTAN(:,:) +INTEGER(KIND=JPIM),ALLOCATABLE :: ISTAS(:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZSIA(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZAIA(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE, TARGET :: ZOA1(:,:,:) +REAL(KIND=JPRBT),ALLOCATABLE, TARGET :: ZOA2(:,:,:) + +END MODULE TPM_FIELDS diff --git a/src/trans/gpu/internal/tpm_flt.F90 b/src/trans/gpu/internal/tpm_flt.F90 new file mode 100755 index 0000000..a3d7e11 --- /dev/null +++ b/src/trans/gpu/internal/tpm_flt.F90 @@ -0,0 +1,67 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FLT + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT, JPRD +USE BUTTERFLY_ALG_MOD,ONLY : BUTTERFLY_STRUCT +USE SEEFMM_MIX +IMPLICIT NONE + +SAVE + + +TYPE FLT_TYPE +INTEGER(KIND=JPIM) :: NSPOLEGL +INTEGER(KIND=JPIM) :: NDGNH +INTEGER(KIND=JPIM) :: INS2 +INTEGER(KIND=JPIM) :: INA2 +REAL(KIND=JPRBT) ,POINTER :: RPNMS(:,:) ! Legendre polynomials +REAL(KIND=JPRBT) ,POINTER :: RPNMA(:,:) ! Legendre polynomials +REAL(KIND=JPRD) ,POINTER :: RPNMDS(:,:) ! Legendre polynomials +REAL(KIND=JPRD) ,POINTER :: RPNMDA(:,:) ! Legendre polynomials +REAL(KIND=JPRBT) :: RCS +REAL(KIND=JPRBT) :: RCA +!REAL(KIND=JPRBT) ,POINTER :: RPNMCDO(:,:) ! Legendre polynomials for C-D formula at orig roots +!REAL(KIND=JPRBT) ,POINTER :: RPNMCDD(:,:) ! Legendre polynomials for C-D formula at dual roots +REAL(KIND=JPRBT) ,POINTER :: RPNMWI(:,:) ! special weights +REAL(KIND=JPRBT) ,POINTER :: RPNMWO(:,:) ! special weights +INTEGER(KIND=JPIM) :: ISLD ! starting latitude dual + +! Butterfly + +INTEGER(KIND=JPIM) :: MAXCOLS +TYPE(BUTTERFLY_STRUCT) :: YBUT_STRUCT_S,YBUT_STRUCT_A + +END TYPE FLT_TYPE + +TYPE FLT_TYPE_WRAP +TYPE(FLT_TYPE),ALLOCATABLE :: FA(:) +LOGICAL :: LDLL +LOGICAL :: LSHIFTLL +LOGICAL :: LUSEFLT +LOGICAL :: LUSE_BELUSOV +LOGICAL :: LKEEPRPNM +LOGICAL :: LSOUTHPNM ! .TRUE. to compute Legendre polynomials on southern hemisphere +INTEGER(KIND=JPIM) :: IMLOC +INTEGER(KIND=JPIM) :: ITHRESHOLD +INTEGER(KIND=JPIM) :: NDGNHD ! dual set dimension +INTEGER(KIND=JPIM) :: NDLON ! dual number of longitudes +INTEGER(KIND=JPIM) :: NDGL ! dual number of latitudes +LOGICAL :: LSYM +TYPE(FMM_TYPE),POINTER :: FMM_INTI ! FMM interpolation + +END TYPE FLT_TYPE_WRAP + +TYPE(FLT_TYPE_WRAP),ALLOCATABLE,TARGET :: FLT_RESOL(:) +TYPE(FLT_TYPE_WRAP),POINTER :: S + + +END MODULE TPM_FLT diff --git a/src/trans/gpu/internal/tpm_gen.F90 b/src/trans/gpu/internal/tpm_gen.F90 new file mode 100755 index 0000000..cf38f74 --- /dev/null +++ b/src/trans/gpu/internal/tpm_gen.F90 @@ -0,0 +1,45 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_GEN + +! Module for general control variables. + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +IMPLICIT NONE + +SAVE + +INTEGER(KIND=JPIM) :: NOUT ! Unit number for "standard" output +INTEGER(KIND=JPIM) :: NERR ! Unit number for error messages +INTEGER(KIND=JPIM) :: NPRINTLEV ! Printing level, 0=no print, 1=standard,2=debug + +INTEGER(KIND=JPIM) :: MSETUP0 = 0 ! Control of setup calls +INTEGER(KIND=JPIM) :: NMAX_RESOL = 0 ! Maximum allowed number of resolutions +INTEGER(KIND=JPIM) :: NCUR_RESOL = 0 ! Current resolution +INTEGER(KIND=JPIM) :: NDEF_RESOL = 0 ! Number of defined resolutions +INTEGER(KIND=JPIM) :: NPROMATR ! Packet size for transform (in no of fields) + ! NPROMATR=0 means do all fields together (dflt) + +LOGICAL :: LALLOPERM ! Allocate some shared data structures permanently +LOGICAL :: LMPOFF ! true: switch off message passing +LOGICAL :: LSYNC_TRANS ! true: activate barriers in trmtol and trltom + +! Use of synchronization/blocking in Transpose (some networks do get flooded) +! 0 = Post IRECVs up-front, use ISENDs, use WAITANY to recv data (default) +! 1 = Use ISENDs, use blocking RECVs, add barrier at the end of each cycle +! 2 = Use buffered SENDs, use blocking RECVs, add barrier at the end of each cycle +INTEGER(KIND=JPIM) :: NTRANS_SYNC_LEVEL = 0 + +LOGICAL, ALLOCATABLE :: LENABLED(:) ! true: the resolution is enabled (it has been + ! initialised and has not been released afterward) + +END MODULE TPM_GEN diff --git a/src/trans/gpu/internal/tpm_geometry.F90 b/src/trans/gpu/internal/tpm_geometry.F90 new file mode 100755 index 0000000..2dbda55 --- /dev/null +++ b/src/trans/gpu/internal/tpm_geometry.F90 @@ -0,0 +1,44 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_GEOMETRY + +! Module containing data describing Gaussian grid. + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +IMPLICIT NONE + +SAVE + +TYPE GEOM_TYPE +INTEGER(KIND=JPIM),ALLOCATABLE :: NLOEN(:) ! NUMBER OF POINTS ON A PARALLEL +INTEGER(KIND=JPIM),ALLOCATABLE :: NMEN(:) ! ASSOCIATED CUT-OFF WAVE NUMBER +INTEGER(KIND=JPIM),ALLOCATABLE :: NDGLU(:) ! NUMBER OF HEMISPERIC LATITUDES +! FOR A GIVEN WAVE NUMBER M + +LOGICAL :: LAM ! LAM geometry if T, Global geometry if F +LOGICAL :: LREDUCED_GRID ! Reduced Gaussian grid if T +! quadratic Gaussian grid otherwise. +REAL(KIND=JPRBT) :: RSTRET ! Stretching factor (for Legendre polynomials +! computed on stretched latitudes only) +END TYPE GEOM_TYPE + +!flat copies of the above +INTEGER(KIND=JPIM),ALLOCATABLE :: G_NDGLU(:) ! NUMBER OF HEMISPERIC LATITUDES +INTEGER(KIND=JPIM),ALLOCATABLE :: G_NMEN(:) ! ASSOCIATED CUT-OFF WAVE NUMBER +INTEGER(KIND=JPIM) :: G_NMEN_MAX +INTEGER(KIND=JPIM),ALLOCATABLE :: G_NLOEN(:) ! NUMBER OF POINTS ON A PARALLEL +INTEGER(KIND=JPIM) :: G_NLOEN_MAX + +TYPE(GEOM_TYPE),ALLOCATABLE,TARGET :: GEOM_RESOL(:) +TYPE(GEOM_TYPE),POINTER :: G + +END MODULE TPM_GEOMETRY diff --git a/src/trans/gpu/internal/tpm_pol.F90 b/src/trans/gpu/internal/tpm_pol.F90 new file mode 100755 index 0000000..27eaa82 --- /dev/null +++ b/src/trans/gpu/internal/tpm_pol.F90 @@ -0,0 +1,123 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_POL + +! MODIFICATIONS. +! -------------- +! R. El Khatib 17-Feb-2016 Optional allocation/computation of DDC/DDD/DDE +! since they are (big and) not used in supolf. + +USE PARKIND1 ,ONLY : JPRD, JPIM + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRD),ALLOCATABLE :: DDC(:,:), DDD(:,:), DDE(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: DDA(:), DDI(:), DDH(:) + +REAL(KIND=JPRD),ALLOCATABLE :: DFA(:), DFB(:), DFF(:), DFG(:), DFI(:), DFH(:) + +CONTAINS +!====================================================================== +SUBROUTINE INI_POL(KNSMAX,LDFAST) + +INTEGER(KIND=JPIM), INTENT(IN) :: KNSMAX +LOGICAL, INTENT(IN), OPTIONAL :: LDFAST + +REAL(KIND=JPRD) :: DA,DC,DD,DE +INTEGER(KIND=JPIM) :: KKN, KKM + +INTEGER(KIND=JPIM) :: JN, JM +LOGICAL :: LLFAST + +DC(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN+KKM-1,JPRD)& + &*REAL(KKN+KKM-3,JPRD))& + &/ (REAL(2*KKN-3,JPRD)*REAL(KKN+KKM,JPRD)& + &*REAL(KKN+KKM-2,JPRD)) ) +DD(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN+KKM-1,JPRD)& + &*REAL(KKN-KKM+1,JPRD))& + &/ (REAL(2*KKN-1,JPRD)*REAL(KKN+KKM,JPRD)& + &*REAL(KKN+KKM-2,JPRD)) ) +DE(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN-KKM,JPRD))& + &/ (REAL(2*KKN-1,JPRD)*REAL(KKN+KKM,JPRD)) ) +DA(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN-KKM,JPRD)& + &*REAL(KKN+KKM,JPRD))& + &/ REAL(2*KKN-1,JPRD) ) + +IF (PRESENT(LDFAST)) THEN + LLFAST=LDFAST +ELSE + LLFAST=.FALSE. +ENDIF +IF (.NOT.LLFAST) ALLOCATE( DDC(0:KNSMAX,0:KNSMAX) ) +IF (.NOT.LLFAST) ALLOCATE( DDD(0:KNSMAX,0:KNSMAX) ) +IF (.NOT.LLFAST) ALLOCATE( DDE(0:KNSMAX,0:KNSMAX) ) + +ALLOCATE( DDA(0:KNSMAX) ) +ALLOCATE( DDI(0:KNSMAX) ) +ALLOCATE( DDH(0:KNSMAX) ) + +ALLOCATE( DFA(0:KNSMAX) ) +ALLOCATE( DFB(0:KNSMAX) ) +ALLOCATE( DFF(0:KNSMAX) ) +ALLOCATE( DFG(0:KNSMAX) ) +ALLOCATE( DFI(0:KNSMAX) ) +ALLOCATE( DFH(0:KNSMAX) ) + + +DO JN=1,KNSMAX + DFA(JN) = 1._JPRD/SQRT(REAL(JN*(JN+1),JPRD)) + DFB(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(JN*(JN+1),JPRD)) + DFF(JN) = REAL(2*JN-1,JPRD)/REAL(JN,JPRD) + DFG(JN) = REAL(JN-1,JPRD)/REAL(JN,JPRD) + DFI(JN) = REAL(JN,JPRD) + DFH(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(2*JN,JPRD)) +ENDDO + +IF (.NOT.LLFAST) THEN + DO JN=3,KNSMAX + DO JM=2,JN-1 + DDC(JM,JN) = DC(JN,JM) + DDD(JM,JN) = DD(JN,JM) + DDE(JM,JN) = DE(JN,JM) + ENDDO + ENDDO +ENDIF + +DO JN=1,KNSMAX + DDA(JN) = 1._JPRD/SQRT(REAL(JN*(JN+1),JPRD)) + DDI(JN) = REAL(JN,JPRD) + DDH(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(2*JN,JPRD)) +ENDDO + +END SUBROUTINE INI_POL + +SUBROUTINE END_POL + +IF (ALLOCATED (DDC) ) DEALLOCATE( DDC ) +IF (ALLOCATED (DDD) ) DEALLOCATE( DDD ) +IF (ALLOCATED (DDE) ) DEALLOCATE( DDE ) + +DEALLOCATE( DDA ) +DEALLOCATE( DDI ) +DEALLOCATE( DDH ) + +DEALLOCATE( DFA ) +DEALLOCATE( DFB ) +DEALLOCATE( DFF ) +DEALLOCATE( DFG ) +DEALLOCATE( DFI ) +DEALLOCATE( DFH ) + +END SUBROUTINE END_POL + +END MODULE TPM_POL diff --git a/src/trans/gpu/internal/tpm_trans.F90 b/src/trans/gpu/internal/tpm_trans.F90 new file mode 100755 index 0000000..47b41a2 --- /dev/null +++ b/src/trans/gpu/internal/tpm_trans.F90 @@ -0,0 +1,67 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_TRANS + +! Module to contain variables "local" to a specific call to a transform + +! +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT + +IMPLICIT NONE + +SAVE + +!INTEGER_M :: NF_UV ! Number of u-v fields (spectral/fourier space) +!INTEGER_M :: NF_SCALARS ! Number of scalar fields (spectral/fourier space) +!INTEGER_M :: NF_SCDERS ! Number of fields for derivatives of scalars + ! (inverse transform, spectral/fourier space) +!INTEGER_M :: NF_OUT_LT ! Number of fields that comes out of Inverse + ! Legendre transform +INTEGER(KIND=JPIM) :: NF_SC2 ! Number of fields in "SPSC2" arrays. +INTEGER(KIND=JPIM) :: NF_SC3A ! Number of fields in "SPSC3A" arrays. +INTEGER(KIND=JPIM) :: NF_SC3B ! Number of fields in "SPSC3B" arrays. + +!LOGICAL :: LUV ! uv fields requested +!LOGICAL :: LSCALAR ! scalar fields requested +LOGICAL :: LVORGP ! vorticity requested +LOGICAL :: LDIVGP ! divergence requested +LOGICAL :: LUVDER ! E-W derivatives of U and V requested +LOGICAL :: LSCDERS ! derivatives of scalar variables are req. +LOGICAL :: LATLON ! lat-lon output requested + +!INTEGER_M :: NLEI2 ! 8*NF_UV + 2*NF_SCALARS + 2*NF_SCDERS (dimension in + ! inverse Legendre transform) +!INTEGER_M :: NLED2 ! 2*NF_FS (dimension in direct Legendre transform) + +!INTEGER_M :: NF_FS ! Total number of fields in Fourier space + +!INTEGER_M :: NF_GP ! Total number of field in grid-point space +!INTEGER_M :: NF_UV_G ! Global version of NF_UV (grid-point space) +!INTEGER_M :: NF_SCALARS_G ! Global version of NF_SCALARS (grid-point space) + +REAL(KIND=JPRBT), ALLOCATABLE :: FOUBUF_IN(:) ! Fourier buffer +REAL(KIND=JPRBT), ALLOCATABLE :: FOUBUF(:) ! Fourier buffer + +INTEGER(KIND=JPIM) :: NPROMA ! Blocking factor for gridpoint input/output +INTEGER(KIND=JPIM) :: NGPBLKS ! Number of NPROMA blocks + +LOGICAL :: LGPNORM = .FALSE. ! indicates whether transform is being done for gpnorm + +REAL(KIND=JPRBT),ALLOCATABLE,TARGET :: ZGTF(:,:) +REAL(KIND=JPRBT),ALLOCATABLE,TARGET :: ZGTFTMP(:,:) + +REAL(KIND=JPRBT),ALLOCATABLE :: ZAVE(:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZMINGL(:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZMAXGL(:,:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZMINGPN(:) +REAL(KIND=JPRBT),ALLOCATABLE :: ZMAXGPN(:) + +END MODULE TPM_TRANS diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 new file mode 100755 index 0000000..1b81c6b --- /dev/null +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -0,0 +1,1638 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRGTOL_MOD + CONTAINS + SUBROUTINE TRGTOL_CUDAAWARE(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED) + + !**** *TRGTOL * - transposition of grid point data from column + ! structure to latitudinal. Reorganize data between + ! grid point calculations and direct Fourier Transform + + ! Version using CUDA-aware MPI + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trgtol(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (output) + ! PGP - Blocked grid point data (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original: 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow + ! NPRTRV to differ from NPRGPEW + ! : 98-06-17 add mailbox control logic (from TRLTOM) + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! KINDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of GTOL_PACK,GTOL_UNPACK + ! 03-04-02 G. Radnoti: call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_BARRIER + + USE TPM_GEN ,ONLY : NOUT, LSYNC_TRANS + USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRNS, MTAGGL, & + & MYSETV, MYSETW, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LGPNORM, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + USE MPL_DATA_MODULE ,ONLY : MPL_COMM_OML + USE OML_MOD ,ONLY : OML_MY_THREAD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + ! +#ifdef ACCGPU + USE MPI +#endif + + IMPLICIT NONE + +#ifdef OMPGPU + include 'mpif.h' +#endif + + REAL(KIND=JPRBT),INTENT(OUT) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: LDTRANSPOSED ! key indicating whether PGLAT is transposed, i.e. (nlon*nlat,nfld) instead of (nfld,nlon*nlat) + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + INTEGER(KIND=JPIM) :: ICOMBUFS_FLD(NPROC),ICOMBUFR_FLD(NPROC) + REAL(KIND=JPRBT) :: ZDUM(2) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*4) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IGL, IGLL, ILAST,& + &ILASTLAT, ILEN, JROC, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ISENDSET, ITAG, J, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLD, & + &II,INDOFFX,IBUFLENS,IBUFLENR,INRECV, IPROC,IFLDS, & + &INSEND,INS,INR,IR, IUNIT, JKL, JK_MAX + + ! LOCAL LOGICAL SCALARS + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: KINDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(MAX(KF_GP,KF_FS)) + INTEGER(KIND=JPIM) :: ISEND_FLD_TOTAL(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: IRECV_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: IRECVCOUNT,ISENDCOUNT + INTEGER(KIND=JPIM) :: IJPOS(NGPBLKS),IFLDA(KF_GP),JJ,JI,IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + INTEGER(KIND=JPIM) :: IERROR, irank + + REAL(KIND=JPRBT) :: TIMEF, tc + LOGICAL :: LLTRANSPOSED + +#ifdef PARKINDTRANS_SINGLE +#define TRGTOL_DTYPE MPI_REAL +#else +#define TRGTOL_DTYPE MPI_DOUBLE_PRECISION +#endif + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + + IF (LHOOK) CALL DR_HOOK('TRGTOL_CUDAAWARE',0,ZHOOK_HANDLE) + + iunit=300+myproc + + CALL GSTATS(1805,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + IF(PRESENT(KPTRGP)) LLINDER = .TRUE. + IF(PRESENT(PGP)) LLPGPONLY = .TRUE. + IF(PRESENT(PGPUV)) LLPGPUV = .TRUE. + IF(PRESENT(PGP3A)) LLPGP3A = .TRUE. + IF(PRESENT(PGP3B)) LLPGP3B = .TRUE. + IF(PRESENT(PGP2)) LLPGP2 = .TRUE. + IF (PRESENT(LDTRANSPOSED)) LLTRANSPOSED=LDTRANSPOSED + + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + LLUV(:) = .FALSE. + IUVPARS(:) = -99 + IUVLEVS(:) = -99 + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + +#ifdef ACCGPU + !$ACC DATA COPYIN(LLGP2,LLGP3A,LLGP3B,LLUV,IGPTRSEND) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:LLGP2,LLGP3A,LLGP3B,LLUV,IGPTRSEND) +#endif + + ITAG = MTAGGL + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETA + ISEND = JROC + ISENDSET = ISETV + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + ISEND_FLD_TOTAL(JROC) = IPOS + ISENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS + + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(IRECVSET)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(IRECVSET)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + IRECVTOT(JROC) = IPOS*KF_FS + + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + KINDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(IRECVCOUNT,INRECV)) +#ifdef ACCGPU + !$ACC DATA IF(IBUFLENS > 0) CREATE(ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(IBUFLENS > 0) MAP(ALLOC:ZCOMBUFS) +#endif +#ifdef ACCGPU + !$ACC DATA IF(IBUFLENR > 0) CREATE(ZCOMBUFR) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(IBUFLENR > 0) MAP(ALLOC:ZCOMBUFR) +#endif + +#ifdef OMPGPU + !$OMP TARGET +#endif +#ifdef ACCGPU + !$ACC KERNELS DEFAULT(NONE) COPYIN(IBUFLENS,IBUFLENR) +#endif + IF (IBUFLENS > 0) ZCOMBUFS(:,:) = 0. + IF (IBUFLENR > 0) ZCOMBUFR(:,:) = 0. +#ifdef ACCGPU + !$ACC END KERNELS +#endif +#ifdef OMPGPU + !$OMP END TARGET +#endif + + CALL GSTATS(1805,1) + + ! Send loop............................................................. + + ! Copy local contribution +#ifdef ACCGPU + !$ACC DATA PRESENT(PGLAT) COPYIN(KPTRGP,INDOFF,KINDEX) + !$ACC DATA IF(PRESENT(PGP)) PRESENT(PGP) + !$ACC DATA IF(PRESENT(PGPUV)) PRESENT(PGPUV) COPYIN(IUVLEVS,IUVPARS) + !$ACC DATA IF(PRESENT(PGP2)) PRESENT(PGP2) COPYIN(IGP2PARS) + !$ACC DATA IF(PRESENT(PGP3A)) PRESENT(PGP3A) COPYIN(IGP3ALEVS,IGP3APARS) + !$ACC DATA IF(PRESENT(PGP3B)) PRESENT(PGP3B) COPYIN(IGP3BLEVS,IGP3BPARS) +#endif +#ifdef OMPGPU +!WARNING: following lines should be PRESENT,ALLOC but cause issues with AMD compiler! + !$OMP TARGET DATA MAP(ALLOC:PGLAT) MAP(TO:KPTRGP,INDOFF,KINDEX) + !$OMP TARGET DATA IF(PRESENT(PGP)) MAP(ALLOC:PGP) + !$OMP TARGET DATA IF(PRESENT(PGPUV)) MAP(ALLOC:PGPUV) MAP(TO:IUVLEVS,IUVPARS) + !$OMP TARGET DATA IF(PRESENT(PGP2)) MAP(ALLOC:PGP2) MAP(TO:IGP2PARS) + !$OMP TARGET DATA IF(PRESENT(PGP3A)) MAP(ALLOC:PGP3A) MAP(TO:IGP3ALEVS,IGP3APARS) + !$OMP TARGET DATA IF(PRESENT(PGP3B)) MAP(ALLOC:PGP3B) MAP(TO:IGP3BLEVS,IGP3BPARS) +#endif + + IF(ISENDTOT(MYPROC) > 0 )THEN + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + JK_MAX = 0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + IF (JK_MAX<(ILAST-IFIRST+1)) JK_MAX = (ILAST-IFIRST+1) + ENDIF + ENDDO + +#ifdef ACCGPU + !$ACC DATA COPYIN(IFLDOFF,IGPTROFF) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:IFLDOFF,IGPTROFF) +#endif + + CALL GSTATS(1601,0) + IF(LLPGPONLY) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$OMP& SHARED(NGPBLKS,IFLDS,JK_MAX,IGPTRSEND,MYSETW,INDOFF,MYPROC,IGPTROFF,IFLDOFF,KINDEX,PGLAT,PGP) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$ACC& COPYIN(NGPBLKS,IFLDS,JK_MAX,IGPTRSEND,MYSETW,INDOFF,MYPROC,IGPTROFF,IFLDOFF,KINDEX,LLTRANSPOSED) & + !$ACC& PRESENT(PGLAT,PGP) +#endif + DO JBLK=1,NGPBLKS + DO JFLD=1,IFLDS + DO JKL=1, JK_MAX + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + ILAST = IGPTRSEND(2,JBLK,MYSETW) + JK = JKL+IFIRST-1 + IF(IFIRST > 0 .AND. JK <= ILAST) THEN + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IFLD = IFLDOFF(JFLD) + IF ( LLTRANSPOSED ) THEN + PGLAT(KINDEX(IPOS),JFLD) = PGP(JK,IFLD,JBLK) + ELSE + PGLAT(JFLD,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO + ELSE +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$OMP& SHARED(NGPBLKS,IFLDS,JK_MAX,IGPTRSEND,MYSETW,INDOFF,MYPROC,IGPTROFF,IFLDOFF, & + !$OMP& LLUV,LLGP2,LLGP3A,LLGP3B,PGLAT,KINDEX,PGPUV,PGP2,PGP3A,PGP3B,IUVLEVS, & + !$OMP& IUVPARS,IGP2PARS,IGP3ALEVS,IGP3APARS,IGP3BLEVS,IGP3BPARS) +#endif +#ifdef ACCGPU_ASTIBNYDUNADBT + !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$ACC& PRESENT(NGPBLKS,IFLDS,JK_MAX,IGPTRSEND,MYSETW,INDOFF,MYPROC,IGPTROFF,IFLDOFF, & + !$ACC& LLUV,LLGP2,LLGP3A,LLGP3B,PGLAT,KINDEX,PGPUV,PGP2,PGP3A,PGP3B,IUVLEVS, & + !$ACC& IUVPARS,IGP2PARS,IGP3ALEVS,IGP3APARS,IGP3BLEVS,IGP3BPARS) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$ACC& COPYIN(NGPBLKS,IFLDS,JK_MAX,IGPTRSEND,MYSETW,INDOFF,MYPROC,IGPTROFF,IFLDOFF, & + !$ACC& LLUV,LLGP2,LLGP3A,LLGP3B,KINDEX,IUVLEVS, & + !$ACC& IUVPARS,IGP2PARS,IGP3ALEVS,IGP3APARS,IGP3BLEVS,IGP3BPARS,LLTRANSPOSED) & + !$ACC& PRESENT(PGLAT,PGPUV,PGP2,PGP3A,PGP3B) +#endif + DO JBLK=1,NGPBLKS + DO JFLD=1,IFLDS + DO JKL=1, JK_MAX + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + ILAST = IGPTRSEND(2,JBLK,MYSETW) + JK = JKL+IFIRST-1 + IF(IFIRST > 0 .AND. JK <= ILAST) THEN + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGLAT(KINDEX(IPOS),JFLD) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + ELSE + PGLAT(JFLD,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + ENDIF + ELSEIF(LLGP2(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGLAT(KINDEX(IPOS),JFLD) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ELSE + PGLAT(JFLD,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ENDIF + ELSEIF(LLGP3A(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGLAT(KINDEX(IPOS),JFLD) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + ELSE + PGLAT(JFLD,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + ENDIF + ELSEIF(LLGP3B(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGLAT(KINDEX(IPOS),JFLD) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ELSE + PGLAT(JFLD,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO + ENDIF + CALL GSTATS(1601,1) + +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + ENDIF + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() +#endif + !....Pack loop......................................................... + + CALL GSTATS(1602,0) + DO INS=1,INSEND + ISEND=JSEND(INS) + CALL PE2SET(ISEND,ISETA,ISETB,ISETW,ISETV) + ISENDSET = ISETV + ISEND_FLD_END = ISEND_FLD_TOTAL(ISEND) + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + JK_MAX = 0 + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IJPOS(JBLK)=IPOS + IPOS = IPOS+ILAST-IFIRST+1 + IF (JK_MAX<(ILAST-IFIRST+1)) JK_MAX = (ILAST-IFIRST+1) + ENDIF + ENDDO + + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) DEFAULT(NONE) PRIVATE(JK,JI,IFLDT,IFIRST,ILAST) & + !$OMP& SHARED(ISEND_FLD_END,NGPBLKS,IFLDA,ISETW,IPOS,JK_MAX,IGPTRSEND,IJPOS,LLINDER,LLPGPONLY, & + !$OMP& LLUV,LLGP2,LLGP3A,LLGP3B,ZCOMBUFS,INS,KPTRGP,PGP,PGPUV,PGP2,PGP3A,PGP3B,IUVLEVS, & + !$OMP& IUVPARS,IGP2PARS,IGP3ALEVS,IGP3APARS,IGP3BLEVS,IGP3BPARS) +#endif +#ifdef ACCGPU_ATBNIADADBYODQ + !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(JK,JI,IFLDT,IFIRST,ILAST) COPYIN(INS,JK_MAX,IJPOS,IFLDA) & + !$ACC& PRESENT(ISEND_FLD_END,NGPBLKS,IFLDA,ISETW,IPOS,JK_MAX,IGPTRSEND,IJPOS,LLINDER,LLPGPONLY, & + !$ACC& LLUV,LLGP2,LLGP3A,LLGP3B,ZCOMBUFS,INS,KPTRGP,PGP,PGPUV,PGP2,PGP3A,PGP3B,IUVLEVS, & + !$ACC& IUVPARS,IGP2PARS,IGP3ALEVS,IGP3APARS,IGP3BLEVS,IGP3BPARS) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(JK,JI,IFLDT,IFIRST,ILAST) COPYIN(INS,JK_MAX,IJPOS,IFLDA) & + !$ACC& COPYIN(ISEND_FLD_END,NGPBLKS,IFLDA,ISETW,IPOS,JK_MAX,IGPTRSEND,IJPOS,LLINDER,LLPGPONLY, & + !$ACC& LLUV,LLGP2,LLGP3A,LLGP3B,INS,KPTRGP,IUVLEVS, & + !$ACC& IUVPARS,IGP2PARS,IGP3ALEVS,IGP3APARS,IGP3BLEVS,IGP3BPARS) & + !$ACC& PRESENT(ZCOMBUFS,PGP,PGPUV,PGP2,PGP3A,PGP3B) +#endif + DO JJ=1,ISEND_FLD_END + DO JBLK=1,NGPBLKS + DO JKL=1, JK_MAX + IFLDT=IFLDA(JJ) + IFIRST = IGPTRSEND(1,JBLK,ISETW) + ILAST = IGPTRSEND(2,JBLK,ISETW) + JK = JKL+IFIRST-1 + JI=(JJ-1)*IPOS+IJPOS(JBLK)+JKL + IF(IFIRST > 0 .AND. JK <= ILAST) THEN + IF(LLINDER) THEN + ZCOMBUFS(JI,INS) = PGP(JK,KPTRGP(IFLDT),JBLK) + ELSEIF(LLPGPONLY) THEN + ZCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK) + ELSEIF(LLUV(IFLDT)) THEN + ZCOMBUFS(JI,INS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) + ELSEIF(LLGP2(IFLDT)) THEN + ZCOMBUFS(JI,INS) = PGP2(JK,IGP2PARS(IFLDT),JBLK) + ELSEIF(LLGP3A(IFLDT)) THEN + ZCOMBUFS(JI,INS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) + ELSEIF(LLGP3B(IFLDT)) THEN + ZCOMBUFS(JI,INS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO + + ICOMBUFS_FLD(INS) = IFLD + ENDDO + + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRBT + CALL MPI_COMM_RANK(MPI_COMM_WORLD, IRANK, IERROR) + !IF(irank==0) WRITE(*,*) "packing (trgtol) in sec: ", Tc +#endif + + CALL GSTATS(1602,1) + + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(761) + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',1,ZHOOK_HANDLE_BAR) + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,0) + ELSE + CALL GSTATS(804,0) + ENDIF + IR=0 + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() +#endif + IF (LSYNC_TRANS) THEN + CALL GSTATS(423,0) + CALL MPL_BARRIER(CDSTRING='TRGTOL BARRIER') + CALL GSTATS(423,1) + ENDIF + CALL GSTATS(413,0) + +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(ZCOMBUFR,ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(ZCOMBUFR,ZCOMBUFS) +#endif + ! Receive loop......................................................... + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + CALL MPI_IRECV(ZCOMBUFR(1:IRECVTOT(IRECV),INR),IRECVTOT(IRECV), & + & TRGTOL_DTYPE,NPRCIDS(IRECV)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + IR=IR+1 + CALL MPI_IRECV(ICOMBUFR_FLD(INR),1, & + & MPI_INTEGER,NPRCIDS(IRECV)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + ENDDO + + !....Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + CALL MPI_ISEND(ZCOMBUFS(1:ISENDTOT(ISEND),INS),ISENDTOT(ISEND), & + & TRGTOL_DTYPE,NPRCIDS(ISEND)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + IR=IR+1 + CALL MPI_ISEND(ICOMBUFS_FLD(INS),1, & + & MPI_INTEGER,NPRCIDS(ISEND)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + + IF(IR > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='TRGTOL_CUDAAWARE: WAIT FOR SENDS AND RECEIVES') + ENDIF + CALL GSTATS(413,1) + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=(TIMEF()-Tc)/1000.0_JPRBT + ! !IF(irank==0) WRITE(*,*) "non-CUDA-aware isend/irecv (trgtol) in sec: ", Tc + !#endif + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,1) + ELSE + CALL GSTATS(804,1) + ENDIF + CALL GSTATS_BARRIER2(761) + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=TIMEF() + !#endif + ! Unpack loop......................................................... + + CALL GSTATS(1603,0) + + + DO INR=1,INRECV + IRECV=JRECV(INR) + ILEN = IRECVTOT(IRECV)/KF_FS + IRECV_FLD_END = ICOMBUFR_FLD(INR) +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) DEFAULT(NONE) PRIVATE(II) & + !$OMP& MAP(TO:IRECV,ILEN,IRECV_FLD_END) & + !$OMP& SHARED(IRECV_FLD_END,ILEN,KINDEX,INDOFF,IRECV,ZCOMBUFR,PGLAT,INR) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) DEFAULT(NONE) PRIVATE(II) COPYIN(IRECV,ILEN,IRECV_FLD_END) & + !$ACC& COPYIN(IRECV_FLD_END,ILEN,IRECV,INR,LLTRANSPOSED) + !$ACC& PRESENT(KINDEX,ZCOMBUFR,PGLAT) + +#endif + DO JFLD=1,IRECV_FLD_END + DO JL=1,ILEN + II = KINDEX(INDOFF(IRECV)+JL) + IF ( LLTRANSPOSED ) THEN + PGLAT(II,JFLD) = ZCOMBUFR(JL+(JFLD-1)*ILEN,INR) + ELSE + PGLAT(JFLD,II) = ZCOMBUFR(JL+(JFLD-1)*ILEN,INR) + ENDIF + ENDDO + ENDDO + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=(TIMEF()-Tc)/1000.0_JPRBT + ! !IF(irank==0) WRITE(*,*) "unpacking (trgtol) in sec: ", Tc + !#endif + + CALL GSTATS(1603,1) + +#ifdef OMPGPU + !$OMP END TARGET DATA !! PRESENT(PGP3B) + !$OMP END TARGET DATA !! PRESENT(PGP3A) + !$OMP END TARGET DATA !! PRESENT(PGP2) + !$OMP END TARGET DATA !! PRESENT(PGPUV) + !$OMP END TARGET DATA !! PRESENT(PGP) + !$OMP END TARGET DATA !! PRESENT(PGLAT) +#endif + +#ifdef OMPGPU + !$OMP END TARGET DATA !! ZCOMBUFS + !$OMP END TARGET DATA !! ZCOMBUFS +#endif + +#ifdef ACCGPU + !$ACC END DATA !! PRESENT(PGP3B) + !$ACC END DATA !! PRESENT(PGP3A) + !$ACC END DATA !! PRESENT(PGP2) + !$ACC END DATA !! PRESENT(PGPUV) + !$ACC END DATA !! PRESENT(PGP) + !$ACC END DATA !! PRESENT(PGLAT) +#endif + +#ifdef ACCGPU + !$ACC END DATA !! ZCOMBUFS + !$ACC END DATA !! ZCOMBUFS +#endif + + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRGTOL_CUDAAWARE',1,ZHOOK_HANDLE) + + END SUBROUTINE TRGTOL_CUDAAWARE + + SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED) + + !**** *TRGTOL * - transposition of grid point data from column + ! structure to latitudinal. Reorganize data between + ! grid point calculations and direct Fourier Transform + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trgtol(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (output) + ! PGP - Blocked grid point data (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original: 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow + ! NPRTRV to differ from NPRGPEW + ! : 98-06-17 add mailbox control logic (from TRLTOM) + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! KINDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of GTOL_PACK,GTOL_UNPACK + ! 03-04-02 G. Radnoti: call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRNS, MTAGGL, & + & MYSETV, MYSETW, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LGPNORM, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + ! +#ifdef ACCGPU + USE MPI +#endif + + IMPLICIT NONE + + REAL(KIND=JPRBT),INTENT(OUT) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: LDTRANSPOSED + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + REAL(KIND=JPRBT) :: ZDUM(2) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*2) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IGL, IGLL, ILAST,& + &ILASTLAT, ILEN, JROC, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ISENDSET, ITAG, J, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLD, & + &II,INDOFFX,IBUFLENS,IBUFLENR,INRECV, IPROC,IFLDS, & + &INSEND,INS,INR,IR, iunit + + ! LOCAL LOGICAL SCALARS + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: KINDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(MAX(KF_GP,KF_FS)) + INTEGER(KIND=JPIM) :: ISEND_FLD_TOTAL(NPROC),ISEND_FLD_START(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: IRECVCOUNT,ISENDCOUNT + INTEGER(KIND=JPIM) :: IJPOS(NGPBLKS),IFLDA(KF_GP),JJ,JI,IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + INTEGER(KIND=JPIM) :: IERROR, irank + LOGICAL :: LLTRANSPOSED + + REAL(KIND=JPRBT) :: TIMEF, tc + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + + IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE) + + + LLTRANSPOSED=.FALSE. + IF (PRESENT(LDTRANSPOSED)) LLTRANSPOSED=LDTRANSPOSED + + iunit=300+myproc + + CALL GSTATS(1805,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + IF(PRESENT(KPTRGP)) LLINDER = .TRUE. + IF(PRESENT(PGP)) LLPGPONLY = .TRUE. + IF(PRESENT(PGPUV)) LLPGPUV = .TRUE. + IF(PRESENT(PGP3A)) LLPGP3A = .TRUE. + IF(PRESENT(PGP3B)) LLPGP3B = .TRUE. + IF(PRESENT(PGP2)) LLPGP2 = .TRUE. + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + LLUV(:) = .FALSE. + IUVPARS(:) = -99 + IUVLEVS(:) = -99 + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + + ITAG = MTAGGL + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETA + ISEND = JROC + ISENDSET = ISETV + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + ISEND_FLD_TOTAL(JROC) = IPOS + ISENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS + + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(IRECVSET)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(IRECVSET)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + IRECVTOT(JROC) = IPOS*KF_FS + + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + KINDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(-1:ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(-1:IRECVCOUNT,INRECV)) + + CALL GSTATS(1805,1) + + ! Send loop............................................................. + + ! Copy local contribution + + IF(ISENDTOT(MYPROC) > 0 )THEN + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + + CALL GSTATS(1601,0) + + +IF (.NOT. LLTRANSPOSED) THEN + + +#ifdef NECSX + !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#else + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#endif + + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + PGLAT(JFLD,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ENDDO + ELSE + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !$OMP END PARALLEL DO + +ELSE + + + + +#ifdef NECSX + !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#else + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#endif + + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + PGLAT(KINDEX(IPOS),JFLD) = PGP(JK,IFLD,JBLK) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(KINDEX(IPOS),JFLD) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(KINDEX(IPOS),JFLD) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(KINDEX(IPOS),JFLD) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(KINDEX(IPOS),JFLD) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ENDDO + ELSE + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !$OMP END PARALLEL DO +ENDIF + + CALL GSTATS(1601,1) + + ENDIF + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() +#endif + !....Pack loop......................................................... + + ISEND_FLD_START=1 + CALL GSTATS(1602,0) + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,JBLK,IFIRST,ILAST,JK,JJ,JI,& + !$OMP& INS,ISEND,ISETA,ISETB,ISETW,ISETV,ISENDSET,ISEND_FLD_END,IFLD,IPOS,& + !$OMP& IFLDA,JFLD,IJPOS) + DO INS=1,INSEND + ISEND=JSEND(INS) + CALL PE2SET(ISEND,ISETA,ISETB,ISETW,ISETV) + ISENDSET = ISETV + ISEND_FLD_END = ISEND_FLD_TOTAL(ISEND) + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IJPOS(JBLK)=IPOS + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + + + DO JJ=ISEND_FLD_START(ISEND),ISEND_FLD_END + IFLDT=IFLDA(JJ) + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IF(LLINDER) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,KPTRGP(IFLDT),JBLK) + ENDDO + ELSE + IF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP2(JK,IGP2PARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) + ENDDO + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + + IPOS=(ISEND_FLD_END-ISEND_FLD_START(ISEND)+1)*IPOS + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = IFLD + ENDDO + !$OMP END PARALLEL DO +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRBT + CALL MPI_COMM_RANK(MPI_COMM_WORLD, IRANK, IERROR) + !IF(irank==0) WRITE(*,*) "packing (trgtol) in sec: ", Tc +#endif + + CALL GSTATS(1602,1) + + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(761) + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',1,ZHOOK_HANDLE_BAR) + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,0) + ELSE + CALL GSTATS(804,0) + ENDIF + IR=0 + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() +#endif + ! Receive loop......................................................... + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:IRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRLTOG:' ) + !print*,irank,size(ZCOMBUFR(-1:IRECVTOT(IRECV),INR)) + ENDDO + + !....Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + CALL MPL_SEND(ZCOMBUFS(-1:ISENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRGTOL:' ) + ENDDO + + IF(IR > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='TRGTOL: WAIT FOR SENDS AND RECEIVES') + ENDIF + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=(TIMEF()-Tc)/1000.0_JPRBT + ! !IF(irank==0) WRITE(*,*) "non-CUDA-aware isend/irecv (trgtol) in sec: ", Tc + !#endif + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,1) + ELSE + CALL GSTATS(804,1) + ENDIF + CALL GSTATS_BARRIER2(761) + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=TIMEF() + !#endif + ! Unpack loop......................................................... + + CALL GSTATS(1603,0) + +IF (.not. LLTRANSPOSED) THEN + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD,INR,IRECV,ILEN,IRECV_FLD_START,IRECV_FLD_END,IPOS) + DO INR=1,INRECV + IRECV=JRECV(INR) + ILEN = IRECVTOT(IRECV)/KF_FS + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) + DO JFLD=IRECV_FLD_START,IRECV_FLD_END + DO JL=1,ILEN + II = KINDEX(INDOFF(IRECV)+JL) + PGLAT(JFLD,II) = ZCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR) + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO +ELSE + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD,INR,IRECV,ILEN,IRECV_FLD_START,IRECV_FLD_END,IPOS) + DO INR=1,INRECV + IRECV=JRECV(INR) + ILEN = IRECVTOT(IRECV)/KF_FS + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) + DO JFLD=IRECV_FLD_START,IRECV_FLD_END + DO JL=1,ILEN + II = KINDEX(INDOFF(IRECV)+JL) + PGLAT(II,JFLD) = ZCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR) + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO +ENDIF + + ! this appears to be important (otherwise, old data picked in PGLAT) + ! in particular, one would have thought that above ACC copy and update on the + ! device is the same as OMP loop + update device command below, but it seems not, and winds still in field index 1 from prev inv_trans !!! + + +! daand: I think it should be possible to use TRGTOL from CPU (eg for norm computation); hence the ACC UPDATE should be done in the calling routine. +#ifdef ACCGPU + !**$ACC UPDATE DEVICE(PGLAT) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE TO(PGLAT) +#endif +#ifdef ACCGPU + !**$ACC WAIT +#endif + !$OMP BARRIER + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=(TIMEF()-Tc)/1000.0_JPRBT + ! !IF(irank==0) WRITE(*,*) "unpacking (trgtol) in sec: ", Tc + !#endif + + CALL GSTATS(1603,1) + + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE) + + END SUBROUTINE TRGTOL + END MODULE TRGTOL_MOD diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 new file mode 100755 index 0000000..6d606c5 --- /dev/null +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -0,0 +1,1636 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRLTOG_MOD + CONTAINS + SUBROUTINE TRLTOG_CUDAAWARE(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED) + + !**** *trltog * - transposition of grid point data from latitudinal + ! to column structure. This takes place between inverse + ! FFT and grid point calculations. + ! TRLTOG is the inverse of TRGTOL + + ! Version using CUDA-aware MPI + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trltog(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (input) + ! PGP - Blocked grid point data (output) + ! KVSET - "v-set" for each field (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow NPRTRV + ! to differ from NPRGPEW + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! INDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of LTOG_PACK,LTOG_UNPACK + ! 03-0-02 G. Radnoti: Call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB , JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_MYRANK, MPL_BARRIER + + USE TPM_GEN ,ONLY : NOUT, LSYNC_TRANS + USE TPM_DISTR ,ONLY : D, MYSETV, MYSETW, MTAGLG, & + & NPRCIDS, NPRTRNS, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + USE MPL_DATA_MODULE ,ONLY : MPL_COMM_OML + USE OML_MOD ,ONLY : OML_MY_THREAD + ! +#ifdef ACCGPU + USE MPI +#endif + + IMPLICIT NONE + +#ifdef OMPGPU + include 'mpif.h' +#endif + + REAL(KIND=JPRBT), INTENT(IN) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: LDTRANSPOSED ! key indicating whether PGLAT is transposed, i.e. (nlon*nlat,nfld) instead of (nfld,nlon*nlat) + + ! LOCAL VARIABLES + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + INTEGER(KIND=JPIM) :: ICOMR_KFFS(NPROC), ICOMS_KFFS(NPROC) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*4) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IFLD, IGL, IGLL,& + &ILAST, ILASTLAT, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ITAG, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLDS, IPROC,JROC, & + &INRECV, INSEND,INR,INS,IR, JKL, JK_MAX + INTEGER(KIND=JPIM) :: II,INDOFFX,ILEN,IBUFLENS,IBUFLENR + + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: KINDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(KF_GP) + INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END + INTEGER(KIND=JPIM) :: ISEND_FLD_START(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: ISENDCOUNT,IRECVCOUNT,J + INTEGER(KIND=JPIM) :: JPOS(NGPBLKS),IFLDA(KF_GP),JI,JJ + INTEGER(KIND=JPIM) :: IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + INTEGER(KIND=JPIM), DIMENSION(MPI_STATUS_SIZE,NPROC*2) :: ISTATUS + INTEGER(KIND=JPIM) :: IERROR + + REAL(KIND=JPRBT) :: TIMEF, Tc + LOGICAL :: LLTRANSPOSED + +#ifdef PARKINDTRANS_SINGLE +#define TRLTOG_DTYPE MPI_REAL +#else +#define TRLTOG_DTYPE MPI_DOUBLE_PRECISION +#endif + + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + IF (LHOOK) CALL DR_HOOK('TRLTOG_CUDAAWARE',0,ZHOOK_HANDLE) + +#ifdef gnarls + write (20,*) __FILE__, __LINE__ + write (20,*) 'shape(pglat) = ',shape(pglat) +!$acc data present(pglat) +!$acc update host(pglat) +!$acc end data +write (20,*) 'PGLAT = ' +write (20,'(6E18.8)') transpose(PGLAT) +#endif + + CALL GSTATS(1806,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + LLTRANSPOSED=.FALSE. + IF(PRESENT(KPTRGP)) LLINDER=.TRUE. + IF(PRESENT(PGP)) LLPGPONLY=.TRUE. + IF(PRESENT(PGPUV)) LLPGPUV=.TRUE. + IF(PRESENT(PGP3A)) LLPGP3A=.TRUE. + IF(PRESENT(PGP3B)) LLPGP3B=.TRUE. + IF(PRESENT(PGP2)) LLPGP2=.TRUE. + IF (PRESENT(LDTRANSPOSED)) LLTRANSPOSED=LDTRANSPOSED + + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + + LLUV(:) = .FALSE. + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + ITAG = MTAGLG + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + ISEND = JROC + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + IRECVTOT(JROC) = IGPTRRECV(ISETW)*IPOS + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + ISENDTOT(JROC) = IPOS*KF_FS + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + KINDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(1:ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(1:IRECVCOUNT,INRECV)) +#ifdef ACCGPU + !$ACC DATA COPYIN(IBUFLENS,IBUFLENR) + !$ACC DATA IF(IBUFLENS > 0) CREATE(ZCOMBUFS) + !$ACC DATA IF(IBUFLENR > 0) CREATE(ZCOMBUFR) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(IBUFLENS > 0) MAP(ALLOC:ZCOMBUFS) + !$OMP TARGET DATA IF(IBUFLENR > 0) MAP(ALLOC:ZCOMBUFR) +#endif + +IF (IBUFLENS > 0) THEN +!$ACC KERNELS DEFAULT(NONE) + ZCOMBUFS(:,:) = 0 +!$ACC END KERNELS +ENDIF +IF (IBUFLENR > 0) THEN +!$ACC KERNELS DEFAULT(NONE) + ZCOMBUFR(:,:) = 0 +!$ACC END KERNELS +ENDIF + +#ifdef ACCGPU + !$ACC DATA & + !$ACC PRESENT(PGLAT) & + !$ACC COPYIN(IGPTRSEND,INDOFF,KINDEX, LLUV,LLGP2,LLGP3A,LLGP3B,KPTRGP) + !$ACC DATA IF(PRESENT(PGP)) COPYOUT(PGP) + !$ACC DATA IF(PRESENT(PGPUV)) COPYOUT(PGPUV) COPYIN(IUVLEVS,IUVPARS) + !$ACC DATA IF(PRESENT(PGP2)) COPYOUT(PGP2) COPYIN(IGP2PARS) + !$ACC DATA IF(PRESENT(PGP3A)) COPYOUT(PGP3A) COPYIN(IGP3APARS,IGP3ALEVS) + !$ACC DATA IF(PRESENT(PGP3B)) COPYOUT(PGP3B) COPYIN(IGP3BPARS, IGP3BLEVS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA & + !$OMP& MAP(PRESENT,ALLOC:PGLAT) & + !$OMP& MAP(TO:IGPTRSEND,INDOFF,KINDEX, LLUV,LLGP2,LLGP3A,LLGP3B,KPTRGP) + !$OMP TARGET DATA IF(PRESENT(PGP)) MAP(FROM:PGP) + !$OMP TARGET DATA IF(PRESENT(PGPUV)) MAP(FROM:PGPUV) MAP(TO:IUVLEVS,IUVPARS) + !$OMP TARGET DATA IF(PRESENT(PGP2)) MAP(FROM:PGP2) MAP(TO:IGP2PARS) + !$OMP TARGET DATA IF(PRESENT(PGP3A)) MAP(FROM:PGP3A) MAP(TO:IGP3APARS,IGP3ALEVS) + !$OMP TARGET DATA IF(PRESENT(PGP3B)) MAP(FROM:PGP3B) MAP(TO:IGP3BPARS, IGP3BLEVS) +#endif + + CALL GSTATS(1806,1) + + ! Copy local contribution + IF( IRECVTOT(MYPROC) > 0 )THEN + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + JK_MAX=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + IF (JK_MAX<(ILAST-IFIRST+1)) JK_MAX = (ILAST-IFIRST+1) + ENDIF + ENDDO + + CALL GSTATS(1604,0) + + IF (LLPGPONLY) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$OMP& MAP(TO:IGPTROFF,IFLDOFF,JK_MAX) COLLAPSE(3) & + !$OMP& SHARED(NGPBLKS,IFLDS,IGPTRSEND,MYSETW,INDOFF,MYPROC, & + !$OMP& IGPTROFF,LLINDER,KPTRGP,PGP,PGLAT,KINDEX,IFLDOFF,JK_MAX) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$ACC& COPYIN(IGPTROFF,IFLDOFF,JK_MAX) COLLAPSE(3) & + !$ACC& COPYIN(NGPBLKS,IFLDS,IGPTRSEND,MYSETW,INDOFF,MYPROC, & + !$ACC& LLINDER,KPTRGP,KINDEX,LLTRANSPOSED) & + !$ACC& PRESENT(PGP,PGLAT) +#endif + DO JBLK=1,NGPBLKS + DO JFLD=1,IFLDS + DO JKL=1,JK_MAX + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + ILAST = IGPTRSEND(2,JBLK,MYSETW) + JK = JKL+IFIRST-1 + IF(IFIRST>0 .AND. JK<=ILAST) THEN + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JKL + IF(LLINDER) THEN + IFLD = KPTRGP(JFLD) + IF (LLTRANSPOSED) THEN + PGP(JK,IFLD,JBLK) = PGLAT(KINDEX(IPOS),JFLD) + ELSE + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,KINDEX(IPOS)) + ENDIF + ELSE + IFLD = IFLDOFF(JFLD) + IF (LLTRANSPOSED) THEN + PGP(JK,IFLD,JBLK) = PGLAT(KINDEX(IPOS),JFLD) + ELSE + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,KINDEX(IPOS)) + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO + ELSE +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$OMP& MAP(TO:IGPTROFF,IFLDOFF,JK_MAX) COLLAPSE(3) & + !$OMP& SHARED(NGPBLKS,IFLDS,IGPTRSEND,MYSETW,INDOFF,MYPROC, & + !$OMP& IGPTROFF,LLUV,PGPUV,IUVLEVS,IUVPARS,LLGP2,PGP2,IGP2PARS, & + !$OMP& PGLAT,LLGP3A,PGP3A,IGP3ALEVS,IGP3APARS,LLGP3B,PGP3B,& + !$OMP& IGP3BLEVS,IGP3BPARS,KINDEX,IFLDOFF,JK_MAX) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IPOS,IFIRST,ILAST,IFLD,JK) & + !$ACC& COPYIN(IGPTROFF,IFLDOFF,JK_MAX) COLLAPSE(3) & + !$ACC& COPYIN(NGPBLKS,IFLDS,IGPTRSEND,MYSETW,INDOFF,MYPROC, & + !$ACC& LLUV,PGPUV,IUVLEVS,IUVPARS,LLGP2,PGP2,IGP2PARS, & + !$ACC& LLGP3A,PGP3A,IGP3ALEVS,IGP3APARS,LLGP3B,PGP3B,& + !$ACC& IGP3BLEVS,IGP3BPARS,KINDEX,LLTRANSPOSED) & + !$ACC& PRESENT(PGLAT,PGPUV,PGP2,PGP3A,PGP3B) +#endif + DO JBLK=1,NGPBLKS + DO JFLD=1,IFLDS + DO JKL=1,JK_MAX + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + ILAST = IGPTRSEND(2,JBLK,MYSETW) + JK = JKL+IFIRST-1 + IF(IFIRST>0 .AND. JK<=ILAST) THEN + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JKL + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(KINDEX(IPOS),JFLD) + ELSE + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(JFLD,KINDEX(IPOS)) + ENDIF + ELSEIF(LLGP2(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(KINDEX(IPOS),JFLD) + ELSE + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(JFLD,KINDEX(IPOS)) + ENDIF + ELSEIF(LLGP3A(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(KINDEX(IPOS),JFLD) + ELSE + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(JFLD,KINDEX(IPOS)) + ENDIF + ELSEIF(LLGP3B(IFLD)) THEN + IF ( LLTRANSPOSED ) THEN + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(KINDEX(IPOS),JFLD) + ELSE + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(JFLD,KINDEX(IPOS)) + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO + ENDIF + CALL GSTATS(1604,1) + + ENDIF + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=TIMEF() +#endif + ! + ! loop over the number of processors we need to communicate with. + ! NOT MYPROC + ! + ! Pack loop......................................................... + + CALL GSTATS(1605,0) + + DO INS=1,INSEND + ISEND=JSEND(INS) + ILEN = ISENDTOT(ISEND)/KF_FS +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(II) MAP(TO:ILEN) COLLAPSE(2) & + !$OMP& SHARED(ILEN,KF_FS,KINDEX,INDOFF,ISEND,ZCOMBUFS,INS,PGLAT) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(II) COPYIN(ILEN) COLLAPSE(2) & + !$ACC& PRESENT(KINDEX,INDOFF,ZCOMBUFS,PGLAT) & + !$ACC& COPYIN(ILEN,INS,KF_FS,ISEND,LLTRANSPOSED) +#endif + DO JFLD=1,KF_FS + DO JL=1,ILEN + II = KINDEX(INDOFF(ISEND)+JL) + IF (LLTRANSPOSED) THEN + ZCOMBUFS((JFLD-1)*ILEN+JL,INS) = PGLAT(II,JFLD) + ELSE + ZCOMBUFS((JFLD-1)*ILEN+JL,INS) = PGLAT(JFLD,II) + ENDIF + ENDDO + ENDDO + ICOMS_KFFS(INS) = KF_FS + ENDDO + + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=(TIMEF()-Tc)/1000.0_JPRBT + !IF(MPL_MYRANK==1) WRITE(*,*) "packing (trltog) in sec: ", Tc +#endif + + CALL GSTATS(1605,1) + + IR=0 + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(762) + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',1,ZHOOK_HANDLE_BAR) + CALL GSTATS(805,0) + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=TIMEF() +#endif + + IF (LSYNC_TRANS) THEN + CALL GSTATS(422,0) + CALL MPL_BARRIER(CDSTRING='TRLTOG BARRIER') + CALL GSTATS(422,1) + ENDIF + CALL GSTATS(412,0) + + !...Receive loop......................................................... +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(ZCOMBUFS,ZCOMBUFR) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(ZCOMBUFS,ZCOMBUFR) +#endif + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + CALL MPI_IRECV(ZCOMBUFR(1:IRECVTOT(IRECV),INR), & + & IRECVTOT(IRECV), & + & TRLTOG_DTYPE,NPRCIDS(IRECV)-1, & + & ITAG, MPL_COMM_OML(OML_MY_THREAD()), IREQ(IR), & + & IERROR ) + IR=IR+1 + CALL MPI_IRECV(ICOMR_KFFS(INR), 1, & + & MPI_INTEGER,NPRCIDS(IRECV)-1, & + & ITAG, MPL_COMM_OML(OML_MY_THREAD()), IREQ(IR), & + & IERROR ) + ENDDO + + !...Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + CALL MPI_ISEND(ZCOMBUFS(1:ISENDTOT(ISEND),INS),& + & ISENDTOT(ISEND), & + & TRLTOG_DTYPE, NPRCIDS(ISEND)-1,ITAG, & + & MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR), & + & IERROR) + IR=IR+1 + CALL MPI_ISEND(ICOMS_KFFS(INS),1, & + & MPI_INTEGER, NPRCIDS(ISEND)-1,ITAG, & + & MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR), & + & IERROR) + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + + IF(IR > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='TRLTOG_CUDAAWARE: WAIT FOR SENDS AND RECEIVES') + ENDIF + + CALL GSTATS(412,1) +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=(TIMEF()-Tc)/1000.0_JPRBT + !IF(MPL_MYRANK==1) WRITE(*,*) "CUDA-aware isend/irecv (trltog) in sec: ", Tc +#endif + + CALL GSTATS(805,1) + CALL GSTATS_BARRIER2(762) + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=TIMEF() +#endif + ! Unpack loop......................................................... + + CALL GSTATS(1606,0) + DO INR=1,INRECV + IRECV=JRECV(INR) + CALL PE2SET(IRECV,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETV +! IRECV_FLD_START = 1 !! INT(ZCOMBUFR(-1,INR),KIND=JPIM) !! is this always 1 ? + IRECV_FLD_END = ICOMR_KFFS(INR) + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == IRECVSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + JK_MAX=0 + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + JPOS(JBLK)=IPOS + IPOS=IPOS+(ILAST-IFIRST+1) + IF (JK_MAX<(ILAST-IFIRST+1)) JK_MAX = (ILAST-IFIRST+1) + ENDIF + ENDDO + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IFIRST,ILAST,JI,JK,IFLDT) & + !$OMP& MAP(TO:INR,KF_FS,IPOS,JPOS,IFLD,IFLDA,JK_MAX,IRECV_FLD_END) COLLAPSE(3) & + !$OMP& SHARED(NGPBLKS,IRECV_FLD_END,JK_MAX,IFLDA,IGPTRSEND,ISETW,IPOS,JPOS, & + !$OMP& LLINDER,PGP,KPTRGP,ZCOMBUFR,INR,LLPGPONLY,LLUV,PGPUV,IUVLEVS, & + !$OMP& IUVPARS,LLGP2,PGP2,IGP2PARS,LLGP3A,PGP3A,IGP3ALEVS,IGP3APARS, & + !$OMP& LLGP3B,PGP3B,IGP3BLEVS,IGP3BPARS) +#endif +#ifdef ACCGPU_SABTNADBY + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IFIRST,ILAST,JI,JK,IFLDT) & + !$ACC& COPYIN(INR,KF_FS,IPOS,JPOS,IFLD,IFLDA,JK_MAX,IRECV_FLD_END) COLLAPSE(3) & + !$ACC& PRESENT(NGPBLKS,IRECV_FLD_END,JK_MAX,IFLDA,IGPTRSEND,ISETW,IPOS,JPOS, & + !$ACC& LLINDER,PGP,KPTRGP,ZCOMBUFR,INR,LLPGPONLY,LLUV,PGPUV,IUVLEVS, & + !$ACC& IUVPARS,LLGP2,PGP2,IGP2PARS,LLGP3A,PGP3A,IGP3ALEVS,IGP3APARS, & + !$ACC& LLGP3B,PGP3B,IGP3BLEVS,IGP3BPARS) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IFIRST,ILAST,JI,JK,IFLDT) & + !$ACC& COPYIN(INR,KF_FS,IPOS,JPOS,IFLD,IFLDA,JK_MAX,IRECV_FLD_END) COLLAPSE(3) & + !$ACC& COPYIN(NGPBLKS,IRECV_FLD_END,JK_MAX,IFLDA,IGPTRSEND,ISETW,IPOS,JPOS, & + !$ACC& LLINDER,KPTRGP,INR,LLPGPONLY,LLUV,IUVLEVS, & + !$ACC& IUVPARS,LLGP2,IGP2PARS,LLGP3A,IGP3ALEVS,IGP3APARS, & + !$ACC& LLGP3B,IGP3BLEVS,IGP3BPARS) & + !$ACC& PRESENT(PGP,ZCOMBUFR,PGPUV,PGP2,PGP3A,PGP3B) +#endif + DO JBLK=1,NGPBLKS + DO JJ=1,IRECV_FLD_END + DO JKL=1,JK_MAX + IFLDT=IFLDA(JJ) + IFIRST = IGPTRSEND(1,JBLK,ISETW) + ILAST = IGPTRSEND(2,JBLK,ISETW) + JK = JKL+IFIRST-1 + JI=(JJ-1)*IPOS+JPOS(JBLK)+JKL + IF(IFIRST > 0 .AND. JK<=ILAST) THEN + IF(LLINDER) THEN + PGP(JK,KPTRGP(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ELSEIF(LLPGPONLY) THEN + PGP(JK,IFLDT,JBLK) = ZCOMBUFR(JI,INR) + ELSEIF(LLUV(IFLDT)) THEN + PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ELSEIF(LLGP2(IFLDT)) THEN + PGP2(JK,IGP2PARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ELSEIF(LLGP3A(IFLDT)) THEN + PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ELSEIF(LLGP3B(IFLDT)) THEN + PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO + + ENDDO + + CALL GSTATS(431,0) +#ifdef OMPGPU + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA +#endif + +#ifdef OMPGPU + !$OMP END TARGET DATA !! CREATE ZCOMBUFR + !$OMP END TARGET DATA !! CREATE ZCOMBUFS +#endif +#ifdef ACCGPU + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA +#endif + +#ifdef ACCGPU + !$ACC END DATA !! CREATE ZCOMBUFR + !$ACC END DATA !! CREATE ZCOMBUFS + !$ACC END DATA !! COPYIN IBUFLENS,IBUFLENR +#endif + CALL GSTATS(431,1) + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=(TIMEF()-Tc)/1000.0_JPRBT + !IF(MPL_MYRANK==1) WRITE(*,*) "unpacking (trltog) in sec: ", Tc +#endif + + CALL GSTATS(1606,1) + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRLTOG_CUDAAWARE',1,ZHOOK_HANDLE) + + END SUBROUTINE TRLTOG_CUDAAWARE + + SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,LDTRANSPOSED) + + !**** *trltog * - transposition of grid point data from latitudinal + ! to column structure. This takes place between inverse + ! FFT and grid point calculations. + ! TRLTOG is the inverse of TRGTOL + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trltog(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (input) + ! PGP - Blocked grid point data (output) + ! KVSET - "v-set" for each field (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow NPRTRV + ! to differ from NPRGPEW + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! INDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of LTOG_PACK,LTOG_UNPACK + ! 03-0-02 G. Radnoti: Call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB , JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_MYRANK + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DISTR ,ONLY : D, MYSETV, MYSETW, MTAGLG, & + & NPRCIDS, NPRTRNS, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + ! +#ifdef ACCGPU + USE MPI +#endif + + IMPLICIT NONE + +#ifdef OMPGPU + include 'mpif.h' +#endif + + REAL(KIND=JPRBT), INTENT(IN) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: LDTRANSPOSED ! key indicating whether PGLAT is transposed, i.e. (nlon*nlat,nfld) instead of (nfld,nlon*nlat) + + ! LOCAL VARIABLES + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*2) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IFLD, IGL, IGLL,& + &ILAST, ILASTLAT, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ITAG, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLDS, IPROC,JROC, & + &INRECV, INSEND,INR,INS,IR + INTEGER(KIND=JPIM) :: II,INDOFFX,ILEN,IBUFLENS,IBUFLENR + + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: INDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(KF_GP) + INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END + INTEGER(KIND=JPIM) :: ISEND_FLD_START(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: ISENDCOUNT,IRECVCOUNT,J + INTEGER(KIND=JPIM) :: JPOS(NGPBLKS),IFLDA(KF_GP),JI,JJ + INTEGER(KIND=JPIM) :: IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + INTEGER(KIND=JPIM) :: IERROR + LOGICAL :: LLTRANSPOSED + + REAL(KIND=JPRBT) :: TIMEF, tc + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + IF (LHOOK) CALL DR_HOOK('TRLTOG',0,ZHOOK_HANDLE) + + LLTRANSPOSED=.FALSE. + IF (PRESENT(LDTRANSPOSED)) LLTRANSPOSED=LDTRANSPOSED + + CALL GSTATS(1806,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + IF(PRESENT(KPTRGP)) LLINDER = .TRUE. + IF(PRESENT(PGP)) LLPGPONLY=.TRUE. + IF(PRESENT(PGPUV)) LLPGPUV=.TRUE. + IF(PRESENT(PGP3A)) LLPGP3A=.TRUE. + IF(PRESENT(PGP3B)) LLPGP3B=.TRUE. + IF(PRESENT(PGP2)) LLPGP2=.TRUE. + + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + + LLUV(:) = .FALSE. + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + ITAG = MTAGLG + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + ISEND = JROC + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + IRECVTOT(JROC) = IGPTRRECV(ISETW)*IPOS + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + ISENDTOT(JROC) = IPOS*KF_FS + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + INDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(-1:ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(-1:IRECVCOUNT,INRECV)) + + CALL GSTATS(1806,1) + + + ! Copy local contribution + IF( IRECVTOT(MYPROC) > 0 )THEN + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + + CALL GSTATS(1604,0) + +IF (.NOT. LLTRANSPOSED) THEN + +#ifdef NECSX + !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#else + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#endif + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + IF(LLINDER) THEN + DO JFLD=1,IFLDS + IFLD = KPTRGP(JFLD) + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ENDDO + ENDIF + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSE + WRITE(NOUT,*)'TRLTOG_MOD: ERROR',JFLD,IFLD + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !$OMP END PARALLEL DO + +ELSE + +#ifdef NECSX + !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#else + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#endif + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + IF(LLINDER) THEN + DO JFLD=1,IFLDS + IFLD = KPTRGP(JFLD) + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(INDEX(IPOS),JFLD) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(INDEX(IPOS),JFLD) + ENDDO + ENDDO + ENDIF + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(INDEX(IPOS),JFLD) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(INDEX(IPOS),JFLD) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(INDEX(IPOS),JFLD) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(INDEX(IPOS),JFLD) + ENDDO + ELSE + WRITE(NOUT,*)'TRLTOG_MOD: ERROR',JFLD,IFLD + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !$OMP END PARALLEL DO + +ENDIF + + CALL GSTATS(1604,1) + + ENDIF + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=TIMEF() +#endif + ! + ! loop over the number of processors we need to communicate with. + ! NOT MYPROC + ! + ! Pack loop......................................................... + + CALL GSTATS(1605,0) + +IF (.NOT. LLTRANSPOSED) THEN + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD,INS,ISEND,ILEN,ISEND_FLD_END) + DO INS=1,INSEND + ISEND=JSEND(INS) + ISEND_FLD_START(ISEND)= 1 + ILEN = ISENDTOT(ISEND)/KF_FS + ISEND_FLD_END = KF_FS +#ifdef NECSX + DO JFLD=ISEND_FLD_START(ISEND),ISEND_FLD_END + DO JL=1,ILEN + II = INDEX(INDOFF(ISEND)+JL) +#else + DO JL=1,ILEN + II = INDEX(INDOFF(ISEND)+JL) + DO JFLD=ISEND_FLD_START(ISEND),ISEND_FLD_END +#endif + ZCOMBUFS((JFLD-ISEND_FLD_START(ISEND))*ILEN+JL,INS) = PGLAT(JFLD,II) + ENDDO + ENDDO + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = KF_FS + ENDDO + !$OMP END PARALLEL DO +ELSE + + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD,INS,ISEND,ILEN,ISEND_FLD_END) + DO INS=1,INSEND + ISEND=JSEND(INS) + ISEND_FLD_START(ISEND)= 1 + ILEN = ISENDTOT(ISEND)/KF_FS + ISEND_FLD_END = KF_FS +#ifdef NECSX + DO JFLD=ISEND_FLD_START(ISEND),ISEND_FLD_END + DO JL=1,ILEN + II = INDEX(INDOFF(ISEND)+JL) +#else + DO JL=1,ILEN + II = INDEX(INDOFF(ISEND)+JL) + DO JFLD=ISEND_FLD_START(ISEND),ISEND_FLD_END +#endif + ZCOMBUFS((JFLD-ISEND_FLD_START(ISEND))*ILEN+JL,INS) = PGLAT(II,JFLD) + ENDDO + ENDDO + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = KF_FS + ENDDO + !$OMP END PARALLEL DO + + +ENDIF + + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=(TIMEF()-Tc)/1000.0_JPRBT + !IF(MPL_MYRANK==1) WRITE(*,*) "packing (trltog) in sec: ", Tc +#endif + + CALL GSTATS(1605,1) + + IR=0 + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(762) + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',1,ZHOOK_HANDLE_BAR) + CALL GSTATS(805,0) + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=TIMEF() +#endif + !...Receive loop......................................................... + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:IRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRLTOG:' ) + ENDDO + + !...Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + CALL MPL_SEND(ZCOMBUFS(-1:ISENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND),& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRLTOG:') + ENDDO + + IF(IR > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='TRLTOG: WAIT FOR SENDS AND RECEIVES') + ENDIF + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=(TIMEF()-Tc)/1000.0_JPRBT + !IF(MPL_MYRANK==1) WRITE(*,*) "non-CUDA-aware isend/irecv (trltog) in sec: ", Tc +#endif + + CALL GSTATS(805,1) + CALL GSTATS_BARRIER2(762) + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=TIMEF() +#endif + ! Unpack loop......................................................... + + CALL GSTATS(1606,0) + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,JBLK,IFIRST,ILAST,JK,& + !$OMP& JJ,JI,JPOS,INR,IRECV,IRECVSET,IRECV_FLD_START,IRECV_FLD_END,IPOS,& + !$OMP& ISETA,ISETB,ISETW,ISETV,JFLD,IFLD,IFLDA) + DO INR=1,INRECV + IRECV=JRECV(INR) + CALL PE2SET(IRECV,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETV + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == IRECVSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + JPOS(JBLK)=IPOS + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + + + DO JJ=IRECV_FLD_START,IRECV_FLD_END + IFLDT=IFLDA(JJ) + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IF(LLINDER) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP(JK,KPTRGP(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP(JK,IFLDT,JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ENDIF + ENDIF + ENDDO + ENDDO + + IPOS=(IRECV_FLD_END-IRECV_FLD_START+1)*IPOS + ENDDO + !$OMP END PARALLEL DO + +#ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !Tc=(TIMEF()-Tc)/1000.0_JPRBT + !IF(MPL_MYRANK==1) WRITE(*,*) "unpacking (trltog) in sec: ", Tc +#endif + + CALL GSTATS(1606,1) + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRLTOG',1,ZHOOK_HANDLE) + + END SUBROUTINE TRLTOG + END MODULE TRLTOG_MOD + diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 new file mode 100755 index 0000000..9dd87cc --- /dev/null +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -0,0 +1,374 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRLTOM_MOD + CONTAINS + SUBROUTINE TRLTOM_CUDAAWARE(PFBUF_IN,PFBUF,KFIELD) + + !**** *TRLTOM * - transposition in Fourierspace + + ! Purpose. + ! -------- + ! Transpose Fourier coefficients from partitioning + ! over latitudes to partitioning over wave numbers + ! This is done between inverse Legendre Transform + ! and inverse FFT. + ! This is the inverse routine of TRMTOL. + + !** Interface. + ! ---------- + ! *CALL* *TRLTOM(...)* + + ! Explicit arguments : PFBUF - Fourier coefficient buffer. It is + ! -------------------- used for both input and output. + + ! KFIELD - Number of fields communicated + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 95-10-01 + ! Modified : 97-06-18 G. Mozdzynski - control MPI mailbox use + ! (NCOMBFLEN) for nphase.eq.1 + ! Modified : 99-05-28 D.Salmond - Optimise copies. + ! Modified : 00-02-02 M.Hamrud - Remove NPHASE + ! D.Salmond : 01-11-23 LIMP_NOOLAP Option for non-overlapping message + ! passing and buffer packing + ! G.Mozdzynski : 08-01-01 Cleanup + ! Y.Seity : 07-08-30 Add barrier synchonisation under LSYNC_TRANS + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK, MPL_WAIT, JP_NON_BLOCKING_STANDARD + + USE TPM_DISTR ,ONLY : D, MTAGLM, MYSETW, NPRTRW, NPROC, MYPROC + USE TPM_GEN ,ONLY : LSYNC_TRANS + +#ifdef ACCGPU + USE MPI +#endif + + !USE SET2PE_MOD + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + !USE ABORT_TRANS_MOD + ! + + IMPLICIT NONE + + + INTERFACE + + FUNCTION ALLTOALLV_CUDAIPC(input,len,soff,output,roff,mtol_or_ltom) BIND(C,name='Alltoallv_CUDAIPC') + USE, INTRINSIC :: ISO_C_BINDING + IMPLICIT NONE + real(c_double), dimension(*) :: input,output + integer(c_int), dimension(*) :: len,soff,roff + integer(c_int),value :: mtol_or_ltom + integer(c_int) :: ALLTOALLV_CUDAIPC + END FUNCTION ALLTOALLV_CUDAIPC + + END INTERFACE + +#ifdef OMPGPU + include 'mpif.h' +#endif + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD + REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF(:) + REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF_IN(:) + + INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) + + INTEGER(KIND=JPIM) :: ITAG, J, ILEN, ISTA + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR2 + + INTEGER(KIND=JPIM) :: IREQ + INTEGER(KIND=JPIM) :: IERROR + ! ------------------------------------------------------------------ + + REAL(KIND=JPRBT) :: T1, T2, TIMEF, Tc + INTEGER(KIND=JPIM) :: MTOL_OR_LTOM, NOFULLPEERACCESS + INTEGER(KIND=JPIM) :: IRANK,IUNIT + INTEGER(KIND=JPIM) :: FROM_SEND,FROM_RECV,TO_RECV,TO_SEND + + IF (LHOOK) CALL DR_HOOK('TRLTOM_CUDAAWARE',0,ZHOOK_HANDLE) + +#ifdef PARKINDTRANS_SINGLE +#define TRLTOM_DTYPE MPI_REAL +#else +#define TRLTOM_DTYPE MPI_DOUBLE_PRECISION +#endif + + ITAG = MTAGLM + + DO J=1,NPRTRW + ILENS(J) = D%NLTSGTB(J)*KFIELD + IOFFS(J) = D%NSTAGT1B(D%MSTABF(J))*KFIELD + ILENR(J) = D%NLTSFTB(J)*KFIELD + IOFFR(J) = D%NSTAGT1B(J)*KFIELD + ENDDO + + IF(NPROC > 1) THEN + CALL GSTATS(806,0) + IF (LSYNC_TRANS) THEN + CALL GSTATS(420,0) + CALL MPL_BARRIER(CDSTRING='TRLTOM BARRIER') + CALL GSTATS(420,1) + ENDIF + +! daand: I believe this fix was for NVIDIA, but it's not necessary on lumi +#ifdef gnarls + ! copy to self workaround + IRANK = MPL_MYRANK(MPL_ALL_MS_COMM) + IF (ILENS(IRANK) .ne. ILENR(IRANK)) THEN + PRINT *, "ERROR", ILENS(IRANK), ILENR(IRANK) + stop 1 + ENDIF + IF (ILENS(IRANK) > 0) THEN + FROM_SEND = IOFFS(IRANK) + 1 + TO_SEND = FROM_SEND + ILENS(IRANK) - 1 + FROM_RECV = IOFFR(IRANK) + 1 + TO_RECV = FROM_RECV + ILENR(IRANK) - 1 +#ifdef OMPGPU + !$OMP TARGET +#endif +#ifdef ACCGPU + !$ACC KERNELS ASYNC(1) DEFAULT(NONE) & + !$ACC& PRESENT(PFBUF,PFBUF_IN) & + !$ACC& COPYIN(FROM_RECV,TO_RECV,FROM_SEND,TO_SEND) +#endif + PFBUF(FROM_RECV:TO_RECV) = PFBUF_IN(FROM_SEND:TO_SEND) +#ifdef ACCGPU + !$ACC END KERNELS +#endif +#ifdef OMPGPU + !$OMP END TARGET +#endif + ILENS(IRANK) = 0 + ILENR(IRANK) = 0 + ENDIF +#endif + + CALL GSTATS(411,0) +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(PFBUF_IN, PFBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(PFBUF_IN, PFBUF) +#endif + + CALL MPI_ALLTOALLV(PFBUF_IN,ILENS,IOFFS,TRLTOM_DTYPE,& + & PFBUF,ILENR,IOFFR, TRLTOM_DTYPE, & + & MPL_ALL_MS_COMM, IERROR) + +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + CALL GSTATS(411,1) +#ifdef ACCGPU + !!!$ACC WAIT(1) +#endif + !!!$OMP BARRIER + + CALL MPI_BARRIER(MPI_COMM_WORLD,IERROR) + CALL GSTATS(806,1) + ELSE + ILEN = D%NLTSGTB(MYSETW)*KFIELD + ISTA = D%NSTAGT1B(MYSETW)*KFIELD+1 + CALL GSTATS(1607,0) +#ifdef OMPGPU +!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) MAP(ALLOC:PFBUF,PFBUF_IN) & + !$OMP& SHARED(ISTA,ILEN,PFBUF,PFBUF_IN) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) COPYIN(ISTA,ILEN) +#endif + DO J=ISTA,ISTA+ILEN-1 + PFBUF(J) = PFBUF_IN(J) + ENDDO + CALL GSTATS(1607,1) + ENDIF + + IF (LHOOK) CALL DR_HOOK('TRLTOM_CUDAAWARE',1,ZHOOK_HANDLE) + ! ------------------------------------------------------------------ + END SUBROUTINE TRLTOM_CUDAAWARE + + SUBROUTINE TRLTOM(PFBUF_IN,PFBUF,KFIELD) + + !**** *TRLTOM * - transposition in Fourierspace + + ! Purpose. + ! -------- + ! Transpose Fourier coefficients from partitioning + ! over latitudes to partitioning over wave numbers + ! This is done between inverse Legendre Transform + ! and inverse FFT. + ! This is the inverse routine of TRMTOL. + + !** Interface. + ! ---------- + ! *CALL* *TRLTOM(...)* + + ! Explicit arguments : PFBUF - Fourier coefficient buffer. It is + ! -------------------- used for both input and output. + + ! KFIELD - Number of fields communicated + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 95-10-01 + ! Modified : 97-06-18 G. Mozdzynski - control MPI mailbox use + ! (NCOMBFLEN) for nphase.eq.1 + ! Modified : 99-05-28 D.Salmond - Optimise copies. + ! Modified : 00-02-02 M.Hamrud - Remove NPHASE + ! D.Salmond : 01-11-23 LIMP_NOOLAP Option for non-overlapping message + ! passing and buffer packing + ! G.Mozdzynski : 08-01-01 Cleanup + ! Y.Seity : 07-08-30 Add barrier synchonisation under LSYNC_TRANS + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK, MPL_WAIT, JP_NON_BLOCKING_STANDARD + + USE TPM_DISTR ,ONLY : D, MTAGLM, MYSETW, NPRTRW, NPROC, MYPROC + USE TPM_GEN ,ONLY : LSYNC_TRANS + +#ifdef ACCGPU + USE MPI +#endif + + !USE SET2PE_MOD + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + !USE ABORT_TRANS_MOD + ! + + IMPLICIT NONE + + + INTERFACE + + FUNCTION ALLTOALLV_CUDAIPC(input,len,soff,output,roff,mtol_or_ltom) BIND(C,name='Alltoallv_CUDAIPC') + USE, INTRINSIC :: ISO_C_BINDING + IMPLICIT NONE + real(c_double), dimension(*) :: input,output + integer(c_int), dimension(*) :: len,soff,roff + integer(c_int),value :: mtol_or_ltom + integer(c_int) :: ALLTOALLV_CUDAIPC + END FUNCTION ALLTOALLV_CUDAIPC + + END INTERFACE + +#ifdef OMPGPU + include 'mpif.h' +#endif + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD + REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF(:) + REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF_IN(:) + + INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) + + INTEGER(KIND=JPIM) :: ITAG, J, ILEN, ISTA + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR2 + + REAL(KIND=JPRBT) :: ZDUM(1) + INTEGER(KIND=JPIM) :: IREQ + INTEGER(KIND=JPIM) :: IERROR + ! ------------------------------------------------------------------ + + REAL(KIND=JPRBT) :: T1, T2, TIMEF, tc + INTEGER(KIND=JPIM) :: MTOL_OR_LTOM, NOFULLPEERACCESS + INTEGER(KIND=JPIM) :: IRANK,iunit + + IF (LHOOK) CALL DR_HOOK('TRLTOM',0,ZHOOK_HANDLE) + + ITAG = MTAGLM + + DO J=1,NPRTRW + ILENS(J) = D%NLTSGTB(J)*KFIELD + IOFFS(J) = D%NSTAGT1B(D%MSTABF(J))*KFIELD + ILENR(J) = D%NLTSFTB(J)*KFIELD + IOFFR(J) = D%NSTAGT1B(J)*KFIELD + ENDDO + + IF(NPROC > 1) THEN + CALL GSTATS(806,0) + + CALL MPL_ALLTOALLV(PSENDBUF=PFBUF_IN,KSENDCOUNTS=ILENS,& + & PRECVBUF=PFBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & KCOMM=MPL_ALL_MS_COMM,CDSTRING='TRLTOM:') + + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + CALL GSTATS(806,1) + ELSE + ILEN = D%NLTSGTB(MYSETW)*KFIELD + ISTA = D%NSTAGT1B(MYSETW)*KFIELD+1 + CALL GSTATS(1607,0) + DO J=ISTA,ISTA+ILEN-1 + PFBUF(J) = PFBUF_IN(J) + ENDDO + CALL GSTATS(1607,1) + ENDIF + + IF (LHOOK) CALL DR_HOOK('TRLTOM',1,ZHOOK_HANDLE) + ! ------------------------------------------------------------------ + END SUBROUTINE TRLTOM + END MODULE TRLTOM_MOD diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 new file mode 100755 index 0000000..277a8cc --- /dev/null +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -0,0 +1,359 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRMTOL_MOD + +CONTAINS +SUBROUTINE TRMTOL_CUDAAWARE(PFBUF_IN,PFBUF,KFIELD) + +!**** *trmtol * - transposition in Fourier space + +! Purpose. +! -------- +! Transpose Fourier buffer data from partitioning +! over wave numbers to partitioning over latitudes. +! It is called between direct FFT and direct Legendre +! transform. +! This routine is the inverse of TRLTOM. + + +!** Interface. +! ---------- +! *call* *trmtol(...)* + +! Explicit arguments : PFBUF - Fourier coefficient buffer. It is +! -------------------- used for both input and output. +! KFIELD - Number of fields communicated + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified : 97-06-17 G. Mozdzynski - control MPI mailbox use +! (NCOMBFLEN) for nphase.eq.1 +! Modified : 99-05-28 D.Salmond - Optimise copies. +! Modified : 00-02-02 M.Hamrud - Remove NPHASE +! D.Salmond : 01-11-23 LIMP_NOOLAP Option for non-overlapping message +! passing and buffer packing +! G.Mozdzynski: 08-01-01 Cleanup +! Y.Seity : 07-08-31 add barrien synchronisation under LSYNC_TRANS +! ------------------------------------------------------------------ + + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_MYRANK + +USE TPM_DISTR ,ONLY : D, MTAGML, MYSETW, NPRTRW, NPROC, MYPROC +USE TPM_GEN ,ONLY : LSYNC_TRANS + +#ifdef ACCGPU +USE MPI +#endif + +IMPLICIT NONE + +#ifdef OMPGPU +include 'mpif.h' +#endif + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFIELD +REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF(:) +REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF_IN(:) + +INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) + +INTEGER(KIND=JPIM) :: ITAG, J, ILEN, ISTA + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR2 + +REAL(KIND=JPRBT) :: ZDUM(1) +INTEGER(KIND=JPIM) :: IREQ, IERROR, IRANK +INTEGER(KIND=JPIM) :: FROM_SEND,FROM_RECV,TO_RECV,TO_SEND + +#ifdef PARKINDTRANS_SINGLE +#define TRMTOL_DTYPE MPI_REAL +#else +#define TRMTOL_DTYPE MPI_DOUBLE_PRECISION +#endif + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('TRMTOL_CUDAAWARE',0,ZHOOK_HANDLE) + +ITAG = MTAGML + +DO J=1,NPRTRW + ILENS(J) = D%NLTSFTB(J)*KFIELD + IOFFS(J) = D%NSTAGT0B(J)*KFIELD + ILENR(J) = D%NLTSGTB(J)*KFIELD + IOFFR(J) = D%NSTAGT0B(D%MSTABF(J))*KFIELD +ENDDO + +IF(NPROC > 1) THEN + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(764) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR',1,ZHOOK_HANDLE_BAR) + !IF (LSYNC_TRANS) THEN + ! CALL GSTATS(421,0) + ! CALL MPL_BARRIER(CDSTRING='TRLTOM BARRIER') + ! CALL GSTATS(421,1) + !ENDIF + +! daand: I believe this fix was for NVIDIA, but it's not necessary on lumi +#ifdef gnarls + + ! copy to self workaround + IRANK = MPL_MYRANK(MPL_ALL_MS_COMM) + IF (ILENS(IRANK) > 0) THEN + FROM_SEND = IOFFS(IRANK) + 1 + TO_SEND = FROM_SEND + ILENS(IRANK) - 1 + FROM_RECV = IOFFR(IRANK) + 1 + TO_RECV = FROM_RECV + ILENR(IRANK) - 1 +#ifdef OMPGPU + !$OMP TARGET +#endif +#ifdef ACCGPU + !$ACC KERNELS ASYNC(1) DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) COPYIN(FROM_RECV,TO_RECV,FROM_SEND,TO_SEND) +#endif + PFBUF(FROM_RECV:TO_RECV) = PFBUF_IN(FROM_SEND:TO_SEND) +#ifdef ACCGPU + !$ACC END KERNELS +#endif +#ifdef OMPGPU + !$OMP END TARGET +#endif + ILENS(IRANK) = 0 + ILENR(IRANK) = 0 + ENDIF +#endif + +! CALL GSTATS(410,0) + +#ifdef ACCGPU + !$ACC DATA PRESENT(PFBUF_IN, PFBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(PRESENT,ALLOC:PFBUF_IN, PFBUF) +#endif +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(PFBUF_IN, PFBUF) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(PFBUF_IN, PFBUF) +#endif + CALL GSTATS(807,0) + CALL MPI_ALLTOALLV(PFBUF_IN,ILENS,IOFFS,TRMTOL_DTYPE,& + & PFBUF,ILENR,IOFFR,TRMTOL_DTYPE,& + & MPL_ALL_MS_COMM,IERROR) + +#ifdef OMPGPU + !$OMP END TARGET DATA + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA + !$ACC END DATA +#endif +! CALL GSTATS(410,1) + CALL GSTATS(807,1) +!#ifdef ACCGPU +! !$ACC WAIT(1) +!#endif +! !$OMP BARRIER + + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR2',0,ZHOOK_HANDLE_BAR2) + CALL GSTATS_BARRIER2(764) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR2',1,ZHOOK_HANDLE_BAR2) +ELSE + ILEN = D%NLTSGTB(MYSETW)*KFIELD + ISTA = D%NSTAGT0B(MYSETW)*KFIELD+1 + CALL GSTATS(1608,0) +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) MAP(PRESENT,ALLOC:PFBUF,PFBUF_IN) & + !$OMP& SHARED(ISTA,ILEN,PFBUF,PFBUF_IN) +#endif +#ifdef ACCGPU + !*!$ACC PARALLEL LOOP DEFAULT(NONE) PRESENT(ISTA,ILEN,PFBUF,PFBUF_IN) + !$ACC PARALLEL LOOP DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) COPYIN(ISTA,ILEN) +#endif + DO J=ISTA,ISTA+ILEN-1 + PFBUF(J) = PFBUF_IN(J) + ENDDO + CALL GSTATS(1608,1) +ENDIF + +IF (LHOOK) CALL DR_HOOK('TRMTOL_CUDAAWARE',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE TRMTOL_CUDAAWARE + +SUBROUTINE TRMTOL(PFBUF_IN,PFBUF,KFIELD) + +!**** *trmtol * - transposition in Fourier space + +! Purpose. +! -------- +! Transpose Fourier buffer data from partitioning +! over wave numbers to partitioning over latitudes. +! It is called between direct FFT and direct Legendre +! transform. +! This routine is the inverse of TRLTOM. + + +!** Interface. +! ---------- +! *call* *trmtol(...)* + +! Explicit arguments : PFBUF - Fourier coefficient buffer. It is +! -------------------- used for both input and output. +! KFIELD - Number of fields communicated + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified : 97-06-17 G. Mozdzynski - control MPI mailbox use +! (NCOMBFLEN) for nphase.eq.1 +! Modified : 99-05-28 D.Salmond - Optimise copies. +! Modified : 00-02-02 M.Hamrud - Remove NPHASE +! D.Salmond : 01-11-23 LIMP_NOOLAP Option for non-overlapping message +! passing and buffer packing +! G.Mozdzynski: 08-01-01 Cleanup +! Y.Seity : 07-08-31 add barrien synchronisation under LSYNC_TRANS +! ------------------------------------------------------------------ + + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_MYRANK + +USE TPM_DISTR ,ONLY : D, MTAGML, MYSETW, NPRTRW, NPROC, MYPROC +USE TPM_GEN ,ONLY : LSYNC_TRANS + + +IMPLICIT NONE + + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD +REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF(:) +REAL(KIND=JPRBT) ,INTENT(INOUT) :: PFBUF_IN(:) + +INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) + +INTEGER(KIND=JPIM) :: ITAG, J, ILEN, ISTA + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR2 + +REAL(KIND=JPRBT) :: ZDUM(1) +INTEGER(KIND=JPIM) :: IREQ + + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('TRMTOL',0,ZHOOK_HANDLE) + +#ifdef gnarls +write (20,*) __FILE__, __LINE__; call flush(20) +write (20,*) 'PFBUF_IN = '; write (20,'(6E18.8)') PFBUF_IN(1:size(pfbuf_in,1)) +call flush(20) +#endif + +! daand: initializing to fixed value for debugging; not necessary! +PFBUF(:)=-1. + +ITAG = MTAGML + +DO J=1,NPRTRW + ILENS(J) = D%NLTSFTB(J)*KFIELD + IOFFS(J) = D%NSTAGT0B(J)*KFIELD + ILENR(J) = D%NLTSGTB(J)*KFIELD + IOFFR(J) = D%NSTAGT0B(D%MSTABF(J))*KFIELD +ENDDO + +IF(NPROC > 1) THEN + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR',0,ZHOOK_HANDLE_BAR) + ! CALL GSTATS_BARRIER(764) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR',1,ZHOOK_HANDLE_BAR) +! IF (LSYNC_TRANS) THEN +! CALL MPL_BARRIER(CDSTRING='TRMTOL') +! ENDIF + + CALL GSTATS(807,0) + CALL MPL_ALLTOALLV(PSENDBUF=PFBUF_IN,KSENDCOUNTS=ILENS,& + & PRECVBUF=PFBUF,KRECVCOUNTS=ILENR,KSENDDISPL=IOFFS,KRECVDISPL=IOFFR,& + & KCOMM=MPL_ALL_MS_COMM,CDSTRING='TRMTOL:') + CALL GSTATS(807,1) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR2',0,ZHOOK_HANDLE_BAR2) + !CALL GSTATS_BARRIER2(764) + IF (LHOOK) CALL DR_HOOK('TRMTOL_BAR2',1,ZHOOK_HANDLE_BAR2) +ELSE + ILEN = D%NLTSGTB(MYSETW)*KFIELD + ISTA = D%NSTAGT0B(MYSETW)*KFIELD+1 + CALL GSTATS(1608,0) + DO J=ISTA,ISTA+ILEN-1 + PFBUF(J) = PFBUF_IN(J) + ENDDO + CALL GSTATS(1608,1) +ENDIF + +#ifdef gnarls +write (20,*) __FILE__, __LINE__; call flush(20) +write (20,*) 'PFBUF = '; write (20,'(6E18.8)') PFBUF(1:size(pfbuf,1)) +call flush(20) +#endif + +IF (LHOOK) CALL DR_HOOK('TRMTOL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE TRMTOL +END MODULE TRMTOL_MOD diff --git a/src/trans/gpu/internal/updsp_mod.F90 b/src/trans/gpu/internal/updsp_mod.F90 new file mode 100755 index 0000000..8c05cd2 --- /dev/null +++ b/src/trans/gpu/internal/updsp_mod.F90 @@ -0,0 +1,183 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSP_MOD +CONTAINS +SUBROUTINE UPDSP(KF_UV,KF_SCALARS,POA1,POA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *UPDSP* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL UPDSP(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR ,ONLY : D + +USE UPDSPB_MOD ,ONLY : UPDSPB + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV,KF_SCALARS + +REAL(KIND=JPRBT) , INTENT(IN) :: POA1(:,:,:) +REAL(KIND=JPRBT) , INTENT(IN) :: POA2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND,JFLD,IFLD,IDIM1,IDIM3,J3 + + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +#ifdef ACCGPU +!$ACC DATA PRESENT(PSPVOR,PSPDIV) IF(KF_UV > 0) +!$ACC DATA PRESENT(PSPSCALAR) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSCALAR)) +!$ACC DATA PRESENT(PSPSC2) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC2) .AND. NF_SC2 > 0) +!$ACC DATA PRESENT(PSPSC3A) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3A) .AND. NF_SC3A > 0) +!$ACC DATA PRESENT(PSPSC3B) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3B) .AND. NF_SC3B > 0) +#endif +#ifdef OMPGPU +!WARNING: following lines should be PRESENT,ALLOC but cause issues with AMD compiler! +!$OMP TARGET DATA MAP(ALLOC:PSPVOR,PSPDIV) IF(KF_UV > 0) +!$OMP TARGET DATA MAP(ALLOC:PSPSCALAR) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSCALAR)) +!$OMP TARGET DATA MAP(ALLOC:PSPSC2) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC2) .AND. NF_SC2 > 0) +!$OMP TARGET DATA MAP(ALLOC:PSPSC3A) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3A) .AND. NF_SC3A > 0) +!$OMP TARGET DATA MAP(ALLOC:PSPSC3B) IF(KF_SCALARS > 0 .AND. PRESENT(PSPSC3B) .AND. NF_SC3B > 0) +#endif + +IST = 1 +IF (KF_UV > 0) THEN + !stop 'Error: code path not (yet) supported in GPU version' + + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL UPDSPB(KF_UV,IVORS,POA2,PSPVOR,KFLDPTRUV) + CALL UPDSPB(KF_UV,IDIVS,POA2,PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL UPDSPB(KF_SCALARS,IST,POA1,PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL UPDSPB(IDIM1,IST,POA1,PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPB(IDIM1,IST,POA1,PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPB(IDIM1,IST,POA1,PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF + +#ifdef OMPGPU +!$OMP END TARGET DATA +!$OMP END TARGET DATA +!$OMP END TARGET DATA +!$OMP END TARGET DATA +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +!$ACC END DATA +!$ACC END DATA +!$ACC END DATA +!$ACC END DATA +#endif + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSP +END MODULE UPDSP_MOD diff --git a/src/trans/gpu/internal/updspad_mod.F90 b/src/trans/gpu/internal/updspad_mod.F90 new file mode 100755 index 0000000..228ea78 --- /dev/null +++ b/src/trans/gpu/internal/updspad_mod.F90 @@ -0,0 +1,178 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPAD_MOD +CONTAINS +SUBROUTINE UPDSPAD(KM,KF_UV,KF_SCALARS,POA1,POA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *UPDSPAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL UPDSPAD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPADB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPADB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR ,ONLY : D + +USE UPDSPBAD_MOD ,ONLY : UPDSPBAD +! + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS + +REAL(KIND=JPRBT) , INTENT(OUT) :: POA1(:,:) +REAL(KIND=JPRBT) , INTENT(OUT) :: POA2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND, JN, ISE,IFLD,JFLD +INTEGER(KIND=JPIM) :: IDIM1,IDIM3,J3 + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + + +!* 1.1 VORTICITY AND DIVERGENCE. + +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + IF (KM == 0) THEN + IF(PRESENT(KFLDPTRUV)) THEN + DO JFLD=1,KF_UV + IFLD = KFLDPTRUV(JFLD) + PSPVOR(IFLD,D%NASM0(0)) = 0.0_JPRBT + PSPDIV(IFLD,D%NASM0(0)) = 0.0_JPRBT + ENDDO + DO JN=0,R%NSMAX + ISE = 1+JN*2+1 + DO JFLD=1,KF_UV + IFLD = KFLDPTRUV(JFLD) + PSPDIV(IFLD,ISE) = 0.0_JPRBT + PSPVOR(IFLD,ISE) = 0.0_JPRBT + ENDDO + ENDDO + ELSE + PSPVOR(:,D%NASM0(0)) = 0.0_JPRBT + PSPDIV(:,D%NASM0(0)) = 0.0_JPRBT + DO JN=0,R%NSMAX + ISE = 1+JN*2+1 + PSPDIV(:,ISE) = 0.0_JPRBT + PSPVOR(:,ISE) = 0.0_JPRBT + ENDDO + ENDIF + ENDIF + CALL UPDSPBAD(KM,KF_UV,POA2(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL UPDSPBAD(KM,KF_UV,POA2(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL UPDSPBAD(KM,KF_SCALARS,POA1(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL UPDSPBAD(KM,IDIM1,POA1(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPBAD(KM,IDIM1,POA1(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL UPDSPBAD(KM,IDIM1,POA1(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSPAD +END MODULE UPDSPAD_MOD diff --git a/src/trans/gpu/internal/updspb_mod.F90 b/src/trans/gpu/internal/updspb_mod.F90 new file mode 100755 index 0000000..18833c1 --- /dev/null +++ b/src/trans/gpu/internal/updspb_mod.F90 @@ -0,0 +1,160 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPB_MOD + CONTAINS + SUBROUTINE UPDSPB(KFIELD,KST,POA,PSPEC,KFLDPTR) + + + !**** *UPDSPB* - Update spectral arrays after direct Legendre transform + + ! Purpose. + ! -------- + ! To update spectral arrays for a fixed zonal wave-number + ! from values in POA. + + !** Interface. + ! ---------- + ! CALL UPDSPB(....) + + ! Explicit arguments : KM - zonal wavenumber + ! -------------------- KFIELD - number of fields + ! POA - work array + ! PSPEC - spectral array + + ! Implicit arguments : None + ! -------------------- + + ! Method. + ! ------- + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 88-02-02 + ! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) + ! R. El Khatib : 94-08-02 Replace number of fields by indexes of the + ! first and last field + ! L. Isaksen : 95-06-06 Reordering of spectral arrays + ! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + + USE TPM_DIM ,ONLY : R_NSMAX,R_NTMAX + !USE TPM_FIELDS + USE TPM_DISTR ,ONLY : D_NUMP,D_MYMS,D_NASM0 + ! + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD + INTEGER(KIND=JPIM),INTENT(IN) :: KST + INTEGER(KIND=JPIM) :: KM,KMLOC + REAL(KIND=JPRBT) ,INTENT(IN) :: POA(:,:,:) + REAL(KIND=JPRB) ,INTENT(OUT) :: PSPEC(:,:) + INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN, ISMAX, ITMAX, IASM0,IFLD + + + ! ------------------------------------------------------------------ + + !* 0. NOTE. + ! ----- + + ! The following transfer reads : + ! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) + ! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) + ! with n from m to NSMAX + ! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. + ! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) + ! nn is the loop index. + + IF(PRESENT(KFLDPTR)) THEN + stop 'Error: code path not (yet) supported in GPU version' + ENDIF + + !* 1. UPDATE SPECTRAL FIELDS. + ! ----------------------- + + !loop over wavenumber +#ifdef ACCGPU + !$ACC DATA COPYIN(KFIELD,KST,POA) +#endif +#ifdef OMPGPU +!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! + !$OMP TARGET DATA MAP(ALLOC:PSPEC,POA) & + !$OMP& MAP(TO:R_NTMAX,R_NSMAX,D_NUMP,D_MYMS,D_NASM0) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,IASM0,INM,IR,II) DEFAULT(NONE) & + !$OMP& SHARED(R_NTMAX,R_NSMAX,D_NUMP,D_MYMS,D_NASM0,PSPEC,KST,KFIELD,POA) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,IASM0,INM,IR,II) DEFAULT(NONE) & + !$ACC& PRESENT(R_NTMAX,R_NSMAX,D_NUMP,D_MYMS,D_NASM0,PSPEC,KST,KFIELD,POA) +#endif + DO KMLOC=1,D_NUMP + DO JN=R_NTMAX+2-R_NSMAX,R_NTMAX+2 + DO JFLD=1,KFIELD + + KM = D_MYMS(KMLOC) + IASM0 = D_NASM0(KM) + + IF(KM == 0) THEN + + IF (JN .LE. R_NTMAX+2-KM) THEN + + INM = IASM0+(R_NTMAX+2-JN)*2 + IR = KST+2*JFLD-2 + PSPEC(JFLD,INM) = POA(IR,JN,KMLOC) + PSPEC(JFLD,INM+1) = 0.0_JPRBT + + END IF + ELSE + + + IF (JN .LE. R_NTMAX+2-KM) THEN + INM = IASM0+((R_NTMAX+2-JN)-KM)*2 + + IR = KST+2*JFLD-2 + II = IR+1 + PSPEC(JFLD,INM) = POA(IR,JN,KMLOC) + PSPEC(JFLD,INM+1) = POA(II,JN,KMLOC) + + END IF + END IF + + ENDDO + ENDDO + ENDDO +#ifdef ACCGPU + !$ACC END PARALLEL LOOP +#endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + ! ------------------------------------------------------------------ + + END SUBROUTINE UPDSPB +END MODULE UPDSPB_MOD diff --git a/src/trans/gpu/internal/updspb_vd_mod.F90 b/src/trans/gpu/internal/updspb_vd_mod.F90 new file mode 100755 index 0000000..e758d8b --- /dev/null +++ b/src/trans/gpu/internal/updspb_vd_mod.F90 @@ -0,0 +1,170 @@ +! (C) Copyright 2000- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPB_VD_MOD +CONTAINS +SUBROUTINE UPDSPB_VD(KFIELD,PSPVOR,PSPDIV,KFLDPTR) + + !**** *UPDSPB* - Update spectral arrays after direct Legendre transform + + ! Purpose. + ! -------- + ! To update spectral arrays for a fixed zonal wave-number + ! from values in POA. + + !** Interface. + ! ---------- + ! CALL UPDSPB(....) + + ! Explicit arguments : KM - zonal wavenumber + ! -------------------- KFIELD - number of fields + ! POA - work array + ! PSPEC - spectral array + + ! Implicit arguments : None + ! -------------------- + + ! Method. + ! ------- + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 88-02-02 + ! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) + ! R. El Khatib : 94-08-02 Replace number of fields by indexes of the + ! first and last field + ! L. Isaksen : 95-06-06 Reordering of spectral arrays + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + + USE TPM_DIM ,ONLY : R,R_NSMAX,R_NTMAX + USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NASM0 + USE TPM_FIELDS ,ONLY : ZOA2 + ! + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD + INTEGER(KIND=JPIM) :: KM,KMLOC + REAL(KIND=JPRB) ,INTENT(OUT) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,INTENT(OUT) :: PSPDIV(:,:) + INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN, ISMAX, ITMAX, IASM0,IFLD + INTEGER(KIND=JPIM) :: IVORS, IDIVS + + + ! ------------------------------------------------------------------ + + !* 0. NOTE. + ! ----- + + ! The following transfer reads : + ! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) + ! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) + ! with n from m to NSMAX + ! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. + ! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) + ! nn is the loop index. + + IVORS = 1 + IDIVS = 2*KFIELD+1 + + !* 1. UPDATE SPECTRAL FIELDS. + ! ----------------------- +#ifdef ACCGPU + !$ACC DATA & + !$ACC& PRESENT(ZOA2) & + !$ACC& COPY(PSPVOR,PSPDIV) & + !$ACC& COPY(D,D_NUMP,D_MYMS,R,R_NSMAX,R_NTMAX,D,D_NASM0) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA & + !$OMP& MAP(PRESENT,ALLOC:ZOA2) & + !$OMP& MAP(FROM:PSPVOR,PSPDIV) & + !$OMP& MAP(TO:D,D_NUMP,D_MYMS,R,R_NSMAX,R_NTMAX,D,D_NASM0) +#endif + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,INM,IR,II,IASM0,IFLD) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,INM,IR,II,IASM0,IFLD) +#endif + DO KMLOC=1,D_NUMP + DO JN=R_NTMAX+2-R_NSMAX,R_NTMAX+2 + DO JFLD=1,KFIELD + + KM = D_MYMS(KMLOC) + IASM0 = D_NASM0(KM) + + IF(KM == 0) THEN + + IF (JN .LE. R_NTMAX+2-KM) THEN + INM = IASM0+(R_NTMAX+2-JN)*2 + IR = 2*JFLD-1 + PSPVOR(JFLD,INM) = ZOA2(IVORS+IR-1,JN,KMLOC) + PSPDIV(JFLD,INM) = ZOA2(IDIVS+IR-1,JN,KMLOC) + PSPVOR(JFLD,INM+1) = 0.0_JPRBT + PSPDIV(JFLD,INM+1) = 0.0_JPRBT + END IF + IF(PRESENT(KFLDPTR)) THEN + IFLD = KFLDPTR(JFLD) + PSPVOR(IFLD,IASM0) = 0.0_JPRBT + PSPDIV(IFLD,IASM0) = 0.0_JPRBT + ELSE + PSPVOR(JFLD,IASM0) = 0.0_JPRBT + PSPDIV(JFLD,IASM0) = 0.0_JPRBT + ENDIF + + ELSE + + + IF (JN .LE. R_NTMAX+2-KM) THEN + INM = IASM0+((R_NTMAX+2-JN)-KM)*2 + + IR = 2*JFLD-1 + II = IR+1 + PSPVOR(JFLD,INM) = ZOA2(IVORS+IR-1,JN,KMLOC) + PSPVOR(JFLD,INM+1) = ZOA2(IVORS+II-1,JN,KMLOC) + PSPDIV(JFLD,INM) = ZOA2(IDIVS+IR-1,JN,KMLOC) + PSPDIV(JFLD,INM+1) = ZOA2(IDIVS+II-1,JN,KMLOC) + + END IF + END IF + + ENDDO + + ENDDO + !end loop over wavenumber + END DO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC end data +#endif + + ! ------------------------------------------------------------------ + + END SUBROUTINE UPDSPB_VD +END MODULE UPDSPB_VD_MOD diff --git a/src/trans/gpu/internal/updspbad_mod.F90 b/src/trans/gpu/internal/updspbad_mod.F90 new file mode 100755 index 0000000..2ecb0ae --- /dev/null +++ b/src/trans/gpu/internal/updspbad_mod.F90 @@ -0,0 +1,160 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPBAD_MOD +CONTAINS +SUBROUTINE UPDSPBAD(KM,KFIELD,POA,PSPEC,KFLDPTR) + + +!**** *UPDSPBAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL UPDSPBAD(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +!USE TPM_FIELDS +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRBT) ,INTENT(OUT) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN, ISMAX, ITMAX, IASM0,IFLD + + +! ------------------------------------------------------------------ + +!* 0. NOTE. +! ----- + +! The following transfer reads : +! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) +! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) +! with n from m to NSMAX +! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. +! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) +! nn is the loop index. + + + +!* 1. UPDATE SPECTRAL FIELDS. +! ----------------------- +ISMAX = R%NSMAX +ITMAX = R%NTMAX +IASM0 = D%NASM0(KM) + + +POA(:,:) = 0.0_JPRBT + +!* 1.1 KM=0 + +IF(KM == 0) THEN + IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + IFLD = KFLDPTR(JFLD) + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+(ITMAX+2-JN)*2 + POA(JN,IR) = PSPEC(IFLD,INM) + PSPEC(IFLD,INM) = 0.0_JPRBT + ENDDO + ENDDO + ELSE + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+(ITMAX+2-JN)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + POA(JN,IR) = PSPEC(JFLD,INM) + PSPEC(JFLD,INM) = 0.0_JPRBT + ENDDO + ENDDO + ENDIF +!* 1.2 KM!=0 + +ELSE + IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+((ITMAX+2-JN)-KM)*2 + POA(JN,IR) = PSPEC(IFLD,INM) + POA(JN,II) = PSPEC(IFLD,INM+1) + PSPEC(IFLD,INM) = 0.0_JPRBT + PSPEC(IFLD,INM+1) = 0.0_JPRBT + ENDDO + ENDDO + ELSE + DO JN=ITMAX+2-ISMAX,ITMAX+2-KM + INM = IASM0+((ITMAX+2-JN)-KM)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR = 2*JFLD-1 + II = IR+1 + POA(JN,IR) = PSPEC(JFLD,INM) + POA(JN,II) = PSPEC(JFLD,INM+1) + PSPEC(JFLD,INM) = 0.0_JPRBT + PSPEC(JFLD,INM+1) = 0.0_JPRBT + ENDDO + ENDDO + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSPBAD +END MODULE UPDSPBAD_MOD diff --git a/src/trans/gpu/internal/uvtvd_mod.F90 b/src/trans/gpu/internal/uvtvd_mod.F90 new file mode 100755 index 0000000..19794be --- /dev/null +++ b/src/trans/gpu/internal/uvtvd_mod.F90 @@ -0,0 +1,210 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UVTVD_MOD +CONTAINS +SUBROUTINE UVTVD(KFIELD) +!SUBROUTINE UVTVD(KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +!**** *UVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX. + +!** Interface. +! ---------- +! CALL UVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R, R_NTMAX +USE TPM_FIELDS ,ONLY : F +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS +USE TPM_FIELDS ,ONLY : ZOA1,ZOA2,ZEPSNM +! + +IMPLICIT NONE + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +INTEGER(KIND=JPIM) :: KM, KMLOC + +!REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(1:d%nump,0:R%NTMAX+2) +!REAL(KIND=JPRBT), INTENT(OUT) :: PVOR(:,:,:),PDIV(:,:,:) +!REAL(KIND=JPRBT), INTENT(INOUT) :: PU (:,:,:),PV (:,:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, ITMAX +INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE + +! LOCAL REAL SCALARS +REAL(KIND=JPRBT) :: ZKM +REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+3) +REAL(KIND=JPRBT), POINTER :: PU(:,:,:),PV(:,:,:),PVOR(:,:,:),PDIV(:,:,:) + +IUS = 1 +IUE = 2*KFIELD +IVS = 2*KFIELD+1 +IVE = 4*KFIELD +IVORS = 1 +IVORE = 2*KFIELD +IDIVS = 2*KFIELD+1 +IDIVE = 4*KFIELD + +! ------------------------------------------------------------------ + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +PU => ZOA1(IUS:IUE,:,:) +PV => ZOA1(IVS:IVE,:,:) +PVOR => ZOA2(IVORS:IVORE,:,:) +PDIV => ZOA2(IDIVS:IDIVE,:,:) + +#ifdef ACCGPU +!$ACC DATA& +!$ACC& CREATE(ZN) & +!$ACC& COPY(D_MYMS,D_NUMP,R_NTMAX) & +!$ACC& COPY(F,F%RN,F%NLTN) & +!$ACC& PRESENT(ZEPSNM,PU,PV,PVOR,PDIV) +#endif +#ifdef OMPGPU +!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! +!$OMP TARGET DATA& +!$OMP& MAP(ALLOC:ZN) & +!$OMP& MAP(TO:D_MYMS,D_NUMP,R_NTMAX) & +!$OMP& MAP(TO:F,F%RN,F%NLTN) & +!$OMP& MAP(ALLOC:ZEPSNM,PU,PV,PVOR,PDIV) +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +!! DEFAULT(NONE) SHARED(R_NTMAX,ZN,F) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP DEFAULT(NONE) PRESENT(R_NTMAX,ZN,F) +#endif +DO J=-1,R_NTMAX+3 + ZN(j) = F%RN(j) +ENDDO +!* 1.1 SET N=KM-1 COMPONENT TO 0 FOR U AND V + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) +!! PRIVATE(KM,IN) DEFAULT(NONE) & +!!$OMP& SHARED(D_NUMP,KFIELD,D_MYMS,F,PU,PV) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IN) DEFAULT(NONE) & +!$ACC& COPYIN(KFIELD) & +!$ACC& PRESENT(D_NUMP,D_MYMS,F,F%NLTN,PU,PV) +#endif +DO KMLOC=1,D_NUMP + DO J=1,2*KFIELD + KM = D_MYMS(KMLOC) + IN = F%NLTN(KM-1) +! IN=R_NTMAX+3-KM + PU(J,IN,KMLOC) = 0.0_JPRBT + PV(J,IN,KMLOC) = 0.0_JPRBT + ENDDO +ENDDO + +!* 1.2 COMPUTE VORTICITY AND DIVERGENCE. + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM) DEFAULT(NONE) & +!$OMP& SHARED(D_NUMP,R_NTMAX,KFIELD,D_MYMS,PVOR,PV,PU,ZN,PDIV,ZEPSNM) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM) DEFAULT(NONE) & +!$ACC& COPYIN(KFIELD) & +!$ACC& PRESENT(D_NUMP,R_NTMAX,D_MYMS,PVOR,PV,PU,ZN,PDIV,ZEPSNM) +#endif +DO KMLOC=1,D_NUMP + DO JN=0,R_NTMAX + DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + KM = D_MYMS(KMLOC) + ZKM = REAL(KM,JPRBT) + IN = R_NTMAX+2-JN + + IF(KM /= 0 .AND. JN.GE.KM) THEN + PVOR(IR,IN,KMLOC) = -ZKM*PV(II,IN,KMLOC)-& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*PU(IR,IN-1,KMLOC)+& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*PU(IR,IN+1,KMLOC) + PVOR(II,IN,KMLOC) = +ZKM*PV(IR,IN,KMLOC)-& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*PU(II,IN-1,KMLOC)+& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*PU(II,IN+1,KMLOC) + PDIV(IR,IN,KMLOC) = -ZKM*PU(II,IN,KMLOC)+& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*PV(IR,IN-1,KMLOC)-& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*PV(IR,IN+1,KMLOC) + PDIV(II,IN,KMLOC) = +ZKM*PU(IR,IN,KMLOC)+& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*PV(II,IN-1,KMLOC)-& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*PV(II,IN+1,KMLOC) + ELSE + IF(KM == 0) THEN + PVOR(IR,IN,KMLOC) = -& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*PU(IR,IN-1,KMLOC)+& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*PU(IR,IN+1,KMLOC) + PDIV(IR,IN,KMLOC) = & + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*PV(IR,IN-1,KMLOC)-& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*PV(IR,IN+1,KMLOC) + ENDIF + ENDIF + ENDDO + ENDDO +ENDDO +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif +! ------------------------------------------------------------------ + +END SUBROUTINE UVTVD +END MODULE UVTVD_MOD diff --git a/src/trans/gpu/internal/uvtvdad_mod.F90 b/src/trans/gpu/internal/uvtvdad_mod.F90 new file mode 100755 index 0000000..a37d966 --- /dev/null +++ b/src/trans/gpu/internal/uvtvdad_mod.F90 @@ -0,0 +1,139 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UVTVDAD_MOD +CONTAINS +SUBROUTINE UVTVDAD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +!**** *UVTVDAD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX. + +!** Interface. +! ---------- +! CALL UVTVDAD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +!USE TPM_DISTR +! + +IMPLICIT NONE + +! DUMMY INTEGER SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +INTEGER(KIND=JPIM), INTENT(IN) :: KM + +REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRBT), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRBT), INTENT(INOUT) :: PU (:,:),PV (:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, ITMAX + +! LOCAL REAL SCALARS +REAL(KIND=JPRBT) :: ZKM +REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+3) + + +! ------------------------------------------------------------------ + + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM = KM +ITMAX = R%NTMAX +ZN(KM-1:ITMAX+3) = F%RN(KM-1:ITMAX+3) + +!* 1.2 COMPUTE VORTICITY AND DIVERGENCE. + +IF(KM /= 0) THEN + DO JN=KM,ITMAX + IN = ITMAX+2-JN +!DIR$ IVDEP +!OCL NOVREC + DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + + PV(IN,II) = PV(IN,II)-ZKM*PVOR(IN,IR) + PU(IN-1,IR) = PU(IN-1,IR)-ZN(JN)*PEPSNM(JN+1)*PVOR(IN,IR) + PU(IN+1,IR) = PU(IN+1,IR)+ZN(JN+1)*PEPSNM(JN)*PVOR(IN,IR) + + PV(IN,IR) = PV(IN,IR)+ZKM*PVOR(IN,II) + PU(IN-1,II) = PU(IN-1,II)-ZN(JN)*PEPSNM(JN+1)*PVOR(IN,II) + PU(IN+1,II) = PU(IN+1,II)+ZN(JN+1)*PEPSNM(JN)*PVOR(IN,II) + + PU(IN,II) = PU(IN,II)-ZKM*PDIV(IN,IR) + PV(IN-1,IR) = PV(IN-1,IR)+ZN(JN)*PEPSNM(JN+1)*PDIV(IN,IR) + PV(IN+1,IR) = PV(IN+1,IR)-ZN(JN+1)*PEPSNM(JN)*PDIV(IN,IR) + + PU(IN,IR) = PU(IN,IR)+ZKM*PDIV(IN,II) + PV(IN-1,II) = PV(IN-1,II)+ZN(JN)*PEPSNM(JN+1)*PDIV(IN,II) + PV(IN+1,II) = PV(IN+1,II)-ZN(JN+1)*PEPSNM(JN)*PDIV(IN,II) + ENDDO + ENDDO +ELSE + DO JN=KM,ITMAX + IN = ITMAX+2-JN + DO J=1,KFIELD + IR = 2*J-1 + PU(IN-1,IR) = PU(IN-1,IR)-ZN(JN )*PEPSNM(JN+1)*PVOR(IN,IR) + PU(IN+1,IR) = PU(IN+1,IR)+ZN(JN+1)*PEPSNM(JN )*PVOR(IN,IR) + PV(IN-1,IR) = PV(IN-1,IR)+ZN(JN )*PEPSNM(JN+1)*PDIV(IN,IR) + PV(IN+1,IR) = PV(IN+1,IR)-ZN(JN+1)*PEPSNM(JN )*PDIV(IN,IR) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UVTVDAD +END MODULE UVTVDAD_MOD diff --git a/src/trans/gpu/internal/vd2uv_ctl_mod.F90 b/src/trans/gpu/internal/vd2uv_ctl_mod.F90 new file mode 100755 index 0000000..b875505 --- /dev/null +++ b/src/trans/gpu/internal/vd2uv_ctl_mod.F90 @@ -0,0 +1,81 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VD2UV_CTL_MOD +CONTAINS +SUBROUTINE VD2UV_CTL(KF_UV,PSPVOR,PSPDIV,PU,PV) + +!**** *VD2UV_CTL* - Control routine for going from vor/div to spectral U and V. + +! Purpose. +! -------- +! Control routine for computing spectral U (u*cos(theta)) and V + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) +! KF_UV - local number of spectral u-v fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PU(:,:) - U (out) +! PV(:,:) - V (out) + +! Method. +! ------- + +! Externals. +! ---------- + + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_DISTR ,ONLY : D + +USE VD2UV_MOD ,ONLY : VD2UV + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +REAL(KIND=JPRB),INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB),INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PU(:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PV(:,:) + +INTEGER(KIND=JPIM) :: JM,IM,ILEI2 + +! ------------------------------------------------------------------ + +CALL GSTATS(102,0) +ILEI2 = 8*KF_UV + +CALL GSTATS(1647,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL VD2UV(IM,JM,KF_UV,ILEI2,PSPVOR,PSPDIV,PU,PV) +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1647,1) +CALL GSTATS(102,1) + +! ------------------------------------------------------------------ + +END SUBROUTINE VD2UV_CTL +END MODULE VD2UV_CTL_MOD diff --git a/src/trans/gpu/internal/vd2uv_mod.F90 b/src/trans/gpu/internal/vd2uv_mod.F90 new file mode 100755 index 0000000..0806699 --- /dev/null +++ b/src/trans/gpu/internal/vd2uv_mod.F90 @@ -0,0 +1,157 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VD2UV_MOD +CONTAINS +SUBROUTINE VD2UV(KM,KMLOC,KF_UV,KLEI2,PSPVOR,PSPDIV,PU,PV) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_CONSTANTS +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + +USE PREPSNM_MOD ,ONLY : PREPSNM +USE PRFI1B_MOD ,ONLY : PRFI1B +USE VDTUV_MOD ,ONLY : VDTUV + + +!**** *VD2UV* - U and V from Vor/div +! +! Purpose. +! -------- +! +!** Interface. +! ---------- +! *CALL* *VD2UV(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PU(:,:) - spectral U (out) +! PV(:,:) - spectral V (out) + + +! Implicit arguments : + +! Method. +! ------- + +! Externals. +! ---------- + +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1B - prepares the spectral fields +! VDTUV - compute u and v from vorticity and divergence + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 +! +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 + +REAL(KIND=JPRB) , INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PU(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PV(:,:) + +REAL(KIND=JPRB) :: ZIA(R%NLEI1,KLEI2) +REAL(KIND=JPRBT) :: ZEPSNM(0:R%NTMAX+2),ZA_R + +INTEGER(KIND=JPIM) :: IFC, ISTA, IIFC, IDGLU, JGL, JFLD,ILCM +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,II,IR,INM,J +INTEGER(KIND=JPIM) :: IFIRST, ILAST, IOFF + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('VD2UV_MOD',0,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!* 1. PREPARE ZEPSNM. +! --------------- + +stop 'Error: code path not (yet) supported in GPU version' +!CALL PREPSNM(KM,KMLOC,ZEPSNM) + +! ------------------------------------------------------------------ + + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +IFIRST = 1 +ILAST = 4*KF_UV + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + stop 'Error: code path not (yet) supported in GPU version' + !CALL PRFI1B(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV) + !CALL PRFI1B(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV) + + !CALL VDTUV(KM,KF_UV,ZEPSNM,ZIA(:,IVORL:IVORU),ZIA(:,IDIVL:IDIVU),& + ! & ZIA(:,IUL:IUU),ZIA(:,IVL:IVU)) + ILCM = R%NSMAX+1-KM + IOFF = D%NASM0(KM) + ZA_R = 1.0_JPRBT/RA + DO J=1,ILCM + INM = IOFF+(ILCM-J)*2 + DO JFLD=1,KF_UV + IR = 2*(JFLD-1)+1 + II = IR+1 + PU(JFLD,INM ) = ZIA(J+2,IR+IUL-1)*ZA_R + PU(JFLD,INM+1) = ZIA(J+2,II+IUL-1)*ZA_R + PV(JFLD,INM ) = ZIA(J+2,IR+IVL-1)*ZA_R + PV(JFLD,INM+1) = ZIA(J+2,II+IVL-1)*ZA_R + ENDDO + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('VD2UV_MOD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE VD2UV +END MODULE VD2UV_MOD + + + + diff --git a/src/trans/gpu/internal/vdtuv_mod.F90 b/src/trans/gpu/internal/vdtuv_mod.F90 new file mode 100755 index 0000000..ed23ed4 --- /dev/null +++ b/src/trans/gpu/internal/vdtuv_mod.F90 @@ -0,0 +1,226 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VDTUV_MOD +CONTAINS +SUBROUTINE VDTUV(KFIELD) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R, R_NSMAX +USE TPM_FIELDS ,ONLY : F_RN, F_RLAPIN, ZEPSNM, ZIA +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS +USE TPM_GEN ,ONLY : NOUT + + +!**** *VDTUV* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUV(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUV in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: KM, kmloc +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +!REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(1:D%NUMP,0:R%NTMAX+2) +!REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:,:),PDIV(:,:,:) +!REAL(KIND=JPRB), INTENT(OUT) :: PU (:,:,:),PV (:,:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, ISMAX,JI + +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU +INTEGER(KIND=JPIM) :: ZKM + +! LOCAL REAL SCALARS +REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+4) +REAL(KIND=JPRBT) :: ZLAPIN(-1:R%NSMAX+4) +REAL(KIND=JPRBT) :: ZZEPSNM(-1:R%NSMAX+4) +REAL(KIND=JPRBT), POINTER :: PU(:,:,:),PV(:,:,:),PVOR(:,:,:),PDIV(:,:,:) + +IVORL = 1 +IVORU = 2*KFIELD +IDIVL = 2*KFIELD+1 +IDIVU = 4*KFIELD +IUL = 4*KFIELD+1 +IUU = 6*KFIELD +IVL = 6*KFIELD+1 +IVU = 8*KFIELD + +PU => ZIA(IUL:IUU,:,:) +PV => ZIA(IVL:IVU,:,:) +PVOR => ZIA(IVORL:IVORU,:,:) +PDIV => ZIA(IDIVL:IDIVU,:,:) + +#ifdef ACCGPU +!$ACC DATA & +!$ACC& CREATE (ZZEPSNM, ZN, ZLAPIN) & +!$ACC& COPYIN (R_NSMAX, D_MYMS,D_NUMP,F_RLAPIN,F_RN) & +!$ACC& PRESENT(ZEPSNM, PVOR, PDIV) & +!$ACC& PRESENT(PU, PV) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA & +!$OMP& MAP (PRESENT,ALLOC:ZEPSNM, ZN, ZLAPIN) & +!$OMP& MAP (TO:R_NSMAX, D_MYMS,D_NUMP,F_RLAPIN,F_RN) & +!$OMP& MAP(PRESENT,ALLOC:ZEPSNM, PVOR, PDIV) & +!$OMP& MAP(PRESENT,ALLOC:PU, PV) +#endif + +! ------------------------------------------------------------------ + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ISMAX = R_NSMAX +DO KMLOC=1,D_NUMP + ZKM = D_MYMS(KMLOC) +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IJ) & + !$OMP& SHARED(ZKM,ISMAX,ZN,ZLAPIN,ZZEPSNM,ZEPSNM,KMLOC,F_RN,F_RLAPIN) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IJ) & + !$ACC& PRESENT(F_RN,F_RLAPIN,ZN,ZLAPIN,ZZEPSNM,ZEPSNM) & + !$ACC& COPYIN(ISMAX,ZKM,KMLOC) +#endif + DO JN=ZKM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F_RN(JN) + ZLAPIN(IJ) = F_RLAPIN(JN) + ZN(0) = F_RN(ISMAX+3) + IF( JN >= 0 ) THEN + ZZEPSNM(IJ) = ZEPSNM(KMLOC,JN) + ELSE + ZZEPSNM(IJ) = 0._JPRBT + ENDIF + ENDDO + !!! alternative !$ACC UPDATE DEVICE(ZN, ZLAPIN, ZZEPSNM) + +!* 1.1 U AND V (KM=0) . + +IF(ZKM == 0) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IR) & + !$OMP& SHARED(KFIELD,ISMAX,KMLOC,PU,ZN,ZLAPIN,PVOR,PV,PDIV,ZZEPSNM) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP DEFAULT(NONE) PRIVATE(IR) & + !$ACC& COPYIN(ZN,ZLAPIN,ZZEPSNM) & + !$ACC& PRESENT(PU,PVOR,PV,PDIV) & + !$ACC& COPYIN(KFIELD,ISMAX,KMLOC) +#endif + DO J=1,KFIELD + IR = 2*J-1 + DO JI=2,ISMAX+3 + PU(IR,JI,KMLOC) = +& + &ZN(JI+1)*ZZEPSNM(JI)*ZLAPIN(JI+1)*PVOR(IR,JI+1,KMLOC)-& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZLAPIN(JI-1)*PVOR(IR,JI-1,KMLOC) + PV(IR,JI,KMLOC) = -& + &ZN(JI+1)*ZZEPSNM(JI)*ZLAPIN(JI+1)*PDIV(IR,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZLAPIN(JI-1)*PDIV(IR,JI-1,KMLOC) + ENDDO + ENDDO +ELSE +!* 1.2 U AND V (KM!=0) . + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) PRIVATE(IR,II) & + !$OMP& SHARED(KFIELD,ZKM,ISMAX,KMLOC,PU,ZN,ZLAPIN,PVOR,PV,PDIV,ZZEPSNM) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) DEFAULT(NONE) PRIVATE(IR,II) & + !$ACC& PRESENT(PU,PVOR,PV,PDIV) & + !$ACC& COPYIN(ZN,ZLAPIN,ZZEPSNM) & + !$ACC& COPYIN(KFIELD,ZKM,ISMAX,KMLOC) +#endif + DO J=1,KFIELD + DO JI=2,ISMAX+3-ZKM + !ZKM = D_MYMS(KMLOC) + IR = 2*J-1 + II = IR+1 + !IF (ZKM>0 .AND. JI<=ISMAX+3-zKM) THEN + PU(IR,JI,KMLOC) = -ZKM*ZLAPIN(JI)*PDIV(II,JI,KMLOC)+& + &ZN(JI+1)*ZZEPSNM(JI)*ZLAPIN(JI+1)*PVOR(IR,JI+1,KMLOC)-& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZLAPIN(JI-1)*PVOR(IR,JI-1,KMLOC) + PU(II,JI,KMLOC) = +ZKM*ZLAPIN(JI)*PDIV(IR,JI,KMLOC)+& + &ZN(JI+1)*ZZEPSNM(JI)*ZLAPIN(JI+1)*PVOR(II,JI+1,KMLOC)-& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZLAPIN(JI-1)*PVOR(II,JI-1,KMLOC) + PV(IR,JI,KMLOC) = -ZKM*ZLAPIN(JI)*PVOR(II,JI,KMLOC)-& + &ZN(JI+1)*ZZEPSNM(JI)*ZLAPIN(JI+1)*PDIV(IR,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZLAPIN(JI-1)*PDIV(IR,JI-1,KMLOC) + PV(II,JI,KMLOC) = +ZKM*ZLAPIN(JI)*PVOR(IR,JI,KMLOC)-& + &ZN(JI+1)*ZZEPSNM(JI)*ZLAPIN(JI+1)*PDIV(II,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*ZLAPIN(JI-1)*PDIV(II,JI-1,KMLOC) + !ENDIF + ENDDO + ENDDO + ENDIF +ENDDO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif +! ------------------------------------------------------------------ + +END SUBROUTINE VDTUV +END MODULE VDTUV_MOD + diff --git a/src/trans/gpu/internal/vdtuvad_mod.F90 b/src/trans/gpu/internal/vdtuvad_mod.F90 new file mode 100755 index 0000000..46412ef --- /dev/null +++ b/src/trans/gpu/internal/vdtuvad_mod.F90 @@ -0,0 +1,145 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VDTUVAD_MOD +CONTAINS +SUBROUTINE VDTUVAD(KM,KFIELD,PEPSNM,PVOR,PDIV,PU,PV) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F + + +!**** *VDTUVAD* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUVAD(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(INOUT) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PU (:,:),PV (:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, ISMAX,JI + +! LOCAL REAL SCALARS +REAL(KIND=JPRBT) :: ZKM +REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+4) +REAL(KIND=JPRBT) :: ZLAPIN(-1:R%NSMAX+4) +REAL(KIND=JPRBT) :: ZEPSNM(-1:R%NSMAX+4) + + + +! ------------------------------------------------------------------ + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM = KM +ISMAX = R%NSMAX +DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + ZLAPIN(IJ) = F%RLAPIN(JN) + IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) +ENDDO +ZN(0) = F%RN(ISMAX+3) + +!* 1.1 U AND V (KM=0) . + +IF(KM == 0) THEN + DO J=1,KFIELD + IR = 2*J-1 + DO JI=2,ISMAX+3-KM + PDIV(JI-1,IR) = PDIV(JI-1,IR)+ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PV(JI,IR) + PVOR(JI-1,IR) = PVOR(JI-1,IR)-ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PU(JI,IR) + PDIV(JI+1,IR) = PDIV(JI+1,IR)-ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PV(JI,IR) + PVOR(JI+1,IR) = PVOR(JI+1,IR)+ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PU(JI,IR) + ENDDO + ENDDO +!* 1.2 U AND V (KM!=0) . + +ELSE + DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JI=2,ISMAX+3-KM + PDIV(JI-1,II) = PDIV(JI-1,II)+ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PV(JI,II) + PDIV(JI-1,IR) = PDIV(JI-1,IR)+ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PV(JI,IR) + PVOR(JI-1,II) = PVOR(JI-1,II)-ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PU(JI,II) + PVOR(JI-1,IR) = PVOR(JI-1,IR)-ZN(JI-2)*ZEPSNM(JI-1)*ZLAPIN(JI-1)*PU(JI,IR) + PDIV(JI+1,II) = PDIV(JI+1,II)-ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PV(JI,II) + PDIV(JI+1,IR) = PDIV(JI+1,IR)-ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PV(JI,IR) + PVOR(JI+1,II) = PVOR(JI+1,II)+ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PU(JI,II) + PVOR(JI+1,IR) = PVOR(JI+1,IR)+ZN(JI+1)*ZEPSNM(JI)*ZLAPIN(JI+1)*PU(JI,IR) + PVOR(JI,IR) = PVOR(JI,IR)+ZKM*ZLAPIN(JI)*PV(JI,II) + PVOR(JI,II) = PVOR(JI,II)-ZKM*ZLAPIN(JI)*PV(JI,IR) + PDIV(JI,IR) = PDIV(JI,IR)+ZKM*ZLAPIN(JI)*PU(JI,II) + PDIV(JI,II) = PDIV(JI,II)-ZKM*ZLAPIN(JI)*PU(JI,IR) + ENDDO + ENDDO +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE VDTUVAD +END MODULE VDTUVAD_MOD diff --git a/src/trans/gpu/internal/write_legpol_mod.F90 b/src/trans/gpu/internal/write_legpol_mod.F90 new file mode 100755 index 0000000..06996b3 --- /dev/null +++ b/src/trans/gpu/internal/write_legpol_mod.F90 @@ -0,0 +1,229 @@ +! (C) Copyright 2015- ECMWF. +! (C) Copyright 2015- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE WRITE_LEGPOL_MOD +CONTAINS +SUBROUTINE WRITE_LEGPOL +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +USE TPM_GEN +USE TPM_DISTR +USE TPM_DIM +USE TPM_GEOMETRY +USE TPM_FLT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TPM_CTL +USE BUTTERFLY_ALG_MOD +USE BYTES_IO_MOD + +!**** *WRITE_LEGPOL * - write out Leg.Pol. and assocciated arrays to file + +! Purpose. +! -------- +! + +!** Interface. +! ---------- +! *CALL* *WRITE_LEGPOL* + +! Explicit arguments : None +! -------------------- + +! Implicit arguments : +! -------------------- +! + +! Method. +! ------- +! See documentation + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! + +! ------- +! Mats Hamrud and Willem Deconinck *ECMWF* + +! Modifications. +! -------------- +! Original : July 2015 + +IMPLICIT NONE + +INTEGER(KIND=JPIM),PARAMETER :: JPIBUFL=4 +INTEGER(KIND=JPIM) :: IRBYTES,IIBYTES,JMLOC,IPRTRV,IMLOC,IM,ILA,ILS,IFILE,JSETV +INTEGER(KIND=JPIM) :: IDGLU,ISIZE,IBYTES,IRET,IBUF(JPIBUFL),IDUM,JGL,II +INTEGER(KIND=JPIM) :: IDGLU2 +TYPE(CLONE) :: YLCLONE +REAL(KIND=JPRBT) ,ALLOCATABLE :: ZBUF(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: IBUFA(:) +! ------------------------------------------------------------------ + +IRBYTES = 8 +IIBYTES = 4 +IDUM = 3141 + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_OPEN(IFILE,C%CLEGPOLFNAME,'W',IRET) + IF ( IRET < JPBYTES_IO_SUCCESS ) CALL ABORT_TRANS('WRITE_LEGPOL: BYTES_IO_OPEN FAILED') +ENDIF +IF( S%LUSEFLT ) THEN + IBUF(1:2) = TRANSFER('LEGPOLBF',IBUF(1:2)) +ELSE + IBUF(1:2) = TRANSFER('LEGPOL ',IBUF(1:2)) +ENDIF +IBUF(3) = R%NSMAX +IBUF(4) = R%NDGNH +CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +IF ( IRET < JPBYTES_IO_SUCCESS ) CALL ABORT_TRANS('WRITE_LEGPOL: BYTES_IO_WRITE FAILED') +ALLOCATE(IBUFA(2*R%NDGNH)) +II = 0 +DO JGL=1,R%NDGNH + II = II+1 + IBUFA(II) = G%NLOEN(JGL) + II=II+1 + IBUFA(II) = G%NMEN(JGL) +ENDDO +CALL BYTES_IO_WRITE(IFILE,IBUFA,2*R%NDGNH*IIBYTES,IRET) +IF ( IRET < JPBYTES_IO_SUCCESS ) CALL ABORT_TRANS('WRITE_LEGPOL: BYTES_IO_WRITE FAILED') +DEALLOCATE(IBUFA) +DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ + IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) + DO JSETV=1,IPRTRV + IMLOC=JMLOC+JSETV-1 + IM = D%MYMS(IMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) +! Anti-symmetric + IF( S%LUSEFLT .AND. ILA > S%ITHRESHOLD) THEN + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_A,YLCLONE) + ISIZE = SIZE(YLCLONE%COMMSBUF) + IBUF(:) = (/IDGLU,ILA,ISIZE,IDUM/) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IIBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_WRITE(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + IF(IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + ISIZE = IDGLU*ILA + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(IMLOC)%RPNMA,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + ENDIF +! Symmetric + IF( S%LUSEFLT .AND. ILS > S%ITHRESHOLD) THEN + CALL PACK_BUTTERFLY_STRUCT(S%FA(IMLOC)%YBUT_STRUCT_S,YLCLONE) + ISIZE = SIZE(YLCLONE%COMMSBUF) + IBUF(:) = (/IDGLU,ILS,ISIZE,IDUM/) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IIBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + IBYTES = ISIZE*IRBYTES + CALL BYTES_IO_WRITE(IFILE,YLCLONE%COMMSBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(YLCLONE%COMMSBUF) + ELSE + ISIZE = IDGLU*ILS + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(IMLOC)%RPNMS,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + ENDIF + ENDDO +ENDDO + +! Lat-lon grid + +IF(S%LDLL) THEN + IBUF(:) = TRANSFER('LATLON---BEG-BEG',IBUF(1:4)) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DO JMLOC=1,D%NUMP + IM = D%MYMS(JMLOC) + ILA = (R%NSMAX-IM+2)/2 + ILS = (R%NSMAX-IM+3)/2 + IDGLU = MIN(R%NDGNH,G%NDGLU(IM)) + IDGLU2 = S%NDGNHD + IBUF(:) = (/IM,IDGLU,IDGLU2,IDUM/) + CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IIBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + + ISIZE = 2*IDGLU*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(JMLOC)%RPNMWI,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + + ISIZE = 2*IDGLU2*2 + IBYTES = ISIZE*IRBYTES + ALLOCATE(ZBUF(ISIZE)) + ZBUF(:) = RESHAPE(S%FA(JMLOC)%RPNMWO,(/ISIZE/)) + CALL BYTES_IO_WRITE(IFILE,ZBUF,IBYTES,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + WRITE(0,*) 'BYTES_IO_WRITE ',IFILE,' ',IBYTES,' FAILED',IRET + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') + ENDIF + DEALLOCATE(ZBUF) + + ENDDO +ENDIF +!End marker +IBUF(:) = TRANSFER('LEGPOL---EOF-EOF',IBUF(1:4)) +CALL BYTES_IO_WRITE(IFILE,IBUF,JPIBUFL*IIBYTES,IRET) +IF( IRET < JPBYTES_IO_SUCCESS ) THEN + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_WRITE FAILED') +ENDIF + +IF(C%CIO_TYPE == 'file') THEN + CALL BYTES_IO_CLOSE(IFILE,IRET) + IF( IRET < JPBYTES_IO_SUCCESS ) THEN + CALL ABORT_TRANS('WRITE_LEGPOL:BYTES_IO_CLOSE FAILED') + ENDIF +ENDIF + +END SUBROUTINE WRITE_LEGPOL +END MODULE WRITE_LEGPOL_MOD diff --git a/src/trans/gpu/internal_reducedmem/asre1b_mod.F90 b/src/trans/gpu/internal_reducedmem/asre1b_mod.F90 new file mode 100755 index 0000000..65382a8 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/asre1b_mod.F90 @@ -0,0 +1,108 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE ASRE1B_MOD +CONTAINS +SUBROUTINE ASRE1B(KFIELD,PAOA,PSOA) + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DIM ,ONLY : R, R_NDGNH, R_NDGL +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_GEOMETRY ,ONLY : G, G_NDGLU +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NSTAGT0B,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1 +USE TPM_GEN ,ONLY : NOUT + +!**** *ASRE1B* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1B in IFS CY22R1 + +! ------------------------------------------------------------------ + + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +INTEGER(KIND=JPIM) :: KM,KMLOC +REAL(KIND=JPRBT), INTENT(IN) :: PSOA(:,:,:) +REAL(KIND=JPRBT), INTENT(IN) :: PAOA(:,:,:) +!INTEGER(KIND=JPIM), INTENT(OUT) :: ISTAN(:,:) +!INTEGER(KIND=JPIM), INTENT(OUT) :: ISTAS(:,:) + +! LOCAL INTEGERS +INTEGER(KIND=JPIM) :: ISL, IGLS, JFLD, JGL ,IPROC, IPROCS, IDGNH, ISTAN, ISTAS + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(KMLOC,JGL,JFLD,KM,ISL,IPROC,IGLS,IPROCS,ISTAN,ISTAS) +DO KMLOC=1,D_NUMP + KM = D_MYMS(KMLOC) + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + DO JGL=ISL, R_NDGNH +! if (JGL .ge. ISL) then + IPROC = D_NPROCL(JGL) + ISTAN = (D_NSTAGT0B(IPROC) + D_NPNTGTB1(KMLOC,JGL))*2*KFIELD + IGLS = R_NDGL+1-JGL + IPROCS = D_NPROCL(IGLS) + ISTAS = (D_NSTAGT0B(IPROCS) + D_NPNTGTB1(KMLOC,IGLS))*2*KFIELD + DO JFLD=1,2*KFIELD + FOUBUF_IN(ISTAN+JFLD) = PAOA(JFLD,JGL,KMLOC)+PSOA(JFLD,JGL,KMLOC) + FOUBUF_IN(ISTAS+JFLD) = PSOA(JFLD,JGL,KMLOC)-PAOA(JFLD,JGL,KMLOC) + ENDDO +! end if + ENDDO +ENDDO +!$OMP END PARALLEL DO + +! ------------------------------------------------------------------ + +END SUBROUTINE ASRE1B +END MODULE ASRE1B_MOD diff --git a/src/trans/gpu/internal_reducedmem/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal_reducedmem/dir_trans_ctl_mod.F90 new file mode 100755 index 0000000..42b291c --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/dir_trans_ctl_mod.F90 @@ -0,0 +1,209 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE DIR_TRANS_CTL_MOD +CONTAINS +SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *DIR_TRANS_CTL* - Control routine for direct spectral transform. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : FOUBUF_IN, NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTDIR_CTL_MOD ,ONLY : LTDIR_CTL +USE FTDIR_CTL_MOD ,ONLY : FTDIR_CTL +USE TPM_TRANS ,ONLY : ZGTF +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB + + +! ------------------------------------------------------------------ + +! Perform transform + +#ifdef OMPGPU +!$OMP TARGET +#endif +#ifdef ACCGPU +!$ACC KERNELS +#endif +ZGTF(:,:) = 0._JPRBT +#ifdef ACCGPU +!$ACC END KERNELS +#endif +#ifdef OMPGPU +!$OMP END TARGET +#endif + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ENDIF + CALL LTDIR_CTL(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC) + ENDDO +ELSE + + ! No splitting of fields, transform done in one go + + CALL FTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + + CALL LTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE DIR_TRANS_CTL +END MODULE DIR_TRANS_CTL_MOD diff --git a/src/trans/gpu/internal_reducedmem/fourier_in_mod.F90 b/src/trans/gpu/internal_reducedmem/fourier_in_mod.F90 new file mode 100755 index 0000000..6c41a66 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/fourier_in_mod.F90 @@ -0,0 +1,100 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_IN_MOD +CONTAINS +SUBROUTINE FOURIER_IN(PREEL,KFIELDS) + +!**** *FOURIER_IN* - Copy fourier data from buffer to local array + +! Purpose. +! -------- +! Routine for copying fourier data from buffer to local array + +!** Interface. +! ---------- +! CALL FOURIER_IN(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, D_NSTAGTF,D_MSTABF,D_NSTAGT0B,D_NPNTGTB0,D_NPROCM,D_NPTRLS +USE TPM_TRANS ,ONLY : FOUBUF +USE TPM_GEOMETRY ,ONLY : G, G_NMEN,G_NMEN_MAX +USE TPM_GEN ,ONLY : NOUT +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS + +INTEGER(KIND=JPIM) :: KGL + +REAL(KIND=JPRBT), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC,iimax1,iimax2,iimax3,iunit + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(KGL,JM,JF,IGLG,IPROC,ISTA) +DO KGL=IBEG,IEND,IINC + DO JM=0,G_NMEN_MAX + DO JF=1,KFIELDS + + IGLG = D_NPTRLS(MYSETW)+KGL-1 + + IF ( JM .LE. G_NMEN(IGLG)) THEN + + IPROC = D_NPROCM(JM) + ISTA = (D_NSTAGT0B(D_MSTABF(IPROC))+D_NPNTGTB0(JM,KGL))*2*KFIELDS + + PREEL(2*JF-1,2*JM+1+D_NSTAGTF(KGL)) = FOUBUF(ISTA+2*JF-1) + PREEL(2*JF, 2*JM+1+D_NSTAGTF(KGL)) = FOUBUF(ISTA+2*JF ) + !TODO (Andreas): should be able to remove the factor 2 in the second dimension (in front of jm) + !and reduce the size of the array. Will need to adapt fsc_mod accordingly! This is actually more + !difficult: d_nstagtf(kgl) is not necessarily even! + + END IF + ENDDO + ENDDO +END DO +!$OMP END PARALLEL DO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_IN +END MODULE FOURIER_IN_MOD + diff --git a/src/trans/gpu/internal_reducedmem/fourier_out_mod.F90 b/src/trans/gpu/internal_reducedmem/fourier_out_mod.F90 new file mode 100755 index 0000000..5fdd067 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/fourier_out_mod.F90 @@ -0,0 +1,97 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FOURIER_OUT_MOD +CONTAINS +SUBROUTINE FOURIER_OUT(KFIELDS) + +!**** *FOURIER_OUT* - Copy fourier data from local array to buffer + +! Purpose. +! -------- +! Routine for copying fourier data from local array to buffer + +!** Interface. +! ---------- +! CALL FOURIER_OUT(...) + +! Explicit arguments : PREEL - local fourier/GP array +! -------------------- KFIELDS - number of fields +! +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, D_NPTRLS,D_NSTAGTF,D_MSTABF,D_NSTAGT1B,D_NPNTGTB0,D_NPROCM, D_NPROCL +USE TPM_TRANS ,ONLY : FOUBUF_IN, ZGTFTMP +USE TPM_GEOMETRY ,ONLY : G, G_NMEN,G_NMEN_MAX +! + +IMPLICIT NONE + +!REAL(KIND=JPRBT), INTENT(IN) :: PREEL(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM) :: KGL + +INTEGER(KIND=JPIM) :: JM,JF,IGLG,IPROC,IR,II,ISTA, ISTA1,JMMAX, iunit + +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC, IOFF,iimax1,iimax2,iimax3 + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(KGL,JM,JF,IGLG,JMMAX,IPROC,ISTA,IOFF) +DO KGL=IBEG,IEND,IINC + DO JM=0,G_NMEN_MAX + DO JF=1,KFIELDS + + IGLG = D_NPTRLS(MYSETW)+KGL-1 + JMMAX = G_NMEN(IGLG) + IF (JM .LE. JMMAX) THEN + + IPROC = D_NPROCM(JM) + ISTA = (D_NSTAGT1B(D_MSTABF(IPROC))+D_NPNTGTB0(JM,KGL))*2*KFIELDS + IOFF = 1+D_NSTAGTF(KGL) + + ! imaginary may be not JM+1 but JM+G_NMEN(IGLG)+1 + FOUBUF_IN(ISTA+2*JF-1) = ZGTFTMP(2*JF-1, 2*JM+IOFF) + FOUBUF_IN(ISTA+2*JF ) = ZGTFTMP(2*JF , 2*JM+IOFF) + + END IF + ENDDO + ENDDO +END DO +!$OMP END PARALLEL DO + +! ------------------------------------------------------------------ + +END SUBROUTINE FOURIER_OUT +END MODULE FOURIER_OUT_MOD + diff --git a/src/trans/gpu/internal_reducedmem/ftdir_ctl_mod.F90 b/src/trans/gpu/internal_reducedmem/ftdir_ctl_mod.F90 new file mode 100755 index 0000000..ef0c72d --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ftdir_ctl_mod.F90 @@ -0,0 +1,222 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_CTL_MOD +CONTAINS +SUBROUTINE FTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTDIR_CTL - Direct Fourier transform control + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTL(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NOUT +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : ZGTF, ZGTFTMP +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE TRGTOL_MOD ,ONLY : TRGTOL, TRGTOL_CUDAAWARE +USE FOURIER_OUT_MOD ,ONLY : FOURIER_OUT +USE FTDIR_MOD ,ONLY : FTDIR +use ieee_arithmetic +! + +IMPLICIT NONE + + +INTERFACE + SUBROUTINE cudaProfilerStart() BIND(C,name='cudaProfilerStart') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStart +END INTERFACE + +INTERFACE + SUBROUTINE cudaProfilerStop() BIND(C,name='cudaProfilerStop') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStop +END INTERFACE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! Local variables +!REAL(KIND=JPRBT),ALLOCATABLE :: ZGTF(:,:) + +INTEGER(KIND=JPIM) :: IST,JGL,IGL,JF_FS +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC +INTEGER(KIND=JPIM) :: ISIZE,IFIELDS,ICHUNK,ICHUNKS,JK + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +!call cudaProfilerStart() + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +! Transposition + +CALL GSTATS(158,0) + +! needed ??? JF_FS=KF_FS-D%IADJUST_D +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) + +CALL GSTATS(158,1) +CALL GSTATS(106,0) + +! Fourier transform + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +!write(301,*) 'sizey: ', myproc, size(zgtf,1), KF_FS + +CALL GSTATS(1640,0) +!!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +IF(KF_FS>0) THEN + ! TRY THIS IN CHUNKS, ISIZE is even, need equal and even chunks too + ISIZE=size(zgtf,1) + !ICHUNKS=2 + !ICHUNK=ISIZE/ICHUNKS + !ICHUNK=ICHUNK+MOD(ICHUNK,2) + !DO JK=ICHUNKS,1,-1 + ! repeat some fields to have constant chunk size + !IOFF=MAX(1,ISIZE-(ICHUNKS-JK+1)*ICHUNK+1) + IOFF=1 + !ICHUNK=2*KF_FS+2 + ICHUNK=ISIZE + CALL FTDIR(ICHUNK) + !ENDDO +#ifdef ACCGPU + !$ACC UPDATE HOST(ZGTFTMP) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(ZGTFTMP) +#endif +ENDIF + +! Save Fourier data transfer from ZGTFTMP to FOUBUF_IN + CALL FOURIER_OUT(KF_FS) +!!$OMP END PARALLEL DO +CALL GSTATS(1640,1) +CALL GSTATS(106,1) +! ------------------------------------------------------------------ +!call cudaProfilerStop() +END SUBROUTINE FTDIR_CTL +END MODULE FTDIR_CTL_MOD + diff --git a/src/trans/gpu/internal_reducedmem/ftdir_mod.F90 b/src/trans/gpu/internal_reducedmem/ftdir_mod.F90 new file mode 100755 index 0000000..7cbe710 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ftdir_mod.F90 @@ -0,0 +1,169 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTDIR_MOD +CONTAINS +SUBROUTINE FTDIR(KFIELDS) + + +!**** *FTDIR - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW + +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPIB, JPRB, JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC,D_NSTAGTF,D_NPTRLS +USE TPM_TRANS ,ONLY : ZGTF, ZGTFTMP +USE TPM_GEOMETRY ,ONLY : G,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX +USE TPM_FFT ,ONLY : T +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, EXECUTE_PLAN_FFT +USE TPM_DIM ,ONLY : R,R_NNOEXTZL +USE CUDA_DEVICE_MOD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM) :: KGL +!!!REAL(KIND=JPRBT), INTENT(INOUT) :: PREEL(KFIELDS,D%NLENGTF) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,IST1 +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE +INTEGER(KIND=JPIM) :: IPLAN_R2C +INTEGER(KIND=JPIM) :: JMAX +REAL(KIND=JPRBT) :: SCAL +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time + +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC,ISCAL +INTEGER(KIND=JPIM) :: OFFSET_VAR, IUNIT, ISIZE, II, IMAX +integer :: istat, idev + +! ------------------------------------------------------------------ + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +OFFSET_VAR=D_NPTRLS(MYSETW) + +IMAX = G_NLOEN_MAX + 2 + R_NNOEXTZL + +!!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(KGL,IOFF,IGLG,IPLAN_R2C,istat) +DO KGL=IBEG,IEND,IINC + + IOFF=D%NSTAGTF(KGL)+1 + IGLG = D%NPTRLS(MYSETW)+KGL-1 + !ILEN = G_NLOEN(IGLG)+R_NNOEXTZL+3-IST + !IRLEN=G_NLOEN(IGLG)+R_NNOEXTZL + !ICLEN=(IRLEN/2+1)*2 + + !istat = cuda_SetDevice(idev) + CALL CREATE_PLAN_FFT(IPLAN_R2C,-1,G%NLOEN(IGLG),KFIELDS) +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(ZGTF, ZGTFTMP) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(ZGTF, ZGTFTMP) +#endif + CALL EXECUTE_PLAN_FFT(-1,G%NLOEN(IGLG),C_LOC(ZGTF(1,IOFF)),C_LOC(ZGTFTMP(1,IOFF)),IPLAN_R2C) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif +END DO +!!$OMP END PARALLEL DO + +ISTAT = CUDA_SYNCHRONIZE() + +#ifdef ACCGPU +!$ACC DATA & +!$ACC& COPY(D,D_NSTAGTF,D_NPTRLS,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX,R_NNOEXTZL) & +!$ACC& PRESENT(ZGTFTMP) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA & +!$OMP& MAP(TO:D,D_NSTAGTF,D_NPTRLS,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX,R_NNOEXTZL) & +!$OMP& MAP(ALLOC:ZGTFTMP) +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(JMAX,KGL,IOFF,SCAL,IST) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(JMAX,KGL,IOFF,SCAL,IST) +#endif +DO IGLG=IBEG+OFFSET_VAR-1,IEND+OFFSET_VAR-1,IINC + DO JJ=1, IMAX + DO JF=1,KFIELDS + JMAX = G_NLOEN(IGLG) + IST = 2*(G_NMEN(IGLG)+1) + IF (JJ .LE. JMAX) THEN + KGL=IGLG-OFFSET_VAR+1 + IOFF=D_NSTAGTF(KGL)+1 + SCAL = 1._JPRBT/REAL(G_NLOEN(IGLG),JPRBT) + ZGTFTMP(JF,IOFF+JJ-1)= SCAL * ZGTFTMP(JF, IOFF+JJ-1) + END IF + + ! case JJ>0 + IF( JJ .le. (JMAX+R_NNOEXTZL+2-IST)) ZGTFTMP(JF,IST+IOFF+JJ-1) = 0.0_JPRBT + ! case JJ=0 + IF (G_NLOEN(IGLG)==1) ZGTFTMP(JF,IST+IOFF-1) = 0.0_JPRBT + ENDDO + ENDDO +ENDDO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +! ------------------------------------------------------------------ + +END SUBROUTINE FTDIR +END MODULE FTDIR_MOD diff --git a/src/trans/gpu/internal_reducedmem/ftinv_ctl_mod.F90 b/src/trans/gpu/internal_reducedmem/ftinv_ctl_mod.F90 new file mode 100755 index 0000000..1d5da29 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ftinv_ctl_mod.F90 @@ -0,0 +1,276 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_CTL_MOD +CONTAINS +SUBROUTINE FTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *FTINV_CTL - Inverse Fourier transform control + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV_CTL(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +!USE TPM_GEOMETRY +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP,LATLON, ZGTF, ZGTFTMP +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC +USE TPM_FLT ,ONLY : S +USE FOURIER_IN_MOD ,ONLY : FOURIER_IN +USE FSC_MOD ,ONLY : FSC +USE FTINV_MOD ,ONLY : FTINV +USE TRLTOG_MOD ,ONLY : TRLTOG, TRLTOG_CUDAAWARE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE IEEE_ARITHMETIC +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC + +INTEGER(KIND=JPIM) :: IST_UV, IST_SC, IST_NSDERS, IST_UVDERS, IST_EWDERS, JF_FS + +IST_UV = 1 +IST_SC = 1 +IST_NSDERS = 1 +IST_UVDERS = 1 +IST_EWDERS = 1 + +! ------------------------------------------------------------------ + +! 1. Copy Fourier data to local array + +CALL GSTATS(107,0) + +IF (KF_UV > 0 .OR. KF_SCDERS > 0 .OR. (LATLON.AND.S%LDLL) ) THEN + IST = 1 + IF (LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF (LDIVGP) THEN + IST = IST+KF_UV + ENDIF + IST_UV = IST + IST = IST+2*KF_UV + IST_SC = IST + IST = IST+KF_SCALARS + IST_NSDERS = IST + IST = IST+KF_SCDERS + IF (LUVDER) THEN + IST_UVDERS = IST + IST = IST+2*KF_UV + ENDIF + IF (KF_SCDERS > 0) THEN + IST_EWDERS = IST + ENDIF +ENDIF +IF (MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +CALL GSTATS(1639,0) +!!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +CALL FOURIER_IN(ZGTF,KF_OUT_LT) ! COPIES DATA FROM FOUBUF +#ifdef ACCGPU +!$ACC UPDATE DEVICE(ZGTF) +#endif +#ifdef OMPGPU +!$OMP TARGET UPDATE TO(ZGTF) +#endif + +! 2. Fourier space computations + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL FSC(KF_UV,KF_SCALARS,KF_SCDERS,IST_UV,IST_SC,IST_NSDERS,IST_EWDERS,IST_UVDERS) +ENDIF + +! 3. Fourier transform +IF(KF_FS > 0) THEN + CALL FTINV(ZGTF,ZGTFTMP,size(zgtf,1)) +#ifdef ACCGPU + !$ACC UPDATE HOST(ZGTFTMP) ASYNC(1) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(ZGTFTMP) NOWAIT +#endif +ENDIF + +CALL GSTATS(1639,1) + +CALL GSTATS(107,1) + +! 4. Transposition + +IF (PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:)=-1 +IF (PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF (PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF (PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF (LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF (PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF (LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF (IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTL_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF (KF_UV_G > 0) THEN + IF (LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF ( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF (KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF (LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF (KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF + +#ifdef ACCGPU +!$ACC WAIT(1) +#endif +#ifdef OMPGPU +!$OMP BARRIER +#endif + +CALL GSTATS(157,0) +JF_FS=KF_FS-D%IADJUST_I +!WRITE(NOUT,*) 'ftinv_ctl:TRLTOG' +CALL TRLTOG(ZGTFTMP,JF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(157,1) +! ------------------------------------------------------------------ + +!DEALLOCATE(ZGTF) + +END SUBROUTINE FTINV_CTL +END MODULE FTINV_CTL_MOD diff --git a/src/trans/gpu/internal_reducedmem/ftinv_mod.F90 b/src/trans/gpu/internal_reducedmem/ftinv_mod.F90 new file mode 100755 index 0000000..b17e977 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ftinv_mod.F90 @@ -0,0 +1,161 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE FTINV_MOD +CONTAINS +SUBROUTINE FTINV(PREELIN,PREELOUT,KFIELDS) + +!**** *FTINV - Inverse Fourier transform + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL FTINV(..) + +! Explicit arguments : PREELTMP - in Fourier/grid-point array +! PREEL - out Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-04-24 : 2D model (NLOEN=1) +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! G. Mozdzynski (Oct 2014): support for FFTW transforms +! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT + +USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC +USE TPM_GEOMETRY ,ONLY : G +USE TPM_GEN ,ONLY : NOUT +USE TPM_FFT ,ONLY : T +USE TPM_FFTH ,ONLY : CREATE_PLAN_FFT, DESTROY_PLAN_FFT, EXECUTE_PLAN_FFT +USE TPM_DIM ,ONLY : R +USE CUDA_DEVICE_MOD + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM) :: KGL +REAL(KIND=JPRBT), INTENT(INOUT) :: PREELIN(:,:) +REAL(KIND=JPRBT), INTENT(OUT) :: PREELOUT(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,IST1 +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE +LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time +INTEGER(KIND=JPIM) :: IPLAN_C2R +INTEGER(KIND=JPIM) :: IBEG,IEND,IINC,ISIZE +INTEGER :: ISTAT,IDEV + +! ------------------------------------------------------------------ + + + +IF(MYPROC > NPROC/2)THEN + IBEG=1 + IEND=D%NDGL_FS + IINC=1 +ELSE + IBEG=D%NDGL_FS + IEND=1 + IINC=-1 +ENDIF + +ISIZE=size(PREELIN,1) + +#ifdef ACCGPU +!$ACC DATA & +!$ACC& COPYIN(D,D%NSTAGTF,D%NPTRLS,G%NMEN,G%NLOEN,R,R%NNOEXTZL) & +!$ACC& COPYIN(D%NSTAGTF,D%NPTRLS,G%NMEN,G%NLOEN,R%NNOEXTZL) & +!$ACC& PRESENT(PREELIN) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA & +!$OMP& MAP(TO:D,D%NSTAGTF,D%NPTRLS,G%NMEN,G%NLOEN,R,R%NNOEXTZL) & +!$OMP& MAP(TO:D%NSTAGTF,D%NPTRLS,G%NMEN,G%NLOEN,R%NNOEXTZL) & +!$OMP& MAP(ALLOC:PREELIN) +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP +#endif +DO KGL=IBEG,IEND,IINC + + IOFF = D%NSTAGTF(KGL)+1 + IGLG = D%NPTRLS(MYSETW)+KGL-1 + IST = 2*(G%NMEN(IGLG)+1) + ILEN = G%NLOEN(IGLG)+R%NNOEXTZL+2-IST + IST1=1 + IF (G%NLOEN(IGLG)==1) IST1=0 + +#ifdef ACCGPU + !$ACC LOOP COLLAPSE(2) +#endif + !!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + DO JJ=IST1,ILEN + DO JF=1,KFIELDS + PREELIN(JF,IST+IOFF+JJ-1) = 0.0_JPRBT + ENDDO + ENDDO + +END DO +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +!!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(ISTAT,KGL,IOFF,IGLG,IPLAN_C2R) +DO KGL=IBEG,IEND,IINC + IOFF=D%NSTAGTF(KGL)+1 + IGLG = D%NPTRLS(MYSETW)+KGL-1 + !IF (G%NLOEN(IGLG)>1) THEN + CALL CREATE_PLAN_FFT(IPLAN_C2R,1,G%NLOEN(IGLG),KFIELDS) +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(PREELIN,PREELOUT) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(PREELIN,PREELOUT) +#endif + CALL EXECUTE_PLAN_FFT(1,G%NLOEN(IGLG),C_LOC(PREELIN(1, IOFF)),C_LOC(PREELOUT(1, IOFF)),IPLAN_C2R) +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + !ENDIF +END DO +!!$OMP END PARALLEL DO +ISTAT = CUDA_SYNCHRONIZE() + +! ------------------------------------------------------------------ + +END SUBROUTINE FTINV +END MODULE FTINV_MOD diff --git a/src/trans/gpu/internal_reducedmem/inv_trans_ctl_mod.F90 b/src/trans/gpu/internal_reducedmem/inv_trans_ctl_mod.F90 new file mode 100755 index 0000000..a6b4555 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/inv_trans_ctl_mod.F90 @@ -0,0 +1,298 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE INV_TRANS_CTL_MOD +CONTAINS +SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS_CTL* - Control routine for inverse spectral transform. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE LTINV_CTL_MOD ,ONLY : LTINV_CTL +USE FTINV_CTL_MOD ,ONLY : FTINV_CTL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB + +! ------------------------------------------------------------------ + +! Perform transform + +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC,& + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL LTINV_CTL(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,FSPGL_PROC=FSPGL_PROC) + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL FTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + CALL LTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + &PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + &PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + &FSPGL_PROC=FSPGL_PROC) + + CALL FTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE INV_TRANS_CTL +END MODULE INV_TRANS_CTL_MOD diff --git a/src/trans/gpu/internal_reducedmem/leinv_mod.F90 b/src/trans/gpu/internal_reducedmem/leinv_mod.F90 new file mode 100755 index 0000000..4a26a90 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/leinv_mod.F90 @@ -0,0 +1,327 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LEINV_MOD +CONTAINS + SUBROUTINE LEINV(KFC,KF_OUT_LT,PIA) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! Implicit arguments : None. +! -------------------- + +! Method. use butterfly or dgemm +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Nils Wedi + Mats Hamrud + George Modzynski +! +! Modifications. +! -------------- +! J.Hague : Oct 2012 DR_HOOK round calls to DGEMM: +! F. Vana 05-Mar-2015 Support for single precision +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPIB, JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R, R_NDGNH,R_NSMAX +USE TPM_GEOMETRY ,ONLY : G, G_NDGLU +!!USE TPM_FIELDS ,ONLY : F, ZIA, & +USE TPM_FIELDS ,ONLY : ZSOA1,ZAOA1, & + & ZAA,ZAS,LDZAA,LDZAS,TDZAA,TDZAS,& + & IZBS,ILDZBA,ILDZBS,ITDZBA,ITDZBS,& + & IZCS,IZCST,ILDZCA,ILDZCS,ITDZCA,ITDZCS,& + & TDZAS,IF_FS_INV + +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS + ! +USE TPM_FLT +USE TPM_GEN ,ONLY : NOUT ! Fpr nout + +USE CUDA_GEMM_BATCHED_MOD +USE CUDA_DEVICE_MOD + +USE OPENACC +USE ISO_C_BINDING +USE IEEE_ARITHMETIC + +IMPLICIT NONE + + INTERFACE + SUBROUTINE cudaProfilerStart() BIND(C,name='cudaProfilerStart') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStart + END INTERFACE + + INTERFACE + SUBROUTINE cudaProfilerStop() BIND(C,name='cudaProfilerStop') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStop + END INTERFACE + + +INTEGER(KIND=JPIM) :: KM +INTEGER(KIND=JPIM) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM) :: KIFC +INTEGER(KIND=JPIM) :: KDGLU +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(IN) :: PIA(:,:,:) +!REAL(KIND=JPRB), INTENT(OUT) :: PSOA1(:,:,:) +!REAL(KIND=JPRB), INTENT(OUT) :: PAOA1(:,:,:) + +! LOCAL +INTEGER(KIND=JPIM) :: IA, ILA, ILS, IS, ISKIP, ISL, J1, IF, JGL,JK, J,JI, IRET +INTEGER(KIND=JPIM) :: ITHRESHOLD + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +!* 1.1 PREPARATIONS. +IF (LHOOK) CALL DR_HOOK('LE_DGEMM',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +!* 1.1 PREPARATIONS. + + +#ifdef ACCGPU +!$ACC DATA COPYIN (S,S%ITHRESHOLD,S%LUSEFLT) & +!$ACC& COPYIN (D,D_MYMS,R,G,G_NDGLU,D_NUMP,R_NDGNH,R_NSMAX) & +!$ACC& PRESENT (ZAA,ZAS) & +!$ACC& PRESENT (IZBS,IZCST) & +!$ACC& PRESENT (ZSOA1,ZAOA1) & +!$ACC& PRESENT (PIA) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP (TO:S,S%ITHRESHOLD,S%LUSEFLT) & +!$OMP& MAP (TO:D,D_MYMS,R,G,G_NDGLU,D_NUMP,R_NDGNH,R_NSMAX) & +!$OMP& MAP (ALLOC:ZAA,ZAS) & +!$OMP& MAP (ALLOC:IZBS,IZCST) & +!$OMP& MAP (ALLOC:ZSOA1,ZAOA1) & +!$OMP& MAP (ALLOC:PIA) +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM) +#endif +DO KMLOC=1,D_NUMP + DO JGL=1,R_NDGNH + DO J1=2,KFC,2 + KM = D_MYMS(KMLOC) + IF(KM == 0)THEN + ZSOA1(J1,JGL,KMLOC) = 0.0_JPRBT + ZAOA1(J1,JGL,KMLOC) = 0.0_JPRBT + END IF + ENDDO + ENDDO +END DO + + ! 1. +++++++++++++ anti-symmetric + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ISKIP,ILA,IA) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,ISKIP,ILA,IA) +#endif +DO KMLOC=1,D_NUMP + DO J=1,(R_NSMAX+2)/2 + DO JK=1,KFC + + KM = D_MYMS(KMLOC) + IF (KM == 0) THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILA = (R_NSMAX-KM+2)/2 + IF (J .LE. ILA) THEN + IA = 1+MOD(R_NSMAX-KM+2,2) + IZBS((JK-1)/ISKIP+1+(J-1)*ITDZBA+(KMLOC-1)*ILDZBA*ITDZBA)=PIA(JK,IA+1+(J-1)*2,KMLOC) + ENDIF + ENDIF + ENDDO + ENDDO +ENDDO + +ITHRESHOLD=S%ITHRESHOLD + +! operate on full arrays, where non-relavent entries have been set to zero +! CALL HIP_DGEMM_BATCHED('N','N',LDZAA,TDZBA,TDZAA,1.0_JPRB,ZAA,LDZAA,TDZAA,ZBA,LDZBA,TDZBA,0._JPRB,ZCA,LDZCA,TDZCA,D_NUMP) +! Get C in transpose format to get better memory access patterns later +!C=A*B => +! C^T=B^T*A^T + + +! OVERLOADED FOR SINGLE AND DOUBLE PRECISION +#ifdef ACCGPU +!$ACC HOST_DATA USE_DEVICE(ZAA,IZBS,IZCST) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA USE_DEVICE_PTR(ZAA,IZBS,IZCST) +#endif +CALL HIP_GEMM_BATCHED('N','T',ITDZCA,ILDZCA,ILDZBA,1.0_JPRB,IZBS,ITDZBA,ILDZBA,& + & ZAA,LDZAA,TDZAA,0._JPRB,IZCST,ITDZCA,ILDZCA,D_NUMP) +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END HOST_DATA +#endif + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) +#endif +DO KMLOC=1,D_NUMP + DO JI=1,R_NDGNH + DO JK=1,KFC + KM = D_MYMS(KMLOC) + KDGLU = MIN(R_NDGNH,G_NDGLU(KM)) + IF (JI .LE. KDGLU) then + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + END IF + + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ZAOA1(JK,ISL+JI-1,KMLOC) = IZCST((JK-1)/ISKIP+1+(JI-1)*ITDZCA+(KMLOC-1)*ILDZCA*ITDZCA) + END IF + END IF + ENDDO + ENDDO +END DO + +! 2. +++++++++++++ symmetric + +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ISKIP,ILS,IS) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,ISKIP,ILS,IS) +#endif +DO KMLOC=1,D_NUMP + DO J=1,(R_NSMAX+3)/2 + DO JK=1,KFC + KM = D_MYMS(KMLOC) + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + ENDIF + + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ILS = (R_NSMAX-KM+3)/2 + IF (J .LE. ILS) THEN + IS = 1+MOD(R_NSMAX-KM+1,2) + IZBS((JK-1)/ISKIP+1+(J-1)*ITDZBS+(KMLOC-1)*ILDZBS*ITDZBS)=PIA(JK,IS+1+(J-1)*2,KMLOC) + END IF + END IF + ENDDO + ENDDO +ENDDO + +!C=A*B => +! C^T=B^T*A^T + +#ifdef ACCGPU +!$ACC HOST_DATA USE_DEVICE(ZAS,IZBS,IZCST) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA USE_DEVICE_PTR(ZAS,IZBS,IZCST) +#endif +CALL HIP_GEMM_BATCHED('N','T',ITDZCS,ILDZCS,ILDZBS,1.0_JPRB,IZBS,ITDZBS,ILDZBS,& + & ZAS,LDZAS,TDZAS,0._JPRB,IZCST,ITDZCS,ILDZCS,D_NUMP) +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END HOST_DATA +#endif + +!!istat = cuda_Synchronize() +#ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) +#endif +#ifdef ACCGPU +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,KDGLU,ISKIP,ISL) +#endif +DO KMLOC=1,D_NUMP + DO JI=1,R_NDGNH + DO JK=1,KFC + KM = D_MYMS(KMLOC) + KDGLU = MIN(R_NDGNH,G_NDGLU(KM)) + IF (JI .LE. KDGLU) then + IF(KM == 0)THEN + ISKIP = 2 + ELSE + ISKIP = 1 + END IF + + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + IF (MOD((JK-1),ISKIP) .EQ. 0) THEN + ZSOA1(JK,ISL+JI-1,KMLOC) = IZCST((JK-1)/ISKIP+1+(JI-1)*ITDZCS+(KMLOC-1)*ITDZCS*ILDZCS) + END IF + END IF + ENDDO + ENDDO +END DO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +IF (LHOOK) CALL DR_HOOK('LE_DGEMM',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE LEINV +END MODULE LEINV_MOD diff --git a/src/trans/gpu/internal_reducedmem/ltdir_ctl_mod.F90 b/src/trans/gpu/internal_reducedmem/ltdir_ctl_mod.F90 new file mode 100755 index 0000000..97f699c --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ltdir_ctl_mod.F90 @@ -0,0 +1,98 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_CTL_MOD + CONTAINS + SUBROUTINE LTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC) + + !**** *LTDIR_CTL* - Control routine for direct Legendre transform + + ! Purpose. + ! -------- + ! Direct Legendre transform + + !** Interface. + ! ---------- + ! CALL LTDIR_CTL(...) + + ! Explicit arguments : + ! -------------------- + ! KF_FS - number of fields in Fourier space + ! KF_UV - local number of spectral u-v fields + ! KF_SCALARS - local number of scalar spectral fields + ! PSPVOR(:,:) - spectral vorticity (output) + ! PSPDIV(:,:) - spectral divergence (output) + ! PSPSCALAR(:,:) - spectral scalarvalued fields (output) + ! KFLDPTRUV(:) - field pointer for vorticity and divergence (input) + ! KFLDPTRSC(:) - field pointer for scalarvalued fields (input) + + ! ------------------------------------------------------------------ + + USE PARKIND1 ,ONLY : JPIM ,JPRB + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DIM ,ONLY : R + USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN + USE TPM_DISTR ,ONLY : D + USE TPM_GEOMETRY ,ONLY : G + USE TPM_FIELDS ,ONLY : F + + + USE LTDIR_MOD ,ONLY : LTDIR + USE TRLTOM_MOD ,ONLY : TRLTOM, TRLTOM_CUDAAWARE + + USE TPM_FIELDS ,ONLY : ZSIA,ZAIA,ZOA1,ZEPSNM + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 + + ! Transposition from Fourier space distribution to spectral space distribution + ! requires currently both on the host !!! + + IBLEN = D%NLENGT0B*2*KF_FS + CALL GSTATS(153,0) + CALL TRLTOM(FOUBUF_IN,FOUBUF,2*KF_FS) + CALL GSTATS(153,1) + + ! Direct Legendre transform + + CALL GSTATS(103,0) + ILED2 = 2*KF_FS + CALL GSTATS(1645,0) + IF(KF_FS>0) THEN + + CALL LTDIR(KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + + ENDIF + CALL GSTATS(1645,1) + + CALL GSTATS(103,1) + + ! ----------------------------------------------------------------- + + END SUBROUTINE LTDIR_CTL + END MODULE LTDIR_CTL_MOD diff --git a/src/trans/gpu/internal_reducedmem/ltdir_mod.F90 b/src/trans/gpu/internal_reducedmem/ltdir_mod.F90 new file mode 100755 index 0000000..52346f0 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ltdir_mod.F90 @@ -0,0 +1,223 @@ +! (C) Copyright 1987- ECMWF. +! (C) Copyright 1987- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTDIR_MOD + CONTAINS + SUBROUTINE LTDIR(KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC) + + + USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPRB + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE TPM_DIM ,ONLY : R + USE TPM_DISTR ,ONLY : D + USE TPM_GEOMETRY + + !USE PREPSNM_MOD ,ONLY : PREPSNM + USE PRFI2B_MOD ,ONLY : PRFI2B + USE LDFOU2_MOD ,ONLY : LDFOU2 + USE LEDIR_MOD ,ONLY : LEDIR + USE UVTVD_MOD + USE UPDSP_MOD ,ONLY : UPDSP + + USE TPM_FIELDS ,ONLY : ZAIA,ZOA1,ZOA2,ZEPSNM + USE CUDA_DEVICE_MOD + + !**** *LTDIR* - Control of Direct Legendre transform step + + ! Purpose. + ! -------- + ! Tranform from Fourier space to spectral space, compute + ! vorticity and divergence. + + !** Interface. + ! ---------- + ! *CALL* *LTDIR(...)* + + ! Explicit arguments : + ! -------------------- KM - zonal wavenumber + ! KMLOC - local zonal wavenumber + + ! Implicit arguments : None + ! -------------------- + + ! Method. + ! ------- + + ! Externals. + ! ---------- + ! PREPSNM - prepare REPSNM for wavenumber KM + ! PRFI2 - prepares the Fourier work arrays for model variables. + ! LDFOU2 - computations in Fourier space + ! LEDIR - direct Legendre transform + ! UVTVD - + ! UPDSP - updating of spectral arrays (fields) + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 87-11-24 + ! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite + ! for uv formulation + ! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies + ! Modified 93-11-18 M. Hamrud - use only one Fourier buffer + ! Modified 94-04-06 R. El khatib Full-POS implementation + ! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div + ! instead of u,v->vor,div + ! MPP Group : 95-10-01 Support for Distributed Memory version + ! K. YESSAD (AUGUST 1996): + ! - Legendre transforms for transmission coefficients. + ! Modified : 04/06/99 D.Salmond : change order of AIA and SIA + ! R. El Khatib 12-Jul-2012 LDSPC2 replaced by UVTVD + ! ------------------------------------------------------------------ + + IMPLICIT NONE + + INTERFACE + SUBROUTINE cudaProfilerStart() BIND(C,name='cudaProfilerStart') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStart + END INTERFACE + + INTERFACE + SUBROUTINE cudaProfilerStop() BIND(C,name='cudaProfilerStop') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStop + END INTERFACE + + + + ! DUMMY INTEGER SCALARS + INTEGER(KIND=JPIM) :: KM + INTEGER(KIND=JPIM) :: KMLOC + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IFC, IIFC, IDGLU + INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE, ISTAT + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + + !call cudaProfilerStart + + ! ------------------------------------------------------------------ + IF (LHOOK) CALL DR_HOOK('LTDIR_MOD',0,ZHOOK_HANDLE) + + ! ------------------------------------------------------------------ + + !* 1. PREPARE LEGENDRE POLONOMIALS AND EPSNM + ! -------------------------------------- + + + ! ------------------------------------------------------------------ + + !* 2. PREPARE WORK ARRAYS. + ! -------------------- + + ! serial to save memory, Nils + + ! anti-symmetric + + CALL PRFI2B(KF_FS,ZAIA,-1) +#ifdef ACCGPU + !$ACC UPDATE DEVICE(ZAIA) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE TO(ZAIA) +#endif + CALL LDFOU2(KF_UV,ZAIA) + CALL LEDIR(KF_FS,KLED2,ZAIA,ZOA1,-1) + ISTAT = CUDA_SYNCHRONIZE() + + ! symmetric + + CALL PRFI2B(KF_FS,ZAIA,1) +#ifdef ACCGPU + !$ACC UPDATE DEVICE(ZAIA) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE TO(ZAIA) +#endif + CALL LDFOU2(KF_UV,ZAIA) + CALL LEDIR(KF_FS,KLED2,ZAIA,ZOA1,1) + ISTAT = CUDA_SYNCHRONIZE() + ! ------------------------------------------------------------------ + + !* 5. COMPUTE VORTICITY AND DIVERGENCE. + ! --------------------------------- + + IF( KF_UV > 0 ) THEN + !stop 'Error: code path not (yet) supported in GPU version' + + !!CALL PREPSNM + + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL UVTVD(KF_UV) + ! CALL UVTVD(KF_UV,ZEPSNM,ZOA1(IUS:IUE,:,:),ZOA1(IVS:IVE,:,:),& +! & ZOA2(IVORS:IVORE,:,:),ZOA2(IDIVS:IDIVE,:,:)) + ENDIF + ! ------------------------------------------------------------------ + + !* 6. UPDATE SPECTRAL ARRAYS. + ! ----------------------- + + !end loop over wavenumber + + !END DO + + !loop over wavenumber + !DO KMLOC=1,D%NUMP + ! KM = D%MYMS(KMLOC) + + ! this is on the host, so need to cp from device, Nils + CALL UPDSP(KF_UV,KF_SCALARS,ZOA1,ZOA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + + ! ------------------------------------------------------------------ + + IF (LHOOK) CALL DR_HOOK('LTDIR_MOD',1,ZHOOK_HANDLE) + + !end loop over wavenumber + !END DO + + !call cudaProfilerStop + END SUBROUTINE LTDIR +END MODULE LTDIR_MOD diff --git a/src/trans/gpu/internal_reducedmem/ltinv_ctl_mod.F90 b/src/trans/gpu/internal_reducedmem/ltinv_ctl_mod.F90 new file mode 100755 index 0000000..e171c71 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ltinv_ctl_mod.F90 @@ -0,0 +1,113 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_CTL_MOD + CONTAINS + SUBROUTINE LTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + + !**** *LTINV_CTL* - Control routine for inverse Legandre transform. + + ! Purpose. + ! -------- + ! Control routine for the inverse LEGENDRE transform + + !** Interface. + ! ---------- + ! CALL INV_TRANS_CTL(...) + ! KF_OUT_LT - number of fields coming out from inverse LT + ! KF_UV - local number of spectral u-v fields + ! KF_SCALARS - local number of scalar spectral fields + ! KF_SCDERS - local number of derivatives of scalar spectral fields + ! PSPVOR(:,:) - spectral vorticity (input) + ! PSPDIV(:,:) - spectral divergence (input) + ! PSPSCALAR(:,:) - spectral scalarvalued fields (input) + ! KFLDPTRUV(:) - field pointer array for vor./div. + ! KFLDPTRSC(:) - field pointer array for PSPSCALAR + ! FSPGL_PROC - external procedure to be executed in fourier space + ! before transposition + + ! Method. + ! ------- + + ! Externals. + ! ---------- + ! + + ! Author. + ! ------- + ! Mats Hamrud *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 00-06-03 + + ! ------------------------------------------------------------------ + + USE PARKIND1 ,ONLY : JPIM ,JPRB + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DIM ,ONLY : R + USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN + USE TPM_DISTR ,ONLY : D + USE TPM_GEOMETRY ,ONLY : G + + USE TPM_FLT + + USE LTINV_MOD ,ONLY : LTINV + USE TRMTOL_MOD ,ONLY : TRMTOL, TRMTOL_CUDAAWARE + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + EXTERNAL FSPGL_PROC + OPTIONAL FSPGL_PROC + + INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1, i, j + + + CALL GSTATS(102,0) + ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS + IDIM1 = 2*KF_OUT_LT + IBLEN = D%NLENGT0B*2*KF_OUT_LT + + IF(KF_OUT_LT > 0) THEN + CALL GSTATS(1647,0) + + ! from PSPXXX to FOUBUF_IN + CALL LTINV(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + + CALL GSTATS(1647,1) + ENDIF + CALL GSTATS(102,1) + + + CALL GSTATS(152,0) + ! from FOUBUF_IN to FOUBUF + CALL TRMTOL(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) + CALL GSTATS(152,1) + + ! ------------------------------------------------------------------ + + END SUBROUTINE LTINV_CTL + END MODULE LTINV_CTL_MOD diff --git a/src/trans/gpu/internal_reducedmem/ltinv_mod.F90 b/src/trans/gpu/internal_reducedmem/ltinv_mod.F90 new file mode 100755 index 0000000..cfffe2e --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/ltinv_mod.F90 @@ -0,0 +1,316 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE LTINV_MOD + CONTAINS + SUBROUTINE LTINV(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC) + + USE PARKIND1 ,ONLY : JPIM ,JPRB + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE TPM_DIM ,ONLY : R, R_NSMAX !!olivier + USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B, foubuf_in + USE TPM_FLT + USE TPM_GEOMETRY + USE TPM_DISTR ,ONLY : D, MYPROC, D_NUMP,D_MYMS,D_NASM0 !! olivier + USE TPM_GEN ,ONLY : NOUT + !USE PRLE1_MOD + !USE PREPSNM_MOD ,ONLY : PREPSNM + USE PRFI1B_MOD ,ONLY : PRFI1B + USE VDTUV_MOD ,ONLY : VDTUV + USE SPNSDE_MOD ,ONLY : SPNSDE + USE LEINV_MOD ,ONLY : LEINV + USE ASRE1B_MOD ,ONLY : ASRE1B + USE FSPGL_INT_MOD ,ONLY : FSPGL_INT + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + USE IEEE_ARITHMETIC + USE TPM_FIELDS ,ONLY : F,ZIA,ZSOA1,ZAOA1,ZEPSNM + USE CUDA_DEVICE_MOD + + !**** *LTINV* - Inverse Legendre transform + ! + ! Purpose. + ! -------- + ! Tranform from Laplace space to Fourier space, compute U and V + ! and north/south derivatives of state variables. + + !** Interface. + ! ---------- + ! *CALL* *LTINV(...) + + ! Explicit arguments : + ! -------------------- + ! KM - zonal wavenumber + ! KMLOC - local zonal wavenumber + ! PSPVOR - spectral vorticity + ! PSPDIV - spectral divergence + ! PSPSCALAR - spectral scalar variables + + ! Implicit arguments : The Laplace arrays of the model. + ! -------------------- The values of the Legendre polynomials + ! The grid point arrays of the model + ! Method. + ! ------- + + ! Externals. + ! ---------- + + ! PREPSNM - prepare REPSNM for wavenumber KM + ! PRFI1B - prepares the spectral fields + ! VDTUV - compute u and v from vorticity and divergence + ! SPNSDE - compute north-south derivatives + ! LEINV - Inverse Legendre transform + ! ASRE1 - recombination of symmetric/antisymmetric part + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + ! Temperton, 1991, MWR 119 p1303 + + ! Author. + ! ------- + ! Mats Hamrud *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 00-02-01 From LTINV in IFS CY22R1 + ! ------------------------------------------------------------------ + + IMPLICIT NONE + + + INTERFACE + SUBROUTINE cudaProfilerStart() BIND(C,name='cudaProfilerStart') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStart + END INTERFACE + + INTERFACE + SUBROUTINE cudaProfilerStop() BIND(C,name='cudaProfilerStop') + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + IMPLICIT NONE + END SUBROUTINE cudaProfilerStop + END INTERFACE + + + INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT + INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV + INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS + INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS + INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 + INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) + REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) + INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + + + EXTERNAL FSPGL_PROC + OPTIONAL FSPGL_PROC + + !REAL(KIND=JPRB) :: ZEPSNM(d%nump,0:R%NTMAX+2) + + INTEGER(KIND=JPIM) :: IFC, ISTA, IIFC, IDGLU + INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU + INTEGER(KIND=JPIM) :: IFIRST, ILAST, IDIM1,IDIM2,IDIM3,J3, istat, iunit, IMLOC, IA + INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF,IFLD + + REAL(KIND=JPRB) :: ZKM + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + !CHARACTER(LEN=10) :: CLHOOK + + + INTEGER(KIND=JPIM) :: KM + INTEGER(KIND=JPIM) :: KMLOC + + + !call cudaProfilerStart + + + ! ------------------------------------------------------------------ + + !* 1. PERFORM LEGENDRE TRANFORM. + ! -------------------------- + + !WRITE(CLHOOK,FMT='(A,I4.4)') 'LTINV_',KM + IF (LHOOK) CALL DR_HOOK('LTINV_MOD',0,ZHOOK_HANDLE) + + ! ------------------------------------------------------------------ + + + !* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. + ! ---------------------------------------------- + + IFIRST = 1 + ILAST = 0 + + !* 1. PREPARE ZEPSNM. + ! --------------- + +#ifdef ACCGPU + !$ACC DATA & + !$ACC COPYIN (D_NUMP,R_NSMAX,D,D_MYMS,D_NASM0,R,KFLDPTRUV) & + !$ACC COPYOUT(ZIA) & + !$ACC COPYIN (PSPVOR,PSPDIV,PSPSCALAR,PSPSC2,PSPSC3A,PSPSC3B) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA & + !$OMP& MAP (TO:D_NUMP,R_NSMAX,D,D_MYMS,D_NASM0,R,KFLDPTRUV) & + !$OMP& MAP (FROM:ZIA) & + !$OMP& MAP (TO:PSPVOR,PSPDIV,PSPSCALAR,PSPSC2,PSPSC3A,PSPSC3B) +#endif + + istat = cuda_Synchronize() + IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + + IDIM2=UBOUND(PSPVOR,2) + CALL PRFI1B(ZIA(IVORL:IVORU,:,:),PSPVOR,KF_UV,IDIM2,KFLDPTRUV) + CALL PRFI1B(ZIA(IDIVL:IDIVU,:,:),PSPDIV,KF_UV,IDIM2,KFLDPTRUV) + + ! ------------------------------------------------------------------ + + CALL VDTUV(KF_UV,ZEPSNM,ZIA(IVORL:IVORU,:,:),ZIA(IDIVL:IDIVU,:,:),& + & ZIA(IUL:IUU,:,:),ZIA(IVL:IVU,:,:)) + ILAST = ILAST+8*KF_UV + + + ENDIF + + IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + + IDIM2=UBOUND(PSPSCALAR,2) + CALL PRFI1B(ZIA(IFIRST:ILAST,:,:),PSPSCALAR(1:KF_SCALARS,:),KF_SCALARS,IDIM2,KFLDPTRSC) + !!ACC update host(ZIA) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + IDIM2=UBOUND(PSPSC2,2) + CALL PRFI1B(ZIA(IFIRST:ILAST,:,:),PSPSC2(:,:),NF_SC2,IDIM2) + + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + IDIM2=UBOUND(PSPSC3A,2) + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + DO J3=1,IDIM3 + CALL PRFI1B(ZIA(IFIRST:ILAST,:,:),PSPSC3A(:,:,J3),IDIM1,IDIM2) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + IDIM2=UBOUND(PSPSC3B,2) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + + CALL PRFI1B(ZIA(IFIRST:ILAST,:,:),PSPSC3B(:,:,J3),IDIM1,IDIM2) + ENDDO + ENDIF + ENDIF + IF(ILAST /= 8*KF_UV+2*KF_SCALARS) THEN + WRITE(0,*) 'LTINV:KF_UV,KF_SCALARS,ILAST ',KF_UV,KF_SCALARS,ILAST + CALL ABORT_TRANS('LTINV_MOD:ILAST /= 8*KF_UV+2*KF_SCALARS') + ENDIF + ENDIF + + IF (KF_SCDERS > 0) THEN + ! stop 'Error: code path not (yet) supported in GPU version' + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL SPNSDE(KF_SCALARS,ZEPSNM,ZIA(ISL:ISU,:,:),ZIA(IDL:IDU,:,:)) + ENDIF + +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + ! ------------------------------------------------------------------ + + + !* 4. INVERSE LEGENDRE TRANSFORM. + ! --------------------------- + + ISTA = 1 + IFC = 2*KF_OUT_LT + IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV + ENDIF + IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV + ENDIF + + !istat = cuda_Synchronize() + IF( KF_OUT_LT > 0 ) THEN + !call cudaProfilerStart + CALL LEINV(IFC,KF_OUT_LT,ZIA(ISTA:ISTA+IFC-1,:,:)) + !call cudaProfilerStop + + ! ------------------------------------------------------------------ + + !* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. + ! -------------------------------------------- + + ISTAT = CUDA_SYNCHRONIZE() +#ifdef ACCGPU + !$ACC UPDATE HOST(ZAOA1,ZSOA1) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(ZAOA1,ZSOA1) +#endif + CALL ASRE1B(KF_OUT_LT,ZAOA1,ZSOA1) + !CALL ASRE1B(KF_OUT_LT,ZAOA1,ZSOA1,ISTAN,ISTAS) + ! ------------------------------------------------------------------ + + ! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + + + IF(PRESENT(FSPGL_PROC)) THEN + !stop 'Error: SPGL_PROC is not (yet) supported in GPU version' + CALL FSPGL_INT(KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,FSPGL_PROC,& + & KFLDPTRUV,KFLDPTRSC) + ENDIF + + ENDIF + IF (LHOOK) CALL DR_HOOK('LTINV_MOD',1,ZHOOK_HANDLE) + ! ------------------------------------------------------------------ + + !call cudaProfilerStop + + END SUBROUTINE LTINV + END MODULE LTINV_MOD + diff --git a/src/trans/gpu/internal_reducedmem/prfi2b_mod.F90 b/src/trans/gpu/internal_reducedmem/prfi2b_mod.F90 new file mode 100755 index 0000000..6ab6955 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/prfi2b_mod.F90 @@ -0,0 +1,112 @@ +! (C) Copyright 1990- ECMWF. +! (C) Copyright 1990- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE PRFI2B_MOD + CONTAINS + SUBROUTINE PRFI2B(KFIELD,PAIA,KMODE) + + !**** *PRFI2B* - Prepare input work arrays for direct transform + + ! Purpose. + ! -------- + ! To extract the Fourier fields for a specific zonal wavenumber + ! and put them in an order suitable for the direct Legendre + ! tranforms, i.e. split into symmetric and anti-symmetric part. + + !** Interface. + ! ---------- + ! *CALL* *PRFI2B(..) + + ! Explicit arguments : + ! ------------------- KFIELD - number of fields + ! KM - zonal wavenumber + ! KMLOC - local zonal wavenumber + ! PAOA - antisymmetric part of Fourier + ! fields for zonal wavenumber KM + ! PSOA - symmetric part of Fourier + ! fields for zonal wavenumber KM + + ! Implicit arguments : FOUBUF in TPM_TRANS + ! -------------------- + + ! Method. + ! ------- + + ! Externals. None. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 90-07-01 + ! MPP Group: 95-10-01 Support for Distributed Memory version + ! Modified : 04/06/99 D.Salmond : change order of AIA and SIA + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + + USE TPM_DIM ,ONLY : R, R_NDGNH, R_NDGL + USE TPM_TRANS ,ONLY : FOUBUF + USE TPM_GEOMETRY ,ONLY : G, G_NDGLU + USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1,MYPROC + ! + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD + INTEGER(KIND=JPIM),INTENT(IN) :: KMODE + INTEGER(KIND=JPIM) :: KM,KMLOC + REAL(KIND=JPRBT) , INTENT(OUT) :: PAIA(:,:,:) + !REAL(KIND=JPRB) , INTENT(OUT) :: PSIA(:,:,:), PAIA(:,:,:) + + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IGLS, ISL, JF, JGL, iunit + + INTEGER(KIND=JPIM) :: OFFSET1, OFFSET2 + + + ! ------------------------------------------------------------------ + + !* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. + ! ------------------------------------------------ + +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(KMLOC,JGL,JF,KM,ISL,IGLS,OFFSET1,OFFSET2) +DO KMLOC=1,D_NUMP + DO JGL=1,R_NDGNH + DO JF=1,KFIELD*2 + KM = D_MYMS(KMLOC) + ISL = MAX(R_NDGNH-G_NDGLU(KM)+1,1) + IF (JGL .GE. ISL) THEN + IGLS = R_NDGL+1-JGL + OFFSET1 = (D_NSTAGT1B(D_NPROCL(JGL) )+D_NPNTGTB1(KMLOC,JGL ))*2*KFIELD + OFFSET2 = (D_NSTAGT1B(D_NPROCL(IGLS))+D_NPNTGTB1(KMLOC,IGLS))*2*KFIELD + IF( KMODE == -1 ) THEN + PAIA(JF,JGL,KMLOC) = FOUBUF(OFFSET1+JF)-FOUBUF(OFFSET2+JF) + ELSE + PAIA(JF,JGL,KMLOC) = FOUBUF(OFFSET1+JF)+FOUBUF(OFFSET2+JF) + ENDIF + END IF + ENDDO + ENDDO +END DO +!$OMP END PARALLEL DO + + ! ------------------------------------------------------------------ + + END SUBROUTINE PRFI2B + END MODULE PRFI2B_MOD diff --git a/src/trans/gpu/internal_reducedmem/spnsde_mod.F90 b/src/trans/gpu/internal_reducedmem/spnsde_mod.F90 new file mode 100755 index 0000000..fc513dc --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/spnsde_mod.F90 @@ -0,0 +1,173 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SPNSDE_MOD +CONTAINS +SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT + +USE TPM_GEN ,ONLY : NOUT +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F, ZEPSNM +USE TPM_DISTR ,ONLY : D +!USE TPM_TRANS + + +!**** *SPNSDE* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDE(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDE in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: KM, KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +!REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(0:R%NTMAX+2) +REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PF(:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PNSD(:,:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IJ, ISKIP, J, JN,JI,ISMAX, IR, II +REAL(KIND=JPRBT) :: ZZEPSNM(-1:R%NSMAX+4) +REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+4) + +#ifdef ACCGPU +!$ACC DATA & +!$ACC CREATE (ZN,ZZEPSNM) & +!$ACC COPYIN (D,D%MYMS,F,F%RN) & +!$ACC PRESENT (ZEPSNM, PF) & +!$ACC COPYOUT (PNSD) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA MAP(ALLOC:ZN, ZZEPSNM, ZEPSNM, PF) MAP(TO:D,D%MYMS,F,F%RN) +#endif + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + + +!* 1.1 COMPUTE + +ISMAX = R%NSMAX +!loop over wavenumber +DO KMLOC=1,D%NUMP + KM = D%MYMS(KMLOC) +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(IJ) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IJ) +#endif + DO JN=KM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZN(IJ) = F%RN(JN) + IF( JN >= 0 ) ZZEPSNM(IJ) = ZEPSNM(KMLOC,JN) + ENDDO + ZN(0) = F%RN(ISMAX+3) + + IF(KM == 0) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(IR) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IR) +#endif + DO J=1,KF_SCALARS + IR = 2*J-1 + DO JI=2,ISMAX+3 + PNSD(IR,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*PF(IR,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*PF(IR,JI-1,KMLOC) + ENDDO + ENDDO + ELSE + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(IR,II) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(IR,II) +#endif + DO J=1,KF_SCALARS + DO JI=2,ISMAX+3-KM + IR = 2*J-1 + II = IR+1 + PNSD(IR,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*PF(IR,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*PF(IR,JI-1,KMLOC) + PNSD(II,JI,KMLOC) = -ZN(JI+1)*ZZEPSNM(JI)*PF(II,JI+1,KMLOC)+& + &ZN(JI-2)*ZZEPSNM(JI-1)*PF(II,JI-1,KMLOC) + ENDDO + ENDDO + ENDIF + +!end loop over wavenumber +END DO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif + +! ------------------------------------------------------------------ + +END SUBROUTINE SPNSDE +END MODULE SPNSDE_MOD diff --git a/src/trans/gpu/internal_reducedmem/trgtol_mod.F90 b/src/trans/gpu/internal_reducedmem/trgtol_mod.F90 new file mode 100755 index 0000000..069bcc1 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/trgtol_mod.F90 @@ -0,0 +1,1610 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRGTOL_MOD + CONTAINS + SUBROUTINE TRGTOL_CUDAAWARE(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + !**** *TRGTOL * - transposition of grid point data from column + ! structure to latitudinal. Reorganize data between + ! grid point calculations and direct Fourier Transform + + ! Version using CUDA-aware MPI + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trgtol(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (output) + ! PGP - Blocked grid point data (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original: 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow + ! NPRTRV to differ from NPRGPEW + ! : 98-06-17 add mailbox control logic (from TRLTOM) + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! KINDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of GTOL_PACK,GTOL_UNPACK + ! 03-04-02 G. Radnoti: call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB ,JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRNS, MTAGGL, & + & MYSETV, MYSETW, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LGPNORM, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + ! + + USE MPI + + !USE MPL_MPIF + USE MPL_DATA_MODULE ,ONLY : MPL_COMM_OML + USE OML_MOD ,ONLY : OML_MY_THREAD + USE MPL_MESSAGE_MOD + USE MPL_NPROC_MOD + USE MPL_STATS_MOD + USE YOMMPLSTATS + + IMPLICIT NONE + + REAL(KIND=JPRBT),INTENT(OUT) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*2) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IGL, IGLL, ILAST,& + &ILASTLAT, ILEN, JROC, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ISENDSET, ITAG, J, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLD, & + &II,INDOFFX,IBUFLENS,IBUFLENR,INRECV, IPROC,IFLDS, & + &INSEND,INS,INR,IR + + ! LOCAL LOGICAL SCALARS + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: KINDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(KF_GP) + INTEGER(KIND=JPIM) :: ISEND_FLD_TOTAL(NPROC),ISEND_FLD_START(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: IRECVCOUNT,ISENDCOUNT + INTEGER(KIND=JPIM) :: IJPOS(NGPBLKS),IFLDA(KF_GP),JJ,JI,IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + + INTEGER(KIND=JPIM), dimension(MPI_STATUS_SIZE,NPROC*2) :: ISTATUS + INTEGER(KIND=JPIM) :: IERROR, irank + + REAL(KIND=JPRB) :: T1, T2, TIMEF, tc + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + + IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE) + + CALL GSTATS(1805,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + IF(PRESENT(KPTRGP)) LLINDER = .TRUE. + IF(PRESENT(PGP)) LLPGPONLY = .TRUE. + IF(PRESENT(PGPUV)) LLPGPUV = .TRUE. + IF(PRESENT(PGP3A)) LLPGP3A = .TRUE. + IF(PRESENT(PGP3B)) LLPGP3B = .TRUE. + IF(PRESENT(PGP2)) LLPGP2 = .TRUE. + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + LLUV(:) = .FALSE. + IUVPARS(:) = -99 + IUVLEVS(:) = -99 + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + + ITAG = MTAGGL + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETA + ISEND = JROC + ISENDSET = ISETV + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + ISEND_FLD_TOTAL(JROC) = IPOS + ISENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS + + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(IRECVSET)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(IRECVSET)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + IRECVTOT(JROC) = IPOS*KF_FS + + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + KINDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(-1:ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(-1:IRECVCOUNT,INRECV)) + + CALL GSTATS(1805,1) + + ! Send loop............................................................. + + ! Copy local contribution + + IF(ISENDTOT(MYPROC) > 0 )THEN + + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + CALL GSTATS(1601,0) + + #ifdef NECSX + !!$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #else + !!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #endif +#ifdef ACCGPU + !$ACC DATA & + !$ACC PRESENT(PGLAT) & + !$ACC COPYIN(IGPTRSEND) & + !$ACC COPYIN(IFLDOFF,INDOFF,IGPTROFF,KINDEX) + !$ACC DATA IF(PRESENT(PGP)) COPYIN(PGP) + !$ACC DATA IF(PRESENT(PGPUV)) COPYIN(PGPUV) + !$ACC DATA IF(PRESENT(PGP2)) COPYIN(PGP2) + !$ACC DATA IF(PRESENT(PGP3A)) COPYIN(PGP3A) + !$ACC DATA IF(PRESENT(PGP3B)) COPYIN(PGP3B) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA & + !$OMP& MAP(ALLOC:PGLAT) & + !$OMP& MAP(TO:IGPTRSEND) & + !$OMP& MAP(TO:IFLDOFF,INDOFF,IGPTROFF,KINDEX) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(PRESENT(PGP)) MAP(TO:PGP) + !$OMP TARGET DATA IF(PRESENT(PGPUV)) MAP(TO:PGPUV) + !$OMP TARGET DATA IF(PRESENT(PGP2)) MAP(TO:PGP2) + !$OMP TARGET DATA IF(PRESENT(PGP3A)) MAP(TO:PGP3A) + !$OMP TARGET DATA IF(PRESENT(PGP3B)) MAP(TO:PGP3B) +#endif + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP TILE(16,32) +#endif + DO JK=IFIRST,ILAST + DO JFLD=1,IFLDS + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IFLD = IFLDOFF(JFLD) + PGLAT(JFLD,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(IPOS) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IPOS) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(IPOS) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IPOS) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(IPOS) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IPOS) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(IPOS) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP PRIVATE(IPOS) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ENDDO + ELSE + WRITE(NOUT,*)'TRLTOG_MOD: ERROR',JFLD,IFLD + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + +#ifdef OMPGPU + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA +#endif + !!$OMP END PARALLEL DO + CALL GSTATS(1601,1) + + ENDIF + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + !....Pack loop......................................................... + +#ifdef ACCGPU + !$ACC ENTER DATA CREATE(ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET ENTER DATA MAP(ALLOC:ZCOMBUFS) +#endif + + ISEND_FLD_START=1 + CALL GSTATS(1602,0) + + DO INS=1,INSEND + ISEND=JSEND(INS) + CALL PE2SET(ISEND,ISETA,ISETB,ISETW,ISETV) + ISENDSET = ISETV + ISEND_FLD_END = ISEND_FLD_TOTAL(ISEND) + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IJPOS(JBLK)=IPOS + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + + + IF(.not. LLPGPONLY) THEN + stop("Error: only LLPGPONLY is supported on GPU as yet") + END IF + + +#ifdef ACCGPU + !$ACC DATA COPYIN(IFLDA,IGPTRSEND,NGPBLKS,ISEND_FLD_START,ISEND_FLD_END,IGPTRSEND,IPOS,IJPOS,INS) PRESENT(ZCOMBUFS) + !$ACC DATA IF(PRESENT(PGP)) PRESENT(PGP) + !$ACC DATA IF(PRESENT(PGPUV)) PRESENT(PGPUV) + !$ACC DATA IF(PRESENT(PGP2)) PRESENT(PGP2) + !$ACC DATA IF(PRESENT(PGP3A)) PRESENT(PGP3A) + !$ACC DATA IF(PRESENT(PGP3B)) PRESENT(PGP3B) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:IFLDA,IGPTRSEND,NGPBLKS,ISEND_FLD_START,ISEND_FLD_END,IGPTRSEND,IPOS,IJPOS,INS) & + !$OMP& MAP(ALLOC:ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(PRESENT(PGP)) MAP(ALLOC:PGP) + !$OMP TARGET DATA IF(PRESENT(PGPUV)) MAP(ALLOC:PGPUV) + !$OMP TARGET DATA IF(PRESENT(PGP2)) MAP(ALLOC:PGP2) + !$OMP TARGET DATA IF(PRESENT(PGP3A)) MAP(ALLOC:PGP3A) + !$OMP TARGET DATA IF(PRESENT(PGP3B)) MAP(ALLOC:PGP3B) +#endif + + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP +#endif + DO JJ=ISEND_FLD_START(ISEND),ISEND_FLD_END + IFLDT=IFLDA(JJ) +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IF(LLINDER) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK) + ENDDO + ELSE + IF(LLPGPONLY) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP2(JK,IGP2PARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) + ENDDO + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA +#endif + +#ifdef ACCGPU + !$ACC UPDATE HOST(ZCOMBUFS(-1:,INS)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(ZCOMBUFS(-1:,INS)) +#endif + IPOS=(ISEND_FLD_END-ISEND_FLD_START(ISEND)+1)*IPOS + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = IFLD + ENDDO + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + CALL MPI_COMM_RANK(MPI_COMM_WORLD, IRANK, IERROR) + !IF(irank==0) WRITE(*,*) "packing (trgtol) in sec: ", Tc + #endif + + CALL GSTATS(1602,1) + + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(761) + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',1,ZHOOK_HANDLE_BAR) + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,0) + ELSE + CALL GSTATS(804,0) + ENDIF + IR=0 + + T2=TIMEF() + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + T2=(TIMEF()-T2)/1000.0_JPRB + #ifdef AGVERBOSE + !WRITE(*,*) "AGTIME BARRIER (trgtol 1) in sec: ", T2 + #endif + + T1=TIMEF() + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif +#ifdef ACCGPU + !$ACC ENTER DATA CREATE(ZCOMBUFR) +#endif +#ifdef OMPGPU + !$OMP TARGET ENTER DATA MAP(ALLOC:ZCOMBUFR) +#endif + +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(ZCOMBUFR,ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(ZCOMBUFR,ZCOMBUFS) +#endif + + ! Receive loop......................................................... + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + !CALL MPL_RECV(ZCOMBUFR(-1:IRECVTOT(IRECV),INR), & + ! & KSOURCE=NPRCIDS(IRECV), & + ! & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + ! & KTAG=ITAG,CDSTRING='TRLTOG:' ) + + IERROR=0 + CALL MPI_IRECV(ZCOMBUFR(-1:IRECVTOT(IRECV),INR),SIZE(ZCOMBUFR(-1:IRECVTOT(IRECV),INR)),INT(MPI_REAL8),NPRCIDS(IRECV)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + + ENDDO + + !....Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + !CALL MPL_SEND(ZCOMBUFS(-1:ISENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & + ! & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + ! & KTAG=ITAG,CDSTRING='TRGTOL:' ) + IERROR=0 + CALL MPI_ISEND(ZCOMBUFS(-1:ISENDTOT(ISEND),INS),SIZE(ZCOMBUFS(-1:ISENDTOT(ISEND),INS)),INT(MPI_REAL8),NPRCIDS(ISEND)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + + + ENDDO + + IF(IR > 0) THEN + !CALL MPL_WAIT(ZDUM,KREQUEST=IREQ(1:IR), & + ! & CDSTRING='TRGTOL: WAIT FOR SENDS AND RECEIVES') + IERROR=0 + CALL MPI_WAITALL(IR,IREQ,ISTATUS,IERROR) + + ENDIF +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(irank==0) WRITE(*,*) "CUDA-aware isend/irecv (trgtol) in sec: ", Tc + #endif + T2=TIMEF() + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + T2=(TIMEF()-T2)/1000.0_JPRB + #ifdef AGVERBOSE + !WRITE(*,*) "AGTIME BARRIER (trgtol 2) in sec: ", T2 + #endif + + T1=(TIMEF()-T1)/1000.0_JPRB + #ifdef AGVERBOSE + !WRITE(*,*) "TRGTOL COMMS time (s): ", T1 + #endif + +#ifdef OMPGPU + !$OMP TARGET EXIT DATA MAP(DELETE:ZCOMBUFS) +#endif +#ifdef ACCGPU + !$ACC EXIT DATA DELETE(ZCOMBUFS) +#endif + + + + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,1) + ELSE + CALL GSTATS(804,1) + ENDIF + !CALL GSTATS_BARRIER2(761) + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + ! Unpack loop......................................................... + + CALL GSTATS(1603,0) +#ifdef ACCGPU + !$ACC DATA COPYIN(JRECV,IRECVTOT,KF_FS,KINDEX,INDOFF,ISEND_FLD_START,ISEND_FLD_END) PRESENT(ZCOMBUFR,PGLAT) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:JRECV,IRECVTOT,KF_FS,KINDEX,INDOFF,ISEND_FLD_START,ISEND_FLD_END) & + !$OMP& MAP(ALLOC:ZCOMBUFR,PGLAT) +#endif +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP +#endif + DO INR=1,INRECV + IRECV=JRECV(INR) + ILEN = IRECVTOT(IRECV)/KF_FS + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JL=1,ILEN + II = KINDEX(INDOFF(IRECV)+JL) +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JFLD=IRECV_FLD_START,IRECV_FLD_END + PGLAT(JFLD,II) = ZCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR) + ENDDO + ENDDO + IPOS = ILEN*(IRECV_FLD_END-IRECV_FLD_START+1) + ENDDO + +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(irank==0) WRITE(*,*) "unpacking (trgtol) in sec: ", Tc + #endif + + CALL GSTATS(1603,1) + +#ifdef OMPGPU + !$OMP TARGET EXIT DATA MAP(DELETE:ZCOMBUFR) +#endif +#ifdef ACCGPU + !$ACC EXIT DATA DELETE(ZCOMBUFR) +#endif + + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE) + + END SUBROUTINE TRGTOL_CUDAAWARE + SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + !**** *TRGTOL * - transposition of grid point data from column + ! structure to latitudinal. Reorganize data between + ! grid point calculations and direct Fourier Transform + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trgtol(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (output) + ! PGP - Blocked grid point data (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original: 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow + ! NPRTRV to differ from NPRGPEW + ! : 98-06-17 add mailbox control logic (from TRLTOM) + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! KINDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of GTOL_PACK,GTOL_UNPACK + ! 03-04-02 G. Radnoti: call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB ,JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRNS, MTAGGL, & + & MYSETV, MYSETW, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LGPNORM, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + ! + USE MPI + + IMPLICIT NONE + + REAL(KIND=JPRBT),INTENT(OUT) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*2) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IGL, IGLL, ILAST,& + &ILASTLAT, ILEN, JROC, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ISENDSET, ITAG, J, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLD, & + &II,INDOFFX,IBUFLENS,IBUFLENR,INRECV, IPROC,IFLDS, & + &INSEND,INS,INR,IR, iunit + + ! LOCAL LOGICAL SCALARS + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: KINDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(MAX(KF_GP,KF_FS)) + INTEGER(KIND=JPIM) :: ISEND_FLD_TOTAL(NPROC),ISEND_FLD_START(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: IRECVCOUNT,ISENDCOUNT + INTEGER(KIND=JPIM) :: IJPOS(NGPBLKS),IFLDA(KF_GP),JJ,JI,IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + INTEGER(KIND=JPIM) :: IERROR, irank + + REAL(KIND=JPRB) :: TIMEF, tc + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + + IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE) + + iunit=300+myproc + + CALL GSTATS(1805,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + IF(PRESENT(KPTRGP)) LLINDER = .TRUE. + IF(PRESENT(PGP)) LLPGPONLY = .TRUE. + IF(PRESENT(PGPUV)) LLPGPUV = .TRUE. + IF(PRESENT(PGP3A)) LLPGP3A = .TRUE. + IF(PRESENT(PGP3B)) LLPGP3B = .TRUE. + IF(PRESENT(PGP2)) LLPGP2 = .TRUE. + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + LLUV(:) = .FALSE. + IUVPARS(:) = -99 + IUVLEVS(:) = -99 + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + + ITAG = MTAGGL + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETA + ISEND = JROC + ISENDSET = ISETV + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + ISEND_FLD_TOTAL(JROC) = IPOS + ISENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS + + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(IRECVSET)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(IRECVSET)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + IRECVTOT(JROC) = IPOS*KF_FS + + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(IRECVSET)+JGL-D%NFRSTLAT(IRECVSET) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + KINDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(-1:ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(-1:IRECVCOUNT,INRECV)) + + CALL GSTATS(1805,1) + + ! Send loop............................................................. + + ! Copy local contribution + + IF(ISENDTOT(MYPROC) > 0 )THEN + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + + CALL GSTATS(1601,0) + #ifdef NECSX + !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #else + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #endif + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + !!$ACC parallel loop private(IPOS) + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + !!$ACC loop private(IFLD) + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + PGLAT(JFLD,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + !if(jfld<=5 .and. kindex(ipos)<5) write(nout,*)'trgtol: ipos=',ipos,' idx=',kindex(ipos),' jk=',jk,' lev=',iuvlevs(ifld),' pars=',iuvpars(ifld),' pglat=',PGLAT(JFLD,KINDEX(IPOS)) + !if( jfld.eq.1 ) write(nout,*)'trgtoluv: ',kindex(ipos),' lev=',iuvlevs(ifld),' pars=',iuvpars(ifld),' pglat=',PGLAT(JFLD,KINDEX(IPOS)) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + !if( jk.eq.ifirst ) write(iunit,*)'trgtol: ',JK,JFLD,IFLD,kindex(ipos),' lev=',IGP3ALEVS(ifld),' pars=',IGP3APARS(ifld),' pglat=',PGLAT(JFLD,KINDEX(IPOS)) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGLAT(JFLD,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ENDDO + ELSE + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1601,1) + + ENDIF + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + !....Pack loop......................................................... + + ISEND_FLD_START=1 + CALL GSTATS(1602,0) + !!IF( NPROC > 1 )THEN + !!$ACC wait(1) + !!$ACC update host(PGLAT) + !!ENDIF + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,JBLK,IFIRST,ILAST,JK,JJ,JI,& + !$OMP& INS,ISEND,ISETA,ISETB,ISETW,ISETV,ISENDSET,ISEND_FLD_END,IFLD,IPOS,& + !$OMP& IFLDA,JFLD,IJPOS) + DO INS=1,INSEND + ISEND=JSEND(INS) + CALL PE2SET(ISEND,ISETA,ISETB,ISETW,ISETV) + ISENDSET = ISETV + ISEND_FLD_END = ISEND_FLD_TOTAL(ISEND) + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISENDSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IJPOS(JBLK)=IPOS + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + + + DO JJ=ISEND_FLD_START(ISEND),ISEND_FLD_END + IFLDT=IFLDA(JJ) + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IF(LLINDER) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,KPTRGP(IFLDT),JBLK) + ENDDO + ELSE + IF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP2(JK,IGP2PARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-ISEND_FLD_START(ISEND))*IPOS+IJPOS(JBLK)+JK-IFIRST+1 + ZCOMBUFS(JI,INS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) + ENDDO + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + + IPOS=(ISEND_FLD_END-ISEND_FLD_START(ISEND)+1)*IPOS + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = IFLD + ENDDO + !$OMP END PARALLEL DO + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + CALL MPI_COMM_RANK(MPI_COMM_WORLD, IRANK, IERROR) + !IF(irank==0) WRITE(*,*) "packing (trgtol) in sec: ", Tc + #endif + + CALL GSTATS(1602,1) + + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(761) + IF (LHOOK) CALL DR_HOOK('TRGTOL_BAR',1,ZHOOK_HANDLE_BAR) + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,0) + ELSE + CALL GSTATS(804,0) + ENDIF + IR=0 + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + ! Receive loop......................................................... + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:IRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRLTOG:' ) + !print*,irank,size(ZCOMBUFR(-1:IRECVTOT(IRECV),INR)) + ENDDO + + !....Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + CALL MPL_SEND(ZCOMBUFS(-1:ISENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRGTOL:' ) + ENDDO + + IF(IR > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='TRGTOL: WAIT FOR SENDS AND RECEIVES') + ENDIF + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=(TIMEF()-Tc)/1000.0_JPRB + ! !IF(irank==0) WRITE(*,*) "non-CUDA-aware isend/irecv (trgtol) in sec: ", Tc + !#endif + + IF(.NOT.LGPNORM)THEN + CALL GSTATS(803,1) + ELSE + CALL GSTATS(804,1) + ENDIF + CALL GSTATS_BARRIER2(761) + + !#ifdef COMVERBOSE + ! call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=TIMEF() + !#endif + ! Unpack loop......................................................... + + CALL GSTATS(1603,0) + + + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD,INR,IRECV,ILEN,IRECV_FLD_START,IRECV_FLD_END,IPOS) + DO INR=1,INRECV + IRECV=JRECV(INR) + ILEN = IRECVTOT(IRECV)/KF_FS + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) + DO JFLD=IRECV_FLD_START,IRECV_FLD_END + DO JL=1,ILEN + II = KINDEX(INDOFF(IRECV)+JL) + PGLAT(JFLD,II) = ZCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR) + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO + + ! this appears to be important (otherwise, old data picked in PGLAT) + ! in particular, one would have thought that above ACC copy and update on the + ! device is the same as OMP loop + update device command below, but it seems not, and winds still in field index 1 from prev inv_trans !!! +#ifdef ACCGPU + !$ACC update device(PGLAT) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE TO(PGLAT) +#endif +#ifdef ACCGPU + !$ACC wait +#endif + !$OMP BARRIER + + !#ifdef COMVERBOSE + !call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + ! Tc=(TIMEF()-Tc)/1000.0_JPRB + ! !IF(irank==0) WRITE(*,*) "unpacking (trgtol) in sec: ", Tc + !#endif + + CALL GSTATS(1603,1) + + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE) + + END SUBROUTINE TRGTOL + END MODULE TRGTOL_MOD diff --git a/src/trans/gpu/internal_reducedmem/trltog_mod.F90 b/src/trans/gpu/internal_reducedmem/trltog_mod.F90 new file mode 100755 index 0000000..25724d0 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/trltog_mod.F90 @@ -0,0 +1,1578 @@ +! (C) Copyright 1995- ECMWF. +! (C) Copyright 1995- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRLTOG_MOD + CONTAINS + SUBROUTINE TRLTOG_CUDAAWARE(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + + !**** *trltog * - transposition of grid point data from latitudinal + ! to column structure. This takes place between inverse + ! FFT and grid point calculations. + ! TRLTOG is the inverse of TRGTOL + + ! Version using CUDA-aware MPI + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trltog(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (input) + ! PGP - Blocked grid point data (output) + ! KVSET - "v-set" for each field (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow NPRTRV + ! to differ from NPRGPEW + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! INDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of LTOG_PACK,LTOG_UNPACK + ! 03-0-02 G. Radnoti: Call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB ,JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_MYRANK + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DISTR ,ONLY : D, MYSETV, MYSETW, MTAGLG, & + & NPRCIDS, NPRTRNS, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + ! + + USE MPI + + !USE MPL_MPIF + USE MPL_DATA_MODULE ,ONLY: MPL_COMM_OML + USE MPL_MESSAGE_MOD + USE MPL_NPROC_MOD + USE MPL_STATS_MOD + USE YOMMPLSTATS + USE OML_MOD ,ONLY: OML_MY_THREAD + + IMPLICIT NONE + + + REAL(KIND=JPRBT),INTENT(IN) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) + + ! LOCAL VARIABLES + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*2) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IFLD, IGL, IGLL,& + &ILAST, ILASTLAT, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ITAG, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLDS, IPROC,JROC, & + &INRECV, INSEND,INR,INS,IR + INTEGER(KIND=JPIM) :: II,INDOFFX,ILEN,IBUFLENS,IBUFLENR + + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: INDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(KF_GP) + INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END + INTEGER(KIND=JPIM) :: ISEND_FLD_START(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: ISENDCOUNT,IRECVCOUNT,J + INTEGER(KIND=JPIM) :: JPOS(NGPBLKS),IFLDA(KF_GP),JI,JJ + INTEGER(KIND=JPIM) :: IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + INTEGER(KIND=JPIM), dimension(MPI_STATUS_SIZE,NPROC*2) :: ISTATUS + INTEGER(KIND=JPIM) :: IERROR + + REAL(KIND=JPRB) :: T1, T2, TIMEF, tc + + + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + IF (LHOOK) CALL DR_HOOK('TRLTOG',0,ZHOOK_HANDLE) + + + CALL GSTATS(1806,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + IF(PRESENT(KPTRGP)) LLINDER = .TRUE. + IF(PRESENT(PGP)) LLPGPONLY=.TRUE. + IF(PRESENT(PGPUV)) LLPGPUV=.TRUE. + IF(PRESENT(PGP3A)) LLPGP3A=.TRUE. + IF(PRESENT(PGP3B)) LLPGP3B=.TRUE. + IF(PRESENT(PGP2)) LLPGP2=.TRUE. + + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + + LLUV(:) = .FALSE. + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + ITAG = MTAGLG + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + ISEND = JROC + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + IRECVTOT(JROC) = IGPTRRECV(ISETW)*IPOS + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + ISENDTOT(JROC) = IPOS*KF_FS + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + INDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(-1:ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(-1:IRECVCOUNT,INRECV)) + + CALL GSTATS(1806,1) + + + ! Copy local contribution + IF( IRECVTOT(MYPROC) > 0 )THEN + + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + + CALL GSTATS(1604,0) + #ifdef NECSX + !!$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #else + !!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #endif +#ifdef ACCGPU + !$ACC DATA & + !$ACC PRESENT(PGLAT) & + !$ACC COPYIN(IGPTRSEND) & + !$ACC COPYIN(IFLDOFF,INDOFF,IGPTROFF,INDEX) + !$ACC DATA IF(PRESENT(PGP)) PRESENT(PGP) + !$ACC DATA IF(PRESENT(PGPUV)) PRESENT(PGPUV) + !$ACC DATA IF(PRESENT(PGP2)) PRESENT(PGP2) + !$ACC DATA IF(PRESENT(PGP3A)) PRESENT(PGP3A) + !$ACC DATA IF(PRESENT(PGP3B)) PRESENT(PGP3B) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA & + !$OMP& MAP(ALLOC:PGLAT) & + !$OMP& MAP(TO:IGPTRSEND) & + !$OMP& MAP(TO:IFLDOFF,INDOFF,IGPTROFF,INDEX) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(PRESENT(PGP)) MAP(ALLOC:PGP) + !$OMP TARGET DATA IF(PRESENT(PGPUV)) MAP(ALLOC:PGPUV) + !$OMP TARGET DATA IF(PRESENT(PGP2)) MAP(ALLOC:PGP2) + !$OMP TARGET DATA IF(PRESENT(PGP3A)) MAP(ALLOC:PGP3A) + !$OMP TARGET DATA IF(PRESENT(PGP3B)) MAP(ALLOC:PGP3B) +#endif + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC parallel loop tile(16,32) +#endif + DO JFLD=1,IFLDS + DO JK=IFIRST,ILAST + IFLD = IFLDOFF(JFLD) + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ENDDO + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + !$OMP PARALLEL DO PRIVATE(IPOS) +#ifdef ACCGPU + !$acc parallel loop private(ipos) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + !$OMP PARALLEL DO PRIVATE(IPOS) +#ifdef ACCGPU + !$acc parallel loop private(ipos) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + !$OMP PARALLEL DO PRIVATE(IPOS) +#ifdef ACCGPU + !$acc parallel loop private(ipos) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + !$OMP PARALLEL DO PRIVATE(IPOS) +#ifdef ACCGPU + !$acc parallel loop private(ipos) +#endif + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSE + WRITE(NOUT,*)'TRLTOG_MOD: ERROR',JFLD,IFLD + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !!$OMP END PARALLEL DO +#ifdef OMPGPU + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA +#endif + CALL GSTATS(1604,1) + + ENDIF + + ! + ! loop over the number of processors we need to communicate with. + ! NOT MYPROC + ! + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + ! Pack loop......................................................... + +#ifdef ACCGPU + !$acc data create(ZCOMBUFR,ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(ALLOC:ZCOMBUFR,ZCOMBUFS) +#endif + + CALL GSTATS(1605,0) + +#ifdef ACCGPU + !$ACC DATA COPYIN(JSEND,ISENDTOT,KF_FS,INDEX,INDOFF,ISEND_FLD_START,ISEND_FLD_END,PGLAT) PRESENT(ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:JSEND,ISENDTOT,KF_FS,INDEX,INDOFF,ISEND_FLD_START,ISEND_FLD_END,PGLAT) & + !$OMP& MAP(ALLOC:ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP +#endif + DO INS=1,INSEND + ISEND=JSEND(INS) + ISEND_FLD_START(ISEND)= 1 + ILEN = ISENDTOT(ISEND)/KF_FS + ISEND_FLD_END = KF_FS + +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JL=1,ILEN + II = INDEX(INDOFF(ISEND)+JL) +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JFLD=ISEND_FLD_START(ISEND),ISEND_FLD_END + + ZCOMBUFS((JFLD-ISEND_FLD_START(ISEND))*ILEN+JL,INS) = PGLAT(JFLD,II) + ENDDO + ENDDO + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = KF_FS + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(MPL_MYRANK==1) WRITE(*,*) "packing (trltog) in sec: ", Tc + #endif + + CALL GSTATS(1605,1) + + IR=0 + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',0,ZHOOK_HANDLE_BAR) + !CALL GSTATS_BARRIER(762) + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',1,ZHOOK_HANDLE_BAR) + CALL GSTATS(805,0) + + + !T2=TIMEF() + !call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !T2=(TIMEF()-T2)/1000.0_JPRB + #ifdef AGVERBOSE + WRITE(*,*) "AGTIME BARRIER (trltog 1) in sec: ", T2 + #endif + + T1=TIMEF() + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + +#ifdef ACCGPU + !$ACC HOST_DATA USE_DEVICE(ZCOMBUFR,ZCOMBUFS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA USE_DEVICE_PTR(ZCOMBUFR,ZCOMBUFS) +#endif + !...Receive loop......................................................... + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + + IERROR=0 + + CALL MPI_IRECV(ZCOMBUFR(-1:IRECVTOT(IRECV),INR),SIZE(ZCOMBUFR(-1:IRECVTOT(IRECV),INR)),INT(MPI_REAL8),NPRCIDS(IRECV)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + + ENDDO + + !...Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + + IERROR=0 + CALL MPI_ISEND(ZCOMBUFS(-1:ISENDTOT(ISEND),INS),SIZE(ZCOMBUFS(-1:ISENDTOT(ISEND),INS)),INT(MPI_REAL8),NPRCIDS(ISEND)-1,ITAG,MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR),IERROR) + + ENDDO + + IF(IR > 0) THEN + IERROR=0 + CALL MPI_WAITALL(IR,IREQ,ISTATUS,IERROR) + ENDIF + +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END HOST_DATA +#endif + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(MPL_MYRANK==1) WRITE(*,*) "CUDA-aware isend/irecv (trltog) in sec: ", Tc + #endif + !T2=TIMEF() + !call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + !T2=(TIMEF()-T2)/1000.0_JPRB + #ifdef AGVERBOSE + !WRITE(*,*) "AGTIME BARRIER (trltog 2) in sec: ", T2 + #endif + + T1=(TIMEF()-T1)/1000.0_JPRB + #ifdef AGVERBOSE + !WRITE(*,*) "TRLTOG COMMS time (s): ", T1 + #endif + + CALL GSTATS(805,1) + !CALL GSTATS_BARRIER2(762) + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + ! Unpack loop......................................................... + + CALL GSTATS(1606,0) + + + DO INR=1,INRECV + IRECV=JRECV(INR) + CALL PE2SET(IRECV,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETV + +#ifdef ACCGPU + !$ACC UPDATE HOST (ZCOMBUFR(-1,INR),ZCOMBUFR(0,INR)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM (ZCOMBUFR(-1,INR),ZCOMBUFR(0,INR)) +#endif + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) + + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == IRECVSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + JPOS(JBLK)=IPOS + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + + + IF(.not. LLPGPONLY) THEN + stop("Error: only LLPGPONLY is supported on GPU as yet") + END IF + + +#ifdef ACCGPU + !$ACC DATA COPYIN(IFLDA,IGPTRSEND,JPOS,IRECV_FLD_START,IRECV_FLD_END) & + !$ACC PRESENT(ZCOMBUFR,KPTRGP,IUVLEVS,IGP2PARS,IGP3ALEVS,IGP3BLEVS) + !$ACC DATA IF(PRESENT(PGP)) PRESENT(PGP) + !$ACC DATA IF(PRESENT(PGPUV)) PRESENT(PGPUV) + !$ACC DATA IF(PRESENT(PGP2)) PRESENT(PGP2) + !$ACC DATA IF(PRESENT(PGP3A)) PRESENT(PGP3A) + !$ACC DATA IF(PRESENT(PGP3B)) PRESENT(PGP3B) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:IFLDA,IGPTRSEND,JPOS,IRECV_FLD_START,IRECV_FLD_END) & + !$OMP& MAP(ALLOC:ZCOMBUFR,KPTRGP,IUVLEVS,IGP2PARS,IGP3ALEVS,IGP3BLEVS) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA IF(PRESENT(PGP)) MAP(ALLOC:PGP) + !$OMP TARGET DATA IF(PRESENT(PGPUV)) MAP(ALLOC:PGPUV) + !$OMP TARGET DATA IF(PRESENT(PGP2)) MAP(ALLOC:PGP2) + !$OMP TARGET DATA IF(PRESENT(PGP3A)) MAP(ALLOC:PGP3A) + !$OMP TARGET DATA IF(PRESENT(PGP3B)) MAP(ALLOC:PGP3B) +#endif + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP +#endif + DO JJ=IRECV_FLD_START,IRECV_FLD_END + IFLDT=IFLDA(JJ) +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IF(LLINDER) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP(JK,KPTRGP(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLPGPONLY) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP(JK,IFLDT,JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN +#ifdef ACCGPU + !$ACC LOOP +#endif + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ENDIF + ENDIF + ENDDO + ENDDO + +#ifdef OMPGPU + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA +#endif + + IPOS=(IRECV_FLD_END-IRECV_FLD_START+1)*IPOS + ENDDO + +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(MPL_MYRANK==1) WRITE(*,*) "unpacking (trltog) in sec: ", Tc + #endif + + CALL GSTATS(1606,1) + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRLTOG',1,ZHOOK_HANDLE) + + + END SUBROUTINE TRLTOG_CUDAAWARE + + SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& + &PGP,PGPUV,PGP3A,PGP3B,PGP2) + + !**** *trltog * - transposition of grid point data from latitudinal + ! to column structure. This takes place between inverse + ! FFT and grid point calculations. + ! TRLTOG is the inverse of TRGTOL + + ! Purpose. + ! -------- + + + !** Interface. + ! ---------- + ! *call* *trltog(...) + + ! Explicit arguments : + ! -------------------- + ! PGLAT - Latitudinal data ready for direct FFT (input) + ! PGP - Blocked grid point data (output) + ! KVSET - "v-set" for each field (input) + + ! Implicit arguments : + ! -------------------- + + ! Method. + ! ------- + ! See documentation + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! MPP Group *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 95-10-01 + ! D.Dent : 97-08-04 Reorganisation to allow NPRTRV + ! to differ from NPRGPEW + ! =99-03-29= Mats Hamrud and Deborah Salmond + ! JUMP in FFT's changed to 1 + ! INDEX introduced and ZCOMBUF not used for same PE + ! 01-11-23 Deborah Salmond and John Hague + ! LIMP_NOOLAP Option for non-overlapping message passing + ! and buffer packing + ! 01-12-18 Peter Towers + ! Improved vector performance of LTOG_PACK,LTOG_UNPACK + ! 03-0-02 G. Radnoti: Call barrier always when nproc>1 + ! 08-01-01 G.Mozdzynski: cleanup + ! 09-01-02 G.Mozdzynski: use non-blocking recv and send + ! ------------------------------------------------------------------ + + + + USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB ,JPRBT + USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + + USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_MYRANK + + USE TPM_GEN ,ONLY : NOUT + USE TPM_DISTR ,ONLY : D, MYSETV, MYSETW, MTAGLG, & + & NPRCIDS, NPRTRNS, MYPROC, NPROC + USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, NGPBLKS + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + !USE MYSENDSET_MOD + !USE MYRECVSET_MOD + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + ! + USE MPI + + IMPLICIT NONE + + + REAL(KIND=JPRBT),INTENT(IN) :: PGLAT(:,:) + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) + + ! LOCAL VARIABLES + + REAL(KIND=JPRBT),ALLOCATABLE :: ZCOMBUFS(:,:),ZCOMBUFR(:,:) + + INTEGER(KIND=JPIM) :: ISENT (NPROC) + INTEGER(KIND=JPIM) :: IRCVD (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: IREQ (NPROC*2) + INTEGER(KIND=JPIM) :: JSEND (NPROC) + INTEGER(KIND=JPIM) :: JRECV (NPROC) + + INTEGER(KIND=JPIM) :: IFIRST, IFIRSTLAT, IFLD, IGL, IGLL,& + &ILAST, ILASTLAT, IPOS, ISETA, & + &ISETB, IRECV, IRECVSET, & + &ISETV, ISEND, ITAG, JBLK, JFLD, & + &JGL, JK, JL, JLOOP, ISETW, IFLDS, IPROC,JROC, & + &INRECV, INSEND,INR,INS,IR + INTEGER(KIND=JPIM) :: II,INDOFFX,ILEN,IBUFLENS,IBUFLENR + + LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY + LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) + LOGICAL :: LLDONE, LLINDER + INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) + INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + INTEGER(KIND=JPIM) :: INDEX(D%NLENGTF),INDOFF(NPROC),IFLDOFF(KF_GP) + INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END + INTEGER(KIND=JPIM) :: ISEND_FLD_START(NPROC),ISEND_FLD_END + INTEGER(KIND=JPIM) :: INUMFLDS + INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + + ! INTEGER FUNCTIONS + INTEGER(KIND=JPIM) :: ISENDCOUNT,IRECVCOUNT,J + INTEGER(KIND=JPIM) :: JPOS(NGPBLKS),IFLDA(KF_GP),JI,JJ + INTEGER(KIND=JPIM) :: IFLDT + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR + + INTEGER(KIND=JPIM) :: IERROR, iunit + + REAL(KIND=JPRB) :: TIMEF, tc + + ! ------------------------------------------------------------------ + + !* 0. Some initializations + ! -------------------- + IF (LHOOK) CALL DR_HOOK('TRLTOG',0,ZHOOK_HANDLE) + + + CALL GSTATS(1806,0) + + LLINDER = .FALSE. + LLPGPUV = .FALSE. + LLPGP3A = .FALSE. + LLPGP3B = .FALSE. + LLPGP2 = .FALSE. + LLPGPONLY = .FALSE. + IF(PRESENT(KPTRGP)) LLINDER = .TRUE. + IF(PRESENT(PGP)) LLPGPONLY=.TRUE. + IF(PRESENT(PGPUV)) LLPGPUV=.TRUE. + IF(PRESENT(PGP3A)) LLPGP3A=.TRUE. + IF(PRESENT(PGP3B)) LLPGP3B=.TRUE. + IF(PRESENT(PGP2)) LLPGP2=.TRUE. + + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + + LLUV(:) = .FALSE. + IF (LLPGPUV) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + + LLGP2(:)=.FALSE. + IF(LLPGP2) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + + LLGP3A(:) = .FALSE. + IF(LLPGP3A) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + + LLGP3B(:) = .FALSE. + IF(LLPGP3B) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + CALL INIGPTR(IGPTRSEND,IGPTRRECV) + LLDONE = .FALSE. + ITAG = MTAGLG + + INDOFFX = 0 + IBUFLENS = 0 + IBUFLENR = 0 + INRECV = 0 + INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) + ISEND = JROC + ISENT(JROC) = 0 + IRCVD(JROC) = 0 + + ! count up expected number of fields + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 + ENDDO + IRECVTOT(JROC) = IGPTRRECV(ISETW)*IPOS + IF(IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + INRECV = INRECV + 1 + JRECV(INRECV)=JROC + ENDIF + + IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,IRECVTOT(JROC)) + + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + ISENDTOT(JROC) = IPOS*KF_FS + IF( JROC /= MYPROC) THEN + IBUFLENS = MAX(IBUFLENS,ISENDTOT(JROC)) + IF(ISENDTOT(JROC) > 0) THEN + INSEND = INSEND+1 + JSEND(INSEND)=JROC + ENDIF + ENDIF + + IF(IPOS > 0) THEN + INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + INDEX(IPOS+INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + ENDDO + + ISENDCOUNT=0 + IRECVCOUNT=0 + DO J=1,NPROC + ISENDCOUNT=MAX(ISENDCOUNT,ISENDTOT(J)) + IRECVCOUNT=MAX(IRECVCOUNT,IRECVTOT(J)) + ENDDO + IF (IBUFLENS > 0) ALLOCATE(ZCOMBUFS(-1:ISENDCOUNT,INSEND)) + IF (IBUFLENR > 0) ALLOCATE(ZCOMBUFR(-1:IRECVCOUNT,INRECV)) + + CALL GSTATS(1806,1) + + + ! Copy local contribution + IF( IRECVTOT(MYPROC) > 0 )THEN + !IF( NPROC > 1 )THEN + !!$ACC update host(PGLAT) async(1) + !ENDIF + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + + iunit=300+myproc + CALL GSTATS(1604,0) + #ifdef NECSX + !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #else + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) + #endif + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,MYSETW) + IF(LLPGPONLY) THEN + IF(LLINDER) THEN + DO JFLD=1,IFLDS + IFLD = KPTRGP(JFLD) + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ENDDO + ELSE + !write(iunit,*) 'trltog ', JBLK, IFLDS, IFIRST, ILAST, PGLAT(1,1) + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP(JK,IFLD,JBLK) = PGLAT(JFLD,INDEX(IPOS)) + !write(iunit,*) 'trltog ', JBLK, JK, IFLD, IPOS, INDEX(IPOS), JFLD, PGP(JK,IFLD,JBLK) + ENDDO + ENDDO + !call flush(iunit) + ENDIF + ELSE + DO JFLD=1,IFLDS + IFLD = IFLDOFF(JFLD) + IF(LLUV(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + DO JK=IFIRST,ILAST + IPOS = INDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(JFLD,INDEX(IPOS)) + ENDDO + ELSE + WRITE(NOUT,*)'TRLTOG_MOD: ERROR',JFLD,IFLD + CALL ABORT_TRANS('TRLTOG_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !$OMP END PARALLEL DO + CALL GSTATS(1604,1) + + ENDIF + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + ! + ! loop over the number of processors we need to communicate with. + ! NOT MYPROC + ! + ! Pack loop......................................................... + + CALL GSTATS(1605,0) + !IF( NPROC > 1 )THEN + !!$ACC wait(1) + !!$ACC if(present(PGP)) update host(PGP) + !!$ACC if(present(PGPUV)) update host(PGPUV) + !!$ACC if(present(PGP2)) update host(PGP2) + !!$ACC if(present(PGP3a)) update host(PGP3a) + !!$ACC if(present(PGP3b)) update host(PGP3b) + !ENDIF + + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD,INS,ISEND,ILEN,ISEND_FLD_END) + DO INS=1,INSEND + ISEND=JSEND(INS) + ISEND_FLD_START(ISEND)= 1 + ILEN = ISENDTOT(ISEND)/KF_FS + ISEND_FLD_END = KF_FS + #ifdef NECSX + DO JFLD=ISEND_FLD_START(ISEND),ISEND_FLD_END + DO JL=1,ILEN + II = INDEX(INDOFF(ISEND)+JL) + #else + DO JL=1,ILEN + II = INDEX(INDOFF(ISEND)+JL) + DO JFLD=ISEND_FLD_START(ISEND),ISEND_FLD_END + #endif + ZCOMBUFS((JFLD-ISEND_FLD_START(ISEND))*ILEN+JL,INS) = PGLAT(JFLD,II) + ENDDO + ENDDO + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = KF_FS + ENDDO + !$OMP END PARALLEL DO + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(MPL_MYRANK==1) WRITE(*,*) "packing (trltog) in sec: ", Tc + #endif + + CALL GSTATS(1605,1) + + IR=0 + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',0,ZHOOK_HANDLE_BAR) + CALL GSTATS_BARRIER(762) + IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',1,ZHOOK_HANDLE_BAR) + CALL GSTATS(805,0) + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + !...Receive loop......................................................... + DO INR=1,INRECV + IR=IR+1 + IRECV=JRECV(INR) + CALL MPL_RECV(ZCOMBUFR(-1:IRECVTOT(IRECV),INR), & + & KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRLTOG:' ) + ENDDO + + !...Send loop......................................................... + DO INS=1,INSEND + IR=IR+1 + ISEND=JSEND(INS) + CALL MPL_SEND(ZCOMBUFS(-1:ISENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND),& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & + & KTAG=ITAG,CDSTRING='TRLTOG:') + ENDDO + + IF(IR > 0) THEN + CALL MPL_WAIT(KREQUEST=IREQ(1:IR), & + & CDSTRING='TRLTOG: WAIT FOR SENDS AND RECEIVES') + ENDIF + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(MPL_MYRANK==1) WRITE(*,*) "non-CUDA-aware isend/irecv (trltog) in sec: ", Tc + #endif + + CALL GSTATS(805,1) + CALL GSTATS_BARRIER2(762) + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=TIMEF() + #endif + ! Unpack loop......................................................... + + CALL GSTATS(1606,0) + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,JBLK,IFIRST,ILAST,JK,& + !$OMP& JJ,JI,JPOS,INR,IRECV,IRECVSET,IRECV_FLD_START,IRECV_FLD_END,IPOS,& + !$OMP& ISETA,ISETB,ISETW,ISETV,JFLD,IFLD,IFLDA) + DO INR=1,INRECV + IRECV=JRECV(INR) + CALL PE2SET(IRECV,ISETA,ISETB,ISETW,ISETV) + IRECVSET = ISETV + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) + IFLD = 0 + IPOS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == IRECVSET .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + JPOS(JBLK)=IPOS + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + + + DO JJ=IRECV_FLD_START,IRECV_FLD_END + IFLDT=IFLDA(JJ) + DO JBLK=1,NGPBLKS + IFIRST = IGPTRSEND(1,JBLK,ISETW) + IF(IFIRST > 0) THEN + ILAST = IGPTRSEND(2,JBLK,ISETW) + IF(LLINDER) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP(JK,KPTRGP(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP(JK,IFLDT,JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLUV(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP2(JK,IGP2PARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK)+JK-IFIRST+1 + PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) = ZCOMBUFR(JI,INR) + ENDDO + ENDIF + ENDIF + ENDDO + ENDDO + + IPOS=(IRECV_FLD_END-IRECV_FLD_START+1)*IPOS + ENDDO + !$OMP END PARALLEL DO + + !IF( NPROC > 1 )THEN + !!$ACC if(present(PGP)) update device(PGP) + !!$ACC if(present(PGPUV)) update device(PGPUV) + !!$ACC if(present(PGP2)) update device(PGP2) + !!$ACC if(present(PGP3a)) update device(PGP3a) + !!$ACC if(present(PGP3b)) update device(PGP3b) + !ENDIF + + #ifdef COMVERBOSE + call MPI_BARRIER(MPI_COMM_WORLD,IERROR) + Tc=(TIMEF()-Tc)/1000.0_JPRB + !IF(MPL_MYRANK==1) WRITE(*,*) "unpacking (trltog) in sec: ", Tc + #endif + + CALL GSTATS(1606,1) + IF (IBUFLENS > 0) DEALLOCATE(ZCOMBUFS) + IF (IBUFLENR > 0) DEALLOCATE(ZCOMBUFR) + + IF (LHOOK) CALL DR_HOOK('TRLTOG',1,ZHOOK_HANDLE) + + END SUBROUTINE TRLTOG + END MODULE TRLTOG_MOD + diff --git a/src/trans/gpu/internal_reducedmem/updsp_mod.F90 b/src/trans/gpu/internal_reducedmem/updsp_mod.F90 new file mode 100755 index 0000000..c6e29ed --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/updsp_mod.F90 @@ -0,0 +1,186 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSP_MOD +CONTAINS +SUBROUTINE UPDSP(KF_UV,KF_SCALARS,POA1,POA2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *UPDSP* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL UPDSP(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! ------------------------------------------------------------------ + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB ,JPRBT + +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR ,ONLY : D + +USE UPDSPB_MOD ,ONLY : UPDSPB + +IMPLICIT NONE + + +! DUMMY INTEGER SCALARS + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV,KF_SCALARS + +REAL(KIND=JPRBT) , INTENT(IN) :: POA1(:,:,:) +REAL(KIND=JPRBT) , INTENT(IN) :: POA2(:,:,:) +REAL(KIND=JPRB) , OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) , OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) , OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND,JFLD,IFLD,IDIM1,IDIM3,J3 + + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV +#ifdef ACCGPU + !$ACC UPDATE HOST(POA2(IVORS:IVORE,:,:)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(POA2(IVORS:IVORE,:,:)) +#endif + CALL UPDSPB(KF_UV,POA2(IVORS:IVORE,:,:),PSPVOR,KFLDPTRUV) +#ifdef ACCGPU + !$ACC UPDATE HOST(POA2(IDIVS:IDIVE,:,:)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(POA2(IDIVS:IDIVE,:,:)) +#endif + CALL UPDSPB(KF_UV,POA2(IDIVS:IDIVE,:,:),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 +#ifdef ACCGPU + !$ACC UPDATE HOST(POA1(IST:IEND,:,:)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(POA1(IST:IEND,:,:)) +#endif + CALL UPDSPB(KF_SCALARS,POA1(IST:IEND,:,:),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 +#ifdef ACCGPU + !$ACC UPDATE HOST(POA1(IST:IEND,:,:)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(POA1(IST:IEND,:,:)) +#endif + CALL UPDSPB(IDIM1,POA1(IST:IEND,:,:),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 +#ifdef ACCGPU + !$ACC UPDATE HOST(POA1(IST:IEND,:,:)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(POA1(IST:IEND,:,:)) +#endif + CALL UPDSPB(IDIM1,POA1(IST:IEND,:,:),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 +#ifdef ACCGPU + !$ACC UPDATE HOST(POA1(IST:IEND,:,:)) +#endif +#ifdef OMPGPU + !$OMP TARGET UPDATE FROM(POA1(IST:IEND,:,:)) +#endif + CALL UPDSPB(IDIM1,POA1(IST:IEND,:,:),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE UPDSP +END MODULE UPDSP_MOD diff --git a/src/trans/gpu/internal_reducedmem/updspb_mod.F90 b/src/trans/gpu/internal_reducedmem/updspb_mod.F90 new file mode 100755 index 0000000..ac57c9e --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/updspb_mod.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 1988- ECMWF. +! (C) Copyright 1988- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UPDSPB_MOD + CONTAINS + SUBROUTINE UPDSPB(KFIELD,POA,PSPEC,KFLDPTR) + + + !**** *UPDSPB* - Update spectral arrays after direct Legendre transform + + ! Purpose. + ! -------- + ! To update spectral arrays for a fixed zonal wave-number + ! from values in POA. + + !** Interface. + ! ---------- + ! CALL UPDSPB(....) + + ! Explicit arguments : KM - zonal wavenumber + ! -------------------- KFIELD - number of fields + ! POA - work array + ! PSPEC - spectral array + + ! Implicit arguments : None + ! -------------------- + + ! Method. + ! ------- + + ! Externals. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 88-02-02 + ! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) + ! R. El Khatib : 94-08-02 Replace number of fields by indexes of the + ! first and last field + ! L. Isaksen : 95-06-06 Reordering of spectral arrays + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB ,JPRBT + + + + USE TPM_DIM ,ONLY : R,R_NSMAX,R_NTMAX + !USE TPM_FIELDS + USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NASM0 + ! + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD + INTEGER(KIND=JPIM) :: KM,KMLOC + REAL(KIND=JPRBT) ,INTENT(IN) :: POA(:,:,:) + REAL(KIND=JPRB) ,INTENT(OUT) :: PSPEC(:,:) + INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN, ISMAX, ITMAX, IASM0,IFLD + + + ! ------------------------------------------------------------------ + + !* 0. NOTE. + ! ----- + + ! The following transfer reads : + ! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) + ! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) + ! with n from m to NSMAX + ! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. + ! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) + ! nn is the loop index. + + IF(PRESENT(KFLDPTR)) THEN + stop 'Error: code path not (yet) supported in GPU version' + ENDIF + + !* 1. UPDATE SPECTRAL FIELDS. + ! ----------------------- + + !loop over wavenumber + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(KMLOC,JN,JFLD,KM,IASM0,INM,IR,II) + DO KMLOC=1,D%NUMP + DO JN=R%NTMAX+2-R%NSMAX,R%NTMAX+2 + DO JFLD=1,KFIELD + + KM = D%MYMS(KMLOC) + IASM0 = D%NASM0(KM) + + IF(KM == 0) THEN + + IF (JN .LE. R%NTMAX+2-KM) THEN + + INM = IASM0+(R%NTMAX+2-JN)*2 + IR = 2*JFLD-1 + PSPEC(JFLD,INM) = POA(IR,JN,KMLOC) + PSPEC(JFLD,INM+1) = 0.0_JPRBT + + END IF + ELSE + + + IF (JN .LE. R%NTMAX+2-KM) THEN + INM = IASM0+((R%NTMAX+2-JN)-KM)*2 + + IR = 2*JFLD-1 + II = IR+1 + PSPEC(JFLD,INM) = POA(IR,JN,KMLOC) + PSPEC(JFLD,INM+1) = POA(II,JN,KMLOC) + + END IF + END IF + + ENDDO + ENDDO + ENDDO + !$OMP END PARALLEL DO + + ! ------------------------------------------------------------------ + + END SUBROUTINE UPDSPB +END MODULE UPDSPB_MOD diff --git a/src/trans/gpu/internal_reducedmem/uvtvd_mod.F90 b/src/trans/gpu/internal_reducedmem/uvtvd_mod.F90 new file mode 100644 index 0000000..daf97c7 --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/uvtvd_mod.F90 @@ -0,0 +1,198 @@ +! (C) Copyright 1991- ECMWF. +! (C) Copyright 1991- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE UVTVD_MOD + CONTAINS + SUBROUTINE UVTVD(KFIELD) + !SUBROUTINE UVTVD(KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + + !**** *UVTVD* - Compute vor/div from u and v in spectral space + + ! Purpose. + ! -------- + ! To compute vorticity and divergence from u and v in spectral + ! space. Input u and v from KM to NTMAX+1, output vorticity and + ! divergence from KM to NTMAX. + + !** Interface. + ! ---------- + ! CALL UVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + + ! Explicit arguments : KM - zonal wave-number + ! -------------------- KFIELD - number of fields (levels) + ! PEPSNM - REPSNM for wavenumber KM + ! PU - u wind component for zonal + ! wavenumber KM + ! PV - v wind component for zonal + ! wavenumber KM + ! PVOR - vorticity for zonal + ! wavenumber KM + ! PDIV - divergence for zonal + ! wavenumber KM + + + ! Method. See ref. + ! ------- + + ! Externals. None. + ! ---------- + + ! Reference. + ! ---------- + ! ECMWF Research Department documentation of the IFS + + ! Author. + ! ------- + ! Mats Hamrud and Philippe Courtier *ECMWF* + + ! Modifications. + ! -------------- + ! Original : 91-07-01 + ! D. Giard : NTMAX instead of NSMAX + ! ------------------------------------------------------------------ + + USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT + + USE TPM_DIM ,ONLY : R, R_NTMAX + USE TPM_FIELDS ,ONLY : F + USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS + USE TPM_FIELDS ,ONLY : ZOA1,ZOA2,ZEPSNM + ! + + IMPLICIT NONE + + ! DUMMY INTEGER SCALARS + INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD + INTEGER(KIND=JPIM) :: KM, KMLOC + + !REAL(KIND=JPRB), INTENT(IN) :: PEPSNM(1:d%nump,0:R%NTMAX+2) + !REAL(KIND=JPRB), INTENT(OUT) :: PVOR(:,:,:),PDIV(:,:,:) + !REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:,:),PV (:,:,:) + + ! LOCAL INTEGER SCALARS + INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, ITMAX + INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE + + ! LOCAL REAL SCALARS + REAL(KIND=JPRBT) :: ZKM + REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+3) + + IUS = 1 + IUE = 2*KFIELD + IVS = 2*KFIELD+1 + IVE = 4*KFIELD + IVORS = 1 + IVORE = 2*KFIELD + IDIVS = 2*KFIELD+1 + IDIVE = 4*KFIELD + +#ifdef ACCGPU + !$ACC DATA& + !$ACC& CREATE(ZN) & + !$ACC& COPY(D_MYMS,D_NUMP,R_NTMAX) & + !$ACC& COPY(F,F%RN,F%NLTN) & + !$ACC& PRESENT(ZEPSNM,ZOA1) & + !$ACC& PRESENT(ZOA2) +#endif +#ifdef OMPGPU + !$OMP TARGET DATA& + !$OMP& MAP(ALLOC:ZN) & + !$OMP& MAP(TO:D_MYMS,D_NUMP,R_NTMAX) & + !$OMP& MAP(TO:F,F%RN,F%NLTN) & + !$OMP& MAP(ALLOC:ZEPSNM,ZOA1) & + !$OMP& MAP(ALLOC:ZOA2) +#endif + + ! ------------------------------------------------------------------ + + !* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. + ! ------------------------------------------ + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP +#endif + DO J=-1,R_NTMAX+3 + ZN(J) = F%RN(J) + ENDDO + !* 1.1 SET N=KM-1 COMPONENT TO 0 FOR U AND V + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KM,IN) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IN) +#endif + DO KMLOC=1,D_NUMP + DO J=1,2*KFIELD + KM = D_MYMS(KMLOC) + IN = F%NLTN(KM-1) + ! IN=R_NTMAX+3-KM + ZOA1(IUS-1+J,IN,KMLOC) = 0.0_JPRBT + ZOA1(IVS-1+J,IN,KMLOC) = 0.0_JPRBT + ENDDO + ENDDO + + !* 1.2 COMPUTE VORTICITY AND DIVERGENCE. + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM) +#endif + DO KMLOC=1,D_NUMP + DO JN=0,R_NTMAX + DO J=1,KFIELD + IR = 2*J-2 + II = IR+1 + KM = D_MYMS(KMLOC) + ZKM = REAL(KM,JPRBT) + IN = R_NTMAX+2-JN + + IF(KM /= 0 .and. JN.GE.KM) THEN + ZOA2(IVORS+IR,IN,KMLOC) = -ZKM*ZOA1(IVS+II,IN,KMLOC)-& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*ZOA1(IUS+IR,IN-1,KMLOC)+& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*ZOA1(IUS+IR,IN+1,KMLOC) + ZOA2(IVORS+II,IN,KMLOC) = +ZKM*ZOA1(IVS+IR,IN,KMLOC)-& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*ZOA1(IUS+II,IN-1,KMLOC)+& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*ZOA1(IUS+II,IN+1,KMLOC) + ZOA2(IDIVS+IR,IN,KMLOC) = -ZKM*ZOA1(IUS+II,IN,KMLOC)+& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*ZOA1(IVS+IR,IN-1,KMLOC)-& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*ZOA1(IVS+IR,IN+1,KMLOC) + ZOA2(IDIVS+II,IN,KMLOC) = +ZKM*ZOA1(IUS+IR,IN,KMLOC)+& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*ZOA1(IVS+II,IN-1,KMLOC)-& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*ZOA1(IVS+II,IN+1,KMLOC) + ELSE + IF(KM == 0) THEN + ZOA2(IVORS+IR,IN,KMLOC) = -& + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*ZOA1(IUS+IR,IN-1,KMLOC)+& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*ZOA1(IUS+IR,IN+1,KMLOC) + ZOA2(IDIVS+IR,IN,KMLOC) = & + &ZN(JN)*ZEPSNM(KMLOC,JN+1)*ZOA1(IVS+IR,IN-1,KMLOC)-& + &ZN(JN+1)*ZEPSNM(KMLOC,JN)*ZOA1(IVS+IR,IN+1,KMLOC) + ENDIF + ENDIF + ENDDO + ENDDO + ENDDO +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif + ! ------------------------------------------------------------------ + + END SUBROUTINE UVTVD +END MODULE UVTVD_MOD + diff --git a/src/trans/gpu/internal_reducedmem/vdtuv_mod.F90 b/src/trans/gpu/internal_reducedmem/vdtuv_mod.F90 new file mode 100755 index 0000000..ec1ffbd --- /dev/null +++ b/src/trans/gpu/internal_reducedmem/vdtuv_mod.F90 @@ -0,0 +1,194 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE VDTUV_MOD +CONTAINS +SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) + +USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT + +USE TPM_DIM ,ONLY : R +USE TPM_FIELDS ,ONLY : F +USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS +USE TPM_GEN ,ONLY : NOUT + + +!**** *VDTUV* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUV(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUV in IFS CY22R1 + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: KM, kmloc +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(1:D%NUMP,0:R%NTMAX+2) +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:,:) +REAL(KIND=JPRB), INTENT(IN) :: PDIV(:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PU (:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PV (:,:,:) + +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, ISMAX,JI + +! LOCAL REAL SCALARS +REAL(KIND=JPRBT) :: ZKM +REAL(KIND=JPRBT) :: ZZN(-1:R%NTMAX+4) +REAL(KIND=JPRBT) :: ZZLAPIN(-1:R%NSMAX+4) +REAL(KIND=JPRBT) :: ZZEPSNM(-1:R%NSMAX+4) + +#ifdef ACCGPU +!$ACC DATA & +!$ACC CREATE (ZZEPSNM, ZZN, ZZLAPIN) & +!$ACC COPYIN(PEPSNM, PVOR, PDIV) & +!$ACC COPYIN (D,D%MYMS,F,F%RLAPIN,F%RN) & +!$ACC COPYOUT(PU, PV) +#endif +#ifdef OMPGPU +!$OMP TARGET DATA & +!$OMP& MAP(ALLOC:ZZEPSNM, ZZN, ZZLAPIN) & +!$OMP& MAP(TO:PEPSNM, PVOR, PDIV) & +!$OMP& MAP(TO:D,D%MYMS,F,F%RLAPIN,F%RN) & +!$OMP& MAP(FROM:PU, PV) +#endif + +! ------------------------------------------------------------------ + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ISMAX = R%NSMAX +DO KMLOC=1,D%NUMP + ZKM = D%MYMS(KMLOC) +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP +#endif + DO JN=ZKM-1,ISMAX+2 + IJ = ISMAX+3-JN + ZZN(IJ) = F%RN(JN) + ZZLAPIN(IJ) = F%RLAPIN(JN) + IF( JN >= 0 ) ZZEPSNM(IJ) = PEPSNM(KMLOC,JN) + ENDDO + ZZN(0) = F%RN(ISMAX+3) + +!* 1.1 U AND V (KM=0) . + +IF(ZKM == 0) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(IR) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(IR) +#endif + DO J=1,KFIELD + DO JI=2,ISMAX+3 + IR = 2*J-1 + PU(IR,JI,KMLOC) = +& + &ZZN(JI+1)*ZZEPSNM(JI)*ZZLAPIN(JI+1)*PVOR(IR,JI+1,KMLOC)-& + &ZZN(JI-2)*ZZEPSNM(JI-1)*ZZLAPIN(JI-1)*PVOR(IR,JI-1,KMLOC) + PV(IR,JI,KMLOC) = -& + &ZZN(JI+1)*ZZEPSNM(JI)*ZZLAPIN(JI+1)*PDIV(IR,JI+1,KMLOC)+& + &ZZN(JI-2)*ZZEPSNM(JI-1)*ZZLAPIN(JI-1)*PDIV(IR,JI-1,KMLOC) + ENDDO + ENDDO +ELSE +!* 1.2 U AND V (KM!=0) . + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(IR,II) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(IR,II) +#endif + DO J=1,KFIELD + DO JI=2,ISMAX+3-ZKM + !ZKM = D_MYMS(KMLOC) + IR = 2*J-1 + II = IR+1 + !IF (ZKM>0 .AND. JI<=ISMAX+3-ZKM) THEN + PU(IR,JI,KMLOC) = -ZKM*ZZLAPIN(JI)*PDIV(II,JI,KMLOC)+& + &ZZN(JI+1)*ZZEPSNM(JI)*ZZLAPIN(JI+1)*PVOR(IR,JI+1,KMLOC)-& + &ZZN(JI-2)*ZZEPSNM(JI-1)*ZZLAPIN(JI-1)*PVOR(IR,JI-1,KMLOC) + PU(II,JI,KMLOC) = +ZKM*ZZLAPIN(JI)*PDIV(IR,JI,KMLOC)+& + &ZZN(JI+1)*ZZEPSNM(JI)*ZZLAPIN(JI+1)*PVOR(II,JI+1,KMLOC)-& + &ZZN(JI-2)*ZZEPSNM(JI-1)*ZZLAPIN(JI-1)*PVOR(II,JI-1,KMLOC) + PV(IR,JI,KMLOC) = -ZKM*ZZLAPIN(JI)*PVOR(II,JI,KMLOC)-& + &ZZN(JI+1)*ZZEPSNM(JI)*ZZLAPIN(JI+1)*PDIV(IR,JI+1,KMLOC)+& + &ZZN(JI-2)*ZZEPSNM(JI-1)*ZZLAPIN(JI-1)*PDIV(IR,JI-1,KMLOC) + PV(II,JI,KMLOC) = +ZKM*ZZLAPIN(JI)*PVOR(IR,JI,KMLOC)-& + &ZZN(JI+1)*ZZEPSNM(JI)*ZZLAPIN(JI+1)*PDIV(II,JI+1,KMLOC)+& + &ZZN(JI-2)*ZZEPSNM(JI-1)*ZZLAPIN(JI-1)*PDIV(II,JI-1,KMLOC) + !ENDIF + ENDDO + ENDDO + ENDIF +ENDDO + +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif +! ------------------------------------------------------------------ + +END SUBROUTINE VDTUV +END MODULE VDTUV_MOD diff --git a/src/trans/gpu/sharedmem/sharedmem.c b/src/trans/gpu/sharedmem/sharedmem.c new file mode 100644 index 0000000..29426ce --- /dev/null +++ b/src/trans/gpu/sharedmem/sharedmem.c @@ -0,0 +1,28 @@ +/* + * (C) Copyright 2015- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + + +#include + +void sharedmem_malloc_bytes (void** ptr, size_t bytes) +{ + *ptr = malloc(bytes); +} + +void sharedmem_free(void** ptr) +{ + free(*ptr); +} + +void sharedmem_advance_bytes (void** ptr, size_t bytes) +{ + char** char_ptr = (char**)ptr; + *char_ptr += bytes; +} diff --git a/src/trans/gpu/sharedmem/sharedmem_mod.F90 b/src/trans/gpu/sharedmem/sharedmem_mod.F90 new file mode 100644 index 0000000..16bb0fc --- /dev/null +++ b/src/trans/gpu/sharedmem/sharedmem_mod.F90 @@ -0,0 +1,315 @@ +! (C) Copyright 2015- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE SHAREDMEM_MOD + +! Routines to allow use of shared memery segments in Fortran + + +! Willem Deconinck and Mats Hamrud *ECMWF* +! Original : July 2015 + + +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT, C_NULL_PTR,C_SIZE_T +USE PARKIND1 ,ONLY : JPIM, JPRB ,JPRD + +#ifdef __NEC__ +#define C_SIZEOF(x) INT(KIND(x),C_SIZE_T) +#endif + +IMPLICIT NONE + +PRIVATE + +PUBLIC :: SHAREDMEM +PUBLIC :: SHAREDMEM_ALLOCATE +PUBLIC :: SHAREDMEM_MALLOC_BYTES +PUBLIC :: SHAREDMEM_CREATE +PUBLIC :: SHAREDMEM_ASSOCIATE +PUBLIC :: SHAREDMEM_ADVANCE +PUBLIC :: SHAREDMEM_DELETE + +TYPE, BIND(C) :: SHAREDMEM +! Memory buffer + TYPE(C_PTR), PRIVATE :: BEGIN=C_NULL_PTR + INTEGER(C_SIZE_T), PRIVATE :: SIZE=0 ! IN BYTES + TYPE(C_PTR), PRIVATE :: CPTR=C_NULL_PTR + INTEGER(C_SIZE_T), PRIVATE :: OFFSET=0 ! IN BYTES +END TYPE SHAREDMEM + + +INTERFACE SHAREDMEM_ASSOCIATE +! Associate fortran scalars/arrays with memory segment + MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_INT32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_REAL32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_REAL64 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_INT32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_REAL32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_REAL64 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_INT32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_REAL32 + MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_REAL64 +END INTERFACE + + +INTERFACE + +! EXTERNAL C FUNCTIONS USED IN THIS MODULE +! ---------------------------------------- + + SUBROUTINE SHAREDMEM_ADVANCE_BYTES(CPTR,BYTES) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T + TYPE(C_PTR) :: CPTR + INTEGER(C_SIZE_T), VALUE :: BYTES + END SUBROUTINE SHAREDMEM_ADVANCE_BYTES + + SUBROUTINE SHAREDMEM_MALLOC_BYTES(PTR,BYTES) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T + TYPE(C_PTR) :: PTR + INTEGER(C_SIZE_T), VALUE :: BYTES + END SUBROUTINE SHAREDMEM_MALLOC_BYTES + + SUBROUTINE SHAREDMEM_FREE(PTR) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR + TYPE(C_PTR), INTENT(IN) :: PTR + END SUBROUTINE SHAREDMEM_FREE + +END INTERFACE + +CONTAINS +!========================================================================= +SUBROUTINE SHAREDMEM_CREATE(HANDLE,CPTR,BYTES) +! Create memory buffer object from c pointer +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T, C_F_POINTER +TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE +TYPE(C_PTR) , INTENT(IN) :: CPTR +INTEGER(C_SIZE_T), INTENT(IN) :: BYTES +!------------------------------------------------------------------------ +HANDLE%BEGIN = CPTR +HANDLE%SIZE = BYTES +HANDLE%CPTR = HANDLE%BEGIN +HANDLE%OFFSET = 0 +END SUBROUTINE SHAREDMEM_CREATE +!========================================================================= +SUBROUTINE SHAREDMEM_ALLOCATE(HANDLE,BYTES) +! Create memory buffer object from Fortran +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T +TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE +INTEGER(C_SIZE_T), INTENT(IN) :: BYTES +INTEGER(C_SIZE_T) :: SIZE +!------------------------------------------------------------------------ +SIZE = BYTES +CALL SHAREDMEM_MALLOC_BYTES(HANDLE%BEGIN,SIZE) +HANDLE%SIZE = BYTES +HANDLE%CPTR = HANDLE%BEGIN +HANDLE%OFFSET = 0 +END SUBROUTINE SHAREDMEM_ALLOCATE +!========================================================================= +SUBROUTINE SHAREDMEM_DELETE(HANDLE) +! Free memory buffer +TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE +CALL SHAREDMEM_FREE(HANDLE%BEGIN) +END SUBROUTINE SHAREDMEM_DELETE +!========================================================================= + +! PRIVATE SUBROUTINES +! ------------------- + +SUBROUTINE SHAREDMEM_ASSOCIATE0_INT32(HANDLE,VALUE,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(OUT) :: VALUE + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + INTEGER(C_INT), POINTER :: FPTR(:) + INTEGER(C_INT) :: K + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) + VALUE = FPTR(1) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(K)) + HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(K) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE0_INT32 + +SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL32(HANDLE,VALUE,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + REAL(C_FLOAT), INTENT(OUT) :: VALUE + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_FLOAT), POINTER :: FPTR(:) + REAL(C_FLOAT) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) + VALUE = FPTR(1) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(R) + + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL32 + +SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL64(HANDLE,VALUE,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + REAL(C_DOUBLE), INTENT(OUT) :: VALUE + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_DOUBLE), POINTER :: FPTR(:) + REAL(C_DOUBLE) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) + VALUE = FPTR(1) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(R) + + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL64 + +SUBROUTINE SHAREDMEM_ASSOCIATE1_INT32(HANDLE,SIZE,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: SIZE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: FPTR(:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + INTEGER(C_INT) :: K + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(K)) + HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(K) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE1_INT32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL32(HANDLE,SIZE,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: SIZE + REAL(C_FLOAT), POINTER, INTENT(INOUT) :: FPTR(:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_FLOAT) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL64(HANDLE,SIZE,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: SIZE + REAL(C_DOUBLE), POINTER, INTENT(INOUT) :: FPTR(:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_DOUBLE) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL64 + +SUBROUTINE SHAREDMEM_ASSOCIATE2_INT32(HANDLE,DIM1,DIM2,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 + INTEGER(C_INT), POINTER, INTENT(INOUT) :: FPTR(:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + INTEGER(C_INT) :: K + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(K)) + HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(K) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE2_INT32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL32(HANDLE,DIM1,DIM2,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 + REAL(C_FLOAT), POINTER, INTENT(INOUT) :: FPTR(:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_FLOAT) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL32 + + +SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL64(HANDLE,DIM1,DIM2,FPTR,ADVANCE) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 + REAL(C_DOUBLE), POINTER, INTENT(INOUT) :: FPTR(:,:) + LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE + REAL(C_DOUBLE) :: R + + CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) + + IF( PRESENT(ADVANCE) ) THEN + IF( ADVANCE ) THEN + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(R)) + HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(R) + ENDIF + ENDIF + +END SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL64 + +SUBROUTINE SHAREDMEM_ADVANCE(HANDLE,BYTES) + USE, INTRINSIC :: ISO_C_BINDING + TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE + INTEGER(C_INT), INTENT(IN) :: BYTES + INTEGER(C_SIZE_T) :: SIZE + SIZE = BYTES + CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE) + HANDLE%OFFSET = HANDLE%OFFSET+BYTES +END SUBROUTINE SHAREDMEM_ADVANCE + +!============================================================================ +END MODULE SHAREDMEM_MOD diff --git a/src/trans/include/ectrans/dir_trans.h b/src/trans/include/ectrans/dir_trans.h new file mode 100644 index 0000000..8288afb --- /dev/null +++ b/src/trans/include/ectrans/dir_trans.h @@ -0,0 +1,142 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE DIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& LDLATLON,KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *DIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL DIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! LDLATLON - indicating if regular lat-lon is the input data +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL, INTENT(IN) :: LDLATLON + +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) + + +END SUBROUTINE DIR_TRANS + +END INTERFACE diff --git a/src/trans/include/ectrans/dir_transad.h b/src/trans/include/ectrans/dir_transad.h new file mode 100644 index 0000000..30686d3 --- /dev/null +++ b/src/trans/include/ectrans/dir_transad.h @@ -0,0 +1,141 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE DIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2) + + +!**** *DIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL DIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split +! +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIR_TRANS_CTLAD - control routine +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + + +END SUBROUTINE DIR_TRANSAD + + +END INTERFACE diff --git a/src/trans/include/ectrans/dist_grid.h b/src/trans/include/ectrans/dist_grid.h new file mode 100644 index 0000000..ed8374c --- /dev/null +++ b/src/trans/include/ectrans/dist_grid.h @@ -0,0 +1,70 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE DIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *DIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID + +END INTERFACE diff --git a/src/trans/include/ectrans/dist_grid_32.h b/src/trans/include/ectrans/dist_grid_32.h new file mode 100644 index 0000000..a2a7524 --- /dev/null +++ b/src/trans/include/ectrans/dist_grid_32.h @@ -0,0 +1,69 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE DIST_GRID_32(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP) + +!**** *DIST_GRID_32* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL DIST_GRID_32(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_GRID_32_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPRM + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRM) , INTENT(OUT) :: PGP(:,:,:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_GRID_32 + +END INTERFACE diff --git a/src/trans/include/ectrans/dist_spec.h b/src/trans/include/ectrans/dist_spec.h new file mode 100644 index 0000000..745cfa2 --- /dev/null +++ b/src/trans/include/ectrans/dist_spec.h @@ -0,0 +1,73 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE DIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSMAX,KSORT) + +!**** *DIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL DIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- DIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE DIST_SPEC + +END INTERFACE diff --git a/src/trans/include/ectrans/gath_grid.h b/src/trans/include/ectrans/gath_grid.h new file mode 100644 index 0000000..0367b2f --- /dev/null +++ b/src/trans/include/ectrans/gath_grid.h @@ -0,0 +1,69 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE GATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *GATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL GATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(IN) :: PGP(:,:,:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID + +END INTERFACE diff --git a/src/trans/include/ectrans/gath_grid_32.h b/src/trans/include/ectrans/gath_grid_32.h new file mode 100644 index 0000000..d441839 --- /dev/null +++ b/src/trans/include/ectrans/gath_grid_32.h @@ -0,0 +1,69 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE GATH_GRID_32(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *GATH_GRID_32* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL GATH_GRID_32(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_32_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRM + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRM) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRM) , INTENT(IN) :: PGP(:,:,:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_GRID_32 + +END INTERFACE diff --git a/src/trans/include/ectrans/gath_spec.h b/src/trans/include/ectrans/gath_spec.h new file mode 100644 index 0000000..277a0fa --- /dev/null +++ b/src/trans/include/ectrans/gath_spec.h @@ -0,0 +1,73 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LDZA0IP) + +!**** *GATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL GATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDIM1_IS_FLD - If TRUE first dimension of PSCPEC and PSPECG is the field dimension [.T.] +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + + +! ------------------------------------------------------------------ + +END SUBROUTINE GATH_SPEC + +END INTERFACE diff --git a/src/trans/include/ectrans/get_current.h b/src/trans/include/ectrans/get_current.h new file mode 100644 index 0000000..882f9ee --- /dev/null +++ b/src/trans/include/ectrans/get_current.h @@ -0,0 +1,53 @@ +! (C) Copyright 2000- Meteo France. +! (C) Copyright 2000- Meteo-France. + +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE GET_CURRENT(KRESOL,LDLAM) + +!**** *GET_CURRENT* - Extract current information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting current information from the T.P. + +!** Interface. +! ---------- +! CALL GET_CURRENT(...) + +! Explicit arguments : (all optional) +! -------------------- +! KRESOL - Current resolution +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Ryad El Khatib *Meteo-France* + +! Modifications. +! -------------- +! Original : 24-Aug-2012 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: KRESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM + +END SUBROUTINE GET_CURRENT +END INTERFACE diff --git a/src/trans/include/ectrans/gpnorm_trans.h b/src/trans/include/ectrans/gpnorm_trans.h new file mode 100644 index 0000000..e642196 --- /dev/null +++ b/src/trans/include/ectrans/gpnorm_trans.h @@ -0,0 +1,69 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *GPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms using a 2 stage (NPRTRV,NPRTRW) communication rather +! than an approach using a more expensive global gather collective communication + +!** Interface. +! ---------- +! CALL GPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! George Mozdzynski *ECMWF* + +! Modifications. +! -------------- +! Original : 19th Sept 2008 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB),INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +END SUBROUTINE GPNORM_TRANS +END INTERFACE diff --git a/src/trans/include/ectrans/ini_spec_dist.h b/src/trans/include/ectrans/ini_spec_dist.h new file mode 100644 index 0000000..f32f8d2 --- /dev/null +++ b/src/trans/include/ectrans/ini_spec_dist.h @@ -0,0 +1,72 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE INI_SPEC_DIST(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& + &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS) + + +!**** *INI_SPEC_DIST* - Initialize spectral wave distribution + +! Purpose. +! -------- +! Initialize arrays controlling spectral wave distribution + +!** Interface. +! ---------- +! CALL INI_SPEC_DIST(...) + +! Explicit arguments : +! -------------------- +! KSMAX - spectral truncation required +! KTMAX - Overtruncation for KSMAX (input) +! KPRTRW - Number of processors in A-direction (input) +! KMYSETW - A-set for present processor (input) +! KASM0 - Offsets for spectral waves (output) +! KSPOLEGL - Local version of NSPOLEG (output) +! KPROCM - Where a certain spectral wave belongs (output) +! KUMPP - Number of spectral waves on this PE (output) +! KSPEC - Local version on NSPEC (output) +! KSPEC2 - Local version on NSPEC2 (output) +! KSPEC2MX - Maximum KSPEC2 across PEs (output) +! KPOSSP - Global spectral fields partitioning (output) +! KMYMS - This PEs spectral zonal wavenumbers (output) +! Implicit arguments : NONE +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. SUWAVEDI +! ---------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KPRTRW +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETW +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPOLEGL + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KASM0(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPROCM(0:KSMAX) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KUMPP(KPRTRW) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KMYMS(KSMAX+1) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPOSSP(KPRTRW+1) +END SUBROUTINE INI_SPEC_DIST +END INTERFACE diff --git a/src/trans/include/ectrans/inv_trans.h b/src/trans/include/ectrans/inv_trans.h new file mode 100644 index 0000000..ceb5314 --- /dev/null +++ b/src/trans/include/ectrans/inv_trans.h @@ -0,0 +1,163 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE INV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,LDLATLON,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL INV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! LDLATLON - indicating if regular lat-lon output requested +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +LOGICAL ,OPTIONAL, INTENT(IN) :: LDLATLON +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) + + +END SUBROUTINE INV_TRANS + +END INTERFACE diff --git a/src/trans/include/ectrans/inv_transad.h b/src/trans/include/ectrans/inv_transad.h new file mode 100644 index 0000000..7832ea7 --- /dev/null +++ b/src/trans/include/ectrans/inv_transad.h @@ -0,0 +1,160 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE INV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *INV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL INV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. +! +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTLAD - control of Legendre transform +! FTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) + + +END SUBROUTINE INV_TRANSAD + +END INTERFACE diff --git a/src/trans/include/ectrans/setup_trans.h b/src/trans/include/ectrans/setup_trans.h new file mode 100644 index 0000000..68dd3e0 --- /dev/null +++ b/src/trans/include/ectrans/setup_trans.h @@ -0,0 +1,116 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& +&KFLEV,KTMAX,KRESOL,PWEIGHT,LDGRIDONLY,LDUSERPNM,LDKEEPRPNM,LDUSEFLT,& +&LDSPSETUPONLY,LDPNMONLY,LDUSEFFTW,& +&LDLL,LDSHIFTLL,CDIO_LEGPOL,CDLEGPOLFNAME,KLEGPOLPTR,KLEGPOLPTR_LEN) + +!**** *SETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL SETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KDLON - number of points on each latitude [2*KDGL] +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! PWEIGHT - the weight per grid-point (for a weighted distribution) +! LDGRIDONLY - true if only grid space is required + +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space + +! LDSPLIT describe the distribution among processors of grid-point data and +! has no relevance if you are using a single processor + +! LDUSEFLT - use Fast Legandre Transform (Butterfly algorithm) +! LDUSERPNM - Use Belusov to compute legendre pol. (else new alg.) +! LDKEEPRPNM - Keep Legendre Polynomials (only applicable when using +! FLT, otherwise always kept) +! LDPNMONLY - Compute the Legendre polynomialsonly, not the FFTs. +! LDUSEFFTW - Use FFTW for FFTs +! LDLL - Setup second set of input/output latitudes +! the number of input/output latitudes to transform is equal KDGL +! or KDGL+2 in the case that includes poles + equator +! the number of input/output longitudes to transform is 2*KDGL +! LDSHIFTLL - Shift output lon/lat data by 0.5*dx and 0.5*dy + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- SETUP_DIMS - setup distribution independent dimensions +! SUMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! SUMP_TRANS - Second part of setup of distributed environment +! SUFFT - setup for FFT + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT,C_ASSOCIATED,C_SIZE_T + + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KSMAX,KDGL +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KDLON +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT):: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSTRET +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KFLEV +LOGICAL ,OPTIONAL,INTENT(IN):: LDGRIDONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFLT +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSERPNM +LOGICAL ,OPTIONAL,INTENT(IN):: LDKEEPRPNM +LOGICAL ,OPTIONAL,INTENT(IN):: LDPNMONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDSPSETUPONLY +LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFFTW +LOGICAL ,OPTIONAL,INTENT(IN):: LDLL +LOGICAL ,OPTIONAL,INTENT(IN):: LDSHIFTLL +CHARACTER(LEN=*),OPTIONAL,INTENT(IN):: CDIO_LEGPOL +CHARACTER(LEN=*),OPTIONAL,INTENT(IN):: CDLEGPOLFNAME +TYPE(C_PTR) ,OPTIONAL,INTENT(IN) :: KLEGPOLPTR +INTEGER(C_SIZE_T) ,OPTIONAL,INTENT(IN) :: KLEGPOLPTR_LEN + + +END SUBROUTINE SETUP_TRANS + + +END INTERFACE diff --git a/src/trans/include/ectrans/setup_trans0.h b/src/trans/include/ectrans/setup_trans0.h new file mode 100644 index 0000000..321d89b --- /dev/null +++ b/src/trans/include/ectrans/setup_trans0.h @@ -0,0 +1,90 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& +& KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN,& +& LDMPOFF,LDSYNC_TRANS,KTRANS_SYNC_LEVEL,& +& LDEQ_REGIONS,K_REGIONS_NS,K_REGIONS_EW,K_REGIONS,& +& PRAD,LDALLOPERM) + +!**** *SETUP_TRANS0* - General setup routine for transform package + +! Purpose. +! -------- +! Resolution independent part of setup of transform package +! Has to be called BEFORE SETUP_TRANS + +!** Interface. +! ---------- +! CALL SETUP_TRANS0(...) + +! Explicit arguments : All arguments are optional, [..] default value +! ------------------- +! KOUT - Unit number for listing output [6] +! KERR - Unit number for error messages [0] +! KPRINTLEV - level of output to KOUT, 0->no output,1->normal,2->debug [0] +! KMAX_RESOL - maximum number of different resolutions for this run [1] +! KPRGPNS - splitting level in N-S direction in grid-point space [1] +! KPRGPEW - splitting level in E-W direction in grid-point space [1] +! KPRTRW - splitting level in wave direction in spectral space [1] +! KCOMBFLEN - Size of communication buffer [1800000 (*8bytes) ] +! LDMPOFF - switch off message passing [false] +! LDSYNC_TRANS - switch to activate barrier before transforms [false] +! KTRANS_SYNC_LEVEL - use of synchronization/blocking [0] +! LDEQ_REGIONS - true if new eq_regions partitioning [false] +! K_REGIONS - Number of regions (1D or 2D partitioning) +! K_REGIONS_NS - Maximum number of NS partitions +! K_REGIONS_EW - Maximum number of EW partitions +! PRAD - Radius of the planet +! LDALLOPERM - Allocate certain arrays permanently + +! The total number of (MPI)-processors has to be equal to KPRGPNS*KPRGPEW + +! Method. +! ------- + +! Externals. SUMP_TRANS0 - initial setup routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! R. El Khatib 03-01-24 LDMPOFF +! G. Mozdzynski 2006-09-13 LDEQ_REGIONS +! N. Wedi 2009-11-30 add radius + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN +LOGICAL ,OPTIONAL,INTENT(IN) :: LDMPOFF +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSYNC_TRANS +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KTRANS_SYNC_LEVEL +LOGICAL ,OPTIONAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDALLOPERM +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PRAD +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS(:) +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_NS +INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_EW + +END SUBROUTINE SETUP_TRANS0 + + + +END INTERFACE diff --git a/src/trans/include/ectrans/specnorm.h b/src/trans/include/ectrans/specnorm.h new file mode 100644 index 0000000..785626d --- /dev/null +++ b/src/trans/include/ectrans/specnorm.h @@ -0,0 +1,69 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE SPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *SPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL SPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- SPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +! ------------------------------------------------------------------ + +END SUBROUTINE SPECNORM + +END INTERFACE diff --git a/src/trans/include/ectrans/sugawc.h b/src/trans/include/ectrans/sugawc.h new file mode 100644 index 0000000..7883c26 --- /dev/null +++ b/src/trans/include/ectrans/sugawc.h @@ -0,0 +1,60 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE SUGAWC(KDGLG,PMU,PW) + +!**** *SUGAWC* - Compute Gaussian latitudes and weights + +! Purpose. +! -------- +! Compute Gaussian latitudes and weights. + +!** Interface. +! ---------- +! CALL SUGAWC(...) + +! Explicit arguments : +! -------------------- +! INPUT: +! KDGLG - number of latitudes. + +! OUTPUT: +! PMU - sine of Gaussian latitudes. +! PW - Gaussian weights. + +! Method. +! ------- + +! Externals. SUGAW +! ---------- + +! Author. +! ------- +! K. Yessad, from SUGAWA and SULEG (trans) +! Original : May 2012 + +! Modifications. +! -------------- +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRD + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KDGLG +REAL(KIND=JPRD) ,INTENT(OUT) :: PMU(:) +REAL(KIND=JPRD) ,INTENT(OUT) :: PW(:) + +END SUBROUTINE SUGAWC + +END INTERFACE diff --git a/src/trans/include/ectrans/trans_end.h b/src/trans/include/ectrans/trans_end.h new file mode 100644 index 0000000..8996dfd --- /dev/null +++ b/src/trans/include/ectrans/trans_end.h @@ -0,0 +1,50 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE TRANS_END(CDMODE) + +!**** *TRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL TRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +IMPLICIT NONE +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE + + +END SUBROUTINE TRANS_END +END INTERFACE diff --git a/src/trans/include/ectrans/trans_inq.h b/src/trans/include/ectrans/trans_inq.h new file mode 100644 index 0000000..f47a267 --- /dev/null +++ b/src/trans/include/ectrans/trans_inq.h @@ -0,0 +1,187 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + &KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + &KMYMS,KASM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + &KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + &KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + &KULTPP,KPTRLS,KNMENG,& + &KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + &LDSPLITLAT,& + &KSMAX,PLAPIN,KNVALUE,KDEF_RESOL,LDLAM,& + &PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KDGLU) + +!**** *TRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL TRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KASM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation +! KNVALUE - n value for each KSPEC2 spectral coeffient + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations +! KNMENG - associated (with NLOENG) cut-off zonal wavenumber + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLAPIN - Eigen-values of the inverse Laplace operator +! KDGLU - Number of active points in an hemisphere for a given wavenumber "m" + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M. Hortal : 2001-03-05 Dimensions of the Legendre polynomials +! R. El Khatib 08-Aug-2012 KSMAX,PLAPIN,KNVALUE,LDLAM,KDEF_RESOL + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD + + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2 +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2G +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPEC2MX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNUMP +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTG +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTMX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRFLOFF + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KASM0(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KUMPP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPOSSP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KALLMS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSTA(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KONL(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL, INTENT(OUT) :: LDSPLITLAT(:) + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KULTPP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNMENG(:) + +REAL(KIND=JPRD) ,OPTIONAL, INTENT(OUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGW(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLEI3 +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPOLEGL +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPMS(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDGLU(0:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PLAPIN(-1:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM + +END SUBROUTINE TRANS_INQ + + + + + + +END INTERFACE diff --git a/src/trans/include/ectrans/trans_pnm.h b/src/trans/include/ectrans/trans_pnm.h new file mode 100644 index 0000000..0596d10 --- /dev/null +++ b/src/trans/include/ectrans/trans_pnm.h @@ -0,0 +1,60 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE TRANS_PNM(KRESOL,KM,PRPNM,LDTRANSPOSE,LDCHEAP) + +!**** *TRANS_PNM* - Compute Legendre polynomials for a given wavenember + +! Purpose. +! -------- +! Interface routine for computing Legendre polynomials for a given wavenember + +!** Interface. +! ---------- +! CALL TRANS_PNM(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) +! KM - wave number +! PRPNM - Legendre polynomials +! LDTRANSPOSE - Legendre polynomials array is transposed +! LDCHEAP - cheapest but less accurate computation + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 22-Jan-2016 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM) ,INTENT(IN) :: KM +REAL(KIND=JPRB) ,INTENT(OUT) :: PRPNM(:,:) +LOGICAL, OPTIONAL, INTENT(IN) :: LDTRANSPOSE +LOGICAL, OPTIONAL, INTENT(IN) :: LDCHEAP + +END SUBROUTINE TRANS_PNM +END INTERFACE diff --git a/src/trans/include/ectrans/trans_release.h b/src/trans/include/ectrans/trans_release.h new file mode 100644 index 0000000..9234d4c --- /dev/null +++ b/src/trans/include/ectrans/trans_release.h @@ -0,0 +1,16 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE TRANS_RELEASE(KRESOL) +USE PARKIND1 ,ONLY : JPIM +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL +END SUBROUTINE TRANS_RELEASE +END INTERFACE diff --git a/src/trans/include/ectrans/vordiv_to_uv.h b/src/trans/include/ectrans/vordiv_to_uv.h new file mode 100644 index 0000000..d68f6b5 --- /dev/null +++ b/src/trans/include/ectrans/vordiv_to_uv.h @@ -0,0 +1,68 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +INTERFACE +SUBROUTINE VORDIV_TO_UV(PSPVOR,PSPDIV,PSPU,PSPV,KSMAX,KVSETUV) + +!**** *VORDIV_TO_UV* - Convert spectral vorticity and divergence to spectral U (u*cos(theta)) and V (v*cos(theta). + +! Purpose. +! -------- +! Interface routine for Convert spectral vorticity and divergence to spectral U and V + +!** Interface. +! ---------- +! CALL VORDIV_TO_UV(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPU(:,:) - spectral U (u*cos(theta) (output) +! PSPV(:,:) - spectral V (v*cos(theta) (output) +! KSMAX - spectral resolution (input) +! KVSETUV(:) - Optionally indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- VD2UV_CTL - control vordiv to uv + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 15-06-15 + + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB), INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PSPU(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PSPV(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) + +END SUBROUTINE VORDIV_TO_UV +END INTERFACE diff --git a/src/transi/CMakeLists.txt b/src/transi/CMakeLists.txt new file mode 100644 index 0000000..8ae2b74 --- /dev/null +++ b/src/transi/CMakeLists.txt @@ -0,0 +1,34 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +if( NOT ectrans_VERSION_PATCH ) + set( ectrans_VERSION_PATCH 0 ) +endif() + +configure_file( version.c.in version.c ) + +ecbuild_add_library( TARGET transi_dp + SOURCES transi_module.F90 + transi.h + transi.c + version.h + ${CMAKE_CURRENT_BINARY_DIR}/version.c + HEADER_DESTINATION include/ectrans + PUBLIC_INCLUDES $ + $ + PRIVATE_LIBS trans_dp + PRIVATE_DEFINITIONS ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} +) +ectrans_target_fortran_module_directory( TARGET transi_dp + MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/trans_dp +) +file( GLOB transi_includes include/ectrans/* ) +install( + FILES ${transi_includes} + DESTINATION include/ectrans +) diff --git a/src/transi/include/ectrans/transi.h b/src/transi/include/ectrans/transi.h new file mode 100644 index 0000000..cf4192f --- /dev/null +++ b/src/transi/include/ectrans/transi.h @@ -0,0 +1,1079 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +/*! + * @mainpage + * This project declares the C-API to the IFS trans-library.\n + * For documentation of all available functions, see @ref trans.h . + * + * @section About + * + * This library gives access to spectral transforms on the sphere. + * The library is capable to take advantage of a MPI-distributed-memory environment, + * and can use OpenMP-shared-memory parallelism internally. + * + * + * @section Usage + * + * First ectrans needs to be initialized with a function trans_init(). + * This needs to be done only once in the program. It sets up some + * global structures independent of any resolution. + * + * A number of resolutions can now be setup using trans_setup() for each + * resolution. + * Every call to trans_setup() involves allocating and computing the + * transformation coefficients, and should be done only once for + * every intended resolution as it can be very expensive and requires + * to store a lot of memory. The resolution can be referred to with + * a trans "handle" of the Trans_t type. + * + * Using this handle, one can now transform fields. Either many fields + * can be transformed simultaneously, or the transform functions can + * be called multiple times to transform any number of fields separately. + * + * The function to do a transform from gridpoints to spectral is called trans_dirtrans(). + * The function to do a transform from spectral to gridpoints is called trans_invtrans(). + * The function to do the adjoint of the spectral to gridpoints transform is called trans_invtrans_adj(). + * It also transforms the data from gridpoints to spectral. + * + * In case of distrubuted parallelism (MPI), the functions trans_dirtrans(), trans_invtrans(), + * and trans_invtrans_adj() work on distributed fields. + * In order to convert to and from a global view of the field + * (e.g. for reading / writing), one can use the functions trans_distspec(), trans_gathspec(), + * trans_distgrid(), trans_gathgrid(). + * + * Every handle needs to be cleaned up when no longer required, to release + * the memory and coefficients stored internally. This can be done with the + * function trans_delete(). + * + * Lastly, transi needs to be finalized with trans_finalize(), which will + * clean up any remaining internal global structures + * + * @author Willem Deconinck + * @date Jul 2014 + */ + +/*! + * @file transi.h + * @brief C-interface to the IFS trans-library + * + * This file declares the C-API to the IFS trans-library + * Definitions of routines are implemented in + * trans_module.F90, which redirects function calls + * to the IFS TRANS library + * + * @author Willem Deconinck (nawd) + * @date Jul 2014 + */ + +#ifndef ectrans_transi_h +#define ectrans_transi_h + +#include // size_t + +typedef int _bool; + +#ifdef __cplusplus +extern "C" { +#endif + +#include "ectrans/version.h" + +#define TRANS_FFT992 1 +#define TRANS_FFTW 2 + +#define TRANS_SUCCESS 0 + +struct Trans_t; +struct DirTrans_t; +struct InvTrans_t; +struct InvTransAdj_t; +struct DistGrid_t; +struct GathGrid_t; +struct DistSpec_t; +struct GathSpec_t; +struct VorDivToUV_t; +struct SpecNorm_t; + + +/*! + @brief Get error message relating to error code + */ +const char* trans_error_msg(int errcode); + +/*! + @brief Set limit on maximum simultaneously allocated transforms + + @note Advanced feature + + Default value is 10 + This function needs to be called before trans_init() or trans_setup(), and + ONLY if the default value needs to be changed. + */ +int trans_set_handles_limit(int limit); + +/*! + @brief Set radius of planet used in trans + + @note Advanced feature + + Default value of radius is Earth's radius (6371.22e+03) + This function needs to be called before trans_init() or trans_setup(), and + ONLY if the default value needs to be changed. + */ +int trans_set_radius(double radius); + +/*! + @brief Set nprtrv for parallel distribution of fields in spectral space + + @note Advanced feature + + Default value of nprtrv is 1, meaning that there is no + parallel distribution of the same wave number for different fields (or levels) + This function needs to be called before trans_init() or trans_setup(), and + ONLY if the default value needs to be changed. + */ +int trans_set_nprtrv(int nprtrv); + +/*! + @brief Use MPI in trans library. + + @note Advanced feature + + By default, MPI is used if MPI was detected during compilation. + To force not to use MPI, this function may be used. + */ +int trans_use_mpi(_bool); + +/*! + @brief Initialize trans library + + This initializes MPI communication, and allocates resolution-independent + storage. \n + If this routine is not called, then it will be called internally + upon the first call to trans_setup() + + @pre call trans_set_radius() and/or trans_set_nprtrv() if radius or + nprtrv need to be different from default values + */ +int trans_init(void); + +int trans_set_read(struct Trans_t*, const char* filepath); +int trans_set_write(struct Trans_t*, const char* filepath); +int trans_set_cache(struct Trans_t*, const void*, size_t); + +/*! + @brief Setup a new resolution to be used in the trans library + + @param trans Trans_t struct, that needs to have following variables defined: + - ndgl -- number of lattitudes + - nloen -- number of longitudes for each lattitude + - nsmax -- spectral truncation wave number + + All scalar values in the struct will be filled in. + Remaining array values will be deallocated and set to null. + To define array values, make individual calls to trans_inquire() + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + trans.ndgl = ... ; + trans.nloen = malloc( sizeof(int)*trans.ndgl ); + ... // Read in or compute nloen values + trans.nsmax = (2*trans.ndgl-1)/2; // For typical linear grid + trans_setup(&trans); + @endcode + @note If trans_init() was not called beforehand, it will be called + internally + */ +int trans_setup(struct Trans_t* trans); + + +/*! + @brief Inquire the trans library for array values + + @param trans Trans_t struct which needs to have been setup using trans_setup() + @param varlist comma-separated string of values to inquire + + The inquired values will be allocated if needed, and filled + in in the Trans_t struct + */ +int trans_inquire(struct Trans_t* trans, const char* varlist); + +/*! + @brief Direct spectral transform (from grid-point to spectral) + + A DirTrans_t struct, initialised with new_dirtrans(), + groups all arguments + + @param dirtrans DirTrans_t struct, containing all arguments. + + Usage: + - Transform of scalar fields + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rgp = malloc( sizeof(double) * nscalar*trans.ngptot ); + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nscalar = nscalar; // input + dirtrans.rgp = rgp; // input + dirtrans.rspscalar = rspscalar; // output + trans_dirtrans(&dirtrans); + @endcode + - Transform of U and V fields to vorticity and divergence + @code + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rgp = malloc( sizeof(double) * 2*nvordiv*trans.ngptot ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nvordiv = nvordiv; // input + dirtrans.rgp = rgp; // input -- order: U, V + dirtrans.rspvor = rspvor; // output + dirtrans.rspdiv = rspdiv; // output + trans_dirtrans(&dirtrans); + @endcode + - Transform of U and V fields to vorticity and divergence, as well as scalar fields + @code + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rgp = malloc( sizeof(double) * (2*nvordiv+nscalar)*trans.ngptot ); + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nvordiv = nvordiv; // input + dirtrans.nscalar = nscalar; // input + dirtrans.rgp = rgp; // input -- order: U, V, scalars + dirtrans.rspscalar = rspscalar; // output + dirtrans.rspvor = rspvor; // output + dirtrans.rspdiv = rspdiv; // output + trans_dirtrans(&dirtrans); + @endcode + + @note trans_dirtrans works on distributed arrays + */ +int trans_dirtrans(struct DirTrans_t* dirtrans); + +/*! + @brief Inverse spectral transform (from spectral grid-point) + + A InvTrans_t struct, initialised with new_invtrans(), + groups all arguments + + @param invtrans InvTrans_t struct, containing all arguments. + + Usage: + - Transform of scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * nscalar*trans.ngptot ); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; // input + invtrans.rspscalar = rspscalar; // input + invtrans.rgp = rgp; // output + trans_invtrans(&invtrans); + @endcode + + - Transform vorticity and divergence to U and V + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * 2*nvordiv*trans.ngptot ); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nvordiv = nvordiv; // input + invtrans.rspvor = rspvor; // input + invtrans.rspdiv = rspdiv; // input + invtrans.rgp = rgp; // output -- order: u, v + trans_invtrans(&invtrans); + @endcode + - Transform of vorticity, divergence *and* scalars to U, V, scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * (2*nvordiv+nscalar)*trans.ngptot ); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; // input + invtrans.nvordiv = nvordiv; // input + invtrans.rspscalar = rspscalar; // input + invtrans.rspvor = rspvor; // input + invtrans.rspdiv = rspdiv; // input + invtrans.rgp = rgp; // output -- order: u, v, scalars + trans_invtrans(&invtrans); + @endcode + + @note trans_invtrans works on distributed arrays + */ +int trans_invtrans(struct InvTrans_t* invtrans); + + + +/*! + @brief Adjoint of the Inverse spectral transform (from grid-point spectral) + + A InvTransAdj_t struct, initialised with new_invtrans_adj(), + groups all arguments + + @param invtrans_adj InvTransAdj_t struct, containing all arguments. + + Usage: + - Transform of scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * nscalar*trans.ngptot ); + + // Adjoint of Inverse Transform + struct InvTransAdj_t invtrans_adj = new_invtrans_adj(&trans); + invtrans_adj.nscalar = nscalar; // input + invtrans_adj.rspscalar = rspscalar; // output + invtrans_adj.rgp = rgp; // input + trans_invtrans_adj(&invtrans_adj); + @endcode + + - Adjoint of Transform vorticity and divergence to U and V + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * 2*nvordiv*trans.ngptot ); + + // Adjoint of Inverse Transform + struct InvTransAdj_t invtrans_adj = new_invtrans_adj(&trans); + invtrans_adj.nvordiv = nvordiv; // input + invtrans_adj.rspvor = rspvor; // output + invtrans_adj.rspdiv = rspdiv; // output + invtrans_adj.rgp = rgp; // input -- order: u, v + trans_invtrans_adj(&invtrans_adj); + @endcode + - Adjoint of Transform of vorticity, divergence *and* scalars to U, V, scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * (2*nvordiv+nscalar)*trans.ngptot ); + + // Adjoint of Inverse Transform + struct InvTransAdj_t invtrans_adj = new_invtrans_adj(&trans); + invtrans_adj.nscalar = nscalar; // input + invtrans_adj.nvordiv = nvordiv; // input + invtrans_adj.rspscalar = rspscalar; // input + invtrans_adj.rspvor = rspvor; // input + invtrans_adj.rspdiv = rspdiv; // input + invtrans_adj.rgp = rgp; // output -- order: u, v, scalars + trans_invtrans_adj(&invtrans_adj); + @endcode + + @note trans_invtrans_adj works on distributed arrays + */ +int trans_invtrans_adj(struct InvTransAdj_t* invtrans_adj); + +/*! + @brief Distribute global gridpoint array among processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + int nfld = 1; + double* rgpg = NULL; + if( trans.myproc == 1 ) // Load global field in proc 1 + { + rgpg = malloc( sizeof(double) * trans.ngptotg*nfld ); + ... // load data in rgpg[nfld][ngptotg] + } + int* nfrom = malloc( sizeof(int) * nfld ); + nfrom[0] = 1; // Global field 0 sits in proc 1 + + double* rgp = malloc( sizeof(double) * nfld*trans.ngptot ); + struct DistGrid_t distgrid = new_distgrid(&trans); + distgrid.nfrom = nfrom; + distgrid.rgpg = rgpg; + distgrid.rgp = rgp; + distgrid.nfld = nfld; + trans_distgrid(&distgrid); + @endcode + */ +int trans_distgrid(struct DistGrid_t* distgrid); + +/*! + @brief Gather global gridpoint array from processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + + // Distributed field + int nfld = 1; + double* rgp = malloc( sizeof(double) * nfld*trans.ngptot ); + ... // load data in rgp[nfld][ngptot] + + // Global field + double* rgpg = NULL; + if( trans.myproc == 1 ) // We will gather to proc 1 + { + rgpg = malloc( sizeof(double) * nfld*trans.ngptotg ); + } + int* nto = malloc( sizeof(int) * nfld ); + nto[0] = 1; + + // Gather global fields + struct GathGrid_t gathgrid = new_gathgrid(&trans); + gathgrid.rgp = rgp; + gathgrid.rgpg = rgpg; + gathgrid.nto = nto; + gathgrid.nfld = nfld; + trans_gathgrid(&gathgrid); + @endcode + */ +int trans_gathgrid(struct GathGrid_t* gathgrid); + +/*! + @brief Distribute global spectral array among processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + + // Global fields to be distributed + int nscalar = 1; + double* rspscalarg = NULL; + if( trans.myproc == 1 ) + { + rspscalarg = malloc( sizeof(double) * nscalar*trans.nspec2g ); + ... // load data in rspscalarg[nspec2g][nscalar] + } + int* nfrom = malloc( sizeof(int) * nscalar ); + nfrom[0] = 1; + + // Distribute to local fields + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + struct DistSpec_t distspec = new_distspec(&trans); + distspec.rspec = rspscalar; + distspec.rspecg = rspscalarg; + distspec.nfld = nscalar; + distspec.nfrom = nto; + trans_distspec(&distspec); + @endcode + */ +int trans_distspec(struct DistSpec_t* distspec); + +/*! + @brief Gather global spectral array from processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + + // We have some distributed spectral fields "rspscalar" + int nscalar = 1; + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + ... // load data in rspscalar[nspec2][nscalar] + + // We want to gather to proc 1 + double* rspscalarg = NULL; + if( trans.myproc == 1 ) + rspscalarg = malloc( sizeof(double) * nscalar*trans.nspec2g ); + int* nto = malloc( sizeof(int) * nscalar ); + nto[0] = 1; + struct GathSpec_t gathspec = new_gathspec(&trans); + gathspec.rspec = rspscalar; + gathspec.rspecg = rspscalarg; + gathspec.nfld = nscalar; + gathspec.nto = nto; + trans_gathspec(&gathspec); + @endcode + */ +int trans_gathspec(struct GathSpec_t* gathspec); + +/*! + @brief Convert Spectral vorticity & divergence to Spectral u*cos(theta) & v*cos(theta) + + Usage: + @code{.c} + // We have some global spectral fields for vorticity,divergence,u*cos(theta),v*cos(theta) + int nfld = 1; + double* rspvor = malloc( sizeof(double) * nfld*ncoeff ); + double* rspdiv = malloc( sizeof(double) * nfld*ncoeff ); + double* rspu = malloc( sizeof(double) * nfld*ncoeff ); + double* rspv = malloc( sizeof(double) * nfld*ncoeff ); + ... // load data in rspvor[ncoeff][nfld] + ... // load data in rspdiv[ncoeff][nfld] + + struct VorDivToUV_t vordiv_to_UV = new_vordiv_to_UV(); + vordiv_to_UV.rspvor = rspvor; + vordiv_to_UV.rspdiv = rspdiv; + vordiv_to_UV.rspu = rspu; + vordiv_to_UV rspv = rspv; + vordiv_to_UV.nfld = nfld; + vordiv_to_UV.ncoeff = ncoeff; + vordiv_to_UV.nsmax = nsmax; + trans_vordiv_to_UV(&vordiv_to_UV); + @endcode + + @note + - nfld indicates the multiplicity for each variable seperately + - ncoeff is equivalent to trans.nspec2 for distributed, and trans.nspec2g for global fields + - nsmax indicates the spectral truncation T. + */ +int trans_vordiv_to_UV(struct VorDivToUV_t* vordiv_to_UV); + +/*! + @brief Compute global spectral norms + + Usage:<\b> + @code{.c} + int nfld = 1; + double* rspec = malloc( sizeof(double) * nfld*trans.nspec2 ); + double* rnorm = malloc( sizeof(double) * nfld ); + ... // load data in rspec[nspec2][nfld] + + struct SpecNorm_t specnorm = new_specnorm(&trans); + specnorm.rspec = rspec; + specnorm.rnorm = rnorm; + specnorm.nfld = nfld; + trans_specnorm(specnorm); + @endcode +*/ +int trans_specnorm(struct SpecNorm_t* specnorm); + +/*! + @brief Remove footprint of specific resolution + + @param trans Trans_t struct describing specific resolution + + All arrays will be deallocated. + */ +int trans_delete(struct Trans_t* trans); + + +/*! + @brief Finalize trans library + + This finalizes MPI communication, and deallocates resolution-independent + storage. After this, no more calls to trans should be made + */ +int trans_finalize(void); + + +/*! + @brief Struct that holds information to do transforms + for one particular grid resolution + + The values ndgl, nloen, and nsmax need to be provided yourself, all other + values will be defined during the trans_setup() call or trans_inquire() calls + + - All scalar values will be defined by trans_setup() + - All array values will be allocated if needed, and defined by + individual calls to trans_inquire() + + @note Many of these values are of no interest for normal usage + */ +struct Trans_t { + + /*! @{ @name INPUT */ + int ndgl; //!< @brief Number of lattitudes + int* nloen; //!< @brief Number of longitude points for each latitude \n + //!< DIMENSIONS(1:NDGL) + int nlon; //!< @brief Number of longitude points for all latitudes \n + int nsmax; //!< @brief Spectral truncation wave number + + _bool lsplit; //!< @brief If false, the distribution does not allow latitudes to be split + int llatlon; //!< @brief If true, the transforms compute extra coefficients for + //!< latlon transforms + int flt; //!< @brief If true, the Fast-Legendre-Transform method is used + //!< which is faster for higher resolutions (N1024) + int fft; //!< @brief FFT library to use underneith \n + //!< FFT992 = 1 ; FFTW = 2 + + char* readfp; + char* writefp; + const void* cache; + size_t cachesize; + /*! @} */ + + /*! @{ @name PARALLELISATION */ + int myproc; //!< @brief Current MPI task (numbering starting at 1) + int nproc; //!< @brief Number of parallel MPI tasks + /*! @} */ + + /*! @{ @name MULTI-TRANSFORMS-MANAGEMENT */ + int handle; //!< @brief Resolution tag for which info is required ,default is the + //!< first defined resulution (input) + _bool ldlam; //!< @brief True if the corresponding resolution is LAM, false if it is global + /*! @} */ + + /*! @{ @name SPECTRAL SPACE */ + int nspec; //!< @brief Number of complex spectral coefficients on this PE + int nspec2; //!< @brief Number of complex spectral coefficients on this PE times 2 (real and imag) + int nspec2g; //!< @brief global KSPEC2 + int nspec2mx; //!< @brief Maximun KSPEC2 among all PEs + int nump; //!< @brief Number of spectral waves handled by this PE + int ngptot; //!< @brief Total number of grid columns on this PE + int ngptotg; //!< @brief Total number of grid columns on the Globe + int ngptotmx; //!< @brief Maximum number of grid columns on any of the PEs + int* ngptotl; //!< @brief Number of grid columns on each PE \n + //!< DIMENSIONS(1:N_REGIONS_NS,1:N_REGIONS_EW) + int* nmyms; //!< @brief This PEs spectral zonal wavenumbers + //!< DIMENSIONS(1:NUMP) + int* nasm0; //!< @brief Address in a spectral array of (m, n=m) \n + //!< DIMENSIONS(0:NSMAX) + int nprtrw; //!< @brief Number of processors in A-direction (input) + int* numpp; //!< @brief No. of wave numbers each wave set is responsible for. \n + //!< DIMENSIONS(1:NPRTRW) + int* npossp; //!< @brief Defines partitioning of global spectral fields among PEs \n + //!< DIMENSIONS(1:NPRTRW+1) + int* nptrms; //!< @brief Pointer to the first wave number of a given a-set \n + //!< DIMENSIONS(1:NPRTRW) + int* nallms; //!< @brief Wave numbers for all wave-set concatenated together + //!< to give all wave numbers in wave-set order \n + //!< DIMENSIONS(1:NSMAX+1) + int* ndim0g; //!< @brief Defines partitioning of global spectral fields among PEs \n + //!< DIMENSIONS(0:NSMAX) + int* nvalue; //!< @brief n value for each KSPEC2 spectral coeffient\n + //!< DIMENSIONS(1:NSPEC2) + /*! @} */ + + /*! @{ @name GRIDPOINT SPACE */ + int n_regions_NS;//!< @brief Number of regions in North-South direction + int n_regions_EW;//!< @brief Number of regions in East-West direction + int my_region_NS;//!< @brief My region in North-South direction + int my_region_EW;//!< @brief My region in East-West direction + int* n_regions; //!< @brief Number of East-West Regions per band of North-South Regions + //!< @brief DIMENSIONS(1:N_REGIONS_NS) + int* nfrstlat; //!< @brief First latitude of each a-set in grid-point space + //!< DIMENSIONS(1:N_REGIONS_NS) + int* nlstlat; //!< @brief Last latitude of each a-set in grid-point space + //!< DIMENSIONS(1:N_REGIONS_NS) + int nfrstloff; //!< @brief Offset for first lat of own a-set in grid-point space + int* nptrlat; //!< @brief Pointer to the start of each latitude + //!< DIMENSIONS(1:NDGL) + int* nptrfrstlat; //!< @brief Pointer to the first latitude of each a-set in + //!< NSTA and NONL arrays + //!< DIMENSIONS(1:N_REGIONS_NS) + int* nptrlstlat; //!< @brief Pointer to the last latitude of each a-set in + //!< NSTA and NONL arrays + //!< DIMENSIONS(1:N_REGIONS_NS) + int nptrfloff; //!< @brief Offset for pointer to the first latitude of own a-set + //!< NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 + int* nsta; //!< @brief Position of first grid column for the latitudes on a + //!< processor. \n + //!< DIMENSIONS(1:NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) + //!< @details The information is available for all processors. + //!< The b-sets are distinguished by the last dimension of + //!< nsta(). The latitude band for each a-set is addressed by + //!< nptrfrstlat(jaset),nptrlstlat(jaset), and + //!< nptrfloff=nptrfrstlat(myseta) on this processors a-set. + //!< Each split latitude has two entries in nsta(,:) which + //!< necessitates the rather complex addressing of nsta(,:) + //!< and the overdimensioning of nsta by N_REGIONS_NS. + int* nonl; //!< @brief Number of grid columns for the latitudes on a processor. + //!< Similar to nsta() in data structure. \n + //!< DIMENSIONS(1:NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) + _bool* ldsplitlat; //!< @brief True if latitude is split in grid point space over + //!< two a-sets. \n + //!< DIMENSIONS(1:NDGL) + /*! @} */ + + /*! @{ @name FOURIER SPACE */ + int nprtrns; //!< @brief No. of sets in N-S direction (Fourier space) + //!< (always equal to NPRTRW) + int* nultpp; //!< @brief Number of latitudes for which each a-set is calculating + //!< the FFT's. \n + //!< DIMENSIONS(1:NPRTRNS) + int* nptrls; //!< @brief Pointer to first global latitude of each a-set for which + //!< it performs the Fourier calculations \n + //!< DIMENSIONS(1:NPRTRNS) + int* nnmeng; //!< @brief associated (with NLOENG) cut-off zonal wavenumber \n + //!< DIMENSIONS(1:NDGL) + /*! @} */ + + + /*! @{ @name LEGENDRE */ + double* rmu; //!< @brief sin(Gaussian latitudes) \n + //!< DIMENSIONS(1:NDGL) + double* rgw; //!< @brief Gaussian weights \n + //!< DIMENSIONS(1:NDGL) + double* rpnm; //!< @brief Legendre polynomials \n + //!< DIMENSIONS(1:NLEI3,1:NSPOLEGL) + int nlei3; //!< @brief First dimension of Legendre polynomials + int nspolegl; //!< @brief Second dimension of Legendre polynomials + int* npms; //!< @brief Adress for legendre polynomial for given M (NSMAX) \n + //!< DIMENSIONS(0:NSMAX) + double* rlapin; //!< @brief Eigen-values of the inverse Laplace operator \n + //!< DIMENSIONS(-1:NSMAX+2) + int* ndglu; //!< @brief Number of active points in an hemisphere for a given wavenumber "m" \n + //!< DIMENSIONS(0:NSMAX) + /*! @} */ + +}; + +/*! + @brief Constructor for Trans_t, setting default values + @return Trans_t struct to be used as argument for trans_setup() + */ +int trans_new( struct Trans_t* ); + +/*! + @brief Set gridpoint resolution for trans + @param trans [in] Trans_t used to setup + @param ndgl [in] Number of lattitudes + @param nloen [in] Number of longitude points for each latitude \n + DIMENSIONS(1:NDGL) + */ +int trans_set_resol( struct Trans_t* trans, int ndgl, const int* nloen ); + +/*! + @brief Set gridpoint resolution for trans for longitude-latitude grids + @param trans [in] Trans_t used to setup + @param nlon [in] Number of longitudes + @param nlat [in] Number of latitudes (pole to pole) + + - If nlat is odd, the grid must includes poles and equator + - If nlat is even, the grid must be its dual (excluding pole and equator), + so points are shifted with 0.5*dx and 0.5*dy + */ +int trans_set_resol_lonlat( struct Trans_t* trans, int nlon, int nlat ); + +/*! + @brief Set spectral truncation wave number for trans + @param trans [in] Trans_t used to setup + @param nsmax [in] Spectral truncation wave number + */ +int trans_set_trunc( struct Trans_t* trans, int nsmax ); + + +/*! + @brief Arguments structure for trans_dirtrans() + + Use new_dirtrans() to initialise defaults for the struct (constructor) + */ +struct DirTrans_t +{ + const double* rgp; //!< @brief [input] gridpoint fields + //!< @details Dimensioning: rgp[#ngpblks][2*#nvordiv+#nscalar][#nproma]\n\n + //!< The ordering of the output fields is as follows (all + //!< parts are optional depending on the input switches): + //!< - u : if #nvordiv > 0 + //!< - v : if #nvordiv > 0 + //!< - scalars : if #nscalar > 0 + double* rspscalar; //!< @brief [output] spectral scalar valued fields + //!< @details Dimensioning: rspscalar[@link Trans_t::nspec2 nspec2 @endlink][#nscalar] + double* rspvor; //!< @brief [output] spectral vorticity + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + double* rspdiv; //!< @brief [output] spectral divergence + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + int nproma; //!< @brief [input,default=@link Trans_t::ngptot ngptot@endlink] Blocking factor for distributed gridpoint array + int nscalar; //!< @brief [input,default=0] Number of scalar fields present in RGP + int nvordiv; //!< @brief [input,default=0] Number of vorticity/divergence fields in RGP + int ngpblks; //!< @brief [input,default=1] Blocking factor for distributed gridpoint array + int lglobal; //!< @brief [input,default=0] rgp is a global input field --> nproma==1,ngpblks==ngptotg + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_dirtrans() +}; +/*! + @brief Constructor for DirTrans_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return DirTrans_t struct to be used as argument for trans_dirtrans() + */ +struct DirTrans_t new_dirtrans(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_invtrans() + + Use new_invtrans() to initialise defaults for the struct (constructor) + */ +struct InvTrans_t +{ + const double* rspscalar; //!< @brief [input,default=NULL] spectral scalar valued fields + //!< @details Dimensioning: rspscalar[@link Trans_t::nspec2 nspec2 @endlink][#nscalar] + const double* rspvor; //!< @brief [input] spectral vorticity + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + const double* rspdiv; //!< @brief [input] spectral divergence + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + double* rgp; //!< @brief [output] gridpoint fields + //!< @details Dimensioning: rgp[#ngpblks][2*#nvordiv+#nscalar][#nproma]\n\n + //!< The ordering of the output fields is as follows (all + //!< parts are optional depending on the input switches): + //! - vorticity : if #nvordiv > 0 and #lvordivgp true + //! - divergence : if #nvordiv > 0 and #lvordivgp true + //!< - u : if #nvordiv > 0 + //!< - v : if #nvordiv > 0 + //!< - scalars : if #nscalar > 0 + //!< - N-S derivative of scalars : if #nscalar > 0 and #lscalarders true + //!< - E-W derivative of u : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of v : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of scalars : if #nscalar > 0 and #lscalarders true + int nproma; //!< @brief [input,default=@link Trans_t::ngptot ngptot@endlink] Blocking factor for distributed gridpoint array + int nscalar; //!< @brief [input,default=0] Number of scalar fields present in RGP + int nvordiv; //!< @brief [input,default=0] Number of vorticity/divergence fields in RGP + int lscalarders; //!< @brief [input,default=0] Indicate if derivatives of scalars are requested + int luvder_EW; //!< @brief [input,default=0] Indicate if East-West derivative of u and v is requested + int lvordivgp; //!< @brief [input,default=0] Indicate if grid-point vorticity and divergence is requested + int ngpblks; //!< @brief [input,default=1] Blocking factor for distributed gridpoint array + int lglobal; //!< @brief [input,default=0] rgp is a global output field --> nproma==1,ngpblks==ngptotg + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + * @brief Constructor for InvTrans_t, resetting default values + * @param trans [in] Trans_t used to set defaults + * @return InvTrans_t struct to be used as argument for trans_invtrans() + */ +struct InvTrans_t new_invtrans(struct Trans_t* trans); + + +//! Adjoint of spectral inverse. + +struct InvTransAdj_t +{ + double* rspscalar; //!< @brief [output,default=NULL] spectral scalar valued fields + //!< @details Dimensioning: rspscalar[@link Trans_t::nspec2 nspec2 @endlink][#nscalar] + double* rspvor; //!< @brief [output] spectral vorticity + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + double* rspdiv; //!< @brief [output] spectral divergence + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + const double* rgp; //!< @brief [input] gridpoint fields + //!< @details Dimensioning: rgp[#ngpblks][2*#nvordiv+#nscalar][#nproma]\n\n + //!< The ordering of the output fields is as follows (all + //!< parts are optional depending on the input switches): + //! - vorticity : if #nvordiv > 0 and #lvordivgp true + //! - divergence : if #nvordiv > 0 and #lvordivgp true + //!< - u : if #nvordiv > 0 + //!< - v : if #nvordiv > 0 + //!< - scalars : if #nscalar > 0 + //!< - N-S derivative of scalars : if #nscalar > 0 and #lscalarders true + //!< - E-W derivative of u : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of v : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of scalars : if #nscalar > 0 and #lscalarders true + int nproma; //!< @brief [input,default=@link Trans_t::ngptot ngptot@endlink] Blocking factor for distributed gridpoint array + int nscalar; //!< @brief [input,default=0] Number of scalar fields present in RGP + int nvordiv; //!< @brief [input,default=0] Number of vorticity/divergence fields in RGP + int lscalarders; //!< @brief [input,default=0] Indicate if derivatives of scalars are requested + int luvder_EW; //!< @brief [input,default=0] Indicate if East-West derivative of u and v is requested + int lvordivgp; //!< @brief [input,default=0] Indicate if grid-point vorticity and divergence is requested + int ngpblks; //!< @brief [input,default=1] Blocking factor for distributed gridpoint array + int lglobal; //!< @brief [input,default=0] rgp is a global output field --> nproma==1,ngpblks==ngptotg + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans_adj() +}; +/*! + * @brief Constructor for InvTransAdj_t, resetting default values + * @param trans [in] Trans_t used to set defaults + * @return InvTransAdj_t struct to be used as argument for trans_invtrans_adj() + */ +struct InvTransAdj_t new_invtrans_adj(struct Trans_t* trans); + + + +/*! + @brief Arguments structure for trans_distgrid() + + Use new_distgrid() to initialise defaults for the struct (constructor) + */ +struct DistGrid_t +{ + const double* rgpg; //!< @brief Global gridpoint array + //!< Fortran DIMENSIONS(1:NGPTOTG,1:NFLDG) + //!< C/C++ DIMENSIONS[NFLDG][NGPTOTG] + double* rgp; //!< @brief Distributed gridpoint array + //!< Fortran DIMENSIONS(1:NPROMA,1:NFLD,1:NGPBLKS) + //!< C/C++ DIMENSIONS[NGPBLKS][NFDL][NPROMA] + //!< Default: NPROMA=NGPTOT, NGPBLKS=1 + const int* nfrom; //!< @brief Processors responsible for distributing each field + //!< DIMENSIONS(1:NFLD) + int nproma; //!< @brief Blocking factor for distributed gridpoint array + int nfld; //!< @brief Number of distributed fields + int ngpblks; //!< @brief Blocking factor for distributed gridpoint array + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for DistGrid_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return DistGrid_t struct to be used as argument for trans_distgrid() + */ +struct DistGrid_t new_distgrid(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_gathgrid() + + Use new_gathgrid() to initialise defaults for the struct (constructor) + */ +struct GathGrid_t +{ + double* rgpg; //!< @brief Global gridpoint array + //!< Fortran DIMENSIONS(1:NGPTOTG,1:NFLDG) + //!< C/C++ DIMENSIONS[NFLDG][NGPTOTG] + //!< DIMENSIONS(1:NFLDG,1:NGPTOTG) + const double* rgp; //!< @brief Distributed gridpoint array + //!< Fortran DIMENSIONS(1:NPROMA,1:NFLD,1:NGPBLKS) + //!< C/C++ DIMENSIONS[NGPBLKS][NFDL][NPROMA] + //!< Default: NPROMA=NGPTOT, NGPBLKS=1 + const int* nto; //!< @brief Processors responsible for gathering each field + //!< Fortran DIMENSIONS(1:NFLD) + int nproma; //!< @brief Blocking factor for distributed gridpoint array + int nfld; //!< @brief Number of distributed fields + int ngpblks; //!< @brief Blocking factor for distributed gridpoint array + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for GathGrid_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return GathGrid_t struct to be used as argument for trans_gathgrid() + */ +struct GathGrid_t new_gathgrid(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_distspec() + + Use new_distspec() to initialise defaults for the struct (constructor) + */ +struct DistSpec_t +{ + const double* rspecg; //!< @brief Global spectral array + //!< Fortran DIMENSIONS(1:NFLDG,1:NSPEC2G) + //!< C/C++ DIMENSIONS[NSPEC2G][NFLDG] + double* rspec; //!< @brief Local spectral array + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + const int* nfrom; //!< @brief Processors responsible for distributing each field + //!< Fortran DIMENSIONS(1:NFLD) + int nfld; //!< @brief Number of distributed fields + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for DistSpec_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return DistSpec_t struct to be used as argument for trans_distspec() + */ +struct DistSpec_t new_distspec(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_gathspec() + + Use new_gathspec() to initialise defaults for the struct (constructor) + */ +struct GathSpec_t +{ + double* rspecg; //!< @brief Global spectral array + //!< Fortran DIMENSIONS(1:NFLDG,1:NSPEC2G) + //!< C/C++ DIMENSIONS[NSPEC2G][NFLDG] + const double* rspec; //!< @brief Local spectral array + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + const int* nto; //!< @brief Processors responsible for gathering each field + //!< DIMENSIONS(1:NFLD) + int nfld; //!< @brief Number of distributed fields + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for GathSpec_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return GathSpec_t struct to be used as argument for trans_gathspec() + */ +struct GathSpec_t new_gathspec(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_vordiv_to_UV() + + Use new_vordiv_to_uv() to initialise defaults for the struct (constructor) + */ +struct VorDivToUV_t +{ + const double* rspvor; //!< @brief Local spectral array for vorticity + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + const double* rspdiv; //!< @brief Local spectral array for divergence + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + double* rspu; //!< @brief Local spectral array for U*cos(theta) + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + double* rspv; //!< @brief Local spectral array for V*cos(theta) + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + int nfld; //!< @brief Number of distributed fields + int nsmax; //!< @brief Spectral resolution (T) + int ncoeff; //!< @brief number of spectral coefficients + //!< (equivalent to nspec2 for distributed or nspec2g for global) + int count; //!< @brief Internal storage for calls to trans_vordiv_toUV() +}; +/*! + @brief Constructor for VorDivToUV_t, resetting default values + @return VorDivToUV_t struct to be used as argument for trans_gathspec() + */ +struct VorDivToUV_t new_vordiv_to_UV(void); + + +struct SpecNorm_t +{ + const double *rspec; //!< @brief Spectral array to compute norm of + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + int nmaster; //!< @brief Processor to receive norms (value 1 means MPI_RANK 0) + const double *rmet; //!< @brief metric, OPTIONAL + //! DIMENSIONS(0:NSMAX) + double* rnorm; //!< @brief Norms (output for processor nmaster) + //!< DIMENSIONS(1:NFLD) + int nfld; //!< @brief Number of fields + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for SpecNorm_t, resetting default values + @return SpecNorm_t struct to be used as argument for trans_specnorm() + */ +struct SpecNorm_t new_specnorm(struct Trans_t* trans); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/transi/include/ectrans/version.h b/src/transi/include/ectrans/version.h new file mode 100644 index 0000000..ba12e7f --- /dev/null +++ b/src/transi/include/ectrans/version.h @@ -0,0 +1,45 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#ifndef ectrans_version_h +#define ectrans_version_h + +#ifndef __cplusplus +// C99 header, defines bool as _Bool ( only required for C compiler ) +#include +#else +extern "C" { +#endif + +const char * ectrans_version(); + +unsigned int ectrans_version_int(); + +const char * ectrans_version_str(); + +const char * ectrans_git_sha1(); + +const char * ectrans_git_sha1_abbrev(unsigned int length); + +const char * ectrans_fiat_version(); + +unsigned int ectrans_fiat_version_int(); + +const char * ectrans_fiat_version_str(); + +const char * ectrans_fiat_git_sha1(); + +const char * ectrans_fiat_git_sha1_abbrev(unsigned int length); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/transi/transi.c b/src/transi/transi.c new file mode 100644 index 0000000..3db1a1d --- /dev/null +++ b/src/transi/transi.c @@ -0,0 +1,268 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +/* + * @file transi/trans.c + * @brief C-interface to the IFS trans-library + * @author Willem Deconinck (nawd) + * @date Jul 2014 + */ + +#include +#include +#include "transi.h" + +/* + * These functions are to be used in the fortran part (trans_module.F90) to + * allocate and deallocate arrays of type C_PTR + */ +void transi_malloc_bool (void* ptr[], int len) { *ptr = malloc(sizeof(bool ) * len); } +void transi_malloc_int (void* ptr[], int len) { *ptr = malloc(sizeof(int ) * len); } +void transi_malloc_float (void* ptr[], int len) { *ptr = malloc(sizeof(float ) * len); } +void transi_malloc_double(void* ptr[], int len) { *ptr = malloc(sizeof(double) * len); } +void transi_free(void* ptr[]) { free(*ptr); *ptr=NULL; } + +#define TRANS_ERROR -1 +#define TRANS_NOTIMPL -2 +#define TRANS_MISSING_ARG -3 +#define TRANS_UNRECOGNIZED_ARG -4 +#define TRANS_STALE_ARG -5 + +const char* trans_error_msg(int errcode) +{ + switch( errcode ) + { + case TRANS_SUCCESS: + return "No error"; + case TRANS_ERROR: + return "Trans: Error"; + case TRANS_NOTIMPL: + return "Trans: Not (yet) implemented"; + case TRANS_MISSING_ARG: + return "Trans: Required member of the argument structure is missing or not allocated"; + case TRANS_UNRECOGNIZED_ARG: + return "Trans: Unrecognized argument"; + case TRANS_STALE_ARG: + return "Trans: Passed argument was already used in previous call"; + default: + return "Trans: Unknown error"; + } +} + +int trans_new( struct Trans_t* trans ) +{ + trans->llatlon = 0; + trans->lsplit = true; + trans->flt = -1; + trans->fft = TRANS_FFTW; + trans->nsmax = -1; + trans->ndgl = -1; + trans->nlon = -1; + trans->nloen = NULL; + trans->readfp = NULL; + trans->writefp = NULL; + trans->cache = NULL; + trans->cachesize = 0; + return TRANS_SUCCESS; +} + +int trans_set_resol( struct Trans_t* trans, int ndgl, const int* nloen ) +{ + size_t i; + trans->ndgl = ndgl; + trans->nloen = malloc( sizeof(int) * ndgl ); + for ( i = 0; i < ndgl; ++i ) + trans->nloen[i] = nloen[i]; + return TRANS_SUCCESS; +} + +int trans_set_resol_lonlat( struct Trans_t* trans, int nlon, int nlat ) +{ + size_t i; + if( nlat%2 == 0 ) // The shifted lonlat grid (excluding poles and equator) + { + trans->ndgl = nlat; + trans->nlon = nlon; + trans->llatlon = 2; + if( trans->nloen ) free(trans->nloen); + trans->nloen = malloc( sizeof(int) * nlat ); + for ( i = 0; i < nlat; ++i ) + trans->nloen[i] = nlon; + } + else // The lonlat grid including poles and equator + { + trans->ndgl = nlat-1; // Internally coefficients are computed with ndgl+2 (equator duplicated) + trans->nlon = nlon; + trans->llatlon = 1; + } + return TRANS_SUCCESS; +} + +int trans_set_trunc( struct Trans_t* trans, int nsmax ) +{ + trans->nsmax = nsmax; + return TRANS_SUCCESS; +} + +int trans_set_read(struct Trans_t* trans, const char* filepath) +{ + trans->readfp = malloc( sizeof(char)*1024 ); + strcpy(trans->readfp, filepath); + return TRANS_SUCCESS; +} + +int trans_set_write(struct Trans_t* trans, const char* filepath) +{ + trans->writefp = malloc( sizeof(char)*1024 ); + strcpy(trans->writefp, filepath); + return TRANS_SUCCESS; +} + +int trans_set_cache(struct Trans_t* trans, const void* cache , size_t cachesize) +{ + trans->cache = cache; + trans->cachesize = cachesize; + return TRANS_SUCCESS; +} + +struct DirTrans_t new_dirtrans(struct Trans_t* trans) +{ + struct DirTrans_t dirtrans; + dirtrans.count = 0; + dirtrans.rgp = NULL; + dirtrans.rspscalar = NULL; + dirtrans.rspvor = NULL; + dirtrans.rspdiv = NULL; + dirtrans.ngpblks = 1; + dirtrans.nproma = trans->ngptot; + dirtrans.nscalar = 0; + dirtrans.nvordiv = 0; + dirtrans.lglobal = 0; + dirtrans.trans = trans; + return dirtrans; +} + +struct InvTrans_t new_invtrans(struct Trans_t* trans) +{ + struct InvTrans_t invtrans; + invtrans.count = 0; + invtrans.rspscalar = NULL; + invtrans.rspvor = NULL; + invtrans.rspdiv = NULL; + invtrans.rgp = NULL; + invtrans.ngpblks = 1; + invtrans.nproma = trans->ngptot; + invtrans.nscalar = 0; + invtrans.nvordiv = 0; + invtrans.lscalarders = 0; + invtrans.luvder_EW = 0; + invtrans.lvordivgp = 0; + invtrans.lglobal = 0; + invtrans.trans = trans; + return invtrans; +} + +struct InvTransAdj_t new_invtrans_adj(struct Trans_t* trans) +{ + struct InvTransAdj_t invtrans_adj; + invtrans_adj.count = 0; + invtrans_adj.rspscalar = NULL; + invtrans_adj.rspvor = NULL; + invtrans_adj.rspdiv = NULL; + invtrans_adj.rgp = NULL; + invtrans_adj.ngpblks = 1; + invtrans_adj.nproma = trans->ngptot; + invtrans_adj.nscalar = 0; + invtrans_adj.nvordiv = 0; + invtrans_adj.lscalarders = 0; + invtrans_adj.luvder_EW = 0; + invtrans_adj.lvordivgp = 0; + invtrans_adj.lglobal = 0; + invtrans_adj.trans = trans; + return invtrans_adj; +} + + +struct DistGrid_t new_distgrid(struct Trans_t* trans) +{ + struct DistGrid_t distgrid; + distgrid.count = 0; + distgrid.rgpg = NULL; + distgrid.rgp = NULL; + distgrid.nfrom = NULL; + distgrid.ngpblks = 1; + distgrid.nproma = trans->ngptot; + distgrid.nfld = 0; + distgrid.trans = trans; + return distgrid; +} + +struct GathGrid_t new_gathgrid(struct Trans_t* trans) +{ + struct GathGrid_t gathgrid; + gathgrid.count = 0; + gathgrid.rgpg = NULL; + gathgrid.rgp = NULL; + gathgrid.nto = NULL; + gathgrid.ngpblks = 1; + gathgrid.nproma = trans->ngptot; + gathgrid.nfld = 0; + gathgrid.trans = trans; + return gathgrid; +} + +struct DistSpec_t new_distspec(struct Trans_t* trans) +{ + struct DistSpec_t distspec; + distspec.count = 0; + distspec.rspecg = NULL; + distspec.rspec = NULL; + distspec.nfrom = NULL; + distspec.trans = trans; + return distspec; +} + +struct GathSpec_t new_gathspec(struct Trans_t* trans) +{ + struct GathSpec_t gathspec; + gathspec.count = 0; + gathspec.rspecg = NULL; + gathspec.rspec = NULL; + gathspec.nto = NULL; + gathspec.trans = trans; + return gathspec; +} + +struct VorDivToUV_t new_vordiv_to_UV() +{ + struct VorDivToUV_t vdtouv; + vdtouv.count = 0; + vdtouv.rspvor = NULL; + vdtouv.rspdiv = NULL; + vdtouv.rspu = NULL; + vdtouv.rspv = NULL; + vdtouv.nfld = 0; + vdtouv.ncoeff = 0; + vdtouv.nsmax = 0; + return vdtouv; +} + +struct SpecNorm_t new_specnorm(struct Trans_t* trans) +{ + struct SpecNorm_t specnorm; + specnorm.rspec = NULL; + specnorm.nmaster = 1; + specnorm.rmet = NULL; + specnorm.rnorm = NULL; + specnorm.nfld = 0; + specnorm.trans = trans; + specnorm.count = 0; + return specnorm; +} diff --git a/src/transi/transi.h b/src/transi/transi.h new file mode 100644 index 0000000..cf4192f --- /dev/null +++ b/src/transi/transi.h @@ -0,0 +1,1079 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +/*! + * @mainpage + * This project declares the C-API to the IFS trans-library.\n + * For documentation of all available functions, see @ref trans.h . + * + * @section About + * + * This library gives access to spectral transforms on the sphere. + * The library is capable to take advantage of a MPI-distributed-memory environment, + * and can use OpenMP-shared-memory parallelism internally. + * + * + * @section Usage + * + * First ectrans needs to be initialized with a function trans_init(). + * This needs to be done only once in the program. It sets up some + * global structures independent of any resolution. + * + * A number of resolutions can now be setup using trans_setup() for each + * resolution. + * Every call to trans_setup() involves allocating and computing the + * transformation coefficients, and should be done only once for + * every intended resolution as it can be very expensive and requires + * to store a lot of memory. The resolution can be referred to with + * a trans "handle" of the Trans_t type. + * + * Using this handle, one can now transform fields. Either many fields + * can be transformed simultaneously, or the transform functions can + * be called multiple times to transform any number of fields separately. + * + * The function to do a transform from gridpoints to spectral is called trans_dirtrans(). + * The function to do a transform from spectral to gridpoints is called trans_invtrans(). + * The function to do the adjoint of the spectral to gridpoints transform is called trans_invtrans_adj(). + * It also transforms the data from gridpoints to spectral. + * + * In case of distrubuted parallelism (MPI), the functions trans_dirtrans(), trans_invtrans(), + * and trans_invtrans_adj() work on distributed fields. + * In order to convert to and from a global view of the field + * (e.g. for reading / writing), one can use the functions trans_distspec(), trans_gathspec(), + * trans_distgrid(), trans_gathgrid(). + * + * Every handle needs to be cleaned up when no longer required, to release + * the memory and coefficients stored internally. This can be done with the + * function trans_delete(). + * + * Lastly, transi needs to be finalized with trans_finalize(), which will + * clean up any remaining internal global structures + * + * @author Willem Deconinck + * @date Jul 2014 + */ + +/*! + * @file transi.h + * @brief C-interface to the IFS trans-library + * + * This file declares the C-API to the IFS trans-library + * Definitions of routines are implemented in + * trans_module.F90, which redirects function calls + * to the IFS TRANS library + * + * @author Willem Deconinck (nawd) + * @date Jul 2014 + */ + +#ifndef ectrans_transi_h +#define ectrans_transi_h + +#include // size_t + +typedef int _bool; + +#ifdef __cplusplus +extern "C" { +#endif + +#include "ectrans/version.h" + +#define TRANS_FFT992 1 +#define TRANS_FFTW 2 + +#define TRANS_SUCCESS 0 + +struct Trans_t; +struct DirTrans_t; +struct InvTrans_t; +struct InvTransAdj_t; +struct DistGrid_t; +struct GathGrid_t; +struct DistSpec_t; +struct GathSpec_t; +struct VorDivToUV_t; +struct SpecNorm_t; + + +/*! + @brief Get error message relating to error code + */ +const char* trans_error_msg(int errcode); + +/*! + @brief Set limit on maximum simultaneously allocated transforms + + @note Advanced feature + + Default value is 10 + This function needs to be called before trans_init() or trans_setup(), and + ONLY if the default value needs to be changed. + */ +int trans_set_handles_limit(int limit); + +/*! + @brief Set radius of planet used in trans + + @note Advanced feature + + Default value of radius is Earth's radius (6371.22e+03) + This function needs to be called before trans_init() or trans_setup(), and + ONLY if the default value needs to be changed. + */ +int trans_set_radius(double radius); + +/*! + @brief Set nprtrv for parallel distribution of fields in spectral space + + @note Advanced feature + + Default value of nprtrv is 1, meaning that there is no + parallel distribution of the same wave number for different fields (or levels) + This function needs to be called before trans_init() or trans_setup(), and + ONLY if the default value needs to be changed. + */ +int trans_set_nprtrv(int nprtrv); + +/*! + @brief Use MPI in trans library. + + @note Advanced feature + + By default, MPI is used if MPI was detected during compilation. + To force not to use MPI, this function may be used. + */ +int trans_use_mpi(_bool); + +/*! + @brief Initialize trans library + + This initializes MPI communication, and allocates resolution-independent + storage. \n + If this routine is not called, then it will be called internally + upon the first call to trans_setup() + + @pre call trans_set_radius() and/or trans_set_nprtrv() if radius or + nprtrv need to be different from default values + */ +int trans_init(void); + +int trans_set_read(struct Trans_t*, const char* filepath); +int trans_set_write(struct Trans_t*, const char* filepath); +int trans_set_cache(struct Trans_t*, const void*, size_t); + +/*! + @brief Setup a new resolution to be used in the trans library + + @param trans Trans_t struct, that needs to have following variables defined: + - ndgl -- number of lattitudes + - nloen -- number of longitudes for each lattitude + - nsmax -- spectral truncation wave number + + All scalar values in the struct will be filled in. + Remaining array values will be deallocated and set to null. + To define array values, make individual calls to trans_inquire() + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + trans.ndgl = ... ; + trans.nloen = malloc( sizeof(int)*trans.ndgl ); + ... // Read in or compute nloen values + trans.nsmax = (2*trans.ndgl-1)/2; // For typical linear grid + trans_setup(&trans); + @endcode + @note If trans_init() was not called beforehand, it will be called + internally + */ +int trans_setup(struct Trans_t* trans); + + +/*! + @brief Inquire the trans library for array values + + @param trans Trans_t struct which needs to have been setup using trans_setup() + @param varlist comma-separated string of values to inquire + + The inquired values will be allocated if needed, and filled + in in the Trans_t struct + */ +int trans_inquire(struct Trans_t* trans, const char* varlist); + +/*! + @brief Direct spectral transform (from grid-point to spectral) + + A DirTrans_t struct, initialised with new_dirtrans(), + groups all arguments + + @param dirtrans DirTrans_t struct, containing all arguments. + + Usage: + - Transform of scalar fields + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rgp = malloc( sizeof(double) * nscalar*trans.ngptot ); + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nscalar = nscalar; // input + dirtrans.rgp = rgp; // input + dirtrans.rspscalar = rspscalar; // output + trans_dirtrans(&dirtrans); + @endcode + - Transform of U and V fields to vorticity and divergence + @code + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rgp = malloc( sizeof(double) * 2*nvordiv*trans.ngptot ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nvordiv = nvordiv; // input + dirtrans.rgp = rgp; // input -- order: U, V + dirtrans.rspvor = rspvor; // output + dirtrans.rspdiv = rspdiv; // output + trans_dirtrans(&dirtrans); + @endcode + - Transform of U and V fields to vorticity and divergence, as well as scalar fields + @code + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rgp = malloc( sizeof(double) * (2*nvordiv+nscalar)*trans.ngptot ); + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nvordiv = nvordiv; // input + dirtrans.nscalar = nscalar; // input + dirtrans.rgp = rgp; // input -- order: U, V, scalars + dirtrans.rspscalar = rspscalar; // output + dirtrans.rspvor = rspvor; // output + dirtrans.rspdiv = rspdiv; // output + trans_dirtrans(&dirtrans); + @endcode + + @note trans_dirtrans works on distributed arrays + */ +int trans_dirtrans(struct DirTrans_t* dirtrans); + +/*! + @brief Inverse spectral transform (from spectral grid-point) + + A InvTrans_t struct, initialised with new_invtrans(), + groups all arguments + + @param invtrans InvTrans_t struct, containing all arguments. + + Usage: + - Transform of scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * nscalar*trans.ngptot ); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; // input + invtrans.rspscalar = rspscalar; // input + invtrans.rgp = rgp; // output + trans_invtrans(&invtrans); + @endcode + + - Transform vorticity and divergence to U and V + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * 2*nvordiv*trans.ngptot ); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nvordiv = nvordiv; // input + invtrans.rspvor = rspvor; // input + invtrans.rspdiv = rspdiv; // input + invtrans.rgp = rgp; // output -- order: u, v + trans_invtrans(&invtrans); + @endcode + - Transform of vorticity, divergence *and* scalars to U, V, scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * (2*nvordiv+nscalar)*trans.ngptot ); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; // input + invtrans.nvordiv = nvordiv; // input + invtrans.rspscalar = rspscalar; // input + invtrans.rspvor = rspvor; // input + invtrans.rspdiv = rspdiv; // input + invtrans.rgp = rgp; // output -- order: u, v, scalars + trans_invtrans(&invtrans); + @endcode + + @note trans_invtrans works on distributed arrays + */ +int trans_invtrans(struct InvTrans_t* invtrans); + + + +/*! + @brief Adjoint of the Inverse spectral transform (from grid-point spectral) + + A InvTransAdj_t struct, initialised with new_invtrans_adj(), + groups all arguments + + @param invtrans_adj InvTransAdj_t struct, containing all arguments. + + Usage: + - Transform of scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * nscalar*trans.ngptot ); + + // Adjoint of Inverse Transform + struct InvTransAdj_t invtrans_adj = new_invtrans_adj(&trans); + invtrans_adj.nscalar = nscalar; // input + invtrans_adj.rspscalar = rspscalar; // output + invtrans_adj.rgp = rgp; // input + trans_invtrans_adj(&invtrans_adj); + @endcode + + - Adjoint of Transform vorticity and divergence to U and V + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * 2*nvordiv*trans.ngptot ); + + // Adjoint of Inverse Transform + struct InvTransAdj_t invtrans_adj = new_invtrans_adj(&trans); + invtrans_adj.nvordiv = nvordiv; // input + invtrans_adj.rspvor = rspvor; // output + invtrans_adj.rspdiv = rspdiv; // output + invtrans_adj.rgp = rgp; // input -- order: u, v + trans_invtrans_adj(&invtrans_adj); + @endcode + - Adjoint of Transform of vorticity, divergence *and* scalars to U, V, scalars + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // Missing setup of trans + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rgp = malloc( sizeof(double) * (2*nvordiv+nscalar)*trans.ngptot ); + + // Adjoint of Inverse Transform + struct InvTransAdj_t invtrans_adj = new_invtrans_adj(&trans); + invtrans_adj.nscalar = nscalar; // input + invtrans_adj.nvordiv = nvordiv; // input + invtrans_adj.rspscalar = rspscalar; // input + invtrans_adj.rspvor = rspvor; // input + invtrans_adj.rspdiv = rspdiv; // input + invtrans_adj.rgp = rgp; // output -- order: u, v, scalars + trans_invtrans_adj(&invtrans_adj); + @endcode + + @note trans_invtrans_adj works on distributed arrays + */ +int trans_invtrans_adj(struct InvTransAdj_t* invtrans_adj); + +/*! + @brief Distribute global gridpoint array among processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + int nfld = 1; + double* rgpg = NULL; + if( trans.myproc == 1 ) // Load global field in proc 1 + { + rgpg = malloc( sizeof(double) * trans.ngptotg*nfld ); + ... // load data in rgpg[nfld][ngptotg] + } + int* nfrom = malloc( sizeof(int) * nfld ); + nfrom[0] = 1; // Global field 0 sits in proc 1 + + double* rgp = malloc( sizeof(double) * nfld*trans.ngptot ); + struct DistGrid_t distgrid = new_distgrid(&trans); + distgrid.nfrom = nfrom; + distgrid.rgpg = rgpg; + distgrid.rgp = rgp; + distgrid.nfld = nfld; + trans_distgrid(&distgrid); + @endcode + */ +int trans_distgrid(struct DistGrid_t* distgrid); + +/*! + @brief Gather global gridpoint array from processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + + // Distributed field + int nfld = 1; + double* rgp = malloc( sizeof(double) * nfld*trans.ngptot ); + ... // load data in rgp[nfld][ngptot] + + // Global field + double* rgpg = NULL; + if( trans.myproc == 1 ) // We will gather to proc 1 + { + rgpg = malloc( sizeof(double) * nfld*trans.ngptotg ); + } + int* nto = malloc( sizeof(int) * nfld ); + nto[0] = 1; + + // Gather global fields + struct GathGrid_t gathgrid = new_gathgrid(&trans); + gathgrid.rgp = rgp; + gathgrid.rgpg = rgpg; + gathgrid.nto = nto; + gathgrid.nfld = nfld; + trans_gathgrid(&gathgrid); + @endcode + */ +int trans_gathgrid(struct GathGrid_t* gathgrid); + +/*! + @brief Distribute global spectral array among processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + + // Global fields to be distributed + int nscalar = 1; + double* rspscalarg = NULL; + if( trans.myproc == 1 ) + { + rspscalarg = malloc( sizeof(double) * nscalar*trans.nspec2g ); + ... // load data in rspscalarg[nspec2g][nscalar] + } + int* nfrom = malloc( sizeof(int) * nscalar ); + nfrom[0] = 1; + + // Distribute to local fields + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + struct DistSpec_t distspec = new_distspec(&trans); + distspec.rspec = rspscalar; + distspec.rspecg = rspscalarg; + distspec.nfld = nscalar; + distspec.nfrom = nto; + trans_distspec(&distspec); + @endcode + */ +int trans_distspec(struct DistSpec_t* distspec); + +/*! + @brief Gather global spectral array from processors + + Usage: + @code{.c} + struct Trans_t trans; + trans_new(&trans); + ... // missing setup + + // We have some distributed spectral fields "rspscalar" + int nscalar = 1; + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + ... // load data in rspscalar[nspec2][nscalar] + + // We want to gather to proc 1 + double* rspscalarg = NULL; + if( trans.myproc == 1 ) + rspscalarg = malloc( sizeof(double) * nscalar*trans.nspec2g ); + int* nto = malloc( sizeof(int) * nscalar ); + nto[0] = 1; + struct GathSpec_t gathspec = new_gathspec(&trans); + gathspec.rspec = rspscalar; + gathspec.rspecg = rspscalarg; + gathspec.nfld = nscalar; + gathspec.nto = nto; + trans_gathspec(&gathspec); + @endcode + */ +int trans_gathspec(struct GathSpec_t* gathspec); + +/*! + @brief Convert Spectral vorticity & divergence to Spectral u*cos(theta) & v*cos(theta) + + Usage: + @code{.c} + // We have some global spectral fields for vorticity,divergence,u*cos(theta),v*cos(theta) + int nfld = 1; + double* rspvor = malloc( sizeof(double) * nfld*ncoeff ); + double* rspdiv = malloc( sizeof(double) * nfld*ncoeff ); + double* rspu = malloc( sizeof(double) * nfld*ncoeff ); + double* rspv = malloc( sizeof(double) * nfld*ncoeff ); + ... // load data in rspvor[ncoeff][nfld] + ... // load data in rspdiv[ncoeff][nfld] + + struct VorDivToUV_t vordiv_to_UV = new_vordiv_to_UV(); + vordiv_to_UV.rspvor = rspvor; + vordiv_to_UV.rspdiv = rspdiv; + vordiv_to_UV.rspu = rspu; + vordiv_to_UV rspv = rspv; + vordiv_to_UV.nfld = nfld; + vordiv_to_UV.ncoeff = ncoeff; + vordiv_to_UV.nsmax = nsmax; + trans_vordiv_to_UV(&vordiv_to_UV); + @endcode + + @note + - nfld indicates the multiplicity for each variable seperately + - ncoeff is equivalent to trans.nspec2 for distributed, and trans.nspec2g for global fields + - nsmax indicates the spectral truncation T. + */ +int trans_vordiv_to_UV(struct VorDivToUV_t* vordiv_to_UV); + +/*! + @brief Compute global spectral norms + + Usage:<\b> + @code{.c} + int nfld = 1; + double* rspec = malloc( sizeof(double) * nfld*trans.nspec2 ); + double* rnorm = malloc( sizeof(double) * nfld ); + ... // load data in rspec[nspec2][nfld] + + struct SpecNorm_t specnorm = new_specnorm(&trans); + specnorm.rspec = rspec; + specnorm.rnorm = rnorm; + specnorm.nfld = nfld; + trans_specnorm(specnorm); + @endcode +*/ +int trans_specnorm(struct SpecNorm_t* specnorm); + +/*! + @brief Remove footprint of specific resolution + + @param trans Trans_t struct describing specific resolution + + All arrays will be deallocated. + */ +int trans_delete(struct Trans_t* trans); + + +/*! + @brief Finalize trans library + + This finalizes MPI communication, and deallocates resolution-independent + storage. After this, no more calls to trans should be made + */ +int trans_finalize(void); + + +/*! + @brief Struct that holds information to do transforms + for one particular grid resolution + + The values ndgl, nloen, and nsmax need to be provided yourself, all other + values will be defined during the trans_setup() call or trans_inquire() calls + + - All scalar values will be defined by trans_setup() + - All array values will be allocated if needed, and defined by + individual calls to trans_inquire() + + @note Many of these values are of no interest for normal usage + */ +struct Trans_t { + + /*! @{ @name INPUT */ + int ndgl; //!< @brief Number of lattitudes + int* nloen; //!< @brief Number of longitude points for each latitude \n + //!< DIMENSIONS(1:NDGL) + int nlon; //!< @brief Number of longitude points for all latitudes \n + int nsmax; //!< @brief Spectral truncation wave number + + _bool lsplit; //!< @brief If false, the distribution does not allow latitudes to be split + int llatlon; //!< @brief If true, the transforms compute extra coefficients for + //!< latlon transforms + int flt; //!< @brief If true, the Fast-Legendre-Transform method is used + //!< which is faster for higher resolutions (N1024) + int fft; //!< @brief FFT library to use underneith \n + //!< FFT992 = 1 ; FFTW = 2 + + char* readfp; + char* writefp; + const void* cache; + size_t cachesize; + /*! @} */ + + /*! @{ @name PARALLELISATION */ + int myproc; //!< @brief Current MPI task (numbering starting at 1) + int nproc; //!< @brief Number of parallel MPI tasks + /*! @} */ + + /*! @{ @name MULTI-TRANSFORMS-MANAGEMENT */ + int handle; //!< @brief Resolution tag for which info is required ,default is the + //!< first defined resulution (input) + _bool ldlam; //!< @brief True if the corresponding resolution is LAM, false if it is global + /*! @} */ + + /*! @{ @name SPECTRAL SPACE */ + int nspec; //!< @brief Number of complex spectral coefficients on this PE + int nspec2; //!< @brief Number of complex spectral coefficients on this PE times 2 (real and imag) + int nspec2g; //!< @brief global KSPEC2 + int nspec2mx; //!< @brief Maximun KSPEC2 among all PEs + int nump; //!< @brief Number of spectral waves handled by this PE + int ngptot; //!< @brief Total number of grid columns on this PE + int ngptotg; //!< @brief Total number of grid columns on the Globe + int ngptotmx; //!< @brief Maximum number of grid columns on any of the PEs + int* ngptotl; //!< @brief Number of grid columns on each PE \n + //!< DIMENSIONS(1:N_REGIONS_NS,1:N_REGIONS_EW) + int* nmyms; //!< @brief This PEs spectral zonal wavenumbers + //!< DIMENSIONS(1:NUMP) + int* nasm0; //!< @brief Address in a spectral array of (m, n=m) \n + //!< DIMENSIONS(0:NSMAX) + int nprtrw; //!< @brief Number of processors in A-direction (input) + int* numpp; //!< @brief No. of wave numbers each wave set is responsible for. \n + //!< DIMENSIONS(1:NPRTRW) + int* npossp; //!< @brief Defines partitioning of global spectral fields among PEs \n + //!< DIMENSIONS(1:NPRTRW+1) + int* nptrms; //!< @brief Pointer to the first wave number of a given a-set \n + //!< DIMENSIONS(1:NPRTRW) + int* nallms; //!< @brief Wave numbers for all wave-set concatenated together + //!< to give all wave numbers in wave-set order \n + //!< DIMENSIONS(1:NSMAX+1) + int* ndim0g; //!< @brief Defines partitioning of global spectral fields among PEs \n + //!< DIMENSIONS(0:NSMAX) + int* nvalue; //!< @brief n value for each KSPEC2 spectral coeffient\n + //!< DIMENSIONS(1:NSPEC2) + /*! @} */ + + /*! @{ @name GRIDPOINT SPACE */ + int n_regions_NS;//!< @brief Number of regions in North-South direction + int n_regions_EW;//!< @brief Number of regions in East-West direction + int my_region_NS;//!< @brief My region in North-South direction + int my_region_EW;//!< @brief My region in East-West direction + int* n_regions; //!< @brief Number of East-West Regions per band of North-South Regions + //!< @brief DIMENSIONS(1:N_REGIONS_NS) + int* nfrstlat; //!< @brief First latitude of each a-set in grid-point space + //!< DIMENSIONS(1:N_REGIONS_NS) + int* nlstlat; //!< @brief Last latitude of each a-set in grid-point space + //!< DIMENSIONS(1:N_REGIONS_NS) + int nfrstloff; //!< @brief Offset for first lat of own a-set in grid-point space + int* nptrlat; //!< @brief Pointer to the start of each latitude + //!< DIMENSIONS(1:NDGL) + int* nptrfrstlat; //!< @brief Pointer to the first latitude of each a-set in + //!< NSTA and NONL arrays + //!< DIMENSIONS(1:N_REGIONS_NS) + int* nptrlstlat; //!< @brief Pointer to the last latitude of each a-set in + //!< NSTA and NONL arrays + //!< DIMENSIONS(1:N_REGIONS_NS) + int nptrfloff; //!< @brief Offset for pointer to the first latitude of own a-set + //!< NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 + int* nsta; //!< @brief Position of first grid column for the latitudes on a + //!< processor. \n + //!< DIMENSIONS(1:NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) + //!< @details The information is available for all processors. + //!< The b-sets are distinguished by the last dimension of + //!< nsta(). The latitude band for each a-set is addressed by + //!< nptrfrstlat(jaset),nptrlstlat(jaset), and + //!< nptrfloff=nptrfrstlat(myseta) on this processors a-set. + //!< Each split latitude has two entries in nsta(,:) which + //!< necessitates the rather complex addressing of nsta(,:) + //!< and the overdimensioning of nsta by N_REGIONS_NS. + int* nonl; //!< @brief Number of grid columns for the latitudes on a processor. + //!< Similar to nsta() in data structure. \n + //!< DIMENSIONS(1:NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) + _bool* ldsplitlat; //!< @brief True if latitude is split in grid point space over + //!< two a-sets. \n + //!< DIMENSIONS(1:NDGL) + /*! @} */ + + /*! @{ @name FOURIER SPACE */ + int nprtrns; //!< @brief No. of sets in N-S direction (Fourier space) + //!< (always equal to NPRTRW) + int* nultpp; //!< @brief Number of latitudes for which each a-set is calculating + //!< the FFT's. \n + //!< DIMENSIONS(1:NPRTRNS) + int* nptrls; //!< @brief Pointer to first global latitude of each a-set for which + //!< it performs the Fourier calculations \n + //!< DIMENSIONS(1:NPRTRNS) + int* nnmeng; //!< @brief associated (with NLOENG) cut-off zonal wavenumber \n + //!< DIMENSIONS(1:NDGL) + /*! @} */ + + + /*! @{ @name LEGENDRE */ + double* rmu; //!< @brief sin(Gaussian latitudes) \n + //!< DIMENSIONS(1:NDGL) + double* rgw; //!< @brief Gaussian weights \n + //!< DIMENSIONS(1:NDGL) + double* rpnm; //!< @brief Legendre polynomials \n + //!< DIMENSIONS(1:NLEI3,1:NSPOLEGL) + int nlei3; //!< @brief First dimension of Legendre polynomials + int nspolegl; //!< @brief Second dimension of Legendre polynomials + int* npms; //!< @brief Adress for legendre polynomial for given M (NSMAX) \n + //!< DIMENSIONS(0:NSMAX) + double* rlapin; //!< @brief Eigen-values of the inverse Laplace operator \n + //!< DIMENSIONS(-1:NSMAX+2) + int* ndglu; //!< @brief Number of active points in an hemisphere for a given wavenumber "m" \n + //!< DIMENSIONS(0:NSMAX) + /*! @} */ + +}; + +/*! + @brief Constructor for Trans_t, setting default values + @return Trans_t struct to be used as argument for trans_setup() + */ +int trans_new( struct Trans_t* ); + +/*! + @brief Set gridpoint resolution for trans + @param trans [in] Trans_t used to setup + @param ndgl [in] Number of lattitudes + @param nloen [in] Number of longitude points for each latitude \n + DIMENSIONS(1:NDGL) + */ +int trans_set_resol( struct Trans_t* trans, int ndgl, const int* nloen ); + +/*! + @brief Set gridpoint resolution for trans for longitude-latitude grids + @param trans [in] Trans_t used to setup + @param nlon [in] Number of longitudes + @param nlat [in] Number of latitudes (pole to pole) + + - If nlat is odd, the grid must includes poles and equator + - If nlat is even, the grid must be its dual (excluding pole and equator), + so points are shifted with 0.5*dx and 0.5*dy + */ +int trans_set_resol_lonlat( struct Trans_t* trans, int nlon, int nlat ); + +/*! + @brief Set spectral truncation wave number for trans + @param trans [in] Trans_t used to setup + @param nsmax [in] Spectral truncation wave number + */ +int trans_set_trunc( struct Trans_t* trans, int nsmax ); + + +/*! + @brief Arguments structure for trans_dirtrans() + + Use new_dirtrans() to initialise defaults for the struct (constructor) + */ +struct DirTrans_t +{ + const double* rgp; //!< @brief [input] gridpoint fields + //!< @details Dimensioning: rgp[#ngpblks][2*#nvordiv+#nscalar][#nproma]\n\n + //!< The ordering of the output fields is as follows (all + //!< parts are optional depending on the input switches): + //!< - u : if #nvordiv > 0 + //!< - v : if #nvordiv > 0 + //!< - scalars : if #nscalar > 0 + double* rspscalar; //!< @brief [output] spectral scalar valued fields + //!< @details Dimensioning: rspscalar[@link Trans_t::nspec2 nspec2 @endlink][#nscalar] + double* rspvor; //!< @brief [output] spectral vorticity + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + double* rspdiv; //!< @brief [output] spectral divergence + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + int nproma; //!< @brief [input,default=@link Trans_t::ngptot ngptot@endlink] Blocking factor for distributed gridpoint array + int nscalar; //!< @brief [input,default=0] Number of scalar fields present in RGP + int nvordiv; //!< @brief [input,default=0] Number of vorticity/divergence fields in RGP + int ngpblks; //!< @brief [input,default=1] Blocking factor for distributed gridpoint array + int lglobal; //!< @brief [input,default=0] rgp is a global input field --> nproma==1,ngpblks==ngptotg + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_dirtrans() +}; +/*! + @brief Constructor for DirTrans_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return DirTrans_t struct to be used as argument for trans_dirtrans() + */ +struct DirTrans_t new_dirtrans(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_invtrans() + + Use new_invtrans() to initialise defaults for the struct (constructor) + */ +struct InvTrans_t +{ + const double* rspscalar; //!< @brief [input,default=NULL] spectral scalar valued fields + //!< @details Dimensioning: rspscalar[@link Trans_t::nspec2 nspec2 @endlink][#nscalar] + const double* rspvor; //!< @brief [input] spectral vorticity + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + const double* rspdiv; //!< @brief [input] spectral divergence + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + double* rgp; //!< @brief [output] gridpoint fields + //!< @details Dimensioning: rgp[#ngpblks][2*#nvordiv+#nscalar][#nproma]\n\n + //!< The ordering of the output fields is as follows (all + //!< parts are optional depending on the input switches): + //! - vorticity : if #nvordiv > 0 and #lvordivgp true + //! - divergence : if #nvordiv > 0 and #lvordivgp true + //!< - u : if #nvordiv > 0 + //!< - v : if #nvordiv > 0 + //!< - scalars : if #nscalar > 0 + //!< - N-S derivative of scalars : if #nscalar > 0 and #lscalarders true + //!< - E-W derivative of u : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of v : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of scalars : if #nscalar > 0 and #lscalarders true + int nproma; //!< @brief [input,default=@link Trans_t::ngptot ngptot@endlink] Blocking factor for distributed gridpoint array + int nscalar; //!< @brief [input,default=0] Number of scalar fields present in RGP + int nvordiv; //!< @brief [input,default=0] Number of vorticity/divergence fields in RGP + int lscalarders; //!< @brief [input,default=0] Indicate if derivatives of scalars are requested + int luvder_EW; //!< @brief [input,default=0] Indicate if East-West derivative of u and v is requested + int lvordivgp; //!< @brief [input,default=0] Indicate if grid-point vorticity and divergence is requested + int ngpblks; //!< @brief [input,default=1] Blocking factor for distributed gridpoint array + int lglobal; //!< @brief [input,default=0] rgp is a global output field --> nproma==1,ngpblks==ngptotg + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + * @brief Constructor for InvTrans_t, resetting default values + * @param trans [in] Trans_t used to set defaults + * @return InvTrans_t struct to be used as argument for trans_invtrans() + */ +struct InvTrans_t new_invtrans(struct Trans_t* trans); + + +//! Adjoint of spectral inverse. + +struct InvTransAdj_t +{ + double* rspscalar; //!< @brief [output,default=NULL] spectral scalar valued fields + //!< @details Dimensioning: rspscalar[@link Trans_t::nspec2 nspec2 @endlink][#nscalar] + double* rspvor; //!< @brief [output] spectral vorticity + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + double* rspdiv; //!< @brief [output] spectral divergence + //!< @details Dimensioning: rspvor[@link Trans_t::nspec2 nspec2 @endlink][#nvordiv] + const double* rgp; //!< @brief [input] gridpoint fields + //!< @details Dimensioning: rgp[#ngpblks][2*#nvordiv+#nscalar][#nproma]\n\n + //!< The ordering of the output fields is as follows (all + //!< parts are optional depending on the input switches): + //! - vorticity : if #nvordiv > 0 and #lvordivgp true + //! - divergence : if #nvordiv > 0 and #lvordivgp true + //!< - u : if #nvordiv > 0 + //!< - v : if #nvordiv > 0 + //!< - scalars : if #nscalar > 0 + //!< - N-S derivative of scalars : if #nscalar > 0 and #lscalarders true + //!< - E-W derivative of u : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of v : if #nvordiv > 0 and #luvders true + //!< - E-W derivative of scalars : if #nscalar > 0 and #lscalarders true + int nproma; //!< @brief [input,default=@link Trans_t::ngptot ngptot@endlink] Blocking factor for distributed gridpoint array + int nscalar; //!< @brief [input,default=0] Number of scalar fields present in RGP + int nvordiv; //!< @brief [input,default=0] Number of vorticity/divergence fields in RGP + int lscalarders; //!< @brief [input,default=0] Indicate if derivatives of scalars are requested + int luvder_EW; //!< @brief [input,default=0] Indicate if East-West derivative of u and v is requested + int lvordivgp; //!< @brief [input,default=0] Indicate if grid-point vorticity and divergence is requested + int ngpblks; //!< @brief [input,default=1] Blocking factor for distributed gridpoint array + int lglobal; //!< @brief [input,default=0] rgp is a global output field --> nproma==1,ngpblks==ngptotg + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans_adj() +}; +/*! + * @brief Constructor for InvTransAdj_t, resetting default values + * @param trans [in] Trans_t used to set defaults + * @return InvTransAdj_t struct to be used as argument for trans_invtrans_adj() + */ +struct InvTransAdj_t new_invtrans_adj(struct Trans_t* trans); + + + +/*! + @brief Arguments structure for trans_distgrid() + + Use new_distgrid() to initialise defaults for the struct (constructor) + */ +struct DistGrid_t +{ + const double* rgpg; //!< @brief Global gridpoint array + //!< Fortran DIMENSIONS(1:NGPTOTG,1:NFLDG) + //!< C/C++ DIMENSIONS[NFLDG][NGPTOTG] + double* rgp; //!< @brief Distributed gridpoint array + //!< Fortran DIMENSIONS(1:NPROMA,1:NFLD,1:NGPBLKS) + //!< C/C++ DIMENSIONS[NGPBLKS][NFDL][NPROMA] + //!< Default: NPROMA=NGPTOT, NGPBLKS=1 + const int* nfrom; //!< @brief Processors responsible for distributing each field + //!< DIMENSIONS(1:NFLD) + int nproma; //!< @brief Blocking factor for distributed gridpoint array + int nfld; //!< @brief Number of distributed fields + int ngpblks; //!< @brief Blocking factor for distributed gridpoint array + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for DistGrid_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return DistGrid_t struct to be used as argument for trans_distgrid() + */ +struct DistGrid_t new_distgrid(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_gathgrid() + + Use new_gathgrid() to initialise defaults for the struct (constructor) + */ +struct GathGrid_t +{ + double* rgpg; //!< @brief Global gridpoint array + //!< Fortran DIMENSIONS(1:NGPTOTG,1:NFLDG) + //!< C/C++ DIMENSIONS[NFLDG][NGPTOTG] + //!< DIMENSIONS(1:NFLDG,1:NGPTOTG) + const double* rgp; //!< @brief Distributed gridpoint array + //!< Fortran DIMENSIONS(1:NPROMA,1:NFLD,1:NGPBLKS) + //!< C/C++ DIMENSIONS[NGPBLKS][NFDL][NPROMA] + //!< Default: NPROMA=NGPTOT, NGPBLKS=1 + const int* nto; //!< @brief Processors responsible for gathering each field + //!< Fortran DIMENSIONS(1:NFLD) + int nproma; //!< @brief Blocking factor for distributed gridpoint array + int nfld; //!< @brief Number of distributed fields + int ngpblks; //!< @brief Blocking factor for distributed gridpoint array + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for GathGrid_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return GathGrid_t struct to be used as argument for trans_gathgrid() + */ +struct GathGrid_t new_gathgrid(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_distspec() + + Use new_distspec() to initialise defaults for the struct (constructor) + */ +struct DistSpec_t +{ + const double* rspecg; //!< @brief Global spectral array + //!< Fortran DIMENSIONS(1:NFLDG,1:NSPEC2G) + //!< C/C++ DIMENSIONS[NSPEC2G][NFLDG] + double* rspec; //!< @brief Local spectral array + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + const int* nfrom; //!< @brief Processors responsible for distributing each field + //!< Fortran DIMENSIONS(1:NFLD) + int nfld; //!< @brief Number of distributed fields + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for DistSpec_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return DistSpec_t struct to be used as argument for trans_distspec() + */ +struct DistSpec_t new_distspec(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_gathspec() + + Use new_gathspec() to initialise defaults for the struct (constructor) + */ +struct GathSpec_t +{ + double* rspecg; //!< @brief Global spectral array + //!< Fortran DIMENSIONS(1:NFLDG,1:NSPEC2G) + //!< C/C++ DIMENSIONS[NSPEC2G][NFLDG] + const double* rspec; //!< @brief Local spectral array + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + const int* nto; //!< @brief Processors responsible for gathering each field + //!< DIMENSIONS(1:NFLD) + int nfld; //!< @brief Number of distributed fields + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for GathSpec_t, resetting default values + @param trans [in] Trans_t used to set defaults + @return GathSpec_t struct to be used as argument for trans_gathspec() + */ +struct GathSpec_t new_gathspec(struct Trans_t* trans); + +/*! + @brief Arguments structure for trans_vordiv_to_UV() + + Use new_vordiv_to_uv() to initialise defaults for the struct (constructor) + */ +struct VorDivToUV_t +{ + const double* rspvor; //!< @brief Local spectral array for vorticity + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + const double* rspdiv; //!< @brief Local spectral array for divergence + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + double* rspu; //!< @brief Local spectral array for U*cos(theta) + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + double* rspv; //!< @brief Local spectral array for V*cos(theta) + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + int nfld; //!< @brief Number of distributed fields + int nsmax; //!< @brief Spectral resolution (T) + int ncoeff; //!< @brief number of spectral coefficients + //!< (equivalent to nspec2 for distributed or nspec2g for global) + int count; //!< @brief Internal storage for calls to trans_vordiv_toUV() +}; +/*! + @brief Constructor for VorDivToUV_t, resetting default values + @return VorDivToUV_t struct to be used as argument for trans_gathspec() + */ +struct VorDivToUV_t new_vordiv_to_UV(void); + + +struct SpecNorm_t +{ + const double *rspec; //!< @brief Spectral array to compute norm of + //!< Fortran DIMENSIONS(1:NFLD,1:NSPEC2) + //!< C/C++ DIMENSIONS[NSPEC2][NFLD] + int nmaster; //!< @brief Processor to receive norms (value 1 means MPI_RANK 0) + const double *rmet; //!< @brief metric, OPTIONAL + //! DIMENSIONS(0:NSMAX) + double* rnorm; //!< @brief Norms (output for processor nmaster) + //!< DIMENSIONS(1:NFLD) + int nfld; //!< @brief Number of fields + struct Trans_t* trans; //!< @brief Internal storage of trans object + int count; //!< @brief Internal storage for calls to trans_invtrans() +}; +/*! + @brief Constructor for SpecNorm_t, resetting default values + @return SpecNorm_t struct to be used as argument for trans_specnorm() + */ +struct SpecNorm_t new_specnorm(struct Trans_t* trans); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/transi/transi_module.F90 b/src/transi/transi_module.F90 new file mode 100644 index 0000000..8df0806 --- /dev/null +++ b/src/transi/transi_module.F90 @@ -0,0 +1,2268 @@ +! (C) Copyright 2014- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +!> @file trans_module.F90 +!! @brief Fortran layer to trans +!! +!! This file contains the trans_module, +!! which bridges the IFS trans library to +!! the C-API +!! +!! @author Willem Deconinck +!! @date Jul 2014 + +#define BOOLEAN integer(c_int) + +module trans_module + +use, intrinsic :: iso_c_binding, only: & + c_ptr, & + c_char, & + c_int, & + c_size_t, & + c_float, & + c_double, & + c_null_ptr + +use, intrinsic :: iso_fortran_env, only: & + output_unit, & + error_unit + +use MPL_module, only: & + MPL_INIT, & + MPL_END, & + MPL_NPROC, & + MPL_MYRANK +implicit none + +private :: c_ptr +private :: c_char +private :: c_int +private :: c_size_t +private :: c_float +private :: c_double +private :: c_null_ptr + +private :: output_unit +private :: error_unit + +#if ECTRANS_HAVE_MPI +private :: MPL_INIT +private :: MPL_END +private :: MPL_NPROC +private :: MPL_MYRANK +#endif + +public :: Trans_t +public :: DirTrans_t +public :: InvTrans_t +public :: InvTransAdj_t +public :: GathGrid_t +public :: DistGrid_t +public :: GathSpec_t +public :: DistSpec_t +public :: VorDivToUV_t +public :: trans_use_mpi +public :: trans_set_handles_limit +public :: trans_set_radius +public :: trans_set_nprtrv +public :: trans_init +public :: trans_setup +public :: trans_inquire +public :: trans_dirtrans +public :: trans_invtrans +public :: trans_invtrans_adj +public :: trans_distgrid +public :: trans_gathgrid +public :: trans_distspec +public :: trans_gathspec +public :: trans_vordiv_to_UV +public :: trans_delete +public :: trans_finalize +public :: allocate_ptr +public :: access_ptr +public :: free_ptr + +private + +#include "setup_trans0.h" +#include "trans_end.h" +#include "trans_release.h" +#include "setup_trans.h" +#include "dir_trans.h" +#include "inv_trans.h" +#include "inv_transad.h" +#include "dist_grid.h" +#include "gath_grid.h" +#include "dist_spec.h" +#include "gath_spec.h" +#include "trans_inq.h" +#include "specnorm.h" +#include "vordiv_to_uv.h" + +integer, SAVE :: TRANS_MAX_HANDLES = 100 +integer, SAVE :: N_REGIONS_EW +integer, SAVE :: N_REGIONS_NS +integer, SAVE, allocatable, target :: N_REGIONS(:) +integer(c_int), SAVE :: NPRTRV = 1 +real(c_double), SAVE :: RRAD = 6371.22e+03 +logical, SAVE :: is_init = .False. + +integer, SAVE :: trans_out +logical, SAVE :: close_devnull + +#if ECTRANS_HAVE_MPI +logical, SAVE :: USE_MPI = .True. +#else +logical, SAVE :: USE_MPI = .False. +#endif + + +integer, private, parameter :: MAX_STR_LEN = 1024 + +integer, parameter :: FFT992 = 1 +integer, parameter :: FFTW = 2 +integer, parameter :: TRANS_SUCCESS = 0 +integer, parameter :: TRANS_ERROR = -1 +integer, parameter :: TRANS_NOTIMPL = -2 +integer, parameter :: TRANS_MISSING_ARG = -3 +integer, parameter :: TRANS_UNRECOGNIZED_ARG = -4 +integer, parameter :: TRANS_STALE_ARG = -5 + +!> @brief Interface to the Trans_t struct in transi/trans.h +type, bind(C) :: Trans_t + + ! FILL IN THESE 4 VALUES YOURSELF BEFORE callING trans_setup() */ + integer(c_int) :: ndgl ! -- Number of lattitudes + type(c_ptr) :: nloen ! -- Number of longitude points for each latitude + ! TYPE: INTEGER(1:NDGL) + integer(c_int) :: nlon ! -- Number of longitude points for all latitudes + integer(c_int) :: nsmax ! -- Spectral truncation wave number + BOOLEAN :: lsplit + integer(c_int) :: llatlon + integer(c_int) :: flt + integer(c_int) :: fft + type(c_ptr) :: readfp; + type(c_ptr) :: writefp; + type(c_ptr) :: cache; + integer(c_size_t) :: cachesize; + + + ! PARALLELISATION + integer(c_int) :: myproc ! -- Current MPI task (numbering starting at 1) + integer(c_int) :: nproc ! -- Number of parallel MPI tasks + + ! MULTI-TRANSFORMS MANAGEMENT + integer(c_int) :: handle ! -- Resolution tag for which info is required ,default is the + ! first defined resulution (input) + BOOLEAN :: ldlam ! -- True if the corresponding resolution is LAM, false if it is global + + ! SPECTRAL SPACE + integer(c_int) :: nspec ! -- Number of complex spectral coefficients on this PE + integer(c_int) :: nspec2 ! -- 2*nspec + integer(c_int) :: nspec2g ! -- global KSPEC2 + integer(c_int) :: nspec2mx ! -- Maximun KSPEC2 among all PEs + integer(c_int) :: nump ! -- Number of spectral waves handled by this PE + integer(c_int) :: ngptot ! -- Total number of grid columns on this PE + integer(c_int) :: ngptotg ! -- Total number of grid columns on the Globe + integer(c_int) :: ngptotmx ! -- Maximum number of grid columns on any of the PEs + type(c_ptr) :: ngptotl ! -- Number of grid columns one each PE + ! TYPE: INTEGER(1:N_REGIONS_NS,1:N_REGIONS_EW) + type(c_ptr) :: nmyms ! -- This PEs spectral zonal wavenumbers + ! TYPE: INTEGER(1:NUMP) + type(c_ptr) :: nasm0 ! -- Address in a spectral array of (m, n=m) + ! TYPE: INTEGER(0:NSMAX) + integer(c_int) :: nprtrw ! -- Number of processors in A-direction (input) + type(c_ptr) :: numpp ! -- No. of wave numbers each wave set is responsible for. + ! TYPE: INTEGER(1:NPRTRW) + type(c_ptr) :: npossp ! -- Defines partitioning of global spectral fields among PEs + ! TYPE: INTEGER(1:NPRTRW+1) + type(c_ptr) :: nptrms ! -- Pointer to the first wave number of a given a-set + ! TYPE: INTEGER(1:NPRTRW) + type(c_ptr) :: nallms ! -- Wave numbers for all wave-set concatenated together + ! to give all wave numbers in wave-set order + ! TYPE: INTEGER(1:NSMAX+1) + type(c_ptr) :: ndim0g ! -- Defines partitioning of global spectral fields among PEs + ! TYPE: INTEGER(0:NSMAX) + type(c_ptr) :: nvalue ! -- n value for each KSPEC2 spectral coeffient + ! TYPE: INTEGER(1:NSPEC2) + + + ! GRIDPOINT SPACE + integer(c_int) :: n_regions_NS ! + integer(c_int) :: n_regions_EW ! + integer(c_int) :: my_region_NS ! + integer(c_int) :: my_region_EW ! + type(c_ptr) :: n_regions ! -- Number of East-West Regions per band of North-South Regions + type(c_ptr) :: nfrstlat ! -- First latitude of each a-set in grid-point space + ! TYPE: INTEGER(1:N_REGIONS_NS) + type(c_ptr) :: nlstlat ! -- Last latitude of each a-set in grid-point space + ! TYPE: INTEGER(1:N_REGIONS_NS) + integer(c_int) :: nfrstloff ! -- Offset for first lat of own a-set in grid-point space + type(c_ptr) :: nptrlat ! -- Pointer to the start of each latitude + ! TYPE: INTEGER(1:NDGL) + type(c_ptr) :: nptrfrstlat ! -- Pointer to the first latitude of each a-set in + ! NSTA and NONL arrays + ! TYPE: INTEGER(1:N_REGIONS_NS) + type(c_ptr) :: nptrlstlat ! -- Pointer to the last latitude of each a-set in + ! NSTA and NONL arrays + ! TYPE: INTEGER(1:N_REGIONS_NS) + integer(c_int) :: nptrfloff ! -- Offset for pointer to the first latitude of own a-set + ! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 + type(c_ptr) :: nsta ! -- Position of first grid column for the latitudes on a + ! processor. The information is available for all processors. + ! The b-sets are distinguished by the last dimension of + ! nsta(). The latitude band for each a-set is addressed by + ! nptrfrstlat(jaset),nptrlstlat(jaset), and + ! nptrfloff=nptrfrstlat(myseta) on this processors a-set. + ! Each split latitude has two entries in nsta(,:) which + ! necessitates the rather complex addressing of nsta(,:) + ! and the overdimensioning of nsta by N_REGIONS_NS. + ! TYPE: INTEGER(1:NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) + type(c_ptr) :: nonl ! -- Number of grid columns for the latitudes on a processor. + ! Similar to nsta() in data structure. + ! TYPE: INTEGER(1:NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) + type(c_ptr) :: ldsplitlat ! -- True if latitude is split in grid point space over + ! two a-sets. + ! TYPE: BOOLEAN(1:NDGL) (BOOLEAN=c_int) + + ! FOURIER SPACE + integer(c_int) :: nprtrns ! -- No. of sets in N-S direction (Fourier space) + ! (always equal to NPRTRW) + type(c_ptr) :: nultpp ! -- Number of latitudes for which each a-set is calculating + ! the FFT's. + ! TYPE: INTEGER(1:NPRTRNS) + type(c_ptr) :: nptrls ! -- Pointer to first global latitude of each a-set for which + ! it performs the Fourier calculations + ! TYPE: INTEGER(1:NPRTRNS) + type(c_ptr) :: nnmeng ! -- associated (with NLOENG) cut-off zonal wavenumber + ! TYPE: INTEGER(1:NDGL) + + ! LEGENDRE + type(c_ptr) :: rmu ! -- sin(Gaussian latitudes) + ! TYPE: REAL(1:NDGL) + type(c_ptr) :: rgw ! -- Gaussian weights + ! TYPE: REAL(1:NDGL) + type(c_ptr) :: rpnm ! -- Legendre polynomials + ! TYPE: REAL(1:NLEI3,1:NSPOLEGL) + integer(c_int) :: nlei3 ! -- First dimension of Legendre polynomials + integer(c_int) :: nspolegl ! -- Second dimension of Legendre polynomials + type(c_ptr) :: npms ! -- Adress for legendre polynomial for given M (NSMAX) + ! TYPE: INTEGER(0:NSMAX) + type(c_ptr) :: rlapin ! -- Eigen-values of the inverse Laplace operator + ! TYPE: REAL(-1:NSMAX+2) + type(c_ptr) :: ndglu ! -- Number of active points in an hemisphere for a given wavenumber "m" + ! TYPE: INTEGER(0:NSMAX) +end type Trans_t + +type, bind(C) :: DirTrans_t + type(c_ptr) :: rgp + type(c_ptr) :: rspscalar + type(c_ptr) :: rspvor + type(c_ptr) :: rspdiv + integer(c_int) :: nproma + integer(c_int) :: nscalar + integer(c_int) :: nvordiv + integer(c_int) :: ngpblks + integer(c_int) :: lglobal + type(c_ptr) :: trans + integer(c_int) :: count +end type DirTrans_t + +type, bind(C) :: InvTrans_t + type(c_ptr) :: rspscalar + type(c_ptr) :: rspvor + type(c_ptr) :: rspdiv + type(c_ptr) :: rgp + integer(c_int) :: nproma + integer(c_int) :: nscalar + integer(c_int) :: nvordiv + integer(c_int) :: lscalarders + integer(c_int) :: luvder_EW + integer(c_int) :: lvordivgp + integer(c_int) :: ngpblks + integer(c_int) :: lglobal + type(c_ptr) :: trans + integer(c_int) :: count +end type InvTrans_t + +type, bind(C) :: InvTransAdj_t + type(c_ptr) :: rspscalar + type(c_ptr) :: rspvor + type(c_ptr) :: rspdiv + type(c_ptr) :: rgp + integer(c_int) :: nproma + integer(c_int) :: nscalar + integer(c_int) :: nvordiv + integer(c_int) :: lscalarders + integer(c_int) :: luvder_EW + integer(c_int) :: lvordivgp + integer(c_int) :: ngpblks + integer(c_int) :: lglobal + type(c_ptr) :: trans + integer(c_int) :: count +end type InvTransAdj_t + +type, bind(C) :: DistGrid_t + type(c_ptr) :: rgpg + type(c_ptr) :: rgp + type(c_ptr) :: nfrom + integer(c_int) :: nproma + integer(c_int) :: nfld + integer(c_int) :: ngpblks + type(c_ptr) :: trans + integer(c_int) :: count +end type DistGrid_t + +type, bind(C) :: GathGrid_t + type(c_ptr) :: rgpg + type(c_ptr) :: rgp + type(c_ptr) :: nto + integer(c_int) :: nproma + integer(c_int) :: nfld + integer(c_int) :: ngpblks + type(c_ptr) :: trans + integer(c_int) :: count +end type GathGrid_t + +type, bind(C) :: DistSpec_t + type(c_ptr) :: rspecg + type(c_ptr) :: rspec + type(c_ptr) :: nfrom + integer(c_int) :: nfld + type(c_ptr) :: trans + integer(c_int) :: count +end type DistSpec_t + +type, bind(C) :: GathSpec_t + type(c_ptr) :: rspecg + type(c_ptr) :: rspec + type(c_ptr) :: nto + integer(c_int) :: nfld + type(c_ptr) :: trans + integer(c_int) :: count +end type GathSpec_t + +type, bind(C) :: VorDivToUV_t + type(c_ptr) :: rspvor + type(c_ptr) :: rspdiv + type(c_ptr) :: rspu + type(c_ptr) :: rspv + integer(c_int) :: nfld + integer(c_int) :: nsmax + integer(c_int) :: ncoeff + integer(c_int) :: count +end type VorDivToUV_t + +type, bind(C) :: SpecNorm_t + type(c_ptr) :: rspec + integer(c_int) :: nmaster + type(c_ptr) :: rmet + type(c_ptr) :: rnorm + integer(c_int) :: nfld + type(c_ptr) :: trans + integer(c_int) :: count +end type SpecNorm_t + +interface + subroutine transi_malloc_bool(ptr,len) bind(C,name="transi_malloc_bool") + use, intrinsic :: iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: ptr + integer(c_int), value :: len + end subroutine transi_malloc_bool + subroutine transi_malloc_int(ptr,len) bind(C,name="transi_malloc_int") + use, intrinsic :: iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: ptr + integer(c_int), value :: len + end subroutine transi_malloc_int + subroutine transi_malloc_float(ptr,len) bind(C,name="transi_malloc_float") + use, intrinsic :: iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: ptr + integer(c_int), value :: len + end subroutine transi_malloc_float + subroutine transi_malloc_double(ptr,len) bind(C,name="transi_malloc_double") + use, intrinsic :: iso_c_binding, only: c_ptr, c_int + type(c_ptr) :: ptr + integer(c_int), value :: len + end subroutine transi_malloc_double + subroutine transi_free(ptr) bind(C,name="transi_free") + use, intrinsic :: iso_c_binding, only: c_ptr + type(c_ptr), intent(in) :: ptr + end subroutine transi_free +end interface + + +interface trans_inquire + module procedure trans_inquire_cstr + module procedure trans_inquire_fstr +end interface trans_inquire + + +interface allocate_ptr +! module procedure allocate_bool1_ptr + module procedure allocate_int1_ptr + module procedure allocate_int2_ptr + module procedure allocate_double1_ptr + module procedure allocate_double2_ptr +end interface allocate_ptr + +interface access_ptr +! module procedure allocate_bool1_ptr + module procedure allocate_int1_ptr + module procedure allocate_int2_ptr + module procedure allocate_double1_ptr + module procedure allocate_double2_ptr +end interface access_ptr + +contains + +! ============================================================================= +! From fckit_c_interop module + +function c_str_to_string(s) result(string) + use, intrinsic :: iso_c_binding + character(kind=c_char,len=1), intent(in) :: s(*) + character(len=:), allocatable :: string + integer i, nchars + i = 1 + do + if (s(i) == c_null_char) exit + i = i + 1 + enddo + nchars = i - 1 ! Exclude null character from Fortran string + allocate(character(len=nchars) :: string) + do i=1,nchars + string(i:i) = s(i) + enddo +end function + +function c_ptr_to_string(cptr) result(string) + use, intrinsic :: iso_c_binding + type(c_ptr), intent(in) :: cptr + character(kind=c_char,len=:), allocatable :: string + character, pointer :: s(:) + call c_f_pointer ( cptr , s, (/MAX_STR_LEN/) ) + string = c_str_to_string(s) +end function + +! ============================================================================= + +subroutine to_lower(str) + character(*), intent(in out) :: str + integer :: i + + do i = 1, len(str) + select case(str(i:i)) + case("A":"Z") + str(i:i) = achar(iachar(str(i:i))+32) + end select + end do +end subroutine to_lower + +subroutine transi_error(err_msg) + character(len=*), intent(in) :: err_msg + write(error_unit,'(A)') err_msg +end subroutine + +function trans_set_handles_limit(limit) bind(C,name="trans_set_handles_limit") + integer(c_int) :: trans_set_handles_limit + integer(c_int), value, intent(in) :: limit + TRANS_MAX_HANDLES = limit + trans_set_handles_limit = TRANS_SUCCESS +end function + +function trans_set_radius(radius) bind(C,name="trans_set_radius") + integer(c_int) :: trans_set_radius + real(c_double), value, intent(in) :: radius + RRAD = radius + trans_set_radius = TRANS_SUCCESS +end function + +function trans_set_nprtrv(kprtrv) bind(C,name="trans_set_nprtrv") + integer(c_int) :: trans_set_nprtrv + integer(c_int), value, intent(in) :: kprtrv + NPRTRV = kprtrv + trans_set_nprtrv = TRANS_SUCCESS +end function + +function trans_use_mpi(lmpi) bind(C,name="trans_use_mpi") + integer(c_int) :: trans_use_mpi + integer(c_int), value, intent(in) :: lmpi +#if ECTRANS_HAVE_MPI + if( lmpi == 0 ) then + USE_MPI = .False. + else + USE_MPI = .True. + endif +#endif + trans_use_mpi = TRANS_SUCCESS +end function + +function devnull(opened) + integer :: devnull + logical, intent(out), optional :: opened + integer :: devnull_unit + inquire(file="/dev/null", number=devnull_unit) + if( devnull_unit == 5 ) devnull_unit = -1 ! Willem D: Bug in gfortran and openmpi + if( devnull_unit == -1 ) then + devnull_unit = 777 + open( unit=devnull_unit, file="/dev/null" ) + if( present(opened) ) opened = .true. + else + if( present(opened) ) opened = .false. + endif + devnull = devnull_unit +end function + +function trans_init() bind(C,name="trans_init") result(iret) + integer(c_int) :: iret + integer :: NPRTRW, NPRGPNS + integer, allocatable :: I_REGIONS(:) + logical :: LMPOFF + integer :: devnull_unit + + LMPOFF = .not. USE_MPI + + trans_out = devnull( opened=close_devnull ) + + if( USE_MPI ) then + call MPL_INIT(KOUTPUT=0,KUNIT=trans_out,LDINFO=.False.) + allocate( I_REGIONS(MPL_NPROC()) ) + NPRGPNS = MPL_NPROC() + NPRTRW = MPL_NPROC()/NPRTRV; + else + allocate( I_REGIONS(1) ) + NPRGPNS = 1 + NPRTRW = 1; + endif + + call SETUP_TRANS0(KOUT=trans_out,KERR=error_unit,KPRINTLEV=0,KMAX_RESOL=TRANS_MAX_HANDLES,& + & KPRTRW=NPRTRW, LDEQ_REGIONS=.True.,KPRGPNS=NPRGPNS,KPRGPEW=1,& + & PRAD=RRAD, K_REGIONS_NS=N_REGIONS_NS,K_REGIONS_EW=N_REGIONS_EW,K_REGIONS=I_REGIONS,& + & LDMPOFF=LMPOFF ) + allocate(N_REGIONS(1:N_REGIONS_NS)) + N_REGIONS(1:N_REGIONS_NS)=I_REGIONS(1:N_REGIONS_NS) + is_init = .True. + + iret = TRANS_SUCCESS + +end function trans_init + + +function trans_setup(trans) bind(C,name="trans_setup") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(Trans_t), intent(inout) :: trans + integer(c_int), pointer :: nloen(:) + integer(c_int), pointer :: n_regions_fptr(:) + logical, parameter :: lkeeprpnm =.False. + logical, parameter :: luserpnm =.False. ! Don't use Belusov algorithm (uses twice the memory) + logical :: ldlam ! output + logical :: lgridonly, lsplit, lusefftw !input + logical :: lspeconly ! only + logical :: llatlon ! input + logical :: llatlonshift ! input + integer(c_int) :: nlon + integer(c_int) :: err + character(len=MAX_STR_LEN) :: readfp, writefp + logical :: luseflt + + iret = TRANS_SUCCESS + + lsplit = .False. + if( trans%lsplit /= 0 ) lsplit = .True. + + if( trans%fft == FFTW ) lusefftw = .True. + + llatlon = .False. + llatlonshift = .False. + if( trans%llatlon /= 0 ) llatlon = .True. + if( trans%llatlon == 2 ) llatlonshift = .True. + + if ( .not. is_init ) then + err = trans_init() + endif + + lspeconly = .False. + if( trans%ndgl < 0 ) then + lspeconly = .true. + trans%ndgl = 2 + endif + + nlon = trans%nlon + if( nlon < 0 .and. trans%ndgl >= 0 ) then + if( c_associated( trans%nloen ) ) then + call C_F_POINTER( trans%nloen, nloen, (/trans%ndgl/) ) + nlon = nloen(1) + else + nlon = 2*trans%ndgl + endif + endif + + lgridonly = .False. + if( trans%nsmax < 0 ) then + lgridonly = .true. + endif + + if( lgridonly .and. lspeconly ) then + write(error_unit,'(A)') "trans_setup: ERROR: Cannot setup with both lgridonly and lspeconly. Make up your mind." + iret = TRANS_ERROR + return + endif + + + writefp="" + if( c_associated(trans%writefp) ) then + writefp = c_ptr_to_string(trans%writefp) + !call cptr_to_f_string(trans%writefp,writefp) + endif + + readfp="" + if( c_associated(trans%readfp) ) then + readfp = c_ptr_to_string(trans%readfp) + !call cptr_to_f_string(trans%readfp,readfp) + endif + + if ( trans%cachesize > 0 ) then + if( .not. c_associated( trans%cache ) ) then + write(error_unit,'(A)') "Cache memory was not allocated" + iret = TRANS_MISSING_ARG + return + endif + endif + +#define LATLON_FLAGS LDLL=llatlon, LDSHIFTLL=llatlonshift, + +! if( trans%flt > 0 .and. trans%nsmax+1 > trans%ndgl ) then +! write(error_unit,'(A)') "trans_setup: WARNING: A bug in trans doesn't allow to use FLT with "& +! & // "truncation (nsmax+1) > nb_latitudes (ndgl). Continuing with FLT=OFF." +! endif +! if( trans%nsmax+1 > trans%ndgl ) then +! trans%flt = 0 +! endif + + luseflt = .False. + if( trans%flt > 0 ) luseflt = .True. + + if( .not. c_associated( trans%nloen ) ) then + ! Setup that involves latlon requires no nloen + + if( len_trim(readfp) > 0 ) then + if( trans%flt >= 0 ) then + + ! LONLAT; Impose FLT; READ coeffs from file + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon, & + & CDIO_LEGPOL="readf", & + & CDLEGPOLFNAME=readfp, & + & LDUSEFLT=luseflt ) + + else + + ! LONLAT; Default FLT; READ coeffs from file + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon, & + & CDIO_LEGPOL="readf", & + & CDLEGPOLFNAME=readfp ) + + endif + elseif( len_trim(writefp) > 0 ) then + if( trans%flt >= 0 ) then + + ! LONLAT; Impose FLT; WRITE coeffs to file + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon, & + & CDIO_LEGPOL="writef", & + & CDLEGPOLFNAME=writefp, & + & LDUSEFLT=luseflt ) + + else + + ! LONLAT; Impose FLT; WRITE coeffs to file + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon, & + & CDIO_LEGPOL="writef", & + & CDLEGPOLFNAME=writefp ) + + endif + elseif( trans%cachesize > 0 ) then + if( trans%flt >= 0 ) then + + ! LONLAT; Impose FLT; read CACHED coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon, & + & CDIO_LEGPOL="membuf", & + & KLEGPOLPTR=trans%cache, & + & KLEGPOLPTR_LEN=trans%cachesize, & + & LDUSEFLT=luseflt ) + + else + + ! LONLAT; Default FLT; read CACHED coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon, & + & CDIO_LEGPOL="membuf", & + & KLEGPOLPTR=trans%cache, & + & KLEGPOLPTR_LEN=trans%cachesize ) + + endif + + else + + if( trans%flt >= 0 ) then + + ! LONLAT; Impose FLT + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon, & + & LDUSEFLT=luseflt ) + + else + + ! LONLAT; Default FLT + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KDLON=nlon ) + + endif + + endif + + else ! we have nloen + + call C_F_POINTER( trans%nloen, nloen, (/trans%ndgl/) ) + + if( len_trim(readfp) > 0 ) then + + if( trans%flt >= 0 ) then + + ! REDUCEDGAUSSIANGRID; Impose FLT; READ coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen, CDIO_LEGPOL="readf", & + & CDLEGPOLFNAME=trim(readfp),& + & LDUSEFLT=luseflt ) + + else + + ! REDUCEDGAUSSIANGRID; Default FLT; READ coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen, CDIO_LEGPOL="readf", & + & CDLEGPOLFNAME=trim(readfp) ) + + endif + elseif( len_trim(writefp) > 0 ) then + + if( trans%flt >= 0 ) then + + ! REDUCEDGAUSSIANGRID; Impose FLT; WRITE coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen, & + & CDIO_LEGPOL="writef", & + & CDLEGPOLFNAME=trim(writefp), & + & LDUSEFLT=luseflt ) + + else + + ! REDUCEDGAUSSIANGRID; Default FLT; READ coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen, & + & CDIO_LEGPOL="writef", & + & CDLEGPOLFNAME=trim(writefp) ) + + endif + + elseif( trans%cachesize > 0 ) then + + if( trans%flt >= 0 ) then + + ! REDUCEDGAUSSIANGRID; Default FLT; read CACHED coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen, & + & CDIO_LEGPOL="membuf", & + & KLEGPOLPTR=trans%cache, & + & KLEGPOLPTR_LEN=trans%cachesize, & + & LDUSEFLT=luseflt ) + + else + + ! REDUCEDGAUSSIANGRID; Impose FLT; read CACHED coefficients + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen, & + & CDIO_LEGPOL="membuf", & + & KLEGPOLPTR=trans%cache, & + & KLEGPOLPTR_LEN=trans%cachesize ) + + endif + + else + if( trans%flt >= 0 ) then + + ! REDUCEDGAUSSIANGRID; Impose FLT + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen, & + & LDUSEFLT=luseflt ) + + else + + ! REDUCEDGAUSSIANGRID; Default FLT + call SETUP_TRANS( LATLON_FLAGS & + & KSMAX=trans%nsmax, & + & KRESOL=trans%handle, & + & KDGL=trans%ndgl, & + & LDGRIDONLY=LGRIDONLY, & + & LDSPSETUPONLY=LSPECONLY, & + & LDSPLIT=LSPLIT, & + & LDKEEPRPNM=LKEEPRPNM, & + & LDUSERPNM=LUSERPNM, & + & LDUSEFFTW=LUSEFFTW, & + & KLOEN=nloen ) + endif + endif + endif + + if( USE_MPI ) then + trans%myproc = MPL_MYRANK() + trans%nproc = MPL_NPROC() + else + trans%myproc = 1 + trans%nproc = 1 + endif + trans%n_regions_NS = N_REGIONS_NS + trans%n_regions_EW = N_REGIONS_EW + + call TRANS_INQ( KRESOL = trans%handle, & + KMY_REGION_NS = trans%my_region_NS, & + KMY_REGION_EW = trans%my_region_EW, & + KSPEC = trans%nspec, & + KSPEC2 = trans%nspec2, & + KSPEC2G = trans%nspec2g, & + KSPEC2MX = trans%nspec2mx, & + KNUMP = trans%nump, & + KGPTOT = trans%ngptot, & + KGPTOTG = trans%ngptotg, & + KGPTOTMX = trans%ngptotmx, & + KFRSTLOFF = trans%nfrstloff, & + KPTRFLOFF = trans%nptrfloff, & + KPRTRW = trans%nprtrw, & + KLEI3 = trans%nlei3, & + KSPOLEGL = trans%nspolegl, & + LDLAM = ldlam & + ) + + + if( trans%llatlon == 1 ) trans%ngptotg = trans%ngptotg-nlon + + trans%ldlam = 0 + if( ldlam ) trans%ldlam = 1 + + trans%nprtrns = trans%nprtrw + + trans%ngptotl = C_NULL_PTR + trans%nmyms = C_NULL_PTR + trans%nasm0 = C_NULL_PTR + trans%numpp = C_NULL_PTR + trans%npossp = C_NULL_PTR + trans%nptrms = C_NULL_PTR + trans%nallms = C_NULL_PTR + trans%ndim0g = C_NULL_PTR + trans%n_regions = C_NULL_PTR + trans%nfrstlat = C_NULL_PTR + trans%nlstlat = C_NULL_PTR + trans%nptrlat = C_NULL_PTR + trans%nptrfrstlat = C_NULL_PTR + trans%nptrlstlat = C_NULL_PTR + trans%nsta = C_NULL_PTR + trans%nonl = C_NULL_PTR + trans%nultpp = C_NULL_PTR + trans%nptrls = C_NULL_PTR + trans%nnmeng = C_NULL_PTR + trans%rmu = C_NULL_PTR + trans%rgw = C_NULL_PTR + trans%rpnm = C_NULL_PTR + trans%npms = C_NULL_PTR + trans%ndglu = C_NULL_PTR + trans%rlapin = C_NULL_PTR + trans%nvalue = C_NULL_PTR + trans%ldsplitlat = C_NULL_PTR + + call allocate_ptr( trans%n_regions,N_REGIONS_NS, n_regions_fptr ) + n_regions_fptr(:) = N_REGIONS(:) + +end function trans_setup + +function trans_inquire_cstr(trans,vars) bind(C,name="trans_inquire") result(iret) + integer(c_int) :: iret + type(Trans_t), intent(inout) :: trans + character(len=1,kind=c_char), dimension(*), intent(in) :: vars + character(len=MAX_STR_LEN,kind=c_char) :: vars_fstr + vars_fstr = c_str_to_string(vars) + iret = trans_inquire_fstr(trans,vars_fstr) +end function trans_inquire_cstr + +function trans_inquire_fstr(trans,vars_fstr) result(iret) + integer(c_int) :: iret + type(Trans_t), intent(inout) :: trans + character(len=*) :: vars_fstr + character(20) :: var_arr(30), var + integer :: nvars, jvar + !logical(c_bool), pointer :: bool1(:) + integer(c_int), pointer :: int1(:), int2(:,:) + real(c_double), pointer :: double1(:), double2(:,:) + logical, allocatable :: booltmp(:) + + nvars = count(transfer(vars_fstr, 'a', len(vars_fstr)) == ",") + 1 + read(vars_fstr, *) var_arr(1:nvars) + + do jvar=1,nvars + var = trim(var_arr(jvar)) + call to_lower(var) + + if ( var == "numpp" ) then + call allocate_ptr( trans%numpp, trans%nprtrw, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KUMPP=int1 ) + + elseif( var == "ngptotl" ) then + call allocate_ptr( trans%ngptotl, trans%n_regions_NS, trans%n_regions_EW, int2 ) + call TRANS_INQ( KRESOL=trans%handle, KGPTOTL=int2 ) + + elseif( var == "nmyms" ) then + call allocate_ptr( trans%nmyms, trans%nump, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KMYMS=int1 ) + + elseif( var == "nasm0" ) then + call allocate_ptr( trans%nasm0, trans%nsmax+1, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KASM0=int1 ) + + elseif( var == "npossp" ) then + call allocate_ptr( trans%npossp, trans%nprtrw+1, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KPOSSP=int1 ) + + elseif( var == "nptrms" ) then + call allocate_ptr( trans%nptrms, trans%nprtrw, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KPTRMS=int1 ) + + elseif( var == "nallms" ) then + call allocate_ptr( trans%nallms, trans%nsmax+1, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KALLMS=int1 ) + + elseif( var == "ndim0g" ) then + call allocate_ptr( trans%ndim0g, trans%nsmax+1, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KDIM0G=int1 ) + + elseif( var == "nvalue" ) then + call allocate_ptr( trans%nvalue, trans%nspec2, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KNVALUE=int1 ) + + elseif( var == "nfrstlat" ) then + call allocate_ptr( trans%nfrstlat, trans%n_regions_NS, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KFRSTLAT=int1 ) + + elseif( var == "nlstlat" ) then + call allocate_ptr( trans%nlstlat, trans%n_regions_NS, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KLSTLAT=int1 ) + + elseif( var == "nptrlat" ) then + call allocate_ptr( trans%nptrlat, trans%ndgl, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KPTRLAT=int1 ) + + elseif( var == "nptrfrstlat" ) then + call allocate_ptr( trans%nptrfrstlat ,trans%n_regions_ns, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KPTRFRSTLAT=int1 ) + + elseif( var == "nptrlstlat" ) then + call allocate_ptr( trans%nptrlstlat, trans%n_regions_ns, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KPTRLSTLAT=int1 ) + + elseif( var == "nsta" ) then + call allocate_ptr( trans%nsta, trans%ndgl+trans%n_regions_NS-1, trans%n_regions_EW, int2 ) + call TRANS_INQ( KRESOL=trans%handle, KSTA=int2 ) + + elseif( var == "nonl" ) then + call allocate_ptr( trans%nonl, trans%ndgl+trans%n_regions_NS-1, trans%n_regions_EW, int2 ) + call TRANS_INQ( KRESOL=trans%handle, KONL=int2 ) + + elseif( var == "ldsplitlat" ) then + !!call allocate_ptr( trans%nonl, trans%ndgl, bool1 ) + !allocate( booltmp(trans%ndgl) ) + !call TRANS_INQ( KRESOL=trans%handle, LDSPLITLAT=booltmp ) + !bool1(:) = booltmp(:) + !deallocate( booltmp ) + iret = TRANS_NOTIMPL + return + + elseif( var == "nultpp" ) then + call allocate_ptr( trans%nultpp, trans%nprtrns, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KULTPP=int1 ) + + elseif( var == "nptrls" ) then + call allocate_ptr( trans%nptrls, trans%nprtrns, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KPTRLS=int1 ) + + elseif( var == "nnmeng" ) then + call allocate_ptr( trans%nnmeng, trans%ndgl, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KNMENG=int1 ) + + elseif( var == "rmu" ) then + call allocate_ptr( trans%rmu, trans%ndgl, double1 ) + call TRANS_INQ( KRESOL=trans%handle, PMU=double1 ) + + elseif( var == "rgw" ) then + call allocate_ptr( trans%rgw, trans%ndgl, double1 ) + call TRANS_INQ( KRESOL=trans%handle, PGW=double1 ) + + elseif( var == "rpnm" ) then + call allocate_ptr( trans%rpnm, trans%nlei3, trans%nspolegl, double2 ) + call TRANS_INQ( KRESOL=trans%handle, PRPNM=double2 ) + + elseif( var == "npms" ) then + call allocate_ptr( trans%npms, trans%nsmax+1, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KPMS=int1 ) + + elseif( var == "rlapin" ) then + call allocate_ptr( trans%rlapin, trans%nsmax+4, double1 ) + call TRANS_INQ( KRESOL=trans%handle, PLAPIN=double1 ) + + elseif( var == "ndglu" ) then + call allocate_ptr( trans%ndglu, trans%nsmax+1, int1 ) + call TRANS_INQ( KRESOL=trans%handle, KDGLU=int1 ) + + elseif( var /= "ndgl" & + & .and. var /= "nsmax" & + & .and. var /= "myproc" & + & .and. var /= "nproc" & + & .and. var /= "ldlam" & + & .and. var /= "nspec" & + & .and. var /= "nspec2" & + & .and. var /= "nspec2g" & + & .and. var /= "nspec2mx" & + & .and. var /= "nump" & + & .and. var /= "ngptot" & + & .and. var /= "ngptotg" & + & .and. var /= "ngptotmx" & + & .and. var /= "n_regions_ns" & + & .and. var /= "n_regions_ew" & + & .and. var /= "my_region_ns" & + & .and. var /= "my_region_ew" & + & .and. var /= "nfrstloff" & + & .and. var /= "nptrfloff" & + & .and. var /= "nprtrns" & + & .and. var /= "nlei3" & + & .and. var /= "nspolegl" ) then + write(error_unit,*) "trans_inqure: ERROR: unrecognized variable ", var + iret = TRANS_UNRECOGNIZED_ARG + return + endif + + enddo + + iret = TRANS_SUCCESS +end function trans_inquire_fstr + +subroutine free_ptr(ptr) + use, intrinsic :: iso_c_binding + type(c_ptr) :: ptr + if( c_associated( ptr ) ) then + call transi_free(ptr) + ptr = c_null_ptr + endif +end subroutine free_ptr + +!subroutine allocate_bool1_ptr(ptr,len,bool1) +! use, intrinsic :: iso_c_binding +! type(c_ptr) :: ptr +! integer(c_int) :: len +! logical(c_bool), pointer :: bool1(:) +! if( .not. c_associated( ptr ) ) call transi_malloc_bool(ptr,len) +! call c_f_pointer( ptr, bool1, (/len/) ) +!end subroutine allocate_bool1_ptr + +subroutine allocate_int1_ptr(ptr,len,int1) + use, intrinsic :: iso_c_binding + type(c_ptr) :: ptr + integer(c_int) :: len + integer(c_int), pointer :: int1(:) + if( .not. c_associated( ptr ) ) call transi_malloc_int(ptr,len) + call c_f_pointer( ptr, int1, (/len/) ) +end subroutine allocate_int1_ptr + +subroutine allocate_int2_ptr(ptr,len1,len2,int2) + use, intrinsic :: iso_c_binding + type(c_ptr) :: ptr + integer(c_int) :: len1, len2 + integer(c_int), pointer :: int2(:,:) + if( .not. c_associated( ptr ) ) call transi_malloc_int(ptr,len1*len2) + call c_f_pointer( ptr, int2, (/len1,len2/) ) +end subroutine allocate_int2_ptr + +subroutine allocate_double1_ptr(ptr,len,double1) + use, intrinsic :: iso_c_binding + type(c_ptr) :: ptr + integer(c_int) :: len + real(c_double), pointer :: double1(:) + if( .not. c_associated( ptr ) ) call transi_malloc_double(ptr,len) + call c_f_pointer( ptr, double1, (/len/) ) +end subroutine allocate_double1_ptr + +subroutine allocate_double2_ptr(ptr,len1,len2,double2) + use, intrinsic :: iso_c_binding + type(c_ptr) :: ptr + integer(c_int) :: len1, len2 + real(c_double), pointer :: double2(:,:) + if( .not. c_associated( ptr ) ) call transi_malloc_double(ptr,len1*len2) + call c_f_pointer( ptr, double2, (/len1,len2/) ) +end subroutine allocate_double2_ptr + +function trans_delete(trans) bind(C,name="trans_delete") + use, intrinsic :: iso_c_binding + integer(c_int) :: trans_delete + type(Trans_t), intent(inout) :: trans + call free_ptr( trans%nloen ) + call free_ptr( trans%readfp ) + call free_ptr( trans%writefp ) + call free_ptr( trans%ngptotl ) + call free_ptr( trans%nmyms ) + call free_ptr( trans%nasm0 ) + call free_ptr( trans%numpp ) + call free_ptr( trans%npossp ) + call free_ptr( trans%nptrms ) + call free_ptr( trans%nallms ) + call free_ptr( trans%ndim0g ) + call free_ptr( trans%nvalue ) + call free_ptr( trans%n_regions ) + call free_ptr( trans%nfrstlat ) + call free_ptr( trans%nlstlat ) + call free_ptr( trans%nptrlat ) + call free_ptr( trans%nptrfrstlat ) + call free_ptr( trans%nptrlstlat ) + call free_ptr( trans%nsta ) + call free_ptr( trans%nonl ) + call free_ptr( trans%ldsplitlat ) + call free_ptr( trans%nultpp ) + call free_ptr( trans%nptrls ) + call free_ptr( trans%nnmeng ) + call free_ptr( trans%rmu ) + call free_ptr( trans%rgw ) + call free_ptr( trans%rpnm ) + call free_ptr( trans%npms ) + call free_ptr( trans%rlapin ) + call free_ptr( trans%ndglu ) + call trans_release( trans%handle ) + trans_delete = TRANS_SUCCESS +end function trans_delete + +function trans_finalize() bind(C,name="trans_finalize") + use, intrinsic :: iso_c_binding + integer(c_int) :: trans_finalize + call TRANS_END() + if( USE_MPI ) call MPL_END(LDMEMINFO=.FALSE.) + if( close_devnull ) then + ! Don't close devnull in case other code is also using this unit + ! close (devnull()) + endif + + is_init = .False. + + trans_finalize = TRANS_SUCCESS +end function trans_finalize + + +function get_nlon( trans ) result(nlon) + use, intrinsic :: iso_c_binding, only : c_associated, c_f_pointer + integer ::nlon + type(Trans_t) :: trans + integer, pointer :: nloen(:) + nlon = trans%nlon + if( nlon < 0 ) then + if( c_associated( trans%nloen ) ) then + call c_f_pointer( trans%nloen, nloen, (/trans%ndgl/) ) + nlon = nloen(1) + else + nlon = 2*trans%ndgl + endif + endif +end function + +function assert_global(trans,RGP) result(iret) + integer :: iret + type(Trans_t), intent(in) :: trans + real(c_double), intent(in) :: RGP(:,:,:) !(NPROMA==ngptotg,NFLD,NGPBLKS==1) + integer :: nproma, ngpblks, nlon + iret = TRANS_SUCCESS + nproma = size(RGP,1) + ngpblks = size(RGP,3) + + if( trans%nproc /= 1 ) then + call transi_error("trans_invtrans: ERROR: Configuration only valid for nproc == 1") + iret = TRANS_ERROR + return + endif + + if( trans%llatlon == 1 ) then + nlon = get_nlon(trans) + if( trans%ngptot /= trans%ngptotg + nlon ) then + call transi_error("trans: Assertion failed for lonlat grids: (ngptot == ngptotg+nlon)") + iret = TRANS_ERROR + return + endif + endif + + if( nproma /= trans%ngptotg ) then + call transi_error("trans_invtrans: ERROR: Configuration only valid for nproma == ngpgot") + iret = TRANS_ERROR + return + endif + + if( ngpblks /= 1 ) then + call transi_error("trans: ERROR: Configuration only valid for ngpblks == 1") + iret = TRANS_ERROR + return + endif +end function + +function prepare_global_invtrans(trans,RGP,RGPM) result(iret) + integer :: iret + type(Trans_t), intent(in) :: trans + real(c_double), target, intent(in) :: RGP(:,:,:) !(NPROMA==ngptotg,NFLD,NGPBLKS==1) + real(c_double), pointer, intent(out) :: RGPM(:,:,:) !(NPROMA==ngptot, NFLD,NGPBLKS==1) + !! Modified RGP to add one duplicate latitude at equator + integer :: ilat, ilon, nfld + + iret = assert_global(trans,RGP) + if( iret /= TRANS_SUCCESS ) return + + if( trans%llatlon == 1 ) then + nfld = size(RGP,2) + allocate( RGPM(trans%ngptot,nfld,1) ) + else + RGPM => RGP + endif +end function + +function finish_global_invtrans(trans,RGP,RGPM) result(iret) + integer :: iret + type(Trans_t), intent(in) :: trans + real(c_double), intent(inout) :: RGP(:,:,:) !(NPROMA==ngptotg,FIELD,NGPBLKS==1) + real(c_double), pointer, intent(inout) :: RGPM(:,:,:) !(NPROMA==ngptotg,FIELD,NGPBLKS==1) + !! Modified RGP with an added duplicate latitude at equator + integer :: nlon, ilat, ilon, icount + + iret = assert_global(trans,RGP) + if( iret /= TRANS_SUCCESS ) return + + if( trans%llatlon == 1 ) then + nlon = get_nlon(trans) + icount = 0 + do ilat=1,trans%ndgl+2 + if( ilat <= trans%ndgl/2 .or. ilat >= trans%ndgl/2+2) then + do ilon=1,nlon + icount=icount+1 + RGP(icount,:,1) = RGPM(ilon+(ilat-1)*nlon,:,1) + enddo ! ilon + endif + enddo ! ilat + deallocate(RGPM) + nullify(RGPM) + else + nullify(RGPM) + endif +end function + +function prepare_global_gptosp_trans(trans,RGP,RGPM) result(iret) + integer :: iret + type(Trans_t), intent(in) :: trans + real(c_double), target, intent(in) :: RGP(:,:,:) !(NPROMA==ngptotg,NFLD,NGPBLKS==1) + real(c_double), pointer, intent(out) :: RGPM(:,:,:) !(NPROMA==ngptot, NFLD,NGPBLKS==1) + !! Modified RGP to add one duplicate latitude at equator + integer :: nlon, ilat, ilon, icount, nfld + + iret = assert_global(trans,RGP) + if( iret /= TRANS_SUCCESS ) return + + if( trans%llatlon == 1 ) then + nfld = size(RGP,2) + nlon = get_nlon(trans) + icount = 0 + allocate( RGPM(trans%ngptot,nfld,1) ) + do ilat=1,trans%ndgl+2 ! There is 1 too little latitude in RGPM + do ilon=1,nlon + icount = icount+1 + RGPM(ilon+(ilat-1)*nlon,:,1) = RGP(icount,:,1) + enddo ! ilon + if( ilat == trans%ndgl/2+1) then + icount = icount-nlon + endif + enddo ! ilat + else + RGPM => RGP + endif +end function + +function finish_global_gptosp_trans(trans,RGP,RGPM) result(iret) + integer :: iret + type(Trans_t), intent(in) :: trans + real(c_double), intent(inout) :: RGP(:,:,:) !(NPROMA==ngptotg,FIELD,NGPBLKS==1) + real(c_double), pointer, intent(inout) :: RGPM(:,:,:) !(NPROMA==ngptotg,FIELD,NGPBLKS==1) + !! Modified RGP with an added duplicate latitude at equator + + iret = assert_global(trans,RGP) + if( iret /= TRANS_SUCCESS ) return + + if( trans%llatlon == 1 ) then + deallocate(RGPM) + nullify(RGPM) + else + nullify(RGPM) + endif +end function + +function trans_dirtrans(args) bind(C,name="trans_dirtrans") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(DirTrans_t), intent(inout) :: args + real(c_double), pointer :: RSPVOR(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RSPDIV(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RSPSCALAR(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RGP(:,:,:) !(NPROMA,IF_GP,NGPBLKS) + real(c_double), pointer :: RGPM(:,:,:) !(NPROMA,FIELD,NGPBLKS) + type(Trans_t), pointer :: trans + logical :: llatlon + + if( args%count > 0 ) then + call transi_error("trans_dirtrans: ERROR: arguments are not new") + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans_dirtrans: ERROR: trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( .not. c_associated(args%rgp) ) then + call transi_error( "trans_dirtrans: ERROR: Array RGP was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + + if( args%nvordiv > 0 ) then + if( .not. c_associated(args%rspvor) ) then + call transi_error( "Array RSPVOR was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + if( .not. c_associated(args%rspdiv) ) then + call transi_error( "Array RSPDIV was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER(args%rspvor, RSPVOR, (/args%nvordiv, trans%nspec2/) ) + call C_F_POINTER(args%rspdiv, RSPDIV, (/args%nvordiv, trans%nspec2/) ) + endif + if( args%nscalar > 0 ) then + if( .not. c_associated(args%rspscalar) ) then + call transi_error( "Array RSPSCALAR was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER(args%rspscalar, RSPSCALAR, (/args%nscalar, trans%nspec2/) ) + endif + + llatlon = .false. + if( trans%llatlon /= 0 ) llatlon = .true. + + if( args%lglobal == 1 ) then + call C_F_POINTER( args%rgp, RGP, (/trans%ngptotg,args%nscalar+2*args%nvordiv,1/) ) + iret = prepare_global_gptosp_trans(trans,RGP,RGPM) + if( iret /= TRANS_SUCCESS ) return + else + call C_F_POINTER( args%rgp, RGP, (/args%nproma,args%nscalar+2*args%nvordiv,args%ngpblks/) ) + RGPM => RGP + endif + + if( args%nvordiv > 0 .and. args%nscalar > 0 ) then + call DIR_TRANS( KRESOL=trans%handle, & + & KPROMA=args%nproma, & + & LDLATLON=llatlon, & + & PGP=RGPM, & + & PSPVOR=RSPVOR,PSPDIV=RSPDIV,PSPSCALAR=RSPSCALAR ) ! unused args: KVSETUV,KVSETSC + elseif( args%nscalar > 0 ) then + call DIR_TRANS( KRESOL=trans%handle, & + & KPROMA=args%nproma, & + & LDLATLON=llatlon, & + & PGP=RGPM, & + & PSPSCALAR=RSPSCALAR ) ! unused args: KVSETUV,KVSETSC + elseif( args%nvordiv > 0 ) then + call DIR_TRANS( KRESOL=trans%handle, & + & KPROMA=args%nproma, & + & LDLATLON=llatlon, & + & PGP=RGPM, & + & PSPVOR=RSPVOR,PSPDIV=RSPDIV ) ! unused args: KVSETUV,KVSETSC + endif + + if( args%lglobal == 1 ) then + iret = finish_global_gptosp_trans(trans,RGP,RGPM) + if( iret /= TRANS_SUCCESS ) return + else + nullify(RGPM) + endif + + iret = TRANS_SUCCESS +end function trans_dirtrans + +function trans_invtrans(args) bind(C,name="trans_invtrans") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(InvTrans_t), intent(inout) :: args + real(c_double), pointer :: RSPVOR(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RSPDIV(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RSPSCALAR(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RGP(:,:,:) !(NPROMA,FIELD,NGPBLKS) + real(c_double), pointer :: RGPM(:,:,:) !(NPROMA,FIELD,NGPBLKS) + + logical :: lscalarders + logical :: luvder_EW + logical :: lvordivgp + logical :: llatlon + type(Trans_t), pointer :: trans + integer :: nfld_gp + + if( args%count > 0 ) then + call transi_error( "trans_invtrans: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( .not. c_associated(args%rgp) ) then + call transi_error( "Array RGP was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + + lscalarders = .false.; if( args%lscalarders == 1 ) lscalarders = .true. + luvder_EW = .false.; if( args%luvder_EW == 1 ) luvder_EW = .true. + lvordivgp = .false.; if( args%lvordivgp == 1 ) lvordivgp = .true. + + if( args%nvordiv > 0 ) then + if( .not. c_associated(args%rspvor) ) then + call transi_error( "Array RSPVOR was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + if( .not. c_associated(args%rspdiv) ) then + call transi_error( "Array RSPDIV was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER(args%rspvor, RSPVOR, (/args%nvordiv, trans%nspec2/) ) + call C_F_POINTER(args%rspdiv, RSPDIV, (/args%nvordiv, trans%nspec2/) ) + endif + if( args%nscalar > 0 ) then + if( .not. c_associated(args%rspscalar) ) then + call transi_error( "Array RSPSCALAR was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER(args%rspscalar, RSPSCALAR, (/args%nscalar, trans%nspec2/) ) + endif + + llatlon = .false. + if( trans%llatlon /= 0 ) llatlon = .true. + + nfld_gp = 0 + if( lvordivgp ) nfld_gp = nfld_gp + 2*args%nvordiv ! voriticty + divergence + if( .true. ) nfld_gp = nfld_gp + args%nscalar ! scalars + if( .true. ) nfld_gp = nfld_gp + 2*args%nvordiv ! u + v + if( lscalarders ) nfld_gp = nfld_gp + args%nscalar ! scalars N-S derivatives + if( luvder_EW ) nfld_gp = nfld_gp + 2*args%nvordiv ! u + v E-W derivatives + if( lscalarders ) nfld_gp = nfld_gp + args%nscalar ! scalars E-W derivatives + + if( args%lglobal == 1 ) then + call C_F_POINTER( args%rgp, RGP, (/trans%ngptotg,nfld_gp,1/) ) + iret = prepare_global_invtrans(trans,RGP,RGPM) + if( iret /= TRANS_SUCCESS ) return + else + call C_F_POINTER( args%rgp, RGP, (/args%nproma,nfld_gp,args%ngpblks/) ) + RGPM => RGP + endif + + if( args%nvordiv > 0 .and. args%nscalar > 0 ) then + call INV_TRANS( KRESOL=trans%handle, & + & KPROMA=args%nproma, & + & LDLATLON=llatlon, & + & LDSCDERS=lscalarders, & + & LDVORGP=lvordivgp, & + & LDDIVGP=lvordivgp, & + & LDUVDER=luvder_EW, & + & PSPVOR=RSPVOR,PSPDIV=RSPDIV,PSPSCALAR=RSPSCALAR, & + & PGP=RGPM ) ! unused args: KVSETUV,KVSETSC + elseif( args%nscalar > 0 ) then + call INV_TRANS( KRESOL=trans%handle, & + & KPROMA=args%nproma, & + & LDLATLON=llatlon, & + & LDSCDERS=lscalarders, & + & PSPSCALAR=RSPSCALAR, & + & PGP=RGPM ) ! unused args: KVSETUV,KVSETSC + elseif( args%nvordiv > 0 ) then + call INV_TRANS( KRESOL=trans%handle, & + & KPROMA=args%nproma, & + & LDLATLON=llatlon, & + & LDVORGP=lvordivgp, & + & LDDIVGP=lvordivgp, & + & LDUVDER=luvder_EW, & + & PSPVOR=RSPVOR,PSPDIV=RSPDIV, & + & PGP=RGPM ) ! unused args: KVSETUV,KVSETSC + endif + + if( args%lglobal == 1 ) then + iret = finish_global_invtrans(trans,RGP,RGPM) + if( iret /= TRANS_SUCCESS ) return + else + nullify(RGPM) + endif + + + iret = TRANS_SUCCESS + +end function trans_invtrans + +function trans_invtrans_adj(args) bind(C,name="trans_invtrans_adj") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(InvTransAdj_t), intent(inout) :: args + real(c_double), pointer :: RSPVOR(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RSPDIV(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RSPSCALAR(:,:) !(FIELD,WAVE) + real(c_double), pointer :: RGP(:,:,:) !(NPROMA,FIELD,NGPBLKS) + real(c_double), pointer :: RGPM(:,:,:) !(NPROMA,FIELD,NGPBLKS) + + logical :: lscalarders + logical :: luvder_EW + logical :: lvordivgp + logical :: llatlon + type(Trans_t), pointer :: trans + integer :: nfld_gp + + if( args%count > 0 ) then + call transi_error( "trans_invtrans_adj: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans_invtrans_adj:trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( .not. c_associated(args%rgp) ) then + call transi_error( "trans_invtrans_adj:Array RGP was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + + lscalarders = .false.; if( args%lscalarders == 1 ) lscalarders = .true. + luvder_EW = .false.; if( args%luvder_EW == 1 ) luvder_EW = .true. + lvordivgp = .false.; if( args%lvordivgp == 1 ) lvordivgp = .true. + + if( args%nvordiv > 0 ) then + if( .not. c_associated(args%rspvor) ) then + call transi_error( "trans_invtrans_adj::Array RSPVOR was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + if( .not. c_associated(args%rspdiv) ) then + call transi_error( "trans_invtrans_adj::Array RSPDIV was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER(args%rspvor, RSPVOR, (/args%nvordiv, trans%nspec2/) ) + call C_F_POINTER(args%rspdiv, RSPDIV, (/args%nvordiv, trans%nspec2/) ) + endif + if( args%nscalar > 0 ) then + if( .not. c_associated(args%rspscalar) ) then + call transi_error( "trans_invtrans_adj::Array RSPSCALAR was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER(args%rspscalar, RSPSCALAR, (/args%nscalar, trans%nspec2/) ) + endif + + llatlon = .false. + if( trans%llatlon /= 0 ) llatlon = .true. + + nfld_gp = 0 + if( lvordivgp ) nfld_gp = nfld_gp + 2*args%nvordiv ! voriticty + divergence + if( .true. ) nfld_gp = nfld_gp + args%nscalar ! scalars + if( .true. ) nfld_gp = nfld_gp + 2*args%nvordiv ! u + v + if( lscalarders ) nfld_gp = nfld_gp + args%nscalar ! scalars N-S derivatives + if( luvder_EW ) nfld_gp = nfld_gp + 2*args%nvordiv ! u + v E-W derivatives + if( lscalarders ) nfld_gp = nfld_gp + args%nscalar ! scalars E-W derivatives + + if( args%lglobal == 1 ) then + call C_F_POINTER( args%rgp, RGP, (/trans%ngptotg,nfld_gp,1/) ) + iret = prepare_global_gptosp_trans(trans,RGP,RGPM) + if( iret /= TRANS_SUCCESS ) return + else + call C_F_POINTER( args%rgp, RGP, (/args%nproma,nfld_gp,args%ngpblks/) ) + RGPM => RGP + endif + + ! Note that llatlon is not an option in INV_TRANSAD unlile INV_TRANS and DIR_TRANS + if( args%nvordiv > 0 .and. args%nscalar > 0 ) then + call INV_TRANSAD( KRESOL=trans%handle, & + & KPROMA=args%nproma, & +! & LDLATLON=llatlon, & + & LDSCDERS=lscalarders, & + & LDVORGP=lvordivgp, & + & LDDIVGP=lvordivgp, & + & LDUVDER=luvder_EW, & + & PSPVOR=RSPVOR,PSPDIV=RSPDIV,PSPSCALAR=RSPSCALAR, & + & PGP=RGPM ) ! unused args: KVSETUV,KVSETSC + elseif( args%nscalar > 0 ) then + call INV_TRANSAD( KRESOL=trans%handle, & + & KPROMA=args%nproma, & +! & LDLATLON=llatlon, & + & LDSCDERS=lscalarders, & + & PSPSCALAR=RSPSCALAR, & + & PGP=RGPM ) ! unused args: KVSETUV,KVSETSC + elseif( args%nvordiv > 0 ) then + call INV_TRANSAD( KRESOL=trans%handle, & + & KPROMA=args%nproma, & +! & LDLATLON=llatlon, & + & LDVORGP=lvordivgp, & + & LDDIVGP=lvordivgp, & + & LDUVDER=luvder_EW, & + & PSPVOR=RSPVOR,PSPDIV=RSPDIV, & + & PGP=RGPM ) ! unused args: KVSETUV,KVSETSC + endif + + if( args%lglobal == 1 ) then + iret = finish_global_gptosp_trans(trans,RGP,RGPM) + if( iret /= TRANS_SUCCESS ) return + else + nullify(RGPM) + endif + + iret = TRANS_SUCCESS +end function trans_invtrans_adj + +function trans_distgrid(args) bind(C,name="trans_distgrid") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(DistGrid_t), intent(inout) :: args + real(c_double), pointer :: RGPG(:,:) ! (NFLD_from,NGPTOTG (+nlon) ) (+nlon in case of LonLat grid) + real(c_double), pointer :: LL_RGPG(:,:) ! (NFLD_from,NGPTOTG ) + real(c_double), pointer :: RGP (:,:,:) ! (NPROMA,IF_GP,NGPBLKS) + integer(c_int), pointer :: NFROM(:) + type(Trans_t), pointer :: trans + integer :: jfld, isend, jsend + integer :: icount, ilat, ilon, nlon + integer :: check + integer(c_int), pointer :: nloen(:) + + if( args%count > 0 ) then + call transi_error( "trans_distgrid: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( .not. c_associated(args%nfrom) ) then + call transi_error( "Array NFROM was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%nfrom, NFROM, (/args%nfld/) ) + + + isend = 0 + do jfld = 1, args%nfld + if ( NFROM(jfld) == trans%myproc ) isend = isend + 1 + enddo + + if( .not. c_associated(args%rgp) ) then + call transi_error( "Array RGP was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rgp, RGP, (/args%nproma,args%nfld,args%ngpblks/) ) + + if( isend > 0 ) then + if( .not. c_associated(args%rgpg) ) then + call transi_error( "Array RGPG was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + + + if( trans%llatlon == 1 ) then + + nlon = trans%nlon + if( nlon < 0 ) then + if( c_associated( trans%nloen ) ) then + call C_F_POINTER( trans%nloen, nloen, (/trans%ndgl/) ) + nlon = nloen(1) + else + nlon = 2*trans%ndgl + endif + endif + + call C_F_POINTER( args%rgpg, LL_RGPG, (/trans%ngptotg,isend/) ) + allocate( RGPG(trans%ngptotg+nlon,isend) ) ! 1 extra latitudes should be allocated + do jsend=1,isend + check = 0 + icount = 0 + do ilat=1,trans%ndgl+2 ! There is 1 too little latitude in LL_RGPG + do ilon=1,nlon + ICOUNT=ICOUNT+1 + RGPG(ILON+(ILAT-1)*nlon,jsend) = LL_RGPG(ICOUNT,jsend) + check = check+1 + enddo ! ilon + if( ilat == trans%ndgl/2+1) then + ICOUNT = ICOUNT-nlon + endif + enddo ! ilat + if( check /= trans%ngptotg+nlon ) then + call transi_error( "ERROR: not all values are assigned" ) + iret = TRANS_ERROR + deallocate( RGPG ) + return + endif + enddo ! jsend + else + call C_F_POINTER( args%rgpg, RGPG, (/trans%ngptotg,isend/) ) + endif ! llatlon + + call DIST_GRID(PGPG=RGPG,KFDISTG=args%nfld,KFROM=NFROM,KPROMA=args%nproma,KRESOL=trans%handle,PGP=RGP) + + if( trans%llatlon == 1 ) then + deallocate( RGPG ) + endif + else + call DIST_GRID( KFDISTG=args%nfld,KFROM=NFROM,KPROMA=args%nproma,KRESOL=trans%handle,PGP=RGP) + endif + + iret = TRANS_SUCCESS + +end function trans_distgrid + + +function trans_gathgrid(args) bind(C,name="trans_gathgrid") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(GathGrid_t), intent(inout) :: args + real(c_double), pointer :: RGPG(:,:) !(NFLD_to,NGPTOTG) + real(c_double), pointer :: RGP (:,:,:) !(NPROMA,NFLD,NGPBLKS) + integer(c_int), pointer :: NTO(:) + type(Trans_t), pointer :: trans + real(c_double), pointer :: LL_RGPG (:,:) + integer :: jfld, irecv + integer :: icount, ilat, ilon, jrecv, nlon + integer(c_int), pointer :: nloen(:) + + if( args%count > 0 ) then + call transi_error( "trans_gathgrid: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( .not. c_associated(args%nto) ) then + call transi_error( "trans_gath_grid: Array NTO was not allocated") + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%nto, NTO, (/args%nfld/) ) + + + irecv = 0 + do jfld = 1, args%nfld + if ( NTO(jfld) == trans%myproc ) irecv = irecv + 1 + enddo + + if( .not. c_associated(args%rgp) ) then + call transi_error( "trans_gath_grid: Array RGP was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rgp, RGP, (/args%nproma,args%nfld,args%ngpblks/) ) + + + if( irecv > 0 ) then + if( .not. c_associated(args%rgpg) ) then + call transi_error( "Array RGPG was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + if( trans%llatlon == 1 ) then + nlon = trans%nlon + if( nlon < 0 ) then + if( c_associated( trans%nloen ) ) then + call C_F_POINTER( trans%nloen, nloen, (/trans%ndgl/) ) + nlon = nloen(1) + else + nlon = 2*trans%ndgl + endif + endif + allocate( RGPG(trans%ngptotg+nlon,irecv) ) ! 1 extra latitudes + else + call C_F_POINTER( args%rgpg, RGPG, (/trans%ngptotg,irecv/) ) + endif + call GATH_GRID(KRESOL=trans%handle,KFGATHG=args%nfld,KTO=NTO,KPROMA=args%nproma,PGP=RGP,PGPG=RGPG) + + if( trans%llatlon == 1 ) then + + ! There is 1 too many latitude in RGPG + call C_F_POINTER( args%rgpg, LL_RGPG, (/trans%ngptotg,irecv/) ) + + do jrecv=1,irecv + icount = 0 + do ilat=1,trans%ndgl+2 + do ilon=1,nlon + if( ilat <= trans%ndgl/2 .or. ilat >= trans%ndgl/2+2) then + ICOUNT=ICOUNT+1 + LL_RGPG(ICOUNT,jrecv) = RGPG(ILON+(ILAT-1)*nlon,jrecv) + endif + enddo ! ilon + enddo ! ilat + if( ICOUNT /= trans%ngptotg) then + call transi_error( "CHECK failed" ) + iret = TRANS_ERROR + deallocate( RGPG ) + return + endif + enddo ! jrecv + deallocate( RGPG ) + endif + else + call GATH_GRID(KRESOL=trans%handle,KFGATHG=args%nfld,KTO=NTO,KPROMA=args%nproma,PGP=RGP) + endif + + iret = TRANS_SUCCESS + +end function trans_gathgrid + + +function trans_distspec(args) bind(C,name="trans_distspec") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(DistSpec_t), intent(inout) :: args + real(c_double), pointer :: RSPEC (:,:) ! (NFLD,NSPEC2) + real(c_double), pointer :: RSPECG(:,:) ! (NFLD_from,NSPEC2G) + integer(c_int), pointer :: NFROM(:) + type(Trans_t), pointer :: trans + integer :: jfld, isend + + if( args%count > 0 ) then + call transi_error( "trans_distspec: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( .not. c_associated(args%nfrom) ) then + call transi_error( "Array NFROM was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%nfrom, NFROM, (/args%nfld/) ) + + + isend = 0 + do jfld = 1, args%nfld + if ( NFROM(jfld) == trans%myproc ) isend = isend + 1 + enddo + + if( .not. c_associated(args%rspec) ) then + call transi_error( "Array RSPEC was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspec, RSPEC, (/args%nfld,trans%nspec2/) ) + + if( isend > 0 ) then + if( .not. c_associated(args%rspecg) ) then + call transi_error( "Array RSPECG was not allocated" ) + endif + call C_F_POINTER( args%rspecg, RSPECG, (/isend,trans%nspec2g/) ) + call DIST_SPEC(KRESOL=trans%handle,KFDISTG=args%nfld,KFROM=NFROM,PSPEC=RSPEC,PSPECG=RSPECG) + else + call DIST_SPEC(KRESOL=trans%handle,KFDISTG=args%nfld,KFROM=NFROM,PSPEC=RSPEC) + endif + + iret = TRANS_SUCCESS +end function trans_distspec + + + +function trans_gathspec(args) bind(C,name="trans_gathspec") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(GathSpec_t), intent(inout) :: args + real(c_double), pointer :: RSPEC(:,:) ! (NFLD,NSPEC2) + real(c_double), pointer :: RSPECG(:,:) ! (NFLD_to,NSPEC2G) + integer(c_int), pointer :: NTO(:) + type(Trans_t), pointer :: trans + integer :: jfld, irecv + + if( args%count > 0 ) then + call transi_error( "trans_gathspec: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( .not. c_associated(args%nto) ) then + call transi_error( "Array NTO was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%nto, NTO, (/args%nfld/) ) + + + irecv = 0 + do jfld = 1, args%nfld + if ( NTO(jfld) == trans%myproc ) irecv = irecv + 1 + enddo + + if( .not. c_associated(args%rspec) ) then + call transi_error( "Array RSPEC was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspec, RSPEC, (/args%nfld,trans%nspec2/) ) + + if( irecv > 0 ) then + if( .not. c_associated(args%rspecg) ) then + call transi_error( "Array RSPECG was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspecg, RSPECG, (/irecv,trans%nspec2g/) ) + call GATH_SPEC(KRESOL=trans%handle,KFGATHG=args%nfld,KTO=NTO,PSPEC=RSPEC,PSPECG=RSPECG) + else + call GATH_SPEC(KRESOL=trans%handle,KFGATHG=args%nfld,KTO=NTO,PSPEC=RSPEC) + endif + + iret = TRANS_SUCCESS + +end function trans_gathspec + + +function trans_vordiv_to_UV(args) bind(C,name="trans_vordiv_to_UV") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(VorDivToUV_t), intent(inout) :: args + real(c_double), pointer :: RSPVOR(:,:) + real(c_double), pointer :: RSPDIV(:,:) + real(c_double), pointer :: RSPU(:,:) + real(c_double), pointer :: RSPV(:,:) + integer(c_int) :: err + + if( args%count > 0 ) then + call transi_error( "trans_vordiv_to_UV: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( args%ncoeff == 0 ) then + call transi_error( "trans_vordiv_to_UV: ERROR: missing argument nspec2") + iret = TRANS_MISSING_ARG + return + endif + + if( args%nsmax == 0 ) then + call transi_error( "trans_vordiv_to_UV: ERROR: missing argument nsmax") + iret = TRANS_MISSING_ARG + return + endif + + + ! Set vorticity + if( .not. c_associated(args%rspvor) ) then + call transi_error( "Array RSPVOR was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspvor, RSPVOR, (/args%nfld,args%ncoeff/) ) + + ! Set divergence + if( .not. c_associated(args%rspdiv) ) then + call transi_error( "Array RSPDIV was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspdiv, RSPDIV, (/args%nfld,args%ncoeff/) ) + + ! Set U + if( .not. c_associated(args%rspu) ) then + call transi_error( "Array RSPU was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspu, RSPU, (/args%nfld,args%ncoeff/) ) + + ! Set V + if( .not. c_associated(args%rspv) ) then + call transi_error( "Array RSPV was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspv, RSPV, (/args%nfld,args%ncoeff/) ) + + + if ( .not. is_init ) then + err = trans_init() + endif + + ! PSPVOR(:,:) - spectral vorticity (input) + ! PSPDIV(:,:) - spectral divergence (input) + ! PSPU(:,:) - spectral U (u*cos(theta) (output) + ! PSPV(:,:) - spectral V (v*cos(theta) (output) + ! KSMAX - spectral resolution (input) + call VORDIV_TO_UV(PSPVOR=RSPVOR,PSPDIV=RSPDIV,PSPU=RSPU,PSPV=RSPV,KSMAX=args%nsmax) + iret = TRANS_SUCCESS + +end function trans_vordiv_to_UV + +function trans_specnorm(args) bind(C,name="trans_specnorm") result(iret) + use, intrinsic :: iso_c_binding + integer(c_int) :: iret + type(SpecNorm_t), intent(inout) :: args + real(c_double), pointer :: RSPEC(:,:) !(IF_GP,NGPTOTG) + real(c_double), pointer :: RNORM(:) + real(c_double), pointer :: RMET(:) + type(Trans_t), pointer :: trans + integer :: jfld, irecv + + if( args%count > 0 ) then + call transi_error( "trans_specnorm: ERROR: arguments are not new" ) + iret = TRANS_STALE_ARG + return + endif + args%count = 1 + + if( .not. c_associated(args%trans) ) then + call transi_error( "trans was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%trans, trans ) + + if( args%nfld == 0 ) then + iret = TRANS_SUCCESS + return + endif + + if( .not. c_associated(args%rspec) ) then + call transi_error( "Array RSPEC was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rspec, RSPEC, (/args%nfld,trans%nspec2/) ) + + if( .not. c_associated(args%rnorm) ) then + call transi_error( "Array RNORM was not allocated" ) + iret = TRANS_MISSING_ARG + return + endif + call C_F_POINTER( args%rnorm, RNORM, (/args%nfld/) ) + + if( c_associated(args%rmet) ) then + call C_F_POINTER( args%rmet, RMET, (/trans%nsmax+1/) ) + RMET(0:) => RMET(:) + endif + + if( .not. c_associated(args%rmet) ) then + call SPECNORM(KRESOL=trans%handle,PSPEC=RSPEC,KMASTER=args%nmaster,PNORM=RNORM) + else + call SPECNORM(KRESOL=trans%handle,PSPEC=RSPEC,KMASTER=args%nmaster,PNORM=RNORM,PMET=RMET) + endif + + iret = TRANS_SUCCESS + +end function trans_specnorm + + +end module trans_module diff --git a/src/transi/version.c.in b/src/transi/version.c.in new file mode 100644 index 0000000..b686c2d --- /dev/null +++ b/src/transi/version.c.in @@ -0,0 +1,62 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#define ECTRANS_VERSION_STR "@ectrans_VERSION_STR@" +#define ECTRANS_VERSION "@ectrans_VERSION@" + +#define ECTRANS_MAJOR_VERSION @ectrans_VERSION_MAJOR@ +#define ECTRANS_MINOR_VERSION @ectrans_VERSION_MINOR@ +#define ECTRANS_PATCH_VERSION @ectrans_VERSION_PATCH@ + +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#include +#include +#include + +const char * ectrans_version() { return ECTRANS_VERSION; } +const char * ectrans_version_str() { return ECTRANS_VERSION_STR; } + +unsigned int ectrans_version_int() +{ + return 10000*ECTRANS_MAJOR_VERSION + + 100*ECTRANS_MINOR_VERSION + + 1*ECTRANS_PATCH_VERSION; +} + +static char* __git_sha1 = 0; + +const char * ectrans_git_sha1() { return "@ectrans_GIT_SHA1@"; } +const char * ectrans_git_sha1_abbrev(unsigned int length) +{ + int N = strlen(ectrans_git_sha1())-40+length; + N = min(strlen(ectrans_git_sha1()),N); + if( __git_sha1 ) free(__git_sha1); + __git_sha1 = malloc( sizeof(char)*(N+1) ); + memcpy( __git_sha1, ectrans_git_sha1(), N ); + __git_sha1[N] = '\0'; + return __git_sha1; +} + +//----------------------------------------------------------------------------- + +extern const char * fiat_version(); +extern unsigned int fiat_version_int(); +extern const char * fiat_version_str(); +extern const char * fiat_git_sha1(); +extern const char * fiat_git_sha1_abbrev(unsigned int length); + +const char * ectrans_fiat_version() { return fiat_version(); } +const char * ectrans_fiat_version_str() { return fiat_version_str(); } +unsigned int ectrans_fiat_version_int() { return fiat_version_int(); } +const char * ectrans_fiat_git_sha1() { return fiat_git_sha1(); } +const char * ectrans_fiat_git_sha1_abbrev(unsigned int length) { return fiat_git_sha1_abbrev(length); } + +//----------------------------------------------------------------------------- diff --git a/src/transi/version.h b/src/transi/version.h new file mode 100644 index 0000000..ba12e7f --- /dev/null +++ b/src/transi/version.h @@ -0,0 +1,45 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#ifndef ectrans_version_h +#define ectrans_version_h + +#ifndef __cplusplus +// C99 header, defines bool as _Bool ( only required for C compiler ) +#include +#else +extern "C" { +#endif + +const char * ectrans_version(); + +unsigned int ectrans_version_int(); + +const char * ectrans_version_str(); + +const char * ectrans_git_sha1(); + +const char * ectrans_git_sha1_abbrev(unsigned int length); + +const char * ectrans_fiat_version(); + +unsigned int ectrans_fiat_version_int(); + +const char * ectrans_fiat_version_str(); + +const char * ectrans_fiat_git_sha1(); + +const char * ectrans_fiat_git_sha1_abbrev(unsigned int length); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..d1d7518 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,171 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +### trans_test_install + +if( HAVE_TESTS ) + + find_package( MPI ) + configure_file( test-install.sh.in ${CMAKE_CURRENT_BINARY_DIR}/test-install.sh @ONLY ) + + unset( _test_args ) + if( CMAKE_TOOLCHAIN_FILE ) + list( APPEND _test_args "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" ) + endif() + foreach( lang C CXX Fortran ) + if( CMAKE_${lang}_COMPILER ) + list( APPEND _test_args "-DCMAKE_${lang}_COMPILER=${CMAKE_${lang}_COMPILER}" ) + endif() + endforeach() + if( NOT HAVE_DOUBLE_PRECISION ) + list( APPEND _test_args "-DCOMPONENTS=single" ) + endif() + + add_test( NAME ectrans_test_install + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test-install.sh ${_test_args} ) +endif() + + +if( HAVE_DOUBLE_PRECISION ) + set( trans trans_dp ) +else() + set( trans trans_sp ) +endif() + +ecbuild_add_test(TARGET ectrans_test_adjoint + SOURCES trans/test_adjoint.F90 + LIBS ${trans} + LINKER_LANGUAGE Fortran +) +if( TEST ectrans_test_adjoint AND HAVE_OMP ) + target_link_libraries( ectrans_test_adjoint OpenMP::OpenMP_Fortran ) +endif() + +foreach( prec dp sp ) + foreach( mpi 0 1 2 ) + set( nthreads 1 ) + if( HAVE_OMP ) + list( APPEND nthreads 4 8 ) + endif() + foreach( omp ${nthreads} ) + set( t 47 ) + set( grid O48 ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld0 + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 0 + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10 + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20 + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_scders + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --scders + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_uvders + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --uvders + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt + MPI ${mpi} + OMP 1 + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_nproma16 + COMMAND ectrans-benchmark-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --nproma 16 + MPI ${mpi} + OMP ${omp} + ) + endforeach() + endforeach() +endforeach() + + +if( HAVE_TRANSI ) + + check_include_files( malloc.h EC_HAVE_MALLOC_H ) + ecbuild_debug_var( EC_HAVE_MALLOC_H ) + + + if( EC_HAVE_MALLOC_H AND NOT CMAKE_Fortran_COMPILER_ID MATCHES "Cray" ) + list( APPEND TEST_DEFINITIONS + TRANSI_HAVE_MEMORY + ) + else() + ecbuild_warn( "ectrans tests checking memory leaks are disabled as malloc.h was not found" ) + endif() + + ecbuild_add_library( TARGET ectrans_test + SOURCES transi/transi_test.h transi/transi_test.c + PUBLIC_LIBS transi_dp + NOINSTALL + ) + target_compile_definitions( ectrans_test PUBLIC ${TEST_DEFINITIONS} ) + + ecbuild_add_test( TARGET ectrans_test_transi_program + SOURCES transi/transi_test_program.c + LIBS ectrans_test + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_timings + SOURCES transi/transi_test_timings.c + LIBS ectrans_test + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_lonlat + SOURCES transi/transi_test_lonlat.c + LIBS ectrans_test + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_io + SOURCES transi/transi_test_io.c + LIBS ectrans_test + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_memory + SOURCES transi/transi_test_memory.c + LIBS ectrans_test + CONDITION EC_HAVE_MALLOC_H + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_memory_lonlat + SOURCES transi/transi_test_memory_lonlat.c + LIBS ectrans_test + CONDITION EC_HAVE_MALLOC_H + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_vordiv_to_UV + SOURCES transi/transi_test_vordiv_to_UV.c + LIBS ectrans_test + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_invtrans_adjoint + SOURCES transi/transi_test_invtrans_adjoint.c + LIBS ectrans_test + ENVIRONMENT TRANS_USE_MPI=0 ) + + ecbuild_add_test( TARGET ectrans_test_transi_lonlat_diff_incr + SOURCES transi/transi_test_lonlat_diff_incr.c + LIBS ectrans_test + ENVIRONMENT TRANS_USE_MPI=0 ) + +endif() diff --git a/tests/test-install.sh.in b/tests/test-install.sh.in new file mode 100755 index 0000000..82660b1 --- /dev/null +++ b/tests/test-install.sh.in @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Usage: +# test-install.sh [CMAKE_ARGUMENTS] + +SOURCE=@CMAKE_CURRENT_SOURCE_DIR@/test_install +BUILD=@CMAKE_CURRENT_BINARY_DIR@/test_install + +# Error handling +function test_failed { + EXIT_CODE=$? + { set +ex; } 2>/dev/null + if [ $EXIT_CODE -ne 0 ]; then + echo "+++++++++++++++++" + echo "Test failed" + echo "+++++++++++++++++" + fi + exit $EXIT_CODE +} +trap test_failed EXIT +set -e -o pipefail +set -x + +# Start with clean build +rm -rf $BUILD + +if [ -z ${ectrans_ROOT+x} ]; then + export ectrans_DIR=@PROJECT_BINARY_DIR@ +else + echo "ectrans_ROOT=$ectrans_ROOT" +fi + +export ecbuild_DIR=@ecbuild_MACROS_DIR@/../lib/cmake/ecbuild + +# Build +mkdir -p $BUILD && cd $BUILD +cmake $SOURCE \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DECBUILD_2_COMPAT=OFF \ + "$@" + +make VERBOSE=1 + +if [ -f bin/main_dp ]; then + bin/main_dp +fi +if [ -f bin/main_sp ]; then + bin/main_sp +fi +if [ -f bin/transi_sptogp ]; then + bin/transi_sptogp +fi + +{ set +ex; } 2>/dev/null +echo "+++++++++++++++++" +echo "Test passed" +echo "+++++++++++++++++" + diff --git a/tests/test_install/CMakeLists.txt b/tests/test_install/CMakeLists.txt new file mode 100644 index 0000000..2957549 --- /dev/null +++ b/tests/test_install/CMakeLists.txt @@ -0,0 +1,40 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +cmake_minimum_required( VERSION 3.12 FATAL_ERROR ) +project( ectrans_test_install VERSION 0.0.0 LANGUAGES Fortran ) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if( COMPONENTS ) + find_package( ectrans REQUIRED COMPONENTS ${COMPONENTS} ) +else() + find_package( ectrans REQUIRED ) +endif() + +if( TARGET trans_dp ) + add_executable( main_dp main.F90 ) + target_link_libraries( main_dp trans_dp ) +endif() + +if( TARGET trans_sp ) + add_executable( main_sp main.F90 ) + target_link_libraries( main_sp trans_sp ) +endif() + +if( TARGET transi_dp ) + enable_language( C ) + add_executable( transi_sptogp transi_sptogp.c ) + target_link_libraries( transi_sptogp transi_dp ) + + add_executable( transi_gptosp transi_gptosp.c ) + target_link_libraries( transi_gptosp transi_dp ) + +endif() diff --git a/tests/test_install/main.F90 b/tests/test_install/main.F90 new file mode 100644 index 0000000..21f2a4c --- /dev/null +++ b/tests/test_install/main.F90 @@ -0,0 +1,26 @@ +! (C) Copyright 2020- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +program main + +! From library fiat + use yomhook ! assert found + use mpl_module ! assert found +! From library parkind_(dp|sp) + use parkind1, only: JPRB ! assert found + +implicit none + +! assert includes are found +#include "setup_trans0.h" +#include "trans_end.h" + +write(0,*) "JPRB =",JPRB ! depending on link with parkind_sp or parkind_dp this will print 4 or 8 + +end program \ No newline at end of file diff --git a/tests/test_install/transi_gptosp.c b/tests/test_install/transi_gptosp.c new file mode 100644 index 0000000..bac1908 --- /dev/null +++ b/tests/test_install/transi_gptosp.c @@ -0,0 +1,195 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include + +#include "ectrans/transi.h" + +#define TRANS_CHECK( CALL ) do {\ + int errcode = CALL;\ + if( errcode != TRANS_SUCCESS) {\ + printf("ERROR: %s failed @%s:%d:\n%s\n",#CALL,__FILE__,__LINE__,trans_error_msg(errcode));\ + abort();\ + }\ +} while(0) + +/*! @example transi_gptosp.c + * + * Transform gridpoint to spectral + * + * This is an example of how to setup and + * transform global gridpoint data to + * global spectral data + */ + +// Following dummy functions are implementation details +// that don't contribute to this example. They could be +// replaced with grib_api functionality +void read_grid( struct Trans_t* trans ); +void read_rgpg( struct Trans_t* trans, double* rgpg[], int* nfrom[], int* nfld ); +void write_rspecg( struct Trans_t* trans, double* rspecg[], int nfld ); + + +int main ( int arc, char **argv ) +{ + trans_use_mpi(0); + int jfld; + struct Trans_t trans; + trans_new(&trans); + + // Read resolution information + read_grid(&trans); + + // Register resolution in trans library + trans_setup(&trans); + + // Declare global gridpoint data + int nfld; + double* rgpg = NULL; + int* nfrom = NULL; + + // Read global gridpoint data (could be from grib file) + read_rgpg(&trans,&rgpg,&nfrom,&nfld); + + // Distribute data to all procs + double* rgp = malloc( sizeof(double) * nfld *trans.ngptot ); + struct DistGrid_t distgrid = new_distgrid(&trans); + distgrid.nfrom = nfrom; + distgrid.rgpg = rgpg; + distgrid.rgp = rgp; + distgrid.nfld = nfld; + trans_distgrid(&distgrid); + + + // Transform gp to sp fields + double* rspec = malloc( sizeof(double) * nfld*trans.nspec2 ); + + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nscalar = nfld; + dirtrans.rgp = rgp; + dirtrans.rspscalar = rspec; + trans_dirtrans(&dirtrans); + + + // Gather all spectral fields + double* rspecg = NULL; + if( trans.myproc == 1 ) + rspecg = malloc( sizeof(double) * nfld*trans.nspec2g ); + + int* nto = malloc( sizeof(int) * nfld ); + for( jfld=0; jfldndgl = sizeof(T159)/sizeof(int); + trans->nloen = malloc( sizeof(T159) ); + for( i = 0; indgl; i++) trans->nloen[i] = T159[i]; + + // Assume Linear Grid + trans->nsmax=(2*trans->ndgl-1)/2; +} + + +void read_rgpg(struct Trans_t* trans, double* rgpg[], int* nfrom[], int* nfld ) +{ + int i; + int jfld; + if( trans->myproc == 1 ) printf("read_rpgp ...\n"); + *nfld = 2; + if( trans->myproc == 1 ) + { + *rgpg = malloc( sizeof(double) * (*nfld)*trans->ngptotg ); + for( i=0; ingptotg; ++i ) + { + (*rgpg)[0*trans->ngptotg + i] = 1.; // scalar field 1 + (*rgpg)[1*trans->ngptotg + i] = 2.; // scalar field 2 + } + } + *nfrom = malloc( sizeof(int) * (*nfld) ); + for (jfld=0; jfld<(*nfld); ++jfld) + (*nfrom)[jfld] = 1; + if( trans->myproc == 1 ) printf("read_rpgp ... done\n"); +} + +void write_rspecg(struct Trans_t* trans, double* rspecg[], int nfld ) +{ + int i; + if( trans->myproc == 1 ) printf("write_rspecg ...\n"); + for( i=0; inspec2g; ++i ) + { + // output global fields rspecg[i][0:nfld-1] + } + if( trans->myproc == 1 ) printf("write_rspecg ... done\n"); +} + diff --git a/tests/test_install/transi_sptogp.c b/tests/test_install/transi_sptogp.c new file mode 100644 index 0000000..0b1dc15 --- /dev/null +++ b/tests/test_install/transi_sptogp.c @@ -0,0 +1,165 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include + +#include "ectrans/transi.h" + +#define TRANS_CHECK( CALL ) do {\ + int errcode = CALL;\ + if( errcode != TRANS_SUCCESS) {\ + printf("ERROR: %s failed @%s:%d:\n%s\n",#CALL,__FILE__,__LINE__,trans_error_msg(errcode));\ + abort();\ + }\ +} while(0) + +/*! @example transi_sptogp.c + * + * Transform spectral to gridpoint + * + * This is an example of how to setup and + * transform global spectral data to + * global gridpoint data + */ + +// Following dummy functions are implementation details +// that don't contribute to this example. They could be +// replaced with grib_api functionality +void read_grid( struct Trans_t* trans ); +void read_rspecg( struct Trans_t* trans, double* rspecg[], int* nfrom[], int* nfld ); +void write_rgpg( struct Trans_t* trans, double* rgpg[], int nfld ); + +int main ( int arc, char **argv ) +{ + trans_use_mpi(0); + int jfld; + struct Trans_t trans; + TRANS_CHECK(trans_new(&trans)); + + // Read resolution information + read_grid(&trans); + + // Register resolution in trans library + TRANS_CHECK( trans_setup(&trans) ); + + // Declare global spectral data + int nfld; + double* rspecg = NULL; + int* nfrom = NULL; + + // Read global spectral data (could be from grib file) + read_rspecg(&trans,&rspecg,&nfrom,&nfld); + + // Distribute data to all procs + double* rspec = malloc( sizeof(double) * nfld *trans.nspec2 ); + struct DistSpec_t distspec = new_distspec(&trans); + distspec.nfrom = nfrom; + distspec.rspecg = rspecg; + distspec.rspec = rspec; + distspec.nfld = nfld; + TRANS_CHECK(trans_distspec(&distspec)); + + + // Transform sp to gp fields + double* rgp = malloc( sizeof(double) * nfld*trans.ngptot ); + + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nfld; + invtrans.rspscalar = rspec; + invtrans.rgp = rgp; + TRANS_CHECK( trans_invtrans(&invtrans) ); + + + // Gather all gridpoint fields + double* rgpg = NULL; + if( trans.myproc == 1 ) + rgpg = malloc( sizeof(double) * nfld*trans.ngptotg ); + + int* nto = malloc( sizeof(int) * nfld ); + for( jfld=0; jfldndgl=160; + trans->nloen=malloc(sizeof(int)*trans->ndgl); + for( i=0; indgl; i++) trans->nloen[i] = trans->ndgl*2; + + // Assume Linear Grid + trans->nsmax=1279; + //(2*trans->ndgl-1)/2; +} + + +void read_rspecg(struct Trans_t* trans, double* rspecg[], int* nfrom[], int* nfld ) +{ + int i; + int jfld; + if( trans->myproc == 1 ) printf("read_rspecg ...\n"); + *nfld = 2; + if( trans->myproc == 1 ) + { + *rspecg = malloc( sizeof(double) * (*nfld)*trans->nspec2g ); + for( i=0; inspec2g; ++i ) + { + (*rspecg)[i*(*nfld) + 0] = (i==0 ? 1. : 0.); // scalar field 1 + (*rspecg)[i*(*nfld) + 1] = (i==0 ? 2. : 0.); // scalar field 2 + } + } + *nfrom = malloc( sizeof(int) * (*nfld) ); + for (jfld=0; jfld<(*nfld); ++jfld) + (*nfrom)[jfld] = 1; + if( trans->myproc == 1 ) printf("read_rspecg ... done\n"); +} + +void write_rgpg(struct Trans_t* trans, double* rgpg[], int nfld ) +{ + int jfld; + if( trans->myproc == 1 ) printf("write_rgpg ...\n"); + for( jfld=0; jfldmyproc == 1 ) printf("write_rgpg ... done\n"); +} + diff --git a/tests/trans/test_adjoint.F90 b/tests/trans/test_adjoint.F90 new file mode 100644 index 0000000..3a0e824 --- /dev/null +++ b/tests/trans/test_adjoint.F90 @@ -0,0 +1,365 @@ +! (C) Copyright 2005- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +PROGRAM TEST_ADJOINT +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_INIT, MPL_END, MPL_BARRIER, MPL_MYRANK +USE ABORT_TRANS_MOD ! This is not really correct usage +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM) :: NSMAX,NDGL,NPROC,NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NREP +INTEGER(KIND=JPIM) :: IERROR,NOUT,MYPROC,NSPECG,NSPEC2G,NGPTOTG +INTEGER(KIND=JPIM) :: NFLEV,NFLEVG +INTEGER(KIND=JPIM) :: NSPEC2,NGPTOT,NPROMA,NGPBLKS,MYSETV,NUMP +INTEGER(KIND=JPIM) :: IVSET(1000) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLOEN(:),ITO(:),MYMS(:),NASM0(:),IPRCIDS(:) +INTEGER(KIND=JPIM) :: JLEV,JREP,JROC + +CHARACTER*6 CLNAME + +REAL(KIND=JPRB) , ALLOCATABLE :: ZSPECX(:,:),ZSPECY(:,:),ZSPECP(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZVORX(:,:),ZVORY(:,:),ZVORP(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZDIVX(:,:),ZDIVY(:,:),ZDIVP(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZGX(:,:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZSPECYG(:,:),ZSPECXG(:,:) +REAL(KIND=JPRB) , ALLOCATABLE :: ZNORM(:),GW(:),ZRANDSP(:),ZRANDGP(:,:),ZPERT(:,:,:) +REAL(KIND=JPRB) :: ZSC,ZWALL,ZWALL0 +REAL(KIND=JPRB),EXTERNAL :: TIMEF +LOGICAL :: LSTATS,LSTATSCPU,LSYNCSTATS,LDETAILED_STATS,& + &LSTATS_OMP, LSTATS_COMMS,& + <RACE_STATS, LBARRIER_STATS, LBARRIER_STATS2,& + &LSTATS_ALLOC, LSTATS_MEM, LXML_STATS, LMPOFF +INTEGER(KIND=JPIM) ::NTRACE_STATS, NPRNT_STATS, NSTATS_MEM +REAL(KIND=JPRB) :: ZAVEAVE(0:200) + +#include "setup_trans0.h" +#include "setup_trans.h" +#include "trans_inq.h" +#include "specnorm.h" +#include "dir_trans.h" +#include "inv_trans.h" +#include "dir_transad.h" +#include "inv_transad.h" +#include "dist_grid.h" +#include "gath_spec.h" +#include "dist_spec.h" +#include "gath_grid.h" +#include "gstats_setup.intfb.h" + +! ======== PARAMTERS WHICH MAY BE MODIFIED, ORIGINALLY COMING FROM NAMELSIT =========== +NDGL = 32 +NFLEVG = 9 +NPROMA = 8 +NPRGPNS = 1 +NPRGPEW = 1 ! NPROC must match NPRGPNS * NPRGPEW +NPRTRW = 1 +NPRTRV = 1 +NREP = 1 +! ====================================================================================== + +LSTATS = .TRUE. +LSTATSCPU = .FALSE. +LSYNCSTATS = .TRUE. +LDETAILED_STATS = .FALSE. +LBARRIER_STATS = .FALSE. +LBARRIER_STATS2 = .FALSE. +LSTATS_ALLOC = .FALSE. +LSTATS_MEM = .FALSE. +LTRACE_STATS = .FALSE. +LXML_STATS = .FALSE. +NTRACE_STATS = 0 +NPRNT_STATS = 3 +NSTATS_MEM = 0 +NPROC = NPRGPNS*NPRGPEW +IF(NPROC /= NPRTRW*NPRTRV) THEN + PRINT *,'NPRGPNS,NPRGPEW,NPRTRW,NPRTRV ',NPRGPNS,NPRGPEW,NPRTRW,NPRTRV + CALL ABORT_TRANS('NPRGPNS*NPRGPEW /= NPRTRW*NPRTRV') +ENDIF +NSMAX = (2*NDGL-1)/3 +NSPECG = (NSMAX+1)*(NSMAX+2)/2 +NSPEC2G = NSPECG*2 +IF(NPROC > 1 ) THEN + CALL MPL_INIT + MYPROC = MPL_MYRANK() + NOUT = 20 + WRITE(CLNAME,'(A,I2.2)') 'OUT.',MYPROC + OPEN(NOUT,FILE=CLNAME) + LMPOFF = .FALSE. +ELSE + LMPOFF = .TRUE. + NOUT = 6 + MYPROC = 1 +ENDIF +ALLOCATE(IPRCIDS(NPROC)) +DO JROC=1,NPROC + IPRCIDS(JROC) = JROC +ENDDO +WRITE(NOUT,*) ' NSMAX= ',NSMAX +WRITE(NOUT,*) ' NDGL=',NDGL +WRITE(NOUT,*) ' LMPOFF= ',LMPOFF + +MYSETV = MOD(MYPROC-1,NPRTRV)+1 +ALLOCATE(ZSPECYG(NFLEVG,NSPEC2G)) +ALLOCATE(ZSPECXG(NFLEVG,NSPEC2G)) +ALLOCATE(ZRANDSP(NSPEC2G)) +ALLOCATE(NLOEN(NDGL)) +ALLOCATE(ZNORM(NFLEVG)) +ALLOCATE(ITO(NFLEVG)) +NLOEN(:) = 2*NDGL +CALL SETUP_TRANS0(KOUT=NOUT,KERR=0,KPRINTLEV=0,KMAX_RESOL=1,& + & KPRGPNS=NPRGPNS,KPRGPEW=NPRGPEW,KPRTRW=NPRTRW,LDMPOFF=LMPOFF) +CALL SETUP_TRANS(KSMAX=NSMAX,KDGL=NDGL,KLOEN=NLOEN,LDSPLIT=.TRUE.) +CALL TRANS_INQ(KSPEC2=NSPEC2,KGPTOT=NGPTOT,KGPTOTG=NGPTOTG,KNUMP=NUMP) + +ALLOCATE(MYMS(NUMP)) +ALLOCATE(NASM0(0:NSMAX)) +ALLOCATE(GW(NDGL)) +ALLOCATE(ZRANDGP(NGPTOTG,1)) +CALL TRANS_INQ(KMYMS=MYMS,KASM0=NASM0,PGW=GW) +NGPBLKS = (NGPTOT-1)/NPROMA+1 + +WRITE(NOUT,*) ' NSPEC2=',NSPEC2 +WRITE(NOUT,*) ' NGPTOT=',NGPTOT + +NFLEV = 0 +DO JLEV=1,NFLEVG + IVSET(JLEV) = MOD(JLEV,NPRTRV)+1 + IF(IVSET(JLEV) == MYSETV) THEN + NFLEV = NFLEV+1 + ENDIF +ENDDO +WRITE(NOUT,*)' NFLEV=',NFLEV +WRITE(NOUT,*) 'SETUP FINISHED' +CALL FLUSH(NOUT) + +ALLOCATE(ZSPECX(NFLEV,NSPEC2)) +ALLOCATE(ZSPECY(NFLEV,NSPEC2)) +ALLOCATE(ZSPECP(NFLEV,NSPEC2)) +ALLOCATE(ZVORX(NFLEV,NSPEC2)) +ALLOCATE(ZVORY(NFLEV,NSPEC2)) +ALLOCATE(ZVORP(NFLEV,NSPEC2)) +ALLOCATE(ZDIVX(NFLEV,NSPEC2)) +ALLOCATE(ZDIVY(NFLEV,NSPEC2)) +ALLOCATE(ZDIVP(NFLEV,NSPEC2)) +ALLOCATE(ZGX(NPROMA,3*NFLEVG,NGPBLKS)) +ALLOCATE(ZPERT(NPROMA,1,NGPBLKS)) + +! Prepare perturbations + +IF(MYPROC == 1) THEN + DO JLEV=1,NFLEVG + CALL RANDOM_NUMBER(ZRANDSP) + ZSPECYG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + CALL RANDOM_NUMBER(ZRANDSP) + ZSPECXG(JLEV,:) = (1.0_JPRB-2.0_JPRB*ZRANDSP(:)) + ENDDO +ENDIF +CALL RANDOM_NUMBER(ZRANDGP) +ZRANDGP(:,:) = (1.0_JPRB-2.0_JPRB*ZRANDGP(:,:)) +ITO(:) = 1 +ZVORX = 0.0_JPRB +ZVORY = 0.0_JPRB +ZVORP = 0.0_JPRB +ZDIVX = 0.0_JPRB +ZDIVY = 0.0_JPRB +ZDIVP = 0.0_JPRB + +! Distribute perturbations + +CALL DIST_GRID(PGPG=ZRANDGP,KFDISTG=1,KFROM=ITO,PGP=ZPERT,KPROMA=NPROMA) +CALL DIST_SPEC(PSPECG=ZSPECXG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZSPECX,& + & KVSET=IVSET(1:NFLEVG)) + +!ZWALL=TIMEF() +!ZWALL0=ZWALL +CALL GSTATS(0,0) +CALL GSTATS_SETUP(NPROC,MYPROC,IPRCIDS,& + &LSTATS,LSTATSCPU,LSYNCSTATS,LDETAILED_STATS,LBARRIER_STATS,LBARRIER_STATS2,& + &LSTATS_OMP,LSTATS_COMMS,LSTATS_MEM,NSTATS_MEM,LSTATS_ALLOC,& + <RACE_STATS,NTRACE_STATS,NPRNT_STATS,LXML_STATS) + +CALL GSTATS_PSUT +DO JREP=1,NREP + + CALL DIST_SPEC(PSPECG=ZSPECYG,KFDISTG=NFLEVG,KFROM=ITO,PSPEC=ZSPECY,& + & KVSET=IVSET(1:NFLEVG)) +! Direct caclulations + + CALL INV_TRANS(PSPSCALAR=ZSPECX,PSPVOR=ZVORX,PSPDIV=ZDIVX,PGP=ZGX,& + & KPROMA=NPROMA,KVSETSC=IVSET(1:NFLEVG), KVSETUV=IVSET(1:NFLEVG)) + CALL GPC(ZGX) + CALL DIR_TRANS(PSPSCALAR=ZSPECP,PSPVOR=ZVORP,PSPDIV=ZDIVP,PGP=ZGX,& + &KPROMA=NPROMA,KVSETSC=IVSET(1:NFLEVG), KVSETUV=IVSET(1:NFLEVG)) + CALL SCALPRODSP(ZSPECP,ZSPECY,ZSC) + IF(JREP == NREP) WRITE(NOUT,*)' ',ZSC + +! Adjoint + + ZSPECP = 0.0_JPRB + CALL DIR_TRANSAD(PSPSCALAR=ZSPECY,PSPVOR=ZVORY,PSPDIV=ZDIVY,PGP=ZGX,& + &KPROMA=NPROMA, KVSETSC=IVSET(1:NFLEVG), KVSETUV=IVSET(1:NFLEVG)) + CALL GPCAD(ZGX) + CALL INV_TRANSAD(PSPSCALAR=ZSPECP,PSPVOR=ZVORP,PSPDIV=ZDIVP,PGP=ZGX,& + & KPROMA=NPROMA,KVSETSC=IVSET(1:NFLEVG),KVSETUV=IVSET(1:NFLEVG)) + + CALL SCALPRODSP(ZSPECX,ZSPECP,ZSC) + IF(JREP == NREP) WRITE(NOUT,*)' ',ZSC +! ZWALL=TIMEF() +! IF(MYPROC == 1) WRITE(0,*) 'Time spent in loop(i)=',jrep,(zwall-zwall0)*0.001 +! ZWALL0 = ZWALL +ENDDO +CALL GSTATS(0,1) + +!ZWALL=TIMEF() +!WRITE(NOUT,*) 'Time spent in main loop=',zwall*0.001 +!IF(MYPROC == 1) WRITE(0,*) 'Time spent in main loop=',zwall*0.001 + +CALL GSTATS_PRINT(NOUT,ZAVEAVE,200) +IF(NPROC > 1 ) THEN + CALL MPL_BARRIER() + CALL MPL_END +ENDIF + +CONTAINS + +SUBROUTINE GPC(PGP) + +! Grid-point computations + +REAL(KIND=JPRB) :: PGP(:,:,:) + +INTEGER(KIND=JPIM) :: JLEV,JKGLO,JROF,IEND,IBL,IOFF,IROF,JGL,JL + +CALL GSTATS(108,0) +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JLEV,JKGLO,IEND,IOFF,IBL,JROF) +DO JLEV=1,NFLEVG + DO JKGLO=1,NGPTOT,NPROMA + IEND = MIN(NPROMA,NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/NPROMA+1 + DO JROF=1,IEND + PGP(JROF,JLEV,IBL) = PGP(JROF,JLEV,IBL)*ZPERT(JROF,1,IBL) + ENDDO + ENDDO +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(108,1) + + +END SUBROUTINE GPC + +SUBROUTINE GPCAD(PGP) + +! Adjoint of GPC + +REAL(KIND=JPRB) :: PGP(:,:,:) +INTEGER(KIND=JPIM) :: JLEV,JKGLO,JROF,IEND,IBL,IOFF,IROF,JGL,JL + +CALL GSTATS(135,0) +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JLEV,JKGLO,IEND,IOFF,IBL,JROF) +DO JLEV=1,NFLEVG + DO JKGLO=1,NGPTOT,NPROMA + IEND = MIN(NPROMA,NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/NPROMA+1 + DO JROF=1,IEND + PGP(JROF,JLEV,IBL) = PGP(JROF,JLEV,IBL)*ZPERT(JROF,1,IBL) + ENDDO + ENDDO +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(135,1) + + +END SUBROUTINE GPCAD + +SUBROUTINE SCALPRODSP(PSP1,PSP2,PSC) + +! Scalar product in spectral space +REAL(KIND=JPRB) :: PSP1(:,:),PSP2(:,:) +REAL(KIND=JPRB) :: PSC + +INTEGER(KIND=JPIM) :: JMLOC,IM,JIR,JN,INM,JLEV +REAL(KIND=JPRB) :: ZMFACT,ZSP(NFLEV,NSPEC2),ZSPG(NFLEVG,NSPEC2G) + +PSC = 0.0_JPRB +ZSP(:,:) = 0.0_JPRB + +CALL GSTATS(110,0) +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JLEV,JMLOC,IM,ZMFACT,JIR,JN,INM) +DO JLEV=1,NFLEV + DO JMLOC=1,NUMP + IM = MYMS(JMLOC) + ZMFACT=1.0_JPRB+REAL(MIN(1,IM),JPRB) + DO JIR=0,MIN(1,IM) + DO JN=IM,NSMAX + INM = NASM0(IM)+(JN-IM)*2+JIR + ZSP(JLEV,INM) = PSP1(JLEV,INM)*PSP2(JLEV,INM)*ZMFACT/2.0_JPRB + ENDDO + ENDDO + ENDDO +ENDDO +!$OMP END PARALLEL DO + +CALL GATH_SPEC(PSPECG=ZSPG,KFGATHG=NFLEVG,KTO=ITO,PSPEC=ZSP,& + & KVSET=IVSET(1:NFLEVG)) + +IF(MYPROC == 1) THEN + PSC = SUM(ZSPG) +ELSE + PSC = 0.0_JPRB +ENDIF +CALL GSTATS(110,1) + +END SUBROUTINE SCALPRODSP + +SUBROUTINE SCALPRODGP(PGP1,PGP2,PSC) + +!Scalar product in gridpoint space + +REAL(KIND=JPRB) :: PGP1(:,:,:),PGP2(:,:,:) +REAL(KIND=JPRB) :: PSC + +INTEGER(KIND=JPIM) :: JLEV,JKGLO,JROF,IEND,IBL,IOFF,IROF,JGL,JL +REAL(KIND=JPRB) :: ZGP(NPROMA,NFLEVG,NGPBLKS),ZGPG(NGPTOTG,NFLEVG),ZWTG(NGPTOTG,1),ZWT(NPROMA,1,NGPBLKS) + +PSC = 0.0_JPRB +ZGP(:,:,:) = 0.0_JPRB + +IROF=0 +DO JGL=1,NDGL + DO JL=1,NLOEN(JGL) + IROF=IROF+1 + ZWTG(IROF,1) = 1.0_JPRB + ENDDO +ENDDO +CALL DIST_GRID(PGPG=ZWTG,KFDISTG=1,KFROM=ITO,PGP=ZWT,KPROMA=NPROMA) + +DO JLEV=1,NFLEV + DO JKGLO=1,NGPTOT,NPROMA + IEND = MIN(NPROMA,NGPTOT-JKGLO+1) + IOFF = JKGLO-1 + IBL = (JKGLO-1)/NPROMA+1 + DO JROF=1,IEND + ZGP(JROF,JLEV,IBL) = PGP1(JROF,JLEV,IBL)*PGP2(JROF,JLEV,IBL)*& + &ZWT(JROF,1,IBL) + ENDDO + ENDDO +ENDDO + +CALL GATH_GRID(PGPG=ZGPG,KFGATHG=NFLEVG,KTO=ITO,PGP=ZGP,KPROMA=NPROMA) + +PSC = SUM(ZGPG) + +END SUBROUTINE SCALPRODGP + +END PROGRAM TEST_ADJOINT diff --git a/tests/transi/transi_test.c b/tests/transi/transi_test.c new file mode 100644 index 0000000..48b859f --- /dev/null +++ b/tests/transi/transi_test.c @@ -0,0 +1,159 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include + +#include "transi_test.h" + +// ----------------------------------------------------------------------------- +// IMPLEMENTATIONS + +int test_use_mpi() { + if ( getenv( "TRANS_USE_MPI" ) ) { return atoi( getenv( "TRANS_USE_MPI" ) ); } + return 1; +} + +double transi_test_time() +{ + static double time_init = -1; + double time_in_secs; + struct timeval tbuf; + if (gettimeofday(&tbuf,NULL) == -1) perror("transi_test_time"); + if (time_init == -1) time_init = + (double) tbuf.tv_sec + (tbuf.tv_usec / 1000000.0); + time_in_secs = + (double) tbuf.tv_sec + (tbuf.tv_usec / 1000000.0) - time_init; + return time_in_secs; +} + +void print_time(const char* str,double elapsed) +{ + int msec = elapsed * 1000; + printf("%s%d seconds %d milliseconds\n",str, msec/1000, msec%1000); +} + +#ifdef TRANSI_HAVE_MEMORY +#include +#endif + +void display_mallinfo(void) +{ +#ifdef TRANSI_HAVE_MEMORY + struct mallinfo mi; + + mi = mallinfo(); + + printf("Total non-mmapped bytes (arena): %d\n", mi.arena); + printf("# of free chunks (ordblks): %d\n", mi.ordblks); + printf("# of free fastbin blocks (smblks): %d\n", mi.smblks); + printf("# of mapped regions (hblks): %d\n", mi.hblks); + printf("Bytes in mapped regions (hblkhd): %d\n", mi.hblkhd); + printf("Max. total allocated space (usmblks): %d\n", mi.usmblks); + printf("Free bytes held in fastbins (fsmblks): %d\n", mi.fsmblks); + printf("Total allocated space (uordblks): %d\n", mi.uordblks); + printf("Total free space (fordblks): %d\n", mi.fordblks); + printf("Topmost releasable block (keepcost): %d\n", mi.keepcost); +#endif +} + +int allocated() +{ +#ifdef TRANSI_HAVE_MEMORY + struct mallinfo mi; + mi = mallinfo(); + return mi.hblkhd + mi.uordblks; +#else + return 0; +#endif +} + +void print_mem(const char* str,const int bytes) +{ +#ifdef TRANSI_HAVE_MEMORY + float B, KB, MB; + B = bytes; + KB = B/1024.; + MB = KB/1024.; + if( MB > 0.1 ) + printf("%s%f MB\n",str,MB); + else + printf("%s%f KB\n",str,KB); +#endif +} + + +void set_standard_rgg(struct Trans_t* trans, int N, int T) +{ + if( N==48 ) // TL95 + { + int nloen[] = {20,25,36,40,45,50,60,60,72,75,80,90,96,100,108,120,120,120,128,135,144,144,160,160,160,160,160,180,180,180,180,180,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,180,180,180,180,180,160,160,160,160,160,144,144,135,128,120,120,120,108,100,96,90,80,75,72,60,60,50,45,40,36,25,20}; + trans_set_resol(trans,sizeof(nloen)/sizeof(int),nloen); + if( T<0 ) + trans_set_trunc(trans,95); + else + trans_set_trunc(trans,T); + return; + } + + if( N==80 ) // TL159 + { + int nloen[] = {18,25,36,40,45,54,60,64,72,72,80,90,96,100,108,120,120,128,135,144,144,150,160,160,180,180,180,192,192,200,200,216,216,216,225,225,240,240,240,256,256,256,256,288,288,288,288,288,288,288,288,288,300,300,300,300,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,300,300,300,300,288,288,288,288,288,288,288,288,288,256,256,256,256,240,240,240,225,225,216,216,216,200,200,192,192,180,180,180,160,160,150,144,144,135,128,120,120,108,100,96,90,80,72,72,64,60,54,45,40,36,25,18}; + trans_set_resol(trans,sizeof(nloen)/sizeof(int),nloen); + if( T<0 ) + trans_set_trunc(trans,159); + else + trans_set_trunc(trans,T); + return; + } + + if( N==128 ) // TL255 + { + int nloen[] = {18,25,36,40,45,50,60,64,72,72,80,90,90,100,108,120,120,125,128,144,144,150,160,160,180,180,180,192,192,200,216,216,216,225,240,240,240,250,250,256,270,270,288,288,288,300,300,320,320,320,320,324,360,360,360,360,360,360,360,375,375,375,375,384,384,400,400,400,400,405,432,432,432,432,432,432,432,450,450,450,450,450,480,480,480,480,480,480,480,480,480,480,486,486,486,500,500,500,500,500,500,500,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,500,500,500,500,500,500,500,486,486,486,480,480,480,480,480,480,480,480,480,480,450,450,450,450,450,432,432,432,432,432,432,432,405,400,400,400,400,384,384,375,375,375,375,360,360,360,360,360,360,360,324,320,320,320,320,300,300,288,288,288,270,270,256,250,250,240,240,240,225,216,216,216,200,192,192,180,180,180,160,160,150,144,144,128,125,120,120,108,100,90,90,80,72,72,64,60,50,45,40,36,25,18}; + trans_set_resol(trans,sizeof(nloen)/sizeof(int),nloen); + if( T<0 ) + trans_set_trunc(trans,255); + else + trans_set_trunc(trans,T); + return; + } + + if( N==256 ) // TL511 + { + int nloen[] = {18,25,32,40,45,50,60,64,72,72,75,81,90,96,100,108,120,120,125,135,144,150,160,160,180,180,180,192,192,200,216,216,216,225,240,240,243,250,256,270,270,288,288,288,300,300,320,320,320,324,360,360,360,360,360,360,375,375,384,384,400,400,400,432,432,432,432,432,450,450,450,480,480,480,480,480,486,500,500,500,512,512,540,540,540,540,540,576,576,576,576,576,576,600,600,600,600,600,640,640,640,640,640,640,640,640,648,675,675,675,675,675,675,720,720,720,720,720,720,720,720,720,729,729,750,750,750,750,750,768,768,768,768,800,800,800,800,800,800,800,800,810,810,864,864,864,864,864,864,864,864,864,864,864,864,864,864,900,900,900,900,900,900,900,900,900,900,900,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,972,972,972,972,972,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,972,972,972,972,972,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,900,900,900,900,900,900,900,900,900,900,900,864,864,864,864,864,864,864,864,864,864,864,864,864,864,810,810,800,800,800,800,800,800,800,800,768,768,768,768,750,750,750,750,750,729,729,720,720,720,720,720,720,720,720,720,675,675,675,675,675,675,648,640,640,640,640,640,640,640,640,600,600,600,600,600,576,576,576,576,576,576,540,540,540,540,540,512,512,500,500,500,486,480,480,480,480,480,450,450,450,432,432,432,432,432,400,400,400,384,384,375,375,360,360,360,360,360,360,324,320,320,320,300,300,288,288,288,270,270,256,250,243,240,240,225,216,216,216,200,192,192,180,180,180,160,160,150,144,135,125,120,120,108,100,96,90,81,75,72,72,64,60,50,45,40,32,25,18}; + trans_set_resol(trans,sizeof(nloen)/sizeof(int),nloen); + if( T<0 ) + trans_set_trunc(trans,511); + else + trans_set_trunc(trans,T); + return; + } + + if( N==640 ) // TL1279 + { + int nloen[] = {18,25,32,40,45,50,60,60,72,72,75,81,90,90,96,100,108,120,120,125,135,144,150,160,160,180,180,180,192,192,200,216,216,216,225,240,240,243,250,256,270,270,288,288,288,300,300,320,320,320,360,360,360,360,360,360,375,375,384,384,400,400,400,432,432,432,432,450,450,450,480,480,480,480,480,486,500,500,512,512,540,540,540,540,540,576,576,576,576,576,600,600,600,600,640,640,640,640,640,640,640,648,675,675,675,675,720,720,720,720,720,720,720,720,729,750,750,750,750,768,768,768,800,800,800,800,800,810,810,864,864,864,864,864,864,864,864,900,900,900,900,900,900,960,960,960,960,960,960,960,960,960,960,960,972,972,1000,1000,1000,1000,1000,1024,1024,1024,1024,1080,1080,1080,1080,1080,1080,1080,1080,1080,1125,1125,1125,1125,1125,1125,1125,1125,1152,1152,1152,1152,1152,1200,1200,1200,1200,1200,1200,1200,1200,1215,1215,1215,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1296,1296,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1458,1458,1458,1458,1500,1500,1500,1500,1500,1500,1500,1500,1536,1536,1536,1536,1536,1536,1536,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1620,1620,1620,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1920,1920,1920,1920,1920,1920,1920,1920,1920,1920,1920,1944,1944,1944,1944,1944,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2025,2025,2025,2025,2025,2025,2048,2048,2048,2048,2048,2048,2048,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2187,2187,2187,2187,2187,2187,2187,2187,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2430,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2187,2187,2187,2187,2187,2187,2187,2187,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2048,2048,2048,2048,2048,2048,2048,2025,2025,2025,2025,2025,2025,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,1944,1944,1944,1944,1944,1920,1920,1920,1920,1920,1920,1920,1920,1920,1920,1920,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1620,1620,1620,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1536,1536,1536,1536,1536,1536,1536,1500,1500,1500,1500,1500,1500,1500,1500,1458,1458,1458,1458,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1296,1296,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1215,1215,1215,1200,1200,1200,1200,1200,1200,1200,1200,1152,1152,1152,1152,1152,1125,1125,1125,1125,1125,1125,1125,1125,1080,1080,1080,1080,1080,1080,1080,1080,1080,1024,1024,1024,1024,1000,1000,1000,1000,1000,972,972,960,960,960,960,960,960,960,960,960,960,960,900,900,900,900,900,900,864,864,864,864,864,864,864,864,810,810,800,800,800,800,800,768,768,768,750,750,750,750,729,720,720,720,720,720,720,720,720,675,675,675,675,648,640,640,640,640,640,640,640,600,600,600,600,576,576,576,576,576,540,540,540,540,540,512,512,500,500,486,480,480,480,480,480,450,450,450,432,432,432,432,400,400,400,384,384,375,375,360,360,360,360,360,360,320,320,320,300,300,288,288,288,270,270,256,250,243,240,240,225,216,216,216,200,192,192,180,180,180,160,160,150,144,135,125,120,120,108,100,96,90,90,81,75,72,72,60,60,50,45,40,32,25,18}; + trans_set_resol(trans,sizeof(nloen)/sizeof(int),nloen); + if( T<0 ) + trans_set_trunc(trans,1279); + else + trans_set_trunc(trans,T); + return; + } + + if( N==1024 ) // TL2047 + { + int nloen[] = {18,25,32,40,45,50,60,64,72,72,75,81,90,96,96,108,108,120,120,125,125,135,144,150,160,160,180,180,180,192,192,200,216,216,225,225,240,240,243,250,256,270,270,288,288,288,300,300,320,320,320,360,360,360,360,360,360,375,375,384,384,400,400,405,432,432,432,432,450,450,450,480,480,480,480,480,486,500,500,512,512,540,540,540,540,576,576,576,576,576,576,600,600,600,600,625,625,625,625,640,640,648,675,675,675,675,675,720,720,720,720,720,720,720,729,750,750,750,750,768,768,800,800,800,800,800,800,810,864,864,864,864,864,864,864,864,864,900,900,900,900,900,900,960,960,960,960,960,960,960,960,960,972,972,1000,1000,1000,1000,1000,1024,1024,1024,1024,1080,1080,1080,1080,1080,1080,1080,1080,1080,1125,1125,1125,1125,1125,1125,1125,1152,1152,1152,1152,1152,1200,1200,1200,1200,1200,1200,1200,1215,1215,1215,1250,1250,1250,1250,1250,1250,1280,1280,1280,1280,1280,1296,1296,1350,1350,1350,1350,1350,1350,1350,1350,1350,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1458,1458,1458,1500,1500,1500,1500,1500,1500,1500,1500,1536,1536,1536,1536,1536,1536,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1620,1620,1620,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1920,1920,1920,1920,1920,1920,1920,1920,1944,1944,1944,1944,1944,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,2025,2025,2025,2025,2048,2048,2048,2048,2048,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2187,2187,2187,2187,2187,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2430,2430,2430,2430,2430,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2592,2592,2592,2592,2592,2592,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2916,2916,2916,2916,2916,2916,2916,2916,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3240,3240,3240,3240,3240,3240,3240,3240,3240,3240,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4050,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3888,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3840,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3750,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3645,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3600,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3456,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3375,3240,3240,3240,3240,3240,3240,3240,3240,3240,3240,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3200,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3125,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3072,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,3000,2916,2916,2916,2916,2916,2916,2916,2916,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2880,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2700,2592,2592,2592,2592,2592,2592,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2560,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2500,2430,2430,2430,2430,2430,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2400,2304,2304,2304,2304,2304,2304,2304,2304,2304,2304,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2250,2187,2187,2187,2187,2187,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160,2048,2048,2048,2048,2048,2025,2025,2025,2025,2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,1944,1944,1944,1944,1944,1920,1920,1920,1920,1920,1920,1920,1920,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1875,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1800,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1728,1620,1620,1620,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1600,1536,1536,1536,1536,1536,1536,1500,1500,1500,1500,1500,1500,1500,1500,1458,1458,1458,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1440,1350,1350,1350,1350,1350,1350,1350,1350,1350,1296,1296,1280,1280,1280,1280,1280,1250,1250,1250,1250,1250,1250,1215,1215,1215,1200,1200,1200,1200,1200,1200,1200,1152,1152,1152,1152,1152,1125,1125,1125,1125,1125,1125,1125,1080,1080,1080,1080,1080,1080,1080,1080,1080,1024,1024,1024,1024,1000,1000,1000,1000,1000,972,972,960,960,960,960,960,960,960,960,960,900,900,900,900,900,900,864,864,864,864,864,864,864,864,864,810,800,800,800,800,800,800,768,768,750,750,750,750,729,720,720,720,720,720,720,720,675,675,675,675,675,648,640,640,625,625,625,625,600,600,600,600,576,576,576,576,576,576,540,540,540,540,512,512,500,500,486,480,480,480,480,480,450,450,450,432,432,432,432,405,400,400,384,384,375,375,360,360,360,360,360,360,320,320,320,300,300,288,288,288,270,270,256,250,243,240,240,225,225,216,216,200,192,192,180,180,180,160,160,150,144,135,125,125,120,120,108,108,96,96,90,81,75,72,72,64,60,50,45,40,32,25,18}; + trans_set_resol(trans,sizeof(nloen)/sizeof(int),nloen); + if( T<0 ) + trans_set_trunc(trans,2047); + else + trans_set_trunc(trans,T); + return; + } +} diff --git a/tests/transi/transi_test.h b/tests/transi/transi_test.h new file mode 100644 index 0000000..e174327 --- /dev/null +++ b/tests/transi/transi_test.h @@ -0,0 +1,57 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#ifndef TRANSI_TEST_H +#define TRANSI_TEST_H + +#include +#include + +#include "ectrans/transi.h" + +#define TRANS_CHECK( CALL ) do {\ + int errcode = CALL;\ + if( errcode != TRANS_SUCCESS) {\ + printf("ERROR: %s failed @%s:%d:\n%s\n",#CALL,__FILE__,__LINE__,trans_error_msg(errcode));\ + exit(1);\ + }\ +} while(0) + +#define TRANS_CHECK_ERROR( CALL, ERR ) do {\ + int errcode = CALL;\ + if( errcode != ERR ) {\ + printf("ERROR: %s should fail with errcode %s(%d) @%s:%d:\n%s\n",#CALL,#ERR,ERR,__FILE__,__LINE__,trans_error_msg(errcode));\ + exit(1);\ + }\ +} while(0) + +#define ASSERT( assertion ) do {\ + if( !(assertion) ) {\ + printf("ERROR: Assertion `%s' failed @%s:%d\n",#assertion,__FILE__,__LINE__);\ + exit(1);\ + }\ +} while(0) + +#define TRANS_ERROR -1 +#define TRANS_NOTIMPL -2 +#define TRANS_MISSING_ARG -3 +#define TRANS_UNRECOGNIZED_ARG -4 +#define TRANS_STALE_ARG -5 + +double transi_test_time(); +void print_time(const char* str,double elapsed); +void display_mallinfo(void); +int allocated(); +void print_mem(const char* str,const int bytes); +void set_standard_rgg(struct Trans_t* trans, int N, int T); + +int test_use_mpi(); + +#endif diff --git a/tests/transi/transi_test_invtrans_adjoint.c b/tests/transi/transi_test_invtrans_adjoint.c new file mode 100644 index 0000000..e864659 --- /dev/null +++ b/tests/transi/transi_test_invtrans_adjoint.c @@ -0,0 +1,223 @@ +/* + * (C) British Crown Copyright 2021 Met Office + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + */ + +#include +#include +#include + +#include "ectrans/transi.h" +#include "transi_test.h" + +// ---------------------------------------------------------------------------- + +void test_invtrans_adjoint(int nlon, int nlat, int nsmax) +{ +double adjoint_tol = 1.e-6; +printf("test_invtrans_adjoint( nlon=%d, nlat=%d, nsmax=%d )\n",nlon,nlat,nsmax); + +int nout = 2; + +struct Trans_t trans; +TRANS_CHECK( trans_new(&trans) ); + +int* nloen = malloc( sizeof(int) * nlat); +{ + int i; + for( i=0; i rspscalarg[fld=%d][wave=%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); + + } +} + +printf("trans_distspec()\n"); +// Distribute spectral field (for fun) +struct DistSpec_t distspec = new_distspec(&trans); + distspec.rspec = rspscalar; + distspec.rspecg = rspscalarg; + distspec.nfld = nscalar; + distspec.nfrom = nto; +TRANS_CHECK( trans_distspec(&distspec) ); + + +printf("trans_invtrans()\n"); +// Inverse Transform +struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; + invtrans.nvordiv = nvordiv; + invtrans.rspscalar = rspscalar; + invtrans.rspvor = rspvor; + invtrans.rspdiv = rspdiv; + invtrans.rgp = rgp; +TRANS_CHECK( trans_invtrans(&invtrans) ); + +printf("trans_gathgrid()\n"); +// Gather gridpoint fields +struct GathGrid_t gathgrid = new_gathgrid(&trans); + gathgrid.rgp = rgp; + gathgrid.rgpg = rgpg; + gathgrid.nto = nfrom; + gathgrid.nfld = nfld; +TRANS_CHECK( trans_gathgrid(&gathgrid) ); + + +double adj_value2 = 0.0; +if( trans.myproc == 1 ) +{ + int i,j; + for( j=0; j=2 ) + { + for( i=0; i +#include +#include + +#include "ectrans/transi.h" +#include "transi_test.h" + + +int read_bytes(const char* fp, void** buffer, size_t* size) +{ + FILE *fileptr; + fileptr = fopen(fp, "rb"); // Open the file in binary mode + fseek(fileptr, 0, SEEK_END); // Jump to the end of the file + *size = ftell(fileptr); // Get the current byte offset in the file + rewind(fileptr); // Jump back to the beginning of the file + + *buffer = (void *)malloc((*size+1)*sizeof(char)); // Enough memory for file + \0 + fread(*buffer, *size, 1, fileptr); // Read in the entire file + fclose(fileptr); + return 0; +} + + +void test_io() +{ + int N=80; + int T=-1; + struct Trans_t trans; + void* buffer; + size_t size; + const char* filepath = "TL159_lp"; + double start,end; + int mem; + + TRANS_CHECK( trans_new(&trans) ); + set_standard_rgg(&trans,N,T); + TRANS_CHECK( trans_set_write(&trans,filepath) ); + mem = allocated(); + start = transi_test_time(); + TRANS_CHECK( trans_setup(&trans) ); + end=transi_test_time(); + print_time("Timing rgg compute+write: ",end-start); + print_mem( "Alloc rgg compute+write: ",allocated()-mem); + TRANS_CHECK( trans_delete(&trans) ); + + + read_bytes(filepath,&buffer,&size); + print_mem( "Cache size:", size); + + TRANS_CHECK( trans_new(&trans) ); + set_standard_rgg(&trans,N,T); + TRANS_CHECK( trans_set_cache(&trans,buffer,size) ); + mem = allocated(); + start = transi_test_time(); + TRANS_CHECK( trans_setup(&trans) ); + print_time("Timing rgg use cache: ",transi_test_time()-start); + print_mem( "Alloc rgg use cache: ",allocated()-mem); + TRANS_CHECK( trans_delete(&trans) ); + + + TRANS_CHECK( trans_new(&trans) ); + set_standard_rgg(&trans,N,T); + TRANS_CHECK( trans_set_read(&trans,filepath) ); + mem = allocated(); + start = transi_test_time(); + TRANS_CHECK( trans_setup(&trans) ); + print_time("Timing rgg read: ",transi_test_time()-start); + print_mem( "Alloc rgg read: ",allocated()-mem); + TRANS_CHECK( trans_delete(&trans) ); + + free(buffer); +} + +void test_io_lonlat(int nlon, int nlat, int nsmax, int flt) +{ + printf("\nll.%dx%d --- T%d\n",nlon,nlat,nsmax); + struct Trans_t trans; + void* buffer = NULL; + size_t size; + char filepath[200]; + sprintf(filepath,"T%d_ll.%dx%d_flt%d",nsmax,nlon,nlat,flt); + double start; + int mem; + +// int nlon=1280; +// int nlat=641; +// int nsmax=639; + +// int nlon=640; +// int nlat=321; +// int nsmax=319; + +// int nlon=320; +// int nlat=161; +// int nsmax=159; + + bool lonlat=true; + int N = (nlat-1)/2; + +// int nlon=160; +// int nlat=81; +// int nsmax=79; + + int nscalar = 2; + int nvordiv = 1; + int nfld = 2*nvordiv+nscalar; + + double* rspscalar = NULL; + double* rspvor = NULL; + double* rspdiv = NULL; + double* rgp = NULL; + struct InvTrans_t invtrans; + + // --------------------------------------- + // Writing + // --------------------------------------- + int j; + for( j=0; j<1; ++j ) { + printf("Writing\n"); + + TRANS_CHECK( trans_new(&trans) ); + trans.flt = flt; + if( lonlat ) + { + TRANS_CHECK( trans_set_resol_lonlat(&trans,nlon,nlat) ); + TRANS_CHECK( trans_set_trunc(&trans,nsmax) ); + } + else + { + set_standard_rgg(&trans,N,nsmax); + } + TRANS_CHECK( trans_set_write(&trans,filepath) ); + mem = allocated(); + start = transi_test_time(); + TRANS_CHECK( trans_setup(&trans) ); + print_time("Timing lonlat compute+write: ",transi_test_time()-start); + print_mem ("Alloc lonlat compute+write: ",allocated()-mem); + + + if( nscalar && rspscalar == NULL ) + rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + if( nvordiv && rspvor == NULL && rspdiv == NULL ) { + rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + } + if( nfld && rgp == NULL ) + rgp = malloc( sizeof(double) * nfld * trans.ngptot ); + + printf("trans_invtrans()...\n"); + double start_transform = transi_test_time(); + invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; + invtrans.nvordiv = nvordiv; + invtrans.rspscalar = rspscalar; + invtrans.rspvor = rspvor; + invtrans.rspdiv = rspdiv; + invtrans.rgp = rgp; + TRANS_CHECK( trans_invtrans(&invtrans) ); + print_time("trans_invtrans()...done in ",transi_test_time()-start_transform); + + TRANS_CHECK( trans_delete(&trans) ); + } + + // --------------------------------------- + // Use Cache + // --------------------------------------- + if(1) { + printf("Use Cache\n"); + + read_bytes(filepath,&buffer,&size); + + print_mem( "Cache size:", size); + + TRANS_CHECK( trans_new(&trans) ); + trans.flt = flt; + if( lonlat ) + { + TRANS_CHECK( trans_set_resol_lonlat(&trans,nlon,nlat) ); + TRANS_CHECK( trans_set_trunc(&trans,nsmax) ); + } + else + { + set_standard_rgg(&trans,N,nsmax); + } + + TRANS_CHECK( trans_set_cache(&trans,buffer,size) ); + mem = allocated(); + start = transi_test_time(); + TRANS_CHECK( trans_setup(&trans) ); + print_time("Timing lonlat use cache: ",transi_test_time()-start); + print_mem ("Alloc lonlat use cache: ",allocated()-mem); + + if( nscalar && rspscalar == NULL ) + rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + if( nvordiv && rspvor == NULL && rspdiv == NULL ) { + rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + } + if( nfld && rgp == NULL ) + rgp = malloc( sizeof(double) * nfld * trans.ngptot ); + + printf("trans_invtrans()...\n"); + double start_transform = transi_test_time(); + invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; + invtrans.nvordiv = nvordiv; + invtrans.rspscalar = rspscalar; + invtrans.rspvor = rspvor; + invtrans.rspdiv = rspdiv; + invtrans.rgp = rgp; + TRANS_CHECK( trans_invtrans(&invtrans) ); + print_time("trans_invtrans()...done in ",transi_test_time()-start_transform); + + + TRANS_CHECK( trans_delete(&trans) ); + } + + // --------------------------------------- + // Reading + // --------------------------------------- + printf("Reading\n"); + TRANS_CHECK( trans_new(&trans) ); + trans.flt = flt; + if( lonlat ) + { + TRANS_CHECK( trans_set_resol_lonlat(&trans,nlon,nlat) ); + TRANS_CHECK( trans_set_trunc(&trans,nsmax) ); + } + else + { + set_standard_rgg(&trans,N,nsmax); + } + TRANS_CHECK( trans_set_read(&trans,filepath) ); + mem = allocated(); + start = transi_test_time(); + TRANS_CHECK( trans_setup(&trans) ); + print_time("Timing lonlat read: ",transi_test_time()-start); + print_mem ("Alloc lonlat read: ",allocated()-mem); + + if( nscalar && rspscalar == NULL ) + rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + if( nvordiv && rspvor == NULL && rspdiv == NULL ) { + rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + } + if( nfld && rgp == NULL ) + rgp = malloc( sizeof(double) * nfld * trans.ngptot ); + + printf("trans_invtrans()...\n"); + invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; + invtrans.nvordiv = nvordiv; + invtrans.rspscalar = rspscalar; + invtrans.rspvor = rspvor; + invtrans.rspdiv = rspdiv; + invtrans.rgp = rgp; + TRANS_CHECK( trans_invtrans(&invtrans) ); + printf("trans_invtrans()...done\n"); + + TRANS_CHECK( trans_delete(&trans) ); + + // --------------------------------------- + + + if( buffer ) free(buffer); + if( rgp ) free(rgp); + if( rspscalar ) free(rspscalar); + if( rspvor ) free(rspvor); + if( rspdiv ) free(rspdiv); + +} + +int main ( int arc, char **argv ) +{ + trans_use_mpi( test_use_mpi() ); + setbuf(stdout, NULL); + TRANS_CHECK( trans_init() ); + + test_io(); + + int flt = false; + test_io_lonlat(320,161,511,flt); + test_io_lonlat(320,161,159,flt); +// test_io_lonlat(2400,1201,799,flt); + + + TRANS_CHECK( trans_finalize() ); + + return 0; +} + diff --git a/tests/transi/transi_test_lonlat.c b/tests/transi/transi_test_lonlat.c new file mode 100644 index 0000000..e16c37a --- /dev/null +++ b/tests/transi/transi_test_lonlat.c @@ -0,0 +1,300 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include + +#include "ectrans/transi.h" + +#include "transi_test.h" + +static bool check_values = false; + +// ---------------------------------------------------------------------------- + +void test_gptosptogp(int nlon, int nlat, int nsmax) +{ + double gptosp_tol = 1.e-6; + double sptogp_tol = 1.e-6; + printf("test_gptosptogp( nlon=%d, nlat=%d, nsmax=%d )\n",nlon,nlat,nsmax); + + int nout = 2; + + struct Trans_t trans; + TRANS_CHECK( trans_new(&trans) ); + TRANS_CHECK( trans_set_resol_lonlat(&trans,nlon,nlat) ); + TRANS_CHECK( trans_set_trunc(&trans,nsmax) ); + + //set_standard_rgg(&trans,(nlat-1)/2,nsmax); + + + trans.fft = TRANS_FFTW; + trans.flt = 0; + + TRANS_CHECK( trans_setup(&trans) ); + printf("ndgl = %d\n",trans.ndgl); + printf("nsmax = %d\n",trans.nsmax); + printf("ngptotg = %d\n",trans.ngptotg); + + ASSERT(trans.ngptotg == nlon*nlat); + + // In case of odd number of latitudes, there is one latitude extra in distributed gridpoints + // This can only be checked with nproc==1 + if( trans.nproc == 1 ) + ASSERT(trans.ngptot == nlon*nlat + nlon*(nlat%2)); + + // Allocate gridpoint data + int nscalar = 2; + int nvordiv = 1; + int nfld = 2*nvordiv+nscalar; + double* rgp = malloc( sizeof(double) * nfld * trans.ngptot ); + + // Load data on proc 1 + double* rgpg = NULL; + if( trans.myproc == 1 ) + { + rgpg = malloc( sizeof(double) * nfld*trans.ngptotg ); + int i; + for( i=0; i=0 ) + { + for( i=0; i gptosp_tol) + printf("error --> rgp[fld=%d][pt=%d] : %f instead of %d\n",j,i,rgp[j*trans.ngptot+i],(j+1)); + ASSERT( fabs(rgp[j*trans.ngptot+i]-(j+1)) < gptosp_tol); + } + } + + } + } + + // Allocate spectral data + + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + + // Direct Transform + printf("trans_dirtrans()\n"); + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nscalar = nscalar; + dirtrans.nvordiv = nvordiv; + dirtrans.rgp = rgp; + dirtrans.rspscalar = rspscalar; + dirtrans.rspvor = rspvor; + dirtrans.rspdiv = rspdiv; + TRANS_CHECK( trans_dirtrans(&dirtrans) ); + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j=2 && check_values ) + { + for( i=1; i gptosp_tol ) + printf("error --> rspscalar[fld=%d][wave=%d] : %f\n",j,i,rspscalar[i*nscalar+j]); + } + } + } + } + + // Gather spectral field (for fun) + int* nto = malloc( sizeof(int) * nscalar ); + nto[0] = 1; + nto[1] = 1; + + double* rspscalarg = NULL; + if( trans.myproc == 1 ) + rspscalarg = malloc( sizeof(double) * nscalar*trans.nspec2g ); + + printf("trans_gathspec()\n"); + struct GathSpec_t gathspec = new_gathspec(&trans); + gathspec.rspec = rspscalar; + gathspec.rspecg = rspscalarg; + gathspec.nfld = nscalar; + gathspec.nto = nto; + TRANS_CHECK( trans_gathspec(&gathspec) ); + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j gptosp_tol ) + printf("error -> rspscalarg[fld=%d][wave=%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); + } + } + } + } + + printf("trans_distspec()\n"); + // Distribute spectral field (for fun) + struct DistSpec_t distspec = new_distspec(&trans); + distspec.rspec = rspscalar; + distspec.rspecg = rspscalarg; + distspec.nfld = nscalar; + distspec.nfrom = nto; + TRANS_CHECK( trans_distspec(&distspec) ); + + + printf("trans_invtrans()\n"); + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; + invtrans.nvordiv = nvordiv; + invtrans.rspscalar = rspscalar; + invtrans.rspvor = rspvor; + invtrans.rspdiv = rspdiv; + invtrans.rgp = rgp; + TRANS_CHECK( trans_invtrans(&invtrans) ); + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j= 2 && check_values ) + { + for( i=0; i sptogp_tol ) + printf("error --> rgp[fld=%d][pt=%d] : %f\n",j,i,rgp[j*trans.ngptot+i]); + ASSERT( fabs(rgp[j*trans.ngptot+i] - (j+1) )/(double)(j+1) < sptogp_tol ); + } + } + } + } + + printf("trans_gathgrid()\n"); + // Gather gridpoint fields + struct GathGrid_t gathgrid = new_gathgrid(&trans); + gathgrid.rgp = rgp; + gathgrid.rgpg = rgpg; + gathgrid.nto = nfrom; + gathgrid.nfld = nfld; + TRANS_CHECK( trans_gathgrid(&gathgrid) ); + + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j=2 && check_values ) + { + for( i=0; i sptogp_tol ) + printf("error --> rgpg[fld=%d][pt=%d] : %f\n",j,i,rgpg[j*trans.ngptotg+i]); + } + } + } + } + + // Deallocate arrays + free(rgp); + free(rgpg); + free(rspscalar); + free(rspscalarg); + free(rspvor); + free(rspdiv); + free(nfrom); + free(nto); + + TRANS_CHECK( trans_delete(&trans) ); +} + +// ---------------------------------------------------------------------------- + +int main ( int arc, char **argv ) +{ + trans_use_mpi( test_use_mpi() ); + + setbuf(stdout,NULL); // unbuffered stdout + +// test_gptosptogp(144,73,1279); // As IVER does it + + // nsmax = (2*(nlat-1)-1)/2; + printf("-----------------------------\n"); + test_gptosptogp(320,161,159); + printf("-----------------------------\n"); + test_gptosptogp(320,161,255); + printf("-----------------------------\n"); + test_gptosptogp(320,161, 95); + printf("-----------------------------\n"); + test_gptosptogp(640,161,159); + printf("-----------------------------\n"); + test_gptosptogp(160,161,159); + printf("-----------------------------\n"); + + TRANS_CHECK( trans_finalize() ); + + return 0; +} + diff --git a/tests/transi/transi_test_lonlat_diff_incr.c b/tests/transi/transi_test_lonlat_diff_incr.c new file mode 100644 index 0000000..4d3c0fc --- /dev/null +++ b/tests/transi/transi_test_lonlat_diff_incr.c @@ -0,0 +1,201 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include + +#include "ectrans/transi.h" + +#include "transi_test.h" + +static bool check_values = true; + +// ---------------------------------------------------------------------------- + +void test_sptogp(int nlon, int nlat, int nsmax) +{ + double sptogp_tol = 1.e-6; + printf("test_sptogp( nlon=%d, nlat=%d, nsmax=%d )\n",nlon,nlat,nsmax); + + int nout = 2; + + struct Trans_t trans; + TRANS_CHECK( trans_new(&trans) ); + TRANS_CHECK( trans_set_resol_lonlat(&trans,nlon,nlat) ); + TRANS_CHECK( trans_set_trunc(&trans,nsmax) ); + + trans.fft = TRANS_FFTW; + trans.flt = 0; + + TRANS_CHECK( trans_setup(&trans) ); + printf("ndgl = %d\n",trans.ndgl); + printf("nsmax = %d\n",trans.nsmax); + printf("ngptotg = %d\n",trans.ngptotg); + + ASSERT(trans.ngptotg == nlon*nlat); + if( trans.nproc == 1 ) { + ASSERT(trans.ngptot == nlon*nlat + nlon*(nlat%2)); + } + ASSERT(trans.nspec2 == trans.nspec2g); + ASSERT( trans.nproc == 1 ); + + // In case of odd number of latitudes, there is one latitude extra in distributed gridpoints + // This can only be checked with nproc==1 + + // Allocate gridpoint data + int nscalar = 1; + int nvordiv = 1; + int nfld = 2*nvordiv+nscalar; + double* rgp = malloc( sizeof(double) * nfld * trans.ngptot ); + + // Load data on proc 1 + double* rgpg = NULL; + if( trans.myproc == 1 ) + { + rgpg = malloc( sizeof(double) * nfld*trans.ngptotg ); + } + + + // // Allocate spectral data + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + + // Gather spectral field (for fun) + int* nto = malloc( sizeof(int) * nfld ); + int k; + for( k=0; k sptogp_tol ) + // printf("error --> rgp[fld=%d][pt=%d] : %f\n",j,i,rgp[j*trans.ngptot+i]); + ASSERT( fabs(rgp[j*trans.ngptot+i]) < sptogp_tol ); + } + } + } + } + + + + printf("trans_gathgrid()\n"); + // Gather gridpoint fields + struct GathGrid_t gathgrid = new_gathgrid(&trans); + gathgrid.rgp = rgp; + gathgrid.rgpg = rgpg; + gathgrid.nto = nfrom; + gathgrid.nfld = nfld; + TRANS_CHECK( trans_gathgrid(&gathgrid) ); + + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j +#include +#include +#include +#include + +#include "ectrans/transi.h" + +#include "transi_test.h" + +int main ( int arc, char **argv ) +{ + int nloen[] = {18,25,36,40,45,54,60,64,72,72,80,90,96,100,108,120,120,128,135,144,144,150,160,160,180,180,180,192,192,200,200,216,216,216,225,225,240,240,240,256,256,256,256,288,288,288,288,288,288,288,288,288,300,300,300,300,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,300,300,300,300,288,288,288,288,288,288,288,288,288,256,256,256,256,240,240,240,225,225,216,216,216,200,200,192,192,180,180,180,160,160,150,144,144,135,128,120,120,108,100,96,90,80,72,72,64,60,54,45,40,36,25,18}; + + int start = allocated(); + + print_mem("Initially allocated: ",allocated()-start); + + if( strcmp(ectrans_version(),"transi/contrib") == 0 ) // equals + trans_set_handles_limit(2); + else /// Older versions cannot reuse existing handles, so allocate enough. + trans_set_handles_limit(3); + + trans_use_mpi(false); + + trans_init(); + + + int iter=0; + + //int start_loop = allocated(); + for( iter=0; iter<3; ++iter ) + { + printf("iteration %d\n",iter+1); + int start_iter = allocated(); + struct Trans_t trans; + + trans_new(&trans); + trans_set_resol(&trans,sizeof(nloen)/sizeof(int),nloen); + trans_set_trunc(&trans,159); + + trans_setup(&trans); + + print_mem("Allocated in iteration: ",allocated()-start_iter); + + trans_delete(&trans); + + print_mem("Possibly leaked in iteration: ",allocated()-start_iter); + //print_mem("total leaked in loop: ",allocated()-start_loop); + + // No memory leaks in subsequent iterations + if( iter > 0 ) + ASSERT(allocated()-start_iter == 0); + + } + + printf( "trans_finalize()\n" ); + trans_finalize(); + + print_mem("Total leaked: ",allocated()-start); + + + display_mallinfo(); + + return 0; +} + diff --git a/tests/transi/transi_test_memory_lonlat.c b/tests/transi/transi_test_memory_lonlat.c new file mode 100644 index 0000000..046375f --- /dev/null +++ b/tests/transi/transi_test_memory_lonlat.c @@ -0,0 +1,75 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include +#include +#include + +#include "ectrans/transi.h" + +#include "transi_test.h" + + +int main ( int arc, char **argv ) +{ + int start = allocated(); + + print_mem("Initially allocated: ",allocated()-start); + + if( strcmp(ectrans_version(),"transi/contrib") == 0 ) // equals + trans_set_handles_limit(2); + else /// Older versions cannot reuse existing handles, so allocate enough. + trans_set_handles_limit(3); + + trans_use_mpi(false); + + trans_init(); + + int iter=0; + + //int start_loop = allocated(); + for( iter=0; iter<3; ++iter ) + { + printf("iteration %d\n",iter+1); + int start_iter = allocated(); + struct Trans_t trans; + + trans_new(&trans); + trans_set_resol_lonlat(&trans,320,161); + trans_set_trunc(&trans,159); + + trans_setup(&trans); + + print_mem("Allocated in iteration: ",allocated()-start_iter); + + trans_delete(&trans); + + print_mem("Possibly leaked in iteration: ",allocated()-start_iter); + //print_mem("total leaked in loop: ",allocated()-start_loop); + + // No memory leaks in subsequent iterations + if( iter > 0 ) + ASSERT(allocated()-start_iter == 0); + + } + + printf( "trans_finalize()\n" ); + trans_finalize(); + + print_mem("Total leaked: ",allocated()-start); + + + display_mallinfo(); + + return 0; +} + diff --git a/tests/transi/transi_test_program.c b/tests/transi/transi_test_program.c new file mode 100644 index 0000000..929cd8b --- /dev/null +++ b/tests/transi/transi_test_program.c @@ -0,0 +1,354 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include + +#include "ectrans/transi.h" +#include "transi_test.h" + +void read_grid(struct Trans_t*); + +int main ( int arc, char **argv ) +{ + trans_use_mpi( test_use_mpi() ); + + printf("ectrans version int = %d\n",ectrans_version_int()); + printf("ectrans version = %s\n",ectrans_version()); + printf("ectrans version str = %s\n",ectrans_version_str()); + printf("ectrans git sha1 [0:7] = %s\n",ectrans_git_sha1_abbrev(7)); + printf("ectrans git sha1 [0:12] = %s\n",ectrans_git_sha1_abbrev(12)); + printf("ectrans git sha1 = %s\n",ectrans_git_sha1()); + + //printf("transi started\n"); + int nout = 3; + struct Trans_t trans; + trans_new(&trans); + + read_grid(&trans); + trans_setup(&trans); + trans_inquire(&trans,"numpp,ngptotl,nmyms,nasm0,npossp,nptrms,nallms,ndim0g,nvalue"); + trans_inquire(&trans,"nfrstlat,nlstlat,nptrlat,nptrfrstlat,nptrlstlat,nsta,nonl,ldsplitlat"); + trans_inquire(&trans,"nultpp,nptrls,nnmeng"); + trans_inquire(&trans,"rmu,rgw,npms,rlapin,ndglu"); + + //Check values of numpp + if( trans.myproc == 1 ) + { + printf("nprtrw = %d\n",trans.nprtrw); + int i; + for( i=0; i 1.e-5) + printf("rgp[%d][%d] : %f\n",j,i,rgp[j*trans.ngptot+i]); + } + + } + } + + // Allocate spectral data + + double* rspscalar = malloc( sizeof(double) * nscalar*trans.nspec2 ); + double* rspvor = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + + // Direct Transform + struct DirTrans_t dirtrans = new_dirtrans(&trans); + dirtrans.nscalar = nscalar; + dirtrans.nvordiv = nvordiv; + dirtrans.rgp = rgp; + dirtrans.rspscalar = rspscalar; + dirtrans.rspvor = rspvor; + dirtrans.rspdiv = rspdiv; + trans_dirtrans(&dirtrans); + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j 1.e-5) + printf("rspscalar[%d][%d] : %f\n",j,i,rspscalar[i*nscalar+j]); + } + } + } + + // Allocate fields for u*cos(theta) and v*cos(theta) + double* rspu = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + double* rspv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); + + // Convert vorticity & divergence to u*cos(theta) & v*cos(theta) + printf("Converting spectral vorticity-divergence to u*cos(lat)-v*cos(lat)...\n"); + struct VorDivToUV_t vordiv_to_UV = new_vordiv_to_UV(); + vordiv_to_UV.rspvor = rspvor; + vordiv_to_UV.rspdiv = rspdiv; + vordiv_to_UV.rspu = rspu; + vordiv_to_UV.rspv = rspv; + vordiv_to_UV.nfld = nvordiv; + vordiv_to_UV.ncoeff = trans.nspec2; + vordiv_to_UV.nsmax = trans.nsmax; + trans_vordiv_to_UV(&vordiv_to_UV); + printf("Converting spectral vorticity-divergence to u*cos(lat)-v*cos(lat)...done\n"); + + + // Gather spectral field (for fun) + int* nto = malloc( sizeof(int) * nscalar ); + nto[0] = 1; + nto[1] = 1; + + double* rspscalarg = NULL; + if( trans.myproc == 1 ) + rspscalarg = malloc( sizeof(double) * nscalar*trans.nspec2g ); + + struct GathSpec_t gathspec = new_gathspec(&trans); + gathspec.rspec = rspscalar; + gathspec.rspecg = rspscalarg; + gathspec.nfld = nscalar; + gathspec.nto = nto; + trans_gathspec(&gathspec); + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j 1.e-5 && i > 0) + printf("rspscalarg[%d][%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); + } + } + } + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j 1.e-5 && i > 0) + printf("rspscalarg[%d][%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); + } + } + } + + // Allocate fields for u*cos(theta) and v*cos(theta) + double* rspvorg = malloc( sizeof(double) * nvordiv*trans.nspec2g ); + double* rspdivg = malloc( sizeof(double) * nvordiv*trans.nspec2g ); + + gathspec = new_gathspec(&trans); + gathspec.rspec = rspvor; + gathspec.rspecg = rspvorg; + gathspec.nfld = nvordiv; + gathspec.nto = nto; + trans_gathspec(&gathspec); + + gathspec = new_gathspec(&trans); + gathspec.rspec = rspdiv; + gathspec.rspecg = rspdivg; + gathspec.nfld = nvordiv; + gathspec.nto = nto; + trans_gathspec(&gathspec); + + // Allocate fields for u*cos(theta) and v*cos(theta) + double* rspug = malloc( sizeof(double) * nvordiv*trans.nspec2g ); + double* rspvg = malloc( sizeof(double) * nvordiv*trans.nspec2g ); + + // Convert vorticity & divergence to u*cos(theta) & v*cos(theta) + printf("Converting spectral vorticity-divergence to U-V globally...\n"); + struct VorDivToUV_t vordiv_to_UV_g = new_vordiv_to_UV(); + vordiv_to_UV_g.rspvor = rspvorg; + vordiv_to_UV_g.rspdiv = rspdivg; + vordiv_to_UV_g.rspu = rspug; + vordiv_to_UV_g.rspv = rspvg; + vordiv_to_UV_g.nfld = nvordiv; + vordiv_to_UV_g.ncoeff = trans.nspec2g; + vordiv_to_UV_g.nsmax = trans.nsmax; + trans_vordiv_to_UV(&vordiv_to_UV_g); + printf("Converting spectral vorticity-divergence to U-V globally...done\n"); + + + // Distribute spectral field (for fun) + struct DistSpec_t distspec = new_distspec(&trans); + distspec.rspec = rspscalar; + distspec.rspecg = rspscalarg; + distspec.nfld = nscalar; + distspec.nfrom = nto; + trans_distspec(&distspec); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nscalar; + invtrans.nvordiv = nvordiv; + invtrans.rspscalar = rspscalar; + invtrans.rspvor = rspvor; + invtrans.rspdiv = rspdiv; + invtrans.rgp = rgp; + trans_invtrans(&invtrans); + + if( trans.myproc == 1 ) + { + int i,j; + for( j=0; j<3; ++j) + { + for( i=0; indgl = sizeof(T159)/sizeof(int); + trans->nloen = malloc( sizeof(T159) ); + for( i = 0; indgl; i++) trans->nloen[i] = T159[i]; + + // Assume Linear Grid + trans->nsmax=(2*trans->ndgl-1)/2; +} diff --git a/tests/transi/transi_test_timings.c b/tests/transi/transi_test_timings.c new file mode 100644 index 0000000..e3770a3 --- /dev/null +++ b/tests/transi/transi_test_timings.c @@ -0,0 +1,96 @@ +/* + * (C) Copyright 2014- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include + +#include "ectrans/transi.h" +#include "transi_test.h" + +// Following dummy functions are implementation details +// that don't contribute to this example. They could be +// replaced with grib_api functionality +void read_rspec( struct Trans_t* trans, double* rspec[], int nfld ); + +int main ( int argc, char **argv ) +{ + trans_use_mpi( test_use_mpi() ); + + double begin = transi_test_time(); + struct Trans_t trans; + trans_new(&trans); + double start; + int N = 80; + int nfld = 10; + printf( "Grid resolution: N = %d\n",N); + printf( "Number of fields to transform: %d\n",nfld); + printf( "\n" ); + + trans_init(); + + // Read resolution information + set_standard_rgg(&trans,N,-1); + + // Register resolution in trans library + start = transi_test_time(); + trans_setup(&trans); + + + if( trans.myproc == 1 ) printf( "trans_setup() ... "); + if( trans.myproc == 1 ) print_time("",transi_test_time()-start); + + //for( nfld = 200; nfld<=250; ++nfld ) + { + double* rspec; + read_rspec(&trans,&rspec,nfld); + double* rgp = malloc( sizeof(double) * nfld*trans.ngptot ); + + // Inverse Transform + struct InvTrans_t invtrans = new_invtrans(&trans); + invtrans.nscalar = nfld; + invtrans.rspscalar = rspec; + invtrans.rgp = rgp; + + if( trans.myproc == 1 ) printf( "trans_invtrans(%3d) ... ",nfld); + start = transi_test_time(); + trans_invtrans(&invtrans); + if( trans.myproc == 1 ) print_time("",transi_test_time()-start); + + free(rgp); + free(rspec); + } + + trans_delete(&trans); + + trans_finalize(); + + if( trans.myproc == 1 ) print_time("END transi_timings total: ",transi_test_time()-begin); + + + return 0; +} + +//--------------------------------------------------------------------------- +// Dummy functions, used in this example + + + +void read_rspec(struct Trans_t* trans, double* rspec[], int nfld ) +{ + int i,j; + *rspec = malloc( sizeof(double) * nfld*trans->nspec2 ); + for( i=0; inspec2; ++i ) + { + for( j=0; j +#include + +#include "ectrans/transi.h" +#include "transi_test.h" + +// // Following dummy functions are implementation details +// // that don't contribute to this example. They could be +// // replaced with grib_api functionality +// void read_rspec( struct Trans_t* trans, double* rspec[], int nfld ); + +int main ( int arc, char **argv ) +{ + trans_use_mpi( test_use_mpi() ); + + int nfld = 1; + int nsmax_array[] = {3};//159,160,1279,1280}; + int ni = sizeof(nsmax_array)/sizeof(int); + int i,j; + for( i=0; i