nccf_atm_monthly.sh 44.2 KB
Newer Older
#!/bin/bash
#
# ./nccf_atm_monthly.sh path_to_config_file >& EXPID-startdate.log &
#
#
# This script extracts variables from EC-Earth monthly atmospheric output,
# which is available in MMA files, saves each variable in one file and also
# combines members together. It modifies the header information of the
# generated files according to the CMIP5 standard, and the variable names are
# modified according to the PCMDI standard variable names.
#
# Written by Hui Du
#
# Institut Català de Ciències del Clima / Climate Forecasting Unit (IC3/CFU)
# Created:  February 22, 2010
# Adapted: Pierre-Antoine Bretonnière - IC3 , January 2013
# Crash/exit handler: print a notice and schedule the removal of the work
# directory one week later through at(1).
# - $msg is single-quoted so that $WORKDIR inside it is only expanded when the
#   handler fires; the escaped parentheses survive the re-parsing of the trap
#   string.
# - The rm command is passed to at(1) as text on stdin. The previous form
#   "rm -rf $WORKDIR | at ..." expanded $WORKDIR when the trap was installed,
#   ran rm as soon as the handler fired and gave at(1) nothing to schedule.
# - Nothing is scheduled when WORKDIR is empty.
# - "trap - EXIT" keeps the handler from running a second time when the final
#   "exit" of a signal-triggered run reaches the EXIT trap.
# NOTE(review): the signal list is kept unchanged; SIGKILL cannot be caught
# by a shell trap, so that entry has no effect.
msg='Your experiment crashed! Your workdir \($WORKDIR\) will be kept for one week from now and then deleted'
trap " trap - EXIT ; echo $msg ; if [ -n \"\$WORKDIR\" ] ; then echo \"rm -rf '\$WORKDIR'\" | at now + 7 days ; fi ; exit" SIGINT SIGHUP SIGTERM SIGSEGV SIGKILL EXIT 
#################################
####  User Defined Functions  ####
#################################

# Validate the command line: exactly one argument (the config file) is
# expected. The argument count is recorded in the global NB_ARGS. With any
# other count the usage text is printed and the script exits with status 1.
function check_args(){
 NB_ARGS=$#
 # the expected case: nothing to report
 [ $# -eq 1 ] && return 0
 printf '\n%s\n%s\n\n' \
  "USAGE: config_file " \
  "For example: ./nccf_atm_monthly.new.sh /home/$user/cfu_git/autosubmit/pp/atmos/config_file "
 exit 1
}

# Compute the number of lead times NLT (exported) from the MMA archives.
# For every member directory of MEM_LST under
# ${DATADIR}/${EXPID}/${SDATE}/<member>/outputs it counts the MMA* files
# (NFILE) and, for each of them, the archive entries whose listing contains
# "GG" (NMONTHS_LOC). NMONTH keeps the smallest count seen so far and a warning
# is printed when two archives disagree. NLT is NFILE*NMONTH, as computed for
# the last member visited. The function ends in ${WORKDIR}.
# Returns 1 if a member output directory cannot be entered.
function get_leadtime(){
 NMONTH=9999999
 # ${MEM_LST[@]} works whether MEM_LST is an array or a space separated string
 for DIR in ${MEM_LST[@]}
 do
  # hard coded directory layout
  cd "${DATADIR}/${EXPID}/${SDATE}/${DIR}/outputs" || {
   echo "get_leadtime: cannot enter ${DATADIR}/${EXPID}/${SDATE}/${DIR}/outputs" >&2
   return 1
  }
  NFILE="$( find . -maxdepth 1 -name "MMA*" | wc -l )"
  for TMPFILE in $( find . -maxdepth 1 -name "MMA*" | sed -e 's/\.\///g' )
  do
   NMONTHS_LOC=$(tar tvf "$TMPFILE" | grep -c GG)
   [ $NMONTH -ne $NMONTHS_LOC ] && [ $NMONTH -ne 9999999 ] && echo "all members or start dates don't have the same number of months, be careful" 
   [ $NMONTH -ge $NMONTHS_LOC ] && NMONTH=$NMONTHS_LOC
  done
  export NLT=$((NFILE*NMONTH))
  cd "${WORKDIR}"
 done
}

# Build header.nc, the file holding reftime, leadtime and time_bnd for every
# lead time of a start date.
# $1 : start date (YYYYMMDD)
# Reads the globals ENSEMBLE, HEAD_DIR and NLT; calls rtime (sets
# REFTIME_VALUE), leadtime2date and get_hours. Works on toto.nc / toto1.nc in
# the current directory and leaves header.nc there.
# Time bounds are cumulated month lengths in hours (as returned by get_hours),
# leadtime is the middle of each interval (integer division).
function header(){
 SD=$1
 rtime 19500101 ${SD}
 ncks -h -d ensemble,0,$((ENSEMBLE-1)),1 ${HEAD_DIR}/template.nc toto.nc # select sub member
 cp toto.nc toto1.nc
 TIME_BND1=0
 # NOTE(review): TIME_BND2 was used below without ever being initialised. The
 # first record covers the start month, which is consistent with the way the
 # bounds are advanced inside the loop - confirm against the upstream script.
 TIME_BND2=$(get_hours ${SD})
 LEADTIME=$(((TIME_BND1+TIME_BND2)/2))

 ncap2 -O -h -s "leadtime(0)=${LEADTIME};time_bnd(,0)=${TIME_BND1};time_bnd(,1)=${TIME_BND2}" toto.nc toto.nc

 # one extra record per remaining lead time, appended to toto.nc
 for ((i=1;i<=$((NLT-1));i++)); do
  FORDATE=$(leadtime2date $SD $i)
  INTERVAL=$(get_hours $FORDATE)
  TIME_BND1=$TIME_BND2
  TIME_BND2=$((TIME_BND1+INTERVAL))
  LEADTIME=$(((TIME_BND1+TIME_BND2)/2))
  # progress trace (previously printed lowercase names that were never set)
  echo $FORDATE $INTERVAL $TIME_BND1 $TIME_BND2
  ncap2 -O -h -s "reftime(0)=${REFTIME_VALUE};leadtime(0)=${LEADTIME};time_bnd(,0)=${TIME_BND1};time_bnd(,1)=${TIME_BND2}" toto1.nc toto1.nc
  ncrcat -O -h toto.nc toto1.nc toto.nc
 done

 mv toto.nc header.nc; rm toto1.nc

 # the first record never received a reftime above: copy it from the second one
 ncap2 -O -h -s "reftime(0)=reftime(1)" header.nc header.nc
}


# Compute the reference time between two dates and store it in the global
# REFTIME_VALUE.
# $1 : first date  (YYYYMMDD)
# $2 : second date (YYYYMMDD)
# Both dates are converted to seconds since the epoch (UTC); the difference
# SEC2-SEC1 is divided (integer division) by the global FACTOR. A selection of
# FACTOR by unit (hour=3600, day=86400) used to be done here but is disabled,
# so FACTOR has to be defined by the caller's environment.
# Side effects: sets DATE1/2, YEAR1/2, MONTH1/2, DAY1/2 and SEC1/2 globally.
function rtime(){
 DATE1=$1
 DATE2=$2

 # split both dates into year / month / day
 YEAR1=${DATE1:0:4}
 MONTH1=${DATE1:4:2}
 DAY1=${DATE1:6:2}
 YEAR2=${DATE2:0:4}
 MONTH2=${DATE2:4:2}
 DAY2=${DATE2:6:2}

 SEC1=$(date --utc --date "${YEAR1}-${MONTH1}-${DAY1}" +%s)
 SEC2=$(date --utc --date "${YEAR2}-${MONTH2}-${DAY2}" +%s)

 REFTIME_VALUE=$(( (SEC2 - SEC1) / FACTOR ))
}


# Compute the year and month reached a number of months after a start date.
# $1 : start date (YYYYMM or YYYYMMDD, only year and month are used)
# $2 : offset in months (0 gives the start month itself)
# Prints the result as YYYYMM on stdout, which is what header() captures with
# $(leadtime2date ...); YY1 and MM1 are also left set for direct callers.
# Fixes over the previous version: the offset was read from the unset
# lowercase name "offset", nothing was printed, and the global NMONTH (owned
# by get_leadtime) was overwritten.
function leadtime2date(){
 local MONTH_SHIFT
 INIDATE=$1
 OFFSET=$2

 YY=$(echo $INIDATE|cut -c1-4)
 MM=$(echo $INIDATE|cut -c5-6)
 # strip leading zeros so that 08 and 09 are not parsed as octal numbers
 MM=$(echo $MM | sed 's/^0*//;s/^$/0/' )
 YY1=$((YY+OFFSET/12))

 MONTH_SHIFT=$((OFFSET%12))
 MM1=$((MM+MONTH_SHIFT))
 if [ $MM1 -gt 12 ]; then
  YY1=$((YY1+1))
  MM1=$((MM1-12))
 fi

 if [ $MM1 -lt 10 ]; then
  MM1="0$MM1"
 fi
 echo ${YY1}${MM1}
}

# Print the total number of hours of a given month.
# $1 : date whose first six characters are YYYYMM (YYYYMMDD is accepted too)
# The month length is computed with the Gregorian leap-year rule instead of
# counting the words of the cal(1) output, so the result no longer depends on
# cal being installed, on its layout, or on the deprecated egrep.
# An unusable year or month yields 0.
# Side effects: sets YYMM, YEAR, MONTH, NDAYS and HOURS globally.
function get_hours(){
 YYMM=$1
 YEAR=$(echo $YYMM|cut -c1-4)
 MONTH=$(echo $YYMM|cut -c5-6)
 NDAYS=0
 if [[ $YEAR =~ ^[0-9]+$ && $MONTH =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so that 08 and 09 are not rejected as octal
  case $((10#$MONTH)) in
   1|3|5|7|8|10|12) NDAYS=31 ;;
   4|6|9|11) NDAYS=30 ;;
   2)
    if (( (10#$YEAR % 4 == 0 && 10#$YEAR % 100 != 0) || 10#$YEAR % 400 == 0 )); then
     NDAYS=29
    else
     NDAYS=28
    fi
   ;;
  esac
 fi
 HOURS=$((NDAYS*24))
 echo $HOURS
}


# Extract the spectral (SH_FILES) and gridpoint (GG_FILES) entries from the
# MMA tar archives of every member in MEM_LST.
# As visible here, per member:
#  - when LEAD_LIST is empty, FILE_LIST is every MMA* file found under
#    ${DATADIR}/${EXPID}/${SDATE}/<member>;
#  - YEAR0/MON0/YEARF/MONF/CHUNK_SIZE are read from LEAD_LIST[0..4] and the
#    number of chunks LTIMEF is derived from them; for every chunk index jt the
#    start (YEAR1/MON1) and end (YEAR2/MON2) year and month are computed and
#    the matching MMA_..._<start>01-<end>*.tar file is appended to FILE_LIST;
#  - every archive of FILE_LIST is untarred for each name in SH_FILES and
#    GG_FILES, the entry is gunzipped and renamed with a ".<member>" suffix
#    (${FILE%???} drops the last three characters, presumably ".gz").
# The trailing echo statements are debug traces.
# NOTE(review): this block does not parse as shown. It looks like lines were
# lost when the file was captured: the "else" separating the empty-LEAD_LIST
# case from the chunk computation, the test guarding the "Expected file ...
# not found" message, the increment of jt, a "fi" for the MON2 test and the
# "done" closing the member loop are all absent, and there is a stray "fi"
# after the MON2 computation. Restore them from the upstream script rather
# than by guessing.
function extract(){

#typeset var jt
#typeset var YEAR0
#typeset var YEARF
#typeset var MON0
#typeset var MONF

for MEM in ${MEM_LST[@]}; do
# untar and unzip MMA SH files and GG files 

if [  -z $( echo ${LEAD_LIST[@]} ) ];then 
 PATH_TO_SEARCH=${DATADIR}/${EXPID}/${SDATE}/$MEM
 FILE_LIST=$(find $PATH_TO_SEARCH -type f -iname "MMA*" 2> /dev/null)
 export YEAR0=${LEAD_LIST[0]}
 export MON0=${LEAD_LIST[1]}
 export YEARF=${LEAD_LIST[2]}
 export MONF=${LEAD_LIST[3]}
 export CHUNK_SIZE=${LEAD_LIST[4]}
 export YYYY0=$(echo $SDATE | cut -b -4)
 export MM0=$(echo $SDATE | cut -b 5-6)
 export LTIME0=1
 export LTIMEF=$(( ( ($YEARF - $YEAR0) * 12 + 10#$MONF - 10#$MON0+1 )/$CHUNK_SIZE ))
 jt=$LTIME0
# NOTE(review): the "...'s avatar" / "... committed" lines inside this loop are
# not shell code; they look like web-interface residue and must be removed.
 while [ $jt -le $((LTIMEF)) ]
  do 
pabretonniere's avatar
pabretonniere committed
   YEAR1=$(( $YEAR0 +(10#$MON0+($jt-1)*$CHUNK_SIZE-1)/ 12 ))
   MON1=$(( ( 10#$MON0 + ( $jt - 1 ) * ($CHUNK_SIZE) ) % 12))
   MON1=$(echo $(printf "%02d" $(( 10#$MON1))) )
pabretonniere's avatar
pabretonniere committed
   YEAR2=$(( $YEAR1 + ( 10#$MON1 + $CHUNK_SIZE-1 ) / 12  ))
   MON2=$(( ( $MON1 + $CHUNK_SIZE-1 ) % 12 ))
   MON2=$(echo $(printf "%02d" $(( 10#$MON2))) )
   fi
   if [ $MON2 -eq 0 ];then
    MON2=12
   FILE="$( find ${DATADIR}/${EXPID}/${SDATE}/*/outputs/ -maxdepth 1 -name "MMA_${EXPID}_${SDATE}_${MEM}_${YEAR1}"$(printf "%02d" $((10#$MON1)))"01-${YEAR2}"$(printf "%02d" $((10#$MON2)))"*.tar" )"
pabretonniere's avatar
pabretonniere committed
    echo "Expected file ${DATADIR}/${EXPID}/${SDATE}/${MEM}/outputs/MMA_${EXPID}_${SDATE}_${MEM}_${YEAR1}${MON1}01-${YEAR2}${MON2}*.tar not found, check your Namelist parameters"
   FILE_LIST="$FILE_LIST ${FILE}"
  done
fi
 echo ${FILE_LIST}
   for f in ${FILE_LIST};do
     for FILE in ${SH_FILES};do
      tar xvf ${f} ${FILE} ;gunzip -q ${FILE}; mv ${FILE%???} ${FILE%???}.$MEM
     done
      for FILE in ${GG_FILES};do
       tar xvf ${f} ${FILE} ;gunzip -q ${FILE}; mv ${FILE%???} ${FILE%???}.$MEM
      done
    done
echo "jt ltimef " $jt $((LTIMEF)) 
if [ $jt -le $((LTIMEF)) ]; then
  echo le
else
 echo gt
fi 
echo "sortie de extract" $MEM -- ${MEM_LST[@]}
}

# Build the variable lists from a file when none is given in the namelist.
# $1 : netCDF file to inspect (read with "ncdump -h")
# $2 : grid type, GG or SH; any other value leaves the lists untouched
# Every "float" variable declaration is classified by its number of
# dimensions: NB_DIMS counts the separators between dimension names, so
# NB_DIMS equal to 2 means three dimensions and the variable goes to the 2D
# list of the grid type; everything else goes to its 3D list.
# Appends to VAR_LST_2D_GG / VAR_LST_3D_GG or VAR_LST_2D_SH / VAR_LST_3D_SH.
# The GG and SH arms used to be fused into a single GG arm, so GG variables
# also landed in the SH lists and SH input was ignored.
function read_vars(){
 OUTPUT_FILE=$1
 GRID_TYPE=$2

 # one word per variable, e.g. "T2M(time@lat@lon)"
 VAR_LST_DIM=$(ncdump -h $OUTPUT_FILE | grep float | sed -e 's/float//g' -e 's/, /@/g' -e 's/;//g' )
 for VAR_DIM in $VAR_LST_DIM
 do
  # keep only the "@" separators and count them
  NB_DIMS=$(echo $VAR_DIM | sed -e 's/[^@]/ /g' | wc -w)
  VAR=$(echo $VAR_DIM | sed -e 's/(.*)//g' )
  case $GRID_TYPE in
   GG)
    if [ $NB_DIMS -eq 2 ]; then
     VAR_LST_2D_GG=$(echo ${VAR_LST_2D_GG} $VAR)
    else
     VAR_LST_3D_GG=$(echo ${VAR_LST_3D_GG} $VAR)
    fi
   ;;
   SH)
    if [ $NB_DIMS -eq 2 ]; then
     VAR_LST_2D_SH=$(echo ${VAR_LST_2D_SH} $VAR)
    else
     VAR_LST_3D_SH=$(echo ${VAR_LST_3D_SH} $VAR)
    fi
   ;;
  esac
 done
}

# for surface variables (manipulate GG files)
#
# Step 1, per member of MEM_LST and per variable of VAR_LST_2D: select the
# variable from every extracted MMA*GG*.nc.<member> file with cdo and merge all
# time steps into tmp_<varnew>_<SDATE>.<member>.nc.
#   PRECIP is CP + LSP, SSRU is SSR - SSRD, STRU is STR - STRD; these and the
#   flux variables listed in the inner case are divided by FACTOR.
# Step 2, per variable: look for an already saved file of the same start date,
# gather all members (gather_memb), rewrite the variable attributes from
# ${HEAD_DIR}/table_of_variable (column idx set by new_name), append header.nc,
# fill experiment_id / realization / institution / source for each member of
# LISTMEMB, drop the time variable and hand the file to save_final_output.
#
# Reads: LEAD_LIST, MEM_LST, LISTMEMB, VAR_LST_2D, NLT, FACTOR, SDATE, SAVEDIR,
# EXPID, NFRP, HEAD_DIR, INSTITUTION, SOURCE. Exits with status 1 when NLT does
# not match the number of months requested through LEAD_LIST.
#
# Repairs relative to the captured version of this function: non-code residue
# lines removed; the "done" closing the per-file loops of PRECIP/SSRU/STRU, the
# ";; esac" closing the flux case and the "fi" closing the previous-file test
# restored; the warning printed the undefined $nb_files; the error path used a
# bare "exit"; the previous-file test parsed ls output and broke with zero or
# several matches.
# NOTE(review): FIRST_/LAST_MEMBER_PREVIOUS are single characters cut from the
# previous file name right after "<varnew>_<SDATE>_", while the files written
# below insert "fc" at that position - verify the member parsing.
function surface(){

 export YEAR0=${LEAD_LIST[0]}
 export MON0=${LEAD_LIST[1]}
 export YEARF=${LEAD_LIST[2]}
 export MONF=${LEAD_LIST[3]}
 MONF=$(echo $(printf "%02d" $(( 10#$MONF))) )
 echo ${MEM_LST[@]}
 for MEM in ${MEM_LST[@]}; do
  FILES="$( find . -maxdepth 1 -name "MMA*GG*.nc.$MEM" | sed -e 's/\.\///g' )"
  nb_months_asked=$(( ( ($YEARF - $YEAR0) * 12 + 10#$MONF - 10#$MON0+1 ) ))
  if [[ $NLT -ne $nb_months_asked ]]; then
   echo "Warning, the number of months in MMA (",$NLT,") is different from the number of months you required according to the LEADLIST",${nb_months_asked}," check your configfile!"
   exit 1
  fi
  ####  process each variable  ####
  for VAR in ${VAR_LST_2D[@]}; do # untar once and extract all the variables
   new_name $VAR
   case $VAR in
    "PRECIP") # for precip, have to add CP and LSP to get total precip
     varnew=prlr
     for f in ${FILES}; do
      SUFFIX=$MEM.${f%????}
      cdo selname,CP $f CP.$SUFFIX # select CP
      ncrename -h -v CP,prlr CP.$SUFFIX
      cdo selname,LSP $f LSP.$SUFFIX # select LSP
      ncrename -h -v LSP,prlr LSP.$SUFFIX
      cdo add CP.$SUFFIX LSP.$SUFFIX ${varnew}.${SUFFIX} # add CP and LSP to get total precipitation
      cdo divc,${FACTOR} ${varnew}.${SUFFIX} toto.nc;rm -f ${varnew}.${SUFFIX}; mv toto.nc ${varnew}.${SUFFIX}
     done
     prlr_files="$(find . -maxdepth 1 -name "${varnew}*" | sed -e 's/\.\///g' )"
     cdo -O mergetime ${prlr_files} tmp_${varnew}_$SDATE.$MEM.nc # combine all the time steps in one file
    ;;
    "SSRU")
     varnew=rsus
     for f in ${FILES}; do
      SUFFIX=$MEM.${f%????}
      cdo selname,SSRD $f SSRD.$SUFFIX
      ncrename -h -v SSRD,rsus SSRD.$SUFFIX
      cdo selname,SSR $f SSR.$SUFFIX
      ncrename -h -v SSR,rsus SSR.$SUFFIX
      cdo sub SSR.$SUFFIX SSRD.$SUFFIX ${varnew}.${SUFFIX}
      cdo divc,${FACTOR} ${varnew}.${SUFFIX} toto.nc;rm -f ${varnew}.${SUFFIX}; mv toto.nc ${varnew}.${SUFFIX}
     done
     rsus_files="$(find . -maxdepth 1 -name "${varnew}*" | sed -e 's/\.\///g' )"
     cdo -O mergetime ${rsus_files} tmp_${varnew}_$SDATE.$MEM.nc # combine all the time steps in one file
     rm -rf rsus*.nc SSR* SSRD*
    ;;
    "STRU")
     varnew=rlus
     for f in ${FILES}; do
      SUFFIX=$MEM.${f%????}
      cdo selname,STRD $f STRD.$SUFFIX
      ncrename -h -v STRD,rlus STRD.$SUFFIX
      cdo selname,STR $f STR.$SUFFIX
      ncrename -h -v STR,rlus STR.$SUFFIX
      cdo sub STR.$SUFFIX STRD.$SUFFIX ${varnew}.${SUFFIX}
      cdo divc,${FACTOR} ${varnew}.${SUFFIX} toto.nc;rm -f ${varnew}.${SUFFIX}; mv toto.nc ${varnew}.${SUFFIX}
     done
     rsus_files="$( find . -maxdepth 1 -name "${varnew}*" | sed -e 's/\.\///g' )"
     cdo -O mergetime ${rsus_files} tmp_${varnew}_$SDATE.$MEM.nc # combine all the time steps in one file
     rm -rf rlus*.nc STR* STRD*
    ;;
    *)
     new_name $VAR
     for f in ${FILES};do
      cdo selname,${VAR} ${f} ${VAR}.$MEM.${f%????}
     done
     TMP_FILES="$( find . -maxdepth 1 -name "${VAR}*.nc" | sed -e 's/\.\///g' )"
     TMP_OUT=tmp_${varnew}_$SDATE.$MEM.nc
     # combine all the time steps in one file
     cdo -O mergetime ${TMP_FILES} ${TMP_OUT}
     [[ ${VAR} != ${varnew} ]] && ncrename -h -v ${VAR},${varnew} ${TMP_OUT}
     case ${varnew} in
      "rss"|"rls"|"rsscs"|"rsds"|"rlds"|"hflsd"|"hfssd"|"rlt"|"rst"|"rltcs"|"rstcs")
       cdo divc,${FACTOR} ${TMP_OUT} toto.nc; rm -rf ${TMP_OUT}; mv toto.nc ${TMP_OUT}
       ncatted -O -h -a units,${varnew},m,c,"W m-2" ${TMP_OUT}
      ;;
     esac
    ;;
   esac
  done # loop for VAR
 done # loop for members
 echo "after loop on memebers in surface"
 # finish selecting the variables
 # combine members and change the attributes
 for VAR in ${VAR_LST_2D[@]}; do
  new_name $VAR
  LSMBSH="${LISTMEMB[0]}-${LISTMEMB[${#LISTMEMB[@]}-1]}"
  # find is used first because ls on a pattern without match reports an
  # error; ls -tr is only used once at least one previous file is known
  PREVIOUS_FILE="$( find  ${SAVEDIR}/${EXPID}/*/* -maxdepth 1 -name "${varnew}_${SDATE}_*.nc" )"
  echo okkkk
  if [ -n "$PREVIOUS_FILE" ] ; then
   PREVIOUS_FILE="$(ls -tr ${SAVEDIR}/${EXPID}/monthly_mean/${varnew}_${NFRP}hourly/${varnew}_${SDATE}_*.nc | tail -1)"
   cd ${SAVEDIR}/${EXPID}/monthly_mean/${varnew}_${NFRP}hourly/
   FILE_NAME_PREVIOUS="$( find . -maxdepth 1 -name "${varnew}_${SDATE}_*.nc" | tail -1 | sed -e 's/\.\///g' )"
   cd -
   IDX_1ST=$(echo ${varnew}_${SDATE}_ | wc -m )
   FIRST_MEMBER_PREVIOUS=$(echo $FILE_NAME_PREVIOUS | cut -b$IDX_1ST )
   IDX_LST=$( expr $IDX_1ST + 2 )
   LAST_MEMBER_PREVIOUS=$(echo $FILE_NAME_PREVIOUS  | cut -b$IDX_LST )
   #security check:
   if [ $LAST_MEMBER_PREVIOUS -le $(expr ${LISTMEMB[0]} - 1 ) ] ; then
    cp $PREVIOUS_FILE tmp_$FILE_NAME_PREVIOUS
    if [ $LAST_MEMBER_PREVIOUS -eq $(expr ${LISTMEMB[0]} - 1 ) ] ; then
     LSMBSH=${FIRST_MEMBER_PREVIOUS}-${LISTMEMB[${#LISTMEMB[@]}-1]}
    else
     echo "Actual list of members does not follow directly the ones already post-processed! Check you did not forget any members at the beginning of your list"
     MISSING_FIRST=$(expr ${LAST_MEMBER_PREVIOUS} + 1 )
     MISSING_LAST=$(expr ${LISTMEMB[0]} - 1 )
     LSMBSH=${FIRST_MEMBER_PREVIOUS}_${LISTMEMB[${#LISTMEMB[@]}-1]}-${MISSING_FIRST}_${MISSING_LAST}
    fi
   else
    echo "Some members are going to be treated twice! If you are not adding new lead_times, revise the consistency between your member list and the previously processed files"
   fi
  fi
  YRSO=${YEAR0}$(printf "%02d" $(( 10#$MON0)))_${YEARF}$(printf "%02d" $(( 10#$MONF)))
  gather_memb tmp_${varnew}_$SDATE ${varnew}_${SDATE}_fc${LSMBSH}_${YRSO}.nc $varnew $VAR
  output=${varnew}_${SDATE}_fc${LSMBSH}_${YRSO}.nc
  ##### Change the header informations #####
  #
  # Get the CFU standard attributes to be written in the variable
  #
  variables="$(cat ${HEAD_DIR}/table_of_variable | cut -f$idx -d'|' | sed -e 's/ /@/g')"  #to be changed into more interactive with xml table
  cfustandard_name="$(echo $variables | cut -f2 -d' ' | sed -e 's/@/ /g')"  # variable standard name
  cfulong_name="$(echo $variables     | cut -f3 -d' ' | sed -e 's/@/ /g')"  # variable long name
  cfucell_methods="$(echo $variables  | cut -f4 -d' ' | sed -e 's/@/ /g')"  # variable cell methods
  cfuunit="$(echo $variables          | cut -f5 -d' ' | sed -e 's/@/ /g')"  # variable unit
  cfuunit_long="$(echo $variables     | cut -f6 -d' ' | sed -e 's/@/ /g')"  # variable unit long name
  cfulevel_number="$(echo $variables  | cut -f7 -d' ' | sed -e 's/@/ /g')"  # variable level
  cfulevel_type="$(echo $variables    | cut -f8 -d' ' | sed -e 's/@/ /g')"  # variable level type
  cfulevel_units="$(echo $variables   | cut -f9 -d' ' | sed -e 's/@/ /g')"  # variable level unit
  #
  # Adding the variable level
  #
  ncap2 -h -O -s level_${varnew}="$cfulevel_number" ${output} ${output}
  #
  # Removing unnecessary attributes
  #
  for att in units valid_range actual_range code table GRID_TYPE ; do
   ncatted -O -h -a ${att},${varnew},d,c, ${output}
  done
  #
  # Adding and modifying the variable attributes
  #
  ncatted -O -h -a _FillValue,${varnew},a,f,1.e+12 ${output}
  ncatted -O -h -a standard_name,${varnew},o,c,"$cfustandard_name" ${output} # variable standard name
  ncatted -O -h -a long_name,${varnew},o,c,"$cfulong_name" ${output}         # variable long name
  ncatted -O -h -a cell_methods,${varnew},o,c,"$cfucell_methods" ${output}   # variable cell methods
  ncatted -O -h -a unit_long,${varnew},o,c,"$cfuunit_long" ${output}         # variable long unit name
  ncatted -O -h -a units,${varnew},o,c,"$cfuunit" ${output}                  # variable units
  ncatted -O -h -a data_type,level_${varnew},o,c,"$cfulevel_type" ${output}  # variable level type
  ncatted -O -h -a units,level_${varnew},o,c,"$cfulevel_units" ${output}     # variable level units
  ncatted -O -h -a coordinates,${varnew},o,c,"longitude latitude reftime leadtime time_bnd experiment_id source realization institution level_${varnew}" ${output} # variable coordinates
  #
  # If the NetCDF file had a horizontal axis name different from longitude
  #
  ncrename -h -d lon,longitude -v lon,longitude  ${output}

  [[ ! -z $( ncdump -h ${output} | grep record ) ]] && ncrename -h -d record,ensemble ${output}
  #
  # If the NetCDF file had a vertical axis name different from latitude
  #
  ncrename -h -d lat,latitude -v lat,latitude  ${output}
  #
  # Adding variable axis
  #
  ncatted -O -h -a axis,longitude,o,c,"X" ${output}       # variable longitude axis
  ncatted -O -h -a axis,latitude,o,c,"Y" ${output}        # variable latitude axis
  ncatted -O -h -a axis,level_${varnew},o,c,"Z" ${output} # variable level axis
  # modify level information
  # reshape the dimension and make time unlimited
  [[ ! -z $( ncdump -h ${output} | grep ensemble ) ]] && ncpdq -O -h -a time,ensemble ${output} ${output}
  cp ${output} hhh.nc
  ncks -h -A header.nc ${output}
  nt=$(cdo ntime ${output})
  ncatted -O -h -a standard_name,level_${varnew},c,c,"height" ${output}       # standard name
  ncatted -O -h -a long_name,level_${varnew},c,c,"reference height" ${output} # long name
  ncatted -O -h -a data_type,level_${varnew},c,c,"float" ${output}            # data type
  ncatted -O -h -a units,level_${varnew},c,c,"m" ${output}                    # units
  ncatted -O -h -a axis,level_${varnew},c,c,"Z" ${output}                     # axis
  ncatted -O -h -a positive,level_${varnew},c,c,"up" ${output}
  ncap2 -O -h -s "level_${varnew}=float(${cfulevel_number})" $output $output
  # delete history
  ncatted -h -a history,global,d,, $output
  # change institution name
  ncatted -h -a institution,global,m,c,"IC3" $output

  # create a script to change the EXPID, institution, ensemble, source and realization
  i=0 # index
  echo "before modify ncvalue"  $output ; ncdump -h $output
  for MEM in ${LISTMEMB[@]}; do

cat>modify_ncvalue<<EOF
ncap2 -O -h -s 'experiment_id($i,0:3)="$EXPID";realization($i)=$MEM;institution($i,0:$(( ${#INSTITUTION} -1  )))="$INSTITUTION";source($i,0:$(( ${#SOURCE} -1  )))="$SOURCE"' \$1 \$1
EOF
   cat modify_ncvalue
   bash modify_ncvalue $output; rm modify_ncvalue
   i=$((i+1))
  done
  ##
  ncrename -O -h -v time,kaka $output # delete time variable
  ncks -O -h -x -v kaka $output $output # delete time variable
  save_final_output $varnew $output

 done # loop for variables
}

# Map a model output variable name to the name used in post-processed files.
# $1 : variable name as found in the model output (e.g. T2M, MSL, PRECIP)
# Sets two globals for the caller:
#   varnew : name to use in the post-processed files
#   idx    : number associated with the variable; surface() uses it as the
#            column to cut from ${HEAD_DIR}/table_of_variable
# Names that match no arm leave varnew and idx as they were.
function new_name(){  #to be rethought: make a match between var_name and xml table
      case $1 in # rename variable names in model output to the standard names which should be used in post-processed files 
        "T2M")
        varnew=tas
        idx=1
        ;;
        "D2M")
        varnew=d2m
        idx=25
        ;;
        "U10M")
        varnew=uas
        idx=15
        ;;
        "V10M")
        varnew=vas
        idx=16
        ;;
        "PRECIP")
        varnew=prlr
        idx=7
        ;;
        "CP")
        varnew=prc
        idx=26
        ;;
        "SF")
        varnew=prsn
        idx=27
        ;;
        "E")
        varnew=evspsbl
        idx=28
        ;;
        "SSTK")
#        "SST")
#weird...        VAR=SSTK
        varnew=tos
        idx=10
        ;;
        "MSL")
        varnew=psl
        idx=2
        ;;
        "SSR")
        varnew=rss
        idx=5
        ;;
        "SSRU")
        varnew=rsus
        idx=30
        ;;
        "SSRC")
        varnew=rsscs
        idx=29
        ;;
        "TSR")
        varnew=rst
        idx=21
        ;;
        "TSRC")
        varnew=rstcs
        idx=22
        ;;
        "TTR")
        varnew=rlt
        idx=23
        ;;
        "TTRC")
        varnew=rltcs
        idx=24
        ;;
        "STR")
        varnew=rls
        idx=6
        ;;
        "STRD")
        varnew=rlds
        idx=31
        ;;
        "STRU")
        varnew=rlus
        idx=32
        ;;
        "SLHF")
        varnew=hflsd
        idx=9
        ;;
        "SSHF")
        varnew=hfssd
        idx=8
        ;;
        "SSRD")
        varnew=rsds
        idx=20
        ;;
        "T")
        varnew=ta
        idx=11
        ;;
        "U")
        varnew=ua
        idx=12
        ;;
        "V")
        varnew=va
        idx=13
        ;;
        "W")
        varnew=wap
        idx=34
        ;;
        "CLWC")
        varnew=clw
        idx=36
        ;;
        "CIWC")
        varnew=cli 
        idx=37
        ;;
        "Z")
        varnew=g
        idx=14
        ;;
        "Q")
        varnew=hus
        idx=19
        ;;
        "tasmax")
        varnew=tasmax
        idx=17
        ;;
        "tasmin")
        varnew=tasmin
        idx=18
        ;;
        "TCC")
        varnew=clt
        idx=33
        ;;
        "CC")
        varnew=cl
        idx=35
#pab!!!! new variables to be added when variable list read directly in outputs, some are missing
        ;;
        "var78")
        varnew=tclw
        idx=38
        ;;
        "AL")
        varnew=al
        idx=39
# NOTE(review): the case statement and the function are not closed here; the
# terminating ";;", "esac" and "}" (and possibly further arms) appear to be
# missing from this copy of the file. Restore them from the upstream script.

#  Gather the members in a single netcdf file 
# NOTE(review): the "function gather_memb(){" line that should open this body
# is missing from this copy of the file; callers invoke it as
# "gather_memb <prefix> <output> <varnew> <VAR>".

# $1 : prefix netcdf file name for all the members        
# $2 : output file name                               
# $3 : variable name in the post-processed files (stored in VAR_LOC)
# $4 : variable name in the model output (stored in VAR_OLD), compared with
#      the entries of VAR_LST_3D_GG
# Created in May 2012           Author : vguemas@ic3.cat                            
# Adapted for atmospherical outputs January 2013 pierre-antoine.bretonniere@ic3.cat 

# Files named "<prefix>.*" are the new per-member files (NEW_FILES); a file
# named "<prefix>_*" is treated as a previously saved file (OLD_FILE).
#  OLD_FILE=$(ls ${1}_* || echo "")
#  NEW_FILES=$(ls "${1}".*)
  OLD_FILE="$( find . -maxdepth 1 -name "${1}_*" | sed -e 's/\.\///g' )"
  NEW_FILES="$( find . -maxdepth 1 -name "${1}.*" | sed -e 's/\.\///g' )"
  VAR_LOC=$3
  VAR_OLD=$4

  rm -f tmp_cat.nc tmp_0_${OLD_FILE} tmp_${OLD_FILE} 
# NOTE(review): this "if" has no matching "fi" (nor "else") in the visible
# code, so the extent of the several-new-files case cannot be told from here.
if [[ $( echo $NEW_FILES | wc -w ) -gt 1 ]]; then
# stack the member files along a new record dimension, renamed to ensemble
  ncecat ${NEW_FILES} tmp_cat.nc || exit
  ncrename -h -d record,ensemble tmp_cat.nc
# add a temporary record dimension, move ensemble in front of it, then
# average the record dimension away again
  ncecat -O -h tmp_cat.nc tmp_cat2.nc
  ncpdq -O -h -a ensemble,record tmp_cat2.nc tmp_cat2.nc
  ncwa -O -h -a record tmp_cat2.nc tmp_cat2.nc
  mv tmp_cat2.nc tmp_cat.nc #new
# for variables listed in VAR_LST_3D_GG keep only lon, lat and the variable
 for var_check in $VAR_LST_3D_GG
  do
  if [ $VAR_OLD == $var_check ];then
   ncks -O -h -a -v lon,lat,$VAR_LOC tmp_cat.nc tmp_cat.nc
#   ncks -O -h -a -v lon,lat,$VAR_LOC tmp_cat2.nc tmp_cat3.nc
#   mv tmp_cat3.nc tmp_cat.nc
#  else
#   mv tmp_cat2.nc tmp_cat.nc
  fi
 done
# when a previous file exists, give it the same treatment and concatenate the
# new members with it into $2; otherwise the new members alone become $2
 if [ ! -z $OLD_FILE ] ; then
  ncks -C -O -v longitude,latitude,$VAR_LOC $OLD_FILE tmp_0_${OLD_FILE}
  ncecat -O -h tmp_0_${OLD_FILE} tmp_${OLD_FILE}
  ncpdq -O -h -a ensemble,record tmp_${OLD_FILE} tmp_${OLD_FILE}
  ncwa -O -h -a record tmp_${OLD_FILE} tmp_${OLD_FILE}
  ncrcat tmp_cat.nc tmp_${OLD_FILE} $2
 else
  mv tmp_cat.nc $2
 fi
}

# Move a post-processed file to its final directory
# ${SAVEDIR}/${EXPID}/monthly_mean/<varnew>_${NFRP}hourly (created if needed)
# and create the symbolic link <varnew>_${SDATE}.nc pointing to it.
# $1 : variable name in the post-processed files (varnew)
# $2 : file to save (output), expected in the current directory
# For tos a land-sea mask read from ${MASK_PATH} is applied first (see below).
function save_final_output(){
# save final post-processed output (file in *.nc format)
 varnew=$1
 output=$2
   
 TARDIR=${SAVEDIR}/${EXPID}/monthly_mean/${varnew}_${NFRP}hourly
 [ ! -d $TARDIR ] && mkdir -p $TARDIR
# [[ ! -z $( find ${SAVEDIR}/${EXPID}/monthly_mean/. -type d ) ]] && echo ${SAVEDIR} && find ${SAVEDIR}/${EXPID}/monthly_mean/. -type d | xargs chmod 775 2>/dev/null
# debug trace
 echo llll
# [[ ! -z $( find ${SAVEDIR}/${EXPID}/monthly_mean/. -type f ) ]] && find ${SAVEDIR}/${EXPID}/monthly_mean/. -type f | xargs chmod 664 2>/dev/null

# I have commented the lines below which do not make any sense !!!! Virginie
# Why concatenating the new file  with the already existing file ?
# The only valid reason to concatenate would be if the old file spans a given 
# range of forecast times and the new file spans a subsequent range of forecast
# times but then they would not have the same name !!!

#  if [ -e ${TARDIR}/${output} ] ; then
#   mv ${output} new_${output}
#   ncpdq -O -h -a ensemble,time new_${output} new_${output} # shape the dimensions
#   mv ${TARDIR}/${output} old_${output}
#   ncpdq -O -h -a ensemble,time old_${output} old_${output} # shape the dimensions
#   ncrcat -O -h old_${output} new_${output} ${output}
#   ncpdq -O -h -a time,ensemble ${output} ${output}         # again reshape the dimensions as per requirement of final output
#   rm -f old_${output} new_${output}
#  fi
 chmod 770 ${output}
#for tos, change value on land from 0 to NaN
if [ $varnew == "tos" ]; then
  cp ${MASK_PATH} mask1.nc
# list the dimension names of the mask; when neither "longitude" nor
# "latitude" is among them, rename lon/lat (variables and dimensions)
  lstdims=`ncdump -h mask1.nc | awk /dimensions:/,/variables:/ | grep -v dimensions: | grep -v variables: | awk '{print $1}'` 
  if [[ ${lstdims/longitude} == ${lstdims} ]] && [[ ${lstdims/latitude} == ${lstdims} ]] ; then
    ncrename -h -v lon,longitude -v lat,latitude mask1.nc
    ncrename -h -d lon,longitude -d lat,latitude mask1.nc
  fi
# concatenate four copies of the mask along the record dimension
# NOTE(review): the count of 4 is hard coded; presumably it should follow the
# number of time steps of the output file - confirm
for i in $( seq 1 4);
do
cp mask1.nc mask1.nc.$i
done
ncrcat mask1.nc.* mask.nc
# turn the mask into 0/1 values: 1 where the mask is greater than 0.1
cdo -gtc,0.1 mask.nc mask2.nc
mv mask2.nc mask.nc

# rename the leadtime variable of the output file to time when present
lstdims=`ncdump -h $output | awk /dimensions:/,/variables:/ | grep -v dimensions: | grep -v variables: | awk '{print $1}'`
if [[ ${lstdims/leadtime} != ${lstdims} ]] ; then
  ncrename -v leadtime,time $output
fi
lstdims=`ncdump -h mask.nc | awk /dimensions:/,/variables:/ | grep -v dimensions: | grep -v variables: | awk '{print $1}'`
if [[ ${lstdims/surface} != ${lstdims} ]] ; then
# NOTE(review): no output file is given to ncwa here - confirm this call works
  ncwa -a surface mask.nc
fi
# NOTE(review): this copy replaces the output file by the mask, so the
# following ncap2 no longer sees tos; the commented "ncks -h -A mask.nc
# $output" further down looks like the intended way to add LSM - confirm
cp mask.nc $output
# dividing by (1-LSM) gives infinity where LSM is 1
ncap2 -O -s "tos=tos/(1-LSM)" $output ${output}.tmp
ncks -h -x -v LSM ${output}.tmp ${output}.tmp2
# go through the text form of the file to replace the infinities by NaN
ncdump ${output}.tmp2 > tmp.txt
sed -i s/Infinityf/NaN/g tmp.txt
ncgen -b tmp.txt -o  ${output}.tmp2
#  echo "where( tos = \"Infinityf\" ) tos[time,latitude,longitude]=\"NaN\"; " > mynco.nco
#  ncks -h -A mask.nc $output
#  echo "where( LSM != 0 ) tos[time,latitude,longitude]=\"NaN\"; " > mynco.nco
#  ncap2 -O -S mynco.nco ${output}.tmp2 ${output}.tmp3
#  ncks -h -x -v LSM ${output}.tmp ${output}.tmp2
  mv $output.tmp2 ${TARDIR}/${output}
  rm mask1.nc ${output}.tmp
fi
# NOTE(review): this move also runs for tos and then overwrites the masked
# file saved just above - confirm whether it belongs in an "else" branch
mv ${output} ${TARDIR}/${output}
#to make a smooth transition between the 2 versions of nccf_atm_monthly, as R functions look for atmospherical monthly means called $var_yyyymmdd.nc (without the members), a link between the 2 naming conventions is created so that R functions still work while they have not been updated.
 ln -sf ${TARDIR}/${output} ${TARDIR}/${varnew}_${SDATE}.nc
}
   
# Select variables and levels   
#
# For every member of MEM_LST:
#  - from each spectral file MMA*SH*.nc.<member>, select every variable of
#    VAR_LST_3D_SH on the levels LEVEL_LST;
#  - from each gridpoint file MMA*GG*.nc.<member>, select every variable of
#    VAR_LST_3D_GG (on the levels LEVEL_LST for Q, on all levels otherwise);
#  - for every variable of VAR_LST_3D, merge the per-file selections into
#    <varnew>_<SDATE>.<member>.nc and rename the variable to varnew.
# Selections are written to tmp_<varnew>_<input file name>.
# Repairs: the "done" closing the loop over the SH files was missing, and the
# merge step now only picks the temporary files of the current member (their
# names end in ".<member>") so that files left by previously processed
# members are not merged again.
function combine_3d(){
 for MEM in ${MEM_LST[@]}; do
  FILES="$(find . -maxdepth 1 -name  "MMA*SH*.nc.$MEM" | sed -e 's/\.\///g' )"
  echo $FILES
  for f in ${FILES}; do
   for var in ${VAR_LST_3D_SH[@]}; do
    new_name $var
    TMP_OUT=tmp_${varnew}_$f
    cdo selname,${var} -sellevel,${LEVEL_LST} ${f} ${TMP_OUT}
   done
  done
  FILES=$(find . -maxdepth 1 -name  "MMA*GG*.nc.$MEM" | sed -e 's/\.\///g' )
  for f in ${FILES}; do
   for var in ${VAR_LST_3D_GG[@]}; do
    new_name $var
    TMP_OUT=tmp_${varnew}_$f
    case $var in #pab
     Q)
      cdo selname,${var} -sellevel,${LEVEL_LST} ${f} ${TMP_OUT} #
     ;;
     *)
      cdo selname,${var} ${f} ${TMP_OUT} #pab
     ;;
    esac
   done
  done

  # combine all time step in one file
  for var in ${VAR_LST_3D[@]}; do
   new_name $var
   FILES="$(find . -maxdepth 1 -name "tmp_${varnew}_*.${MEM}" | sed -e 's/\.\///g' )"
   output=${varnew}_$SDATE.$MEM.nc
   cdo -O mergetime ${FILES} ${output} # combine all the time steps in one file
   ncrename -h -v ${var},${varnew} ${output}
  done #loop for variables 
 done # loop for members 
}
######end of combine3d ##########


# Bring every merged 3D file <varnew>_<SDATE>.*.nc of the current directory to
# its "rg_" version and delete the input file:
#  - variables of VAR_LST_3D_SH go through "cdo -r sp2gp" (spectral to
#    gridpoint transformation);
#  - variables of VAR_LST_3D_GG are only selected with "cdo selname".
# new_name provides varnew for each variable.
function regrid2x2(){
 # spectral variables
 for var in ${VAR_LST_3D_SH[@]}
 do
  new_name $var
  FILES=$( find . -maxdepth 1 -name "${varnew}_$SDATE.*.nc" | sed -e 's/\.\///g' )
  for f in ${FILES}
  do
   cdo -r sp2gp -selname,${varnew} ${f} rg_${f}
   rm -f ${f}
  done
 done

 # gridpoint variables
 for var in ${VAR_LST_3D_GG[@]}
 do
  new_name $var
  FILES=$( find . -maxdepth 1 -name "${varnew}_$SDATE.*.nc" | sed -e 's/\.\///g' )
  for f in ${FILES}
  do
   cdo selname,${varnew} ${f} rg_${f}
   rm -f ${f}
  done
 done
}


function upper(){
   for var in ${VAR_LST_3D[@]}; do
    new_name $var

    echo ${LISTMEMB[0]} -- ${#LISTMEMB[@]} -- ${LISTMEMB[${#LISTMEMB[@]}-1]} 
    LSMBSH="${LISTMEMB[0]}-${LISTMEMB[${#LISTMEMB[@]}-1]}"
#    PREVIOUS_FILE="$(ls ${SAVEDIR}/${EXPID}/monthly_mean/${varnew}_${NFRP}hourly/${varnew}_${SDATE}_*.nc | tail -1)"
#    PREVIOUS_FILE="$( find . -maxdepth 1 -name  "${SAVEDIR}/${EXPID}/monthly_mean/${varnew}_${NFRP}hourly/${varnew}_${SDATE}_*.nc" | tail -1 | sed -e 's/\.\///g' )"
#savedir 
   PREVIOUS_FILE="$( find  ${SAVEDIR}/${EXPID}/monthly_mean/${varnew}_${NFRP}hourly -maxdepth -name  "${varnew}_${SDATE}_*.nc" | tail -1 | sed -e 's/\.\///g' )"
     cd ${SAVEDIR}/${EXPID}/monthly_mean/${varnew}_${NFRP}hourly/; 
     FILE_NAME_PREVIOUS="$( find . -maxdepth 1 -name "${varnew}_${SDATE}_*.nc" | tail -1 | sed -e 's/\.\///g' )"; cd -
     IDX_1ST=$(echo ${varnew}_${SDATE}_ | wc -m )
     FIRST_MEMBER_PREVIOUS=$(echo $FILE_NAME_PREVIOUS | cut -b$IDX_1ST )
     IDX_LST=$( expr $IDX_1ST + 2 )
     LAST_MEMBER_PREVIOUS=$(echo $FILE_NAME_PREVIOUS  | cut -b$IDX_LST )
#security check:
     if [ $LAST_MEMBER_PREVIOUS -le $(expr ${LISTMEMB[0]} - 1 ) ] ; then
        cp $PREVIOUS_FILE rg_$FILE_NAME_PREVIOUS
       if [ $LAST_MEMBER_PREVIOUS -eq $(expr ${LISTMEMB[0]} - 1 ) ] ; then
        LSMBSH=${FIRST_MEMBER_PREVIOUS}-${LISTMEMB[${#LISTMEMB[@]}-1]}
       else
        echo "Actual list of members does not follow directly the ones already post-processed! Check you did not forget any members at the beginning of your list"
        MISSING_FIRST=$(expr ${LAST_MEMBER_PREVIOUS} + 1 )
        MISSING_LAST=$(expr ${LISTMEMB[0]} - 1 )
        LSMBSH=${FIRST_MEMBER_PREVIOUS}_${LISTMEMB[${#LISTMEMB[@]}-1]}-${MISSING_FIRST}_${MISSING_LAST}
       fi
     else
      echo "Some members are going to be treated twice! Revise the consistency between your member list and the previously processed files"
     fi
    fi
    YRSO=${YEAR0}$(printf "%02d" $(( 10#$MON0)))_${YEARF}$(printf "%02d" $((10#$MONF))).nc
    [[ 10#${MONF} -le 9 ]] && [[ $( echo $MONF | wc -c) -eq 2 ]] && YRSO=${YEAR0}${MON0}_${YEARF}$(printf "%02d" ${MONF}).nc
    gather_memb rg_${varnew}_$SDATE ${varnew}_${SDATE}_fc${LSMBSH}_${YRSO}.nc $varnew $VAR
    output=${varnew}_${SDATE}_fc${LSMBSH}_${YRSO}.nc
#    gather_memb rg_${varnew}_$SDATE ${varnew}_${SDATE}_${LSMBSH}.nc $varnew $VAR
#    output=${varnew}_${SDATE}_${LSMBSH}.nc
   for CHECK_VAR in $VAR_LST_3D_SH
    do
    if [ $CHECK_VAR == $var ];then
    	ncrename -h -d lon,longitude -d lat,latitude -d lev,level ${output}
    	ncrename -h -v lon,longitude -v lat,latitude -v lev,level ${output}
    fi
   done
   for CHECK_VAR in $VAR_LST_3D_GG
    do
    if [ $CHECK_VAR == $var ];then
	  ncrename -h -d lon,longitude -d lat,latitude ${output}
      [[ ! -z $( ncdump -h ${output} | grep "mlev =" ) ]] && ncrename -h -d mlev,level ${output}
      [[ ! -z $( ncdump -h ${output} | grep "lev =" ) ]] && ncrename -h -d lev,level -v lev,level ${output}
      if [[ ! -z $( ncdump -h ${output} | grep "depth =" ) ]]; then
        ncrename -h -d depth,level ${output}
        ncrename -h -v depth,level ${output}
      fi
      ncrename -h -v lon,longitude -v lat,latitude ${output}
    fi
   done
#
# Get the CFU standard attributes to be written in the variable
#
        variables="$(cat ${HEAD_DIR}/table_of_variable | cut -f$idx -d'|' | sed -e 's/ /@/g')"
        cfustandard_name="$(echo $variables | cut -f2 -d' ' | sed -e 's/@/ /g')"  # variable standard name
        cfulong_name="$(echo $variables     | cut -f3 -d' ' | sed -e 's/@/ /g')"  # variable long name
        cfucell_methods="$(echo $variables  | cut -f4 -d' ' | sed -e 's/@/ /g')"  # variable cell methods
        cfuunit="$(echo $variables          | cut -f5 -d' ' | sed -e 's/@/ /g')"  # variable unit
        cfuunit_long="$(echo $variables     | cut -f6 -d' ' | sed -e 's/@/ /g')"  # variable unit long name
        cfulevel_number="$(echo $variables  | cut -f7 -d' ' | sed -e 's/@/ /g')"  # variable level
        cfulevel_type="$(echo $variables    | cut -f8 -d' ' | sed -e 's/@/ /g')"  # variable level type
        cfulevel_units="$(echo $variables   | cut -f9 -d' ' | sed -e 's/@/ /g')"  # variable level unit
#
# modify variable attributes
#
        for att in units valid_range actual_range code table GRID_TYPE truncation; do
          ncatted -O -h -a ${att},${varnew},d,, ${output}
        done

        ncatted -O -h -a _FillValue,${varnew},a,f,1.e+12 ${output}
        ncatted -O -h -a standard_name,${varnew},o,c,"$cfustandard_name" ${output} # variable standard name
        ncatted -O -h -a long_name,${varnew},o,c,"$cfulong_name" ${output}         # variable long name
        ncatted -O -h -a cell_methods,${varnew},o,c,"$cfucell_methods" ${output}   # variable cell methods
        ncatted -O -h -a unit_long,${varnew},o,c,"$cfuunit_long" ${output}         # variable long unit name
        ncatted -O -h -a units,${varnew},o,c,"$cfuunit" ${output}                  # variable units
        lstdims=`ncdump -h ${output} | awk /dimensions:/,/variables:/ | grep -v dimensions: | grep -v variables: | awk '{print $1}'` 
        if [[ ${lstdims/lev_2} != ${lstdims} ]] ; then
          ncrename -d lev_2,level -v lev_2,level $output
        fi
        if [[ ${lstdims/depth_2} != ${lstdims} ]] ; then
          ncrename -d depth_2,level -v depth_2,level $output
        fi
        if [[ ${lstdims/depth_3} != ${lstdims} ]] ; then
          ncrename -d depth_3,level -v depth_3,level $output
        fi
        if [[ ${lstdims/depth_4} != ${lstdims} ]] ; then
          ncrename -d depth_4,level -v depth_4,level $output
        fi
        if [[ ${lstdims/depth} != ${lstdims} ]] ; then
          ncrename -d depth,level -v depth,level $output
        fi
        lstdims=`ncdump -h ${output} | awk /dimensions:/,/variables:/ | grep -v dimensions: | grep -v variables: | awk '{print $1}'` 
        if [[ ${lstdims/level} == ${lstdims} ]] ; then
          ncrename -d lev,level -v lev,level $output
        fi
        ncatted -O -h -a data_type,level,o,c,"$cfulevel_type" ${output}       # variable level type
        ncatted -O -h -a units,level,o,c,"$cfulevel_units" ${output}          # variable level units
        ncatted -O -h -a coordinates,${varnew},o,c,"longitude latitude leadtime reftime time_bnd experiment_id source realization institution level" ${output}  
#
# If the NetCDF file had a horizontal axis name different from longitude
#
        [[ ! -z $( ncdump -h $output | grep record ) ]] && ncrename  -h -d record,ensemble ${output}
#
# modify longitude attributes
#
        lon_min=0
        lon_max=359.25
        lat_min=-89.4270841760375
        lat_max=89.4270841760375  # These values should be obtained from the file instead of being hardcoded

        ncatted -O -h -a axis,longitude,o,c,"X" ${output}     # variable longitude axis
        ncatted -O -h -a topology,longitude,c,c,"circular" ${output}     # variable longitude axis
        ncatted -O -h -a modulo,longitude,c,f,"360" ${output}     # variable longitude axis
        ncatted -O -h -a valid_min,longitude,c,f,"$lon_min" ${output}     # variable longitude valid_min
        ncatted -O -h -a valid_max,longitude,c,f,"$lon_max" ${output}     # variable longitude valid_max
# modify latitude attributes 
        ncatted -O -h -a axis,latitude,o,c,"Y" ${output}      # variable latitude axis
        ncatted -O -h -a valid_min,latitude,c,f,"$lat_min" ${output}     # variable latitude valid_min
        ncatted -O -h -a valid_max,latitude,c,f,"$lat_max" ${output}     # variable latitude valin_max
# modify level attributes 
        ncatted -O -h -a standard_name,level,o,c,"air_pressure" ${output} # standard name
        ncatted -O -h -a long_name,level,o,c,"air pressure" ${output}         # long name
        ncatted -O -h -a data_type,level,o,c,"float" ${output}       # data type
        ncatted -O -h -a units,level,o,c,"hPa" ${output}                  #  units
        ncatted -O -h -a axis,level,o,c,"Z" ${output}          # axis
        ncatted -O -h -a positive,level,c,c,"up" ${output}
###
# modify the level values, should be hPa instead of Pa
	    ncap2 -O -h -s "level()=level()/100" ${output} ${output} 

        lstdims=`ncdump -h ${output} | awk /dimensions:/,/variables:/ | grep -v dimensions: | grep -v variables: | awk '{print $1}'` 
        if [[ ${lstdims/ensemble} == ${lstdims} ]] ; then
          ncecat -h ${output} tmp_${output}
          rm -f $output
          mv tmp_${output} ${output} 
          ncrename -h -O -d record,ensemble $output $output
        fi
        ncpdq -O -h -a time,ensemble ${output} ${output} # reshape the dimension and make time unlimited
      #  ncks  -h -A $HEAD_DIR/${SDATE}.nc ${output}
        ncks  -h -A header.nc ${output}
        ncrename -O -h -v time,kaka $output # delete time variable 
        ncks -O -h -x -v kaka $output $output # delete time variable
        ncatted -h -a history,global,d,, $output  #delete history
        ncatted -h -a institution,global,m,c,"IC3" $output ## change institution name in global attributes

# create a script to change the EXPID, institution, ensemble member, source and realization
	i=0 # index
	for mem in ${LISTMEMB[@]}; do
cat>modify_ncvalue<<EOF
ncap2 -O -h -s 'experiment_id($i,0:3)="$EXPID";realization($i)=$mem;institution($i,0:$(( ${#INSTITUTION} -1  )))="$INSTITUTION";source($i,0:$(( ${#SOURCE} -1  )))="$SOURCE"' \$1 \$1
                cat modify_ncvalue
	        bash modify_ncvalue $output; rm modify_ncvalue
		i=$((i+1))
	done
##
#        rm -f ${files}*
        save_final_output $varnew $output