diff --git a/genan.R b/genan.R new file mode 100644 index 0000000000000000000000000000000000000000..20d4234cf938e227554e3072ec135ff9eec2da21 --- /dev/null +++ b/genan.R @@ -0,0 +1,24 @@ + +line='' +for (t in 1){ + xc=c('',paste0('-C-',t)) + xs=c('',paste0('_S-',t)) + xd=c('',paste0('_D-',t)) + xp=c('',paste0('_P-',t)) + xt=c('',paste0('_T-',t)) + xh=c('',paste0('_H-',t)) + for (c in 2){ + for (s in 1:2){ + for (d in 1:2){ + for (p in 1:2){ + for (t in 1:2){ + for (h in 1:2){ + line=paste(line,paste0('an',xc[c],xs[s],xd[d],xp[p],xt[t],xh[h])) + } + } + } + } + } + } +} +print(line) diff --git a/int_opmos.sh b/int_opmos.sh new file mode 100755 index 0000000000000000000000000000000000000000..8455f8526afa2bfbf8421ef4818912b684e0e664 --- /dev/null +++ b/int_opmos.sh @@ -0,0 +1,45 @@ +#!/bin/bash +#SBATCH + +nmax=1 + +echo -e "\e[00;31m | Read parameters... \e[00m `date`" +ijob=$1 +joblist=$2 +pdir=$3 +indir=$4 +outdir=$5 +whichoutput=$6 +experiment=$7 +criteria=$8 +allstation=$9 +what="${10}" +echo "inputs : $ijob/$joblist/$pdir/$indir/$outdir/$whichoutput/$experiment/$criteria/$allstation/$what" + + +echo -e "\e[00;31m | Launch the jobs for each MOS model... \e[00m `date`" +subwhat=` awk -v job=$ijob '{if(NR==job){print $7}}' $joblist` +nsubwhat=`echo $subwhat | awk -F'|' '{print NF}'` +echo $subwhat $nsubwhat +for i in `seq 1 $nsubwhat` ; do + iwhat=`echo $subwhat | awk -F'|' -v i=$i 'BEGIN{OFS=IFS="\t"}{print $i}'` + echo "launch model $i/$nsubwhat) $iwhat `date`" + $pdir/job_opmos.sh $ijob $joblist $pdir $indir $outdir $whichoutput $experiment $criteria $allstation $iwhat &> $outdir/other/slurm_opmos/subslurm_job${ijob}_${iwhat} +done + + +echo -e "\e[00;31m | Wait the end... \e[00m `date`" +ndone=0 +while [ $ndone -lt $nwhat ] ; do + ndone=` grep END_job_$ijob $outdir/other/slurm_opmos/subslurm_job${ijob}_* | wc -l` + nerr=` grep Err $outdir/other/slurm_opmos/subslurm_job${ijob}_* 2> /dev/null | wc -l` + nkilled=`grep Killed $outdir/other/slurm_opmos/subslurm_job${ijob}_* 2> /dev/null | wc -l` + ncancel=`grep CANCEL $outdir/other/slurm_opmos/subslurm_job${ijob}_* 2> /dev/null | wc -l` + sleep 10 + echo "waiting... ($ndone/$nwhat) (NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel) `date`" +done + + +### save the slurm files +#mv $pdir/slurm* $outdir/other/slurm_post/ +echo "_THEEND_ `date`" #(do not modify because top_opmos.sh is counting the THEEND occurences...) diff --git a/job_opmos.sh b/job_opmos.sh new file mode 100755 index 0000000000000000000000000000000000000000..005f03a54507d8049b9e220db79924d9f58ad51a --- /dev/null +++ b/job_opmos.sh @@ -0,0 +1,77 @@ +#!/bin/bash + + +function convertsec(){ # (function for converting time in seconds, used for chrono routines) + echo $(($1 / 86400))':'`date +%H:%M:%S -d "0000-01-01 $(($1 % 86400)) seconds"` +} +begin=`date +%s` #(start chrono) + +echo "****** Read parameters" +ijob=$1 +joblist=$2 +pdir=$3 +indir=$4 +outdir=$5 +whichoutput=$6 +experiment=$7 +criteria=$8 +allstation=$9 +iwhat=${10} +ipollutant=`awk -v job=$ijob '{if(NR==job){print $2}}' $joblist` +istation=` awk -v job=$ijob '{if(NR==job){print $3}}' $joblist` +d1=` awk -v job=$ijob '{if(NR==job){print $4}}' $joblist` +d2=` awk -v job=$ijob '{if(NR==job){print $5}}' $joblist` +domain=` awk -v job=$ijob '{if(NR==job){print $6}}' $joblist` +echo "inputs : $ijob|$joblist|$pdir|$indir|$outdir|$whichoutput|$experiment|$criteria|$allstation|$iwhat" +echo "inputs joblist : $ipollutant|istation|$d1|$d2|$domain" + + + + + +echo "****** Create suboutput (for output files) and workdir (for temporary files) directory" +suboutdir=$outdir/data_${d1}_${d2}_$istation +workdir=$outdir/tmp/tmp_${ijob}_${d1}_${d2}_${istation}_${iwhat} +if [ ! -d $suboutdir ] ; then mkdir -p $suboutdir ; fi +if [ ! -d $workdir ] ; then mkdir -p $workdir ; fi + + + +echo "****** Launch the R routines" +Rscript routine0_prepare.R $iwhat $indir $workdir $whichoutput $istation $ipollutant $d1 $d2 $experiment $criteria $pdir $domain $allstation + + +echo "****** Check if results are here - pdf" +ls $workdir/*pdf 2> /dev/null +echo "****** Check if results are here - stat.csv" +ls $workdir/*stat.csv 2> /dev/null +echo "****** Check if results are here - data.csv" +ls $workdir/*data.csv 2> /dev/null +echo "****** Check if results are here - Rdata" +ls $workdir/*Rdata 2> /dev/null +echo "****** Check if results are here - importance.csv" +ls $workdir/*importance.csv 2> /dev/null + + +echo "****** Move results" +if [ `echo $whichoutput | grep -w pdf | wc -l` -eq 1 ] ; then mv $workdir/*pdf $suboutdir 2> /dev/null ; fi +if [ `echo $whichoutput | grep -w stat | wc -l` -eq 1 ] ; then mv $workdir/*stat.csv $suboutdir 2> /dev/null ; fi +if [ `echo $whichoutput | grep -w hdata | wc -l` -eq 1 ] ; then mv $workdir/*hdata.csv $suboutdir 2> /dev/null ; fi +if [ `echo $whichoutput | grep -w dxdata | wc -l` -eq 1 ] ; then mv $workdir/*dxdata.csv $suboutdir 2> /dev/null ; fi +if [ `echo $whichoutput | grep -w mxdata | wc -l` -eq 1 ] ; then mv $workdir/*mxdata.csv $suboutdir 2> /dev/null ; fi +if [ `echo $whichoutput | grep -w h24xdata | wc -l` -eq 1 ] ; then mv $workdir/*h24xdata.csv $suboutdir 2> /dev/null ; fi +if [ `echo $whichoutput | grep -w dmaxxdata | wc -l` -eq 1 ] ; then mv $workdir/*dmaxxdata.csv $suboutdir 2> /dev/null ; fi +if [ `echo $whichoutput | grep -w dmda8xdata | wc -l` -eq 1 ] ; then mv $workdir/*dmda8xdata.csv $suboutdir 2> /dev/null ; fi +mv $workdir/*Rdata $suboutdir 2> /dev/null +mv $workdir/*importance.csv $suboutdir 2> /dev/null + +rm -rf $workdir + + +#(Here stops the chrono) +end=`date +%s` +timejob=`convertsec $(($end-$begin))` #(copy the convertsec function in my ~/.bashrc to yours) +echo " END_job_$ijob >>> duration : ${timejob}" + + + diff --git a/job_post.sh b/job_post.sh new file mode 100755 index 0000000000000000000000000000000000000000..7fd3a193b29ebb07072eecbf8c7400f83158c413 --- /dev/null +++ b/job_post.sh @@ -0,0 +1,210 @@ +#!/bin/bash +#SBATCH --time=01:00:00 + +post=$1 +pdir=$2 +outdir=$3 +ipollutant=$4 +iwhat="$5" +d1=$6 +d2=$7 +itype=$8 + +echo "post=$post" +echo "pdir=$pdir" +echo "outdir=$outdir" +echo "ipollutant=$ipollutant" +echo "iwhat=$iwhat" +echo "d1=$d1" +echo "d2=$d2" +echo "itype=$itype" + +case $BSC_MACHINE in + 'power') workdir=$TMPDIR ;; + 'mn4') + case $post in + "aggregate1a") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat} ;; + "aggregate1b") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat} ;; + "aggregate2a") workdir=$outdir/tmp/tmppost_${post}_${ipollutant} ;; + "aggregate2b") workdir=$outdir/tmp/tmppost_${post}_${ipollutant} ;; + "xxstat") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat} ;; + "confusion") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat}_${itype} ;; + "hfopstat") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat} ;; + "hx1errorbinstat") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat} ;; + "hx1obsbinstat") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat} ;; + "hopstat") workdir=$outdir/tmp/tmppost_${post}_${ipollutant}_${iwhat} ;; + esac + + if [ ! -d $workdir ] ; then mkdir -p $workdir ; fi + ;; +esac + +echo "========== post:$post ==========" + + +############################################################################################# +if [ "$post" == "aggregate1a" ] ; then + echo " Aggregate hopdata.Rdata files... " + ls $outdir/data*/*${ipollutant}_${iwhat}_hopdata.Rdata > $workdir/listrdata_${ipollutant}_${iwhat} 2>/dev/null + nlines=`wc -l $workdir/listrdata_${ipollutant}_${iwhat} | awk '{print $1}'` + echo "$workdir/listrdata_${ipollutant}_${iwhat} ($nlines *_hopdata.Rdata files available)" + if [ $nlines -gt 0 ] ; then + outfile=$outdir/post_statistics/hopdata_${ipollutant}_${iwhat} + Rscript $pdir/post.R \ + $post \ + $outdir/data_${d1}_${d2}_ \ + $ipollutant \ + $iwhat \ + $outdir/other/tmp_filestations_all \ + $outfile \ + $workdir/listrdata_${ipollutant}_${iwhat} + fi +fi +if [ "$post" == "aggregate1b" ] ; then + echo " Aggregate hdata.Rdata files... " + ls $outdir/data*/*${ipollutant}_${iwhat}_1_hdata.Rdata > $workdir/listrdata_${ipollutant}_${iwhat} 2>/dev/null + nlines=`wc -l $workdir/listrdata_${ipollutant}_${iwhat} | awk '{print $1}'` + echo "$workdir/listrdata_${ipollutant}_${iwhat}_1 ($nlines *_hdata.Rdata files available)" + if [ $nlines -gt 0 ] ; then + outfile=$outdir/post_statistics/hdata_${ipollutant}_${iwhat}_1 + Rscript $pdir/post.R \ + $post \ + $outdir/data_${d1}_${d2}_ \ + $ipollutant \ + $iwhat \ + $outdir/other/tmp_filestations_all \ + $outfile \ + $workdir/listrdata_${ipollutant}_${iwhat} + fi +fi + + +############################################################################################# +if [ "$post" == "aggregate2a" ] ; then + echo " Aggregate xhopdata.Rdata files... " + ls $outdir/post_statistics/hopdata_${ipollutant}_*.Rdata > $workdir/listrdata_${ipollutant} 2>/dev/null + nlines=`wc -l $workdir/listrdata_${ipollutant} | awk '{print $1}'` + echo "$workdir/listrdata_${ipollutant} ($nlines *_hopdata.Rdata files available)" + if [ $nlines -gt 0 ] ; then + outfile=$outdir/post_statistics/hopdata_${ipollutant} + Rscript $pdir/post.R \ + $post \ + $workdir/listrdata_${ipollutant} + fi +fi +if [ "$post" == "aggregate2b" ] ; then + echo " Aggregate xhdata.Rdata files... " + ls $outdir/post_statistics/hdata_${ipollutant}_*_1.Rdata > $workdir/listrdata_${ipollutant} 2>/dev/null + nlines=`wc -l $workdir/listrdata_${ipollutant} | awk '{print $1}'` + echo "$workdir/listrdata_${ipollutant} ($nlines *_hdata.Rdata files available)" + if [ $nlines -gt 0 ] ; then + outfile=$outdir/post_statistics/hdata_${ipollutant}_1 + Rscript $pdir/post.R \ + $post \ + $workdir/listrdata_${ipollutant} \ + $ipollutant \ + $iwhat \ + $outdir/data_${d1}_${d2}_ + fi +fi + + + +############################################################################################# +if [ "$post" == "xxstat" ] ; then + echo " Compute xxstat... " + ls $outdir/post_statistics/hdata_${ipollutant}_${iwhat}_1.Rdata + + outfile=$outdir/post_statistics/hdata_${ipollutant}_1 + Rscript $pdir/post.R \ + $post \ + $outdir/post_statistics/xhdata_${ipollutant}_${iwhat}_1.Rdata \ + $ipollutant \ + $iwhat \ + $outdir/data_${d1}_${d2}_ + +fi + + + + + + + +############################################################################################# +if [ "$post" == "hopstat" ] ; then + echo " Get hopstat_final... " + outfile=$outdir/post_statistics/hopstat_${ipollutant}_${iwhat}_final + Rscript $pdir/post.R \ + $post \ + $outdir/data_${d1}_${d2}_ \ + $ipollutant \ + $iwhat \ + $outdir/other/tmp_filestations_all \ + $outfile \ + $outdir/post_statistics/hopdata_${ipollutant}_obs.Rdata \ + $outdir/post_statistics/hopdata_${ipollutant}_${iwhat}.Rdata + + echo " Get xhopstat_final ... " + outfile=$outdir/post_statistics/xhopstat_${ipollutant}_${iwhat}_final + Rscript $pdir/post.R \ + $post \ + $outdir/data_${d1}_${d2}_ \ + $ipollutant \ + $iwhat \ + $outdir/other/tmp_filestations_all \ + $outfile \ + $outdir/post_statistics/xhopdata_${ipollutant}_obs.Rdata \ + $outdir/post_statistics/xhopdata_${ipollutant}_${iwhat}.Rdata + +fi + + + +############################################################################################# +if [[ "$post" == "hfopstat" || "$post" == "hx1errorbinstat" || "$post" == "hx1obsbinstat" ]] ; then + ## Aggregate statistics on hfopstat/hx1errorbinstat/hx1obsbinstat (mean,sd,p05-25-50-75-95,min,max,N) + echo " Aggregate hfopstat/hx1errorbinstat/hx1obsbinstat... " + outfile=$outdir/post_statistics/${post}_${ipollutant}_${iwhat} + Rscript $pdir/post.R $post $outdir/data_${d1}_${d2}_ $ipollutant $iwhat $outdir/other/tmp_filestations_all $outfile +fi + + +############################################################################################# +if [ "$post" == "confusion" ] ; then + echo " Aggregate confusion matrices... " + tmpfile=$workdir/tmp_confusion_${ipollutant}_${iwhat}_${itype} + outfile=$outdir/post_statistics/${post}_${ipollutant}_${iwhat}_${itype}1limstat.csv + ini=1 + while read -r istation ; do + file=$outdir/data_${d1}_${d2}_${istation}/${istation}_${ipollutant}_${iwhat}_${itype}1limstat.csv + echo "/////////////////////" $file + if [ -f $file ] ; then + #echo ${istation}_${ipollutant}_${iwhat}_${itype}limstat.csv `awk -F',' '{if(NR==2){print $16}}' $file` + if [ $ini -eq 1 ] ; then + awk -F',' '{if(NR>1){print $26,$27,$16,$17,$18,$19}}' $file > $outfile + ini=0 + else + awk -F',' '{if(NR>1){print $26,$27,$16,$17,$18,$19}}' $file > $tmpfile + paste $outfile $tmpfile | awk '{print $1,$2,($3+$9),($4+$10),($5+$11),($6+$12)}' > ${tmpfile}2 + mv ${tmpfile}2 $outfile + head -2 $outfile + fi + fi + done < $outdir/other/tmp_filestations_all + mv $outfile ${tmpfile}3 + echo "lim1 lim2 H FA M CN" > $outfile + cat ${tmpfile}3 >> $outfile + awk '{print $1","$2","$3","$4","$5","$6}' $outfile > $tmpfile + mv $tmpfile $outfile + rm -f ${tmpfile}* + + head $outfile + +fi + +#if [ "$BSC_MACHINE" == "mn4" ] ; then +# rm -rf $workdir +#fi + +echo " END_job" diff --git a/post.R b/post.R new file mode 100644 index 0000000000000000000000000000000000000000..1c3484af69a8c4302607a7618df02029a0e463fc --- /dev/null +++ b/post.R @@ -0,0 +1,479 @@ +### source('~/scripts/opmos/post_mos.R') + +source('~/R/util.R') + +print('********************************************************************************') +print('********************** post.R *****************************************') +print('********************************************************************************') + +print('********** Read first argument') +if(1==1){ + args = commandArgs(TRUE) + post <- args[1] + #print(args) +} + + + + +############################################################################## +## aggregate1 ## +############################################################################## + +if(post=="aggregate1a" | post=="aggregate1b"){ + + print('********** Read arguments') + outdir <- args[2] + ipollutant <- args[3] + iwhat <- args[4] + stationfile <- args[5] + outfile <- args[6] + listrdatafile <- args[7] + print(paste0(' post = ',post)) + print(paste0(' outdir = ',outdir)) + print(paste0(' ipollutant = ',ipollutant)) + print(paste0(' iwhat = ',iwhat)) + print(paste0(' outdir = ',outdir)) + print(paste0(' stationfile = ',stationfile)) + print(paste0(' outfile = ',outfile)) + print(paste0(' listrdatafile = ',listrdatafile)) + + allstations=read.table(stationfile)[,1] + nallstations=length(allstations) + + if(post=="aggregate1a"){ + print('********** Aggregate hopdata.Rdata files') + rdata <- as.vector(read.table(listrdatafile)[,1]) + nrdata=length(rdata) + for (i in 1:nrdata){ + load(rdata[i]) + print(rdata[i]) + if(i==1){ + hopdata=save_hopdata + }else{ + hopdata=rbind(hopdata,save_hopdata) + } + } + print(dim(hopdata)) + print(object.size(hopdata)) + save_hopdata=hopdata + save(save_hopdata,file=paste0(outfile,'.Rdata')) + } + if(post=="aggregate1b"){ + print('********** Aggregate hdata.Rdata files') + rdata <- as.vector(read.table(listrdatafile)[,1]) + nrdata=length(rdata) + for (i in 1:nrdata){ + load(rdata[i]) + print(rdata[i]) + print(dim(save_hdata)) + + if(i==1){ + hdata=save_hdata + }else{ + hdata=rbind(hdata,save_hdata) + } + } + print(dim(hdata)) + print(object.size(hdata)) + save_hdata=hdata + save(save_hdata,file=paste0(outfile,'.Rdata')) + } +} + + + + + + + +############################################################################## +## aggregate2 ## +############################################################################## +## hopdata_XXX.Rdata => xhopdata_XXX.Rdata (for inter-model comparisons) +## wokfinal : vector identifying the times where all MOS results have data +if(post=="aggregate2a" | post=="aggregate2b"){ + print('********** Read arguments...') + rdatafile <- args[2] + print(paste0(' post = ',post)) + print(paste0(' rdatafile = ',rdatafile)) + + if(post=="aggregate2a"){ + print('********** Aggregate xhopdata.Rdata files') + rdata <- as.vector(read.table(rdatafile)[,1]) + nrdata=length(rdata) + for (i in 1:nrdata){ + load(rdata[i]) + print(rdata[i]) + print(length(which(is.finite(as.matrix(save_hopdata[,1:96]))==TRUE))) + wok=which(is.finite(apply(save_hopdata[,1:96],1,sum))==TRUE) + if(i==1){ + wokfinal=wok + }else{ + wokfinal=intersect(wok,wokfinal) + } + print(length(wokfinal)) + } + for (i in 1:nrdata){ + print(i) + load(rdata[i]) + print(rdata[i]) + print(dim(save_hopdata)) + save_hopdata=save_hopdata[wokfinal,] + print(dim(save_hopdata)) + print(length(which(is.finite(as.matrix(save_hopdata[,1:96]))==TRUE))) + outputfile=paste0(dirname(rdata[i]),'/x',basename(rdata[i])) + save(save_hopdata,file=outputfile) + } + print(paste('wokfinal : ',length(wokfinal))) + } + if(post=="aggregate2b"){ + print('********** Aggregate xhdata.Rdata files') + rdata <- as.vector(read.table(rdatafile)[,1]) + nrdata=length(rdata) + for (i in 1:nrdata){ + load(rdata[i]) + print(rdata[i]) + print(length(which(is.finite(as.matrix(save_hdata[,3:5]))==TRUE))) + wok=which(is.finite(apply(save_hdata[,3:5],1,sum))==TRUE) + if(i==1){ + wokfinal=wok + }else{ + wokfinal=intersect(wok,wokfinal) + } + print(length(wokfinal)) + } + for (i in 1:nrdata){ + print(i) + load(rdata[i]) + print(rdata[i]) + print(dim(save_hdata)) + save_hdata=save_hdata[wokfinal,] + print(dim(save_hdata)) + print(length(which(is.finite(as.matrix(save_hdata[,3:5]))==TRUE))) + outputfile=paste0(dirname(rdata[i]),'/x',basename(rdata[i])) + save(save_hdata,file=outputfile) + } + print(paste('wokfinal : ',length(wokfinal))) + } +} + + + +############################################################################## +## xxstat ## +############################################################################## +## => compute hourly statistics for each model and station, based on comparable dataset +if(post=="xxstat"){ + print('********** Compute comparable statistics for each station') + print('********** Read arguments...') + rdatafile <- args[2] + pollutant <- args[3] + iwhat <- args[4] + rootdir <- args[5] + print(paste0(' post =',post)) + print(paste0(' rdatafile =',rdatafile)) + print(paste0(' pollutant =',pollutant)) + print(paste0(' iwhat =',iwhat)) + print(paste0(' rootdir =',rootdir)) + + + print('********** Compute...') + print(paste('loading:',rdatafile)) + load(rdatafile) + print(dim(save_hdata)) + stations=as.vector(unique(save_hdata$station)) + print(paste('Nstations:',length(stations))) + for (istat in 1:length(stations)){ + wstat=which(save_hdata$station==stations[istat]) + datatext=c(get_statistics(mode='legend',width=8,sep=',')$cleg, + get_statistics(save_hdata$hobs[wstat],save_hdata$hmod[wstat],8,2,sep=',')$cres, + get_statistics(save_hdata$hobs[wstat],save_hdata$hmos[wstat],8,2,sep=',')$cres) + file=paste0(rootdir,stations[istat],'/',stations[istat],'_',pollutant,'_',iwhat,'_hxxstat.csv',sep='') + con <- file(file,open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + print(paste('writing:',file)) + } + print('********** End of xxstat') +} + + + +############################################################################## +## xxstat ## +############################################################################## +if(post=="hopstat" | post=="hfopstat" | post=='hx1errorbinstat' | post=='hx1obsbinstat'){ + + print('********** Read arguments') + outdir <- args[2] + ipollutant <- args[3] + iwhat <- args[4] + stationfile <- args[5] + outfile <- args[6] + print(paste0(' post =',post)) + print(paste0(' outdir =',outdir)) + print(paste0(' ipollutant =',ipollutant)) + print(paste0(' iwhat =',iwhat)) + print(paste0(' outdir =',outdir)) + print(paste0(' stationfile =',stationfile)) + print(paste0(' outfile =',outfile)) + if(post=="hopstat"){ + obs_save_hopdata_rdata <- args[7] + what_save_hopdata_rdata <- args[8] + print(paste0(' obs_save_hopdata_rdata =',obs_save_hopdata_rdata)) + print(paste0(' what_save_hopdata_rdata =',what_save_hopdata_rdata)) + } + + allstations=read.table(stationfile)[,1] + nallstations=length(allstations) + + + if(post=="hopstat"){ + print('********** Compute statistics on hopdata :stat=f(1-96h)') + load(obs_save_hopdata_rdata) ; obs=save_hopdata + load(what_save_hopdata_rdata) ; mod=save_hopdata + print(dim(obs)) + print(dim(mod)) + print(length(which(is.finite(as.matrix(obs[,1:96]))==TRUE))) + print(length(which(is.finite(as.matrix(mod[,1:96]))==TRUE))) + + nlengthforecast=dim(obs)[2]-2 + hopstat=array(NaN,dim=c(n5seasons,nlengthforecast,nmetrics)) + timeseason=get_season(obs$htime) + for (isea in 1:n5seasons){ + if(isea<5){ + wsea=which(timeseason==seasons[isea]) + }else{ + wsea=1:length(timeseason) + } + if(length(wsea)!=0){ + for (ih in 1:nlengthforecast){ + hopstat[isea,ih,]=get_statistics(obs[wsea,ih],mod[wsea,ih])$res + } + } + } + + for (isea in 1:n5seasons){ + if(isea<5){ + wsea=which(timeseason==seasons[isea]) + }else{ + wsea=1:length(timeseason) + } + if(length(wsea)!=0){ + datatext=paste('hour',get_statistics(mode='legend',width=8,sep=',')$cleg,sep=',') + for (ih in 1:nlengthforecast){ + datatext=c(datatext,paste(ih,get_statistics(obs[wsea,ih],mod[wsea,ih],7,2,sep=',')$cres,sep=',')) + } + con <- file(paste0(outfile,'_',seasons5[isea],'.csv'),open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } + } + } + + if(post=="hfopstat"){ + print('********** Get hfopstat') + ini=1 + for (istat in 1:nallstations){ + file=paste0(outdir,allstations[istat],'/',allstations[istat],'_',ipollutant,'_',iwhat,'_hfopstat.csv') + if(file.exists(file)){ + print(paste('OK:',file)) + tab=read.table(file,sep=',',skip=1) + if(ini==1){ + dim1=dim(tab)[1] + hfopstat_all <- array(NaN,dim=c(nallstations,dim1,nmetrics)) + ini=0 + } + for (im in 1:nmetrics){ + hfopstat_all[istat,,im] <- tab[,2+im] + } + }else{ + print(paste('NO:',file)) + } + } + + print('********** Aggregate statistics hfopstat') + hfopstat <- array(NaN,dim=c(dim1,nmetrics,10)) + for (i in 1:dim1){ + for (im in 1:nmetrics){ + w=which(is.finite(hfopstat_all[,i,im])==TRUE) + if(length(w)>0){ + hfopstat[i,im,1] <- meanok(hfopstat_all[w,i,im]) + if(length(w)>1){hfopstat[i,im,2] <- sdok(hfopstat_all[w,i,im])} + hfopstat[i,im,3] <- quantileok(hfopstat_all[w,i,im],0.05) + hfopstat[i,im,4] <- quantileok(hfopstat_all[w,i,im],0.25) + hfopstat[i,im,5] <- quantileok(hfopstat_all[w,i,im],0.50) + hfopstat[i,im,6] <- quantileok(hfopstat_all[w,i,im],0.75) + hfopstat[i,im,7] <- quantileok(hfopstat_all[w,i,im],0.95) + hfopstat[i,im,8] <- minok(hfopstat_all[w,i,im]) + hfopstat[i,im,9] <- maxok(hfopstat_all[w,i,im]) + } + hfopstat[i,im,10] <- length(w) + } + } + + + print('********** Write hfopstat results') + statistics=c('MEAN','SD','P05','P25','P50','P75','P95','MIN','MAX','N') + column1=c(rep(1,dim1/2),rep(2,dim1/2)) + column2=c(1:(dim1/2),1:(dim1/2)) + for (is in 1:10){ + datatext=paste('mod1mos2,hour',get_statistics(mode='legend',width=8,sep=',')$cleg,sep=',') + for (i in 1:dim1){ + linetext=paste(column1[i],column2[i],sep=',') + for (im in 1:nmetrics){ + linetext=paste( + linetext, + formatC(hfopstat[i,im,is],digits=2,width=7,format='f',flag=''),sep=',') + } + datatext=c(datatext,linetext) + } + filename=paste0(outfile,'_',statistics[is],'.csv') + print(filename) + con <- file(filename,open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } + } + + + if(post=='hx1errorbinstat'){ + print('********** Get hx1errorbinstat') + ini=1 + for (istat in 1:nallstations){ + file=paste0(outdir,allstations[istat],'/',allstations[istat],'_',ipollutant,'_',iwhat,'_hx1errorbinstat.csv') + if(file.exists(file)){ + print(paste('OK',file)) + tab=read.table(file,sep=',',skip=1) + if(ini==1){ + dim1=dim(tab)[1] + hx1errorbinstat_all <- array(NaN,dim=c(nallstations,dim1,nmetrics)) + lim1=tab[,1] + lim2=tab[,2] + ini=0 + } + for (im in 1:nmetrics){ + hx1errorbinstat_all[istat,,im] <- tab[,2+im] + } + }else{ + print(paste('NO',file)) + } + } + + + print('********** Aggregate statistics hx1errorbinstat') + hx1errorbinstat <- array(NaN,dim=c(dim1,nmetrics,10)) + for (i in 1:dim1){ + for (im in 1:nmetrics){ + w=which(is.finite(hx1errorbinstat_all[,i,im])==TRUE) + if(length(w)>0){ + hx1errorbinstat[i,im,1] <- meanok(hx1errorbinstat_all[w,i,im]) + if(length(w)>1){hx1errorbinstat[i,im,2] <- sdok(hx1errorbinstat_all[w,i,im])} + hx1errorbinstat[i,im,3] <- quantileok(hx1errorbinstat_all[w,i,im],0.05) + hx1errorbinstat[i,im,4] <- quantileok(hx1errorbinstat_all[w,i,im],0.25) + hx1errorbinstat[i,im,5] <- quantileok(hx1errorbinstat_all[w,i,im],0.50) + hx1errorbinstat[i,im,6] <- quantileok(hx1errorbinstat_all[w,i,im],0.75) + hx1errorbinstat[i,im,7] <- quantileok(hx1errorbinstat_all[w,i,im],0.95) + hx1errorbinstat[i,im,8] <- minok(hx1errorbinstat_all[w,i,im]) + hx1errorbinstat[i,im,9] <- maxok(hx1errorbinstat_all[w,i,im]) + } + hx1errorbinstat[i,im,10] <- length(w) + } + } + + + print('********** Write hx1errorbinstat results') + statistics=c('MEAN','SD','P05','P25','P50','P75','P95','MIN','MAX','N') + column1=c(rep(1,dim1/2),rep(2,dim1/2)) + column2=c(1:(dim1/2),1:(dim1/2)) + for (is in 1:10){ + datatext=paste('lim1,lim2',get_statistics(mode='legend',width=8,sep=',')$cleg,sep=',') + for (i in 1:dim1){ + linetext=paste(lim1[i],lim2[i],sep=',') + for (im in 1:nmetrics){ + linetext=paste( + linetext, + formatC(hx1errorbinstat[i,im,is],digits=2,width=7,format='f',flag=''),sep=',') + } + datatext=c(datatext,linetext) + } + filename=paste0(outfile,'_',statistics[is],'.csv') + print(filename) + con <- file(filename,open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } + } + + + + if(post=='hx1obsbinstat'){ + print('********** Get hx1obsbinstat') + ini=1 + for (istat in 1:nallstations){ + file=paste0(outdir,allstations[istat],'/',allstations[istat],'_',ipollutant,'_',iwhat,'_hx1obsbinstat.csv') + if(file.exists(file)){ + print(paste('OK',file)) + tab=read.table(file,sep=',',skip=1) + if(ini==1){ + dim1=dim(tab)[1] + hx1obsbinstat_all <- array(NaN,dim=c(nallstations,dim1,nmetrics)) + lim1=tab[,1] + lim2=tab[,2] + ini=0 + } + for (im in 1:nmetrics){ + hx1obsbinstat_all[istat,,im] <- tab[,2+im] + } + }else{ + print(paste('NO',file)) + } + } + + + print('********** Aggregate statistics hx1obsbinstat') + hx1obsbinstat <- array(NaN,dim=c(dim1,nmetrics,10)) + for (i in 1:dim1){ + for (im in 1:nmetrics){ + w=which(is.finite(hx1obsbinstat_all[,i,im])==TRUE) + if(length(w)>0){ + hx1obsbinstat[i,im,1] <- meanok(hx1obsbinstat_all[w,i,im]) + if(length(w)>1){hx1obsbinstat[i,im,2] <- sdok(hx1obsbinstat_all[w,i,im])} + hx1obsbinstat[i,im,3] <- quantileok(hx1obsbinstat_all[w,i,im],0.05) + hx1obsbinstat[i,im,4] <- quantileok(hx1obsbinstat_all[w,i,im],0.25) + hx1obsbinstat[i,im,5] <- quantileok(hx1obsbinstat_all[w,i,im],0.50) + hx1obsbinstat[i,im,6] <- quantileok(hx1obsbinstat_all[w,i,im],0.75) + hx1obsbinstat[i,im,7] <- quantileok(hx1obsbinstat_all[w,i,im],0.95) + hx1obsbinstat[i,im,8] <- minok(hx1obsbinstat_all[w,i,im]) + hx1obsbinstat[i,im,9] <- maxok(hx1obsbinstat_all[w,i,im]) + } + hx1obsbinstat[i,im,10] <- length(w) + } + } + + + print('********** Write hx1obsbinstat results') + statistics=c('MEAN','SD','P05','P25','P50','P75','P95','MIN','MAX','N') + column1=c(rep(1,dim1/2),rep(2,dim1/2)) + column2=c(1:(dim1/2),1:(dim1/2)) + for (is in 1:10){ + datatext=paste('lim1,lim2',get_statistics(mode='legend',width=8,sep=',')$cleg,sep=',') + for (i in 1:dim1){ + linetext=paste(lim1[i],lim2[i],sep=',') + for (im in 1:nmetrics){ + linetext=paste( + linetext, + formatC(hx1obsbinstat[i,im,is],digits=2,width=7,format='f',flag=''),sep=',') + } + datatext=c(datatext,linetext) + } + filename=paste0(outfile,'_',statistics[is],'.csv') + print(filename) + con <- file(filename,open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } + } +} + diff --git a/routine0_prepare.R b/routine0_prepare.R new file mode 100644 index 0000000000000000000000000000000000000000..f5736ef4f65e90e447f432d83b181771fd2a5f30 --- /dev/null +++ b/routine0_prepare.R @@ -0,0 +1,192 @@ +### source('~/scripts/opmos/routine0_prepare.R') +### for (k in 1:20){dev.off()} +### source(paste0(pdir,'/routine1_hdata.R')) + +rm(list=ls()) +source('~/R/util.R') +library(plyr) +library(SDMTools) +library(caret) + + +multistattraining=0 + +########################################################################### +print('********** Read arguments') +if(1==1){ + args = commandArgs(TRUE) + what <- args[1] + indir <- args[2] + workdir <- args[3] + whichoutput <- args[4] + station <- args[5] + pollutant <- args[6] + d1 <- args[7] + d2 <- args[8] + experiment <- args[9] + criteria <- as.numeric(args[10]) + pdir <- args[11] + domain <- args[12] + allstation <- args[13] + outputfile=paste0(station,'_',pollutant,'_',what) +}else{ + + what ="obs" + indir ="/gpfs/scratch/bsc32/bsc32883/data/extractions_et" + workdir ="/gpfs/scratch/pr1ehu00/pr1ehu18/data/opmos//debug//opmos_b007_MAD-RUR_2015010100_2015093023_vdebug/tmp/tmp_1_2015010100_2015093023_ES1802A_obs" + whichoutput ="pdf-stat-hdata-dxdata-mxdata-h24xdata-dmaxxdata-dmda8xdata" + station ="ES1802A" + pollutant ="PM10" + d1 ="2015010100" + d2 ="2015093023" + experiment ="b007" + criteria =75 + pdir ="/home/pr1ehu00/pr1ehu18/scripts/opmos" + domain ="IP4" + allstation ="MAD-RUR" + outputfile ="ES1802A_PM10_obs" + + + + id=toString(system("date '+%Y%m%d%H%M%S'")) + outputfile =paste0(station,"_",pollutant,"_",what,"_",id) + outputfile =paste0(station,"_",pollutant,"_",what,"_0.95") + outputfile =paste0(station,"_",pollutant,"_",what) + if(multistattraining==1){ + outputfile =paste0(station,"_",pollutant,"_",what,"_multistattraining") + } + + + workdir ="/gpfs/scratch/pr1ehu00/pr1ehu18/data/opmos/debug/interactive/" +} + + +print('********** Print arguments') +cat(paste0(' what ="',what,'"\n')) +cat(paste0(' indir ="',indir,'"\n')) +cat(paste0(' workdir ="',workdir,'"\n')) +cat(paste0(' whichoutput ="',whichoutput,'"\n')) +cat(paste0(' station ="',station,'"\n')) +cat(paste0(' pollutant ="',pollutant,'"\n')) +cat(paste0(' d1 ="',d1,'"\n')) +cat(paste0(' d2 ="',d2,'"\n')) +cat(paste0(' experiment ="',experiment,'"\n')) +cat(paste0(' criteria =',criteria,'\n')) +cat(paste0(' pdir ="',pdir,'"\n')) +cat(paste0(' domain ="',domain,'"\n')) +cat(paste0(' allstation ="',allstation,'"\n')) +cat(paste0(' outputfile ="',outputfile,'"\n')) + + +print('********** Preliminaries') +if(pollutant=='O3' ){limit=limit_o3} +if(pollutant=='NO2' ){limit=limit_no2} +if(pollutant=='PM10'){limit=limit_pm10} +nlimit=length(limit)-1 +if(what=='obs' ){cold='black'} ; colo='black' +if(what=='mod' ){cold='darkred'} ; colm='darkred' +if(what=='kf0' ){cold='orange'} +if(substr(what,1,2)=='ma' ){cold='darkorchid3'} +if(substr(what,1,3)=='kfs' ){cold='dodgerblue1'} +if(substr(what,1,3)=='kfd' ){cold='dodgerblue4'} +if(substr(what,1,2)=='an' ){cold='darkgreen'} +if(substr(what,1,4)=='kfsan'){cold='blue'} +if(substr(what,1,2)=='ml' ){cold='green'} + +pcho=17 ; pchm=16 ; pchd=16 +if(what=='mod'){ + colomd=c(colo,colm) ; legomd=c('obs','mod') ; pchomd=c(pcho,pchm) + colmd=colm ; legmd='mod' ; pchmd=pchm +}else{ + colomd=c(colo,colm,cold) ; legomd=c('obs','mod',what) ; pchomd=c(pcho,pchm,pchd) + colmd=c(colm,cold) ; legmd=c('mod',what) ; pchmd=c(pchm,pchd) +} + + + +print('********** Define time and create dataframe') +date1=ISOdate(substr(d1,1,4),substr(d1,5,6),substr(d1,7,8),substr(d1,9,10)) +date2=ISOdate(substr(d2,1,4),substr(d2,5,6),substr(d2,7,8),substr(d2,9,10)) +htime <- seq(date1,date2,"hours") ; nhtime <- length(htime) +dtime <- seq(date1,date2,"days") ; ndtime <- length(dtime) +mtime <- seq(date1,date2,"months") ; nmtime <- length(mtime) + +#htime=strptime(htime,format='%Y-%m-%d %H:%M:%S',tz='UTC') + + + + + + +print('********** Get hourly obs/mod data') +res <- get_hdata(pollutant,station,htime,c('obs','mod'),domain,experiment,indir,allstation) +hobs <- res[1,] +hmod <- res[2,] +if(experiment=='b007'){ + w=which(hmod==0) + if(length(w)!=0){hmod[w]=hmod[w+1]} ## temporary !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +} +print(summary(hobs)) +print(summary(hmod)) + + +print('********** Remove observations > 1000 ug/m3') +w=which(hobs > 1000) +print(paste('Numlber of observations > 1000 ug/m3 :',length(w))) +if(length(w)!=0){ + hobs[w]=NaN + print(htime[w]) +} + + +print('********** Check the number of available obs/mod data') +n_finiteobs <- length(which(is.finite(hobs))) +n_finitemod <- length(which(is.finite(hmod))) +n_nonzeroobs <- length(which(hobs!=0)) +n_nonzeromod <- length(which(hmod!=0)) +print(paste('n_finitemod =',n_finitemod ,100*n_finitemod /nhtime,'%')) +print(paste('n_finiteobs =',n_finiteobs ,100*n_finiteobs /nhtime,'%')) +print(paste('n_nonzeromod=',n_nonzeromod,100*n_nonzeromod/nhtime,'%')) +print(paste('n_nonzeroobs=',n_nonzeroobs,100*n_nonzeroobs/nhtime,'%')) + + + + + + + +if(100*n_finitemod/nhtime >= criteria & 100*n_nonzeromod/nhtime >= criteria & + 100*n_finiteobs/nhtime >= criteria & 100*n_nonzeroobs/nhtime >= criteria){ + + + print('********** Quality check') + lim=1e5 + if(maxok(hobs)>lim){ print(paste('BUG(hobs)=',maxok(hobs))) ; hobs[which(hobs>lim)]=NaN } + if(maxok(hmod)>lim){ print(paste('BUG(hmod)=',maxok(hmod))) ; hmod[which(hmod>lim)]=NaN } + + + flag_negativeallowed=0 ; legneg='NoNegative' + + + + source(paste0(pdir,'/routine1_hdata.R')) + + if(is.finite(maxok(hdata))==TRUE){ + + if(what=='obs' | what=='mod'){wversion=1}else{wversion=c(1,nversions)} + wversion=1 + source(paste0(pdir,'/routine2_timeseriesdata.R')) + source(paste0(pdir,'/routine3_write.R')) + if(what!='obs'){ for (k in wversion){source(paste0(pdir,'/routine4_plot.R'))} } + } + + if(0==1){ + hkfs=read.table(paste0('/esarchive/scratch/hpetetin/data/opmos/debug/opmos_b007_RUR_2015010100_2015093023_v20190107182811/data_2015010100_2015093023_',station,'/',station,'_',pollutant,'_kfsRMSE_hdata.csv'),skip=1,sep=',')[,4] + w=which(is.finite(hdata_all[1,1,,1])==TRUE & is.finite(hmod)==TRUE) + w=which(is.finite(hdata_all[1,1,,1])==TRUE & is.finite(hmod)==TRUE & is.finite(hkfs)==TRUE & is.finite(hobs)==TRUE) + print(get_statistics(hobs[w],hmod[w])$cleg) + print(paste(get_statistics(hobs[w],hmod[w])$cres,'(mod)')) + print(paste(get_statistics(hobs[w],hkfs[w])$cres,'(kfs)')) + print(paste(get_statistics(hobs[w],hdata_all[1,1,w,1])$cres ,'(ml)')) + } +} diff --git a/routine1_hdata.R b/routine1_hdata.R new file mode 100644 index 0000000000000000000000000000000000000000..25ab84a12cf6b870b0912f11ae83bedfaf98cdf5 --- /dev/null +++ b/routine1_hdata.R @@ -0,0 +1,1322 @@ +## OUTPUTS : hopdata,hdata +print('********** Define forecasting parameters') +timestep=24 #(hours) => at what time step the forecast are launched? +nlate=0 #(hours) => what is the delay for available observations when the forecast is launched? +nlengthforecast=24*4 #(hours) => how long last the forecast? +nforecast=nhtime/timestep # => how many forecast are runned over the entire period? +nversions=nlengthforecast/timestep # => how many different forecast (of different accuracy) are available? +plotdetail=0 + +print('********** Get hourly data') + +########################################################################### +if(what=='obs'){ + + print('********** Declare variables') + variables='obs' + legvariables='Concentration' + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + print('********** Get hdata ') + for (k in 1:nversions){ + hdata[1,,k] <- hobs + } + + print('********** Get hopdata ') + hopdata <- array(NaN,dim=c(nforecast,nlengthforecast,1)) + for (i in 1:nforecast){ + itime=(i-1)*timestep + for (ih in 1:nlengthforecast){ + hopdata[i,ih,1] <- hobs[itime+ih] + } + } +} + + +########################################################################### +if(what=='mod'){ + + print('********** Declare variables') + if(experiment=='b007'){ + variables=c('mod','ws','wd','pres','ta','pblh') + legvariables=c('Concentration','WindSpeed','WindDirection','Pressure','Temperature','PBLH') + }else{ + variables='mod' + legvariables='Concentration' + } + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + print('********** Get hdata') + if(experiment=='b007'){ + hws <- get_hdata('WS' ,station,htime,'mod',domain,experiment,indir,allstation) + hwd <- get_hdata('WD' ,station,htime,'mod',domain,experiment,indir,allstation) + hpres <- get_hdata('PRES',station,htime,'mod',domain,experiment,indir,allstation) + hta <- get_hdata('TA' ,station,htime,'mod',domain,experiment,indir,allstation) + hpblh <- get_hdata('PBLH',station,htime,'mod',domain,experiment,indir,allstation) + } + for (k in 1:nversions){ + hdata[1,,k] <- hmod + if(experiment=='b007'){ + hdata[2,,k] <- hws + hdata[3,,k] <- hwd + hdata[4,,k] <- hpres + hdata[5,,k] <- hta + hdata[6,,k] <- hpblh + } + } + + print('********** Get hopdata ') + hopdata <- array(NaN,dim=c(nforecast,nlengthforecast,1)) + for (i in 1:nforecast){ + itime=(i-1)*timestep + for (ih in 1:nlengthforecast){ + hopdata[i,ih,1] <- hmod[itime+ih] + } + } +} + + +########################################################################### +if(what=='kf0'){ + + print('********** Declare variables') + variables=c('mod','corr','error') + legvariables=c('Concentration','Correction','Error') + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + print('********** Get hdata') + for (k in 1:nversions){ + hdata[1,,k] <- get_hdata(pollutant,station,htime,'kf0',domain,experiment,indir)[1,] + hdata[2,,k] <- hdata[1,]-hmod + hdata[3,,k] <- hdata[1,]-hobs + } +} + + + +########################################################################### +if(substr(what,1,2)=='ma'){ + + print('********** Declare variables') + variables=paste0(what,c('','corr','error')) + legvariables=c('Concentration','Correction','Error') + nvariables=length(variables) + nwindow=as.numeric(substr(what,3,10)) #(days) + + print('********** Get hopdata') + hopdata <- array(NaN,dim=c(nforecast,nlengthforecast,nvariables+2)) + for (i in 1:nforecast){ + itime=(i-1)*timestep + if((itime+nlengthforecast)<=nhtime){ + hopdata[i,,nvariables+1]=hobs[itime+1:nlengthforecast] + hopdata[i,,nvariables+2]=hmod[itime+1:nlengthforecast] + + wprevious=itime-(1:nwindow)*24 + wok=which(wprevious>0 & (itime-wprevious)>nlate) + if(length(wok)==nwindow){ + for (ih in 1:nlengthforecast){ + w=wprevious[wok]+hour0to24(ih%%timestep) + hcorr=meanok(hobs[w]-hmod[w]) + if(flag_negativeallowed==0){ hopdata[i,ih,1]=max(c(0,hmod[itime+ih]+hcorr)) } + if(flag_negativeallowed==1){ hopdata[i,ih,1]= hmod[itime+ih]+hcorr } + hopdata[i,ih,2]=hcorr + hopdata[i,ih,3]=hopdata[i,ih,1]-hobs[itime+ih] + } + } + } + } + + print('********** Homogeneize datasets') + w=which(is.na(hopdata[,,1])==TRUE | is.na(hopdata[,,nvariables+1])==TRUE | is.na(hopdata[,,nvariables+2])==TRUE) + if(length(w)!=0){ + hopdata[,,1][w]=NaN + hopdata[,,2][w]=NaN + hopdata[,,3][w]=NaN + hopdata[,,nvariables+1][w]=NaN + hopdata[,,nvariables+2][w]=NaN + } + + print('********** Get hdata') + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + for (ivar in 1:nvariables){ + for (i in 1:dim(hopdata)[1]){ + for (k in 1:nversions){ + if(i-k+1 > 0){ + hdata[ivar,(i-1)*timestep+1:timestep,k]=hopdata[i-k+1,(k-1)*timestep+1:timestep,ivar] + } + } + } + } +} + + +########################################################################### +if(substr(what,1,3)=='kfs' & substr(what,1,5)!='kfsan'){ + + print('********** Declare variables') + variables=paste0(what,c('','corr','error','uncert','gain','wt','vt','woverv')) + legvariables=c('Concentration','Correction','Error','Uncertainty','Gain','W','V','W/V ratio') + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + print('********** Specify parameters') + configs=c(seq(0.0000001,0.000002,0.0000001), + seq(0.000001,0.00002,0.000001), + seq(0.00001,0.0002,0.00001), + seq(0.0001,0.002,0.0001), + seq(0.003,0.02,0.001), + seq(0.03,0.2,0.01), + seq(0.3,2,0.1), + seq(3,20,1), + seq(30,100,10), + seq(300,1000,100)) + v_t=1 + x_0_0= 0 + p_0_0= 5 + nconfigs=length(configs) + + print('********** Loop on configs - Get hopdata') + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime)) + hopdata_all <- array(NaN,dim=c(nconfigs,nforecast,nlengthforecast,nvariables+2)) + for (iconfig in 1:nconfigs){ + print(paste(iconfig,configs[iconfig])) + + ## initialize + x_tm1_tm1 <- array(x_0_0,dim=timestep) #(x_0_0) + p_tm1_tm1 <- array(p_0_0,dim=timestep) #(p_0_0) + x_t_t <- array(NaN,dim=timestep) + p_t_t <- array(NaN,dim=timestep) + k_t <- array(NaN,dim=timestep) + + ## loop on time series + w_over_v=configs[iconfig] + w_t=v_t*w_over_v + for (i in 1:nforecast){ + itime=(i-1)*timestep + if(min(itime+1-timestep:1) >= 1 & max(itime+1-timestep:1) <= nhtime & (itime+nlengthforecast)<=nhtime){ + hopdata_all[iconfig,i,,nvariables+1]=hobs[itime+1:nlengthforecast] + hopdata_all[iconfig,i,,nvariables+2]=hmod[itime+1:nlengthforecast] + + corr=array(NaN,dim=timestep) + uncert=array(NaN,dim=timestep) + gain=array(NaN,dim=timestep) + + y_t=hmod[itime+1-timestep:1]-hobs[itime+1-timestep:1] + for (ih in 1:timestep){ + if(is.na(y_t[ih])==FALSE){ + k_t[ih] <- (p_tm1_tm1[ih] + w_t)/(p_tm1_tm1[ih] + w_t + v_t) + x_t_t[ih] <- x_tm1_tm1[ih] + k_t[ih]*(y_t[ih] - x_tm1_tm1[ih]) + p_t_t[ih] <- (p_tm1_tm1[ih] + w_t)*(1 - k_t[ih]) + }else{ + k_t[ih] <- 0 + x_t_t[ih] <- x_tm1_tm1[ih] + p_t_t[ih] <- p_tm1_tm1[ih] + w_t + } + corr[ih] <- -x_t_t[ih] + uncert[ih] <- p_t_t[ih] + gain[ih] <- k_t[ih] + } + + for (ih in 1:nlengthforecast){ + iih=hour0ton(ih%%timestep,timestep) + if(flag_negativeallowed==0){hopdata_all[iconfig,i,ih,1]=max(c(0,hmod[itime+ih]+corr[iih]))} + if(flag_negativeallowed==1){hopdata_all[iconfig,i,ih,1]=hmod[itime+ih]+corr[iih]} + hopdata_all[iconfig,i,ih,2] <- corr[iih] + hopdata_all[iconfig,i,ih,3] <- hopdata_all[iconfig,i,ih,1]-hopdata_all[iconfig,i,ih,nvariables+1] + hopdata_all[iconfig,i,ih,4] <- uncert[iih] + hopdata_all[iconfig,i,ih,5] <- gain[iih] + hopdata_all[iconfig,i,ih,6] <- w_t + hopdata_all[iconfig,i,ih,7] <- v_t + hopdata_all[iconfig,i,ih,8] <- w_t/v_t + } + x_tm1_tm1 <- x_t_t + p_tm1_tm1 <- p_t_t + } + } + } + + print('********** Homogeneize datasets') + w=which(is.na(hopdata_all[,,,1])==TRUE | is.na(hopdata_all[,,,2])==TRUE | is.na(hopdata_all[,,,3])==TRUE) + if(length(w)!=0){hopdata_all[w]=NaN} + + print('********** Get hdata') + print(gc()) + print(c(nconfigs,nvariables,nhtime,nversions)) + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime,nversions)) + for (iconfig in 1:nconfigs){ + for (ivar in 1:nvariables){ + for (i in 1:nforecast){ + for (k in 1:nversions){ + if(i-k+1 > 0){ + hdata_all[iconfig,ivar,(i-1)*timestep+1:timestep,k]=hopdata_all[iconfig,i-k+1,(k-1)*timestep+1:timestep,ivar] + } + } + } + } + } +} + + + + + + + +########################################################################### +if(substr(what,1,3)=='kfd'){ + + print('********** Declare variables') + variables=paste0(what,c('','corr','error','uncert','gain','wt','vt','woverv')) + legvariables=c('Concentration','Correction','Error','Uncertainty','Gain','W','V','W/V ratio') + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + print('********** Specify parameters') + configs=5:90 #(ndynwindow) + timestep=24 + x_0_0= 0 + p_0_0= 5 + nconfigs=length(configs) + + print('********** Loop on configs') + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime)) + hopdata_all <- array(NaN,dim=c(nconfigs,nforecast,nlengthforecast,nvariables+2)) + for (iconfig in 1:nconfigs){ + print(paste(iconfig,configs[iconfig])) + ndynwindow=configs[iconfig] + + ## initialize + x_tm1_tm1 <- array(x_0_0,timestep) #(x_0_0) + p_tm1_tm1 <- array(p_0_0,timestep) #(p_0_0) + x_t_t <- array(NaN,dim=timestep) + p_t_t <- array(NaN,dim=timestep) + k_t <- array(NaN,dim=timestep) + last_w_t <- array(NaN,dim=c(timestep,ndynwindow)) #(last values of x_t-x_{t-1}) + last_v_t <- array(NaN,dim=c(timestep,ndynwindow)) #(last values of y_t-x_t) + + ## loop on time series + for (i in 1:nforecast){ + + itime=(i-1)*timestep + if(min(itime+1-timestep:1) >= 1 & max(itime+1-timestep:1) <= nhtime & (itime+nlengthforecast)<=nhtime){ + hopdata_all[iconfig,i,,nvariables+1]=hobs[itime+1:nlengthforecast] + hopdata_all[iconfig,i,,nvariables+2]=hmod[itime+1:nlengthforecast] + + corr=array(NaN,dim=timestep) + uncert=array(NaN,dim=timestep) + gain=array(NaN,dim=timestep) + noisev=array(NaN,dim=timestep) + noisew=array(NaN,dim=timestep) + + y_t=hmod[itime+1-timestep:1]-hobs[itime+1-timestep:1] + for (itimestep in 1:timestep){ + + if(is.na(sdok(last_v_t[itimestep,]))==FALSE & is.na(sdok(last_w_t[itimestep,]))==FALSE){ + v_t = 1/(ndynwindow-1)*sumok( ((last_v_t[itimestep,])-meanok(last_v_t[itimestep,]))^2) + w_t = 1/(ndynwindow-1)*sumok( ((last_w_t[itimestep,])-meanok(last_w_t[itimestep,]))^2) + }else{ + v_t=1 + w_t=v_t*0.01 + } + + y_t=hmod[itime-timestep+itimestep]-hobs[itime-timestep+itimestep] + if(is.na(y_t)==FALSE){ + k_t[itimestep] <- (p_tm1_tm1[itimestep] + w_t)/(p_tm1_tm1[itimestep] + w_t + v_t) + x_t_t[itimestep] <- x_tm1_tm1[itimestep] + k_t[itimestep]*(y_t - x_tm1_tm1[itimestep]) + p_t_t[itimestep] <- (p_tm1_tm1[itimestep] + w_t)*(1 - k_t[itimestep]) + }else{ + k_t[itimestep] <- 0 + x_t_t[itimestep] <- x_tm1_tm1[itimestep] + p_t_t[itimestep] <- p_tm1_tm1[itimestep] + w_t + } + + corr[itimestep] <- -x_t_t[itimestep] + uncert[itimestep] <- p_t_t[itimestep] + gain[itimestep] <- k_t[itimestep] + noisev[itimestep] <- v_t + noisew[itimestep] <- w_t + + last_v_t[itimestep,]=c(y_t-x_tm1_tm1[itimestep] ,last_v_t[itimestep,1:(ndynwindow-1)]) + last_w_t[itimestep,]=c(x_t_t[itimestep]-x_tm1_tm1[itimestep],last_w_t[itimestep,1:(ndynwindow-1)]) + + p_tm1_tm1[itimestep]=p_t_t[itimestep] + x_tm1_tm1[itimestep]=x_t_t[itimestep] + } + + for (ih in 1:nlengthforecast){ + iih=hour0ton(ih%%timestep,timestep) + if(flag_negativeallowed==0){hopdata_all[iconfig,i,ih,1]=max(c(0,hmod[itime+ih]+corr[iih]))} + if(flag_negativeallowed==1){hopdata_all[iconfig,i,ih,1]=hmod[itime+ih]+corr[iih]} + hopdata_all[iconfig,i,ih,2] <- corr[iih] + hopdata_all[iconfig,i,ih,3] <- hopdata_all[iconfig,i,ih,1]-hopdata_all[iconfig,i,ih,nvariables+1] + hopdata_all[iconfig,i,ih,4] <- uncert[iih] + hopdata_all[iconfig,i,ih,5] <- gain[iih] + hopdata_all[iconfig,i,ih,6] <- noisew[iih] + hopdata_all[iconfig,i,ih,7] <- noisev[iih] + hopdata_all[iconfig,i,ih,8] <- noisew[iih]/noisev[iih] + } + } + } + } + + print('********** Homogeneize datasets') + w=which(is.na(hopdata_all[,,,1])==TRUE | is.na(hopdata_all[,,,2])==TRUE | is.na(hopdata_all[,,,3])==TRUE) + if(length(w)!=0){hopdata_all[w]=NaN} + + print('********** Get hdata') + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime,nversions)) + for (iconfig in 1:nconfigs){ + for (ivar in 1:nvariables){ + for (i in 1:nforecast){ + for (k in 1:nversions){ + if(i-k+1 > 0){ + hdata_all[iconfig,ivar,(i-1)*timestep+1:timestep,k]=hopdata_all[iconfig,i-k+1,(k-1)*timestep+1:timestep,ivar] + } + } + } + } + } +} + + + + + + +########################################################################### +if(substr(what,1,2)=='an'){ + + print('********** Declare variables') + variables=paste0(what,c('','std','_weighted','sd_weighted','corr','error')) + legvariables=c('Concentration','Standard deviation','Concentration (weighted)','Standard deviation (weighted)','Correction','Error') + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + print('********** Specify parameters') + configs=10 #(number of analogs) + nconfigs=length(configs) + if(substr(what,1,3)=='an-'){method=substr(what,4,100)} + method=gsub('RMSE','',method) + method=gsub('PCC','',method) + timestep=24 + nanwindow=45 + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime)) + + print('********** Check existence of features') + ok=1 + anvar=unlist(strsplit(method,'_')) + nanvar=length(anvar) + for (ivar in 1:nanvar){ + n=length(unlist(strsplit(anvar[ivar],'-'))) + + if(n >= 1){var = unlist(strsplit(anvar[ivar],'-'))[1] } + if(n >= 2){nhour = as.numeric(unlist(strsplit(anvar[ivar],'-'))[2])}else{nhour=0} + if(n >= 3){weight = as.numeric(unlist(strsplit(anvar[ivar],'-'))[3])}else{weight=1} + + if(var=='C'){val=hmod} + if(var=='S'){hws <- get_hdata('WS' ,station,htime,'mod',domain,experiment,indir,allstation) ; if(all(is.na(hws ))){ok=0 ; print('WS missing...')}} + if(var=='D'){hwd <- get_hdata('WD' ,station,htime,'mod',domain,experiment,indir,allstation) ; if(all(is.na(hwd ))){ok=0 ; print('WD missing...')}} + if(var=='P'){hpres <- get_hdata('PRES',station,htime,'mod',domain,experiment,indir,allstation) ; if(all(is.na(hpres))){ok=0 ; print('PRES missing...')}} + if(var=='T'){hta <- get_hdata('TA' ,station,htime,'mod',domain,experiment,indir,allstation) ; if(all(is.na(hta ))){ok=0 ; print('TA missing...')}} + if(var=='H'){hpblh <- get_hdata('PBLH',station,htime,'mod',domain,experiment,indir,allstation) ; if(all(is.na(hpblh))){ok=0 ; print('PBLH missing...')}} + } + + hopdata_all <- array(NaN,dim=c(nconfigs,nforecast,nlengthforecast,nvariables+2)) + if(ok==1){ + if(plotdetail==1){ + figout=paste0(workdir,'/',outputfile,'_detail.pdf',sep='') + pdf(figout,width=15,height=20,paper='a4r') + cex=1.2 + lwd=3 + par(mar=c(4,4,1.5,1.5),mfrow=c(2,1),cex=cex,cex.axis=cex,cex.lab=cex,cex.main=cex) + } + + print('********** Loop on configs - Get hopdata') + for (iconfig in 1:nconfigs){ + nanalogs=configs[iconfig] + for (i in (nanalogs+1):nforecast){ + print(c(iconfig,i)) + itime=(i-1)*timestep + if(min(itime+1-timestep:1) >= 1 & max(itime+1-timestep:1) <= nhtime & (itime+nlengthforecast)<=nhtime){ + hopdata_all[iconfig,i,,nvariables+1]=hobs[itime+1:nlengthforecast] + hopdata_all[iconfig,i,,nvariables+2]=hmod[itime+1:nlengthforecast] + + for (ih in 1:nlengthforecast){ + + itimestep=hour0ton(ih%%timestep,timestep) + + ## define dates where to search for analogs + witimestep=0:(i-2)*24+itimestep + witimestepp1=0:(i-1)*24+itimestep + w=which(as.numeric(difftime(htime[itime+ih],htime[witimestep],units='days'))%%365 <= nanwindow) + witimestep=witimestep[w] + + ## compute distance metric for all points + anvar=unlist(strsplit(method,'_')) + nanvar=length(anvar) + andistance=array(0,dim=length(witimestep)) + ini=1 + for (ivar in 1:nanvar){ + n=length(unlist(strsplit(anvar[ivar],'-'))) + + if(n >= 1){var = unlist(strsplit(anvar[ivar],'-'))[1] } + if(n >= 2){nhour = as.numeric(unlist(strsplit(anvar[ivar],'-'))[2])}else{nhour=0} + if(n >= 3){weight = as.numeric(unlist(strsplit(anvar[ivar],'-'))[3])}else{weight=1} + + if(var=='C'){val=hmod} + if(var=='S'){val=hws} + if(var=='D'){val=hwd} + if(var=='P'){val=hpres} + if(var=='T'){val=hta} + if(var=='H'){val=hpblh} + + wok=which(witimestep-nhour>0 & witimestep+nhour <= length(hmod)) + if(length(wok)==0){ + ok=0 + }else{ + witimestepok=witimestep[wok] + witimestepp1ok=witimestepp1[wok] + if(ini==1){andistance=array(0,dim=length(witimestepok));ini=0} + + if(min(witimestepok-nhour) > 0 & max(witimestepok+nhour) <= length(hmod)){ + tmp=array(0,dim=length(witimestepok)) + for (j in -nhour:nhour){ + tmp <- tmp + 1/sdok(val)^2*(val[witimestepok+j]-val[itime+ih+j])^2 + } + andistance <- andistance + weight*sqrt(tmp) + ok=1 + }else{ + ok=0 + } + } + } + + if(0==1 & i==(nanalogs+1)){ + print('-------------') + print(c(i,itime,ih,itimestep)) + print(witimestep) + } + + ## select analogs + if(ok==1){ + weight=1/andistance/(sumok(1/andistance)) + worder=order(andistance,decreasing=FALSE) + worder=worder[max(c(length(worder)-nanwindow,1)):length(worder)] + w=which(!is.na(hobs[witimestep[worder]])) + if(length(w) >= nanalogs){ + w=w[1:nanalogs] + worder=worder[w] + anforecast=meanok(hobs[witimestepok[worder]]) + + hopdata_all[iconfig,i,ih,1] <- anforecast + hopdata_all[iconfig,i,ih,2] <- sdok(hobs[witimestepok[worder]]) + hopdata_all[iconfig,i,ih,3] <- wt.mean(hobs[witimestepok[worder]],weight[worder]) + hopdata_all[iconfig,i,ih,4] <- wt.sd(hobs[witimestepok[worder]],weight[worder]) + hopdata_all[iconfig,i,ih,5] <- hopdata_all[iconfig,i,ih,1]-hopdata_all[iconfig,i,ih,nvariables+2] + hopdata_all[iconfig,i,ih,6] <- hopdata_all[iconfig,i,ih,1]-hopdata_all[iconfig,i,ih,nvariables+1] + } + } + } + } + } + } + if(plotdetail==1){ + print(figout) + dev.off() + } + } + + print('********** Homogeneize datasets') + w=which(is.na(hopdata_all[,,,1])==TRUE | is.na(hopdata_all[,,,2])==TRUE | is.na(hopdata_all[,,,3])==TRUE) + if(length(w)!=0){hopdata_all[w]=NaN} + + print('********** Get hdata') + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime,nversions)) + for (iconfig in 1:nconfigs){ + for (ivar in 1:(nvariables)){ + for (i in 1:nforecast){ + for (k in 1:nversions){ + if(i-k+1 > 0){ + hdata_all[iconfig,ivar,(i-1)*timestep+1:timestep,k] = hopdata_all[iconfig,i-k+1,(k-1)*timestep+1:timestep,ivar] + } + } + } + } + } +} + + + + + + + +########################################################################### +if(substr(what,1,5)=='kfsan'){ + + print('KFSAN!!') + print('********** Declare variables') + variables=paste0(what,c('','corr','error','uncert','gain','wt','vt','woverv')) + legvariables=c('Concentration','Correction','Error','Uncertainty','Gain','W','V','W/V ratio') + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + print('********** Specify parameters') + method='RMSE' + timestep=24 + nanwindow=45 + configs=c(seq(0.0000001,0.000002,0.0000001), + seq(0.000001,0.00002,0.000001), + seq(0.00001,0.0002,0.00001), + seq(0.0001,0.002,0.0001), + seq(0.003,0.02,0.001), + seq(0.03,0.2,0.01), + seq(0.3,2,0.1), + seq(3,20,1), + seq(30,100,10), + seq(300,1000,100)) + configs=c(0.001,0.005,0.01,0.05,0.1,0.5,1) ## hervetmp + + nconfigs=length(configs) + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime)) + if(substr(what,1,6)=='kfsan-'){method=substr(what,7,100)} + method=gsub('RMSE','',method) + method=gsub('PCC','',method) + print(paste('what/method',what,method)) + + print('********** Check existence of features') + ok=1 + anvar=unlist(strsplit(method,'_')) + nanvar=length(anvar) + for (ivar in 1:nanvar){ + n=length(unlist(strsplit(anvar[ivar],'-'))) + + if(n >= 1){var = unlist(strsplit(anvar[ivar],'-'))[1] } + if(n >= 2){nhour = as.numeric(unlist(strsplit(anvar[ivar],'-'))[2])}else{nhour=0} + if(n >= 3){weight = as.numeric(unlist(strsplit(anvar[ivar],'-'))[3])}else{weight=1} + + if(var=='C'){val=hmod} + if(var=='S'){hws <- get_hdata('WS' ,station,htime,'mod',domain,experiment,indir) ; if(all(is.na(hws ))){ok=0}} + if(var=='D'){hwd <- get_hdata('WD' ,station,htime,'mod',domain,experiment,indir) ; if(all(is.na(hwd ))){ok=0}} + if(var=='P'){hpres <- get_hdata('PRES',station,htime,'mod',domain,experiment,indir) ; if(all(is.na(hpres))){ok=0}} + if(var=='T'){hta <- get_hdata('TA' ,station,htime,'mod',domain,experiment,indir) ; if(all(is.na(hta ))){ok=0}} + if(var=='H'){hpblh <- get_hdata('PBLH',station,htime,'mod',domain,experiment,indir) ; if(all(is.na(hpblh))){ok=0}} + } + + + print('********** Loop on configs - Get hopdata') + hopdata_all <- array(NaN,dim=c(nconfigs,nforecast,nlengthforecast,nvariables+2)) + for (iconfig in 1:nconfigs){ + for (i in 1:nforecast){ + # for (i in 1:150){ + + print(c(iconfig,configs[iconfig],i)) + itime=(i-1)*timestep + if(min(itime+1-timestep:1) >= 1 & max(itime+1-timestep:1) <= nhtime & (itime+nlengthforecast)<=nhtime){ + hopdata_all[iconfig,i,,nvariables+1]=hobs[itime+1:nlengthforecast] + hopdata_all[iconfig,i,,nvariables+2]=hmod[itime+1:nlengthforecast] + + for (ih in 1:nlengthforecast){ + + itimestep=hour0ton(ih%%timestep,timestep) + + ## define dates where to search for analogs + witimestep=0:(i-2)*24+itimestep + witimestepp1=0:(i-1)*24+itimestep + diff=as.numeric(difftime(htime[itime+itimestep],htime[witimestep],units='days'))%%365 + w=which(diff <= nanwindow | diff >= (365-nanwindow)) + witimestep=witimestep[w] + + ok=0 + if(length(w) >= nanwindow){ + + ## compute distance metric for all points + anvar=unlist(strsplit(method,'_')) + nanvar=length(anvar) + ini=1 + for (ivar in 1:nanvar){ + n=length(unlist(strsplit(anvar[ivar],'-'))) + if(n >= 1){var = unlist(strsplit(anvar[ivar],'-'))[1] } + if(n >= 2){nhour = as.numeric(unlist(strsplit(anvar[ivar],'-'))[2])}else{nhour=0} + if(n >= 3){weight = as.numeric(unlist(strsplit(anvar[ivar],'-'))[3])}else{weight=1} + + if(var=='C'){val=hmod} + if(var=='S'){val=hws} + if(var=='D'){val=hwd} + if(var=='P'){val=hpres} + if(var=='T'){val=hta} + if(var=='H'){val=hpblh} + + wok=which(witimestep-nhour>0 & witimestep+nhour <= length(hmod)) + if(length(wok)==0){ + ok=0 + }else{ + witimestepok=witimestep[wok] + witimestepp1ok=witimestepp1[wok] + if(ini==1){andistance=array(0,dim=length(witimestepok));ini=0} + if(min(witimestep-nhour) > 0 & max(witimestep+nhour) <= length(hmod)){ + tmp=array(0,dim=length(witimestepok)) + for (j in -nhour:nhour){ + tmp <- tmp + 1/sdok(val[1:itime])^2*(val[witimestepok+j]-val[itime+ih+j])^2 + } + andistance <- andistance + weight*sqrt(tmp) + ok=1 + }else{ + ok=0 + } + } + } + } + if(0==1 & ok==1){ + print('//////////////////') + print(what) + print(andistance) + her + } + + ## select analogs + if(ok==1){ + weight=1/andistance/(sumok(1/andistance)) + worder=rev(order(andistance,decreasing=FALSE)) + worder=worder[max(c(length(worder)-nanwindow,1)):length(worder)] + hanmod=hmod[witimestepok][worder] + hanobs=hobs[witimestepok][worder] + + ## initialize + v_t=1 + x_0_0=0 + p_0_0=5 + x_tm1_tm1=x_0_0 + p_tm1_tm1=p_0_0 + + ## loop on (short) analog time series + w_over_v=configs[iconfig] + w_t=v_t*w_over_v + + check=array(NaN,dim=c(length(worder),5)) + for (j in 1:length(worder)){ + y_t=hanmod[j]-hanobs[j] + if(is.na(y_t)==FALSE){ + k_t <- (p_tm1_tm1 + w_t)/(p_tm1_tm1 + w_t + v_t) + x_t_t <- x_tm1_tm1 + k_t*(y_t - x_tm1_tm1) + p_t_t <- (p_tm1_tm1 + w_t)*(1 - k_t) + }else{ + k_t <- 0 + x_t_t <- x_tm1_tm1 + p_t_t <- p_tm1_tm1 + w_t + } + + check[j,1]=x_t_t + check[j,2]=p_t_t + check[j,3]=x_tm1_tm1 + check[j,4]=p_tm1_tm1 + check[j,5]=k_t + + x_tm1_tm1 <- x_t_t + p_tm1_tm1 <- p_t_t + } + + if(flag_negativeallowed==0){hopdata_all[iconfig,i,ih,1]=max(c(0,hmod[itime+ih]-x_t_t))} + if(flag_negativeallowed==1){hopdata_all[iconfig,i,ih,1]=hmod[itime+ih]-x_t_t} + hopdata_all[iconfig,i,ih,2] <- -x_t_t + hopdata_all[iconfig,i,ih,3] <- hopdata_all[iconfig,i,ih,1]-hopdata_all[iconfig,i,ih,nvariables+1] + hopdata_all[iconfig,i,ih,4] <- p_t_t + hopdata_all[iconfig,i,ih,5] <- k_t + hopdata_all[iconfig,i,ih,6] <- w_t + hopdata_all[iconfig,i,ih,7] <- v_t + hopdata_all[iconfig,i,ih,8] <- w_t/v_t + + if(plotdetail==1 & ih==1){ + + par(mar=c(5,5,2,2),mfrow=c(3,2)) + layout(matrix(c(1,2,3, + 4,4,4, + 5,5,5, + 6,6,6),4,3,byrow=TRUE)) + main=paste(formatC(hmod[itime+ih],digits=1,width=1,format="f",flag=" "), + formatC(hobs[itime+ih],digits=1,width=1,format="f",flag=" "), + formatC(x_t_t,digits=1,width=1,format="f",flag=" "), + formatC(hopdata_all[iconfig,i,ih,1]-hopdata_all[iconfig,i,ih,nvariables+1],digits=1,width=1,format="f",flag=" "),sep='/') + plot(1:length(worder),check[,1],type='b',col='red',main=paste(i,'/',itime),xlab='',ylab='x_t_t') + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + plot(1:length(worder),check[,2],type='b',col='red',main=main,xlab='',ylab='P_t_t') + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + plot(1:length(worder),check[,5],type='b',col='red',xlab='',ylab='k_t') + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + + ylim=c(0,130) + if(is.finite(maxok(ylim))==TRUE){ + plot(1:length(witimestep),hmod[witimestep],type='n',ylim=ylim,main='Normal time series') + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(1:length(witimestep),hmod[witimestep],type='b',col='red') + points(1:length(witimestep),hobs[witimestep],type='b') + points(c(-1,1)*1e9,c(0,0)+hmod[itime+ih],type='l',col='red',lwd=2) + points(c(-1,1)*1e9,c(0,0)+hmod[itime+ih]-x_t_t,type='l',col='green',lwd=2) + } + if(is.finite(maxok(ylim))==TRUE){ + plot(1:length(witimestep[worder]),hmod[witimestep][worder],type='n',ylim=ylim,main='Ordered time series') + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(1:length(witimestep[worder]),hmod[witimestep][worder],type='b',col='red') + points(1:length(witimestep[worder]),hobs[witimestep][worder],type='b') + points(c(-1,1)*1e9,c(0,0)+hmod[itime+ih],type='l',col='red',lwd=2) + points(c(-1,1)*1e9,c(0,0)+hmod[itime+ih]-x_t_t,type='l',col='green',lwd=2) + } + + if(max(witimestepp1) <= length(hmod)){ + if(is.finite(maxok(ylim))==TRUE){ + plot(1:length(witimestepp1),hmod[witimestepp1],type='b',col='red',ylim=ylim,main='Normal time series +1 day') + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(1:length(witimestepp1),hobs[witimestepp1],type='b') + + points(1:length(witimestepp1),hopdata_all[iconfig,1:i,ih,1],type='p',pch=16,col='green',cex=2) + + points(length(witimestepp1),hmod[itime+ih],type='p',pch=16,col='red',cex=3) + points(length(witimestepp1),hmod[itime+ih]-x_t_t,type='p',pch=16,col='green',cex=3) + points(c(-1,1)*1e9,c(0,0)+hmod[itime+ih],type='l',col='red',lwd=2) + points(c(-1,1)*1e9,c(0,0)+hmod[itime+ih]-x_t_t,type='l',col='green',lwd=2) + + } + } + } + } + } + } + } + } + + + print('********** Homogeneize datasets') + w=which(is.na(hopdata_all[,,,1])==TRUE | is.na(hopdata_all[,,,2])==TRUE | is.na(hopdata_all[,,,3])==TRUE) + if(length(w)!=0){hopdata_all[w]=NaN} + + print('********** Get hdata') + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime,nversions)) + for (iconfig in 1:nconfigs){ + for (ivar in 1:nvariables){ + for (i in 1:nforecast){ + for (k in 1:nversions){ + if(i-k+1 > 0){ + hdata_all[iconfig,ivar,(i-1)*timestep+1:timestep,k]=hopdata_all[iconfig,i-k+1,(k-1)*timestep+1:timestep,ivar] + } + } + } + } + } +} + + + +########################################################################### +if(substr(what,1,2)=='ml'){ + + print('********** Declare variables') + variables=what + legvariables='Concentration' + nvariables=length(variables) + hdata <- array(NaN,dim=c(nvariables,nhtime,nversions)) + + if(1==1 & experiment=='b007'){ + hws <- get_hdata('WS' ,station,htime,'mod',domain,experiment,indir,allstation) + hwd <- get_hdata('WD' ,station,htime,'mod',domain,experiment,indir,allstation) + hpres <- get_hdata('PRES',station,htime,'mod',domain,experiment,indir,allstation) + hta <- get_hdata('TA' ,station,htime,'mod',domain,experiment,indir,allstation) + hpblh <- get_hdata('PBLH',station,htime,'mod',domain,experiment,indir,allstation) + } + + print('********** Get ML options') + tmp=unlist(strsplit(substr(what,4,999),'-')) + configs=tmp[1] + imin=as.numeric(tmp[2]) + ifreq=as.numeric(tmp[3]) + configtarget=as.numeric(tmp[4]) + configvar=as.numeric(tmp[5]) + configtrain=as.numeric(tmp[6]) + + + nconfigs=length(configs) + + if(substr(configs,1,3)=='gbm'){ + library(gbm) + library(pdp) + library(geosphere) + } + + print('********** Get multistations training dataset') + if(multistattraining==1){ + #trainingstation=c('ES1422A','ES1943A','ES1938A','ES1944A','ES1942A','ES1941A','ES1937A','ES1525A','ES1426A','ES1947A','ES2028A') #URB-MAD + + print('********** Find near-by similar stations') + allstations=as.vector(read.table(paste0(stationsdir,'stations_ALL'))[,1]) + coordstation=get_infostat(station)[1,] + coordallstations=get_infostat(allstations) + dist <- array(NaN,dim=length(allstations)) + for (istat in 1:length(allstations)){ + dist[istat]=distHaversine(c(coordallstations[istat,1],coordallstations[istat,2]),c(coordstation[1],coordstation[2]))/1000 + } + wstat=which(coordallstations[,4]==coordstation[4] & dist <= 100 & dist!=0) + if(length(wstat)!=0){ + wstat=wstat[1:min(c(5,length(wstat)))] + trainingstation=allstations[wstat] + }else{ + trainingstation=station + } + print(trainingstation) + + xtrainingtarget <- NaN + xtraininghmod <- NaN + xtrainingherrorm24 <- NaN + xtrainingherrorm48 <- NaN + xtraininghmodm24 <- NaN + xtraininghmodm48 <- NaN + xtrainingherrorm1 <- NaN + xtrainingherrorm2 <- NaN + xtrainingherrorm3 <- NaN + xtrainingherrorm4 <- NaN + xtrainingherrorm5 <- NaN + xtrainingherrorm6 <- NaN + xtraininghws <- NaN + xtraininghwd <- NaN + xtraininghpres <- NaN + xtraininghta <- NaN + xtraininghpblh <- NaN + xtrainingrandom <- NaN + xtraininghour <- NaN + xtraininghpblhm1 <- NaN + xtraininghpblhp1 <- NaN + xtraininghtam1 <- NaN + xtraininghtap1 <- NaN + xtraininghpblh_d1 <- NaN + xtraininghta_d1 <- NaN + xtraininghws_d1 <- NaN + xtraininghwd_d1 <- NaN + xtraininghpres_d1 <- NaN + for (istat in 1:length(trainingstation)){ + res <- get_hdata(pollutant,trainingstation[istat],htime,c('obs','mod'),domain,experiment,indir,allstation) + hhobs <- res[1,] + hhmod <- res[2,] + if(experiment=='b007'){ + w=which(hhmod==0) + if(length(w)!=0){hhmod[w]=hhmod[w+1]} ## herve tmp !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + } + if(length(which(is.finite(hhobs)==TRUE & is.finite(hhmod)==TRUE))!=0){ + + hhws <- get_hdata('WS' ,trainingstation[istat],htime,'mod',domain,experiment,indir,allstation) + hhwd <- get_hdata('WD' ,trainingstation[istat],htime,'mod',domain,experiment,indir,allstation) + hhpres <- get_hdata('PRES',trainingstation[istat],htime,'mod',domain,experiment,indir,allstation) + hhta <- get_hdata('TA' ,trainingstation[istat],htime,'mod',domain,experiment,indir,allstation) + hhpblh <- get_hdata('PBLH',trainingstation[istat],htime,'mod',domain,experiment,indir,allstation) + + wtime=(24*2+1):length(hhobs) + + diff=100*(meanok(hhobs[wtime])-meanok(hobs[wtime]))/meanok(hobs[wtime]) + if(abs(diff) <= 20){ + + if(configtarget==0){xtrainingtarget <- c(xtrainingtarget, hhobs[wtime])} + if(configtarget==1){xtrainingtarget <- c(xtrainingtarget,hhmod[wtime]-hhobs[wtime])} + + xtraininghmod <- c(xtraininghmod,hhmod[wtime]) + xtrainingherrorm24 <- c(xtrainingherrorm24,hhmod[wtime-24]-hhobs[wtime-24]) + xtrainingherrorm48 <- c(xtrainingherrorm48,hhmod[wtime-48]-hhobs[wtime-48]) + xtraininghmodm24 <- c(xtraininghmodm24,hhmod[wtime-24]) + xtraininghmodm48 <- c(xtraininghmodm48,hhmod[wtime-48]) + xtrainingherrorm1 <- c(xtrainingherrorm1,hhmod[wtime-1]-hhobs[wtime-1]) + xtrainingherrorm2 <- c(xtrainingherrorm2,hhmod[wtime-2]-hhobs[wtime-2]) + xtrainingherrorm3 <- c(xtrainingherrorm3,hhmod[wtime-3]-hhobs[wtime-3]) + xtrainingherrorm4 <- c(xtrainingherrorm4,hhmod[wtime-4]-hhobs[wtime-4]) + xtrainingherrorm5 <- c(xtrainingherrorm5,hhmod[wtime-5]-hhobs[wtime-5]) + xtrainingherrorm6 <- c(xtrainingherrorm6,hhmod[wtime-6]-hhobs[wtime-6]) + xtraininghws <- c(xtraininghws,hhws[wtime]) + xtraininghwd <- c(xtraininghwd,hhwd[wtime]) + xtraininghpres <- c(xtraininghpres,hhpres[wtime]) + xtraininghta <- c(xtraininghta,hhta[wtime]) + xtraininghpblh <- c(xtraininghpblh,hhpblh[wtime]) + xtrainingrandom <- c(xtrainingrandom,runif(length(wtime),0,10)) + xtraininghour <- c(xtraininghour,hour0to24(wtime%%24)) + xtraininghpblhm1 <- c(xtraininghpblhm1,hhpblh[wtime-1]) + xtraininghpblhp1 <- c(xtraininghpblhp1,hhpblh[wtime+1]) + xtraininghtam1 <- c(xtraininghtam1,hhta[wtime-1]) + xtraininghtap1 <- c(xtraininghtap1,hhta[wtime+1]) + xtraininghpblh_d1 <- c(xtraininghpblh_d1,hhpblh[wtime]-hhpblh[wtime-3]) + xtraininghta_d1 <- c(xtraininghta_d1,hhta[wtime]-hhta[wtime-3]) + xtraininghws_d1 <- c(xtraininghws_d1,hhws[wtime]-hhws[wtime-3]) + xtraininghwd_d1 <- c(xtraininghwd_d1,hhwd[wtime]-hhwd[wtime-3]) + xtraininghpres_d1 <- c(xtraininghpres_d1,hhpres[wtime]-hhpres[wtime-3]) + } + } + } + if(configvar==0){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtrainingherrorm24)} + if(configvar==1){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtraininghws,xtraininghwd,xtraininghpres,xtraininghta,xtraininghpblh)} + if(configvar==11){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtraininghws,xtraininghwd,xtraininghpres,xtraininghta,xtraininghpblh,xtraininghour)} + if(configvar==2){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtraininghws,xtraininghwd,xtraininghpres,xtraininghta,xtraininghpblh, + xtraininghpblh_d1,xtraininghta_d1,xtraininghws_d1,xtraininghwd_d1,xtraininghpres_d1)} + if(configvar==3){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtraininghws,xtraininghwd,xtraininghpres,xtraininghta,xtraininghpblh, + xtraininghpblh_d1,xtraininghta_d1,xtraininghws_d1,xtraininghwd_d1,xtraininghpres_d1,xtraininghour)} + if(configvar==4){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtraininghws,xtraininghwd,xtraininghpres,xtraininghta,xtraininghpblh, + xtraininghpblh_d1,xtraininghta_d1,xtraininghws_d1,xtraininghwd_d1,xtraininghpres_d1,xtraininghour, + xtrainingherrorm24)} + if(configvar==5){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtraininghws,xtraininghwd,xtraininghpres,xtraininghta,xtraininghpblh, + xtraininghpblh_d1,xtraininghta_d1,xtraininghws_d1,xtraininghwd_d1,xtraininghpres_d1,xtraininghour, + xtrainingherrorm24, + xtrainingherrorm1,xtrainingherrorm2,xtrainingherrorm3,xtrainingherrorm4,xtrainingherrorm5,xtrainingherrorm6)} + if(configvar==6){ data0 <- data.frame(xtrainingtarget,xtraininghmod,xtraininghws,xtraininghwd,xtraininghpres,xtraininghta,xtraininghpblh, + xtraininghpblh_d1,xtraininghta_d1,xtraininghws_d1,xtraininghwd_d1,xtraininghpres_d1,xtraininghour, + xtrainingherrorm24,xtrainingherrorm48, + xtraininghmodm24,xtraininghmodm48)} + if(configvar==7){ data0 <- data.frame(xtrainingtarget,xtraininghmod, + xtrainingherrorm24,xtrainingherrorm48, + xtraininghmodm24,xtraininghmodm48)} + dataTrainMultistations <- na.omit(data0) + } + + print('********** Loop on configs - Get hopdata') + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime)) + hopdata_all <- array(NaN,dim=c(nconfigs,nforecast,nlengthforecast,nvariables+2)) + for (iconfig in 1:nconfigs){ + print(paste(iconfig,configs[iconfig])) + model_trained_flag=0 + + for (i in 1:nforecast){ + #print(c(iconfig,i,nforecast)) + itime=(i-1)*timestep + if(min(itime+1-timestep:1) >= 1 & max(itime+1-timestep:1) <= nhtime & (itime+nlengthforecast)<=nhtime){ + hopdata_all[iconfig,i,,nvariables+1]=hobs[itime+1:nlengthforecast] + hopdata_all[iconfig,i,,nvariables+2]=hmod[itime+1:nlengthforecast] + + ## (train ml model) + if(model_trained_flag >= 0 & ((i >= imin & i%%ifreq==0) | i==nforecast)){ + print(c('train...',i)) + wtime=(24*2+1):(itime-4) + #wtime=(24*2+1):((nforecast-1)*timestep) + + if(configtarget==0){xtarget <- hobs[wtime] ; print('TARGET : hobs')} + if(configtarget==1){xtarget <- hmod[wtime]-hobs[wtime] ; print('TARGET : hmod-hobs')} + + xhmod <- hmod[wtime] + xherrorm24 <- hmod[wtime-24]-hobs[wtime-24] + xherrorm48 <- hmod[wtime-48]-hobs[wtime-48] + xhmodm24 <- hmod[wtime-24] + xhmodm48 <- hmod[wtime-48] + xherrorm1 <- hmod[wtime-1]-hobs[wtime-1] + xherrorm2 <- hmod[wtime-2]-hobs[wtime-2] + xherrorm3 <- hmod[wtime-3]-hobs[wtime-3] + xherrorm4 <- hmod[wtime-4]-hobs[wtime-4] + xherrorm5 <- hmod[wtime-5]-hobs[wtime-5] + xherrorm6 <- hmod[wtime-6]-hobs[wtime-6] + xhws <- hws[wtime] + xhwd <- hwd[wtime] + xhpres <- hpres[wtime] + xhta <- hta[wtime] + xhpblh <- hpblh[wtime] + xrandom <- runif(length(wtime),0,10) + xhour <- hour0to24(wtime%%24) + xhpblhm1 <- hpblh[wtime-1] + xhpblhp1 <- hpblh[wtime+1] + xhtam1 <- hta[wtime-1] + xhtap1 <- hta[wtime+1] + xhpblh_d1 <- hpblh[wtime]-hpblh[wtime-3] + xhta_d1 <- hta[wtime]-hta[wtime-3] + xhws_d1 <- hws[wtime]-hws[wtime-3] + xhwd_d1 <- hwd[wtime]-hwd[wtime-3] + xhpres_d1 <- hpres[wtime]-hpres[wtime-3] + xjulian <- as.numeric(format(htime[wtime],"%j")) + + ## (choose features) + if(configvar==0){ data0 <- data.frame(xtarget,xhmod,xherrorm24)} + if(configvar==1){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh)} + if(configvar==11){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh,xhour)} + if(configvar==2){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1)} + if(configvar==3){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour)} + if(configvar==4){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24)} + if(configvar==5){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24, + xherrorm1,xherrorm2,xherrorm3,xherrorm4,xherrorm5,xherrorm6)} + if(configvar==6){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24,xherrorm48, + xhmodm24,xhmodm48)} + if(configvar==7){ data0 <- data.frame(xtarget,xhmod, + xherrorm24,xherrorm48, + xhmodm24,xhmodm48)} + if(configvar==8){ data0 <- data.frame(xtarget,xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24,xherrorm48, + xhmodm24,xhmodm48,xjulian)} + + hxfeatures=htime[wtime] + hyfeatures=data0 + + features=colnames(data0)[2:length(colnames(data0))] + nfeatures=length(features) + + data <- na.omit(data0) + if(dim(data)[1]!=0){ + #partition=0.95 + #trainIndex <- createDataPartition(data$xtarget, p=partition, list=FALSE, times=1) + #dataTrain <- data[ trainIndex,] + #dataTest <- data[-trainIndex,] + dataTrain=data + + if(multistattraining==1){ + dataTrain=dataTrainMultistations + colnames(dataTrain)=c('xtarget',features) + } + + print(dim(dataTrain)) + + if(substr(configs,1,3)=='knn'){ + if(configtrain==1){ + fitControl <- trainControl(method="repeatedcv", number=10, repeats=3, preProcOptions=c('center','scale')) + mlfit <- caret::train(xtarget~., data=dataTrain, method = "knn", trControl=fitControl, tuneLength=10) + } + } + if(substr(configs,1,2)=='lm'){ + if(configtrain==1){ + fitControl <- trainControl(method="repeatedcv", number=10, repeats=3, preProcOptions=c('center','scale')) + mlfit <- caret::train(xtarget~., data=dataTrain, method = "lm", trControl=fitControl) + } + } + if(substr(configs,1,3)=='gbm'){ + if(configtrain==1){ + fitControl <- trainControl(method="cv", number=10, preProcOptions=c('center','scale')) + mlfit <- caret::train(xtarget~., data=dataTrain, method="gbm", trControl=fitControl, verbose=FALSE) + } + if(configtrain==2){ + fitControl <- trainControl(method="cv", number=10, preProcOptions=c('center','scale')) + tuneGrid <- expand.grid(interaction.depth=1, + n.trees=150, + shrinkage=0.1, + n.minobsinnode=3) + bagfraction=as.numeric(tmp[7]) + mlfit <- caret::train(xtarget~., data=dataTrain, method="gbm", trControl=fitControl, verbose=FALSE, tuneGrid=tuneGrid,bag.fraction=bagfraction) + } + if(configtrain==3){ + fitControl <- trainControl(method="cv", number=10, preProcOptions=c('center','scale')) + tuneGrid <- expand.grid(interaction.depth=3, + n.trees=1000, + shrinkage=0.2, + n.minobsinnode=5) + bagfraction=as.numeric(tmp[7]) + mlfit <- caret::train(xtarget~., data=dataTrain, method="gbm", trControl=fitControl, verbose=FALSE, tuneGrid=tuneGrid,bag.fraction=bagfraction) + } + if(configtrain==4){ + fitControl <- trainControl(method="cv", number=10, preProcOptions=c('center','scale')) + tuneGrid <- expand.grid(interaction.depth=1:3, + n.trees=c(1000,2000), + shrinkage=0.2, + n.minobsinnode=3:5) + bagfraction=as.numeric(tmp[7]) + mlfit <- caret::train(xtarget~., data=dataTrain, method="gbm", trControl=fitControl, verbose=FALSE, tuneGrid=tuneGrid,bag.fraction=bagfraction) + } + } + + #pdf('/esarchive/scratch/hpetetin/data/opmos/debug/debug.pdf',width=15,height=20,paper='a4r') + #pairs(data) + #dev.off() + + if(substr(configs,1,3)=='gbm' | substr(configs,1,3)=='knn'){ + if(exists("importance")==FALSE){importance <- array(NaN,dim=c(nfeatures,nforecast))} + x=varImp(mlfit)$importance + for (ifea in 1:nfeatures){ + wfea=which(row.names(x)==features[ifea]) + importance[ifea,i]=x[wfea,1] + } + importance_final=importance[,i] + } + } + model_trained_flag=1 + print('train finished') + } + + + if(model_trained_flag==1){ + for (ih in 1:nlengthforecast){ + + xhmod <- hmod[itime+ih] + + xherrorm24 <- hmod[itime+ih-24]-hobs[itime+ih-24] + xherrorm48 <- hmod[itime+ih-48]-hobs[itime+ih-48] + xhmodm24 <- hmod[itime+ih-24] + xhmodm48 <- hmod[itime+ih-48] + xherrorm1 <- hmod[itime+ih-1]-hobs[itime+ih-1] + xherrorm2 <- hmod[itime+ih-2]-hobs[itime+ih-2] + xherrorm3 <- hmod[itime+ih-3]-hobs[itime+ih-3] + xherrorm4 <- hmod[itime+ih-4]-hobs[itime+ih-4] + xherrorm5 <- hmod[itime+ih-5]-hobs[itime+ih-5] + xherrorm6 <- hmod[itime+ih-6]-hobs[itime+ih-6] + xhws <- hws[itime+ih] + xhwd <- hwd[itime+ih] + xhpres <- hpres[itime+ih] + xhta <- hta[itime+ih] + xhpblh <- hpblh[itime+ih] + xrandom <- runif(1,0,10) + xhour <- hour0to24(ih%%24) + xhpblhm1 <- hpblh[itime+ih-1] + xhpblhp1 <- hpblh[itime+ih+1] + xhtam1 <- hta[itime+ih-1] + xhtap1 <- hta[itime+ih+1] + xhpblh_d1 <- hpblh[itime+ih]-hpblh[itime+ih-3] + xhta_d1 <- hta[itime+ih]-hta[itime+ih-3] + xhws_d1 <- hws[itime+ih]-hws[itime+ih-3] + xhwd_d1 <- hwd[itime+ih]-hwd[itime+ih-3] + xhpres_d1 <- hpres[itime+ih]-hpres[itime+ih-3] + xjulian <- as.numeric(format(htime[itime+ih],"%j")) + + ## (choose features) + if(configvar==0){ data0 <- data.frame(xhmod,xherrorm24)} + if(configvar==1){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh)} + if(configvar==11){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh,xhour)} + if(configvar==2){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1)} + if(configvar==3){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour)} + if(configvar==4){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24)} + if(configvar==5){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24, + xherrorm1,xherrorm2,xherrorm3,xherrorm4,xherrorm5,xherrorm6)} + if(configvar==6){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24,xherrorm48, + xhmodm24,xhmodm48)} + if(configvar==7){ data0 <- data.frame(xhmod, + xherrorm24,xherrorm48, + xhmodm24,xhmodm48)} + if(configvar==8){ data0 <- data.frame(xhmod,xhws,xhwd,xhpres,xhta,xhpblh, + xhpblh_d1,xhta_d1,xhws_d1,xhwd_d1,xhpres_d1,xhour, + xherrorm24,xherrorm48, + xhmodm24,xhmodm48,xjulian)} + + + + data <- na.omit(data0) + if(nrow(data)!=0){ + corr <- as.numeric(predict(mlfit,newdata=data)) + + if(configtarget==0){ + if(flag_negativeallowed==0){hopdata_all[iconfig,i,ih,1]=max(c(0,corr))} + if(flag_negativeallowed==1){hopdata_all[iconfig,i,ih,1]=corr} + } + if(configtarget==1){ + if(flag_negativeallowed==0){hopdata_all[iconfig,i,ih,1]=max(c(0,hmod[itime+ih]-corr))} + if(flag_negativeallowed==1){hopdata_all[iconfig,i,ih,1]=hmod[itime+ih]-corr} + } + } + } + } + } + } + } + + print('********** Homogeneize datasets') + w=which(is.na(hopdata_all[,,,1])==TRUE | is.na(hopdata_all[,,,2])==TRUE | is.na(hopdata_all[,,,3])==TRUE) + if(length(w)!=0){hopdata_all[w]=NaN} + + print('********** Get hdata') + hdata_all <- array(NaN,dim=c(nconfigs,nvariables,nhtime,nversions)) + for (iconfig in 1:nconfigs){ + for (ivar in 1:nvariables){ + for (i in 1:nforecast){ + for (k in 1:nversions){ + if(i-k+1 > 0){ + hdata_all[iconfig,ivar,(i-1)*timestep+1:timestep,k]=hopdata_all[iconfig,i-k+1,(k-1)*timestep+1:timestep,ivar] + } + } + } + } + } + + + if(exists("importance")==TRUE){ + for (ifea in 1:nfeatures){ + importance_stat=array(NaN,dim=10) + importance_stat[1]=meanok(importance[ifea,]) + importance_stat[2]=sdok(importance[ifea,]) + importance_stat[3]=quantileok(importance[ifea,],0.05) + importance_stat[4]=quantileok(importance[ifea,],0.25) + importance_stat[5]=quantileok(importance[ifea,],0.50) + importance_stat[6]=quantileok(importance[ifea,],0.75) + importance_stat[7]=quantileok(importance[ifea,],0.95) + importance_stat[8]=minok(importance[ifea,]) + importance_stat[9]=maxok(importance[ifea,]) + + print(paste(formatC(importance_stat[1],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[2],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[3],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[4],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[5],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[6],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[7],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[8],digits=2,width=10,format='f',flag=''), + formatC(importance_stat[9],digits=2,width=10,format='f',flag=''),features[ifea])) + } + } +} + + + + +########################################################################### +if(substr(what,1,3)=='kfs' | substr(what,1,3)=='kfd' | substr(what,1,2)=='an' | substr(what,1,2)=='ml'){ + hopdata <- array(NaN,dim=c(nforecast,nlengthforecast,nvariables+2)) + + ibestconfig=NaN + if(is.finite(maxok(hdata_all))){ + k=1 + + print('********** Compute statistics for all configurations') + hdata_stat <- array(NaN,dim=c(nconfigs,nmetrics)) + for (iconfig in 1:nconfigs){ + hdata_stat[iconfig,]=get_omstat(hobs,hdata_all[iconfig,1,,k]) + } + + print('********** Find the optimal configuration') + optimetrics='RMSE' #(default choice) + if(length(grep('RMSE',what))==1){optimetrics='RMSE'} + if(length(grep('PCC' ,what))==1){optimetrics='PCC' } + if(nconfigs==1){ + ibestconfig=1 + }else{ + w=which(metrics==optimetrics) + if(length(which(optimetrics==c('RMSE','FA','M','FAR','POFD')))==1 & is.finite(minok(hdata_stat[,w]))){ + ibestconfig <- which(hdata_stat[,w]==minok(hdata_stat[,w]))[1] + } + if(length(which(optimetrics==c('PCC','H','CN','POD')))==1 & is.finite(maxok(hdata_stat[,w]))){ + ibestconfig <- which(hdata_stat[,w]==maxok(hdata_stat[,w]))[1] + } + } + } + + + print('********** Get hourly data') + if(is.na(ibestconfig)==FALSE){ + for (ivar in 1:nvariables){ + for (k in 1:nversions){ + hdata[ivar,,k]=hdata_all[ibestconfig,ivar,,k] + } + } + + for (i in 1:nforecast){ + for (ivar in 1:(nvariables+2)){ + for (ih in 1:nlengthforecast){ + hopdata[i,ih,ivar]=hopdata_all[ibestconfig,i,ih,ivar] + } + } + } + #for (i in 1:nforecast){ + # if(is.finite(max(hopdata[i,,]))==FALSE){hopdata[i,,]=NaN} + #} + + } +} + + diff --git a/routine2_timeseriesdata.R b/routine2_timeseriesdata.R new file mode 100644 index 0000000000000000000000000000000000000000..bfffaaf5842a809c294feef52fa64f1bb429b884 --- /dev/null +++ b/routine2_timeseriesdata.R @@ -0,0 +1,222 @@ +ddata <- array(NaN,dim=c(nvariables,ndtime,nversions)) +mdata <- array(NaN,dim=c(nvariables,nmtime,nversions)) +h24data <- array(NaN,dim=c(nvariables,24,nversions)) +hxdata <- array(NaN,dim=c(nvariables,ndtime,nversions)) +dxdata <- array(NaN,dim=c(nvariables,ndtime,nversions)) +mxdata <- array(NaN,dim=c(nvariables,nmtime,nversions)) +h24xdata <- array(NaN,dim=c(nvariables,24,nversions)) +dmaxdata <- array(NaN,dim=c(nvariables,ndtime,nversions)) +dmaxxdata <- array(NaN,dim=c(nvariables,ndtime,nversions)) +dmda8data <- array(NaN,dim=c(nvariables,ndtime,nversions)) +dmda8xdata <- array(NaN,dim=c(nvariables,ndtime,nversions)) + +if(what!='obs' & what!='mod'){ + print('********** Get hopdata opstat') + hfopdata <- array(NaN,dim=c(nlengthforecast,nvariables+2,10)) + w=1:ndtime + for (ih in 1:nlengthforecast){ + for (ivar in 1:(nvariables+2)){ + hfopdata[ih,ivar,1]=meanok(hopdata[w,ih,ivar]) + hfopdata[ih,ivar,2]=sdok(hopdata[w,ih,ivar]) + hfopdata[ih,ivar,3]=quantileok(hopdata[w,ih,ivar],0.05) + hfopdata[ih,ivar,4]=quantileok(hopdata[w,ih,ivar],0.25) + hfopdata[ih,ivar,5]=quantileok(hopdata[w,ih,ivar],0.50) + hfopdata[ih,ivar,6]=quantileok(hopdata[w,ih,ivar],0.75) + hfopdata[ih,ivar,7]=quantileok(hopdata[w,ih,ivar],0.95) + hfopdata[ih,ivar,8]=minok(hopdata[w,ih,ivar]) + hfopdata[ih,ivar,9]=maxok(hopdata[w,ih,ivar]) + hfopdata[ih,ivar,10]=length(which(is.finite(hopdata[w,ih,ivar])==TRUE)) + } + } + + + print('********** Get hopdata stat') + hfopstat <- array(NaN,dim=c(nlengthforecast,2,nmetrics)) + hfoplimstat <- array(NaN,dim=c(nlengthforecast,2,nlimit,nmetricster)) + for (ih in 1:nlengthforecast){ + hfopstat[ih,1,]=get_statistics(hopdata[,ih,nvariables+1],hopdata[,ih,nvariables+2])$res #(stat mod) + hfopstat[ih,2,]=get_statistics(hopdata[,ih,nvariables+1],hopdata[,ih,1])$res #(stat mos) + for (ilim in 1:nlimit){ + hfoplimstat[ih,1,ilim,]=get_statistics(hopdata[,ih,nvariables+1],hopdata[,ih,nvariables+2],lim1=limit[ilim],lim2=limit[ilim+1])$res #(stat mod) + hfoplimstat[ih,2,ilim,]=get_statistics(hopdata[,ih,nvariables+1],hopdata[,ih,1],lim1=limit[ilim],lim2=limit[ilim+1])$res #(stat mos) + } + } + print(maxok(hfopstat)) +} + + + +print('********** Get daily data') +dobs <- ts_compute(htime,hobs,'daily') +dmod <- ts_compute(htime,hmod,'daily') +for (k in wversion){ + for (ivar in 1:nvariables){ + ddata[ivar,,k] <- ts_compute(htime,hdata[ivar,,k],'daily') + } +} + + +print('********** Get daily data for exact comparisons') +hxobs=hobs +hxmod=hmod +hxdata=hdata + +hdataobsmod <- hobs+hmod +for (k in wversion){ + for (ivar in 1:nvariables){ + hdataobsmod <- hdataobsmod+hdata[ivar,,k] + } +} +w=which(is.na(hdataobsmod)==TRUE) +if(length(w)!=0){ + hxobs[w]=NaN + hxmod[w]=NaN + for (k in wversion){ + for (ivar in 1:nvariables){ + hxdata[ivar,w,k]=NaN + } + } +} +dxobs <- ts_compute(htime,hxobs,'daily') +dxmod <- ts_compute(htime,hxmod,'daily') +for (k in wversion){ + for (ivar in 1:nvariables){ + dxdata[ivar,,k] <- ts_compute(htime,hxdata[ivar,,k],'daily') + } +} + + + + +print('********** Get daily max data') +dmaxobs <- ts_compute(htime,hobs,'dailymax') +dmaxmod <- ts_compute(htime,hmod,'dailymax') +for (k in wversion){ + for (ivar in 1:nvariables){ + dmaxdata[ivar,,k] <- ts_compute(htime,hdata[ivar,,k],'dailymax') + } +} + + +print('********** Get daily max data for exact comparisons') +dmaxxobs <- ts_compute(htime,hxobs,'dailymax') +dmaxxmod <- ts_compute(htime,hxmod,'dailymax') +for (k in wversion){ + for (ivar in 1:nvariables){ + dmaxxdata[ivar,,k] <- ts_compute(htime,hxdata[ivar,,k],'dailymax') + } +} + + +print('********** Get daily max 8h data') +dmda8obs <- ts_compute(htime,hobs,'mda8') +dmda8mod <- ts_compute(htime,hmod,'mda8') +for (k in wversion){ + for (ivar in 1:nvariables){ + dmda8data[ivar,,k] <- ts_compute(htime,hdata[ivar,,k],'mda8') + } +} + + +print('********** Get daily max 8h data for exact comparisons') +dmda8xobs <- ts_compute(htime,hxobs,'mda8') +dmda8xmod <- ts_compute(htime,hxmod,'mda8') +for (k in wversion){ + for (ivar in 1:nvariables){ + dmda8xdata[ivar,,k] <- ts_compute(htime,hxdata[ivar,,k],'mda8') + } +} + + +print('********** Get monthly data') +mobs <- ts_compute(htime,hobs,'monthly') +mmod <- ts_compute(htime,hmod,'monthly') +for (k in wversion){ + for (ivar in 1:nvariables){ + mdata[ivar,,k] <- ts_compute(htime,hdata[ivar,,k],'monthly') + } +} + + +print('********** Get monthly data for exact comparisons') +mxobs <- ts_compute(htime,hxobs,'monthly') +mxmod <- ts_compute(htime,hxmod,'monthly') +for (k in wversion){ + for (ivar in 1:nvariables){ + mxdata[ivar,,k] <- ts_compute(htime,hxdata[ivar,,k],'monthly') + } +} + + +print('********** Get diurnal profiles data') +h24obs <- ts_compute(htime,hobs,'diurnal') +h24mod <- ts_compute(htime,hmod,'diurnal') +for (k in wversion){ + for (ivar in 1:nvariables){ + h24data[ivar,,k] <- ts_compute(htime,hdata[ivar,,k],'diurnal') + } +} + + +print('********** Get diurnal profiles data for exact comparisons') +h24xobs <- ts_compute(htime,hxobs,'diurnal') +h24xmod <- ts_compute(htime,hxmod,'diurnal') +for (k in wversion){ + for (ivar in 1:nvariables){ + h24xdata[ivar,,k] <- ts_compute(htime,hxdata[ivar,,k],'diurnal') + } +} + + +if(what!='obs' & what!='mod'){ + print('********** Get bin statistics function of forecast error') + hvar <- hxmod-hxobs + binsize=10 + errorvarbin=seq(-100,100,binsize) + nbin=length(errorvarbin)-1 + errorbinstat=array(NaN,dim=c(2,nbin,nmetrics,nversions)) + for (k in wversion){ + for (ibin in 1:nbin){ + w=which(hvar >= errorvarbin[ibin] & hvar < errorvarbin[ibin+1]) + if(length(w)>10){ + errorbinstat[1,ibin,,k]=get_statistics(hxobs[w],hxmod[w])$res + errorbinstat[2,ibin,,k]=get_statistics(hxobs[w],hxdata[1,w,k])$res + } + } + } + w=which(is.finite(errorbinstat)==FALSE) ; if(length(w)!=0){errorbinstat[w]=NaN} + + + print('********** Get bin statistics function of obseved concentration') + hvar <- hxobs + binsize=10 + obsvarbin=seq(0,200,binsize) + nbin=length(obsvarbin)-1 + obsbinstat=array(NaN,dim=c(2,nbin,nmetrics,nversions)) + for (k in wversion){ + for (ibin in 1:nbin){ + w=which(hvar >= obsvarbin[ibin] & hvar < obsvarbin[ibin+1]) + if(length(w)>10){ + obsbinstat[1,ibin,,k]=get_statistics(hxobs[w],hxmod[w])$res + obsbinstat[2,ibin,,k]=get_statistics(hxobs[w],hxdata[1,w,k])$res + } + } + } + w=which(is.finite(obsbinstat)==FALSE) ; if(length(w)!=0){obsbinstat[w]=NaN} + +} + + + +if(substr(what,1,6)=='ml_gbm'){ + print('********** get partial dependency plots') + resolution=30 + npdp=min(c(nfeatures,9)) + x=varImp(mlfit)$importance + featurepdp <- array(NaN,dim=c(npdp,2,resolution)) + for (ivar in 1:npdp){ + tmp <- partial(mlfit,pred.var=row.names(x)[order(x,decreasing=TRUE)[ivar]],rug=TRUE,grid.resolution=resolution) + featurepdp[ivar,1,] <- tmp[,1] + featurepdp[ivar,2,] <- tmp[,2] + } +} diff --git a/routine3_write.R b/routine3_write.R new file mode 100644 index 0000000000000000000000000000000000000000..827b581ca0d48f94be22a27f10727ec17855e9ba --- /dev/null +++ b/routine3_write.R @@ -0,0 +1,310 @@ +print('********** Write hourly data') +datatext='htime,obs,mod' +for (k in wversion){ datatext=paste0(datatext,',',paste0(variables,'_',k,collapse=',')) } +for (i in 1:nhtime){ + if(format(htime[i],'%H')=='00'){ + linetext=paste(paste(as.character(htime[i]),'00:00:00'), + formatC(hobs[i],digits=2,width=3,format='f',flag=''), + formatC(hmod[i],digits=2,width=3,format='f',flag=''),sep=',') + }else{ + linetext=paste(as.character(htime[i]), + formatC(hobs[i],digits=2,width=3,format='f',flag=''), + formatC(hmod[i],digits=2,width=3,format='f',flag=''),sep=',') + } + for (k in wversion){ + for (ivar in 1:nvariables){ + linetext=paste(linetext,formatC(hdata[ivar,i,k],digits=2,width=3,format='f',flag=''),sep=',') + } + } + datatext=c(datatext,linetext) +} +file=paste(workdir,'/',outputfile,'_hdata.csv',sep='') +con <- file(file,open="w") +writeLines(datatext[1:length(datatext)], con = con) +close(con) +print(file) + + +print('********** Write daily data') +datatext='dtime,obs,mod' +for (k in wversion){ datatext=paste0(datatext,',',paste0(variables,'_',k,collapse=',')) } +for (i in 1:ndtime){ + linetext=paste(as.character(dtime[i]), + formatC(dxobs[i],digits=2,width=3,format='f',flag=''), + formatC(dxmod[i],digits=2,width=3,format='f',flag=''),sep=',') + for (k in wversion){ + for (ivar in 1:nvariables){ + linetext=paste(linetext,formatC(dxdata[ivar,i,k],digits=2,width=3,format='f',flag=''),sep=',') + } + } + datatext=c(datatext,linetext) +} +file=paste(workdir,'/',outputfile,'_dxdata.csv',sep='') +con <- file(file,open="w") +writeLines(datatext[1:length(datatext)], con = con) +close(con) +print(file) + + +print('********** Write daily max data') +datatext='dtime,obs,mod' +for (k in wversion){ datatext=paste0(datatext,',',paste0(variables,'_',k,collapse=',')) } +for (i in 1:ndtime){ + linetext=paste(as.character(dtime[i]), + formatC(dmaxxobs[i],digits=2,width=3,format='f',flag=''), + formatC(dmaxxmod[i],digits=2,width=3,format='f',flag=''),sep=',') + for (k in wversion){ + for (ivar in 1:nvariables){ + linetext=paste(linetext,formatC(dmaxxdata[ivar,i,k],digits=2,width=3,format='f',flag=''),sep=',') + } + } + datatext=c(datatext,linetext) +} +file=paste(workdir,'/',outputfile,'_dmaxxdata.csv',sep='') +con <- file(file,open="w") +writeLines(datatext[1:length(datatext)], con = con) +close(con) +print(file) + + +print('********** Write daily mda8 data') +datatext='dtime,obs,mod' +for (k in wversion){ datatext=paste0(datatext,',',paste0(variables,'_',k,collapse=',')) } +for (i in 1:ndtime){ + linetext=paste(as.character(dtime[i]), + formatC(dmda8xobs[i],digits=2,width=3,format='f',flag=''), + formatC(dmda8xmod[i],digits=2,width=3,format='f',flag=''),sep=',') + for (k in wversion){ + for (ivar in 1:nvariables){ + linetext=paste(linetext,formatC(dmda8xdata[ivar,i,k],digits=2,width=3,format='f',flag=''),sep=',') + } + } + datatext=c(datatext,linetext) +} +file=paste(workdir,'/',outputfile,'_dmda8xdata.csv',sep='') +con <- file(file,open="w") +writeLines(datatext[1:length(datatext)], con = con) +close(con) +print(file) + + +print('********** Write monthly data') +datatext='mtime,obs,mod' +for (k in wversion){ datatext=paste0(datatext,',',paste0(variables,'_',k,collapse=',')) } +for (i in 1:nmtime){ + linetext=paste(as.character(format(mtime[i],"%Y-%m")), + formatC(mxobs[i],digits=2,width=3,format='f',flag=''), + formatC(mxmod[i],digits=2,width=3,format='f',flag=''),sep=',') + for (k in wversion){ + for (ivar in 1:nvariables){ + linetext=paste(linetext,formatC(mxdata[ivar,i,k],digits=2,width=3,format='f',flag=''),sep=',') + } + } + datatext=c(datatext,linetext) +} +file=paste(workdir,'/',outputfile,'_mxdata.csv',sep='') +con <- file(file,open="w") +writeLines(datatext[1:length(datatext)], con = con) +close(con) +print(file) + + +print('********** Write diurnal profiles data') +datatext='hour,obs,mod' +for (k in wversion){ datatext=paste0(datatext,',',paste0(variables,'_',k,collapse=',')) } +for (i in 1:24){ + linetext=paste(i, + formatC(h24xobs[i],digits=2,width=3,format='f',flag=''), + formatC(h24xmod[i],digits=2,width=3,format='f',flag=''),sep=',') + for (k in wversion){ + for (ivar in 1:nvariables){ + linetext=paste(linetext,formatC(h24xdata[ivar,i,k],digits=2,width=3,format='f',flag=''),sep=',') + } + } + datatext=c(datatext,linetext) +} +file=paste(workdir,'/',outputfile,'_h24xdata.csv',sep='') +con <- file(file,open="w") +writeLines(datatext[1:length(datatext)], con = con) +close(con) +print(file) + + + +if(what!='obs' & what!='mod'){ + + print('********** Write statistics - from hourly/daily/monthly data') + for (i in c('hx','dx','mx','dmaxx','mda8x')){ + if(i=='hx' ){xobs=hxobs ; xmod=hxmod ; xdata=hxdata} + if(i=='dx' ){xobs=dxobs ; xmod=dxmod ; xdata=dxdata} + if(i=='mx' ){xobs=mxobs ; xmod=mxmod ; xdata=mxdata} + if(i=='dmaxx'){xobs=dmaxxobs ; xmod=dmaxxmod ; xdata=dmaxxdata} + if(i=='mda8x'){xobs=dmda8xobs ; xmod=dmda8xmod ; xdata=dmda8xdata} + + header='configuration : none' + if(substr(what,1,3)=='kfs' | + substr(what,1,3)=='kfd'){header=paste0('configuration : ',configs[ibestconfig])} + datatext=c(header, + get_statistics(mode='legend',width=8,sep=',')$cleg, + get_statistics(xobs,xmod,8,2,sep=',')$cres) + for (k in wversion){ + datatext=c(datatext,get_statistics(xobs,xdata[1,,k],8,2,sep=',')$cres) + } + file=paste(workdir,'/',outputfile,'_',i,'stat.csv',sep='') + con <- file(file,open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + print(file) + } + + + if(1==1){ + print('********** Write statistics - from hourly/daily/dailymax data with with limits') + for (k in wversion){ + for (i in c('hx','dx','dmaxx','dmda8x')){ + if(i=='hx' ){xobs=hxobs ; xmod=hxmod ; xdata=hxdata} + if(i=='dx' ){xobs=dxobs ; xmod=dxmod ; xdata=dxdata} + if(i=='dmaxx' ){xobs=dmaxxobs ; xmod=dmaxxmod ; xdata=dmaxxdata} + if(i=='dmda8x'){xobs=dmda8xobs ; xmod=dmda8xmod ; xdata=dmda8xdata} + datatext=get_statistics(mode='legendter',width=7,sep=',')$cleg + for (ilim in 1:nlimit){ + datatext=c(datatext, + get_statistics(xobs,xmod,7,1,lim1=limit[ilim],lim2=limit[ilim+1],sep=',')$cres, + get_statistics(xobs,xdata[1,,k],7,1,lim1=limit[ilim],lim2=limit[ilim+1],sep=',')$cres) + #print('*******') + #print(get_statistics(xobs,xmod,7,1,lim1=limit[ilim],lim2=limit[ilim+1],sep=',')$cres) + #print(get_statistics(xobs,xdata[1,,k],7,1,lim1=limit[ilim],lim2=limit[ilim+1],sep=',')$cres) + } + con <- file(paste(workdir,'/',outputfile,'_',i,k,'limstat.csv',sep=''),open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } + } + } + + + print('********** Write error bin statistics') + for (k in wversion){ + datatext=paste(' lim1, lim2',get_statistics(mode='legend',width=7,sep=',')$cleg,sep=',') + for (ibin in 1:nbin){ + for (j in 1:2){ + linetext=paste0(formatC(errorvarbin[ibin],digits=0,width=6,format='f',flag=''),',', + formatC(errorvarbin[ibin+1],digits=0,width=6,format='f',flag=''),',') + for (im in 1:nmetrics){ + linetext=paste0(linetext, + formatC(errorbinstat[j,ibin,im,k],digits=2,width=7,format='f',flag=''),sep=',') + } + datatext=c(datatext,linetext) + } + } + con <- file(paste(workdir,'/',outputfile,'_hx',k,'errorbinstat.csv',sep=''),open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } + + + print('********** Write obs bin statistics') + for (k in wversion){ + datatext=paste(' lim1, lim2',get_statistics(mode='legend',width=8,sep=',')$cleg,sep=',') + for (ibin in 1:nbin){ + for (j in 1:2){ + linetext=paste(formatC(obsvarbin[ibin],digits=0,width=6,format='f',flag=''), + formatC(obsvarbin[ibin+1],digits=0,width=6,format='f',flag=''),sep=',') + for (im in 1:nmetrics){ + linetext=paste(linetext, + formatC(obsbinstat[j,ibin,im,k],digits=2,width=7,format='f',flag=''),sep=',') + } + datatext=c(datatext,linetext) + } + } + con <- file(paste(workdir,'/',outputfile,'_hx',k,'obsbinstat.csv',sep=''),open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } + + + + print('********** Write hfopdata') + datatext='ivar,hour,mean,sd,p05,p25,p50,p75,p95,min,max,N' + for (ivar in 1:(nvariables+2)){ + for (ih in 1:nlengthforecast){ + linetext=paste0(ivar,ih,sep=',') + for (im in 1:10){ + linetext=paste( + linetext, + formatC(hfopdata[ih,ivar,im],digits=2,width=7,format='f',flag=''),sep=',') + } + datatext=c(datatext,linetext) + } + } + con <- file(paste(workdir,'/',outputfile,'_hfopdata.csv',sep=''),open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + + + + print('********** Write hfopstat') + datatext=paste('mod1mos2,hour',get_statistics(mode='legend',width=8,sep=',')$cleg,sep=',') + for (i in 1:2){ + for (ih in 1:nlengthforecast){ + linetext=paste0(formatC(i,digits=0,width=8,format='f',flag=''),',', + formatC(ih,digits=0,width=4,format='f',flag='')) + for (im in 1:nmetrics){ + linetext=paste( + linetext, + formatC(hfopstat[ih,i,im],digits=2,width=7,format='f',flag=''),sep=',') + } + datatext=c(datatext,linetext) + } + } + con <- file(paste(workdir,'/',outputfile,'_hfopstat.csv',sep=''),open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + + + + if(exists("importance_final")==TRUE){ + print('********** Write feature importance') + datatext='feature,importance' + for (ifea in 1:nfeatures){ + datatext=c(datatext,paste(features[ifea],formatC(importance_final[ifea],digits=2,width=0,format='f',flag=''),sep=',')) + } + con <- file(paste(workdir,'/',outputfile,'_importance.csv',sep=''),open="w") + writeLines(datatext[1:length(datatext)], con = con) + close(con) + } +} + + +print('********** Save hopdata (only concentration)') +for (i in 1:nforecast){ + itime=(i-1)*timestep+1 + if(i==1){ + save_time=htime[itime] + }else{ + save_time=c(save_time,htime[itime]) + } +} +save_hopdata <- as.data.frame(hopdata[,,1]) +save_hopdata$time <- save_time +save_hopdata$station <- rep(station,nforecast) +colnames(save_hopdata) <- c(paste0(1:nlengthforecast,"h"),'htime','station') +save(save_hopdata,file=paste0(workdir,'/',outputfile,'_hopdata.Rdata')) +print(dim(save_hopdata)) +print(object.size(save_hopdata)) + + +print('********** Save hdata') +ivar=1 +for (k in wversion){ + + save_hdata <- as.data.frame(rep(station,nhtime)) + save_hdata$htime <- htime + save_hdata$hobs <- hobs + save_hdata$hmod <- hmod + save_hdata$hmos <- hdata[ivar,,k] + colnames(save_hdata) <- c('station','htime','hobs','hmod','hmos') + save(save_hdata,file=paste0(workdir,'/',outputfile,'_',k,'_hdata.Rdata')) + print(dim(save_hdata)) + print(object.size(save_hdata)) +} diff --git a/routine4_plot.R b/routine4_plot.R new file mode 100644 index 0000000000000000000000000000000000000000..daa12d31666eaa0eea5d24177f573bb73292b9ea --- /dev/null +++ b/routine4_plot.R @@ -0,0 +1,361 @@ +print('********** Plot') +figout=paste0(workdir,'/',outputfile,'_',k,'.pdf',sep='') +pdf(figout,width=15,height=20,paper='a4r') + + +print('********** Preliminaries for plot') +cex=1.8 +lwd=3 +ptcex=2 +par(xaxs='i',yaxs='i',cex=cex,cex.axis=cex,cex.lab=cex,cex.main=cex) +main='' +if(substr(what,1,3)=='kfs'){ main=paste0('STATIC : w_over_v=',configs[ibestconfig],' / v_t=',v_t)} +if(substr(what,1,3)=='kfd'){ main=paste0('DYNAMIC : ndynwindow=',configs[ibestconfig]) } + + + + +## plot 1 - obs/mod/ time series - concentrations +print('********** plot 1') +par(mar=c(5,5,2,2),mfrow=c(2,1)) +for (i in 1:6){ + if(i==1){t=htime ; o=hobs ; m=hmod ; d=hdata[1,,k] ; mainsuffix='(non exact)'} + if(i==2){t=htime ; o=hxobs ; m=hxmod ; d=hxdata[1,,k] ; mainsuffix=''} + if(i==3){t=dtime ; o=dobs ; m=dmod ; d=ddata[1,,k] ; mainsuffix='(non exact)'} + if(i==4){t=dtime ; o=dxobs ; m=dxmod ; d=dxdata[1,,k] ; mainsuffix=''} + if(i==5){t=mtime ; o=mobs ; m=mmod ; d=mdata[1,,k] ; mainsuffix='(non exact)'} + if(i==6){t=mtime ; o=mxobs ; m=mxmod ; d=mxdata[1,,k] ; mainsuffix=''} + #print(paste(i,meanok(o),meanok(m),meanok(d))) + + ylim=c(0,1.6*maxok(c(o,m,d))) + + if(i <= 2){www=which(htime>=dtime[220])}else{www=1:length(t)} ; www=1:length(t) ## testherve + + plot(t[www],o[www],type='n',ylim=ylim,xlab='Date',ylab=legvariables[1],main=paste(main,mainsuffix)) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(t,o,type='l',lwd=lwd,col=colo) + points(t,m,type='l',lwd=lwd,col=colm) + points(t,d,type='l',lwd=lwd,col=cold) + if(i >= 5){ + points(t,o,type='p',pch=pcho,col=colo,cex=ptcex) + points(t,m,type='p',pch=pchm,col=colm,cex=ptcex) + points(t,d,type='p',pch=pchd,col=cold,cex=ptcex) + legend('topleft',legomd,lty=1,lwd=lwd,col=colomd,pch=pchomd,bg='white',cex=cex*0.8,pt.cex=ptcex) + }else{ + legend('topleft',legomd,lty=1,lwd=lwd,col=colomd,pch=-1,bg='white',cex=cex*0.8) + } +} +main='' + + +## plot 2 - time series - other variables +if(nvariables>1){ + print('********** plot 2') + par(mar=c(5,5,2,2),mfrow=c(3,1)) + for (ivar in 2:nvariables){ + for (i in 1:3){ + if(i==1){t=htime ; d=hdata[ivar,,k]} + if(i==2){t=dtime ; d=ddata[ivar,,k]} + if(i==3){t=mtime ; d=mdata[ivar,,k]} + ylim=rangeok(d,0.1,0.5) + plot(t,d,type='n',ylim=ylim,xlab='Date',ylab=legvariables[ivar],main=main) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(t,d,type='l',lwd=lwd,col=cold) + if(i==3){ + points(t,d,type='p',pch=pchd,col=cold,cex=ptcex) + legend('top',what,lty=1,lwd=lwd,col=cold,pch=pchd,bg='white',cex=cex,pt.cex=cex) + }else{ + legend('top',what,lty=1,lwd=lwd,col=cold,pch=-1,bg='white',cex=cex,pt.cex=cex) + } + } + } +} + + + + + + + +## plot 3 - obs/mod/ time series - diurnal +print('********** plot 3') +par(mar=c(5,5,2,2),mfrow=c(2,2)) +for (i in 1:2){ + t=1:24 + if(i==1){o=h24obs ; m=h24mod ; d=h24data[1,,k] ; mainsuffix='(non exact)'} + if(i==2){o=h24xobs ; m=h24xmod ; d=h24xdata[1,,k] ; mainsuffix=''} + ylim=rangeok(c(o,m,d),0.1,0.8) + plot(t,o,type='n',ylim=ylim,xlab='Hour of the day',ylab=legvariables[1],main=paste(main,mainsuffix)) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(t,o,type='l',lwd=lwd,col=colo) + points(t,m,type='l',lwd=lwd,col=colm) + points(t,d,type='l',lwd=lwd,col=cold) + points(t,o,type='p',pch=pcho,col=colo,cex=ptcex) + points(t,m,type='p',pch=pchm,col=colm,cex=ptcex) + points(t,d,type='p',pch=pchd,col=cold,cex=ptcex) + legend('topleft',legomd,lty=1,lwd=lwd,col=colomd,pch=pchomd,bg='white',cex=cex,pt.cex=ptcex) +} + + +## plot 4 - other variables diurnal +if(nvariables>1){ + print('********** plot 4') + par(mar=c(5,5,2,2),mfrow=c(2,2)) + for (ivar in 1:nvariables){ + t=1:24 ; d=h24data[ivar,,k] + ylim=rangeok(d,0.1,0.8) + if(all(is.finite(ylim))){ + plot(t,d,type='n',ylim=ylim,xlab='Hour of the day',ylab=legvariables[ivar],main=main) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(t,d,type='l',lwd=lwd,col=cold) + points(t,d,type='p',pch=pchd,col=cold,cex=ptcex) + legend('topleft',what,lty=1,lwd=lwd,col=cold,pch=pchd,bg='white',cex=cex,pt.cex=cex) + }else{ + plotblank() + } + } +} + +## plot 5 - scatter plots +print('********** plot 5') +par(mar=c(5,5,2,2),mfrow=c(2,2)) + +imax=2 +if(substr(what,1,2)=='an'){imax=4} +if(what=='mod'){imax=1} +for (i in 1:imax){ + if(i==1){x=hobs ; y=hmod ; col=colm ; xlab='Observations' ; ylab='mod'} + if(i==2){x=hobs ; y=hdata[1,,k] ; col=cold ; xlab='Observations' ; ylab=what} + if(i==3){x=hdata[1,,k] ; y=hdata[3,,k] ; col=cold ; xlab=legvariables[1] ; ylab=legvariables[3]} + if(i==4){x=hdata[2,,k] ; y=hdata[4,,k] ; col=cold ; xlab=legvariables[1] ; ylab=legvariables[3]} + main='' + lm=lm(y~x) + a=summary(lm)$coefficients[2] + b=summary(lm)$coefficients[1] + r=corok(x,y) + main=paste0('y=',formatC(a,digits=2,width=3,format='f',flag=''),'x+', + formatC(b,digits=2,width=3,format='f',flag=''),' (r=', + formatC(r,digits=2,width=3,format='f',flag=''),')') + lim=rangeok(c(hobs,hmod,hdata[1,,k]),0.02,0.02) + plot(x,y,type='p',pch=16,col=adjustcolor(col,alpha.f=0.3),xlab='Observations',ylab=paste('Model',ylab),main=main,xlim=lim,ylim=lim) + points(c(-1,1)*1e9,c(-1,1)*1e9,type='l',lty=2) + if(length(w)!=0){abline(lm,col='black',lwd=lwd)} +} + + + + +if(substr(what,1,3)=='kfs' | substr(what,1,3)=='kfd'){ + ## plot 6 - optimal configuration + print('********** plot 6') + par(mar=c(5,5,2,2),mfrow=c(2,2)) + if(substr(what,1,3)=='kfs'){xlab='W/V' ; log='x'} + if(substr(what,1,3)=='kfd'){xlab='Nwindow' ; log=''} + wm=which(metrics==optimetrics) + for (im in unique(c(which(metrics==optimetrics),1,3,5))){ + plot(configs,hdata_stat[,im],type='n',xlab=xlab,ylab=metrics[im],log=log, + main=paste0(xlab,'{',optimetrics,'} = ',configs[ibestconfig])) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(configs[ibestconfig]*c(1,1),c(-1,1)*1e9,type='l',lwd=lwd,col=cold) + points(configs,hdata_stat[,im],type='p',pch=16) + points(configs,hdata_stat[,im],type='l',lwd=lwd) + points(configs[ibestconfig],hdata_stat[ibestconfig,im],type='p',pch=16,cex=ptcex,col=cold) + } +} + + + + + + + + + + +## plot 7 - error versus bias/concentration +if(what!='mod'){ + print('********** plot 7') + par(mar=c(4,5,1,1),mfrow=c(2,3)) + for (i in 1:2){ + for (im in c(1,3,7,2,4,15)){ + if(i==1){ varbin=errorvarbin ; binstat=errorbinstat[,,im,k] ; xlab='Forecast error'} + if(i==2){ varbin=obsvarbin ; binstat=obsbinstat[,,im,k] ; xlab='Observed concentration'} + + w=which(is.finite(binstat[1,])==TRUE & is.finite(binstat[2,])==TRUE) + if(length(w)>1){ + varbin=varbin[w] ; binstat=binstat[,w] ; nbin=length(varbin) + xlim=rangeok(c(varbin,max(varbin+binsize))) + + if(metrics[im]=='RMSE' | metrics[im]=='NRMSE' | metrics[im]=='N'){ + ylim=rangeok(binstat,0,0.4,zero=TRUE) + }else{ + ylim=rangeok(binstat,0.1,0.4) + } + if(all(is.finite(ylim))==TRUE){ + plot(varbin+binsize/2,binstat[1,],type='n',xlab=xlab,ylab=metrics[im],xlim=xlim,ylim=ylim) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + if(metrics[im]=='MB'){points(c(-1,1)*1e9,c(0,0),type='l',lwd=2)} + points(varbin+binsize/2,binstat[1,],type='l',lwd=lwd,col=colm) + points(varbin+binsize/2,binstat[2,],type='l',lwd=lwd,col=cold) + points(varbin+binsize/2,binstat[1,],type='p',pch=pchm,col=colm,cex=ptcex) + points(varbin+binsize/2,binstat[2,],type='p',pch=pchd,col=cold,cex=ptcex) + legend('topleft',legmd,lty=1,lwd=lwd,col=colmd,pch=pchmd,bg='white',cex=cex*0.8,pt.cex=ptcex) + }else{ + plotblank() + } + } + } + } + + + + print('********** Plot 8') + par(mar=c(4.5,4.5,1.5,1),mfrow=c(3,2)) + hforecast=1:nlengthforecast +# for (ivar in which(legvariables=='Concentration' | legvariables=='Error')){ + for (ivar in which(legvariables=='Concentration')){ + if(legvariables[ivar]=='Concentration'){ + ylim=rangeok(hfopdata[,c(ivar,nvariables+1:2),1],0.1,1) + main=paste(legneg,'mode') + }else{ + ylim=rangeok(hfopdata[,ivar,1],0.1,1) + if(ivar==3){ + x=1:nlengthforecast/24 + y=hfopdata[,3,1] + trendmb=summary(lm(y~x))$coefficients[2] + main=paste(formatC(trendmb,digits=2,width=2,format="f",flag=" "),'ug/m3/day') + }else{ + main='' + } + } + plot(hforecast,hfopdata[,1,1],type='n',ylim=ylim,xlab='Forecast hour',ylab=legvariables[ivar],main=main) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(c(-1,1)*1e9,c(0,0),type='l',lwd=2) + points(hforecast,hfopdata[,ivar,1],type='p',lwd=lwd,col=cold,pch=pchd,cex=cex) + points(hforecast,hfopdata[,ivar,1],type='l',lwd=lwd,col=cold) + if(legvariables[ivar]=='Concentration'){ + points(hforecast,hfopdata[,nvariables+1,1],type='p',col=colo,pch=pcho,cex=cex) + points(hforecast,hfopdata[,nvariables+1,1],type='l',col=colo,lwd=lwd) + points(hforecast,hfopdata[,nvariables+2,1],type='p',col=colm,pch=pchm,cex=cex) + points(hforecast,hfopdata[,nvariables+2,1],type='l',col=colm,lwd=lwd) + legend('topleft',legomd,lty=1,lwd=lwd,col=colomd,pch=pchomd,bg='white',cex=cex*0.8,pt.cex=ptcex) + }else{ + # fillplot(1:nforecast,hfopdata[,ivar,1],cold,0.5) + legend('topleft',what,lty=1,lwd=lwd,col=cold,pch=pchd,bg='white',cex=cex,pt.cex=cex) + } + } + + + for (im in c(1,3,5,6,7,2,4)){ + x=1:nlengthforecast/24 + y=hfopstat[,2,im] + trendmb=summary(lm(y~x))$coefficients[2] + ylim=rangeok(hfopstat[,,im],0.1,1) + plot(hforecast,hfopstat[,1,im],type='n',ylim=ylim,xlab='Forecast hour',ylab=metrics[im]) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(c(-1,1)*1e9,c(0,0),type='l',lwd=2) + points(hforecast,hfopstat[,1,im],type='p',lwd=lwd,col=colm,pch=pchm,cex=cex) + points(hforecast,hfopstat[,1,im],type='l',lwd=lwd,col=colm) + points(hforecast,hfopstat[,2,im],type='p',lwd=lwd,col=cold,pch=pchd,cex=cex) + points(hforecast,hfopstat[,2,im],type='l',lwd=lwd,col=cold) + legend('topleft',legmd,lty=1,lwd=lwd,col=colmd,pch=pchmd,bg='white',cex=cex,pt.cex=cex) + + mod_mean <- meanok(hfopstat[,1,im]) + mos_mean <- meanok(hfopstat[,2,im]) + x=1:nlengthforecast/24 ; y=hfopstat[,1,im] + mod_trend <- summary(lm(y~x))$coefficients[2] + mod_trendsd <- summary(lm(y~x))$coefficients[4] + x=1:nlengthforecast/24 ; y=hfopstat[,2,im] + mos_trend <- summary(lm(y~x))$coefficients[2] + + text(min(hforecast),max(ylim)+(max(ylim)-min(ylim))*0.05, + paste0(formatC(mod_mean,digits=1,width=2,format="f",flag=""),'/', + formatC(mod_trend,digits=3,width=2,format="f",flag="")),xpd=TRUE,cex=cex,adj=0,col=colm) + text(max(hforecast),max(ylim)+(max(ylim)-min(ylim))*0.05, + paste0(formatC(mos_mean,digits=1,width=2,format="f",flag=""),'/', + formatC(mos_trend,digits=3,width=2,format="f",flag="")),xpd=TRUE,cex=cex,adj=1,col=cold) + } + + +} + + + + + + +if(exists("hxfeatures")==TRUE){ + + print('********** plot features') + for (ifea in 1:nfeatures){ + par(mar=c(5,5,2,2),mfrow=c(2,1)) + wfea=which(colnames(hyfeatures)==features[ifea]) + plot(hxfeatures,hyfeatures[,wfea],type='n',xlab='Date',ylab=features[ifea]) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(hxfeatures,hyfeatures[,wfea],type='l',pch=pchd,col=cold,cex=ptcex) + + dyfeatures=ts_compute(hxfeatures,hyfeatures[,wfea],'daily') + dxfeatures=as.Date(unique(format(htime[wtime],format='%Y-%m-%d'))) + + plot(dxfeatures,dyfeatures,type='n',xlab='Date',ylab=features[ifea]) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + points(dxfeatures,dyfeatures,type='l',pch=pchd,col=cold,cex=ptcex) + } + + if(substr(what,1,6)=='ml_gbm'){ + print('********** plot partial dependency plots') + x=varImp(mlfit)$importance + for (kplot in 1:2){ + par(mar=c(5,5,2,2),mfrow=c(3,3)) + if(kplot==1){ ylim=rangeok(featurepdp[,2,]) } + if(kplot==2){ ylim=rangeok(featurepdp[2:npdp,2,]) } + for (ivar in 1:npdp){ + variable=row.names(x)[order(x,decreasing=TRUE)[ivar]] + plot(featurepdp[ivar,1,],featurepdp[ivar,2,],type='n',pch=16,col=cold,cex=cex, + ylim=ylim,xlab=variable,ylab='Partial dependence') + fillplot(featurepdp[ivar,1,],featurepdp[ivar,2,],cold,1) + for (idec in (0:10)/10){ + points(quantileok(hyfeatures[,which(colnames(hyfeatures)==variable)],idec),min(ylim),type='p',pch="|",cex=2) + } + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + } + } + } +} + + +if(exists("importance_final")==TRUE){ + print('********** Plot 9') + par(mar=c(5,15,3,3),mfrow=c(1,1)) + plot(c(-5,105),c(0.5,nfeatures+0.5),type='n',xlab='Feature importance',ylab='',axes=FALSE) + grid(nx=NULL,ny=NULL,col='grey',lwd=0.7,lty=1) + for (ifea in 1:nfeatures){ + points(c(0,importance_final[ifea]),ifea*c(1,1),type='l',lwd=20) + points(c(0,importance_final[ifea]),ifea*c(1,1),type='l',lwd=16,col=cold) + } + + itrain=1 + for (i in 1:nforecast){ + if(is.finite(importance[1,i])==TRUE){ + for (ifea in 1:nfeatures){ + #points(importance[ifea,i],ifea,type='p',pch=16,cex=3) + #text(importance[ifea,i],ifea,itrain,adj=0.5,cex=1,col=cold,font=2) + text(importance[ifea,i],ifea,itrain,adj=0.5,cex=1,font=2) + + } + itrain=itrain+1 + } + } + + + axis(1,at=c(-1e9,axTicks(1),1e9), + labels=formatC(c(-1e9,axTicks(1),1e9),digits=0,width=2,format="f",flag=" "), + las=1,cex=cex,cex.axis=cex) + axis(2,at=c(-1e9,1:nfeatures,1e9),labels=c('',substr(features,2,99),''),las=1,cex=cex,cex.axis=cex) + axis(3,at=c(-1e9,1e9)) + axis(4,at=c(-1e9,1e9)) +} + + +print(figout) +dev.off() diff --git a/select_stations.R b/select_stations.R new file mode 100644 index 0000000000000000000000000000000000000000..77c6858ff6311674a13a22016441ee815c62acd6 --- /dev/null +++ b/select_stations.R @@ -0,0 +1,86 @@ +### source('~/scripts/opmos/select_stations.R') + +print('**************************************************************************************************** ') +if(1==1){ + args = commandArgs(TRUE) + print(args) + pollutant <- args[1] + d1 <- args[2] + d2 <- args[3] + infile <- args[4] + outfile <- args[5] + criteria <- args[6] + experiment <- args[7] + indir <- args[8] + station <- args[9] + +print(args) + +}else{ + pollutant <- "PM10" + d1 <- "2015010100" + d2 <- "2015093023" + infile <- "/home/bsc32/bsc32883/scripts/opmos/tmp_filestations_all" + outfile <- "/home/bsc32/bsc32883/scripts/opmos/tmp_filestations" + criteria <- 75 + experiment <- 'b007' + indir <- '/gpfs/scratch/bsc32/bsc32883/data/extractions_et' + station <-'IP-RUR' +} + +print(paste('**************************************************>> ',pollutant)) + +date1=ISOdate(substr(d1,1,4),substr(d1,5,6),substr(d1,7,8),substr(d1,9,10),tz='UTC') +date2=ISOdate(substr(d2,1,4),substr(d2,5,6),substr(d2,7,8),substr(d2,9,10),tz='UTC') +htime <- seq(date1,date2,"hours") ; nhtime <- length(htime) + +#if(experiment=='b0a6' ){file=paste0('/esarchive/scratch/hpetetin/data/extractions_et/b0a6/data/ALL_b0a6_',pollutant,'_EIONET_spain_HL_20150101_20151231.RData')} +#if(experiment=='b007' ){file=paste0('/esarchive/scratch/hpetetin/data/extractions_et/b007/data/ALL_b007_',pollutant,'_EIONET_spain_HL_20150101_20151231.RData')} +#if(experiment=='cams50'){file=paste0('/esarchive/scratch/hpetetin/data/extractions_et/cams50/data/ALL_cams50_',pollutant,'_EIONET_nrt_spain_HL_20151001_20181031.RData')} + + +setid='spain' +if(station=='RUR'){setid='rural'} +if(station=='URB'){setid='urban'} +if(station=='SUB'){setid='suburban'} + + +if(experiment=='b0a6' | experiment=='b007'){datesuffix='20150101_20151231' ; network='EIONET'} +if(experiment=='cams50' ){datesuffix='20160101_20171231' ; network='EIONET_nrt'} +print(pollutant) +file=paste0(indir,'/',experiment,'/data/ALL_',experiment,'_',pollutant,'_',network,'_',setid,'_HL_',datesuffix,'.RData') +print(file) +print(outfile) + +load(file) + +tab=as.vector(read.table(infile,colClasses='character')[,1]) +text='' +for (istat in 1:length(tab)){ + wstat=which(data.all$station==tab[istat]) + + tmp=data.all[wstat,] + tmptime=as.POSIXct(tmp[,2],format="%Y-%m-%d %H:%M:%S") + + w=which(tmptime >= date1 & tmptime <= date2 & is.na(tmp$obs)==FALSE) + ndata=100*length(w)/nhtime + if(ndata >= criteria){ + text=c(text,tab[istat]) + } + #print(paste('Nobs(%) :',tab[istat],ndata)) +} +con <- file(outfile,open="w") +writeLines(text[2:length(text)], con = con) +close(con) +print(length(text)-1) + + +if(0==1){ + load('/gpfs/scratch/bsc32/bsc32883/data/extractions_et/cams50/data/ALL_cams50_O3_EIONET_nrt_urban_HL_20160101_20171231.RData') + stations=as.vector(unique(data.all$station)) + datatext='' + for (i in 1:length(stations)){datatext=c(datatext,stations[i])} + con <- file('/gpfs/scratch/pr1ehu00/pr1ehu18/data/stations/stations_URB',open="w") + writeLines(datatext[2:length(datatext)], con = con) + close(con) +} diff --git a/select_stations.sh b/select_stations.sh new file mode 100755 index 0000000000000000000000000000000000000000..6f28235e7f1bb26d3664271cb4f96f7b47f926ea --- /dev/null +++ b/select_stations.sh @@ -0,0 +1,26 @@ +#!/bin/bash +ipollutant=$1 +d1=$2 +d2=$3 +infile=$4 +outfile=$5 +criteria=$6 +experiment=$7 +indir=$8 + +case $ipollutant in + 'O3') ipolcode=1 ;; + 'NO2')ipolcode=2 ;; + 'PM10') ipolcode=24 ;; +esac + + +rm -f $outfile +while read -r istat ; do + if [ -f $indir/${istat}_obs_${ipolcode}.csv ] ; then + nlines=`wc -l $indir/${istat}_obs_${ipolcode}.csv | awk '{print $1}'` + if [ $nlines -ne 0 ] ; then + echo $istat >> $outfile + fi + fi +done < $infile diff --git a/top_opmos.sh b/top_opmos.sh new file mode 100755 index 0000000000000000000000000000000000000000..5f66a46ceaee139c57876e65144533896893b578 --- /dev/null +++ b/top_opmos.sh @@ -0,0 +1,363 @@ +#!/bin/bash +######################################################################################## +# TOP_OPMOS (Operational_like MOS) +######################################################################################## +# +# This script handles all types of MOS (model output statistics) to correct the model outputs +# based on the observations +# +######### STEPS : +# 0) cancel currently running jobs and preliminaries +# 1) generate the joblist +# 2) launch opmos jobs +# 3) wait the end of the jobs +# 4) save usefull files +# 5) start postprocessing +# +######### IMPORTANT USER-DEFINED PARAMETERS : +# - experiment : experiment among CALIOPE/b007/CAMS50 +# - d1, d2 : dates (YYYYMMDDHH with HH from 00 to 23) +# - station : one station name or the code for a group of stations +# - pollutant : one pollutant or a list of list of pollutants +# - what : one MOS model or a list of MOS model (always include "obs" and "mod") +# - jobtime : maximum time (in hours) of the job (one job corresponds to all MOS models on 1 station for 1 pollutant +# - criteria : minimum percentage of observational data required for taking into account a station +# - nmaxrunningjobs : maximum number of jobs running in parallel (recommanded : 800 on P9 or 1800 on MN4) +# - outdirtmp : general output directory (where a folder will be created) +# - domain : only used for CALIOPE +# - whichoutput : which output files you want to keep, could be any combination of pdf/stat/hdata/h24data/... +# +######################################################################################## + +nmosperjob=1 + +whichoutput='pdf-stat-hdata-dxdata-mxdata-h24xdata-dmaxxdata-dmda8xdata' + +for k in 1 ; do + case $k in + 1) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='MAD-RUR' ; pollutant='O3' ;; + 2) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='MAD-SUB' ; pollutant='O3 PM10 NO2' ;; + 3) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='MAD-URB' ; pollutant='O3 PM10 NO2' ;; + 4) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='MAD-RUR' ; pollutant='O3' ;; + 5) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='MAD-SUB' ; pollutant='O3' ;; + 6) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='MAD-URB' ; pollutant='O3' ;; + + 10) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='IP' ; pollutant='O3 PM10 NO2' ;; + 11) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='IP-RUR' ; pollutant='O3 PM10 NO2' ;; + 12) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='IP-SUB' ; pollutant='O3 PM10 NO2' ;; + 13) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='IP-URB' ; pollutant='O3 PM10 NO2' ;; + 14) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='RUR' ; pollutant='O3' ;; + 15) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='SUB' ; pollutant='O3' ;; + 16) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='URB' ; pollutant='O3' ;; + + 21) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='RUR' ; pollutant='PM10' ;; + 22) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='SUB' ; pollutant='PM10' ;; + 23) experiment=b007 ; d1=2015010100 ; d2=2015093023 ; station='URB' ; pollutant='PM10' ;; + 24) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='RUR' ; pollutant='O3' ;; + 25) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='SUB' ; pollutant='O3' ;; + 26) experiment=cams50 ; d1=2016010100 ; d2=2017123123 ; station='URB' ; pollutant='O3' ;; + *) exit 0 ;; + esac + + case $experiment in +# b007) what='obs mod ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma10 ma15 ma20 kfsRMSE kfdRMSE an-C-0 an-C-1 an-C-1_H-1 an-C-1_T-1 an-C-1_T-1_H-1 an-C-1_P-1 an-C-1_P-1_H-1 an-C-1_P-1_T-1 an-C-1_P-1_T-1_H-1 an-C-1_D-1 an-C-1_D-1_H-1 an-C-1_D-1_T-1 an-C-1_D-1_T-1_H-1 an-C-1_D-1_P-1 an-C-1_D-1_P-1_H-1 an-C-1_D-1_P-1_T-1 an-C-1_D-1_P-1_T-1_H-1 an-C-1_S-1 an-C-1_S-1_H-1 an-C-1_S-1_T-1 an-C-1_S-1_T-1_H-1 an-C-1_S-1_P-1 an-C-1_S-1_P-1_H-1 an-C-1_S-1_P-1_T-1 an-C-1_S-1_P-1_T-1_H-1 an-C-1_S-1_D-1 an-C-1_S-1_D-1_H-1 an-C-1_S-1_D-1_T-1 an-C-1_S-1_D-1_T-1_H-1 an-C-1_S-1_D-1_P-1 an-C-1_S-1_D-1_P-1_H-1 an-C-1_S-1_D-1_P-1_T-1 an-C-1_S-1_D-1_P-1_T-1_H-1 kfsan-C-0 kfsan-C-1 kfsan-C-1_H-1 kfsan-C-1_T-1 kfsan-C-1_T-1_H-1 kfsan-C-1_P-1 kfsan-C-1_P-1_H-1 kfsan-C-1_P-1_T-1 kfsan-C-1_P-1_T-1_H-1 kfsan-C-1_D-1 kfsan-C-1_D-1_H-1 kfsan-C-1_D-1_T-1 kfsan-C-1_D-1_T-1_H-1 kfsan-C-1_D-1_P-1 kfsan-C-1_D-1_P-1_H-1 kfsan-C-1_D-1_P-1_T-1 kfsan-C-1_D-1_P-1_T-1_H-1 kfsan-C-1_S-1 kfsan-C-1_S-1_H-1 kfsan-C-1_S-1_T-1 kfsan-C-1_S-1_T-1_H-1 kfsan-C-1_S-1_P-1 kfsan-C-1_S-1_P-1_H-1 kfsan-C-1_S-1_P-1_T-1 kfsan-C-1_S-1_P-1_T-1_H-1 kfsan-C-1_S-1_D-1 kfsan-C-1_S-1_D-1_H-1 kfsan-C-1_S-1_D-1_T-1 kfsan-C-1_S-1_D-1_T-1_H-1 kfsan-C-1_S-1_D-1_P-1 kfsan-C-1_S-1_D-1_P-1_H-1 kfsan-C-1_S-1_D-1_P-1_T-1 kfsan-C-1_S-1_D-1_P-1_T-1_H-1' ;; + b007) what='obs mod ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma10 ma15 ma20 kfsRMSE kfdRMSE an-C-0 an-C-0_S-0_D-0_T-0 kfsan-C-0 kfsan-C-0 kfsan-C-0_S-0_D-0_T-0 an-C-1 an-C-1_S-1_D-1_T-1 kfsan-C-0 kfsan-C-1 kfsan-C-1_S-1_D-1_T-1 an-C-2 an-C-2_S-2_D-2_T-2 kfsan-C-0 kfsan-C-2 kfsan-C-2_S-2_D-2_T-2' ;; +# b007) what='obs mod ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma10 ma15 ma20 kfsRMSE kfdRMSE an-C-0 an-C-1 an-C-1_H-1 an-C-1_T-1 an-C-1_T-1_H-1 an-C-1_P-1 an-C-1_P-1_H-1 an-C-1_P-1_T-1 an-C-1_P-1_T-1_H-1 an-C-1_D-1 an-C-1_D-1_H-1 an-C-1_D-1_T-1 an-C-1_D-1_T-1_H-1 an-C-1_D-1_P-1 an-C-1_D-1_P-1_H-1 an-C-1_D-1_P-1_T-1 an-C-1_D-1_P-1_T-1_H-1 an-C-1_S-1 an-C-1_S-1_H-1 an-C-1_S-1_T-1 an-C-1_S-1_T-1_H-1 an-C-1_S-1_P-1 an-C-1_S-1_P-1_H-1 an-C-1_S-1_P-1_T-1 an-C-1_S-1_P-1_T-1_H-1 an-C-1_S-1_D-1 an-C-1_S-1_D-1_H-1 an-C-1_S-1_D-1_T-1 an-C-1_S-1_D-1_T-1_H-1 an-C-1_S-1_D-1_P-1 an-C-1_S-1_D-1_P-1_H-1 an-C-1_S-1_D-1_P-1_T-1 an-C-1_S-1_D-1_P-1_T-1_H-1 kfsan-C-0 kfsan-C-1 kfsan-C-1_H-1 kfsan-C-1_T-1 kfsan-C-1_T-1_H-1 kfsan-C-1_P-1 kfsan-C-1_P-1_H-1 kfsan-C-1_P-1_T-1 kfsan-C-1_P-1_T-1_H-1 kfsan-C-1_D-1 kfsan-C-1_D-1_H-1 kfsan-C-1_D-1_T-1 kfsan-C-1_D-1_T-1_H-1 kfsan-C-1_D-1_P-1 kfsan-C-1_D-1_P-1_H-1 kfsan-C-1_D-1_P-1_T-1 kfsan-C-1_D-1_P-1_T-1_H-1 kfsan-C-1_S-1 kfsan-C-1_S-1_H-1 kfsan-C-1_S-1_T-1 kfsan-C-1_S-1_T-1_H-1 kfsan-C-1_S-1_P-1 kfsan-C-1_S-1_P-1_H-1 kfsan-C-1_S-1_P-1_T-1 kfsan-C-1_S-1_P-1_T-1_H-1 kfsan-C-1_S-1_D-1 kfsan-C-1_S-1_D-1_H-1 kfsan-C-1_S-1_D-1_T-1 kfsan-C-1_S-1_D-1_T-1_H-1 kfsan-C-1_S-1_D-1_P-1 kfsan-C-1_S-1_D-1_P-1_H-1 kfsan-C-1_S-1_D-1_P-1_T-1 kfsan-C-1_S-1_D-1_P-1_T-1_H-1' ;; + cams50) what='obs mod ma1 ma2 ma3 ma7 ma10 ma15 ma20 kfsRMSE kfdRMSE an-C-0 an-C-1 kfsan-C-0 kfsan-C-1' ;; + *) exit 0 ;; + esac + what='obs mod ma7 kfsRMSE ml_knn-200-30-0-11-4 ml_gbm-200-30-0-11-4-0.50' + what='obs mod kfsRMSE ml_gbm-100-100-0-1-1-0.50 ml_gbm-100-100-0-2-1-0.50 ml_gbm-100-100-0-11-1-0.50 ml_gbm-100-100-1-1-1-0.50 ml_gbm-100-100-1-2-1-0.50 ml_gbm-100-100-1-11-1-0.50' + what='obs mod ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma15 kfsRMSE kfdRMSE an-C-0 an-C-0_S-0_D-0_T-0 kfsan-C-0 kfsan-C-0 kfsan-C-0_S-0_D-0_T-0 an-C-1 an-C-1_S-1_D-1_T-1 kfsan-C-0 kfsan-C-1 kfsan-C-1_S-1_D-1_T-1 an-C-2 an-C-2_S-2_D-2_T-2 kfsan-C-0 kfsan-C-2 kfsan-C-2_S-2_D-2_T-2 ml_gbm-100-100-0-1-1-0.50 ml_gbm-100-100-0-2-1-0.50 ml_gbm-100-100-0-11-1-0.50 ml_gbm-100-100-1-1-1-0.50 ml_gbm-100-100-1-2-1-0.50 ml_gbm-100-100-1-11-1-0.50' + #what='obs mod ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma10 ma15 ma20 kfsRMSE kfdRMSE an-C-0 an-C-1 an-C-1_H-1 an-C-1_T-1 an-C-1_T-1_H-1 an-C-1_P-1 an-C-1_P-1_H-1 an-C-1_P-1_T-1 an-C-1_P-1_T-1_H-1 an-C-1_D-1 an-C-1_D-1_H-1 an-C-1_D-1_T-1 an-C-1_D-1_T-1_H-1 an-C-1_D-1_P-1 an-C-1_D-1_P-1_H-1 an-C-1_D-1_P-1_T-1 an-C-1_D-1_P-1_T-1_H-1 an-C-1_S-1 an-C-1_S-1_H-1 an-C-1_S-1_T-1 an-C-1_S-1_T-1_H-1 an-C-1_S-1_P-1 an-C-1_S-1_P-1_H-1 an-C-1_S-1_P-1_T-1 an-C-1_S-1_P-1_T-1_H-1 an-C-1_S-1_D-1 an-C-1_S-1_D-1_H-1 an-C-1_S-1_D-1_T-1 an-C-1_S-1_D-1_T-1_H-1 an-C-1_S-1_D-1_P-1 an-C-1_S-1_D-1_P-1_H-1 an-C-1_S-1_D-1_P-1_T-1 an-C-1_S-1_D-1_P-1_T-1_H-1 kfsan-C-0 kfsan-C-1 kfsan-C-1_H-1 kfsan-C-1_T-1 kfsan-C-1_T-1_H-1 kfsan-C-1_P-1 kfsan-C-1_P-1_H-1 kfsan-C-1_P-1_T-1 kfsan-C-1_P-1_T-1_H-1 kfsan-C-1_D-1 kfsan-C-1_D-1_H-1 kfsan-C-1_D-1_T-1 kfsan-C-1_D-1_T-1_H-1 kfsan-C-1_D-1_P-1 kfsan-C-1_D-1_P-1_H-1 kfsan-C-1_D-1_P-1_T-1 kfsan-C-1_D-1_P-1_T-1_H-1 kfsan-C-1_S-1 kfsan-C-1_S-1_H-1 kfsan-C-1_S-1_T-1 kfsan-C-1_S-1_T-1_H-1 kfsan-C-1_S-1_P-1 kfsan-C-1_S-1_P-1_H-1 kfsan-C-1_S-1_P-1_T-1 kfsan-C-1_S-1_P-1_T-1_H-1 kfsan-C-1_S-1_D-1 kfsan-C-1_S-1_D-1_H-1 kfsan-C-1_S-1_D-1_T-1 kfsan-C-1_S-1_D-1_T-1_H-1 kfsan-C-1_S-1_D-1_P-1 kfsan-C-1_S-1_D-1_P-1_H-1 kfsan-C-1_S-1_D-1_P-1_T-1 kfsan-C-1_S-1_D-1_P-1_T-1_H-1' + #what='obs mod ma7 kfsRMSE ml_lm1-60-60 ml_gbm1-60-60-0.50 ml_gbm1-60-60-0.75 ml_gbm1-60-60-1.00' + #what='obs mod ma1 ma2 ma3 ma7 kfsRMSE ml_lm1-60-30 ml_lm1-60-40 ml_lm1-60-50 ml_lm1-60-60 ml_gbm1-60-30-0.50 ml_gbm1-60-40-0.50 ml_gbm1-60-50-0.50 ml_gbm1-60-60-0.50 ml_gbm1-60-30-0.75 ml_gbm1-60-40-0.75 ml_gbm1-60-50-0.75 ml_gbm1-60-60-0.75 ml_gbm1-60-30-1.00 ml_gbm1-60-40-1.00 ml_gbm1-60-50-1.00 ml_gbm1-60-60-1.00' + #what='obs mod ma1 ma2 ma3 ma7 kfsRMSE ml_lm1-60-30 ml_lm1-60-40 ml_lm1-60-50 ml_lm1-60-60 ml_gbm1-60-30-0.75 ml_gbm1-60-40-0.75 ml_gbm1-60-50-0.75 ml_gbm1-60-60-0.75 ml_lm2-60-30 ml_lm2-60-40 ml_lm2-60-50 ml_lm2-60-60 ml_gbm2-60-30-0.75 ml_gbm2-60-40-0.75 ml_gbm2-60-50-0.75 ml_gbm2-60-60-0.75 ml_lm3-60-30 ml_lm3-60-40 ml_lm3-60-50 ml_lm3-60-60 ml_gbm3-60-30-0.75 ml_gbm3-60-40-0.75 ml_gbm3-60-50-0.75 ml_gbm3-60-60-0.75' + #what='obs mod ma1 ma2 ma3 ma7 kfsRMSE ml_lm1-60-30 ml_lm1-60-40 ml_lm1-60-50 ml_lm1-60-60 ml_gbm1-30-30-1.00 ml_gbm1-30-10-1.0 ml_gbm1-30-30-0.75 ml_gbm1-30-10-0.75 ml_gbm1b-30-30-1.00 ml_gbm1b-30-10-1.0 ml_gbm1b-30-30-0.75 ml_gbm1b-30-10-0.75 ml_gbm2-30-30-1.00 ml_gbm2-30-10-1.0 ml_gbm2-30-30-0.75 ml_gbm2-30-10-0.75 ml_gbm2b-30-30-1.00 ml_gbm2b-30-10-1.0 ml_gbm2b-30-30-0.75 ml_gbm2b-30-10-0.75' + what='obs mod ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma15 kfsRMSE kfdRMSE an-C-0 an-C-0_S-0_D-0_T-0 kfsan-C-0 kfsan-C-0 kfsan-C-0_S-0_D-0_T-0 an-C-1 an-C-1_S-1_D-1_T-1 kfsan-C-0 kfsan-C-1 kfsan-C-1_S-1_D-1_T-1 an-C-2 an-C-2_S-2_D-2_T-2 kfsan-C-0 kfsan-C-2 kfsan-C-2_S-2_D-2_T-2' + what='obs mod ma7 ma15 kfsRMSE ml_gbm-100-100-0-1-1-0.50' + what='obs mod ma7 kfsRMSE' + + what='obs mod ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma15 kfsRMSE kfdRMSE an-C-0 an-C-0_S-0_D-0_T-0 kfsan-C-0 kfsan-C-0 kfsan-C-0_S-0_D-0_T-0 an-C-1 an-C-1_S-1_D-1_T-1 kfsan-C-0 kfsan-C-1 kfsan-C-1_S-1_D-1_T-1 an-C-2 an-C-2_S-2_D-2_T-2 kfsan-C-0 kfsan-C-2 kfsan-C-2_S-2_D-2_T-2 ml_gbm-100-100-0-1-1-0.50 ml_gbm-100-100-0-2-1-0.50 ml_gbm-100-100-0-11-1-0.50 ml_gbm-100-100-1-1-1-0.50 ml_gbm-100-100-1-2-1-0.50 ml_gbm-100-100-1-11-1-0.50' + + what='obs mod +ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma15 +kfsRMSE kfdRMSE +an-C-0 an-C-0_D-0 an-C-0_T-0 an-C-0_S-0 an-C-0_S-0_T-0 an-C-0_S-0_D-0_T-0 an-C-0_S-0_D-0_P-0_T-0_H-0 +an-C-1 an-C-1_D-1 an-C-1_T-1 an-C-1_S-1 an-C-1_S-1_T-1 an-C-1_S-1_D-1_T-1 an-C-1_S-1_D-1_P-1_T-1_H-1 +an-C-2 an-C-2_D-2 an-C-2_T-2 an-C-2_S-2 an-C-2_S-2_T-2 an-C-2_S-2_D-2_T-2 an-C-2_S-2_D-2_P-2_T-2_H-2 +kfsan-C-0 kfsan-C-0_D-0 kfsan-C-0_T-0 kfsan-C-0_S-0 kfsan-C-0_S-0_T-0 kfsan-C-0_S-0_D-0_T-0 kfsan-C-0_S-0_D-0_P-0_T-0_H-0 +kfsan-C-1 kfsan-C-1_D-1 kfsan-C-1_T-1 kfsan-C-1_S-1 kfsan-C-1_S-1_T-1 kfsan-C-1_S-1_D-1_T-1 kfsan-C-1_S-1_D-1_P-1_T-1_H-1 +kfsan-C-2 kfsan-C-2_D-2 kfsan-C-2_T-2 kfsan-C-2_S-2 kfsan-C-2_S-2_T-2 kfsan-C-2_S-2_D-2_T-2 kfsan-C-2_S-2_D-2_P-2_T-2_H-2 +ml_gbm-90-30-0-0-1-0.50 ml_gbm-90-30-0-1-1-0.50 ml_gbm-90-30-0-2-1-0.50 ml_gbm-90-30-0-3-1-0.50 ml_gbm-90-30-0-4-1-0.50 ml_gbm-90-30-0-6-1-0.50 ml_gbm-90-30-0-7-1-0.50 ml_gbm-90-30-0-11-1-0.50 +ml_gbm-90-30-0-0-1-0.75 ml_gbm-90-30-0-1-1-0.75 ml_gbm-90-30-0-2-1-0.75 ml_gbm-90-30-0-3-1-0.75 ml_gbm-90-30-0-4-1-0.75 ml_gbm-90-30-0-6-1-0.75 ml_gbm-90-30-0-7-1-0.75 ml_gbm-90-30-0-11-1-0.75 +ml_gbm-90-30-1-0-1-0.50 ml_gbm-90-30-1-1-1-0.50 ml_gbm-90-30-1-2-1-0.50 ml_gbm-90-30-1-3-1-0.50 ml_gbm-90-30-1-4-1-0.50 ml_gbm-90-30-1-6-1-0.50 ml_gbm-90-30-1-7-1-0.50 ml_gbm-90-30-1-11-1-0.50 +ml_gbm-90-30-1-0-1-0.75 ml_gbm-90-30-1-1-1-0.75 ml_gbm-90-30-1-2-1-0.75 ml_gbm-90-30-1-3-1-0.75 ml_gbm-90-30-1-4-1-0.75 ml_gbm-90-30-1-6-1-0.75 ml_gbm-90-30-1-7-1-0.75 ml_gbm-90-30-1-11-1-0.75' + + + + + + + + id=1 + jobtime=3 + debug=1 + + case $BSC_MACHINE in + 'power') + outdirtmp=/esarchive/scratch/hpetetin/data/opmos/ + filestationsdir=/esarchive/scratch/hpetetin/data/stations ;; + 'mn4') + outdirtmp=/gpfs/scratch/pr1ehu00/pr1ehu18/data/opmos/ + filestationsdir=/gpfs/scratch/pr1ehu00/pr1ehu18/data/stations ;; + esac + case $experiment in + 'caliope') indir=/gpfs/scratch/bsc32/bsc32883/data/caliope ;; + *) indir=/gpfs/scratch/bsc32/bsc32883/data/extractions_et ;; + esac + if [ $debug -eq 1 ] ; then outdirtmp=$outdirtmp/debug/ ; fi + outdir=$outdirtmp/opmos_${experiment}_${station}_${d1}_${d2}_v`date '+%Y%m%d%H%M%S'` + #outdir=$outdirtmp/opmos_${experiment}_${station}_${d1}_${d2}_vdebug ; rm -rf $outdir ; mkdir $outdir + + filestations=$filestationsdir/stations.csv + pdir=`pwd` + domain=IP4 + joblist=$pdir/tmp_joblist + criteria=75 + case $BSC_MACHINE in + 'power') nmaxrunningjobs=800 ;; + 'mn4') nmaxrunningjobs=1800 ;; + esac + + ############################################################################################# + + echo -e "\e[00;31m | ==============================================\e[00m" + echo -e "\e[00;31m | ================== TOP_MOS =================\e[00m" + echo -e "\e[00;31m | ==============================================\e[00m" + echo -e "\e[00;31m | > experiment : \e[00m $experiment" + echo -e "\e[00;31m | > d1 : \e[00m $d1" + echo -e "\e[00;31m | > d2 : \e[00m $d2" + echo -e "\e[00;31m | > station : \e[00m $station" + echo -e "\e[00;31m | > pollutant : \e[00m $pollutant" + echo -e "\e[00;31m | > nmosperjob : \e[00m $nmosperjob" + begin=`date +%s` #(start chrono) + + ############ STEP0 : preliminaries + echo -e "\e[00;31m | Cancel current jobs \e[00m `date`" + for i in $(squeue | grep jopmos$id | awk '{print $1}') ; do scancel $i ; done + for i in $(squeue | grep post$id | awk '{print $1}') ; do scancel $i ; done + + echo -e "\e[00;31m | Preliminaries \e[00m `date`" + username=`whoami` + find $pdir -maxdepth 1 -name 'slurm-*out' -delete #(in case too high number of slurm files to delete) + rm -f $pdir/tmp* + if [ ! -d $outdir ] ; then mkdir -p $outdir ; fi + function convertsec(){ # (function for converting time in seconds, used for chrono routines) + echo $(($1 / 86400))':'`date +%H:%M:%S -d "0000-01-01 $(($1 % 86400)) seconds"` + } + mkdir $outdir/other/ + mkdir $outdir/other/slurm_opmos + mkdir $outdir/other/slurm_post + + + ############ STEP1 : generate the joblist or (or use an already existing joblist) + rm -f $joblist + if [ ! -f $joblist ] ; then + echo -e "\e[00;31m | Preparing jobs list \e[00m ($joblist) `date`" + rm -f $pdir/log_station_data + ijob=1 + for ipollutant in $pollutant ; do + for istation in $station ; do + + ## get the list with all stations (if not already available in filestation directory) + filestationslist=$filestationsdir/stations_$istation + if [ ! -f $filestationslist ] ; then + case $istation in + 'IP') awk -F',' '{if(NR>1 ){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'IP-URB') awk -F',' '{if(NR>1 && $6=="URB" ){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'IP-SUB') awk -F',' '{if(NR>1 && $6=="SUB" ){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'IP-RUR') awk -F',' '{if(NR>1 && $6=="RUR" ){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'BCN') awk -F',' '{if(NR>1 && $8=="Barcelona"){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'BCN-URB') awk -F',' '{if(NR>1 && $6=="URB" && $8=="Barcelona"){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'BCN-SUB') awk -F',' '{if(NR>1 && $6=="SUB" && $8=="Barcelona"){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'BCN-RUR') awk -F',' '{if(NR>1 && $6=="RUR" && $8=="Barcelona"){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'MAD') awk -F',' '{if(NR>1 && $8=="Madrid" ){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'MAD-URB') awk -F',' '{if(NR>1 && $6=="URB" && $8=="Madrid" ){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'MAD-SUB') awk -F',' '{if(NR>1 && $6=="SUB" && $8=="Madrid" ){print $1}}' $filestations > $pdir/tmp_filestations ;; + 'MAD-RUR') awk -F',' '{if(NR>1 && $6=="RUR" && $8=="Madrid" ){print $1}}' $filestations > $pdir/tmp_filestations ;; + *) echo $istation > $pdir/tmp_filestations ;; + esac + cp $pdir/tmp_filestations $filestationslist + else + cp $filestationslist $pdir/tmp_filestations + fi + if [ "$experiment" == "caliope" ] ; then + ## [caliope] select stations with at least one data + mv $pdir/tmp_filestations $pdir/tmp_filestations_all + ./select_stations.sh $ipollutant $d1 $d2 $pdir/tmp_filestations_all $pdir/tmp_filestations $criteria $experiment $indir + nstation=` wc -l $pdir/tmp_filestations | awk '{print $1}'` + nstationall=`wc -l $pdir/tmp_filestations_all | awk '{print $1}'` + echo -e "\e[00;31m | \e[00m $istation : nstation=$nstation with $ipollutant observations (total=$nstationall stations)" + else + ## [other experiment] select stations with at least data + mv $pdir/tmp_filestations $pdir/tmp_filestations_all + Rscript $pdir/select_stations.R $ipollutant $d1 $d2 $pdir/tmp_filestations_all $pdir/tmp_filestations $criteria $experiment $indir $station > $outdir/other/log_select_stations_$ipollutant + #cat $outdir/other/log_select_stations_$ipollutant + nstation=`wc -l $pdir/tmp_filestations | awk '{print $1}'` + nstationall=`wc -l $pdir/tmp_filestations_all | awk '{print $1}'` + echo -e "\e[00;31m | \e[00m $istation : nstation=$nstation with ${criteria}% of $ipollutant observations (total=$nstationall stations)" + fi + + ## loop on stations - assign model to the different jobs in function of + while read -r istat ; do + i=0 + ilistwhat="" + for iwhat in $what ; do + if [ $i -lt $nmosperjob ] ; then + if [ "${ilistwhat}" == "" ] ; then + ilistwhat="${iwhat}" + else + ilistwhat="${ilistwhat}|${iwhat}" + fi + i=$((i+1)) + fi + if [ $i -eq $nmosperjob ] ; then + echo $ijob $ipollutant $istat $d1 $d2 $domain $ilistwhat >> $joblist + ijob=$(($ijob+1)) + i=0 + ilistwhat="" + fi + done + if [ "${ilistwhat}" != "" ] ; then + echo $ijob $ipollutant $istat $d1 $d2 $domain $ilistwhat >> $joblist + ijob=$(($ijob+1)) + fi + + done < $pdir/tmp_filestations + done + done + else + echo -e "\e[00;31m | Using existant jobs list \e[00m ($joblist)" + fi + njobs=`wc -l $joblist | awk '{print $1}'` + echo -e "\e[00;31m | \e[00m njobs=$njobs " + + + ############ STEP2 : Launch opmos jobs + echo -e "\e[00;31m | Start loop on jobs list \e[00m `date`" + time_begin=`date +%s` + for ijob in `seq 1 $njobs` ; do + + ## monitor the jobs (only each 10 job to speed-up the loop (the squeue command is relatively slow, especially on MN4) + if [ $(($ijob%10)) -eq 1 ] ; then + nrunningjobs=`squeue | grep $username | wc -l` + nerr=` grep Err $pdir/sl* 2> /dev/null | wc -l` + nkilled=`grep Killed $pdir/sl* 2> /dev/null | wc -l` + ncancel=`grep CANCEL $pdir/sl* 2> /dev/null | wc -l` + echo "$ijob/$njobs ($nrunningjobs running, max=$nmaxrunningjobs)(NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel)" + if [[ $nerr -gt 0 || $nkilled -gt 0 || $ncancel -gt 0 ]] ; then + for i in $(squeue | grep jopmos$id | awk '{print $1}') ; do scancel $i ; done + echo "(NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel)" + exit 0 + fi + else + echo "$ijob/$njobs (~$nrunningjobs running, max=$nmaxrunningjobs)" + fi + + ## launch a new job from the list only if the number of jobs currently running is lower than , otherwise wait + ijoblaunched=0 + while [ $ijoblaunched -eq 0 ] ; do + if [ $nrunningjobs -lt $nmaxrunningjobs ] ; then + iwhat=`awk -v job=$ijob '{if(NR==job){print $2}}' $joblist` + sbatch --time=${jobtime}:00:00 -n1 -c5 --job-name="jopmos$id" $pdir/int_opmos.sh $ijob $joblist $pdir $indir $outdir $whichoutput $experiment $criteria $station "$what" #> /dev/null 2>&1 + ijoblaunched=1 + else + nerr=` grep Err $pdir/sl* 2> /dev/null | wc -l` + nkilled=`grep Killed $pdir/sl* 2> /dev/null | wc -l` + ncancel=`grep CANCEL $pdir/sl* 2> /dev/null | wc -l` + nrunningjobs=`squeue | grep $username | wc -l` + echo "wait for launching next job... $ijob/$njobs ($nrunningjobs running, max=$nmaxrunningjobs)(NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel)" + if [[ $nerr -gt 0 || $nkilled -gt 0 || $ncancel -gt 0 ]] ; then + for i in $(squeue | grep jopmos$id | awk '{print $1}') ; do scancel $i ; done + echo "(NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel)" + exit 0 + fi + sleep 10 + fi + done + done + time_end=`date +%s` + duration=`convertsec $((${time_end}-${time_begin}))` + echo -e "\e[00;31m | \e[00m $njobs launched in $duration)" + + + + ############ STEP3 : Wait the end of the jobs + ## the post processing need all files to be generated + echo -e "\e[00;31m | Wait the end of the jobs... \e[00m `date`" + ndone=0 + while [ $ndone -lt $njobs ] ; do + nerr=` grep Err $outdir/other/slurm_opmos/subslurm_* 2> /dev/null | wc -l` + nkilled=`grep Killed $outdir/other/slurm_opmos/subslurm_* 2> /dev/null | wc -l` + ncancel=`grep CANCEL $outdir/other/slurm_opmos/subslurm_* 2> /dev/null | wc -l` + ndone=` grep _THEEND_ $pdir/sl* 2> /dev/null | wc -l` + nslurm=`ls $pdir/sl* 2> /dev/null | wc -l` + sleep 10 + echo "waiting... ($ndone/$njobs)($nslurm slurm files) (NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel) `date`" + if [[ $nerr -gt 0 || $nkilled -gt 0 || $ncancel -gt 0 ]] ; then + for i in $(squeue | grep jopmos$id | awk '{print $1}') ; do scancel $i ; done + echo "(NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel)" + exit 0 + fi + done + + + ############ STEP4 : Save (potentially) usefull files + mv $pdir/slurm*.out $outdir/other/slurm_opmos + mv $joblist $outdir/other/ + #mv $pdir/log_station_data $outdir/other/ + mv $pdir/tmp_filestations $outdir/other/ + mv $pdir/tmp_filestations_all $outdir/other/ + cat $pdir/top_opmos.sh > $outdir/other/copy_top_opmos.sh + rm -r $outdir/tmp + + ############ STEP5 : Start post processing + echo -e "\e[00;31m | Launch post.. \e[00m `date`" + $pdir/top_post.sh $debug "$what" "$pollutant" $id $outdir $d1 $d2 &> $pdir/tmplog_top_post & + + echo -e "\e[00;31m | Waiting end of top_post.sh... \e[00m `date`" + ok=0 + while [ $ok -ne 1 ] ; do + ok=`grep END_top_post $pdir/tmplog_top_post 2> /dev/null | wc -l` + sleep 10 + echo "waiting ($ok)... `date`" + done + + end=`date +%s` + timejob=`convertsec $(($end-$begin))` #(copy the convertsec function in my ~/.bashrc to yours) + echo " END >>> duration : ${timejob} `date`" + +done + +exit 0 + + + + +for i in $(squeue | grep post$id | awk '{print $1}') ; do scancel $i ; done +d1=2015010100 ; d2=2015093023 +pdir=`pwd` +debug=1 +id=1 +outdir=/gpfs/scratch/pr1ehu00/pr1ehu18/data/opmos/debug/opmos_b007_MAD-RUR_2015010100_2015093023_v20190111134552 +rm -f $pdir/slurm-* +rm -f $outdir/other/slurm_post/slurm-* +rm -f $outdir/post_statistics/* + what='obs mod kfsRMSE +ml_gbm-90-30-0-0-1-0.50 ml_gbm-90-30-0-1-1-0.50 ml_gbm-90-30-0-2-1-0.50 ml_gbm-90-30-0-3-1-0.50 ml_gbm-90-30-0-4-1-0.50 ml_gbm-90-30-0-6-1-0.50 ml_gbm-90-30-0-7-1-0.50 ml_gbm-90-30-0-11-1-0.50 +ml_gbm-90-30-0-0-1-0.75 ml_gbm-90-30-0-1-1-0.75 ml_gbm-90-30-0-2-1-0.75 ml_gbm-90-30-0-3-1-0.75 ml_gbm-90-30-0-4-1-0.75 ml_gbm-90-30-0-6-1-0.75 ml_gbm-90-30-0-7-1-0.75 ml_gbm-90-30-0-11-1-0.75 +ml_gbm-90-30-1-0-1-0.50 ml_gbm-90-30-1-1-1-0.50 ml_gbm-90-30-1-2-1-0.50 ml_gbm-90-30-1-3-1-0.50 ml_gbm-90-30-1-4-1-0.50 ml_gbm-90-30-1-6-1-0.50 ml_gbm-90-30-1-7-1-0.50 ml_gbm-90-30-1-11-1-0.50 +ml_gbm-90-30-1-0-1-0.75 ml_gbm-90-30-1-1-1-0.75 ml_gbm-90-30-1-2-1-0.75 ml_gbm-90-30-1-3-1-0.75 ml_gbm-90-30-1-4-1-0.75 ml_gbm-90-30-1-6-1-0.75 ml_gbm-90-30-1-7-1-0.75 ml_gbm-90-30-1-11-1-0.75' +pollutant='PM10' +$pdir/top_post.sh $debug "$what" "$pollutant" $id $outdir $d1 $d2 diff --git a/top_post.sh b/top_post.sh new file mode 100755 index 0000000000000000000000000000000000000000..2778486dee37dafec55977642a57b5ed89c9f807 --- /dev/null +++ b/top_post.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +debug=$1 +what="$2" +pollutant="$3" +id=$4 +outdir=$5 +d1=$6 +d2=$7 + +echo "debug : $debug" +echo "what : $what" +echo "pollutant : $pollutant" +echo "id : $id" +echo "outdir : $outdir" +echo "d1 : ${d1}" +echo "d2 : ${d2}" + + + +function waitendjob() +{ + echo -e "\e[00;31m | Check if jobs are finished... \e[00m `date`" + ok=1 + while [ $ok -eq 0 ] ; do + nslurm=`ls $pdir/sl* 2> /dev/null| wc -l` + if [ $nslurm -gt 0 ] ; then ok=1 ; fi + sleep 1 + done + sleep 10 + njob=999 + while [ $njob -ne 0 ] ; do + nerr=` grep Err $pdir/sl* 2> /dev/null | wc -l` + nkilled=`grep Killed $pdir/sl* 2> /dev/null | wc -l` + ncancel=`grep CANCEL $pdir/sl* 2> /dev/null | wc -l` + nslurm=`ls $pdir/sl* 2> /dev/null | wc -l` + njob=`squeue | grep jpost${id} | wc -l` + echo "waiting... ($njob still running)($nslurm slurm files) (NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel) `date`" + if [[ $nerr -gt 0 || $nkilled -gt 0 || $ncancel -gt 0 ]] ; then + for i in $(squeue | grep jpost${id} | awk '{print $1}') ; do scancel $i ; done + echo "(NERRORS/NKILLED/NCANCEL=$nerr/$nkilled/$ncancel)" + exit 0 + fi + sleep 10 + done +} + +############################################################################################# +echo -e "\e[00;31m | ==============================================\e[00m" +echo -e "\e[00;31m | ==================== POST_MOS ==============\e[00m" +echo -e "\e[00;31m | ==============================================\e[00m" + +for i in $(squeue | grep post$id | awk '{print $1}') ; do scancel $i ; done + + +echo -e "\e[00;31m | Preliminaries \e[00m `date`" +pdir=`pwd` +type='hx dx dmaxx dmda8x' +tmp=`cd $outdir ; ls data* | head -1` +#d1=${tmp:5:10} ; echo $d1 +#d2=${tmp:16:10} ; echo $d2 +rm -f $pdir/slurm*out + + +echo -e "\e[00;31m | Post-processed statistics... \e[00m `date`" +if [ ! -d $outdir/post_statistics ] ; then + mkdir $outdir/post_statistics +fi + + +############################################################################################# +echo -e "\e[00;31m | Aggregate hopdata.Rdata and hdata.Rdata files... \e[00m `date`" +for iwhat in $what ; do + for ipollutant in $pollutant ; do + sbatch -n1 -c1 --job-name="jpost${id}A" $pdir/job_post.sh "aggregate1a" $pdir $outdir $ipollutant $iwhat $d1 $d2 &> /dev/null + sbatch -n1 -c1 --job-name="jpost${id}A" $pdir/job_post.sh "aggregate1b" $pdir $outdir $ipollutant $iwhat $d1 $d2 &> /dev/null + done +done +waitendjob + + +############################################################################################# +echo -e "\e[00;31m | Aggregate xhopdata.Rdata and xhdata.Rdata files... \e[00m `date`" +for ipollutant in $pollutant ; do + sbatch -n1 -c1 --job-name="jpost${id}B" $pdir/job_post.sh "aggregate2a" $pdir $outdir $ipollutant "$what" $d1 $d2 &> /dev/null + sbatch -n1 -c1 --job-name="jpost${id}B" $pdir/job_post.sh "aggregate2b" $pdir $outdir $ipollutant "$what" $d1 $d2 &> /dev/null +done +waitendjob + + +############################################################################################# +echo -e "\e[00;31m | Compute xxstat... \e[00m `date`" +for iwhat in $what ; do + if [[ "$iwhat"" != "obs" && "$iwhat"" != "mod" ]] ; then + for ipollutant in $pollutant ; do + sbatch -n1 -c1 --job-name="jpost${id}C" $pdir/job_post.sh "xxstat" $pdir $outdir $ipollutant "$iwhat" $d1 $d2 &> /dev/null + done + fi +done + + +############################################################################################# +echo -e "\e[00;31m | Launch confusion/hfopstat/hx1errorbinstat/hx1obsbinstat steps... \e[00m `date`" +for iwhat in $what ; do + if [[ "$iwhat" != "obs" && "$iwhat" != "mod" ]] ; then + for ipollutant in $pollutant ; do + for itype in $type ; do + sbatch -n1 -c1 --job-name="jpost${id}D" $pdir/job_post.sh "confusion" $pdir $outdir $ipollutant $iwhat $d1 $d2 $itype &> /dev/null + done + sbatch -n1 -c1 --job-name="jpost${id}E" $pdir/job_post.sh "hfopstat" $pdir $outdir $ipollutant $iwhat $d1 $d2 &> /dev/null + sbatch -n1 -c1 --job-name="jpost${id}F" $pdir/job_post.sh "hx1errorbinstat" $pdir $outdir $ipollutant $iwhat $d1 $d2 &> /dev/null + sbatch -n1 -c1 --job-name="jpost${id}G" $pdir/job_post.sh "hx1obsbinstat" $pdir $outdir $ipollutant $iwhat $d1 $d2 &> /dev/null + done + fi +done +waitendjob + + +############################################################################################# +echo -e "\e[00;31m | Launch hopstat step... \e[00m `date`" +for iwhat in $what ; do + if [ "$iwhat" != "obs" ] ; then + for ipollutant in $pollutant ; do + sbatch -n1 -c1 --job-name="jpost${id}H" $pdir/job_post.sh "hopstat" $pdir $outdir $ipollutant $iwhat $d1 $d2 &> /dev/null + done + fi +done +waitendjob + + +echo -e "\e[00;31m | Move slurm files ... \e[00m `date`" +mv $pdir/slurm*.out $outdir/other/slurm_post + +echo -e "\e[00;31m | END_top_post ... \e[00m `date`" + + + diff --git a/transfer.sh b/transfer.sh new file mode 100755 index 0000000000000000000000000000000000000000..7c566b9d3efc37c1980f9ee418c39fa96009794c --- /dev/null +++ b/transfer.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +destination=/gpfs/scratch/bsc32/bsc32883/4transfer/ + +rm -rf $destination/opmos $destination/util.R +cp ~/R/util.R $destination +mkdir $destination/opmos +cp ~/scripts/opmos/*sh $destination/opmos +cp ~/scripts/opmos/*R $destination/opmos