diff --git a/SYNOP/STATS/fortran-programs/plots_for_one_station b/SYNOP/STATS/fortran-programs/plots_for_one_station deleted file mode 100755 index ed3054838289d9c49461f9cd550f6b8849089333..0000000000000000000000000000000000000000 Binary files a/SYNOP/STATS/fortran-programs/plots_for_one_station and /dev/null differ diff --git a/SYNOP/STATS/fortran-programs/plots_for_one_station.f95 b/SYNOP/STATS/fortran-programs/plots_for_one_station.f95 deleted file mode 100644 index b8212593ec65829b2dea2bb99a57aac33cecb85f..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/fortran-programs/plots_for_one_station.f95 +++ /dev/null @@ -1,614 +0,0 @@ -program plots_for_one_station - ! - ! For producing files that can be used to generate "one-station standard plots" - ! in python and in GrADS. However, these plots need to be generated separately, - ! i.e., python or GrADS is not called directly from this program. - ! - ! The following plots can be produced, depending on the details of the - ! python or GrADS script: - ! - ! 1) The time series of simulated data at one station location, - ! plotted against the background of the observed quantiles. - ! Separate plots for 00, 06, 12 and 18 UTC. The plots are - ! organized so that all the simulation data are projected - ! on a single annual cycle (normal year, leap days excluded) - ! regardless of the length of the simulation. - ! - ! 2) A quantile rank histogram, showing the normalized frequencies - ! for 100 percentile bins from 0-1% to 99-100%. The Mean - ! Square Deviation and its p-value are written as text in this plot. - ! - ! NB hard-coding: the program currently gives output only for 00, 06, 12 and 18 UTC. - ! - ! ---------------------------------------------------------------------- - ! Jouni Räisänen, University of Helsinki, August 2023 - !----------------------------------------------------------------------- - ! - ! INPUT FILES (all as text files retrieved from .odb format): - ! ------------------------------------------------------------ - ! - ! sim_file = simulated data from the station location of interest - ! - ! Each line of this file includes: - ! - ! 1) year - ! 2) month - ! 3) day - ! 4) hour - ! 5) data value - ! - ! quantile_file = observed quantiles for this station - ! - ! Each line of this file includes: - ! - ! 1) station code - ! 2) longitude - ! 3) latitude - ! 4) month - ! 5) day - ! 6) hour - ! 7-105) quantiles from 1% to 99% - ! - ! rank_histogram_file = frequencies and MSD and P-values for plotting the rank histogram - ! - ! Each line of this file includes: - ! - ! 1) station code - ! 2) longitude - ! 3) latitude - ! 4) UTC hour - ! 5) total number of observations for the UTC hour - ! 6) Mean squared deviation (MSD) of the quantile bin rank histogram wrt a flat histogram - ! 7) p-value of the MSD value, relative to the bootstrap sampling distribution - ! 8-107) frequencies for quantile bins 0-1%, 1-2%, ... ,98-99%, 99-100% - ! - ! ---------------------------------------------------------------------------------------- - ! - ! OUTPUT FILES in ODB compatible text format (for python) - ! - ! txt_data_time_series: individual data values from sim_file - ! - ! The first line is a header. The other lines include: - ! - ! 1) station code - ! 2) longitude - ! 3) latitude - ! 4) year - ! 5) day of year (counted from Jan 1) - ! 6) UTC hour - ! 7) data value - ! - ! txt_data_quantiles: selected quantiles from the observed distribution as a function of UTC hour and time of year - ! - ! The first line is a header. 
The other lines include: - ! - ! 1) station code - ! 2) longitude - ! 3) latitude - ! 4) day of year (counted from Jan 1) - ! 5) UTC hour - ! 6-12) quantiles 1, 10, 25, 50, 75, 90 and 99% - ! - ! txt_data_rank_histogram = quantile bin frequencies (separately for each UTC hour) - ! - ! The first line is a header. The other lines include: - ! - ! 1) station code - ! 2) longitude - ! 3) latitude - ! 4) UTC hour - ! 5) Mean squared deviation (MSD) of the quantile bin rank histogram wrt a flat histogram - ! 6) p-value of the MSD value, relative to the bootstrap sampling distribution - ! 7-106) frequencies for quantile bins 0-1%, 1-2%, ... ,98-99%, 99-100% - ! - ! ---------------------------------------------------------------------------------------- - ! - ! OUTPUT FILES in GrADS binary format (for GrADS) - ! - ! grads_data_time_series = individual data values from sim_file - ! + values of selected quantiles as a function - ! of UTC hour and time of the year. - ! - ! grads_data_rank_histogram = quantile bin frequencies (separately for each UTC hour) - ! - ! - ! In addition, the program writes a few auxiliary text files with hard-coded names - ! for use in GrADS scripts: - ! - ! "vrange_commands" : ranges needed on the y axis of plots - ! "time_series_commands" : for plotting different years in time series with different colors - ! "coordinates" : for including coordinate values in GrADS plot - ! "msd_and_p-value_00" : for including 00 UTC MSD and p-value in GrADS plot - ! "msd_and_p-value_06" : for including 06 UTC MSD and p-value in GrADS plot - ! "msd_and_p-value_12" : for including 12 UTC MSD and p-value in GrADS plot - ! "msd_and_p-value_18" : for including 18 UTC MSD and p-value in GrADS plot - ! "msd_and_p-value" : for including all-UTC MSD and p-value in GrADS plot - ! - !------------------------------------------------------------------------------------------ - ! - ! NAMELIST PARAMETERS: - ! - ! a) input files (see above) - ! - ! sim_file - ! quantile_file - ! rank_histogram_file - ! - ! b) output files in ODB compatible text format (see above) - ! - ! txt_data_time_series - ! txt_data_rank_histogram - ! txt_data_quantiles - ! - ! c) output files in GrADS binary format - ! - ! grads_data_time_series - ! grads_data_rank_histogram - ! - ! d) other - ! - ! year1,month1 = beginning of the period in time series output - ! year2,month2 = end of the period in time series output - ! include_rank_histograms: if .true. (.false.), rank histogram data is included (excluded) in output - ! l_code_in_char : if .true. (.false.), station codes are assumed to be 3-character strings (integers) - ! l_round_6h : if .true., UTC hours are rounded to the closest even 6-h (00, 06, 12 or 18 UTC). - ! if .false., data for non-matching UTC hours are excluded from output - ! - ! miss = missing value code. Default = -9.99e6. All data f with |f| >= |miss| are treated as missing - ! - ! A sample namelist file is sketched just before the time-series read loop below. - ! - !------------------------------------------------------------------------------------------- - ! - implicit none - character*160 :: sim_file ! see above - character*160 :: quantile_file ! see above - character*160 :: rank_histogram_file ! see above - character*160 :: grads_data_time_series ! see above - character*160 :: grads_data_rank_histogram ! see above - character*160 :: txt_data_time_series ! see above - character*160 :: txt_data_quantiles ! see above - character*160 :: txt_data_rank_histogram ! see above - character*160 :: out_dir - integer :: year1,year2 ! first and last year of simulation - integer :: month1,month2 ! 
first and last month of simulation - integer :: nyear ! number of years = year2-year1+1 - ! - ! Data structures for input data - ! - integer,parameter :: nhour=4 ! only 00, 06, 12 and 18 UTC used - integer,parameter :: nmax_years=200 ! maximum length of a simulation in years - integer,parameter :: ndays_year=365 ! number of days in a non-leap year - - real :: f(nmax_years,ndays_year,nhour) ! the simulated values - real :: f1 ! a single simulated value - ! - integer,parameter :: nquant=99 ! number of quantiles - real :: quant(nquant,ndays_year,nhour) ! quantile values from quantile_file - real :: quant1(nquant) ! quantile values for a single day and UTC hour - ! - real :: freq(nquant+1,nhour+1) ! quantile bin frequencies from the rank histogram file - real :: freq1(nquant+1) ! frequencies for a single UTC hour - real :: msd(nhour+1) ! MSD value from the rank histogram file - real :: msd1 ! a single MSD value - real :: p_value(nhour+1) ! p-values from the rank histogram files - real :: p_value1 ! a single p-value - - integer :: yyear,mmonth,dday,hhour ! time as read from sim_file - integer :: year,month,day,hour ! time after eventual rounding to nearest 6-h - character*3 :: station_char ! station code as 3-char string - integer :: station_int ! station code as integer - logical :: l_code_in_char ! .true. for station codes as string - logical :: l_round_6h ! .true. for rounding time to nearest 6-h - real :: ntot ! total number of observations as read from rank_histogram_file - real :: lon,lat ! longitude and latitude - integer :: i,j,k ! loop variables - integer :: day_of_year ! day count from Jan 1 (in a non-leap year) - - real :: miss ! missing value code - - real :: fmin(nhour),fmax(nhour) ! minimum and maximum values for time series plots - real :: fmin_down(nhour),fmax_up(nhour) ! same, but extended outward - real :: freqmax(nhour+1) ! largest frequency for the rank histogram - - integer,parameter :: n_colors=13 ! number of colours for GrADS output - integer :: color(n_colors) ! colours for GrADS output - integer :: num_int_arr ! fix for Warning : Legacy Extension: REAL array index at (1) - - integer :: irec ! record number for GrADS output -! -! For text output in odb-compatible format: -! - INTEGER, PARAMETER :: L_dataline=1700 ! max length of input or output text line - character*1700 :: dataline,headerline,emptyline ! for reading / writing text files - character*2 :: number_of_bin ! quantile bin numbers for header in txt_data_quantiles - character*16 :: frequency_value ! frequency value written as character string -! - logical :: include_rank_histograms ! true for including rank histograms - - namelist/param/sim_file,quantile_file,rank_histogram_file,& - grads_data_time_series,grads_data_rank_histogram,& - year1,year2,month1,month2,& - txt_data_time_series,txt_data_rank_histogram,txt_data_quantiles,& - include_rank_histograms,l_code_in_char,l_round_6h,out_dir,miss - - data color/9,14,4,11,5,13,3,10,7,12,8,2,6/ -! -! default values -! - miss=-9.99e6 - l_code_in_char=.false. - l_round_6h=.false. - include_rank_histograms=.true. -! -! Read the namelist and count the number of years for output -! - read(*,nml=param) - nyear=year2-year1+1 - - !************************************************************************************************ - ! Read the data from the input files. Only data for the period year1/month1-year2/month2 - ! will be included in the time series output, even if other data exist. - ! - ! 1) time series - ! 
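! For orientation, a namelist file read by this program might look roughly as
! follows (a sketch only: the file names, station code and years below are
! hypothetical, but the parameter names are those of namelist/param/ above):
!
!   &param
!    sim_file='sim_102035.txt',
!    quantile_file='quantiles_102035.txt',
!    rank_histogram_file='rank_histogram_102035.txt',
!    txt_data_time_series='plot_time_series_102035.txt',
!    txt_data_quantiles='plot_quantiles_102035.txt',
!    txt_data_rank_histogram='plot_rank_histogram_102035.txt',
!    grads_data_time_series='time_series_102035.grads',
!    grads_data_rank_histogram='rank_histogram_102035.grads',
!    out_dir='./',
!    year1=2010, month1=1, year2=2012, month2=12,
!    include_rank_histograms=.true.,
!    l_code_in_char=.false.,
!    l_round_6h=.true.,
!    miss=-9.99e6
!   /
!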
- f=miss - open(unit=1,form='formatted',file=sim_file,status='old') - do while(.true.) -11 read(1,*,err=11,end=12) yyear,mmonth,dday,hhour,f1 - if(l_round_6h)then - call round_6h(yyear,mmonth,dday,hhour,year,month,day,hour) ! Rounding to nearest 6-h? - else - year=yyear - month=mmonth - day=dday - hour=hhour - endif - if((year.gt.year1.or.(year.eq.year1.and.month.ge.month1)).and. & ! Is the time within the - (year.lt.year2.or.(year.eq.year2.and.month.le.month2))) then ! wanted range? - - if(.not.(month.eq.2.and.day.eq.29))then ! leap days are skipped from plotting - f(year-year1+1,day_of_year(month,day),1+hour/6)=f1 ! hard-coding to 6-hourly resolution - endif - endif - enddo -12 continue - close(1) - ! - ! 2) quantiles - ! - quant=miss - open(unit=1,form='formatted',file=quantile_file,status='old') - do while(.true.) - if(l_code_in_char)then -21 read(1,*,err=21,end=23)station_char,lon,lat,month,day,hour,(quant1(i),i=1,nquant) - else -22 read(1,*,err=22,end=23)station_int,lon,lat,month,day,hour,(quant1(i),i=1,nquant) - endif - if((hour.eq.0.or.hour.eq.6.or.hour.eq.12.or.hour.eq.18)& - .and.(.not.(month.eq.2.and.day.eq.29))) then ! leap days are skipped from plotting - do i=1,nquant - quant(i,day_of_year(month,day),1+hour/6)=quant1(i) - enddo - endif - enddo -23 continue - close(1) - ! - ! 3) quantile bin frequencies and related MSD and p-value statistics - ! - freq=miss - if(include_rank_histograms)then - open(unit=1,form='formatted',file=rank_histogram_file,status='old') - do while(.true.) - if(l_code_in_char)then -31 read(1,*,err=31,end=33)station_char,lon,lat,hour,ntot,msd1,p_value1,(freq1(i),i=1,nquant+1) - else -32 read(1,*,err=32,end=33)station_int,lon,lat,hour,ntot,msd1,p_value1,(freq1(i),i=1,nquant+1) - endif - if((hour.eq.0.or.hour.eq.6.or.hour.eq.12.or.hour.eq.18.or.hour.eq.24))then - msd(1+hour/6)=msd1 - p_value(1+hour/6)=p_value1 - do i=1,nquant+1 - freq(i,1+hour/6)=freq1(i) - enddo - endif - enddo -33 continue - close(1) - endif - -!********************************************************************************************** -! Find the minimum and maximum values of f (or the 1st and 99th percentile) -! for defining the vranges for the GrADS time series plots - - do j=1,nhour - fmin(j)=quant(1,1,j) - fmax(j)=quant(nquant,1,j) - do i=1,ndays_year - if(quant(1,i,j).lt.fmin(j))fmin(j)=quant(1,i,j) - if(quant(nquant,i,j).gt.fmax(j))fmax(j)=quant(nquant,i,j) - do k=1,nyear - if(f(k,i,j).lt.fmin(j).and.abs(f(k,i,j)).lt.abs(miss))fmin(j)=f(k,i,j) - if(f(k,i,j).gt.fmax(j).and.f(k,i,j).lt.abs(miss))fmax(j)=f(k,i,j) - enddo - enddo - fmin_down(j)=fmin(j)-0.05*(fmax(j)-fmin(j)) - fmax_up(j)=fmax(j)+0.05*(fmax(j)-fmin(j)) - enddo -! -! Find the largest frequency from the rank histograms -! - freqmax=0. - do j=1,nhour+1 - do i=1,nquant+1 - if(freq(i,j).gt.freqmax(j))freqmax(j)=freq(i,j) - enddo - enddo -! -! Write the commands to the auxiliary script files for Grads -! - open(unit=1,form='formatted',file=trim(out_dir)//'vrange_commands') - write(1,'(A11,2F10.3)')'set vrange ',fmin_down(1),fmax_up(1) - write(1,'(A11,2F10.3)')'set vrange ',fmin_down(2),fmax_up(2) - write(1,'(A11,2F10.3)')'set vrange ',fmin_down(3),fmax_up(3) - write(1,'(A11,2F10.3)')'set vrange ',fmin_down(4),fmax_up(4) - write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(1) ! conversion to relative frequencies - write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(2) ! conversion to relative frequencies - write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(3) ! 
conversion to relative frequencies - write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(4) ! conversion to relative frequencies - write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(5) ! conversion to relative frequencies - close(1) - - open(unit=1,form='formatted',file=trim(out_dir)//'time_series_commands') - do i=1,nyear - write(1,'(A12)')'set cthick 4' - write(1,'(A12)')'set cstyle 0' - write(1,'(A11)')'set cmark 3' - write(1,'(A15)')'set digsiz 0.07' - if(nyear.eq.1)then - num_int_arr=nint(1.+n_colors)/2. - write(1,'(A11,I2)')'set ccolor ',color(num_int_arr) - !write(1,'(A11,I2)')'set ccolor ',color(nint(1.+n_colors)/2.) - else - num_int_arr=nint(1+(i-1.)/(nyear-1.)*(n_colors-1.)) - write(1,'(A11,I2)')'set ccolor ',color(num_int_arr) - !write(1,'(A11,I2)')'set ccolor ',color(nint(1+(i-1.)/(nyear-1.)*(n_colors-1.))) - endif - write(1,'(A7,I2,A1)')'d &0(z=',i,')' - enddo - close(1) - - open(unit=1,form='formatted',file=trim(out_dir)//'coordinates') - write(1,'(F8.3)')lon - write(1,'(F8.3)')lat - close(1) - - if(include_rank_histograms)then - open(unit=1,form='formatted',file=trim(out_dir)//'msd_and_p-value_00') - write(1,'(A15)')'set strsiz 0.17' - write(1,'(A18)')'set string 1 l 4 0' - write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(1) - write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(1) - close(1) - open(unit=1,form='formatted',file=trim(out_dir)//'msd_and_p-value_06') - write(1,'(A15)')'set strsiz 0.17' - write(1,'(A18)')'set string 1 l 4 0' - write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(2) - write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(2) - close(1) - open(unit=1,form='formatted',file=trim(out_dir)//'msd_and_p-value_12') - write(1,'(A15)')'set strsiz 0.17' - write(1,'(A18)')'set string 1 l 4 0' - write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(3) - write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(3) - close(1) - open(unit=1,form='formatted',file=trim(out_dir)//'msd_and_p-value_18') - write(1,'(A15)')'set strsiz 0.17' - write(1,'(A18)')'set string 1 l 4 0' - write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(4) - write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(4) - close(1) - open(unit=1,form='formatted',file=trim(out_dir)//'msd_and_p-value') - write(1,'(A15)')'set strsiz 0.17' - write(1,'(A18)')'set string 1 l 4 0' - write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(5) - write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(5) - close(1) - endif -! -!*************************************************************** -! Write the GrADS binary files needed for the plots - - open(unit=1,form='unformatted',file=grads_data_time_series,access='DIRECT',& - recl=4,status='unknown') - do j=1,nhour - do i=1,ndays_year - irec=((i-1)+(j-1)*ndays_year)*(nyear+7) - do k=1,nyear - write(1,rec=irec+k)f(k,i,j) ! time series - enddo - write(1,rec=irec+nyear+1)quant(1,i,j) ! selected quantile - write(1,rec=irec+nyear+2)quant(10,i,j) - write(1,rec=irec+nyear+3)quant(25,i,j) - write(1,rec=irec+nyear+4)quant(50,i,j) - write(1,rec=irec+nyear+5)quant(75,i,j) - write(1,rec=irec+nyear+6)quant(90,i,j) - write(1,rec=irec+nyear+7)quant(99,i,j) - enddo - enddo - close(1) - - if(include_rank_histograms)then - open(unit=1,form='unformatted',file=grads_data_rank_histogram,access='DIRECT',& - recl=4,status='unknown') - do j=1,nhour+1 - irec=(nquant+1)*(j-1) - do i=1,nquant+1 - write(1,rec=irec+i)freq(i,j) !quantile bin frequencies - enddo - enddo - close(1) - endif - -!*************************************************************** -! 
Write the time series, quantiles and rank histogram data to text files - -! -! Empty data line -! - do i=1,L_dataline - emptyline(i:i)=' ' - enddo -! -! Time series. 365 days per year, 4 times per day, from year1 to year2 -! - open(unit=1,form='formatted',file=txt_data_time_series) - headerline=emptyline - if(l_code_in_char)then - headerline='station@hdr:string longitude@hdr:real latitude@hdr:real year@hdr:integer'//& - ' day_of_year@hdr:integer hour@hdr:integer value@body:real' - else - headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real year@hdr:integer'//& - ' day_of_year@hdr:integer hour@hdr:integer value@body:real' - endif - write(1,*)trim(headerline) - do year=year1,year2 - do j=1,365 - do hour=0,18,6 - if(abs(f(year-year1+1,j,1+hour/6)).lt.abs(miss))then - if(l_code_in_char)then - write(1,'(A3,2F16.6,1X,I4,1X,I3,1X,I2,F16.6)')station_char,lon,lat,year,j,hour,f(year-year1+1,j,1+hour/6) - else - write(1,'(I6,2F16.6,1X,I4,1X,I3,1X,I2,F16.6)')station_int,lon,lat,year,j,hour,f(year-year1+1,j,1+hour/6) - endif - else - if(l_code_in_char)then - write(1,'(A3,2F16.6,1X,I4,1X,I3,1X,I2,A16)')station_char,lon,lat,year,j,hour,' NaN' - else - write(1,'(I6,2F16.6,1X,I4,1X,I3,1X,I2,A16)')station_int,lon,lat,year,j,hour,' NaN' - endif - endif - enddo - enddo - enddo - close(1) -! -! Selected quantiles of the observed distribution. 365 days per year, 4 times per day. -! - open(unit=1,form='formatted',file=txt_data_quantiles) - headerline=emptyline - if(l_code_in_char)then - headerline='station@hdr:string longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer'//& - ' q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real' - else - headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer'//& - ' q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real' - endif - write(1,*)trim(headerline) - do year=year1,year2 - do j=1,365 - do hour=0,18,6 - if(l_code_in_char)then - write(1,'(A3,2F16.6,1X,I3,1X,I2,7F16.6)')station_char,lon,lat,j,hour,& - quant(1,j,hour/6+1),quant(10,j,hour/6+1),quant(25,j,hour/6+1),quant(50,j,hour/6+1),& - quant(75,j,hour/6+1),quant(90,j,hour/6+1),quant(99,j,hour/6+1) - else - write(1,'(I6,2F16.6,1X,I3,1X,I2,7F16.6)')station_int,lon,lat,j,hour,& - quant(1,j,hour/6+1),quant(10,j,hour/6+1),quant(25,j,hour/6+1),quant(50,j,hour/6+1),& - quant(75,j,hour/6+1),quant(90,j,hour/6+1),quant(99,j,hour/6+1) - endif - enddo - enddo - enddo - close(1) -! -! Rank histogram of frequencies. 100 bins from 0-1% to 99-100%. Before the frequencies, the MSD and p-values are written. - ! 
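!
! For orientation, the rank histogram file written below has this shape
! (the station code and all numbers are invented for this sketch, and the
! columns f02...f98 are abbreviated with '...'):
!
!   station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer msd@body:real p_value@body:real f00@body:real f01@body:real ... f99@body:real
!   102035 24.961000 60.204000 0 0.412000 0.037000 0.013100 0.011800 ... 0.008700
!   102035 24.961000 60.204000 6 0.287000 0.165000 0.011900 0.010400 ... 0.009500
!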
- if(include_rank_histograms)then - open(unit=1,form='formatted',file=txt_data_rank_histogram) - headerline=emptyline - if(l_code_in_char)then - headerline='station@hdr:string longitude@hdr:real latitude@hdr:real hour@hdr:integer' - else - headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer' - endif - headerline=trim(headerline)//' msd@body:real p_value@body:real' - do j=0,nquant - write(number_of_bin,'(i2.2)')j - headerline=trim(headerline)//' f'//number_of_bin//'@body:real' - enddo - write(1,*)trim(headerline) - - do hour=0,24,6 - if(l_code_in_char)then - write(dataline,'(A3,2F16.6,1X,I2,2F16.6)')& - station_char,lon,lat,hour,msd(1+hour/6),p_value(1+hour/6) - else - write(dataline,'(I6,2F16.6,1X,I2,2F16.6)')& - station_int,lon,lat,hour,msd(1+hour/6),p_value(1+hour/6) - endif - - do i=1,nquant+1 - write(frequency_value,'(F16.6)')freq(i,1+hour/6) - dataline=trim(dataline)//frequency_value - enddo - write(1,*)trim(dataline) - enddo - - close(1) - endif - - end program plots_for_one_station - - integer function day_of_year(month,day) -! -! Number of day 'day' in the 'month':th month from the beginning of a non-leap year -! - implicit none - integer :: year,month,day - integer,parameter :: nmon=12 - integer :: days_before(nmon) ! this is for normal years - integer :: dnumber - data days_before / 0,31,59,90,120,151,181,212,243,273,304,334 / - - day_of_year=days_before(month)+day - return - end function day_of_year - -! -! rounding the time to the nearest full 6 hours -! -subroutine round_6h(year,month,day,hour,year1,month1,day1,hour1) -implicit none -integer :: year,month,day,hour ! time before rounding -integer :: year1,month1,day1,hour1 ! time after rounding -integer :: ndays_month ! number of days in a month -ndays_month=31 -if(month.eq.4.or.month.eq.6.or.month.eq.9.or.month.eq.11)ndays_month=30 -if(month.eq.2)then - if((mod(year,4).eq.0.and.mod(year,100).gt.0).or.(mod(year,400).eq.0))then - ndays_month=29 - else - ndays_month=28 - endif -endif -year1=year -month1=month -day1=day -hour1=6*((hour+3)/6) -if(hour.ge.21)then ! Hour was rounded forward to 00 UTC -> increment day - hour1=0 - day1=day+1 - if(day1.gt.ndays_month)then ! increment month - day1=1 - month1=month1+1 - if(month1.gt.12)then ! increment year - month1=1 - year1=year+1 - endif - endif -endif -return -end subroutine round_6h - - - diff --git a/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics b/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics deleted file mode 100755 index 8e727b3be9456b0ccd97e6a2a6822c2669e91214..0000000000000000000000000000000000000000 Binary files a/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics and /dev/null differ diff --git a/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics.f95 b/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics.f95 deleted file mode 100644 index 46a8a608af26ddb7e8e1cd32fc327b9aac536c28..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics.f95 +++ /dev/null @@ -1,380 +0,0 @@ - program rank_histogram_summary_statistics -! -! Calculation of all-station rank histogram summary statistics: -! -! 1) distribution of p-values (in nbins_p bins) -! 2) Average frequencies in NQUANT+1 = 100 quantile bins -! (the quantile bin number is hard-coded!) -! -! The calculation is made for the UTC hours defined by -! the namelist parameters HOUR1, HOUR2, HOUR_STEP. It is assumed -! that the input file (INFILE) only includes data for these -! hours. 
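!
! As a worked example (the numbers are illustrative, not a requirement):
! with HOUR1=0, HOUR2=24 and HOUR_STEP=6, the hours 00, 06, 12 and 18 UTC
! plus the all-UTC hour code 24 are processed, so NHOUR_USED=5 and each
! station is assumed to occupy 5 consecutive data lines in INFILE.
!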
-! -! Jouni Räisänen, University of Helsinki, August 2023 -! -!------------------------------------------------------------------------- -! -! INPUT FILE: -! -! infile: a text file including the rank histogram statistics for all stations -! -! The first line is a header. The other lines include: -! -! 1) station code (as a 3-character string, if l_code_in_char =.true. Else as an integer) -! 2) longitude -! 3) latitude -! 4) UTC hour (24 for 'all-UTC' statistics) -! 5) total number of observations for this UTC hour -! 6) Mean squared deviation (MSD) from a flat quantile frequency histogram -! 7) p-value for MSD (= probability for getting at least as large MSD by chance) -! 8-107) frequencies in quantile space: 0-1%, 1-2% ... 98-99%, 99-100% -! -! Order of data assumed in the input file: -! -! line 1: header -! line 2: station 1, HOUR1 -! line 3: station 1, HOUR1 + HOUR_STEP -! line n = 1 + ((HOUR2-HOUR1)/HOUR_STEP + 1) : station 1, HOUR2 -! line n+1: station 2, HOUR1 -! etc. -!-------------------------------------------------------------------------- -! -! OUTPUT FILES: -! -! outfile_p_values: p-values at individual stations (So far: only the all-UTC p-values!) -! This file can be used for plotting the p-values on a map in python -! -! The first line is a header for eventual conversion to ODB. The other lines include -! -! 1) Station code as integer. If the original code was a 3-char string (GRUAN), -! the ordinal number of the station is written (for plotting in python) -! 2) longitude -! 3) latitude -! 4) p-value for MSD -!---------------------------------------------------------------------------- -! outfile_p_freq: frequency histogram of p-values (for different UTC hours separately) -! This file can be used for plotting a histogram of the p-value frequencies in python! -! -! The first line is a header. The others include -! -! 1) UTC hour -! 2-nbins_p+1) frequencies of p-values in nbins_p bins -!----------------------------------------------------------------------------- -! outfile_q = histogram of all-station-mean quantile bin frequencies (for different UTC hours separately) -! -! The first line is a header. The others include -! -! 1) UTC hour -! 2-101) quantile bin frequencies for 0-1%, 1-2% ... 99-100%. -!------------------------------------------------------------------------------ -! outfile_grads (only if grads_out=.true.): p-value frequencies and quantile bin frequencies in GrADS binary format -!------------------------------------------------------------------------------ -! text_output: frequencies of selected small p-values and some other data in free-text format, for each UTC hour separately. -!----------------------------------------------------------------------------------------------------------------------------- -! -! NAMELIST PARAMETERS: -! -! infile: see above -! outfile_p_values: see above -! outfile_p_freq: see above -! outfile_q: see above -! outfile_grads: see above -! text_output: see above -! nbins_p: number of p-value bins in outfile_p_freq -! grads_out: if .true. (.false.), outfile_grads is written (not written) -! -! hour1,hour2,hour_step: UTC hours in output = hour1, hour1+hour_step ... hour2 -! -! l_code_in_char: if .true., the station codes in infile are assumed to be 3-char string (else: integer) -! miss: missing value code. Default = -9.99e6. All values f with |f| >= |miss| are treated as missing. - - IMPLICIT NONE - INTEGER :: I,J ! loop variables - INTEGER,PARAMETER :: NHOUR=24 ! number of hours in a day - INTEGER,PARAMETER :: NQUANT=99 ! number of quantiles from 1% to 99%
INTEGER,PARAMETER :: NPMAX=100 ! maximum bin number for frequency diagram of p-values - INTEGER,PARAMETER :: NSTATMAX=10000 ! maximum number of stations - REAL :: frequency_q1(nquant+1) ! quantile frequencies at a single station and UTC hour - REAL :: frequency_q(nquant+1,nhour+1) !all-station mean quantile frequencies for each UTC hour - REAL :: frequency_p(npmax,nhour+1) !frequencies of p-values for each UTC hour - REAL :: max_freq_p,max_freq_q ! maximum of p-value and quantile frequencies (for GrADS only) - REAL :: frequency_p01(nhour+1) ! frequency of p_values <= 0.1 % - REAL :: frequency_p1(nhour+1) ! same, <= 1 % - REAL :: frequency_p5(nhour+1) ! same, <= 5 % - character*2 :: number_of_bin ! two-digit code for quantiles for outfile_q and outfile_p_values headers - character*16 :: frequency_value ! frequency written in F16.6 format - character*160 :: infile,outfile_p_freq,outfile_p_values,outfile_q ! input and output files (see above) - character*160 :: outfile_grads,text_output ! output files (see above) - INTEGER,PARAMETER :: L_dataline=1700 ! maximum length of lines in text files - character*1700 :: dataline,headerline,emptyline ! string variables for input and output - - INTEGER :: nbins_p ! number of p-value bins in outfile_p_freq - INTEGER :: hour1,hour2,hour_step ! input / output UTC hours (see above) - INTEGER :: hour ! UTC hour - INTEGER :: n_line ! line count for lines read from infile - INTEGER :: n_station ! station count - INTEGER :: pbin ! index for p-value bin - - LOGICAL :: HOUR_USED(nhour+1) ! .true. for hours included in (hour1, hour1+hour_step,...,hour2) - INTEGER :: nhour_used ! number of UTC hours used - INTEGER :: station1_int ! the station code read from infile (RHARM, FMI) - CHARACTER*3 :: station1_char ! the station code read from infile (GRUAN) - LOGICAL :: l_code_in_char ! if .true., station codes in 3-char strings assumed - REAL :: longitude1,latitude1 !longitude and latitude - INTEGER :: station(nstatmax) !station codes of all stations - REAL :: longitude(nstatmax),latitude(nstatmax) ! station longitudes and latitudes - REAL :: p_value1, msd ! p-value and MSD read from infile - REAL :: p_value(nhour+1,nstatmax) ! p-values at individual stations for map - REAL :: total_number ! number of observations read from infile - - LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing - INTEGER :: IREC ! record number for GrADS output -! - REAL :: MISS ! Missing value code - INTEGER :: valid_stations(nhour+1) ! stations with valid data - - NAMELIST/param/infile,outfile_p_values,outfile_p_freq,outfile_q,& - outfile_grads,text_output,& - nbins_p,grads_out,& - hour1,hour2,hour_step,& - l_code_in_char,miss - - miss=-9.99e6 ! default for missing value - READ(*,NML=PARAM) - valid_stations=0 -! -! Which UTC hours are used and what is their total number? -! - hour_used=.false. - nhour_used=0. - do i=hour1,hour2,hour_step - hour_used(i+1)=.true. - nhour_used=nhour_used+1 - enddo -! -! Empty data line -! - do i=1,L_dataline - emptyline(i:i)=' ' - enddo -!---------------------------------------------------------------------- -! -! Initialize counters etc. -! - frequency_p=0. - frequency_q=0. - frequency_p01=0. - frequency_p1=0. - frequency_p5=0. - n_station=0 - n_line=0 - p_value=miss -! -! Read the contents of the input file -! - open(unit=1,form='formatted',status='old',file=infile) - read(1,*) ! the first line in infile is a header - write(*,*)' Opened infile: ', infile - - do while(.true.) ! Loop continued until end of file reached
- if(l_code_in_char)then - 1 read(1,*,err=1,end=3)& - station1_char,longitude1,latitude1,hour,total_number,msd,p_value1,(frequency_q1(i),i=1,nquant+1) - else - 2 read(1,*,err=2,end=3)& - station1_int,longitude1,latitude1,hour,total_number,msd,p_value1,(frequency_q1(i),i=1,nquant+1) - endif -! write(*,*)'n_station,Hour step:',n_station,hour_step - if(hour_used(hour+1))then ! Only use the statistics for the UTC hours defined by hour1, hour2, hour_step - n_line=n_line+1 - if(mod(n_line,nhour_used).eq.1)then ! This assumes that all the required UTC hours are always included in infile - n_station=n_station+1 - if(l_code_in_char)then - station(n_station)=n_station - else - station(n_station)=station1_int - endif - longitude(n_station)=longitude1 - latitude(n_station)=latitude1 - endif - if(total_number.gt.0.and.abs(frequency_q1(1)).lt.abs(miss))then ! Only include stations with some valid data - p_value(hour+1,n_station)=p_value1 ! for map of p-values - valid_stations(hour+1)=valid_stations(hour+1)+1 - do i=1,nquant+1 - frequency_q(i,hour+1)=frequency_q(i,hour+1)+frequency_q1(i) ! update the quantile bin frequencies - enddo - pbin=min(1+(p_value1*nbins_p),real(nbins_p)) - frequency_p(pbin,hour+1)=frequency_p(pbin,hour+1)+1 ! update the p-value bin frequencies -! -! Frequencies of small p-values: -! - if(p_value1.le.0.001.and.abs(p_value1).lt.abs(miss))then - frequency_p01(hour+1)=frequency_p01(hour+1)+1. - endif - if(p_value1.le.0.01.and.abs(p_value1).lt.abs(miss))then - frequency_p1(hour+1)=frequency_p1(hour+1)+1. - endif - if(p_value1.le.0.05.and.abs(p_value1).lt.abs(miss))then - frequency_p5(hour+1)=frequency_p5(hour+1)+1. - endif - endif - - endif - enddo -3 continue - close(1) - write(*,*)' Read infile: # of lines, # of stations: ',n_line,n_station - -! if(n_line.ne.n_station*nhour_used)then -! write(*,*)'Something wrong !!!' -! stop -! endif -!--------------------------------------------------------------------- -! Divide all the frequencies by the number of stations with valid data -! - do hour=hour1,hour2,hour_step - do i=1,nquant+1 - frequency_q(i,hour+1)=frequency_q(i,hour+1)/valid_stations(hour+1) - enddo - do i=1,nbins_p - frequency_p(i,hour+1)=frequency_p(i,hour+1)/valid_stations(hour+1) - enddo - frequency_p01(hour+1)=frequency_p01(hour+1)/valid_stations(hour+1) - frequency_p1(hour+1)=frequency_p1(hour+1)/valid_stations(hour+1) - frequency_p5(hour+1)=frequency_p5(hour+1)/valid_stations(hour+1) - enddo -! -! Find the normalized maximum frequencies for GrADS axis limits -! - max_freq_p=0 - max_freq_q=0 - do i=1,nbins_p - if(frequency_p(i,nhour+1).gt.max_freq_p/nbins_p)then - max_freq_p=frequency_p(i,nhour+1)*nbins_p - endif - enddo - max_freq_p=1.02*max_freq_p - - do i=1,nquant+1 - if(frequency_q(i,nhour+1).gt.max_freq_q/(nquant+1))then - max_freq_q=frequency_q(i,nhour+1)*(nquant+1) - endif - enddo - max_freq_q=1.02*max_freq_q
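!
! Note on the normalization above: after division by the number of valid
! stations, frequency_p and frequency_q are mean frequencies per station,
! so a flat histogram corresponds to 1/nbins_p per p-value bin and
! 1/(nquant+1) per quantile bin. Multiplying by nbins_p or (nquant+1)
! therefore rescales a flat histogram to 1; e.g., with nbins_p=20, a mean
! bin frequency of 0.08 maps to 1.6 on the GrADS axis (illustrative numbers).
!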
! -!--------------------------------------------------------------------- -! -! Write the text output file: -! 1) number of stations -! 2) frequencies of selected small p-values - - open(unit=1,form='formatted',file=text_output) - write(1,'(A20,I3)')'Number of stations: ',n_station - write(1,'(A24,I3)')'Number of p-value bins: ',nbins_p - if(grads_out)then - write(1,'(A23,F6.3)')'Axis_limit_p_for_GrADS ',max_freq_p - write(1,'(A23,F6.3)')'Axis_limit_q_for_GrADS ',max_freq_q - endif - do hour=hour1,hour2,hour_step - write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.001 ',hour,' UTC: ',& - frequency_p01(hour+1) - write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.01 ',hour,' UTC: ',& - frequency_p1(hour+1) - write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.05 ',hour,' UTC: ',& - frequency_p5(hour+1) - enddo - close(1) -! -! Write the average quantile bin frequencies and p-value bin frequencies -! to ODB compatible text files - -!-------------------------------------------------------------------------- -! -! Open the quantile bin frequency output file and write its first line. -! - open(unit=2,form='formatted',status='unknown',file=outfile_q) - headerline=emptyline - headerline='hour@hdr:integer ' - do j=0,nquant - write(number_of_bin,'(i2.2)')j - headerline=trim(headerline)//' f'//number_of_bin//'@body:real' - enddo - write(2,*)trim(headerline) -! -! Write the data lines for each UTC hour used -! - do hour=hour1,hour2,hour_step - j=hour+1 - write(dataline,'(I6)')& - hour - do i=1,nquant+1 - write(frequency_value,'(F16.6)')frequency_q(i,j) - dataline=trim(dataline)//frequency_value - enddo - write(2,*)trim(dataline) - enddo - close(2) -! -! Write the file giving the p-values for individual stations -! - open(unit=2,form='formatted',status='unknown',file=outfile_p_values) - headerline=' station@hdr:integer longitude@hdr:real latitude@hdr:real p_value@body:real' - write(2,*)trim(headerline) - do i=1,n_station - write(2,'(7X,I6,3F16.6)')& ! so far: just write the station ordinal number, in case - ! the station code was a character string - station(i),longitude(i),latitude(i),p_value(nhour+1,i) - enddo - close(2) -! -! Open the p_value frequency output file and write its first line. -! - open(unit=2,form='formatted',status='unknown',file=outfile_p_freq) - headerline=emptyline - headerline='hour@hdr:integer ' - do j=1,nbins_p - write(number_of_bin,'(i2.2)')j-1 - headerline=trim(headerline)//' p'//number_of_bin//'@body:real' - enddo - write(2,*)trim(headerline) -! -! Write the data lines for each UTC hour used -! - do hour=hour1,hour2,hour_step - j=hour+1 - write(dataline,'(I6)')& - hour - do i=1,nbins_p - write(frequency_value,'(F16.6)')frequency_p(i,j) - dataline=trim(dataline)//frequency_value - enddo - write(2,*)trim(dataline) - enddo - close(2) -! -!------------------------------------------------------------------------------- -! -! Open the file for GrADS output and write its contents, for easier visualisation? -! - if(grads_out)then - open(unit=11,form='unformatted',file=outfile_grads,access='DIRECT',& - recl=4,status='unknown') -! - do hour=hour1,hour2,hour_step - j=(hour-hour1)/hour_step+1 - do i=1,nbins_p - irec=(j-1)*(nbins_p+nquant+1)+i - write(11,rec=irec)frequency_p(i,hour+1) - enddo - do i=1,nquant+1 - irec=(j-1)*(nbins_p+nquant+1)+nbins_p+i - write(11,rec=irec)frequency_q(i,hour+1) -! write(*,*)hour,i,frequency_q(i,hour+1) - enddo - enddo - close(11) - - endif ! if (grads_out)
- END program rank_histogram_summary_statistics diff --git a/SYNOP/STATS/fortran-programs/rank_histograms_bootstrap.f95 b/SYNOP/STATS/fortran-programs/rank_histograms_bootstrap.f95 deleted file mode 100644 index 3a075742b6542832c440963015abf3a2883af230..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/fortran-programs/rank_histograms_bootstrap.f95 +++ /dev/null @@ -1,662 +0,0 @@ - PROGRAM rank_histograms_bootstrap -! -! For estimating the sampling distribution of the Mean Square Deviation -! (MSD) statistics for quantile bin rank histogram frequencies. -! These distributions can be used to answer the following question: -! -! "If we have LENGTH_IN_YEARS years of model data and its rank histogram -! against observations has a given MSD value, what is the probability -! of getting such a large MSD just as a result of internal variability"? -! -! The sampling distribution is formed using a bootstrap: -! -! Each bootstrap realization calculates the rank histograms by selecting a total -! of LENGTH_IN_YEARS years of observed data randomly divided into chunks. Their -! length is defined as follows: -! -! 1) The number of chunks is at least MIN_CHUNK_NUMBER -! 2) If not restricted by (1) there are CHUNKS_PER_YEAR chunks for each year -! 3) The length is rounded to the nearest full number of days, so that the -! total length is LENGTH_IN_YEARS years (which may be non-integer) -! -! For each realization, the Mean Squared Deviation (MSD) of the resulting rank -! histogram relative to the theoretically expected flat histogram is calculated. -! -! Jouni Räisänen, July 2023 -! -!-------------------------------------------------------------------------------- -! Input files (text format, retrieved from ODB with the 'odb sql select' command) -!-------------------------------------------------------------------------------- -! -! INFILE: Observed time series at the station location. -! The first line is a header. The other lines include: -! -! 1) year -! 2) month -! 3) day -! 4) hour -! 5) data value -! -! QUANTILE_FILE: quantile values, separately for each day of the year! -! The first line is a header. The other lines include: -! -! 1) station code -! 2) longitude -! 3) latitude -! 4) month -! 5) day -! 6) hour -! 7-105) The 99 quantiles from 1 % to 99 % -! -! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. -! Otherwise, it is assumed to be an integer -! -!-------------------------------------------------------------------------------- -! Output files -!-------------------------------------------------------------------------------- -! -! OUTFILE: MSD (mean squared deviation) distribution of quantile frequencies -! from bootstrap tests. -! -! The first line is a header. The other lines include -! -! 1) station code -! 2) longitude -! 3) latitude -! 4) number of realization -! 5 to N-1) MSD values for HOUR1, HOUR1 + HOUR_STEP ... HOUR2 -! N) MSD calculated from frequencies averaged over all the UTC hours within -! HOUR1, HOUR1 + HOUR_STEP ... HOUR2 -! -! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. -! Otherwise, it is assumed to be an integer -! -! OUTFILE_GRADS: MSD distribution of quantile frequencies in a GrADS binary file (if grads_out=.true.) -! -!------------------------------------------------------------------------------------------- -! Namelist parameters -!------------------------------------------------------------------------------------------- -!
! infile : file for observation time series (see above) -! quantile_file : file for pre-computed quantile values (see above) -! outfile : output file for bootstrap MSD statistics in ODB compatible text format -! outfile_grads : output file in GrADS binary format -! grads_out: if .true., outfile_grads is also written -! l_code_in_char: if .true., 3-character station codes assumed (else integer) -! lorder: if .true., the output files are ordered from smallest to largest MSD values -! -! hour1 = first UTC hour analysed -! hour2 = last UTC hour analysed -! hour_step = step of analyzed UTC hours -! l_round_hour : if .true., hours are rounded to the nearest analysis hour. -! if .false., data for non-matching hours is ignored -! -! year1 = first year used (if data available) -! year2 = last year used (if data available) -! -! miss = missing value code. All input data f with abs(f) >= abs(miss) is treated as missing -! -! length_in_years = length of bootstrap samples in years (does not need to be an integer) -! -! min_chunk_number = minimum number of separate data "chunks" for one bootstrap realization -! chunks_per_year = number of bootstrap "chunks" per year, if not limited by min_chunk_number -! nreal = number of bootstrap realizations -! l_all_hours_required : if .true., only those days are used for which data for all the required -! UTC hours is available, even in forming the single-UTC-hour bootstrap -! samples. This should be set to .false. for those data sets (e.g., soundings) -! for which data is frequently missing for some UTC hours -! -! A sample namelist file is sketched among the declarations below. - - ! The following input files (INFILE and QUANTILE_FILE) are needed: -! -! INFILE: text file of the simulated or observed time series at the station location. -! 1st line for each UTC hour is a header -! The other lines include station code, longitude and latitude, -! year, month, day, hour and the data value -! -! QUANTILE_FILE: quantile values, separately for each day of the year -! 1st line is a header -! The other lines include station code, longitude and latitude, -! month, day and hour, and the 99 quantiles for each day+hour of the year -! -! Only data for the years YEAR1-YEAR2 are taken into account, even if other years -! are present in INFILE. -! -! The output is written in OUTFILE. -! 1st line is the header. -! The other lines include the station code, station coordinates, -! LENGTH_in_YEARS, realization/rank number and the corresponding MSD values -! for each UTC hour + their combination. -! -! If (GRADS_OUT), output is also written in a GrADS binary file for easier plotting. -! -!--------------------------------------------------------------------------------------- -! -! LIMITATIONS of the bootstrap method: -! -! 1) An in-sample bootstrap will likely produce a narrower sampling distribution of -! quantile bin frequencies, and therefore lower MSD:s, than would be obtained -! if observations from a period not used for calculating the quantiles were used. - -! 2) Autocorrelation on time scales exceeding 1/CHUNKS_PER_YEAR years is neglected. -! This is also expected to reduce the sampling variability in quantile bin frequencies, -! resulting in a low bias in the MSD:s (particularly over sea areas for temperature?) -! -! For both reasons, there will be too many "false positives" when comparing -! a model simulation with observations. -! -!---------------------------------------------------------------------------------------- - - IMPLICIT NONE - INTEGER :: I,J,K,Q,R - INTEGER,PARAMETER :: NMON=12, NDAY=31, NHOUR=24, NQUANT=99 - REAL :: quantiles(nquant,nmon,nday,nhour) ! quantiles for each month, day and UTC hour
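!
! For orientation, a namelist file for this program might look roughly as
! follows (a sketch only: the file names, years and sample sizes below are
! hypothetical, but the parameter names are those of namelist/param/):
!
!   &param
!    infile='obs_102035.txt',
!    quantile_file='quantiles_102035.txt',
!    outfile='msd_bootstrap_102035.txt',
!    outfile_grads='msd_bootstrap_102035.grads',
!    grads_out=.false.,
!    lorder=.true.,
!    hour1=0, hour2=18, hour_step=6,
!    year1=2010, year2=2019,
!    length_in_years=10., min_chunk_number=2, chunks_per_year=4,
!    nreal=1000,
!    l_code_in_char=.false.,
!    l_round_hour=.true.,
!    l_all_hours_required=.true.,
!    miss=-9.99e6
!   /
!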
REAL :: q1(nquant) ! quantiles for one day + hour - REAL :: value1 ! individual data values read from the input file - INTEGER :: ntimes_hour(nhour) ! total number of times with data for an UTC hour - CHARACTER*160 :: infile,outfile,quantile_file ! file names (see above) - CHARACTER*160 :: outfile_grads ! grads output for testing? - INTEGER :: loutfile_grads ! length of outfile_grads name, to circumvent a memory leak!? (6.5.2013) - CHARACTER*2 :: hh ! hour code for ODB header - INTEGER,PARAMETER :: L_dataline=500 ! maximum length of data lines in output - character*500 dataline,headerline,emptyline ! for writing lines in output text files - character*16 msd_value ! frequency written in F16.6 format - ! - INTEGER :: yyear,mmonth,dday,hhour !year, month, day and hour from file - INTEGER :: year,month,day,hour ! year, month, day and hour after eventual rounding of hours - INTEGER :: day1 ! running number for the first day in a bootstrap chunk - INTEGER :: year1,year2 ! years of data used in forming the bootstrap samples - INTEGER :: hour1,hour2,hour_step ! list of hours analysed - LOGICAL :: L_ROUND_HOUR ! if .true., hour rounded to the nearest output hour - LOGICAL :: HOUR_USED(nhour) ! .true. for those UTC hours that are used - INTEGER :: nhour_used ! number of UTC hours used - INTEGER :: station_int ! station code read from quantile file, as integer (for synop stations) - CHARACTER*3 :: station_char ! ------------------------------ as characters (for soundings?) - LOGICAL :: l_code_in_char ! .true. for station code in characters - REAL :: longitude,latitude ! station longitude and latitude from the quantile file - - LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing - INTEGER :: IREC ! record number for GrADS output - LOGICAL :: LORDER ! if .true., MSD output in ascending order - - INTEGER,PARAMETER :: nmaxdays=100000,nmaxyears=250 - INTEGER :: rank1(nmaxyears,nmon,nday,nhour) ! quantile ranks (1-100) of the observed values - INTEGER :: rank(nmaxdays,nhour) ! quantile ranks (1-100) of the observed values in chronological order - INTEGER :: rank_all_UTC(nmaxdays,nhour) ! same for days in which data is available for all UTC hours - INTEGER :: NDAYS(nhour+1) ! number of days in the observed time series. "nhour+1" represents the "all-UTC" value. - LOGICAL :: L_all_hours_required ! if .true., all those days are omitted when data is missing for - ! at least one of the output UTC hours. Otherwise, all valid observations are included for each UTC hour separately, - ! but the all-UTC-hour statistics only uses data for all UTC hours available. - LOGICAL:: day_OK(nhour+1) ! will the day be included in bootstrap? - - REAL :: length_in_years ! total length of bootstrap samples in years (see above) - INTEGER :: min_chunk_number ! minimum number of chunks per one bootstrap sample - INTEGER :: chunks_per_year ! number of chunks per year - INTEGER :: number_of_chunks ! number of chunks per bootstrap sample - ! This is replaced by min_chunk_number if it is larger. - INTEGER :: chunk_length_UTC(nhour+1) ! chunk length separately for each UTC hour - INTEGER :: number_of_chunks_UTC(nhour+1) ! number of chunks separately for each UTC hour - INTEGER :: nreal ! number of bootstrap realizations - REAL :: rnd ! for random numbers - - INTEGER,PARAMETER :: NMAX_REAL=10000 ! maximum number of realizations - REAL :: MSD(nmax_real,nhour+1) ! nhour+1 to accommodate the all-UTC statistics - REAL :: MSD1(nmax_real) ! 
MSD realizations for one UTC hour (to facilitate easy ordering) - REAL :: frequency(nquant+1,nhour+1) ! quantile bin frequencies - REAL :: frequency_all_UTC(nquant+1,nhour+1) ! quantile bin frequencies for the days in the "all-UTC" sample - REAL :: expected_freq ! expected quantile bin frequency (1/100) - - REAL :: MISS ! missing value code -! -!----------------------------------------------------------------------------------------------------- -! - NAMELIST/param/infile,outfile,quantile_file,& - hour1,hour2,hour_step,year1,year2,grads_out,outfile_grads,lorder,& - length_in_years,min_chunk_number,chunks_per_year,nreal,& - l_code_in_char,l_round_hour,l_all_hours_required,miss - - expected_freq=1./(nquant+1) ! frequencies in a flat rank histogram - lorder=.true. ! by default, ascending order of MSDs in the output - min_chunk_number=2 ! default for minimum number of chunks - chunks_per_year=4 ! default. Decrease of this will reduce the autocorrelation - ! problem but it will also reduce the independence - ! (thus the variation) between the bootstrap samples - nreal=1000 ! default number of bootstrap realizations - miss=-9.99e6 - READ(*,NML=PARAM) -! -! Length of the GrADS output file name. This should not really be needed. -! - loutfile_grads=len(trim(outfile_grads)) -! -! Which UTC hours are used and what is their total number? -! - hour_used=.false. - nhour_used=0. - do i=hour1,hour2,hour_step - hour_used(i+1)=.true. - nhour_used=nhour_used+1 - enddo -! -! Empty data line for ODB-compatible output -! - do i=1,L_dataline - emptyline(i:i)=' ' - enddo -! write(*,*)'Check 1',len(trim(outfile_grads)),loutfile_grads,outfile_grads -!---------------------------------------------------------------------- -! -! Read the contents of the quantile_file to array 'quantiles' - ! - quantiles=miss - open(unit=1,form='formatted',status='old',file=quantile_file) - write(*,*)'Opened quantile file' - do while(.true.) - if(l_code_in_char)then -! The err=1 specifier ensures that header lines are skipped -1 read(1,*,err=1,end=3)& - station_char,longitude,latitude,month,day,hour,& - (q1(i),i=1,nquant) - else -! The err=2 specifier ensures that header lines are skipped -2 read(1,*,err=2,end=3)& - station_int,longitude,latitude,month,day,hour,& - (q1(i),i=1,nquant) - endif - - do i=1,nquant - quantiles(i,month,day,hour+1)=q1(i) - enddo -! copy quantiles from 28 February to 29 February -! (in case the latter is not in the quantile file as it should) - if(month.eq.2.and.day.eq.28)then - do i=1,nquant - quantiles(i,month,day+1,hour+1)=q1(i) - enddo - endif - enddo -3 continue - write(*,*)'Quantile file read' - close(1) -! -!---------------------------------------------------------------------- -! -! Open the input data file (i.e. the station observations for the variable of interest). -! The data must be in chronological order. - - open(unit=1,form='formatted',status='old',file=infile) - -! Read the data from the input file and find their ranks in the quantile distribution. - - rank1=miss - do while(.true.) -11 read(1,*,err=11,end=12)yyear,mmonth,dday,hhour,value1 - ! 
write(*,*)year,month,day,hour,value1 - - if(l_round_hour)then - call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& - hour1,hour2,hour_step) - else - year=yyear - month=mmonth - day=dday - hour=hhour - endif - - if(year.ge.year1.and.year.le.year2.and.hour_used(hour+1).and.abs(value1).le.abs(miss))then - call find_rank(value1,quantiles(1,month,day,hour+1),nquant,& - rank1(year-year1+1,month,day,hour+1),miss) - endif - enddo -12 continue - close(1) -!--------------------------------------------------------------------------------------------------- -! Rewrite the ranks for easier implementation of the bootstrap (= omit missing values) -!--------------------------------------------------------------------------------------------------- - ndays=0 - do year=year1,year2 - do month=1,nmon - do day=1,nday - day_OK=.true. - do hour=hour1,hour2,hour_step - if(rank1(year-year1+1,month,day,hour+1).eq.miss)then - day_OK(hour+1)=.false. - day_OK(nhour+1)=.false. ! all-UTC: day excluded if any data is missing - endif - enddo - ! all data excluded if any of the UTC hours is missing and all hours are required. - if(l_all_hours_required.and..not.day_OK(nhour+1))then - day_OK=.false. - endif -! -! Form the list of valid days and calculate its length separately for each UTC hour: -! - do hour=hour1,hour2,hour_step - if(day_OK(hour+1))then - ndays(hour+1)=ndays(hour+1)+1 - rank(ndays(hour+1),hour+1)=rank1(year-year1+1,month,day,hour+1) - endif - enddo - ! - ! All-UTC list of days and ranks. Only days with valid data for all UTC hours are included. - ! - if(day_OK(nhour+1))then - ndays(nhour+1)=ndays(nhour+1)+1 - do hour=hour1,hour2,hour_step - rank_all_UTC(ndays(nhour+1),hour+1)=rank1(year-year1+1,month,day,hour+1) - enddo - endif - - enddo ! end of day-loop - enddo ! end of month-loop - enddo ! end of year-loop - - write(*,*)'ndays',(ndays(hour+1),hour=hour1,hour2,hour_step),ndays(nhour+1) - -!-------------------------------------------------------------------------------------- -! Bootstrap parameters (nreal realizations, each consisting of 'number_of_chunks' 'chunk_length'-day periods). -! -! These parameters must be defined for each UTC hour separately, -! because the length of the time series may be different -! -! The length of a "chunk" is not allowed to exceed half of the length of the period of data -! - number_of_chunks=nint(length_in_years*chunks_per_year) ! default number of chunks per realization - number_of_chunks=max(number_of_chunks,min_chunk_number) ! reset to minimum if needed -! -! Recalculate chunk lengths and numbers of chunks for each UTC hour, based on the real number of -! days that are available and the requirement that the chunk length must not exceed half of -! the total data sample. - - do hour=hour1,hour2,hour_step - chunk_length_UTC(hour+1)=min(nint((365.25*length_in_years)/number_of_chunks),ndays(hour+1)/2) - number_of_chunks_UTC(hour+1)=nint((365.25*length_in_years)/chunk_length_UTC(hour+1)) - enddo - chunk_length_UTC(nhour+1)=min(nint((365.25*length_in_years)/number_of_chunks),ndays(nhour+1)/2) - number_of_chunks_UTC(nhour+1)=nint((365.25*length_in_years)/chunk_length_UTC(nhour+1)) - - write(*,*)'number_of_chunks_UTC',(number_of_chunks_UTC(hour+1),hour=hour1,hour2,hour_step),number_of_chunks_UTC(nhour+1) - write(*,*)'chunk_length_UTC',(chunk_length_UTC(hour+1),hour=hour1,hour2,hour_step),chunk_length_UTC(nhour+1)
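!
! Worked example of the chunk arithmetic above (illustrative numbers):
! with length_in_years=10. and chunks_per_year=4, number_of_chunks =
! nint(10.*4) = 40; for a series with no missing days the chunk length
! becomes nint(365.25*10./40) = 91 days, and number_of_chunks_UTC =
! nint(3652.5/91) = 40, i.e. 40 randomly placed 91-day chunks approximate
! the requested 10 years of data in each bootstrap realization.
!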
!************************************************************************************** -! Bootstrap begins -!************************************************************************************** - msd=0. - do r=1,nreal - frequency=0 - - -! Selection of chunks and calculation of rank frequencies (each UTC hour separately) - - do j=hour1,hour2,hour_step - - if(ndays(j+1).gt.1)then - - do k=1,number_of_chunks_UTC(j+1) - call random_number(rnd) - day1=1+(ndays(j+1)-chunk_length_UTC(j+1)+1)*rnd - do i=day1,day1+chunk_length_UTC(j+1)-1 - frequency(rank(i,j+1),j+1)=& - frequency(rank(i,j+1),j+1)+1./(number_of_chunks_UTC(j+1)*chunk_length_UTC(j+1)) - enddo - enddo - - endif - - enddo -! -! Calculation of the MSD for each UTC hour separately. -! - do j=hour1,hour2,hour_step - if(ndays(j+1).gt.1)then - do i=1,nquant+1 - msd(r,j+1)=msd(r,j+1)+((frequency(i,j+1)/expected_freq-1.)**2.)/(nquant+1.) - enddo - else - msd(r,j+1)=miss - endif - enddo -! -! Selection of chunks and calculation of rank frequencies (all-UTC-hour case, -! only including data for the days for which all UTC hours are available) -! - frequency_all_UTC=0. - if(ndays(nhour+1).gt.1)then - do k=1,number_of_chunks_UTC(nhour+1) - call random_number(rnd) - day1=1+(ndays(nhour+1)-chunk_length_UTC(nhour+1)+1)*rnd - do i=day1,day1+chunk_length_UTC(nhour+1)-1 - do j=hour1,hour2,hour_step - frequency_all_UTC(rank_all_UTC(i,j+1),j+1)=& - frequency_all_UTC(rank_all_UTC(i,j+1),j+1)+1./(number_of_chunks_UTC(nhour+1)*chunk_length_UTC(nhour+1)) - enddo - enddo - enddo - endif -! -! Calculation of the all-UTC MSD -! - if(ndays(nhour+1).gt.1)then - do i=1,nquant+1 - do j=hour1,hour2,hour_step - frequency_all_UTC(i,nhour+1)=& - frequency_all_UTC(i,nhour+1)+frequency_all_UTC(i,j+1)/nhour_used - enddo - msd(r,nhour+1)=msd(r,nhour+1)+((frequency_all_UTC(i,nhour+1)/expected_freq-1.)**2.)/(nquant+1.) - enddo - else - msd(r,nhour+1)=miss - endif - -!********************************************************************* - enddo ! end of the bootstrap (r) loop -!********************************************************************* - -!-------------------------------------------------------------------------------------------------- -! -! Ordering of the MSD:s in ascending order? -! Again, first the individual UTC hours and then the all-UTC MSD:s - - if(lorder)then - do j=hour1,hour2,hour_step - do r=1,nreal - msd1(r)=msd(r,j+1) - enddo - call order(msd1,msd(1,j+1),nreal) - enddo - do r=1,nreal - msd1(r)=msd(r,nhour+1) - enddo - call order(msd1,msd(1,nhour+1),nreal) - endif -!------------------------------------------------------------------------------------------------ -! -! Open the output file and write its first line. -! - open(unit=2,form='formatted',status='unknown',file=outfile) - headerline=emptyline - if(l_code_in_char)then - headerline='station@hdr:string longitude@hdr:real latitude@hdr:real realization@hdr:integer' - else - headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real realization@hdr:integer' - endif - do j=hour1,hour2,hour_step - write(hh,'(i2.2)')j - headerline=trim(headerline)//' msd'//hh//'@body:real' - enddo - headerline=trim(headerline)//' msd24'//'@body:real' - write(2,*)trim(headerline) - ! - ! Write the MSD values to the output file: first for each - ! individual UTC hour, then the overall statistics - ! 
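!
! For orientation, with hour1=0, hour2=18 and hour_step=6 the output file
! written below begins like this (station code and numbers invented for
! this sketch; with lorder=.true., realization 1 holds the smallest MSDs):
!
!   station@hdr:integer longitude@hdr:real latitude@hdr:real realization@hdr:integer msd00@body:real msd06@body:real msd12@body:real msd18@body:real msd24@body:real
!   102035 24.961000 60.204000 1 0.052134 0.061022 0.048310 0.055871 0.038554
!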
- do r=1,nreal - dataline=emptyline - if(l_code_in_char)then - write(dataline,'(A3,2F16.6,I6)')station_char,longitude,latitude,r - else - write(dataline,'(I6,2F16.6,I6)')station_int,longitude,latitude,r - endif - do j=hour1,hour2,hour_step - write(msd_value,'(F16.6)')msd(r,j+1) - dataline=trim(dataline)//msd_value - enddo - write(msd_value,'(F16.6)')msd(r,nhour+1) - dataline=trim(dataline)//msd_value - write(2,*)trim(dataline) - if(r.eq.1.or.r.eq.500.or.r.eq.1000)write(*,*)r,trim(dataline) - enddo - - close(2) -! ----------------------------------------------------------------- -! -! Output also as GrADS binaries, for visualisation in GrADS? -! - if(grads_out)then - open(unit=11,form='unformatted',file=outfile_grads(1:loutfile_grads),access='DIRECT',& - recl=4,status='unknown') - do hour=hour1,hour2,hour_step - j=1+(hour-hour1)/hour_step - do i=1,nreal - write(11,rec=(j-1)*nreal+i)msd(i,hour+1) - enddo - enddo - do i=1,nreal - write(11,rec=j*nreal+i)msd(i,nhour+1) - enddo - close(11) - endif - - END program rank_histograms_bootstrap -! -!-------------------------------------------------------------------------------------------- -! - subroutine find_rank & - (f,quantiles,nquant,rank1,miss) -! -! Find the rank of a data value within the quantile table (quantiles). -! -! f = individual data value (IN) -! quantiles = quantile values (IN) -! nquant = number of quantile values (IN) -! rank1 = rank of the data value (OUT) -! miss = missing value code (IN/OUT) - - implicit none - integer :: nquant - real :: quantiles(nquant) - integer :: rank1 - real :: f - integer :: i1,i2,i,ind - real :: miss - if(abs(quantiles(1)).ge.abs(miss))then - write(*,*)'Quantiles missing!?' ! This should never happen. - rank1=miss - else - if(f.lt.quantiles(1))then - rank1=1 - else - if(f.ge.quantiles(nquant))then - rank1=nquant+1 - else - i1=1 - i2=nquant - do while (i2.gt.i1+1) - i=(i1+i2)/2 - if(f.ge.quantiles(i))then - i1=(i1+i2)/2 - else - i2=(i1+i2)/2 - endif - enddo - rank1=i1+1 - endif - endif - endif - - return - end subroutine find_rank -! -!-------------------------------------------------------------------------------------------- -! - subroutine order(f,g,n) -! -! Ordering the values of f(1...n) in ascending order. Result in g -! Simple exchange ordering (inefficient for large n!) -! - implicit none - integer :: i,j,n - real :: f(n),g(n),g1 - g=f - do i=1,n-1 - do j=i+1,n - if(g(j).lt.g(i))then - g1=g(i) - g(i)=g(j) - g(j)=g1 - endif - enddo - enddo - - return - end subroutine order -! -!----------------------------------------------------------------------- -! -subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& - hour1,hour2,hour_step) -! -! Rounding the hour to the nearest hour within (hour1, hour1 + hour_step, ... hour2) -! -implicit none -integer :: yyear,mmonth,dday,hhour ! year, month, day and hour before rounding (IN) -integer :: year,month,day,hour ! year, month, day and hour after rounding (OUT) -integer :: hour1,hour2,hour_step ! target hours: hour1, hour1 + hour_step ... hour2 -integer :: ndays_month(12) ! number of days per month -integer :: hour_index -ndays_month=31 -ndays_month(4)=30 -ndays_month(6)=30 -ndays_month(9)=30 -ndays_month(11)=30 -ndays_month(2)=28 -if((mod(yyear,4).eq.0.and.mod(yyear,100).gt.0).or.(mod(yyear,400).eq.0))then - ndays_month(2)=29 -endif -year=yyear -month=mmonth -day=dday -! -! round the hour to the nearest output hour -! -hour_index=nint((hhour-hour1+0.)/hour_step) -hour=hour1+hour_step*hour_index - if(hour.ge.24)then ! 
hhour was rounded forward to next day - hour=hour1 - day=dday+1 - if(day.gt.ndays_month(month))then - day=1 - month=month+1 - if(month.gt.12)then - month=1 - year=yyear+1 - endif - endif - endif - if(hour.lt.0)then ! Hhour was rounded backward to previous day - hour=hour2 - day=dday-1 - if(day.eq.0)then - month=month-1 - if(month.eq.0)then - month=12 - year=yyear-1 - endif - day=ndays_month(month) - endif - endif - -return -end subroutine round_hour - - - - diff --git a/SYNOP/STATS/fortran-programs/rank_histograms_one_station b/SYNOP/STATS/fortran-programs/rank_histograms_one_station deleted file mode 100755 index 671c1c429f02e3e0ddbc96c7ab2ad3b081332169..0000000000000000000000000000000000000000 Binary files a/SYNOP/STATS/fortran-programs/rank_histograms_one_station and /dev/null differ diff --git a/SYNOP/STATS/fortran-programs/rank_histograms_one_station.f95 b/SYNOP/STATS/fortran-programs/rank_histograms_one_station.f95 deleted file mode 100644 index 2540ae9b0a0018c1acfb1c82f815b5a670be2f41..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/fortran-programs/rank_histograms_one_station.f95 +++ /dev/null @@ -1,552 +0,0 @@ - program rank_histograms_one_station -! -! Produce the rank histogram (i.e., the frequencies at which the simulated or -! observed data at one station fall between different quantiles of a pre-computed -! quantile distribution (below 1%, within 1-2%, ... above 99%) -! -! Jouni Räisänen, July 2015 -! -!---------------------------------------------------------------------- -! Input files (text format) -!---------------------------------------------------------------------- -! -! INFILE: simulated or observed time series at the station location. -! The first line is a header. The other lines include: -! -! 1) year -! 2) month -! 3) day -! 4) hour -! 5) data value -! -! QUANTILE_FILE: quantile values, separately for each day of the year! -! The first line is a header. The other lines include: -! -! 1) station code -! 2) longitude -! 3) latitude -! 4) month -! 5) day -! 6) hour -! 7-105) The 99 quantiles from 1 % to 99 % -! -! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. -! Otherwise, it is assumed to be an integer -! -! MSD_BOOTSTRAP_FILE: MSD (mean squared deviation) distribution of quantile frequencies -! from bootstrap tests. -! -! The first 1st line is a header. The other lines include -! -! 1) station code -! 2) longitude -! 3) latitude -! 4) number of realization -! 5 to N-1) MSD values for HOUR1, HOUR1 + HOUR_STEP ... HOUR2 -! N) MSD calculated from frequencies averaged over all the UTC hours within -! HOUR1, HOUR1 + HOUR_STEP ... HOUR2 -! -! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. -! Otherwise, it is assumed to be an integer -! -!---------------------------------------------------------------------- -! Output files: -!---------------------------------------------------------------------- -! -! OUTFILE: frequencies of quantiles. -! The first 1st line is a header. The other lines include -! -! 1) station code -! 2) longitude -! 3) latitude -! 4) hour -! 5) number of obervations for this (UTC) hour -! 6) MSD for the quantile frequencies -! 7) p-value of MSD (=fraction of bootstrap realizations with larger or equal MSD) -! 8-107) quantile frequencies (on scale 0-1) from 0-1 % ... 99-100% -! -! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. -! Otherwise, it is assumed to be an integer. -! -! UTC hours included: HOUR1, HOUR1 + HOUR_STEP ... 
HOUR2 + all-UTC-statistics coded with HOUR=24 -! -! OUTFILE_GRADS: frequencies of quantiles in GrADS binary output (if grads_out=.true.) -! -! The resulting GraDS binary file includes 7 variables (1 level for 1-6, 100 levels for 7): -! -! 1) longitude -! 2) latitude -! 3) hour -! 4) number of obervations for this (UTC) hour -! 5) MSD for the quantile frequencies -! 6) p-value of MSD (=fraction of bootstrap realizations with larger or equal MSD) -! 7) quantile frequencies (on scale 0-1) from 0-1 % ... 99-100% -! -!--------------------------------------------------------------- -! Namelist parameters -!--------------------------------------------------------------- -! -! NAMELIST/param/infile,outfile,quantile_file,msd_bootstrap_file,& -! hour1,hour2,hour_step,year1,year2,month1,month2,grads_out,outfile_grads,& -! l_code_in_char,l_round_hour,miss -! -! infile : file name for simulated or observed time series (see above) -! quantile_file : file name for pre-computed quantiles (see above) -! msd_bootstrap_file : file name for pre-computed bootstrap statistics (see above) -! outfile : name of output file (see above) -! outfile_grads : output file in GrADS binary format (see above) -! grads_out : if .true., outfile_grads is written -! l_code_in_char: if .true., 3-character station codes assumed (else integer) -! -! hour1 = first UTC hour analysed -! hour2 = last UTC hour analysed -! hour_step = step of analyzed UTC hours -! l_round_hour : if .true., hours are rounded to the nearest analysis hour. -! if .false., data for non-matching hours is ignored -! -! year1,month1 = beginning of the analysis period -! year2,month2 = end of the analysis period -! -! miss = missing value code. All input data f with abs(f) >= abs(miss) is treated as missing -! -!--------------------------------------------------------------- -! - IMPLICIT NONE - INTEGER :: I,J,K,Q - INTEGER,PARAMETER :: NMON=12, NDAY=31, NHOUR=24, NQUANT=99 - REAL :: quantiles(nquant,nmon,nday,nhour) ! array for quantile values - REAL :: q1(nquant) ! quantiles for one day + hour - REAL :: frequency(nquant+1,nhour+1) ! frequency of quantiles. NHOUR+1 for the all-UTC frequencies - REAL :: expected_freq ! expected frequency of quantiles (1/100) - REAL :: MSD(nhour+1) ! MSD statistics per UTC hour - REAL :: p_value(nhour+1) ! p values per UTC hour - REAL :: msd_bootstrap(nhour+1) ! msd values for one bootstrap realization - INTEGER :: n_bootstrap ! total number of bootstrap realizations - INTEGER :: realization ! number of bootstrap realization read from msd_boostrap_file - REAL :: value1 ! individual data values read from the input file - INTEGER :: ntimes_hour(nhour) ! total number of times with data for an UTC hour - INTEGER :: ntimes_hour_tot ! total number of times with data for all UTC hours - CHARACTER*160 :: infile,outfile,quantile_file,msd_bootstrap_file - CHARACTER*160 :: outfile_grads - INTEGER,PARAMETER :: L_dataline=1700 ! maximum length of lines in text files - character*1700 :: dataline,headerline,emptyline ! character strings for writing of output files - character*2 :: number_of_bin ! two-digit code for quantiles for outfile header - character*16 :: frequency_value ! frequency written in F16.6 format -! - INTEGER :: yyear,mmonth,dday,hhour ! time as read from the input file - INTEGER :: year,month,day,hour ! time after eventual rounding of hours - - INTEGER :: year1,year2,month1,month2 ! period used in calculations (see above) - INTEGER :: hour1,hour2,hour_step ! hours used in calculations (see above) - LOGICAL :: HOUR_USED(nhour) ! 
true if the UTC hour is used - INTEGER :: nhour_used ! number of UTC hours used - INTEGER :: station_int ! station code integer (for synop stations amd RHARM soundings) - CHARACTER*3 :: station_char ! station with characters (for GRUAN soundings) - LOGICAL :: l_code_in_char ! .true. for station code in characters - REAL :: longitude,latitude ! station longitude and latitude - - LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing - INTEGER :: IREC ! record number for GrADS output - - LOGICAL :: L_ROUND_HOUR ! if .true., hour rounded to the nearest output hour -! - REAL :: MISS ! missing value code -! -!----------------------------------------------------------------------- -! - NAMELIST/param/infile,outfile,quantile_file,msd_bootstrap_file,& - hour1,hour2,hour_step,year1,year2,month1,month2,grads_out,outfile_grads,& - l_code_in_char,l_round_hour,miss - - MISS=-9.99e6 - READ(*,NML=PARAM) -! -! Which UTC hours are used and what is their total number? -! - hour_used=.false. - nhour_used=0. - do i=hour1,hour2,hour_step - hour_used(i+1)=.true. - nhour_used=nhour_used+1 - enddo -! -! Empty data line -! - do i=1,L_dataline - emptyline(i:i)=' ' - enddo -!---------------------------------------------------------------------- -! -! Read the contents of the quantile_file to array 'quantiles' -! - quantiles=miss - open(unit=1,form='formatted',status='old',file=quantile_file) - - do while(.true.) - if(l_code_in_char)then - ! The err=1 specifier ensures that header lines are skipped -1 read(1,*,err=1,end=3)& - station_char,longitude,latitude,month,day,hour,(q1(i),i=1,nquant) - else -2 read(1,*,err=2,end=3)& - station_int,longitude,latitude,month,day,hour,(q1(i),i=1,nquant) - endif - do i=1,nquant - quantiles(i,month,day,hour+1)=q1(i) - enddo -! copy quantiles from 28 February to 29 February - if(month.eq.2.and.day.eq.28)then - do i=1,nquant - quantiles(i,month,day+1,hour+1)=q1(i) - enddo - endif - enddo -3 continue - close(1) -! -!---------------------------------------------------------------------- -! -! Read the station observations and count the absolute quantile bin frequencies -! - frequency=0 - ntimes_hour=0 - ntimes_hour_tot=0 - - open(unit=1,form='formatted',status='old',file=infile) - - do while(.true.) -11 read(1,*,err=11,end=12) yyear,mmonth,dday,hhour,value1 -! -! Rounding of hours? -! - if(l_round_hour)then - call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& - hour1,hour2,hour_step) - else - year=yyear - month=mmonth - day=dday - hour=hhour - endif -! -! If the hour is used and the date is within th analyzed period, -! update the quantile bin frequencies - - if(hour_used(hour+1))then - if((year.gt.year1.or.(year.eq.year1.and.month.ge.month1)).and. & - (year.lt.year2.or.(year.eq.year2.and.month.le.month2)).and. & - abs(value1).lt.abs(miss))then - ntimes_hour(hour+1)=ntimes_hour(hour+1)+1 - ntimes_hour_tot=ntimes_hour_tot+1 - call update_frequencies & - (frequency(1,hour+1),value1,quantiles(1,month,day,hour+1),nquant,miss) - endif - endif - - enddo -12 continue - close(1) -! -!---------------------------------------------------------------------- -! -! Convert absolute frequencies to relative frequencies -! - do hour=hour1,hour2,hour_step - do i=1,nquant+1 - if(ntimes_hour(hour+1).gt.0)then - frequency(i,hour+1)=frequency(i,hour+1)/ntimes_hour(hour+1) - else - frequency(i,hour+1)=miss - endif - enddo - enddo -! -!-------------------------------------------------------------------------------- -! -! Calculation of MSD. 
Because it can be assumed that model data are always available,
-! all UTC hours get the same weight in the calculation of the all-UTC MSD.
-!
- expected_freq=1./(nquant+1.)
- msd=0. ! initialize the MSD accumulators
- do j=hour1,hour2,hour_step
-  if(frequency(1,j+1).eq.miss)then
-   msd(j+1)=miss
-  else
-   do i=1,nquant+1
-    msd(j+1)=msd(j+1)+((frequency(i,j+1)/expected_freq-1.)**2.)/(nquant+1.)
-   enddo
-  endif
- enddo
- do i=1,nquant+1
-  do j=hour1,hour2,hour_step
-   frequency(i,nhour+1)=&
-    frequency(i,nhour+1)+frequency(i,j+1)/nhour_used
-  enddo
-  msd(nhour+1)=msd(nhour+1)+((frequency(i,nhour+1)/expected_freq-1.)**2.)/(nquant+1.)
- enddo
- do j=hour1,hour2,hour_step
-  if(frequency(1,j+1).eq.miss)then
-   do i=1,nquant+1
-    frequency(i,nhour+1)=miss
-   enddo
-   msd(nhour+1)=miss
-  endif
- enddo
-
-!----------------------------------------------------------------------
-!
-! Read the bootstrap MSDs and calculate the fraction of them that reaches or exceeds the actual value
-!
- n_bootstrap=0
- p_value=0.
-
- open(unit=1,form='formatted',status='old',file=msd_bootstrap_file)
- do while(.true.)
-  if(l_code_in_char)then
-21  read(1,*,err=21,end=23)station_char,longitude,latitude,realization,&
-    (msd_bootstrap(j+1),j=hour1,hour2,hour_step),msd_bootstrap(nhour+1)
-  else
-22  read(1,*,err=22,end=23)station_int,longitude,latitude,realization,&
-    (msd_bootstrap(j+1),j=hour1,hour2,hour_step),msd_bootstrap(nhour+1)
-  endif
-!
-! Update the p-value counters
-!
-  n_bootstrap=n_bootstrap+1
-  do hour=hour1,hour2,hour_step
-   if(msd_bootstrap(hour+1).ge.msd(hour+1))then
-    p_value(hour+1)=p_value(hour+1)+1.
-   endif
-  enddo
-  if(msd_bootstrap(nhour+1).ge.msd(nhour+1))then
-   p_value(nhour+1)=p_value(nhour+1)+1.
-  endif
- enddo
-23 continue
- close(1)
-!
-! Convert the p_values from absolute counts to relative frequencies
-!
- do hour=hour1,hour2,hour_step
-  p_value(hour+1)=p_value(hour+1)/n_bootstrap
- enddo
- p_value(nhour+1)=p_value(nhour+1)/n_bootstrap
-!
-!--------------------------------------------------------------------------
-!
-! Open the output file and write its header line in ODB compatible text format
-!
- open(unit=2,form='formatted',status='unknown',file=outfile)
- headerline=emptyline
- if(l_code_in_char)then
-  headerline='station@hdr:string longitude@hdr:real latitude@hdr:real hour@hdr:integer'
- else
-  headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer'
- endif
- headerline=trim(headerline)//' total_number@body:real'
- headerline=trim(headerline)//' msd@body:real'
- headerline=trim(headerline)//' p_value@body:real'
-!
-! quantile bin frequency variable names: f00, f01 ... f99
-!
- do j=0,nquant
-  write(number_of_bin,'(i2.2)')j
-  headerline=trim(headerline)//' f'//number_of_bin//'@body:real'
- enddo
- write(2,*)trim(headerline)
-!
-! Write the data lines for each UTC hour used
-!
- do hour=hour1,hour2,hour_step
-  j=hour+1
-  if(l_code_in_char)then
-   write(dataline,'(A3,2F16.6,1X,I2,I7,2F16.6)')&
-    station_char,longitude,latitude,hour,ntimes_hour(j),msd(j),p_value(j)
-  else
-   write(dataline,'(I6,2F16.6,1X,I2,I7,2F16.6)')&
-    station_int,longitude,latitude,hour,ntimes_hour(j),msd(j),p_value(j)
-  endif
-
-  do i=1,nquant+1
-   write(frequency_value,'(F16.6)')frequency(i,j)
-   dataline=trim(dataline)//frequency_value
-  enddo
-  write(2,*)trim(dataline)
- enddo
-!
-! Write the data line for the all-UTC statistics
-!
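For a quick cross-check of the statistic computed above: the MSD measures the squared departure of the observed bin frequencies from a flat histogram, and the p-value is the fraction of bootstrap MSDs that reach or exceed it. A sketch in NumPy (illustrative names, not part of the program):

    import numpy as np

    def msd_and_p_value(freq, bootstrap_msds):
        # freq: the nquant+1 = 100 quantile-bin frequencies (summing to 1)
        expected = 1.0 / freq.size
        msd = np.mean((freq / expected - 1.0) ** 2)
        # fraction of bootstrap realizations with MSD >= the actual MSD
        p_value = np.mean(np.asarray(bootstrap_msds) >= msd)
        return msd, p_value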
- if(l_code_in_char)then - write(dataline,'(A3,2F16.6,1X,I2,I7,2F16.6)')& - station_char,longitude,latitude,24,ntimes_hour_tot,msd(nhour+1),p_value(nhour+1) - else - write(dataline,'(I6,2F16.6,1X,I2,I7,2F16.6)')& - station_int,longitude,latitude,24,ntimes_hour_tot,msd(nhour+1),p_value(nhour+1) - endif - do i=1,nquant+1 - write(frequency_value,'(F16.6)')frequency(i,nhour+1) - dataline=trim(dataline)//frequency_value - enddo - write(2,*)trim(dataline) - - close(2) -! -!--------------------------------------------------------------------------------- -! -! Open the file for GrADS output and write its contents, for visualisation in GrADS? -! - if(grads_out)then - open(unit=11,form='unformatted',file=outfile_grads,access='DIRECT',& - recl=4,status='unknown') -! -! Again, first each UTC hour separately -! - do hour=hour1,hour2,hour_step - j=(hour-hour1)/hour_step - write(11,rec=1+j*(nquant+6))longitude - write(11,rec=2+j*(nquant+6))latitude - write(11,rec=3+j*(nquant+6))real(ntimes_hour(hour+1)) - write(11,rec=4+j*(nquant+6))msd(hour+1) - write(11,rec=5+j*(nquant+6))p_value(hour+1) - do i=1,nquant+1 - write(11,rec=5+j*(nquant+6)+i)frequency(i,hour+1) - enddo - enddo - ! - ! The all-UTC statistics - ! - write(11,rec=1+(j+1)*(nquant+6))longitude - write(11,rec=2+(j+1)*(nquant+6))latitude - write(11,rec=3+(j+1)*(nquant+6))real(ntimes_hour_tot) - write(11,rec=4+(j+1)*(nquant+6))msd(nhour+1) - write(11,rec=5+(j+1)*(nquant+6))p_value(nhour+1) - do i=1,nquant+1 - write(11,rec=5+(j+1)*(nquant+6)+i)frequency(i,nhour+1) - enddo - - close(11) - - endif ! if (grads_out) - - END program rank_histograms_one_station -! -!---------------------------------------------------------------- -! - subroutine update_frequencies & - (frequency,f,quantiles,nquant,miss) -! -! Find the location of the data value (f) within the -! quantile value table (quantiles, ascending order) -! and update the table of absolute quantile bin -! frequencies (frequency) -! -! frequency (in/out) = quantile bin frequencies -! f (in) = individual data value -! quantiles (in) = quantile values -! nquant (in) = number of quantiles -! miss (in) = missing value code - - implicit none - integer :: nquant - real :: quantiles(nquant) - real :: frequency(nquant+1) - real :: f - integer :: i1,i2,i,ind - real :: miss -! -! Quantiles should never be missing, but if they are, -! this should concern all quantiles simultaneously. -! Therefore, check just the 1st quantile - - if(abs(quantiles(1)).ge.abs(miss))then - write(*,*)'Quantiles missing!?' - stop - endif -! -! Find the position of f (ind) in the quantile table -! - if(f.lt.quantiles(1))then - ind=1 - else - if(f.ge.quantiles(nquant))then - ind=nquant+1 - else - i1=1 - i2=nquant - do while (i2.gt.i1+1) - i=(i1+i2)/2 - if(f.ge.quantiles(i))then - i1=(i1+i2)/2 - else - i2=(i1+i2)/2 - endif - enddo - ind=i1+1 - endif - endif -! -! Update the frequency table -! - frequency(ind)=frequency(ind)+1. - - return - end subroutine update_frequencies - -!---------------------------------------------------------------------- - -subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& - hour1,hour2,hour_step) -! -! Rounding the hour to the nearest hour within (hour1, hour1 + hour_step, ... hour2) -! -implicit none -integer :: yyear,mmonth,dday,hhour ! year, month, day and hour before rounding (IN) -integer :: year,month,day,hour ! year, month, day and hour after rounding (OUT) -integer :: hour1,hour2,hour_step ! target hours: hour1, hour1 + hour_step ... hour2 -integer :: ndays_month(12) ! 
number of days per month -integer :: hour_index -ndays_month=31 -ndays_month(4)=30 -ndays_month(6)=30 -ndays_month(9)=30 -ndays_month(11)=30 -ndays_month(2)=28 -if((mod(yyear,4).eq.0.and.mod(yyear,100).gt.0).or.(mod(yyear,400).eq.0))then - ndays_month(2)=29 -endif -year=yyear -month=mmonth -day=dday -! -! round the hour to the nearest output hour -! -hour_index=nint((hhour-hour1+0.)/hour_step) -hour=hour1+hour_step*hour_index - if(hour.ge.24)then ! hhour was rounded forward to next day - hour=hour1 - day=dday+1 - if(day.gt.ndays_month(month))then - day=1 - month=month+1 - if(month.gt.12)then - month=1 - year=yyear+1 - endif - endif - endif - if(hour.lt.0)then ! Hhour was rounded backward to previous day - hour=hour2 - day=dday-1 - if(day.eq.0)then - month=month-1 - if(month.eq.0)then - month=12 - year=yyear-1 - endif - day=ndays_month(month) - endif - endif - -return -end subroutine round_hour - - - diff --git a/SYNOP/STATS/produce_rank_histograms_all_stations.sh b/SYNOP/STATS/produce_rank_histograms_all_stations.sh deleted file mode 100755 index 129f2d527874e8a8dfddf07a1df71522555b174a..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/produce_rank_histograms_all_stations.sh +++ /dev/null @@ -1,300 +0,0 @@ -#!/bin/bash -echo "Bash version ${BASH_VERSION}..." -################################################################## -# -# Calculation of quantile space rank histograms and their -# summary statistics (MSD + p-value) for all stations in the -# FMI open data sample -# -# The following need to be given as arguments: -# -# - variable -# - first year of raw data -# - last year of raw data -# - first month of raw data (default 1) -# - last month of raw data (default 12) -# -# As the input for this script, the following files are needed: -# -# 1) List of stations -# 2) Raw simulation data at station coordinates -# (but here, the station observations are used as a surrogate of this file) -# 3) File including quantiles as a function of time of year -# 4) File including the rank histogram bootstrap MSD values -# -# Important notes: -# -# 1) It is assumed that all the files are in ODB format. However, -# their relevant parts are converted to text format for processing. -# 2) The rank histogram bootstrap MSD values must be available -# for the same length of simulation as given as arguments to this script. -# -# Execution (e.g): ./produce_rank_histograms_all_stations 39 2020 2020 1 12 -# -# ORIGINAL: -# Jouni Räisänen, Aug 2023 -# MODIFIED: -# Alexander Mahura, Sep-Oct-Nov 2023 -# -################################################################## -# -# Arguments: -# -# 1. Variable code -# -variable=$1 -echo " Meteorological variable code = $1" -# -# The following variables are included: -# -# 91 'total amount of clouds' -# 108 'sea level pressure' -# 80 '1-hour precipitation' -# 58 'relative humidity' -# 999 '10-minute precipitation intensity' -# 71 'snow depth' -# 39 '2m temperature' -# 40 'dew point temperature' -# 62 'visibility' -# 111 'wind direction' -# 261 'maximum wind gust in last 10 minutes' -# 221 'surface wind speed' -# -# 2.-3: First and last year -# -year1=$2 -year2=$3 -let nyears=year2-year1+1 -# -# 4.-5: First and last month -# -month1="${4:-1}" -month2="${5:-12}" -# -# Find the length of the simulation period. -# It this is 12 months or more, it is rounded to the nearest integer -# number of years. Otherwise, the number of months is recorded. 
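The period arithmetic described above (and implemented just below with bc) is compact enough to restate; a Python sketch using the same rounding as the shell pipeline:

    def period_label(year1, year2, month1, month2):
        # Label the period as N"yr" when it spans >= 12 months, else N"mon";
        # the rounding matches the ($n + 0.5) / 1 truncation done with bc below.
        nyears = year2 - year1 + 1
        nmonths = 12 * nyears + month2 - month1 - 11
        nyears_round = int(nmonths / 12 + 0.5)
        return f"{nyears_round}yr" if nmonths >= 12 else f"{nmonths}mon"

    # e.g. period_label(2020, 2020, 1, 12) -> "1yr"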
-# -let nmonths=12*nyears+month2-month1-11 -echo ' Number of months' $nmonths -# -n=$( echo "$nmonths / 12" | bc -l ) -nyears_round=$( echo "($n + 0.5) / 1" | bc ) -echo ' Number of years rounded' ${nyears_round} -# -if [ $nmonths -ge 12 ] -then - period=${nyears_round}yr -else - period=${nmonths}mon -fi -echo $period -# -# Add zero to ahead of $month1 and $month2 for naming of files if needed? -# -if [ $month1 -le 9 ] -then - month1='0'${month1} -fi -if [ $month2 -le 9 ] -then - month2='0'${month2} -fi -# -################################################################## -# -# Add odb_api to $PATH -# -# On Puhti -#PATH="${PATH}":/projappl/project_2001011/jouni/to_alexander_220823/odb_api/bin -# -# On Lumi -#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin -# -# On Lumi (asked Devaraju Narayanappa, CSC) -# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C -# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C -# -################################################################## -# -# ----- File names. Hard-coded, at least this far ----- -# -# NB: it would be more efficient to have ODB files for individual -# stations instead for the ones that include everything for all stations! -# -# 1) Raw data from model simulation (this far, mimicked by observations) -echo " Raw data from model simulation (this far, mimicked by observations) ..." -sim_dir=$rootdir/SYNOP/output/ODB_data/$experiment #="/scratch/project_465000454/ama/open_data_ODB" -sim_file=${sim_dir}/simulated_synop_data.odb -echo " $sim_file" - -# 2) Pre-computed quantiles -echo " Directory with pre-computed quantiles ..." -#quant_dir=quantiles -#quant_dir=/scratch/project_465000454/ama/STATDATASYNOP/quantiles -quant_dir=$rootdir/SYNOP/input/STATDATASYNOP/quantiles -echo " $quant_dir" -quant_file=${quant_dir}/quant_all_stations_${variable}_4.odb - -# 3) Pre-computed bootstrap MSD statistics -echo " Directory with pre-computed bootstrap MSD statistics ..." -#bootstrap_dir=bootstrap_statistics/${variable}_${period} -#bootstrap_dir=/scratch/project_465000454/ama/STATDATASYNOP/bootstrap_statistics/${variable}_${period} -bootstrap_dir=$rootdir/SYNOP/input/STATDATASYNOP/bootstrap_statistics/${variable}_${period} -echo " $bootstrap_dir" -msd_bootstrap_file=${bootstrap_dir}/MSD_bootstrap_${variable}_${period}_all_stations.odb - -# 4) Directory for results -echo " Directory for results ..." -#outdir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} -outdir=$rootdir/SYNOP/output/scratch/$experiment/rank_histograms/${variable}_${year1}${month1}-${year2}${month2} -echo " $outdir" -if test -d "$outdir"; then - echo $outdir exists -else - echo $outdir does not exist - echo " Creating directory: $outdir" - mkdir -p $outdir -fi -# -################################################################## -# -# It is assumed that the list of stations has been pre-produced -# and is available as a .txt file. -# -if [ $run_type == 'test' ]; then - echo "Running a test with only 3 synop stations." - station_list=test_list_of_stations_${variable}.txt -elif [ $run_type == 'FMI' ]; then - station_list=list_of_stations_${variable}.txt -elif [ $run_type == 'HadISD' ]; then - echo 'ERROR: HadISD not implemetend fully yet.' - exit -else - echo 'ERROR: given dataset not implemented! 
Select run_type=test,FMI,HadISD' - exit -fi -echo " List of synop stations (with geographical coordinates): $station_list" -# -# -################################################################ -# -# Compile the Fortran program that calculates the rank histograms -# -echo " Compiling fortran-program to calculate rank histograms ..." -echo " fortran-programs/rank_histograms_one_station.f95" - -#gfortran fortran-programs/rank_histograms_one_station.f95 -o rank_histograms_one_station - -################################################################# -# -# Count the number of lines in the station list: -# -n_lines=$(cat ${station_list} | wc | awk NF=1) -# -# Skip the first line which contains no station_ID -# -line_number=2 -# -# Loop over all stations -# -while [ ${line_number} -le `expr $n_lines` ] -do - head -`expr ${line_number}` ${station_list} | tail -1 > $rootdir/SYNOP/output/scratch/$experiment/input.txt - read station longitude latitude < $rootdir/SYNOP/output/scratch/$experiment/input.txt -echo " **********" -echo " Synop station ID, longitude, latitude: ${station} ${longitude} ${latitude}" - -#echo ${station} -#echo ${longitude} -#echo ${latitude} -# -################################################################ -# Select the simulation data for the station (mimicked this far by observations!). -# When simulation data are available, a means to retrieve the correct data points from it must be designed -echo " Selecting the simulation data (mimicked this far by observations) for synop station: ${station}" -# -odb_command="odb sql 'select year,month,day,hour,value where station=${station} and (year>=${year1} and year<=${year2}) and (hour=0 or hour=6 or hour=12 or hour=18) and variable=${variable}' -i ${sim_file} -o $rootdir/SYNOP/output/scratch/$experiment/sim_data" -eval ${odb_command} -# -############################################################### -# Select the quantiles for the station: -echo " Selecting the quantiles for synop station: ${station}" -# 00, 06, 12 and 18 UTC are picked separately, since -# select \* does not allow for parentheses (very strange) -# -#rm quantile_selection_* -odb sql select \* where hour=0 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_00 -odb sql select \* where hour=6 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_06 -odb sql select \* where hour=12 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_12 -odb sql select \* where hour=18 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_18 -cat $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_* > $rootdir/SYNOP/output/scratch/$experiment/quantile_selection -# -################################################################ -# -# Get the distribution of pre-computed Mean Square Deviations from -# the msd_bootstrap_file. Note that these values depend on the length -# of the time period (included in the name of msd_bootstrap_file) -# and (to a smaller extent) the selected station. -# -odb sql select \* where station=${station} -i ${msd_bootstrap_file} -o $rootdir/SYNOP/output/scratch/$experiment/msd_bootstrap_selection -# -############################################################### -# Produce the rank histogram for one station. -# Include data for 00, 06, 12 and 18 UTC. 
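The per-hour odb selections above can also be driven from Python; a sketch that shells out to the same odb CLI with the same query strings (the single-argument query form is an assumption; paths and names are illustrative):

    import subprocess

    def select_station_quantiles(station, quant_file, out_prefix):
        # One selection per UTC hour, then concatenate, as in the script.
        parts = []
        for hour in (0, 6, 12, 18):
            out = f"{out_prefix}_{hour:02d}"
            subprocess.run(["odb", "sql",
                            f"select * where hour={hour} and station={station}",
                            "-i", quant_file, "-o", out], check=True)
            parts.append(out)
        with open(out_prefix, "w") as fout:
            for part in parts:
                with open(part) as fin:
                    fout.write(fin.read())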
-echo " Producing the rank histogram for one synop station: ${station}" -# -############################################################## -# -echo " Checking existence of file: rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} ..." -outfile=${outdir}/rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} - -echo " $outfile" -if test -f "$outfile"; then - echo $outfile exists -else - echo $outfile does not exist - echo " Creating file: $outfile" - touch $outfile -fi - - -# -$SRC_OBSALL_DIR/SYNOP/STATS/fortran-programs/rank_histograms_one_station < ${outdir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations.odb -###################################################################### -# -# Delete the fortran executable -# -#rm rank_histograms_one_station diff --git a/SYNOP/STATS/produce_standard_plots_all_stations.sh b/SYNOP/STATS/produce_standard_plots_all_stations.sh deleted file mode 100755 index e7a782e3e492b25d76e9eefe108ecadb26d301c2..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/produce_standard_plots_all_stations.sh +++ /dev/null @@ -1,344 +0,0 @@ -#!/bin/bash -echo "Bash version ${BASH_VERSION}..." -echo "Python version" -which python -########################################################################## -# -# Produce the following plots: -# -# (a) individual data values (from a model simulation or a real station) -# against the background of quantiles from the observed distribution -# for the same station -# (b) rank histogram for the same station -# -# for all stations in the FMI open data sample -# -# The following need to be given as arguments: -# -# - variable -# - first year of raw data -# - last year of raw data -# - first month of raw data (default 1) -# - last month of raw data (default 12) -# -# As the input for this script, the following files are needed: -# -# 1) List of stations -# 2) Raw simulation data at station coordinates -# (but here, the station observations are used as a surrogate of this file) -# 3) File including quantiles as a function of time of year -# 4) File including the rank histogram bootstrap MSD values -# -# Important note: -# -# 1) It is assumed that all the files are in ODB format. However, -# their relevant parts are converted to text format for processing. -# -# Execution (e.g): ./produce_standard_plot_all_stations 39 2020 2020 1 12 -# ORIGINAL: -# Jouni Räisänen, August 2023 -# MODIFIED: -# Alexander Mahura, Sep-Oct 2023 -# -################################################################## -# -# Arguments: -# -# 1. Variable code -# -variable=$1 -echo " Meteorological variable code = $1" - -# The following variables are included: -# -# 91 'total amount of clouds' -# 108 'sea level pressure' -# 80 '1-hour precipitation' -# 58 'relative humidity' -# 999 '10-minute precipitation intensity' -# 71 'snow depth' -# 39 '2m temperature' -# 40 'dew point temperature' -# 62 'visibility' -# 111 'wind direction' -# 261 'maximum wind gust in last 10 minutes' -# 221 'surface wind speed' -# -# 2.-3: First and last year -# -year1=$2 -year2=$3 -let nyears=year2-year1+1 -# -# 4.-5: First and last month -# -month1="${4:-1}" -month2="${5:-12}" -# -# Find the length of the simulation period. -# It this is 12 months or more, it is rounded to the nearest integer -# number of years. Otherwise, the number of months is recorded. 
-# -let nmonths=12*nyears+month2-month1-11 -echo 'Number of months' $nmonths -# -n=$( echo "$nmonths / 12" | bc -l ) -nyears_round=$( echo "($n + 0.5) / 1" | bc ) -echo 'Number of years rounded' ${nyears_round} -# -if [ $nmonths -ge 12 ] -then - period=${nyears_round}yr -else - period=${nmonths}mon -fi -echo $period -# -# Add zero to ahead of $month1 and $month2 for naming of files if needed? -# -if [ $month1 -le 9 ] -then - month1='0'${month1} -fi -if [ $month2 -le 9 ] -then - month2='0'${month2} -fi -# -################################################################## -# -# Add odb_api to $PATH -# On Puhti -#PATH="${PATH}":/projappl/project_2001011/jouni/to_alexander_220823/odb_api/bin -# On Lumi -#export PATH="/projappl/project_465000454/ama/software/odb_api/bin:$PATH" -# -# On Lumi (asked Devaraju Narayanappa, CSC) -# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C -# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C -# -# Define the python environment -# On Puhti -#export PATH="/projappl/project_2001011/madeleine/python_envs/post-pp/bin:$PATH" -# On Lumi -# it has been activated in .bash_profile with --- module load python-climatedt/3.11.3-cpeCray-22.08 -# On Lumi -# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C -# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C -# -################################################################## -# -# ----- File names. Hard-coded, at least this far ----- -# -# 1) Raw data from model simulation (this far, mimicked by observations) -echo " Raw data from model simulation (this far, mimicked by observations) ..." -#sim_dir="/scratch/project_465000454/ama/open_data_ODB" -sim_dir=$rootdir/SYNOP/output/ODB_data/$experiment -sim_file=${sim_dir}/simulated_synop_data.odb -echo " $sim_file" - -# 2) Pre-computed quantiles -echo " Directory for pre-computed quantiles ..." -#quant_dir=quantiles -#quant_dir=/scratch/project_465000454/ama/STATDATASYNOP/quantiles -quant_dir=$rootdir/SYNOP/input/STATDATASYNOP/quantiles -echo " $quant_dir" -quant_file=${quant_dir}/quant_all_stations_${variable}_4.odb - -# 3) Pre-computed rank histogram data -echo " Directory for pre-computed rank histogram data ..." -#rh_dir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} -rh_dir=$rootdir/SYNOP/output/scratch/$experiment/rank_histograms/${variable}_${year1}${month1}-${year2}${month2} -echo " $rh_dir" -rh_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations.odb - -# 4) Directory for figures -echo " Directory for figures ..." -#figure_dir=figures/standard_plots_${variable}_${year1}${month1}-${year2}${month2} -figure_dir=$rootdir/SYNOP/output/figures/$experiment -echo " $figure_dir" -if test -d "$figure_dir"; then - echo $figure_dir exists -else - echo $figure_dir does not exist - echo " Creating directory: $figure_dir" - mkdir -p $figure_dir -fi - -# -################################################################## -# -# It is assumed that the list of stations has been pre-produced -# and is available as a .txt file. -# -if [ $run_type == 'test' ]; then - echo "Running a test with only 3 synop stations." - station_list=test_list_of_stations_${variable}.txt -elif [ $run_type == 'FMI' ]; then - station_list=list_of_stations_${variable}.txt -elif [ $run_type == 'HadISD' ]; then - echo 'ERROR: HadISD not implemetend fully yet.' - exit -else - echo 'ERROR: given dataset not implemented! 
Select run_type=test,FMI,HadISD' - exit -fi -echo " List of synop stations (with geographical coordinates): $station_list" -# -# -################################################################ -# -echo " Compiling the Fortran program needed for creating the figures ..." -echo " fortran-programs/plots_for_one_station.f95" - -#gfortran fortran-programs/plots_for_one_station.f95 -o plots_for_one_station - -################################################################# -# -# Count the number of lines in the station list -# -n_lines=$(cat ${station_list} | wc | awk NF=1) -# -# Skip the first line which contains no station_ID -# -line_number=2 -# -# Loop over all stations -# -while [ ${line_number} -le `expr $n_lines` ] -do - head -`expr ${line_number}` ${station_list} | tail -1 > $rootdir/SYNOP/output/scratch/$experiment/input.txt - read station longitude latitude < $rootdir/SYNOP/output/scratch/$experiment/input.txt -echo " **********" -echo " Synop station ID, longitude, latitude: ${station} ${longitude} ${latitude}" -# -################################################################ -# Select the simulation data for the station (mimicked this far by observations!) -# When simulation data are available, a means to retrieve the correct data points from it must be designed. -echo " Selecting the simulation data (mimicked this far by observations) for synop station: ${station}" -# -odb_command="odb sql 'select year,month,day,hour,value where station=${station} and (year>=${year1} and year<=${year2}) and (hour=0 or hour=6 or hour=12 or hour=18) and variable=${variable}' -i ${sim_file} -o $rootdir/SYNOP/output/scratch/$experiment/sim_data" -eval ${odb_command} - -################################################################ -# Select the rank histogram data for the station -echo " Selecting rank histogram data for synop station: ${station}" -# -odb sql select \* where station=${station} -i ${rh_file} -o $rootdir/SYNOP/output/scratch/$experiment/rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} -# -############################################################### -# Select the quantiles for the station -echo " Selecting the quantiles for synop station: ${station}" -# -# 00, 06, 12 and 18 UTC are picked separately, since -# select \* does not allow for parentheses (very strange) -# -#rm quantile_selection_* -odb sql select \* where hour=0 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_00 -odb sql select \* where hour=6 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_06 -odb sql select \* where hour=12 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_12 -odb sql select \* where hour=18 and station=${station} -i ${quant_file} -o $rootdir/SYNOP/output/scratch/$experiment/quantile_selection_18 -cat quantile_selection_* > $rootdir/SYNOP/output/scratch/$experiment/quantile_selection -# -############################################################## -# -# Produce the plots for one station -# 1) Run the fortran program that produces the required text files -# 2) Run the python script that produces the actual plot -# -# NB: the fortran program also produces some files for generation of the same plots -# in GrADS, but plotting in GrADS is omitted in this script. -# -############################################################### -# ????????? 
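The standard_plot.cfg written in the block below is plain INI; on the Python side it is consumed with configparser, roughly as in this sketch (section and key names are the script's own; the path is illustrative):

    import configparser

    config = configparser.ConfigParser()
    config.read("standard_plot.cfg")

    variable   = config["data"]["variable"]     # e.g. "39" for 2m temperature
    station_id = config["data"]["station_id"]
    sim_data   = config["input"]["sim_data"]    # time series text file
    fig_name   = config["output"]["fig_name"]   # output .png path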
-if [ 0 -eq 1 ]; then
-./plots_for_one_station < $rootdir/SYNOP/output/scratch/$experiment/standard_plot.cfg
-fi
-
-cat > $rootdir/SYNOP/output/scratch/$experiment/standard_plot.cfg << EOF
-[data]
-variable=${variable}
-station_id=${station}
-longitude=${longitude}
-latitude=${latitude}
-year_beg=${year1}
-year_end=${year2}
-month_beg=${month1}
-month_end=${month2}
-
-[input]
-sim_data=$rootdir/SYNOP/output/scratch/$experiment/time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt
-quantile_sel=$rootdir/SYNOP/output/scratch/$experiment/quantiles_${variable}_${station}.txt
-rank_hists=$rootdir/SYNOP/output/scratch/$experiment/rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt
-
-[output]
-fig_name=${figure_dir}/standard_plot_${variable}_${station}_${year1}${month1}-${year2}${month2}_python.png
-EOF
-
-################################################################
-#
-echo " Calling python to plot quantiles rank histogram for synop station: ${station}"
-python3 python/plot_quantiles_rankhist_synop.py $rootdir/SYNOP/output/scratch/$experiment/standard_plot.cfg
-
-################################################################
-# Remove unnecessary files
-echo " Removing unnecessary temporary files ..."
-
-#rm input.txt
-#rm vrange_*
-#rm msd_and_p-value
-#rm quantile_selection
-#rm rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2}
-#rm sim_data
-###rm msd_bootstrap_selection
-###rm standard_plot_one_station
-#rm time_series_commands
-#rm time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.grads
-#rm rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.grads
-#rm standard_plot.cfg
-#rm time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt
-#rm quantiles_${variable}_${station}.txt
-#rm rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt
-#
-((line_number++))
-done
-
-################################################################
-# The Fortran executable is not needed any more:
-#
-#rm plots_for_one_station
-#rm quantile_selection_*
-#rm msd_and_p-value_*
-#rm coordinates
diff --git a/SYNOP/STATS/python/plot_p_values_map_synop.py b/SYNOP/STATS/python/plot_p_values_map_synop.py
deleted file mode 100644
index bc243153490de92db3b5a0366505f46f203bcd1d..0000000000000000000000000000000000000000
--- a/SYNOP/STATS/python/plot_p_values_map_synop.py
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/env python
-
-'''
-Written by Madeleine Ekblom, 2023-04-06
-updates 2023-11-27
-- Option of moving legend into plot when plotting on a global map (lower left corner)
-
-Based on Jouni Räisänen's script for plotting p values on the map of Finland
-
-Plots p values on a map restricted to an area limited by lons (19, 32) and lats (59, 71)
-
-Example:
-$ python3 plot_p_values_map.py p_values_as_text_file timestring figure_name
-
-p_values_as_text_file is a text file containing stationID, longitude, latitude, and p value with one header row
-'''
-
-import numpy as np
-import matplotlib.pyplot as plt
-import sys
-import cartopy.crs as ccrs
-from matplotlib.lines import Line2D
-
-def read_data(p_values_file):
-    '''
-    Description: Reads in file containing p values at different stations
-    Input: file containing p values with stationID (sid), longitude (lon), latitude (lat), and p value (p). Header = 1 row.
- Output: structure numpy array with sid, lon, lat, p - ''' - - p_values_data = np.loadtxt(p_values_file, skiprows=1, dtype={'names': ('sid', 'lon', 'lat', 'p'), 'formats':('i4', 'f4', 'f4', 'f4')}) - - return p_values_data - - -def plot_p_values(p_values,figname): - ''' - Description: plot p values on a map restricted to an area limited by lons (19, 32) and lats (59, 71) - Input: numpy array - Output: saves a figure in the running directory - ''' - # add more variables and units when needed - variable_shortnames={'39': '2t'} - variable_units={'39': '$^\circ$C'} - - # variables to add, shortnames and units need to be checked - # total amount of clouds, 91 - # sea level pressure, 108 - # 1-hour precipitation, 80 - # relative humidity, 58 - # 10-minute precipitation intensity, 999 - # snow depth, 71 - # 2m temp, 39 --> 2t, degrees Celcius - # dew point temperature, 40 - # visibility, 6 - # wind direction, 111 - # max wind gust in last 10 mins,261 - # surface wind speed, 221 - - variable='39' # change to be an input value (as string) - - # if data only from SYNOP stations in Finland: - only_Finland_data=True - - # Define area of map: - if only_Finland_data: - # Finland: - lon_min=19 - lon_max=32 - lat_min=59 - lat_max=71 - else: - # Global: - lon_min=0 - lon_max=359 - lat_min=-90 - lat_max=90 - - # define colours and limits for the colours: - colors = ['darkviolet', 'blue', 'skyblue', 'lime', 'yellow', 'orange', 'red', 'grey'] - limits = [0,0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,1] - - # create figure and add a subplot with a map: - fig = plt.figure(figsize=(8,12)) - ax = fig.add_subplot(1,1,1,projection=ccrs.PlateCarree()) - ax.coastlines() - ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) - ax.gridlines(draw_labels=True) - - # Plotting details: - fs1=20 # fontsize for title - fs2=14 # fontsize for legend - fs3=10 # fontisze for text box explanation - - #loop over limits: - legend_elements = [] - for n in range(8): - # find indices - p_ind = ((p_values['p'] > limits[n]) & (p_values['p'] <= limits[n+1])) - # scatter plot - ax.scatter(p_values['lon'][p_ind], p_values['lat'][p_ind], c=colors[n]) - # add to legend_elements, color and values - legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1]))) - - # explanation for figure: - text_expl ='p-value: the probability that the MSD statistics for the quantile rank\nhistogram would be exceeded by pure chance\nwhen comparing a perfect model simulation with observations.' 
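The limit loop above classes each station by walking the eight (limits[n], limits[n+1]] intervals; np.digitize expresses the same binning in one call (a sketch, not part of the script):

    import numpy as np

    limits = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1]
    p = np.array([0.003, 0.07, 0.42, 0.9])   # illustrative p-values
    # bin_index[i] in 0..7 picks the colour class, matching
    # (p > limits[n]) & (p <= limits[n+1]) in the loop above
    bin_index = np.digitize(p, limits[1:-1], right=True)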
- - # add legend table with customized values - if only_Finland_data: - # If Finland map, put legend outside map - ax.legend(handles=legend_elements, loc='center left', fontsize=fs3) #, bbox_to_anchor=(1.1, 0.5), fontsize=fs2) - # add text box with explanation to the bottom of the figure: 0.1, -0.25 - ax.text(0.1, -0.25, text_expl, bbox={'pad':10, 'facecolor':'white'}, fontsize=fs3, transform=ax.transAxes) - else: - # If global map, put Legend in the lower left corner - ax.legend(handles=legend_elements, loc='lower left', fontsize=fs3) - # add text box with explanation to the bottom of the figure, 0.1, -0.45 - ax.text(0.1, -0.45, text_expl, bbox={'pad':10, 'facecolor':'white'}, fontsize=fs3, transform=ax.transAxes) - - # set title: - fig.suptitle('SYNOP stations: p-values for ' + variable_shortnames[variable] +'\nquantiles in ' + timestring, fontsize=fs1) - - # save figure - #plt.savefig('p_values.png', dpi=300) - plt.savefig(figname, dpi=300) - - -def main(p_values_file,figurename): - ''' - Main file: (1) reads in p values from file, (2) plot p values on a map - ''' - - p_values_data = read_data(p_values_file) - - plot_p_values(p_values_data,figurename) - - -if __name__=='__main__': - p_values_file = sys.argv[1] - timestring = sys.argv[2] # Not used????????? - figurename = sys.argv[3] - - #main(p_values_file) # original - main(p_values_file,figurename) diff --git a/SYNOP/STATS/python/plot_quantiles_rankhist_synop.py b/SYNOP/STATS/python/plot_quantiles_rankhist_synop.py deleted file mode 100644 index aec03020e2c76c48bce952812d05a1fd0ac46ac8..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/python/plot_quantiles_rankhist_synop.py +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/env python3 - -''' -Written by Madeleine Ekblom, 2023-05-10 -updates 2023-11-27: -- visual changes of the plot: labels, legends , title, and colours updated - -Based on Jouni Räisänen's script for plotting quantiles/time series data (00, 06, 12, 18 UTC) and rank histogram (24UTC=combination of 00,06,12,and 18 UTC data) - -Example: -$ python3 plot_quantiles_rankhist.py example.cfg - -example.cfg is a text file containing: -[data] -variable=39 -station_id=102019 -longitude=23.576000 -latitude=68.602997 -year_beg=2010 -year_end=2012 -month_beg=01 -month_end=12 - -[input] -sim_data=time_series_102019_2010-2012.txt -quantile_sel=quantiles_102019.txt -rank_hists=rank_histogram_102019_2010-2012.txt - -[output] -fig_name=standard_plot_102019_2010-2012_python.png - -''' - -import numpy as np -import matplotlib.pyplot as plt -import sys -import configparser -import datetime -from matplotlib.dates import drange, MonthLocator, DateFormatter -import matplotlib.lines as mlines -import matplotlib.patches as mpatches -import matplotlib.ticker as ticker - -config = configparser.ConfigParser() - -def plot_data(quantiles_data, rank_data, time_series_data, variable, station_id, lon, lat, year_beg, year_end, month_beg, month_end, figname, savefig=False): - ''' - Plot data: quantiles data give the lines and the time series data give the dots - ''' - # add more variables and units when needed - variable_shortnames={'39': '2t'} - variable_units={'39': '$^\circ$C'} - - # variables to add, shortnames and units need to be checked - # total amount of clouds, 91 - # sea level pressure, 108 - # 1-hour precipitation, 80 - # relative humidity, 58 - # 10-minute precipitation intensity, 999 - # snow depth, 71 - # 2m temp, 39 --> 2t, degrees Celcius - # dew point temperature, 40 - # visibility, 6 - # wind direction, 111 - # max wind gust in last 
10 mins,261 - # surface wind speed, 221 - - # experiment id: now use 'test data' - to be replaced with real exp_id (as input argument) - exp_id = 'test data' - - # set up date array consisting of all days in beg year: - days = np.arange(1,366) - #day1 = datetime.date(year_beg, 1, 1) - #day2 = datetime.date(year_beg+1, 1, 1) - day1 = datetime.date(2001, 1, 1) - day2 = datetime.date(2002, 1, 1) - delta = datetime.timedelta(days=1) - dates = drange(day1, day2, delta) - - # arrays for quantile names, axes titles, hours to plot - q_names = ['q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99'] - sub_titles = ['00 UTC', '06 UTC', '12 UTC', '18 UTC'] - hours = [0, 6, 12, 18] - - # plotting details: - alpha = [0.5, 0.3, 0.1] # transparency for shaded area plotting - color='blue' # color for shading - linecolor='red' # color for solid line - scatter_size=5 # size for scattered dots - fs1=20 # fontsize for title - fs2=14 # fontsize for labels, subplot titles - fs3 = 12 # fontsize for - fs4=10 # fontsize for extra text box, legend - ndigits = 3 # round off to ndigits (e.g., lon, lat) - - # use variable to get variable and corresponding unit, used as ylabel in plots: - ylabel = variable_shortnames[variable] + ' (' + variable_units[variable] + ')' - - # legend elemnents for quantile data, one legend for model and one legend for observations - legend_elements1 = [mlines.Line2D([],[], marker='.', linestyle='', color='k', label='Model data (' + exp_id + ')')] - legend_elements2= [mpatches.Patch(color=color, alpha=alpha[0], label='25-50%'), - mpatches.Patch(color=color, alpha=alpha[1], label='10-25%'), - mpatches.Patch(color=color, alpha=alpha[2], label='1-10%'), - mlines.Line2D([],[], color=linecolor, label='50%'), - mpatches.Patch(color=color, alpha=alpha[0], label='50-75%'), - mpatches.Patch(color=color, alpha=alpha[1], label='75-90%'), - mpatches.Patch(color=color, alpha=alpha[2], label='90-99%')] - - # round lon, lat to three digits: - lon = np.round(float(lon), ndigits) - lat = np.round(float(lat), ndigits) - - # define lon/lat text strings for title of the form lat: XX N/S, lat: XX E/W - if (lat > 0): - # if lat positive -> N - lat_text = ' (lat: ' + str(lat) + '$^\circ$N, ' - else: - # if negative -> S - lat_text = ' (lat ' + str(-lat) + '$^\circ$S, ' - - if (0 <= lon) and (lon <= 180): - # lon btw 0 an 180 -> E - lon_text = ' lon: ' + str(lon) + '$^\circ$E)' - else: - if lon < 0: - # lon < 0 --> - lon W - lon_text = ' lon: ' + str(-lon) + '$^\circ$W)' - else: - # 180 < lon < 360 --> - (lon - 360) W - lon = -(lon - 360) - lon_text = ' lon: ' + str(lon) + '$^\circ$W)' - - # define figure title: - fig_title = 'SYNOP station: ' + station_id + lat_text + lon_text + '\nModel data from ' + str(year_beg) + month_beg + '-' + str(year_end) + month_end - - # calculate number of years: - nyears = year_end - year_beg + 1 - - # set up figure - fig = plt.figure(figsize=(10,10), layout='constrained') - spec = fig.add_gridspec(4,2) - fig.suptitle(fig_title, fontsize=fs1) - - # counter - c = 0 - - # plot quantiles/time series for times 00, 06, 12, 18: - for i in range(2): - for j in range(2): - # set up axis: title, xaxis, x- and y-labels - ax = fig.add_subplot(spec[i,j]) - ax.set_title(sub_titles[c], fontsize=fs2) - ax.set_xlim(dates[0], dates[-1]) - ax.xaxis.set_major_locator(MonthLocator()) - ax.xaxis.set_major_formatter(DateFormatter('%b')) - ax.set_xlabel('Time', fontsize=fs2) - ax.set_ylabel(ylabel, fontsize=fs2) - - # quantile data: - # find quantile data hour = 0, 6, 12, 18 - qh_ind = (quantiles_data['hour'][:] 
== hours[c]) - - # get quantile data, qXX_data = quantile XX data at hour qh_ind - q01_data = quantiles_data[q_names[0]][qh_ind] - q10_data = quantiles_data[q_names[1]][qh_ind] - q25_data = quantiles_data[q_names[2]][qh_ind] - q50_data = quantiles_data[q_names[3]][qh_ind] - q75_data = quantiles_data[q_names[4]][qh_ind] - q90_data = quantiles_data[q_names[5]][qh_ind] - q99_data = quantiles_data[q_names[6]][qh_ind] - - # solid line at q50: - ax.plot(dates, q50_data, c=linecolor, ls='-') - - # shading will be the most opaque close to 50 and more transparent close to 1/99 - - # fill area btw q25-q50, q50-q75: alpha = alpha[0] - ax.fill_between(dates, q25_data[:], q50_data[:], color=color, alpha=alpha[0]) - ax.fill_between(dates, q50_data[:], q75_data[:], color=color, alpha=alpha[0]) - - # fill area btw q10-q25, q75-q90, alpha=alpha[1] - ax.fill_between(dates, q10_data[:], q25_data[:], color=color, alpha=alpha[1]) - ax.fill_between(dates, q75_data[:], q90_data[:], color=color, alpha=alpha[1]) - - # fill area btw q01-q10, q90-q99, alpha=alpha[2] - ax.fill_between(dates, q01_data[:], q10_data[:], color=color, alpha=alpha[2]) - ax.fill_between(dates, q90_data[:], q99_data[:], color=color, alpha=alpha[2]) - - # plot time series data: - # find time series where hour = 0, 6, 12, 18 - th_ind = (time_series_data['hour'][:] == hours[c]) - t_data = time_series_data['value'][th_ind] # all years - - for n in range(nyears): - # plot data with black, small dots - ax.scatter(dates, t_data[n*365:(n+1)*365], s=scatter_size, c='k', marker='.') - c = c + 1 - - # plot rank histogram data: - ax2 = fig.add_subplot(spec[2,0]) - ax2.set_title('Rank histogram', fontsize=fs2) - ax2.bar(np.arange(100)+0.5, rank_data[4,6:]*100) # add + 0.5 to center bars - ax2.set_xlim(0,100) - ax2.set_xlabel('Percentile', fontsize=fs2) - ax2.set_ylabel('Relative frequency ', fontsize=fs2) - ax2.axhline(y=1, c='k', ls='--') # horizontal line at y=1.0 - ax2.xaxis.set_major_locator(ticker.FixedLocator([10,25,50,75,90])) - - # add a sixth axis for legends and text data: - ax3 = fig.add_subplot(spec[2,1]) - ax3.axis('off') - - # legend with two columns at the upper centre of the figure: - leg1 = ax3.legend(loc='upper center', handles=legend_elements1, fontsize=fs4) - leg2 = ax3.legend(loc='lower center', handles=legend_elements2, ncols=2, fontsize=fs4, title='Quantiles for observations', title_fontsize=fs4) - ax3.add_artist(leg1) - ax3.add_artist(leg2) - - # add text boxes with (1) rank histogram data and (2) explanation - rank_hist_text = r'$\bf{MSD}$:' + str(np.round(rank_data[4,4], ndigits)) + '\n' + r'$\bf{ p-value}$:' + str(np.round(rank_data[4,5], ndigits)) - - # add text boxes to axis in right lower corner - ax3.text(0.35, -0.3, rank_hist_text, bbox={'pad': 10, 'facecolor':'white'}, fontsize=fs3) - - # figure explanation: explaining what quantiles and model data are - expl_msd_pvalue = 'Mean Square Deviation (' + r'$\bf{MSD}$' + ' relative to flat histogram\n' + r'$\bf{p-value}$' + ' of MSD relative to sampling distribution\n' - expl_quantile='Quantiles are estimated based on observations at the station ' + station_id +' (period: 2010-2021)\n' - expl_model='Model data from experiment (' + exp_id + ') interpolated to location of this station ' - fig_text = expl_msd_pvalue + expl_quantile + expl_model - - # add subplot at the bottom of the figure, and add figure explanation as text to this subplot - ax4 = fig.add_subplot(spec[3,:]) - ax4.axis('off') - ax4.text(0.1, 0.4, fig_text, bbox={'pad':10, 'facecolor':'white'}, fontsize=fs4) - - # 
save figure: - if savefig: - plt.savefig(figname, dpi=300) - - -def read_quantiles_data(quantile_sel_file): - ''' - Reads quantile data from text file and returns a structured numpy array - Input: text file - Output: a structured numpy array: sid, lon, lat, day_of_year, hour, q01, q10, q25, q50, q75, q90, q99 - ''' - # Header line contains: - #station@hdr:integer longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real - - quantile_data = np.loadtxt(quantile_sel_file, skiprows=1, dtype={'names':('sid', 'lon', 'lat', 'day_of_year', 'hour', 'q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99'), 'formats': ('i4', 'f4', 'f4', 'i4', 'i4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4')}) - - return quantile_data - -def read_rank_data(rank_hists_file): - ''' - Reads rank histogram data binned into 100 bins: 0-1%, 1-2%, ..., 99-100% - Input: text file - Output: numpy array (unstructured); the first columns contain station id, longitude, latitude, hour, msd, p_value, followed by the bins f00 = 0-1%, ..., f99 = 99-100% - ''' - - #Header line contains: - # station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer msd@body:real p_value@body:real f00@body:real f01@body:real f02@body:real f03@body:real f04@body:real f05@body:real f06@body:real f07@body:real f08@body:real f09@body:real f10@body:real f11@body:real f12@body:real f13@body:real f14@body:real f15@body:real f16@body:real f17@body:real f18@body:real f19@body:real f20@body:real f21@body:real f22@body:real f23@body:real f24@body:real f25@body:real f26@body:real f27@body:real f28@body:real f29@body:real f30@body:real f31@body:real f32@body:real f33@body:real f34@body:real f35@body:real f36@body:real f37@body:real f38@body:real f39@body:real f40@body:real f41@body:real f42@body:real f43@body:real f44@body:real f45@body:real f46@body:real f47@body:real f48@body:real f49@body:real f50@body:real f51@body:real f52@body:real f53@body:real f54@body:real f55@body:real f56@body:real f57@body:real f58@body:real f59@body:real f60@body:real f61@body:real f62@body:real f63@body:real f64@body:real f65@body:real f66@body:real f67@body:real f68@body:real f69@body:real f70@body:real f71@body:real f72@body:real f73@body:real f74@body:real f75@body:real f76@body:real f77@body:real f78@body:real f79@body:real f80@body:real f81@body:real f82@body:real f83@body:real f84@body:real f85@body:real f86@body:real f87@body:real f88@body:real f89@body:real f90@body:real f91@body:real f92@body:real f93@body:real f94@body:real f95@body:real f96@body:real f97@body:real f98@body:real f99@body:real - - rank_hists = np.loadtxt(rank_hists_file, skiprows=1) - - return rank_hists - -def read_time_series_data(sim_data_file): - ''' - Reads time series data - Input: text file - Output: structured numpy array: sid, lon, lat, year, day_of_year, hour, value - ''' - - # Header line contains - # station@hdr:integer longitude@hdr:real latitude@hdr:real year@hdr:integer day_of_year@hdr:integer hour@hdr:integer value@body:real - - time_series_data = np.loadtxt(sim_data_file, skiprows=1, dtype={'names': ('sid', 'lon', 'lat', 'year', 'day_of_year', 'hour', 'value'), 'formats': ('i4', 'f4', 'f4', 'i4', 'i4', 'i4', 'f4')}) - - return time_series_data - -def main(config_file): - ''' - Main: - reads config files - reads input data: time_series_data, quantiles_data, rank_hist_data - calls plotting function and saves figure with figure name given in config file - ''' - ## Read from 
config file ## - config.read(config_file) - - # data to be plotted - variable = config['data']['variable'] - station_id = config['data']['station_id'] - lon = config['data']['longitude'] - lat = config['data']['latitude'] - year_beg = int(config['data']['year_beg']) - year_end = int(config['data']['year_end']) - month_beg = config['data']['month_beg'] - month_end = config['data']['month_end'] - - # input files - sim_file = config['input']['sim_data'] - quantile_file = config['input']['quantile_sel'] - rank_hist_file = config['input']['rank_hists'] - - # output files - figname = config['output']['fig_name'] - - ## Read input data ## - time_series_data = read_time_series_data(sim_file) - quantiles_data = read_quantiles_data(quantile_file) - rank_hist_data = read_rank_data(rank_hist_file) - - ## Plot data ## - plot_data(quantiles_data, rank_hist_data, time_series_data, variable, station_id, lon, lat, year_beg, year_end, month_beg, month_end, figname, True) - - -if __name__=='__main__': - if (len(sys.argv) < 2): - sys.exit("Error: a config file must be given as argument") - elif (len(sys.argv) > 2): - sys.exit("Error: give only the config file as argument") - config_file = sys.argv[1] - - main(config_file) diff --git a/SYNOP/STATS/python/plot_rankhist_sum_all_stations_synop.py b/SYNOP/STATS/python/plot_rankhist_sum_all_stations_synop.py deleted file mode 100644 index 4fbcc46a983e0deb38218391f91180a3f703556b..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/python/plot_rankhist_sum_all_stations_synop.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env python - -''' -Written by Madeleine Ekblom, 2023-09-12 -updates 2023-11-27: -- moved text from title to a new text box - -Based on Jouni Räisänen's script for plotting rank histogram summary statistics - -Example: -$ python3 plot_rankhist_sum_all_stations_synop.py rh_summary_file nstat p01 p1 p5 max_freq_p max_freq_q figname - -''' - -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.ticker import (MultipleLocator, AutoMinorLocator) -import sys - -def plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q,figname): - ''' - Input: p_freq, q_freq, number of stations, p01, p1, p5, max freq p, max freq q - Description: Plots p and q frequencies and saves the figure with figname - ''' - # add more variables and units when needed - variable_shortnames={'39': '2t'} - variable_units={'39': '$^\circ$C'} - - # variables to add, shortnames and units need to be checked - # total amount of clouds, 91 - # sea level pressure, 108 - # 1-hour precipitation, 80 - # relative humidity, 58 - # 10-minute precipitation intensity, 999 - # snow depth, 71 - # 2m temp, 39 --> 2t, degrees Celsius - # dew point temperature, 40 - # visibility, 6 - # wind direction, 111 - # max wind gust in last 10 mins,261 - # surface wind speed, 221 - - variable='39' # change to be an input value, as string - - # title - title='SYNOP: Rank histogram summary statistics for ' + variable_shortnames[variable] - - # text box information - text_box = 'Number of stations: ' + str(number_of_stations) + '\nFrequency, p<0.001: ' + str(p01) + '\nFrequency, p<0.01: ' + str(p1) + '\nFrequency, p<0.05: ' + str(p5) - - # these can also be input arguments if the number of bins is not constant - number_of_p_bins=20 - number_of_q_bins=100 - - # set up figure - fig = plt.figure(figsize=(8,12)) - gs = fig.add_gridspec(3, hspace=0.5) - ax1, ax2, ax3 = gs.subplots(sharex=False, sharey=False) - - # plotting details - fs1=20 # fontsize for title - fs2=14 # fontsize 
for labels, subplot titles, textbox - fs3=10 # fontsize for text explanation - - # Plot p values - ax1.set_title('Normalized p-value frequencies', fontsize=fs2) - ax1.set_xlabel('p-value', fontsize=fs2) - ax1.set_ylabel('Relative frequency', fontsize=fs2) - ax1.set_ylim([0,max_freq_p]) - ax1.bar(np.arange(number_of_p_bins)+0.5, number_of_p_bins*p_freq) # add +0.5 to center the bars - ax1.xaxis.set_major_locator(MultipleLocator(2)) - ax1.xaxis.set_minor_locator(MultipleLocator(1)) - p_xlabels = ax1.get_xticks() - ax1.set_xticks(p_xlabels, p_xlabels/number_of_p_bins) - ax1.set_xlim([0,number_of_p_bins]) - - # Plot q values - ax2.set_title('Normalized quantile frequencies', fontsize=fs2) - ax2.set_xlabel('Quantile (%)', fontsize=fs2) - ax2.set_ylabel('Relative frequency', fontsize=fs2) - ax2.bar(np.arange(number_of_q_bins)+0.5, number_of_q_bins*q_freq) # add +0.5 to center the bars - ax2.xaxis.set_major_locator(MultipleLocator(10)) - ax2.xaxis.set_minor_locator(MultipleLocator(5)) - ax2.set_ylim([0,max_freq_q]) - ax2.set_xlim([0,number_of_q_bins]) - - # remove axis from ax3 - ax3.axis('off') - - # add text box with additional information: - ax3.text(0.3, 0.6, text_box, bbox={'pad':10, 'facecolor':'white'}, fontsize=fs2) - - # explanation for figure - text_pvalue= 'Normalized p-values: Frequency distribution of p-values at\nall individual stations, normalized to give an expected value of 1.\n' - text_quantile='Normalized quantiles: Frequency distribution of model data\nrelative to quantiles of observations, averaged over all stations\nand divided by the expected value of 0.01.' - - text_expl = text_pvalue + text_quantile - - # add text box with explanation to the bottom of the figure - ax3.text(0.1, -0.05, text_expl, bbox={'pad':10, 'facecolor':'white'}, fontsize=fs3) - - # add title to figure - fig.suptitle(title, fontsize=fs1) - - # save figure - #figname='rank_hist_sumstats.png' - plt.savefig(figname, dpi=300) - -def read_data(rh_summary_file): - ''' - Input: path/to/rh_summary_file - Output: p_freq, q_freq - Description: reads in p frequencies and q frequencies - ''' - - # first line contains a header and first column contains hour - p_freq_all = np.loadtxt(rh_summary_file + '_p-freq.txt', skiprows=1) - q_freq_all = np.loadtxt(rh_summary_file + '_q-freq.txt', skiprows=1) - - # p_freq and q_freq contain data 00, 06, 12, 18, 24 (the last is used in these plots) - p_freq = p_freq_all[-1,1:] - q_freq = q_freq_all[-1,1:] - - return p_freq, q_freq - -if __name__=='__main__': - rh_summary_file = sys.argv[1] - number_of_stations = sys.argv[2] - p01 = sys.argv[3] - p1 = sys.argv[4] - p5 = sys.argv[5] - max_freq_p = float(sys.argv[6]) - max_freq_q = float(sys.argv[7]) - figname = sys.argv[8] - - p_freq, q_freq = read_data(rh_summary_file) - plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q,figname) - diff --git a/SYNOP/STATS/summary_rank_histograms_all_stations.sh b/SYNOP/STATS/summary_rank_histograms_all_stations.sh deleted file mode 100755 index 267b0e217dc0abfaf890598502873e5187ddf0cb..0000000000000000000000000000000000000000 --- a/SYNOP/STATS/summary_rank_histograms_all_stations.sh +++ /dev/null @@ -1,223 +0,0 @@ -#!/bin/bash -echo "Bash version ${BASH_VERSION}..." 
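A note on the normalization in plot_rank_hist above: each row of the _p-freq.txt and _q-freq.txt files sums to 1 over its bins, so multiplying by the bin count (number_of_p_bins*p_freq, number_of_q_bins*q_freq) rescales a flat distribution to bar height 1. A minimal sketch of that scaling, with a made-up flat array purely for illustration:

    import numpy as np
    number_of_p_bins = 20
    p_freq = np.full(number_of_p_bins, 1.0 / number_of_p_bins)  # flat frequencies summing to 1
    assert np.allclose(number_of_p_bins * p_freq, 1.0)          # normalized bars all sit at height 1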
-echo "Python version" -which python -################################################################## -# -# Calculation and plotting of summary statistics for quantile -# space rank histograms: -# -# - a map of p-values -# - bar plot of p-value distribution + rank histogram -# averaged over all stations (*** only implemented in GrADS this far ***) -# -# The following need to be given as arguments: -# -# - variable -# - first year of raw data -# - last year of raw data -# - first month of raw data (default 1) -# - last month of raw data (default 12) -# -# As the input for this script, only the following is needed: -# -# 1) Rank histogram statistics files for all stations in a single file -# (for the time range specified by the script arguments) -# -# Execution (e.g): ./summary_rank_histograms_all_stations 39 2020 2020 1 12 -# ORIGINAL: -# Jouni Räisänen, August 2023 -# MODIFIED: -# Alexander Mahura, Sep-Oct 2023 -# -################################################################## -# -# Arguments: -# -# 1. Meteorological variable code -# -variable=$1 -echo " Meteorological variable code = $1" -# -# The following variables are included: -# -# 91 'total amount of clouds' -# 108 'sea level pressure' -# 80 '1-hour precipitation' -# 58 'relative humidity' -# 999 '10-minute precipitation intensity' -# 71 'snow depth' -# 39 '2m temperature' -# 40 'dew point temperature' -# 62 'visibility' -# 111 'wind direction' -# 261 'maximum wind gust in last 10 minutes' -# 221 'surface wind speed' -# -# 2.-3: First and last year -# -year1=$2 -year2=$3 -let nyears=year2-year1+1 -# -# 4.-5: First and last month -# -month1="${4:-1}" -month2="${5:-12}" -# -# Add zero to ahead of $month1 and $month2 for naming of files if needed? -# -if [ $month1 -le 9 ] -then - month1='0'${month1} -fi -if [ $month2 -le 9 ] -then - month2='0'${month2} -fi -# -################################################################## -# -# Add odb_api to $PATH -# On Puhti -#PATH="${PATH}":/projappl/project_2001011/jouni/to_alexander_220823/odb_api/bin -# -# On Lumi -#export PATH="/projappl/project_465000454/ama/software/odb_api/bin:$PATH" -# -# On Lumi -#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin -# -# On Lumi (asked Devaraju Narayanappa, CSC) -# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C -# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C - -# Define the python environment -# On Puhti -#export PATH="/projappl/project_2001011/madeleine/python_envs/post-pp/bin:$PATH" -# On Lumi -# module use /project/project_465000454/devaraju/modules/LUMI/23.03/ -# -################################################################## -# -echo " Compiling the Fortran program that produces the rank histogram summary statistics ..." -echo " fortran-programs/rank_histogram_summary_statistics.f95" - -#gfortran fortran-programs/rank_histogram_summary_statistics.f95 -o rank_histogram_summary_statistics - -################################################################## -# -# ----- File names. Hard-coded, at least this far ----- -# -# 1) Rank histogram directory and input name file name without extension -echo " Directory for rank histogram ..." 
-#rh_dir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} -rh_dir=$rootdir/SYNOP/output/scratch/$experiment/rank_histograms/${variable}_${year1}${month1}-${year2}${month2} -echo " $rh_dir" -rh_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations -rh_summary_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_summary - -# 2) Name of output file(s) without extension -out_file=${rh_dir}/rh_summary_${variable}_${year1}${month1}-${year2}${month2} - -# 3) Directory for figures -echo " Directory for figures ..." -#figure_dir=figures -figure_dir=$rootdir/SYNOP/output/figures/$experiment -echo " $figure_dir" - -################################################################## -# -# Convert the all-station ODB format rank histogram file to txt -# format for reading in Fortran -# -odb sql select \* -i ${rh_file}.odb -o ${rh_file}.txt -# -################################################################## -# -echo " Calculating the rank histogram summary statistics ..." -# -$SRC_OBSALL_DIR/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics -head -1 ${rh_summary_file}_text_output > line_1 -read v1 v2 v3 number_of_stations < line_1 -head -2 ${rh_summary_file}_text_output | tail -1 > line_1 -read v1 v2 v3 v4 number_of_p_bins < line_1 -head -3 ${rh_summary_file}_text_output | tail -1 > line_1 -read v1 max_freq_p < line_1 -head -4 ${rh_summary_file}_text_output | tail -1 > line_1 -read v1 max_freq_q < line_1 -tail -3 ${rh_summary_file}_text_output | head -1 > line_1 -read v1 v2 v3 v4 v5 v6 p01 < line_1 -tail -2 ${rh_summary_file}_text_output | head -1 > line_1 -read v1 v2 v3 v4 v5 v6 p1 < line_1 -tail -1 ${rh_summary_file}_text_output > line_1 -read v1 v2 v3 v4 v5 v6 p5 < line_1 -number_of_p_bins_up=$( echo "(${number_of_p_bins} + 0.5)" | bc ) -rm line_1 -# -###################################################################### -# -# Plotting in Python: -# Madeleine converted Jouni's grads-scripts to python -# 1 - Madeleine Ekblom, 2023-04-06 -# python/plot_p_values_map.py -# 2 - Madeleine Ekblom, 2023-09-12 -# python/plot_rank_hist_sum_all_stations.py -# -###################################################################### -# -echo " (1) Plotting - P-Values Summary on 2D map ..." -# -###################################################################### - -python_script=python/plot_p_values_map_synop.py - -#python3 ${python_script} ${rh_summary_file}_p-values.txt ${year1}${month1}-${year2}${month2} -python3 ${python_script} ${rh_summary_file}_p-values.txt ${year1}${month1}-${year2}${month2} ${figure_dir}/p-value_map_${variable}_${year1}${month1}-${year2}${month2}.png - -#mv p_values.png ${figure_dir}/p-value_map_${variable}_${year1}${month1}-${year2}${month2}.png -echo " Output file: ${figure_dir}/p-value_map_${variable}_${year1}${month1}-${year2}${month2}.png" - -##################################################################### -# -echo " (2) Plotting - Rank Histogram Summary Statistics ..." 
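The head/tail/read chains above each pull one line of ${rh_summary_file}_text_output into shell variables via the line_1 temporary file. An equivalent awk form, shown as a sketch under the assumption that the file layout matches the reads above (number_of_stations is field 4 of line 1, max_freq_p is field 2 of line 3):

    number_of_stations=$(awk 'NR==1 {print $4}' "${rh_summary_file}_text_output")
    max_freq_p=$(awk 'NR==3 {print $2}' "${rh_summary_file}_text_output")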
-# -##################################################################### - -python_script=python/plot_rankhist_sum_all_stations_synop.py - -#python3 ${python_script} ${rh_summary_file} ${number_of_stations} ${p01} ${p1} ${p5} ${max_freq_p} ${max_freq_q} ${figname} -python3 ${python_script} ${rh_summary_file} ${number_of_stations} ${p01} ${p1} ${p5} ${max_freq_p} ${max_freq_q} ${figure_dir}/rank-hist-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png - -#mv rank_hist_sumstats.png ${figure_dir}/rank-hist-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png -echo " Output file: ${figure_dir}/rank-hist-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png" - -###################################################################### -# -# Delete files that are not needed any more. -# ! NOT NEEDED, as plotting was converted from GrADS to Python -# -##################################################################### -#rm summary_plot_in_grads -#rm summary_statistics.ctl diff --git a/SYNOP/STATS/to_be_implemented/stats_job.bash b/SYNOP/STATS/to_be_implemented/stats_job.bash index 364ab432db03594c5588ea03a84ff073ac2eaa40..9ca4da904f9bcf46f6e880ffd8a414251f310fbc 100644 --- a/SYNOP/STATS/to_be_implemented/stats_job.bash +++ b/SYNOP/STATS/to_be_implemented/stats_job.bash @@ -1,12 +1,12 @@ #!/bin/bash -l -#SBATCH --job-name=synop # Job name -#SBATCH --output=synop_stats_test.eo # Name of stdout output file -#SBATCH --error=synop_stats_test.eo # Name of stderr error file +#SBATCH --job-name=synop_150 # Job name +#SBATCH --output=synop_stats_test_150G.eo # Name of stdout output file +#SBATCH --error=synop_stats_test_150G.eo # Name of stderr error file #SBATCH --partition=small # Partition (queue) name small, debug #SBATCH --ntasks=1 # One task (process) #SBATCH --cpus-per-task=1 # Number of cores (threads) -#SBATCH --mem-per-cpu=0 -#SBATCH --time=6:00:00 # Run time (hh:mm:ss) +#SBATCH --mem-per-cpu=90000 +#SBATCH --time=2:00:00 # Run time (hh:mm:ss) #SBATCH --account=project_465000454 # Project for billing # Script for running the statistics package as a batch job @@ -14,4 +14,24 @@ export PATH="/project/project_465000454/jlento/hadisd/hadisd-container-env/bin/:$PATH" echo 'Start statistical computations.' 
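The test blocks added below are switched on and off by editing the hard-coded "if [ 1 -eq 1 ]" / "if [ 0 -eq 1 ]" guards. The same toggle can live in a single variable, which keeps the choice visible at the top of the job script; a sketch (RUN_MODE is a hypothetical name, not part of this repository):

    RUN_MODE=${RUN_MODE:-playground}   # 'playground' or 'implementation'
    if [ "$RUN_MODE" = "playground" ]; then
        python3 synop_stats_playground.py
    fi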
-python3 synop_stats.py + +if [ 1 -eq 1 ]; then # Test the playground version + python3 synop_stats_playground.py +fi + +if [ 0 -eq 1 ]; then # Test the implementation version + script=../../synop_stats.py + + variable=2t + timestring=2010 + experiment=a0gg + sim_infile=/projappl/project_465000454/data/OBSALL/SYNOP/output/ODB_data/a0gg/simulated_synop_data_2010.odb + msd_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/bootstrap_statistics/167_1yr/MSD_bootstrap_167_1yr_all_stations.odb + quant_file=/projappl/project_465000454/lautuppi/playground/synop_stats/test_input_files/quantiles_167_10stations.odb + #quant_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/quantiles/quantiles_167_all_stations.odb + station_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/station_lists/station_list_167_183_17_1990-2023_thinned.txt + obs_monmean_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/monthly_means/monthly_means_all_stations_167.odb + figure_output=/projappl/project_465000454/data/OBSALL/SYNOP/output/figures/$experiment + + python3 $script $variable $timestring $experiment $sim_infile $msd_file $quant_file $station_file $obs_monmean_file $figure_output +fi diff --git a/SYNOP/STATS/to_be_implemented/synop_stats.py b/SYNOP/STATS/to_be_implemented/synop_stats_playground.py similarity index 61% rename from SYNOP/STATS/to_be_implemented/synop_stats.py rename to SYNOP/STATS/to_be_implemented/synop_stats_playground.py index 05cc1945a19229c23c192ee8579fe77770f61952..fa5d1e2a0ce601588bbc8c7b1b789cf5b1f9fc3f 100644 --- a/SYNOP/STATS/to_be_implemented/synop_stats.py +++ b/SYNOP/STATS/to_be_implemented/synop_stats_playground.py @@ -1,5 +1,6 @@ import pandas as pd import pyodc as odc +import pingouin as pg import numpy as np import matplotlib.pyplot as plt import matplotlib.ticker as ticker @@ -41,11 +42,11 @@ def rank_histogram_one_station(sim_data_one_station,msd_data_one_station,quantil # Select 00, 06, 12 and 18 UTC and daily mean. 
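    # The four near-identical blocks below repeat one pattern: filter a single
    # synoptic hour, keep the ODB columns of interest, group by date so that
    # repeated reports collapse to a daily mean, and convert Kelvin to Celsius.
    # A generic sketch of the pattern (df and h are stand-in names, not part of
    # this function):
    #   utc_h = df[df['hour@hdr'] == h]
    #   utc_h = utc_h.groupby(['year@hdr','month@hdr','day@hdr'])['value@body'].mean() - 273.15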
utc00 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 0] utc00 = (utc00[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15 - utc06 = sim_data_one_station[sim_data_one_station['hour@descr'] == 6] + utc06 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 6] utc06 = (utc06[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15 - utc12 = sim_data_one_station[sim_data_one_station['hour@descr'] == 12] + utc12 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 12] utc12 = (utc12[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15 - utc18 = sim_data_one_station[sim_data_one_station['hour@descr'] == 18] + utc18 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 18] utc18 = (utc18[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15 year = 2010 quant_station_year = quantiles_one_station[quantiles_one_station['year@hdr'] == year] @@ -154,7 +155,7 @@ def plot_rank_histograms(rank_counts, msd, p_val,station_id,year,figure_output): plt.savefig(figure_output+'/test_rank_histograms'+station_id+'_'+str(year)+'.png',dpi=140) #plt.show() - plt.clf() + plt.close() def plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,figure_output): # plot quantiles. @@ -196,7 +197,7 @@ def plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,fi ax1.fill_between(dates, q01[q01["hour@hdr"]==0].iloc[:,1], q10[q10["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[2]) ax1.fill_between(dates, q90[q90["hour@hdr"]==0].iloc[:,1], q99[q99["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[2]) ax1.plot(dates, q50[q50["hour@hdr"]==0].iloc[:,1], 'r-') - ax1.scatter(dates, utc00["value@descr"], s=scatter_size, c='k', marker='.') + ax1.scatter(dates, utc00["value@body"], s=scatter_size, c='k', marker='.') ax1.set_xlim(dates[0], dates[-1]) ax1.xaxis.set_major_locator(MonthLocator(months)) ax1.xaxis.set_major_formatter(DateFormatter('%b')) @@ -212,7 +213,7 @@ def plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,fi ax2.fill_between(dates, q01[q01["hour@hdr"]==6].iloc[:,1], q10[q10["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[2]) ax2.fill_between(dates, q90[q90["hour@hdr"]==6].iloc[:,1], q99[q99["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[2]) ax2.plot(dates, q50[q50["hour@hdr"]==6].iloc[:,1], 'r-') - ax2.scatter(dates, utc06["value@descr"], s=scatter_size, c='k', marker='.') + ax2.scatter(dates, utc06["value@body"], s=scatter_size, c='k', marker='.') ax2.set_xlim(dates[0], dates[-1]) ax2.xaxis.set_major_locator(MonthLocator(months)) ax2.xaxis.set_major_formatter(DateFormatter('%b')) @@ -227,7 +228,7 @@ def plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,fi ax3.fill_between(dates, q01[q01["hour@hdr"]==12].iloc[:,1], q10[q10["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[2]) ax3.fill_between(dates, q90[q90["hour@hdr"]==12].iloc[:,1], q99[q99["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[2]) ax3.plot(dates, q50[q50["hour@hdr"]==12].iloc[:,1], 'r-') 
- ax3.scatter(dates, utc12["value@descr"], s=scatter_size, c='k', marker='.') + ax3.scatter(dates, utc12["value@body"], s=scatter_size, c='k', marker='.') ax3.set_xlim(dates[0], dates[-1]) ax3.xaxis.set_major_locator(MonthLocator(months)) ax3.xaxis.set_major_formatter(DateFormatter('%b')) @@ -243,7 +244,7 @@ def plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,fi ax4.fill_between(dates, q01[q01["hour@hdr"]==18].iloc[:,1], q10[q10["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[2]) ax4.fill_between(dates, q90[q90["hour@hdr"]==18].iloc[:,1], q99[q99["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[2]) ax4.plot(dates, q50[q50["hour@hdr"]==18].iloc[:,1], 'r-') - ax4.scatter(dates, utc18["value@descr"], s=scatter_size, c='k', marker='.') + ax4.scatter(dates, utc18["value@body"], s=scatter_size, c='k', marker='.') ax4.set_xlim(dates[0], dates[-1]) ax4.xaxis.set_major_locator(MonthLocator(months)) ax4.xaxis.set_major_formatter(DateFormatter('%b')) @@ -278,7 +279,7 @@ def plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,fi plt.savefig(figure_output+'/test_quantiles'+station_id+'_'+str(year)+'.png',dpi=140) #plt.show() - plt.clf() + plt.close() def plot_p_values(station_data, p_values, experiment, variable, timestring, figure_output): ''' @@ -382,7 +383,7 @@ def plot_p_values(station_data, p_values, experiment, variable, timestring, figu ax4.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') plt.savefig(figname, dpi=140) #plt.show() - plt.clf() + plt.close() def plot_summary_rank_histograms(rank_histograms,p_values,experiment,variable,timestring,figure_output): # Level 3, (see summary_rank_histograms_all_stations()) @@ -498,38 +499,299 @@ def plot_summary_rank_histograms(rank_histograms,p_values,experiment,variable,ti plt.savefig(figure_output+'/rank_hist_summary_'+timestring+'.png',dpi=140) #plt.show() - plt.clf() + plt.close() -def plot_bias_p_values_maps_ttest_synop(): - # Level 3.3.1 - # Convert Jouni's script plot_bias_p_values_maps_t-test_synop.py here - print('Not implemented!') +def plot_bias(bias, figure_output, timestring): + lon_min=-180 + lon_max=180 + lat_min=-90 + lat_max=90 + + colors = ['darkviolet', 'blueviolet', 'indigo', 'darkblue', 'blue', 'deepskyblue', 'aquamarine', 'gold', 'orange', 'orangered', 'red', 'darkred', 'deeppink', 'magenta'] + limits_bias = [-99, -12, -8, -4, -2, -1, -0.5, 0, 0.5, 1, 2, 4, 8, 12, 99] + + fig = plt.figure(figsize=(19,11), layout='constrained') + spec = fig.add_gridspec(2,2) + fig_title='Bias ($^oC$) of yearly mean T2m against observed climate: Experiment a0gg, Variable T2m, Year 2010' + fig.suptitle(fig_title, fontsize=16) + + ax1 = fig.add_subplot(spec[0,0],projection=ccrs.PlateCarree()) + ax1.coastlines() + ax1.set_title('00 UTC') + ax1.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax1.gridlines(draw_labels=True) -#def plot_standard_plots_one_station(): - # Level 3.4.1 (see produce_standard_plots_all_stations()) - # plot 4 time series and 1 rank histogram - # Combine plots_for_one_station and plot_quantiles_rankhist.py + #loop over limits: + legend_elements = [] + bias_freq = [0] * 14 + bias00 = bias[bias['hour@hdr']==0] + b = bias00['bias'].to_numpy() + for i in range(len(b)): + for n in range(14): + if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]): + bias_freq[n] = bias_freq[n] + 1 -def plot_summary_rank_histogram(): - # Level 3, (see summary_rank_histograms_all_stations()) - # Put all rank histogram data to 
one rank histogram. Look at rank_histogram_summary_statistics - # p-values and quantiles - print('Not implemented!') + for n in range(14): + bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1])) + ax1.scatter(bias00['longitude@hdr'][bias_ind], bias00['latitude@hdr'][bias_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')')) + + ax1.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5)) + + ax2 = fig.add_subplot(spec[0,1],projection=ccrs.PlateCarree()) + ax2.coastlines() + ax2.set_title('06 UTC') + ax2.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax2.gridlines(draw_labels=True) + + #loop over limits: + legend_elements = [] + bias_freq = [0] * 14 + bias06 = bias[bias['hour@hdr']==6] + b = bias06['bias'].to_numpy() + for i in range(len(b)): + for n in range(14): + if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]): + bias_freq[n] = bias_freq[n] + 1 + + for n in range(14): + bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1])) + ax2.scatter(bias06['longitude@hdr'][bias_ind], bias06['latitude@hdr'][bias_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')')) + + ax2.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5)) + + ax3 = fig.add_subplot(spec[1,0],projection=ccrs.PlateCarree()) + ax3.coastlines() + ax3.set_title('12 UTC') + ax3.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax3.gridlines(draw_labels=True) + + #loop over limits: + legend_elements = [] + bias_freq = [0] * 14 + bias12 = bias[bias['hour@hdr']==12] + b = bias12['bias'].to_numpy() + for i in range(len(b)): + for n in range(14): + if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]): + bias_freq[n] = bias_freq[n] + 1 + + for n in range(14): + bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1])) + ax3.scatter(bias12['longitude@hdr'][bias_ind], bias12['latitude@hdr'][bias_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')')) + + ax3.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5)) + + ax4 = fig.add_subplot(spec[1,1],projection=ccrs.PlateCarree()) + ax4.coastlines() + ax4.set_title('18 UTC') + ax4.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax4.gridlines(draw_labels=True) + + #loop over limits: + legend_elements = [] + bias_freq = [0] * 14 + bias18 = bias[bias['hour@hdr']==18] + b = bias18['bias'].to_numpy() + for i in range(len(b)): + for n in range(14): + if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]): + bias_freq[n] = bias_freq[n] + 1 + + for n in range(14): + bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1])) + ax4.scatter(bias18['longitude@hdr'][bias_ind], bias18['latitude@hdr'][bias_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')')) + + ax4.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5)) + + #ax.set_title('Bias: ' + header_string, fontsize=12, fontweight='bold') + 
plt.savefig(figure_output+'/bias_map_'+timestring+'.png',dpi=140) + #plt.show() +def plot_ttest_summary_histograms(ttest_results_ll, figure_output, timestring): + p_values = ttest_results_ll[['station@hdr','longitude@hdr','latitude@hdr','hour@hdr','p-val']] + number_of_p_bins=20 + p_val_bins = np.arange(number_of_p_bins)/number_of_p_bins + rank_counts = np.zeros((number_of_p_bins+1,4)) + rank_counts_norm = rank_counts.copy() # copy, not a view, so normalizing does not overwrite the raw counts + fig_title = 'Summary of p-values of t-tests of stationwise monthly means' + + #p_vals_only = p_values['p-val'] + for i in range(4): + hour=i*6 + p_vals = p_values[p_values['hour@hdr']==hour] + p_vals_only = p_vals['p-val'] + #print(p_vals) + for pval in p_vals_only: + ranks = np.searchsorted(p_val_bins, pval, side='right') + rank_counts[ranks,i] += 1 + rank_counts_norm[:,i] = 100*rank_counts[:,i]/sum(rank_counts[:,i]) + + fig = plt.figure(figsize=(12,6), layout='constrained') + spec = fig.add_gridspec(2,2) + fig.suptitle(fig_title, fontsize=16) + + ax1 = fig.add_subplot(spec[0,0]) + ax1.set_title('Normalized p-value frequencies 00 UTC') + ax1.bar(p_val_bins,rank_counts_norm[1:,0],width=0.05,align='edge',color='#016c59') + ax1.set_xlim(-0.01,1.01) + ax1.xaxis.set_major_locator(MultipleLocator(0.2)) + ax1.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax1.set_xlabel('P-value') + ax1.set_ylabel('% of stations') + + ax2 = fig.add_subplot(spec[0,1]) + ax2.set_title('Normalized p-value frequencies 06 UTC') + ax2.bar(p_val_bins,rank_counts_norm[1:,1],width=0.05,align='edge',color='#016c59') + ax2.set_xlim(-0.01,1.01) + ax2.xaxis.set_major_locator(MultipleLocator(0.2)) + ax2.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax2.set_xlabel('P-value') + ax2.set_ylabel('% of stations') + + ax3 = fig.add_subplot(spec[1,0]) + ax3.set_title('Normalized p-value frequencies 12 UTC') + ax3.bar(p_val_bins,rank_counts_norm[1:,2],width=0.05,align='edge',color='#016c59') + ax3.set_xlim(-0.01,1.01) + ax3.xaxis.set_major_locator(MultipleLocator(0.2)) + ax3.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax3.set_xlabel('P-value') + ax3.set_ylabel('% of stations') + + ax4 = fig.add_subplot(spec[1,1]) + ax4.set_title('Normalized p-value frequencies 18 UTC') + ax4.bar(p_val_bins,rank_counts_norm[1:,3],width=0.05,align='edge',color='#016c59') + ax4.set_xlim(-0.01,1.01) + ax4.xaxis.set_major_locator(MultipleLocator(0.2)) + ax4.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax4.set_xlabel('P-value') + ax4.set_ylabel('% of stations') + + #plt.savefig('test_figures/t-test_pval_histograms.png',dpi=140) + plt.savefig(figure_output+'/t-test_pval_histograms_'+timestring+'.png',dpi=140) + #plt.show() + +def plot_ttest_summary_maps(ttest_results, figure_output, timestring): + p_values = ttest_results[['station@hdr','longitude@hdr','latitude@hdr','hour@hdr','p-val']] + lon_min=-180 + lon_max=180 + lat_min=-90 + lat_max=90 + + #figname = figure_output + '/p_vals_SYNOP_'+variable+'_rankhist_'+experiment+'_'+timestring+'.png' + figname = figure_output+'/t-test_pval_maps_'+timestring+'.png' + + colors = ['darkviolet', 'blue', 'skyblue', 'lime', 'yellow', 'orange', 'red', 'grey'] + limits = [0,0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,1] + + fig = plt.figure(figsize=(19,11), layout='constrained') + spec = fig.add_gridspec(2,2) + #fig_title='P-values for SYNOP rank histograms against observed climate: Experiment ' + experiment + ', Variable ' + variable + ', Year ' + timestring + 
fig_title='P-values for SYNOP monthly means against observed climate: Experiment a0gg, Variable T2m, Year 2010' + fig.suptitle(fig_title, fontsize=16) + + p_vals = p_values[p_values['hour@hdr']==0] + p_vals_only = p_vals['p-val'].to_numpy() + ax1 = fig.add_subplot(spec[0,0],projection=ccrs.PlateCarree()) + ax1.set_title('00 UTC') + ax1.coastlines() + ax1.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax1.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + #print(p_mod) + for i in range(len(p_vals_only)): + #print(i) + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax1.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax1.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + p_vals = p_values[p_values['hour@hdr']==6] + p_vals_only = p_vals['p-val'].to_numpy() + ax2 = fig.add_subplot(spec[0,1],projection=ccrs.PlateCarree()) + ax2.set_title('06 UTC') + ax2.coastlines() + ax2.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax2.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + for i in range(len(p_vals_only)): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax2.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax2.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + p_vals = p_values[p_values['hour@hdr']==12] + p_vals_only = p_vals['p-val'].to_numpy() + ax3 = fig.add_subplot(spec[1,0],projection=ccrs.PlateCarree()) + ax3.set_title('12 UTC') + ax3.coastlines() + ax3.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax3.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + for i in range(len(p_vals_only)): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax3.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax3.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + p_vals = p_values[p_values['hour@hdr']==18] + p_vals_only = p_vals['p-val'].to_numpy() + ax4 = fig.add_subplot(spec[1,1],projection=ccrs.PlateCarree()) + ax4.set_title('18 UTC') + ax4.coastlines() + ax4.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax4.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + for i in 
range(len(p_vals_only)): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax4.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax4.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + plt.savefig(figname, dpi=140) + #plt.show() def read_synop_odb(sim_infile): # Read the odb file containing simulated SYNOP obs. sim = odc.read_odb(sim_infile, single = True, columns = ['year@hdr', 'month@hdr', 'day@hdr', 'hour@hdr', 'longitude@hdr', 'latitude@hdr', 'variable@hdr', 'station@hdr', 'value@body']) return sim - -#def read_stations(): - # Read SYNOP station list. -def calculate_monthly_means_sim(sim_raw): +def calculate_monthly_means_sim(sim_raw): # Let's do yearly means for now and come back to this later. # Level 2.1 # Compute monthly means for all stations from the simulated data. # Loop over stations @@ -547,6 +809,11 @@ def calculate_monthly_means_sim(sim_raw): sim_monmean["value@body"] -= 273.15 return sim_monmean +def calculate_yearly_means_sim(sim_raw): + sim_yearmean = (sim_raw.groupby(["year@hdr", "hour@hdr", "station@hdr"], as_index=False).agg({"value@body": "mean"})) + sim_yearmean["value@body"] -= 273.15 # Kelvin to Celsius + return sim_yearmean + def read_MSD_stats(msd_file): # Read all climatological Mean Squared Deviation bootstrap statistics for production of rank histograms. msd = odc.read_odb(msd_file, single = True, columns = ['station@hdr','longitude@hdr','latitude@hdr','elevation@hdr','realization@hdr','msd00@body','msd06@body','msd12@body','msd18@body','msd24@body']) @@ -568,6 +835,31 @@ def read_quantiles(quant_file): quant = odc.read_odb(quant_file, single=True, columns=all_columns) return quant +def read_obs_monmeans(obs_monmean_file): + # Obs values already in Celsius. Remove unphysical temperature values (outside -150..150 degC). + obs = odc.read_odb(obs_monmean_file, single = True, columns = ['station@hdr', 'longitude@hdr', 'latitude@hdr', 'year@hdr', 'month@hdr', 'hour@hdr', 'value@body']) + obs.where(obs['value@body'] > -150, np.nan, inplace = True) + obs.where(obs['value@body'] < 150, np.nan, inplace = True) + obs.dropna(how='any', inplace = True) + obs["station@hdr"] = obs["station@hdr"].astype(int).astype(str).str.zfill(6) # Force the station ids to be six char strings. 
+ return obs + +def calculate_yearly_means_obs(obs): + obs_yearmean = (obs.groupby(["year@hdr", "hour@hdr", "station@hdr"], as_index=False).agg({"value@body": "mean"})) + return obs_yearmean + +def calculate_allyear_mean(obs): + obs_allyearmean = (obs.groupby(["hour@hdr", "station@hdr"], as_index=False).agg({"value@body": "mean"})) + return obs_allyearmean + +def calculate_bias(yearly_means_sim, obs_allyearmean, coords): + sim_and_obs_allyearmean = yearly_means_sim.merge(obs_allyearmean, on=['hour@hdr', 'station@hdr'], suffixes=('_df1', '_df2')) + sim_and_obs_allyearmean['bias'] = sim_and_obs_allyearmean['value@body_df1'] - sim_and_obs_allyearmean['value@body_df2'] + bias_tmp = sim_and_obs_allyearmean[['hour@hdr', 'station@hdr', 'bias']] + bias = bias_tmp.merge(coords, on = ["station@hdr"]) + bias["longitude@hdr"] = bias["longitude@hdr"].apply(lambda lon: lon - 360 if lon > 180 else lon) + return bias + def produce_rank_histograms_all_stations(sim_data,msd_data,quantiles,station_list,figure_output): # Level 2.2 # Loop over all stations. @@ -578,26 +870,51 @@ def produce_rank_histograms_all_stations(sim_data,msd_data,quantiles,station_lis sim_data['station@hdr'] = sim_data['station@hdr'].str.strip() msd_data['station@hdr'] = msd_data['station@hdr'].astype(str).str.strip() quantiles['station@hdr'] = quantiles['station@hdr'].astype(str).str.strip() - for i in range(10): - #for i in range(len(station_list)): - station_id =station_list[i].strip() # prepend zeros if length <6 chars. (???) (Test if it breaks anything or not.) + for i in range(10): # For testing with 10 first stations. + #for i in range(len(station_list)): # For running the full list of 541 stations. + station_id =station_list[i].strip() # prepend zeros if length <6 chars. sim_data_one_station = sim_data[sim_data['station@hdr'] == station_id] msd_data_one_station = msd_data[msd_data['station@hdr'] == station_id.lstrip('0')] quantiles_one_station = quantiles[quantiles['station@hdr'] == station_id.lstrip('0')] print('Processing rank histograms and quantile plots for station ', station_id) rank_histogram,p_val = rank_histogram_one_station(sim_data_one_station,msd_data_one_station,quantiles_one_station,station_id,figure_output) - #print(p_val) rank_histograms_all_stations[i,:,:] = rank_histogram[:,:] p_vals_all_stations[i,:]=p_val[:] #np.savetxt('test_output_files/HadISD_p_vals_2010.txt',p_vals_all_stations) return rank_histograms_all_stations,p_vals_all_stations -def ttest_all_stations(): - # Level 2.3 - # Juha's example does t-tests for all stations at once - # -->> plot_bias_p_values_maps_t-test_synop from Jouni's original python scripts - print('Not implemented!') +def calculate_ttests(sim_yearmean, obs_yearmean, coords): + results = [] + + # Get unique station and hour combinations + stations = sim_yearmean["station@hdr"].unique() + #stations = stations[:5] + hours = [0, 6, 12, 18] + + # Perform t-tests for each station and hour + for station in stations: + for hour in hours: + obs_values = obs_yearmean.loc[ + (obs_yearmean["station@hdr"] == station) & (obs_yearmean["hour@hdr"] == hour), + "value@body" + ] + sim_values = sim_yearmean.loc[ + (sim_yearmean["station@hdr"] == station) & (sim_yearmean["hour@hdr"] == hour), + "value@body" + ] + + if len(obs_values) > 1 and len(sim_values) > 0: + #print(obs_values, sim_values) + test_result = pg.ttest(obs_values, sim_values) + test_result.insert(0, "station@hdr", station) + test_result.insert(1, "hour@hdr", hour) + results.append(test_result[["station@hdr","hour@hdr","T","p-val"]]) + + 
t_test_results = pd.concat(results, ignore_index=True) if results else pd.DataFrame() + t_test_results_ll = pd.merge(t_test_results, coords, how = "left", on = ["station@hdr"]) + t_test_results_ll["longitude@hdr"] = t_test_results_ll["longitude@hdr"].apply(lambda lon: lon - 360 if lon > 180 else lon) + return t_test_results_ll def produce_standard_plots_all_stations(): # Level 2.4 @@ -614,22 +931,26 @@ def produce_standard_plots_all_stations(): ##################### The main function. ########################### def main(): - # Level 1 + variable = '2t' + timestring = '2010' + experiment = 'a0gg' sim_infile='/projappl/project_465000454/data/OBSALL/SYNOP/output/ODB_data/a0gg/simulated_synop_data_2010.odb' msd_file='/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/bootstrap_statistics/167_1yr/MSD_bootstrap_167_1yr_all_stations.odb' - quant_file='test_input_files/quantiles_167_10stations.odb' - #quant_file='/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/quantiles/quantiles_167_all_stations.odb' - monthly_means_outfile='test_output_files/monthly_means_synop.odb' - figure_output='test_figures' + ###### Smaller test file that contains T2m quantiles for the first 10 stations only: + #quant_file='/projappl/project_465000454/lautuppi/playground/synop_stats/test_input_files/quantiles_167_10stations.odb' + ###### The "real" T2m quantile file for all 541 SYNOP stations: + quant_file='/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/quantiles/quantiles_167_all_stations.odb' + #monthly_means_outfile='test_output_files/monthly_means_synop.odb' # Maybe we don't need to write these anywhere after all. + ###### A place for figures: + #figure_output='/projappl/project_465000454/lautuppi/playground/synop_stats/test_figures' # Experimental test figures + ###### A more appropriate place for figures: + figure_output='/projappl/project_465000454/data/OBSALL/SYNOP/output/figures/'+experiment print('Loading simulated SYNOP data for statistical computations ...') sim_data=read_synop_odb(sim_infile) print('... Done!') station_list = sorted(sim_data['station@hdr'].head(541)) ##read_stations() # Do I need this or do I get what I need from the ODB file??? - print('Computing monthly means from simulated SYNOP data ...') - monthly_means=calculate_monthly_means_sim(sim_data) - print('... Done!') - #write_monthly_means(monthly_means, monthly_means_outfile) # Look at how to write odb with python + #write_monthly_means(monthly_means, monthly_means_outfile) # Maybe we just make the plots and that'll be it. print('Loading MSD statistics and quantiles ...') msd_stats=read_MSD_stats(msd_file) quantiles=read_quantiles(quant_file) @@ -638,22 +959,43 @@ def main(): rank_histograms_all_stations,p_vals_all_stations = produce_rank_histograms_all_stations(sim_data,msd_stats,quantiles,station_list,figure_output) print('... Done!') print('Plot all rank histogram p-values on a map ...') - station_data = np.loadtxt('/projappl/project_465000454/data/OBSALL/SYNOP/input/HadISD_tmp/hadisd_data_to_Alexander/station_lists/station_list_167_183_17_1990-2023_thinned.txt', skiprows=1) - variable = '2t' - timestring = '2010' - experiment = 'a0gg' + # The station coordinates could also be extracted from the dataframes already in memory; for now they are read from the station list file. 
+ station_data = np.loadtxt('/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/station_lists/station_list_167_183_17_1990-2023_thinned.txt', skiprows=1) plot_p_values(station_data,p_vals_all_stations,experiment,variable,timestring,figure_output) print('... Done!') print('Plot summary rank histograms and p-value histograms ...') #np.save('test_output_files/rank_histograms_all_stations.npy',rank_histograms_all_stations) plot_summary_rank_histograms(rank_histograms_all_stations,p_vals_all_stations,experiment,variable,timestring,figure_output) print('... Done!') + print('Computing yearly means from simulated SYNOP data ...') # Yearly means for now. A foothold for expanding to monthly means remains. + yearly_means_sim=calculate_yearly_means_sim(sim_data) + print('... Done!') + print('Reading observed monthly means and computing yearly means from them ...') + obs_monmean_file = '/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/monthly_means/monthly_means_all_stations_167.odb' + obs_monmeans = read_obs_monmeans(obs_monmean_file) + obs_yearmeans = calculate_yearly_means_obs(obs_monmeans) + obs_allyearmean = calculate_allyear_mean(obs_monmeans) + coords = sim_data[["station@hdr","longitude@hdr","latitude@hdr"]].drop_duplicates() + print('... Done!') + print('Compute stationwise biases of the selected yearly means relative to the observed climatology ...') + biases = calculate_bias(yearly_means_sim, obs_allyearmean, coords) + print('... Done!') + print('Plot biases on maps ...') + plot_bias(biases, figure_output, timestring) + print('... Done!') + print('Compute t-tests for the yearly means ...') + ttest_results = calculate_ttests(yearly_means_sim, obs_yearmeans, coords) + print('... Done!') + print('Plot p-value summary histograms of the yearly mean t-tests ...') + plot_ttest_summary_histograms(ttest_results, figure_output, timestring) + print('... Done!') + print('Plot the p-values of the yearly mean t-tests on maps ...') + plot_ttest_summary_maps(ttest_results, figure_output, timestring) + print('... Done!') + print('----- All steps of SYNOP statistical monitoring completed. 
-----') #ttest_all_stations() #produce_standard_plots_all_stations() #summary_rank_histograms_all_stations() if __name__=='__main__': - # - # - # main() diff --git a/SYNOP/interp_synop.py b/SYNOP/interp_synop.py index 1880ed7e21073fe04fd056ef555c9500535d1e09..71bd8c533f8fefd6fed01204bf24c20d7700db9b 100644 --- a/SYNOP/interp_synop.py +++ b/SYNOP/interp_synop.py @@ -44,7 +44,8 @@ def write_out_v1(surf_profs,target_lons,target_lats,stat_id,outf,year,month,day, line_i=np.zeros(6) line_r=np.zeros(3) f=open(outf,'a+') - f.write('year@descr:integer month@descr:integer day@descr:integer hour@descr:integer minute@descr:integer second@descr:integer longitude@descr:real latitude@descr:real value@descr:real variable@descr:integer station@descr:string'+ '\n') + #f.write('year@descr:integer month@descr:integer day@descr:integer hour@descr:integer minute@descr:integer second@descr:integer longitude@descr:real latitude@descr:real value@descr:real variable@descr:integer station@descr:string'+ '\n') + f.write('year@hdr:integer month@hdr:integer day@hdr:integer hour@hdr:integer minute@hdr:integer second@hdr:integer longitude@hdr:real latitude@hdr:real value@body:real variable@hdr:integer station@hdr:string'+ '\n') for i in range(shape_surf[1]): line_i[0]=year #1990 # year@descr:integer line_i[1]=month #1 # month@descr:integer diff --git a/SYNOP/main_synop.sh b/SYNOP/main_synop.sh index d41847cedb8575a0d59811e4a16ab5a71b918bd0..56af8a832bb07db01de082f1142a97e86e89c916 100755 --- a/SYNOP/main_synop.sh +++ b/SYNOP/main_synop.sh @@ -49,7 +49,7 @@ echo " STEP #0 : PRE-CHECKING EXISTENCE OF NECESSARY DIRECTORIES ..." echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" pwd echo " Directory for pre-processing mod data extracted with gsv ..." -export inp_dir_gsvmoddata=$rootdir/SYNOP/output/scratch/$experiment/GSVMODDATA #"GSVMODDATA" +export inp_dir_gsvmoddata=$rootdir/SYNOP/output/scratch/$experiment/GSVMODDATA echo "$inp_dir_gsvmoddata" if test -d "$inp_dir_gsvmoddata"; then echo $inp_dir_gsvmoddata exists @@ -57,11 +57,10 @@ else echo $inp_dir_gsvmoddata does not exist echo " Creating directory: $inp_dir_gsvmoddata" mkdir -p $inp_dir_gsvmoddata - #cp SYNOP/GSVMODDATA/*.nc $inp_dir_gsvmoddata # for testing fi echo " Directory for processing observation data ..." -export inp_dir_dataobs=$rootdir/SYNOP/output/scratch/$experiment/DATAOBS #"DATAOBS" +export inp_dir_dataobs=$rootdir/SYNOP/output/scratch/$experiment/DATAOBS echo "$inp_dir_dataobs" if test -d "$inp_dir_dataobs"; then echo $inp_dir_dataobs exists @@ -72,7 +71,7 @@ else fi echo " Directory for processing modeled data ..." -export inp_dir_datamod=$rootdir/SYNOP/output/scratch/$experiment/DATAMOD #"DATAMOD" +export inp_dir_datamod=$rootdir/SYNOP/output/scratch/$experiment/DATAMOD echo "$inp_dir_datamod" if test -d "$inp_dir_datamod"; then echo $inp_dir_datamod exists @@ -88,7 +87,7 @@ echo $inp_dir_data_ODB if test -d "$inp_dir_data_ODB"; then echo $inp_dir_data_ODB exists if [ $restart == 'true' ]; then - rm $rootdir/SYNOP/output/ODB_data/$experiment/simulated_synop_data.odb + rm $rootdir/SYNOP/output/ODB_data/$experiment/simulated_synop_data* #.odb echo 'Restarting means removing the old simulated synop observations!' fi else @@ -98,7 +97,7 @@ else fi echo " Directory for saving graphical output ..." 
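Every directory check in STEP #0 repeats the same test-then-create pattern, as in the block that follows; since mkdir -p succeeds silently on an existing directory, the pattern could be folded into a small helper. A sketch (ensure_dir is a hypothetical name, not in the script):

    ensure_dir () {
        # Report and create $1 only when missing; mkdir -p is safe either way.
        if test -d "$1"; then echo "$1 exists"; else echo " Creating directory: $1"; mkdir -p "$1"; fi
    }
    ensure_dir "$inp_dir_graphs"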
-export inp_dir_graphs=$rootdir/SYNOP/output/figures/$experiment #"GRAPHS" +export inp_dir_graphs=$rootdir/SYNOP/output/figures/$experiment echo "$inp_dir_graphs" if test -d "$inp_dir_graphs"; then echo $inp_dir_graphs exists @@ -112,18 +111,6 @@ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" echo " ..... STEP 0 - COMPLETED ....." echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo " " -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo " STEP #1 : PRE-PROCESSING MOD DATA EXTRACTED WITH GSV_INTERFACE ..." -echo " SCRIPT --- gsv_mod_data.sh" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - -#./gsv_mod_data.sh - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo " ..... STEP 1 - COMPLETED ....." -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - echo "====================================================================" echo " SYNOP : START LOOP OVER ALL AVAILABLE TIME-SLICES " echo "====================================================================" @@ -146,26 +133,16 @@ while [ $cur_date -le $edate ]; do done echo 'List of start dates created!' -#tmp_file_tstamps=$rootdir/SYNOP/output/scratch/$experiment/file_with_tstamps.txt #"file_with_tstamps.txt" -#ls $inp_dir_gsvmoddata > $tmp_file_tstamps -#tail -n 1 "$tmp_file_tstamps" | tee >(wc -c | xargs -I {} truncate "$tmp_file_tstamps" -s -{}) - -#nrec_file=$( sed -n '$=' $tmp_file_tstamps) -#echo "nrec_file: $nrec_file" - nrec_date=$( sed -n '$=' $tmp_file_dates ) # number of dates. echo 'Number of chunks to be processed:' $nrec_date for (( nnrec=1; nnrec<=$nrec_date; nnrec++ )) do # loop over dates - #echo 'hep------------' - #echo cdate=$( sed -n "${nnrec}p" $tmp_file_dates ) cdate_s=$( echo $cdate | cut -c 1-8 ) echo '#### Current date ####' $cdate + echo 'Get the GSV data ...' ./gsv_mod_data.sh $cdate tmp_file_tstamps=$rootdir/SYNOP/output/scratch/$experiment/file_with_tstamps.txt ls $inp_dir_gsvmoddata/$cdate_s > $tmp_file_tstamps - #sed -i '/2t_r360x180\.nc/d' $tmp_file_tstamps - #sed -i -e "/$cdate_s/!d" $tmp_file_tstamps tail -n 1 "$tmp_file_tstamps" | tee >(wc -c | xargs -I {} truncate "$tmp_file_tstamps" -s -{}) nrec_file=$( sed -n '$=' $tmp_file_tstamps) echo $nrec_file '......................' @@ -174,9 +151,6 @@ for (( nnrec=1; nnrec<=$nrec_date; nnrec++ )) do # loop over dates b_mm=$( head -n 1 $tmp_file_tstamps | cut -c 5-6 ) b_dd=$( head -n 1 $tmp_file_tstamps | cut -c 7-8 ) b_hh=$( head -n 1 $tmp_file_tstamps | cut -c 9-10 ) - #echo "b_yy, b_mm, b_dd, b_hh : $b_yy $b_mm $b_dd $b_hh" - #tail -n +2 "$tmp_file_tstamps" > "$tmp_file_tstamps.tmp" && mv "$tmp_file_tstamps.tmp" "$tmp_file_tstamps" - # see line above (it is added at the bottom of the script) echo "====================================================================" echo " START CALCULATIONS FOR TIME-SLICE: $b_yy $b_mm $b_dd $b_hh " echo "====================================================================" @@ -186,11 +160,8 @@ for (( nnrec=1; nnrec<=$nrec_date; nnrec++ )) do # loop over dates echo " STEP #2 : EXTRACTING SYNOP OBS DATA FROM ODB ..." 
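# (Sketch) The b_yy/b_mm/b_dd/b_hh extraction above can also be done with bash
# substring expansion instead of four head|cut pipelines; ts is a stand-in name:
#   ts=$(head -n 1 "$tmp_file_tstamps")
#   b_yy=${ts:0:4}; b_mm=${ts:4:2}; b_dd=${ts:6:2}; b_hh=${ts:8:2}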
echo " SCRIPT --- synop_obs.sh" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - #./synop_obs.sh 2020 01 20 00 00 00 39 21 24 61 64 - #./synop_obs.sh 2020 01 20 15 00 00 $varMETnum $lon_val_min $lon_val_max $lat_val_min $lat_val_max b_min="00" b_sec="00" - pwd ./synop_obs.sh $b_yy $b_mm $b_dd $b_hh $b_min $b_sec $varMETnum $lon_val_min $lon_val_max $lat_val_min $lat_val_max echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" @@ -203,12 +174,8 @@ for (( nnrec=1; nnrec<=$nrec_date; nnrec++ )) do # loop over dates echo " AND ADDING SUCH INTERPOLATED MOD DATA TO ODB ..." echo " SCRIPT --- synop_mod.sh" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - #./synop_mod.sh 2020012000_2t_r360x180.nc 2020012000 2t - #./synop_mod.sh 2020012015_2t_r360x180.nc 2020012015 $varMETstr - #gsv_extr_filename="2t_r360x180.nc" - #export gsv_extr_filename # export from load_modules.sh + b_2t=$gsv_extr_filename - #b_2t="_2t_r360x180.nc" b_yyyymmddhh=$b_yy$b_mm$b_dd$b_hh echo "b_2t, b_yyyymmddhh = $b_2t $b_yyyymmddhh" b_fname_yyyymmddhh_2t=${b_yyyymmddhh}_$b_2t @@ -219,19 +186,16 @@ for (( nnrec=1; nnrec<=$nrec_date; nnrec++ )) do # loop over dates echo " ..... STEP 3 - COMPLETED ....." echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - echo " STEP #4 : SELF-CONTROL BY PLOTTING DIFFEENCE OBS VS MOD ..." - echo " SCRIPT --- graph_mod_obs.py" - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - #python graph_mod_obs.py 2020012000 2t - #python graph_mod_obs.py 2020012015 $varMETstr - #python graph_mod_obs.py $inp_dir_dataobs/ $inp_dir_datamod/ $inp_dir_graphs/ $b_yyyymmddhh $varMETstr - - #mv *.png $inp_dir_graphs # GRAPHS/ - - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - echo " ..... STEP 4 - COMPLETED ....." - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + if [ 0 -eq 1 ]; then # This might be useful only in storylines simulations! + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + echo " STEP #4 : SELF-CONTROL BY PLOTTING DIFFEENCE OBS VS MOD ..." + echo " SCRIPT --- graph_mod_obs.py" + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + python graph_mod_obs.py $inp_dir_dataobs/ $inp_dir_datamod/ $inp_dir_graphs/ $b_yyyymmddhh $varMETstr + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + echo " ..... STEP 4 - COMPLETED ....." + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + fi echo "====================================================================" echo " END CALCULATIONS FOR TIME-SLICE: $b_yy $b_mm $b_dd $b_hh " @@ -244,23 +208,28 @@ for (( nnrec=1; nnrec<=$nrec_date; nnrec++ )) do # loop over dates if [ $clean_scratch == 'true' ]; then rm -rf $inp_dir_gsvmoddata/$cdate_s fi - echo " " - echo "====================================================================" - echo " Checking if STATS should be run ..." - echo " STATS - PRODUCING QUANTILE RANK HISTOGRAM STATISTICS AND PLOTS" - echo " SCRIPT --- synop_stats.sh" - echo " STATS is run on a monthly basis - i.e., done on 2nd day of a month" - echo " ... BUT NOW : $b_yy $b_mm $b_dd $b_hh" - ##if [ $b_dd == "02" ] && [ $b_hh == "00" ]; then - if [ $b_dd == "21" ] && [ $b_hh == "01" ]; then - b_mm_start="1" - b_mm_end="12" - echo " ... 
CALCULATING NOW : $b_yy $b_mm $b_dd $b_hh" - echo "====================================================================" - echo " b_yy, b_mm, b_dd, b_hh, b_mm_start, b_mm_end : $b_yy $b_mm $b_dd $b_hh $b_mm_start $b_mm_end" - echo " varMETnum, b_yy, b_yy, b_mm_start, b_mm_end : $varMETnum, $b_yy, $b_yy, $b_mm_start, $b_mm_end" - #./synop_stats.sh $varMETnum $b_yy $b_yy $b_mm_start $b_mm_end +# Insert the new stats monitoring here !!! +# - Step 1: Enable stats for year 2010 only. + if [ $b_yy$b_mm$b_dd$b_hh == "2010123123" ]; then + echo '----- It is time to run statistical monitoring. -----' + echo " STATS - producing the standard set of monitoring plots." + echo " SCRIPT --- synop_stats.py" + PATH_orig=$PATH + export PATH="/project/project_465000454/jlento/hadisd/hadisd-container-env/bin/:$PATH" + variable=2t + #timestring=2010 + #experiment=a0gg + sim_infile=/projappl/project_465000454/data/OBSALL/SYNOP/output/ODB_data/$experiment/simulated_synop_data_2010.odb + msd_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/bootstrap_statistics/167_1yr/MSD_bootstrap_167_1yr_all_stations.odb + #quant_file=/projappl/project_465000454/lautuppi/playground/synop_stats/test_input_files/quantiles_167_10stations.odb + quant_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/quantiles/quantiles_167_all_stations.odb + station_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/station_lists/station_list_167_183_17_1990-2023_thinned.txt + obs_monmean_file=/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/monthly_means/monthly_means_all_stations_167.odb + figure_output=/projappl/project_465000454/data/OBSALL/SYNOP/output/figures/$experiment #$climate_simulation + + python3 synop_stats.py $variable $b_yy $climate_simulation $sim_infile $msd_file $quant_file $station_file $obs_monmean_file $figure_output + export PATH=$PATH_orig fi echo " Checking size of file with time stamps: $tmp_file_dates ..." diff --git a/SYNOP/synop_mod.sh b/SYNOP/synop_mod.sh index 6b01af8cf7e76b31bc0e14376bcabb02d80b2b16..d82f1d04c9f8dde0560bfcef41db033029fd2856 100755 --- a/SYNOP/synop_mod.sh +++ b/SYNOP/synop_mod.sh @@ -182,7 +182,8 @@ elif [ $run_type == 'test' ] || [ $run_type == 'FMI' ]; then # Alexander's solut # .............................................................................. # SYNOP data ................................................................... # Defining header for adding mod synop data to odb file ... - header_mod_synop_data="year@descr:integer month@descr:integer day@descr:integer hour@descr:integer minute@descr:integer second@descr:integer longitude@descr:real latitude@descr:real value@descr:real variable@descr:integer station@descr:string" + #header_mod_synop_data="year@descr:integer month@descr:integer day@descr:integer hour@descr:integer minute@descr:integer second@descr:integer longitude@descr:real latitude@descr:real value@descr:real variable@descr:integer station@descr:string" + header_mod_synop_data="year@hdr:integer month@hdr:integer day@hdr:integer hour@hdr:integer minute@hdr:integer second@hdr:integer longitude@hdr:real latitude@hdr:real value@body:real variable@hdr:integer station@hdr:string" # Inserting header line as 1st line into mod_synop_YYYYMMDDHH.dat file ... sed -i "1i ${header_mod_synop_data}" $outfile @@ -192,7 +193,7 @@ fi # Common #################################################################### # Adding data to odb file ... 
#mod_synop_odb="/scratch/project_465000454/ama/open_data_ODB/synop_mod_data_2010-2021.odb" # Opt1 - new ##mod_synop_odb="/scratch/project_465000454/ama/open_data_ODB/synop_open_data_2010-2021.odb" # Opt2 - original -mod_synop_odb=$rootdir/SYNOP/output/ODB_data/$experiment/simulated_synop_data.odb #synop_open_data_2010-2021.odb +mod_synop_odb=$rootdir/SYNOP/output/ODB_data/$experiment/simulated_synop_data_$year.odb #synop_open_data_2010-2021.odb echo " Path and Name of odb-file with saved records:" echo " $mod_synop_odb" diff --git a/SYNOP/synop_stats.py b/SYNOP/synop_stats.py new file mode 100644 index 0000000000000000000000000000000000000000..7713031a21a93e0d09f55f9bd300e978bd9f8d7f --- /dev/null +++ b/SYNOP/synop_stats.py @@ -0,0 +1,1012 @@ +import pandas as pd +import pyodc as odc +import pingouin as pg +import numpy as np +import sys +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker +import datetime +from matplotlib.dates import drange, MonthLocator, DateFormatter +from matplotlib.ticker import (MultipleLocator, AutoMinorLocator) +import matplotlib.lines as mlines +import matplotlib.patches as mpatches +import cartopy.crs as ccrs +from matplotlib.lines import Line2D + +# Remember this before running: +# export PATH="/project/project_465000454/jlento/hadisd/hadisd-container-env/bin/:$PATH" +# That's a python container that has pyodc + +# NOTE: This script is yet to be implemented properly ! ! ! ! ! ! ! ! ! +# Copied from /projappl/project_465000454/lautuppi/playground/synop_stats/ + +# Pseudo code at the beginning!!! +############### Functions for individual tasks. ######################### + +#def monthly_means_one_station(): (Maybe not needed.) + # Level 3.1.1 (belongs to 2.1 calculate_monthly_means_sim) + # Replicate Fortran code "monthly_means_one_station" + +def write_monthly_means(): + # Level 3.1.2 (belongs to 2.1 calculate_monthly_means_sim) + print('Not implemented!') + +def rank_histogram_one_station(sim_data_one_station,msd_data_one_station,quantiles_one_station,station_id,figure_output): + # Level 3.2.2 (belongs to 2.2 produce_rank_histograms_all_stations) + # Replicate Fortran code "rank_histogram_one_station" + # Reads the following files: + # - infile='sim_data.txt' + # - quantile_file='quantiles.txt' + # - msd_bootstrap_file='msd_bootstrap_selection' + # Return data for a rank histogram for one station. + # Do we need to save the rank histogram in ODB format??? + # Select 00, 06, 12 and 18 UTC and daily mean. 
+    utc00 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 0]
+    utc00 = (utc00[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15
+    utc06 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 6]
+    utc06 = (utc06[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15
+    utc12 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 12]
+    utc12 = (utc12[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15
+    utc18 = sim_data_one_station[sim_data_one_station['hour@hdr'] == 18]
+    utc18 = (utc18[["year@hdr","month@hdr","day@hdr","hour@hdr","value@body","variable@hdr","station@hdr"]]).groupby(["year@hdr","month@hdr","day@hdr","hour@hdr","variable@hdr","station@hdr"]).mean() - 273.15
+    year = 2010 # hard-coded: the monitoring is currently run for the 2010 test period only (cf. main_synop.sh)
+    quant_station_year = quantiles_one_station[quantiles_one_station['year@hdr'] == year]
+    #print(quantiles_one_station)
+    #print(quant_station_year)
+    quant_station_year_00 = quant_station_year[quant_station_year['hour@hdr'] == 0]
+    quant_station_year_06 = quant_station_year[quant_station_year['hour@hdr'] == 6]
+    quant_station_year_12 = quant_station_year[quant_station_year['hour@hdr'] == 12]
+    quant_station_year_18 = quant_station_year[quant_station_year['hour@hdr'] == 18]
+    # Compute the ranks
+    rank_counts = np.zeros((100,4)) # 99 quantiles but include edges
+    #rank_counts_06 = np.zeros(100)
+    #rank_counts_12 = np.zeros(100)
+    #rank_counts_18 = np.zeros(100)
+    quantile_columns = [f'q{str(i).zfill(2)}@body' for i in range(1, 100)]
+    #print(quant_station_year_00[quantile_columns])
+    for i, temp in enumerate(utc00["value@body"]):
+        ranks = np.searchsorted(quant_station_year_00[quantile_columns].iloc[i,:], temp, side='right')
+        rank_counts[ranks,0] += 1
+
+    for i, temp in enumerate(utc06["value@body"]):
+        ranks = np.searchsorted(quant_station_year_06[quantile_columns].iloc[i,:], temp, side='right')
+        rank_counts[ranks,1] += 1
+
+    for i, temp in enumerate(utc12["value@body"]):
+        ranks = np.searchsorted(quant_station_year_12[quantile_columns].iloc[i,:], temp, side='right')
+        rank_counts[ranks,2] += 1
+
+    for i, temp in enumerate(utc18["value@body"]):
+        ranks = np.searchsorted(quant_station_year_18[quantile_columns].iloc[i,:], temp, side='right')
+        rank_counts[ranks,3] += 1
+
+    # Compute MSDs (00, 06, 12, 18) relative to flat rank histograms.
+    # NOTE: computation in relative frequencies! Dividing the counts by 3.65
+    # converts them to % of the total, assuming 365 daily values per UTC hour
+    # (one non-leap year), so a flat histogram is 1% per bin.
+    flat_histogram = np.ones(100)
+    msd = np.zeros(4)
+    msd[0] = np.mean((rank_counts[:,0]/3.65 - flat_histogram) ** 2)
+    msd[1] = np.mean((rank_counts[:,1]/3.65 - flat_histogram) ** 2)
+    msd[2] = np.mean((rank_counts[:,2]/3.65 - flat_histogram) ** 2)
+    msd[3] = np.mean((rank_counts[:,3]/3.65 - flat_histogram) ** 2)
+    # Compute the p-values.
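+    # Illustrative helper (not called below): the p-value is the fraction of
+    # bootstrap MSDs at least as large as the simulated MSD, assuming the
+    # bootstrap column is sorted in ascending order (as in the code that follows).
+    def _bootstrap_p(sorted_boot_msd, msd_value):
+        n = len(sorted_boot_msd)
+        return (n - np.searchsorted(sorted_boot_msd, msd_value, side='right')) / n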
+ p_val = np.zeros(4) + n_bootstrap = len(msd_data_one_station.iloc[:,5]) + p_rank_00 = np.searchsorted(msd_data_one_station.iloc[:,5],msd[0],side='right') + p_val[0] = (n_bootstrap - p_rank_00)/n_bootstrap + p_rank_06 = np.searchsorted(msd_data_one_station.iloc[:,6],msd[1],side='right') + p_val[1] = (n_bootstrap - p_rank_06)/n_bootstrap + p_rank_12 = np.searchsorted(msd_data_one_station.iloc[:,7],msd[2],side='right') + p_val[2] = (n_bootstrap - p_rank_12)/n_bootstrap + p_rank_18 = np.searchsorted(msd_data_one_station.iloc[:,8],msd[3],side='right') + p_val[3] = (n_bootstrap - p_rank_18)/n_bootstrap + + plot_rank_histograms(rank_counts, msd, p_val,station_id,year,figure_output) + plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,figure_output) + + return rank_counts,p_val + +def plot_rank_histograms(rank_counts, msd, p_val,station_id,year,figure_output): + # plot the rank histograms + fig = plt.figure(figsize=(10,8), layout='constrained') + spec = fig.add_gridspec(3,2) + fig_title='Test rank histograms for station '+station_id + fig.suptitle(fig_title, fontsize=15) + + ax1 = fig.add_subplot(spec[0,0]) + ax1.set_title(f"00 UTC\nMSD: {msd[0]:.2f}, p-value: {p_val[0]:.3f}") + ax1.bar(np.arange(100), rank_counts[:,0]/3.65) + ax1.set_xlim(-0.5,100.5) + ax1.set_xlabel('Percentile') + ax1.set_ylabel('% of total count') + ax1.axhline(y=1,color='r') + + ax2 = fig.add_subplot(spec[0,1]) + ax2.set_title(f"06 UTC\nMSD: {msd[1]:.2f}, p-value: {p_val[1]:.3f}") + ax2.bar(np.arange(100), rank_counts[:,1]/3.65) + ax2.set_xlim(-0.5,100.5) + ax2.set_xlabel('Percentile') + ax2.axhline(y=1,color='r') + + ax3 = fig.add_subplot(spec[1,0]) + ax3.set_title(f"12 UTC\nMSD: {msd[2]:.2f}, p-value: {p_val[2]:.3f}") + ax3.bar(np.arange(100), rank_counts[:,2]/3.65) + ax3.set_xlim(-0.5,100.5) + ax3.set_xlabel('Percentile') + ax3.set_ylabel('% of total count') + ax3.axhline(y=1,color='r') + + ax4 = fig.add_subplot(spec[1,1]) + ax4.set_title(f"18 UTC\nMSD: {msd[3]:.2f}, p-value: {p_val[3]:.3f}") + ax4.bar(np.arange(100), rank_counts[:,3]/3.65) + ax4.set_xlim(-0.5,100.5) + ax4.set_xlabel('Percentile') + ax4.axhline(y=1,color='r') + + # figure explanation: explaining what quantiles and model data are + exp_id = 'a0gg' + expl_msd_pvalue = 'Mean Square Deviation (' + r'$\bf{MSD}$)' + ' relative to flat histogram.' + r' $\bf{p-value}$' + ' of MSD relative to sampling distribution.\n' + expl_quantile='Quantiles are estimated based on observations at the station ' + station_id +' (period: 1990-2021).\n' + expl_model='Model data from experiment (' + exp_id + ') interpolated to location of this station.' + fig_text = expl_msd_pvalue + expl_quantile + expl_model + + # add subplot at the bottom of the figure, and add figure explanation as text to this subplot + ax5 = fig.add_subplot(spec[2,:]) + ax5.axis('off') + ax5.text(0.1, 0.6, fig_text, bbox={'pad':10, 'facecolor':'white'}, fontsize=10) + + plt.savefig(figure_output+'/test_rank_histograms'+station_id+'_'+str(year)+'.png',dpi=140) + #plt.show() + plt.close() + +def plot_quantiles(quant_station_year,utc00,utc06,utc12,utc18,station_id,year,figure_output): + # plot quantiles. 
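+    # Illustrative helper (not wired in): the four UTC panels below repeat one
+    # shading pattern; a loop like this could replace the copy-pasted
+    # fill_between calls, assuming `qs` maps quantile labels to day-of-year
+    # series for a single UTC hour.
+    def _shade(ax, x, qs, alphas, facecolor):
+        bands = [('q25', 'q50', 0), ('q50', 'q75', 0), ('q10', 'q25', 1),
+                 ('q75', 'q90', 1), ('q01', 'q10', 2), ('q90', 'q99', 2)]
+        for lo, hi, a in bands:
+            ax.fill_between(x, qs[lo], qs[hi], color=facecolor, alpha=alphas[a])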
+ #print(quant_station_year) + q01 = quant_station_year[['hour@hdr','q01@body']] + q10 = quant_station_year[['hour@hdr','q10@body']] + q25 = quant_station_year[['hour@hdr','q25@body']] + q50 = quant_station_year[['hour@hdr','q50@body']] + q75 = quant_station_year[['hour@hdr','q75@body']] + q90 = quant_station_year[['hour@hdr','q90@body']] + q99 = quant_station_year[['hour@hdr','q99@body']] + + days = np.arange(1,366) + day1 = datetime.date(year, 1, 1) + day2 = datetime.date(year+1, 1, 1) + delta = datetime.timedelta(days=1) + dates = drange(day1, day2, delta) + months = np.arange(13) + + alpha = [0.5, 0.3, 0.1] + color = 'blue' + scatter_size=5 # size for scattered dots + fs3 = 12 # fontsize for + fs4=10 # fontsize for extra text box, legend + linecolor='red' # color for solid line + + fig = plt.figure(figsize=(10,9), layout='constrained') + spec = fig.add_gridspec(3,2) + fig_title='Test quantile plots for station '+station_id+', Year='+str(year) + fig.suptitle(fig_title, fontsize=15) + + ax1 = fig.add_subplot(spec[0,0]) + ax1.set_title('00 UTC') + #print(q25) + ax1.fill_between(dates, q25[q25["hour@hdr"]==0].iloc[:,1], q50[q50["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[0]) + ax1.fill_between(dates, q50[q50["hour@hdr"]==0].iloc[:,1], q75[q75["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[0]) + ax1.fill_between(dates, q10[q10["hour@hdr"]==0].iloc[:,1], q25[q25["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[1]) + ax1.fill_between(dates, q75[q75["hour@hdr"]==0].iloc[:,1], q90[q90["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[1]) + ax1.fill_between(dates, q01[q01["hour@hdr"]==0].iloc[:,1], q10[q10["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[2]) + ax1.fill_between(dates, q90[q90["hour@hdr"]==0].iloc[:,1], q99[q99["hour@hdr"]==0].iloc[:,1], color=color, alpha=alpha[2]) + ax1.plot(dates, q50[q50["hour@hdr"]==0].iloc[:,1], 'r-') + ax1.scatter(dates, utc00["value@body"], s=scatter_size, c='k', marker='.') + ax1.set_xlim(dates[0], dates[-1]) + ax1.xaxis.set_major_locator(MonthLocator(months)) + ax1.xaxis.set_major_formatter(DateFormatter('%b')) + ax1.set_xlabel('Time') + ax1.set_ylabel('2m temperature $[^oC]$') + + ax2 = fig.add_subplot(spec[0,1]) + ax2.set_title('06 UTC') + ax2.fill_between(dates, q25[q25["hour@hdr"]==6].iloc[:,1], q50[q50["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[0]) + ax2.fill_between(dates, q50[q50["hour@hdr"]==6].iloc[:,1], q75[q75["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[0]) + ax2.fill_between(dates, q10[q10["hour@hdr"]==6].iloc[:,1], q25[q25["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[1]) + ax2.fill_between(dates, q75[q75["hour@hdr"]==6].iloc[:,1], q90[q90["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[1]) + ax2.fill_between(dates, q01[q01["hour@hdr"]==6].iloc[:,1], q10[q10["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[2]) + ax2.fill_between(dates, q90[q90["hour@hdr"]==6].iloc[:,1], q99[q99["hour@hdr"]==6].iloc[:,1], color=color, alpha=alpha[2]) + ax2.plot(dates, q50[q50["hour@hdr"]==6].iloc[:,1], 'r-') + ax2.scatter(dates, utc06["value@body"], s=scatter_size, c='k', marker='.') + ax2.set_xlim(dates[0], dates[-1]) + ax2.xaxis.set_major_locator(MonthLocator(months)) + ax2.xaxis.set_major_formatter(DateFormatter('%b')) + ax2.set_xlabel('Time') + + ax3 = fig.add_subplot(spec[1,0]) + ax3.set_title('12 UTC') + ax3.fill_between(dates, q25[q25["hour@hdr"]==12].iloc[:,1], q50[q50["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[0]) + ax3.fill_between(dates, q50[q50["hour@hdr"]==12].iloc[:,1], 
q75[q75["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[0]) + ax3.fill_between(dates, q10[q10["hour@hdr"]==12].iloc[:,1], q25[q25["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[1]) + ax3.fill_between(dates, q75[q75["hour@hdr"]==12].iloc[:,1], q90[q90["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[1]) + ax3.fill_between(dates, q01[q01["hour@hdr"]==12].iloc[:,1], q10[q10["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[2]) + ax3.fill_between(dates, q90[q90["hour@hdr"]==12].iloc[:,1], q99[q99["hour@hdr"]==12].iloc[:,1], color=color, alpha=alpha[2]) + ax3.plot(dates, q50[q50["hour@hdr"]==12].iloc[:,1], 'r-') + ax3.scatter(dates, utc12["value@body"], s=scatter_size, c='k', marker='.') + ax3.set_xlim(dates[0], dates[-1]) + ax3.xaxis.set_major_locator(MonthLocator(months)) + ax3.xaxis.set_major_formatter(DateFormatter('%b')) + ax3.set_xlabel('Time') + ax3.set_ylabel('2m temperature $[^oC]$') + + ax4 = fig.add_subplot(spec[1,1]) + ax4.set_title('18 UTC') + ax4.fill_between(dates, q25[q25["hour@hdr"]==18].iloc[:,1], q50[q50["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[0]) + ax4.fill_between(dates, q50[q50["hour@hdr"]==18].iloc[:,1], q75[q75["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[0]) + ax4.fill_between(dates, q10[q10["hour@hdr"]==18].iloc[:,1], q25[q25["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[1]) + ax4.fill_between(dates, q75[q75["hour@hdr"]==18].iloc[:,1], q90[q90["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[1]) + ax4.fill_between(dates, q01[q01["hour@hdr"]==18].iloc[:,1], q10[q10["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[2]) + ax4.fill_between(dates, q90[q90["hour@hdr"]==18].iloc[:,1], q99[q99["hour@hdr"]==18].iloc[:,1], color=color, alpha=alpha[2]) + ax4.plot(dates, q50[q50["hour@hdr"]==18].iloc[:,1], 'r-') + ax4.scatter(dates, utc18["value@body"], s=scatter_size, c='k', marker='.') + ax4.set_xlim(dates[0], dates[-1]) + ax4.xaxis.set_major_locator(MonthLocator(months)) + ax4.xaxis.set_major_formatter(DateFormatter('%b')) + ax4.set_xlabel('Time') + + ####################### Figure texts ######################### + + # legend elemnents for quantile data, one legend for model and one legend for observations + exp_id = 'a0gg' + legend_elements1 = [mlines.Line2D([],[], marker='.', linestyle='', color='k', label='Model data (' + exp_id + ')')] + legend_elements2= [mpatches.Patch(color=color, alpha=alpha[0], label='25-50%'), + mpatches.Patch(color=color, alpha=alpha[1], label='10-25%'), + mpatches.Patch(color=color, alpha=alpha[2], label='1-10%'), + mlines.Line2D([],[], color=linecolor, label='50%'), + mpatches.Patch(color=color, alpha=alpha[0], label='50-75%'), + mpatches.Patch(color=color, alpha=alpha[1], label='75-90%'), + mpatches.Patch(color=color, alpha=alpha[2], label='90-99%')] + + # add a sixth axis for legends and text data: + ax5 = fig.add_subplot(spec[2,0]) + ax5.axis('off') + + # legend with two columns at the upper centre of the figure: + leg1 = ax5.legend(loc='upper center', handles=legend_elements1, fontsize=fs4) + ax5.add_artist(leg1) + #ax5.add_artist(leg2) + + ax6 = fig.add_subplot(spec[2,1]) + ax6.axis('off') + leg2 = ax6.legend(loc='upper center', handles=legend_elements2, ncols=2, fontsize=fs4, title='Quantiles for observations', title_fontsize=fs4) + ax6.add_artist(leg2) + + plt.savefig(figure_output+'/test_quantiles'+station_id+'_'+str(year)+'.png',dpi=140) + #plt.show() + plt.close() + +def plot_p_values(station_data, p_values, experiment, variable, timestring, figure_output): + ''' + Description: plot p 
values on a map + Input: numpy array + Output: saves a figure in the specified output directory + ''' + lon_min=-180 + lon_max=180 + lat_min=-90 + lat_max=90 + + figname = figure_output + '/p_vals_SYNOP_'+variable+'_rankhist_'+experiment+'_'+timestring+'.png' + + colors = ['darkviolet', 'blue', 'skyblue', 'lime', 'yellow', 'orange', 'red', 'grey'] + limits = [0,0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,1] + + fig = plt.figure(figsize=(19,11), layout='constrained') + spec = fig.add_gridspec(2,2) + fig_title='P-values for SYNOP rank histograms against observed climate: Experiment ' + experiment + ', Variable ' + variable + ', Year ' + timestring + fig.suptitle(fig_title, fontsize=16) + + ax1 = fig.add_subplot(spec[0,0],projection=ccrs.PlateCarree()) + ax1.set_title('00 UTC') + ax1.coastlines() + ax1.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax1.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_values[:,0]-0.5) + for i in range(len(p_values[:,0])): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_values[:,0] >= limits[n]) & (p_values[:,0] < limits[n+1])) + ax1.scatter(station_data[:,1][p_ind], station_data[:,2][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax1.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + ax2 = fig.add_subplot(spec[0,1],projection=ccrs.PlateCarree()) + ax2.set_title('06 UTC') + ax2.coastlines() + ax2.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax2.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_values[:,1]-0.5) + for i in range(len(p_values[:,1])): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_values[:,1] >= limits[n]) & (p_values[:,1] < limits[n+1])) + ax2.scatter(station_data[:,1][p_ind], station_data[:,2][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax2.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + ax3 = fig.add_subplot(spec[1,0],projection=ccrs.PlateCarree()) + ax3.set_title('12 UTC') + ax3.coastlines() + ax3.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax3.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_values[:,2]-0.5) + for i in range(len(p_values[:,2])): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_values[:,1] >= limits[n]) & (p_values[:,1] < limits[n+1])) + ax3.scatter(station_data[:,1][p_ind], station_data[:,2][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax3.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + ax4 = fig.add_subplot(spec[1,1],projection=ccrs.PlateCarree()) + ax4.set_title('18 UTC') + ax4.coastlines() + ax4.set_extent([lon_min, lon_max, lat_min, lat_max], 
crs=ccrs.PlateCarree()) + ax4.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_values[:,3]-0.5) + for i in range(len(p_values[:,2])): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_values[:,1] >= limits[n]) & (p_values[:,1] < limits[n+1])) + ax4.scatter(station_data[:,1][p_ind], station_data[:,2][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax4.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + plt.savefig(figname, dpi=140) + #plt.show() + plt.close() + +def plot_summary_rank_histograms(rank_histograms,p_values,experiment,variable,timestring,figure_output): + # Level 3, (see summary_rank_histograms_all_stations()) + # Put plot_rank_hist_sum_synop.py here. + ''' + Input: p_freq, q_freq, number of stations, p01, p1, p5, p_ylim, q_ylim, figname + Description: Plots 'all-UTC' p and q frequencies and saves the figure with figname + ''' + #title='Rank histogram summary statistics: Experiment ' + experiment + ' Variable ' + variable +'\nNumber of stations: ' + str(number_of_stations) + '\nFrequency, p<0.001: ' + str(p01) + '\nFrequency, p<0.01: ' + str(p1) + '\nFrequency, p<0.05: ' + str(p5) + # can also be input arguments if the number of bins are not constant + number_of_p_bins=20 + p_val_bins = np.arange(number_of_p_bins)/number_of_p_bins + #print(p_val_bins) + rank_counts = np.zeros((number_of_p_bins+1,4)) + rank_counts_norm = rank_counts + number_of_q_bins=100 + q_bins = np.arange(number_of_q_bins)+1 + #timestring='2010' + + mean_rank_histogram = np.mean(rank_histograms, axis=0) + + for i in range(4): + for j, pval in enumerate(p_values[:,i]): + ranks = np.searchsorted(p_val_bins, pval, side='right') + rank_counts[ranks,i] += 1 + rank_counts_norm[:,i] = 100*rank_counts[:,i]/sum(rank_counts[:,i]) + #print(rank_counts[:,0]) + + fig = plt.figure(figsize=(12,12), layout='constrained') + spec = fig.add_gridspec(4,2) + fig_title='Summary of rank histograms and p-values: Experiment ' + experiment + ', Variable ' + variable + ', Year ' + timestring + fig.suptitle(fig_title, fontsize=16) + + ax1 = fig.add_subplot(spec[0,0]) + ax1.set_title('Normalized p-value frequencies 00 UTC') + ax1.bar(p_val_bins,rank_counts_norm[1:,0],width=0.05,align='edge',color='#016c59') + ax1.set_xlim(-0.01,1.01) + ax1.xaxis.set_major_locator(MultipleLocator(0.2)) + ax1.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax1.set_xlabel('P-value') + ax1.set_ylabel('% of stations') + #ax1.axhline(y=1,color='r') + + ax2 = fig.add_subplot(spec[1,0]) + ax2.set_title('Summary rank histogram 00 UTC') + ax2.bar(q_bins,100*mean_rank_histogram[:,0]/sum(mean_rank_histogram[:,0]),width=1.0,color='#253494') + ax2.set_xlim(0,101) + ax2.xaxis.set_major_locator(MultipleLocator(10)) + ax2.xaxis.set_minor_locator(MultipleLocator(5)) + ax2.set_ylabel('% of total count') + ax2.set_xlabel('Percentile') + ax2.axhline(y=1,color='r') + + ax3 = fig.add_subplot(spec[0,1]) + ax3.set_title('Normalized p-value frequencies 06 UTC') + ax3.bar(p_val_bins,rank_counts_norm[1:,1],width=0.05,align='edge',color='#016c59') + ax3.set_xlim(-0.01,1.01) + ax3.xaxis.set_major_locator(MultipleLocator(0.2)) + ax3.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax3.set_xlabel('P-value') + ax3.set_ylabel('% of stations') + 
#ax3.axhline(y=1,color='r') + + ax4 = fig.add_subplot(spec[1,1]) + ax4.set_title('Summary rank histogram 06 UTC') + ax4.bar(q_bins,100*mean_rank_histogram[:,1]/sum(mean_rank_histogram[:,1]),width=1.0,color='#253494') + ax4.set_xlim(0,101) + ax4.xaxis.set_major_locator(MultipleLocator(10)) + ax4.xaxis.set_minor_locator(MultipleLocator(5)) + ax4.set_ylabel('% of total count') + ax4.set_xlabel('Percentile') + ax4.axhline(y=1,color='r') + + ax5 = fig.add_subplot(spec[2,0]) + ax5.set_title('Normalized p-value frequencies 12 UTC') + ax5.bar(p_val_bins,rank_counts_norm[1:,2],width=0.05,align='edge',color='#016c59') + ax5.set_xlim(-0.01,1.01) + ax5.xaxis.set_major_locator(MultipleLocator(0.2)) + ax5.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax5.set_xlabel('P-value') + ax5.set_ylabel('% of stations') + #ax5.axhline(y=1,color='r') + + ax6 = fig.add_subplot(spec[3,0]) + ax6.set_title('Summary rank histogram 12 UTC') + ax6.bar(q_bins,100*mean_rank_histogram[:,2]/sum(mean_rank_histogram[:,2]),width=1.0,color='#253494') + ax6.set_xlim(0,101) + ax6.xaxis.set_major_locator(MultipleLocator(10)) + ax6.xaxis.set_minor_locator(MultipleLocator(5)) + ax6.set_ylabel('% of total count') + ax6.set_xlabel('Percentile') + ax6.axhline(y=1,color='r') + + ax7 = fig.add_subplot(spec[2,1]) + ax7.set_title('Normalized p-value frequencies 18 UTC') + ax7.bar(p_val_bins,rank_counts_norm[1:,3],width=0.05,align='edge',color='#016c59') + ax7.set_xlim(-0.01,1.01) + ax7.xaxis.set_major_locator(MultipleLocator(0.2)) + ax7.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax7.set_xlabel('P-value') + ax7.set_ylabel('% of stations') + #ax7.axhline(y=1,color='r') + + ax8 = fig.add_subplot(spec[3,1]) + ax8.set_title('Summary rank histogram 18 UTC') + ax8.bar(q_bins,100*mean_rank_histogram[:,3]/sum(mean_rank_histogram[:,3]),width=1.0,color='#253494') + ax8.set_xlim(0,101) + ax8.xaxis.set_major_locator(MultipleLocator(10)) + ax8.xaxis.set_minor_locator(MultipleLocator(5)) + ax8.set_ylabel('% of total count') + ax8.set_xlabel('Percentile') + ax8.axhline(y=1,color='r') + + plt.savefig(figure_output+'/rank_hist_summary_'+timestring+'.png',dpi=140) + #plt.show() + plt.close() + +def plot_bias(bias, figure_output, timestring): + lon_min=-180 + lon_max=180 + lat_min=-90 + lat_max=90 + + colors = ['darkviolet', 'blueviolet', 'indigo', 'darkblue', 'blue', 'deepskyblue', 'aquamarine', 'gold', 'orange', 'orangered', 'red', 'darkred', 'deeppink', 'magenta'] + limits_bias = [-99, -12, -8, -4, -2, -1, -0.5, 0, 0.5, 1, 2, 4, 8, 12, 99] + + fig = plt.figure(figsize=(19,11), layout='constrained') + spec = fig.add_gridspec(2,2) + fig_title='Bias ($^oC$) of yearly mean T2m against observed climate: Experiment a0gg, Variable T2m, Year 2010' + fig.suptitle(fig_title, fontsize=16) + + ax1 = fig.add_subplot(spec[0,0],projection=ccrs.PlateCarree()) + ax1.coastlines() + ax1.set_title('00 UTC') + ax1.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax1.gridlines(draw_labels=True) + + #loop over limits: + legend_elements = [] + bias_freq = [0] * 14 + bias00 = bias[bias['hour@hdr']==0] + b = bias00['bias'].to_numpy() + for i in range(len(b)): + for n in range(14): + if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]): + bias_freq[n] = bias_freq[n] + 1 + + for n in range(14): + bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1])) + ax1.scatter(bias00['longitude@hdr'][bias_ind], bias00['latitude@hdr'][bias_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], 
label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')'))
+
+    ax1.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5))
+
+    ax2 = fig.add_subplot(spec[0,1],projection=ccrs.PlateCarree())
+    ax2.coastlines()
+    ax2.set_title('06 UTC')
+    ax2.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
+    ax2.gridlines(draw_labels=True)
+
+    #loop over limits:
+    legend_elements = []
+    bias_freq = [0] * 14
+    bias06 = bias[bias['hour@hdr']==6]
+    b = bias06['bias'].to_numpy()
+    for i in range(len(b)):
+        for n in range(14):
+            if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]):
+                bias_freq[n] = bias_freq[n] + 1
+
+    for n in range(14):
+        bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1]))
+        ax2.scatter(bias06['longitude@hdr'][bias_ind], bias06['latitude@hdr'][bias_ind], s=15, c=colors[n])
+        legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')'))
+
+    ax2.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5))
+
+    ax3 = fig.add_subplot(spec[1,0],projection=ccrs.PlateCarree())
+    ax3.coastlines()
+    ax3.set_title('12 UTC')
+    ax3.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
+    ax3.gridlines(draw_labels=True)
+
+    #loop over limits:
+    legend_elements = []
+    bias_freq = [0] * 14
+    bias12 = bias[bias['hour@hdr']==12]
+    b = bias12['bias'].to_numpy()
+    for i in range(len(b)):
+        for n in range(14):
+            if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]):
+                bias_freq[n] = bias_freq[n] + 1
+
+    for n in range(14):
+        bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1]))
+        ax3.scatter(bias12['longitude@hdr'][bias_ind], bias12['latitude@hdr'][bias_ind], s=15, c=colors[n])
+        legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')'))
+
+    ax3.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5))
+
+    ax4 = fig.add_subplot(spec[1,1],projection=ccrs.PlateCarree())
+    ax4.coastlines()
+    ax4.set_title('18 UTC')
+    ax4.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
+    ax4.gridlines(draw_labels=True)
+
+    #loop over limits:
+    legend_elements = []
+    bias_freq = [0] * 14
+    bias18 = bias[bias['hour@hdr']==18]
+    b = bias18['bias'].to_numpy()
+    for i in range(len(b)):
+        for n in range(14):
+            if (b[i] > limits_bias[n]) and (b[i] <= limits_bias[n+1]):
+                bias_freq[n] = bias_freq[n] + 1
+
+    for n in range(14):
+        bias_ind = ((b > limits_bias[n]) & (b <= limits_bias[n+1]))
+        ax4.scatter(bias18['longitude@hdr'][bias_ind], bias18['latitude@hdr'][bias_ind], s=15, c=colors[n])
+        legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits_bias[n])+'..'+str(limits_bias[n+1])+' ('+str(bias_freq[n])+')'))
+
+    ax4.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.07, 0.5))
+
+    #ax.set_title('Bias: ' + header_string, fontsize=12, fontweight='bold')
+    plt.savefig(figure_output+'/bias_map_'+timestring+'.png',dpi=140)
+    #plt.show()
+    plt.close() # release the figure, as in the other plotting functions
+
+def plot_ttest_summary_histograms(ttest_results_ll, figure_output, timestring):
+    p_values = ttest_results_ll[['station@hdr','longitude@hdr','latitude@hdr','hour@hdr','p-val']]
+    number_of_p_bins=20
+    p_val_bins = np.arange(number_of_p_bins)/number_of_p_bins
+    rank_counts = np.zeros((number_of_p_bins+1,4))
+    rank_counts_norm = rank_counts.copy() # independent array, not a view
+
+    fig_title =
'Summary of p-values of t-tests of stationwise monthly means' + + #p_vals_only = p_values['p-val'] + for i in range(4): + hour=i*6 + p_vals = p_values[p_values['hour@hdr']==hour] + p_vals_only = p_vals['p-val'] + #print(p_vals) + for j, pval in enumerate(p_vals_only): + ranks = np.searchsorted(p_val_bins, pval, side='right') + rank_counts[ranks,i] += 1 + rank_counts_norm[:,i] = 100*rank_counts[:,i]/sum(rank_counts[:,i]) + + fig = plt.figure(figsize=(12,6), layout='constrained') + spec = fig.add_gridspec(2,2) + fig.suptitle(fig_title, fontsize=16) + + ax1 = fig.add_subplot(spec[0,0]) + ax1.set_title('Normalized p-value frequencies 00 UTC') + ax1.bar(p_val_bins,rank_counts_norm[1:,0],width=0.05,align='edge',color='#016c59') + ax1.set_xlim(-0.01,1.01) + ax1.xaxis.set_major_locator(MultipleLocator(0.2)) + ax1.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax1.set_xlabel('P-value') + ax1.set_ylabel('% of stations') + + ax2 = fig.add_subplot(spec[0,1]) + ax2.set_title('Normalized p-value frequencies 06 UTC') + ax2.bar(p_val_bins,rank_counts_norm[1:,1],width=0.05,align='edge',color='#016c59') + ax2.set_xlim(-0.01,1.01) + ax2.xaxis.set_major_locator(MultipleLocator(0.2)) + ax2.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax2.set_xlabel('P-value') + ax2.set_ylabel('% of stations') + + ax3 = fig.add_subplot(spec[1,0]) + ax3.set_title('Normalized p-value frequencies 12 UTC') + ax3.bar(p_val_bins,rank_counts_norm[1:,2],width=0.05,align='edge',color='#016c59') + ax3.set_xlim(-0.01,1.01) + ax3.xaxis.set_major_locator(MultipleLocator(0.2)) + ax3.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax3.set_xlabel('P-value') + ax3.set_ylabel('% of stations') + + ax4 = fig.add_subplot(spec[1,1]) + ax4.set_title('Normalized p-value frequencies 18 UTC') + ax4.bar(p_val_bins,rank_counts_norm[1:,3],width=0.05,align='edge',color='#016c59') + ax4.set_xlim(-0.01,1.01) + ax4.xaxis.set_major_locator(MultipleLocator(0.2)) + ax4.xaxis.set_minor_locator(MultipleLocator(0.1)) + ax4.set_xlabel('P-value') + ax4.set_ylabel('% of stations') + + #plt.savefig('test_figures/t-test_pval_histograms.png',dpi=140) + plt.savefig(figure_output+'/t-test_pval_histograms_'+timestring+'.png',dpi=140) + #plt.show() + +def plot_ttest_summary_maps(ttest_results, figure_output, timestring): + p_values = ttest_results[['station@hdr','longitude@hdr','latitude@hdr','hour@hdr','p-val']] + lon_min=-180 + lon_max=180 + lat_min=-90 + lat_max=90 + + #figname = figure_output + '/p_vals_SYNOP_'+variable+'_rankhist_'+experiment+'_'+timestring+'.png' + figname = figure_output+'/t-test_pval_maps_'+timestring+'.png' + + colors = ['darkviolet', 'blue', 'skyblue', 'lime', 'yellow', 'orange', 'red', 'grey'] + limits = [0,0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,1] + + fig = plt.figure(figsize=(19,11), layout='constrained') + spec = fig.add_gridspec(2,2) + #fig_title='P-values for SYNOP rank histograms against observed climate: Experiment ' + experiment + ', Variable ' + variable + ', Year ' + timestring + fig_title='P-values for SYNOP monthly means against observed climate: Experiment a0gg, Variable T2m, Year 2010' + fig.suptitle(fig_title, fontsize=16) + + p_vals = p_values[p_values['hour@hdr']==0] + p_vals_only = p_vals['p-val'].to_numpy() + ax1 = fig.add_subplot(spec[0,0],projection=ccrs.PlateCarree()) + ax1.set_title('00 UTC') + ax1.coastlines() + ax1.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax1.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + #print(p_mod) + for i 
in range(len(p_vals_only)): + #print(i) + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax1.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax1.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + p_vals = p_values[p_values['hour@hdr']==6] + p_vals_only = p_vals['p-val'].to_numpy() + ax2 = fig.add_subplot(spec[0,1],projection=ccrs.PlateCarree()) + ax2.set_title('06 UTC') + ax2.coastlines() + ax2.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax2.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + for i in range(len(p_vals_only)): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax2.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax2.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + p_vals = p_values[p_values['hour@hdr']==12] + p_vals_only = p_vals['p-val'].to_numpy() + ax3 = fig.add_subplot(spec[1,0],projection=ccrs.PlateCarree()) + ax3.set_title('12 UTC') + ax3.coastlines() + ax3.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax3.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + for i in range(len(p_vals_only)): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax3.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax3.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 0.0),fontsize='small') + + p_vals = p_values[p_values['hour@hdr']==18] + p_vals_only = p_vals['p-val'].to_numpy() + ax4 = fig.add_subplot(spec[1,1],projection=ccrs.PlateCarree()) + ax4.set_title('18 UTC') + ax4.coastlines() + ax4.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax4.gridlines(draw_labels=True) + legend_elements = [] + p_freq = [0] * 8 + p_mod = 0.5+0.9999*(p_vals_only-0.5) + for i in range(len(p_vals_only)): + for n in range(8): + if (p_mod[i] > limits[n]) and (p_mod[i] <= limits[n+1]): + p_freq[n] = p_freq[n] + 1 + + for n in range(8): + p_ind = ((p_vals_only >= limits[n]) & (p_vals_only < limits[n+1])) + ax4.scatter(p_vals['longitude@hdr'][p_ind], p_vals['latitude@hdr'][p_ind], s=15, c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1])+' ('+str(p_freq[n])+')')) + + ax4.legend(handles=legend_elements, loc=(0.9, 0.0), bbox_to_anchor=(0.67, 
0.0),fontsize='small') + plt.savefig(figname, dpi=140) + #plt.show() + +def read_synop_odb(sim_infile): + # Read the odb file containing simulated SYNOP obs. + sim = odc.read_odb(sim_infile, single = True, columns = ['year@hdr', 'month@hdr', 'day@hdr', 'hour@hdr', 'longitude@hdr', 'latitude@hdr', 'variable@hdr', 'station@hdr', 'value@body']) + return sim + +def calculate_monthly_means_sim(sim_raw): # Let's do yearly means for now and come back to this later. + # Level 2.1 + # Compute monthly means for all stations from the simulated data. + # Loop over stations + # -->> monthly_means_one_station() + # -->> write_monthly_means() (do we need to write them somewhere???) + #means = (sim[["year@descr", "month@descr", "station@descr", "variable@descr", "value@descr"]]).groupby(["year@descr","month@descr","station@descr","variable@descr"]).mean() + sim_raw['station@hdr'] = sim_raw['station@hdr'].str.strip() + sim = sim_raw[sim_raw['hour@hdr'].isin([0, 6, 12, 18])] + sim_monmean = ( + sim[["year@hdr", "month@hdr", "hour@hdr", "station@hdr", "longitude@hdr", "latitude@hdr", "value@body"]] + .groupby(["year@hdr", "month@hdr", "hour@hdr", "station@hdr"]) + .mean() + .reset_index() + ) + sim_monmean["value@body"] -= 273.15 + return sim_monmean + +def calculate_yearly_means_sim(sim_raw): + sim_yearmean = (sim_raw.groupby(["year@hdr", "hour@hdr", "station@hdr"], as_index=False).agg({"value@body": "mean"})) + sim_yearmean["value@body"] -= 273.15 # Kelvin to Celsius + return sim_yearmean + +def read_MSD_stats(msd_file): + # Read all climatological Mean Squared Deviation bootstrap statistics for production of rank histograms. + msd = odc.read_odb(msd_file, single = True, columns = ['station@hdr','longitude@hdr','latitude@hdr','elevation@hdr','realization@hdr','msd00@body','msd06@body','msd12@body','msd18@body','msd24@body']) + return msd + +def read_quantiles(quant_file): + # Read all climatological quantiles. + # Generate quantile column names dynamically + quantile_columns = [f'q{str(i).zfill(2)}@body' for i in range(1, 100)] + + # Add the other fixed column names + fixed_columns = ['station@hdr', 'longitude@hdr', 'latitude@hdr', 'elevation@hdr', + 'year@hdr', 'month@hdr', 'day@hdr', 'hour@hdr', 'variable@hdr'] + + # Combine the fixed columns with the quantile columns + all_columns = fixed_columns + quantile_columns + + # Read the ODB file with the generated column list + quant = odc.read_odb(quant_file, single=True, columns=all_columns) + return quant + +def read_obs_monmeans(obs_monmean_file): + # Obs values already in Celsius. Remove all stupid temperature values. + obs = odc.read_odb(obs_monmean_file, single = True, columns = ['station@hdr', 'longitude@hdr', 'latitude@hdr', 'year@hdr', 'month@hdr', 'hour@hdr', 'value@body']) + obs.where(obs['value@body'] > -150, np.nan, inplace = True) + obs.where(obs['value@body'] < 150, np.nan, inplace = True) + obs.dropna(how='any', inplace = True) + obs["station@hdr"] = obs["station@hdr"].astype(int).astype(str).str.zfill(6) # Force the station ids to be six char strings. 
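+    # e.g. 1384 -> "001384": this makes the obs station ids comparable with the
+    # 6-character, zero-padded ids used for the simulated data and station list.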
+    return obs
+
+def calculate_yearly_means_obs(obs):
+    obs_yearmean = (obs.groupby(["year@hdr", "hour@hdr", "station@hdr"], as_index=False).agg({"value@body": "mean"}))
+    return obs_yearmean
+
+def calculate_allyear_mean(obs):
+    obs_allyearmean = (obs.groupby(["hour@hdr", "station@hdr"], as_index=False).agg({"value@body": "mean"}))
+    return obs_allyearmean
+
+def calculate_bias(yearly_means_sim, obs_allyearmean, coords):
+    sim_and_obs_allyearmean = yearly_means_sim.merge(obs_allyearmean, on=['hour@hdr', 'station@hdr'], suffixes=('_df1', '_df2'))
+    sim_and_obs_allyearmean['bias'] = sim_and_obs_allyearmean['value@body_df1'] - sim_and_obs_allyearmean['value@body_df2']
+    bias_tmp = sim_and_obs_allyearmean[['hour@hdr', 'station@hdr', 'bias']]
+    bias = bias_tmp.merge(coords, on = ["station@hdr"])
+    bias["longitude@hdr"] = bias["longitude@hdr"].apply(lambda lon: lon - 360 if lon > 180 else lon)
+    return bias
+
+def produce_rank_histograms_all_stations(sim_data,msd_data,quantiles,station_list,figure_output):
+    # Level 2.2
+    # Loop over all stations.
+    # This needs to give the bundle of rank histograms to the plotting functions.
+    # -->> rank_histogram_one_station()
+    p_vals_all_stations = np.zeros((len(station_list),4))
+    rank_histograms_all_stations = np.zeros((len(station_list),100,4))
+    sim_data['station@hdr'] = sim_data['station@hdr'].str.strip()
+    msd_data['station@hdr'] = msd_data['station@hdr'].astype(str).str.strip()
+    quantiles['station@hdr'] = quantiles['station@hdr'].astype(str).str.strip()
+    #for i in range(10): # For testing with 10 first stations.
+    for i in range(len(station_list)): # For running the full list of 541 stations.
+        station_id = station_list[i].strip() # ids are 6-char, zero-padded strings; strip surrounding whitespace
+        sim_data_one_station = sim_data[sim_data['station@hdr'] == station_id]
+        msd_data_one_station = msd_data[msd_data['station@hdr'] == station_id.lstrip('0')]
+        quantiles_one_station = quantiles[quantiles['station@hdr'] == station_id.lstrip('0')]
+        print('Processing rank histograms and quantile plots for station ', station_id)
+        rank_histogram,p_val = rank_histogram_one_station(sim_data_one_station,msd_data_one_station,quantiles_one_station,station_id,figure_output)
+        rank_histograms_all_stations[i,:,:] = rank_histogram[:,:]
+        p_vals_all_stations[i,:]=p_val[:]
+
+    #np.savetxt('test_output_files/HadISD_p_vals_2010.txt',p_vals_all_stations)
+    return rank_histograms_all_stations,p_vals_all_stations
+
+def calculate_ttests(sim_yearmean, obs_yearmean, coords):
+    results = []
+
+    # Get unique station and hour combinations
+    stations = sim_yearmean["station@hdr"].unique()
+    #stations = stations[:5]
+    hours = [0, 6, 12, 18]
+
+    # Perform t-tests for each station and hour
+    for station in stations:
+        for hour in hours:
+            obs_values = obs_yearmean.loc[
+                (obs_yearmean["station@hdr"] == station) & (obs_yearmean["hour@hdr"] == hour),
+                "value@body"
+            ]
+            sim_values = sim_yearmean.loc[
+                (sim_yearmean["station@hdr"] == station) & (sim_yearmean["hour@hdr"] == hour),
+                "value@body"
+            ]
+
+            if len(obs_values) > 1 and len(sim_values) > 0:
+                #print(obs_values, sim_values)
+                test_result = pg.ttest(obs_values, sim_values)
+                test_result.insert(0, "station@hdr", station)
+                test_result.insert(1, "hour@hdr", hour)
+                results.append(test_result[["station@hdr","hour@hdr","T","p-val"]])
+
+    t_test_results = pd.concat(results, ignore_index=True) if results else pd.DataFrame()
+    t_test_results_ll = pd.merge(t_test_results, coords, how = "left", on = ["station@hdr"])
+    t_test_results_ll["longitude@hdr"] =
t_test_results_ll["longitude@hdr"].apply(lambda lon: lon - 360 if lon > 180 else lon) + return t_test_results_ll + +def produce_standard_plots_all_stations(): + # Level 2.4 + # Loop over plotting [time series with climatological quantiles] + [rank histogram] for all stations. + # -->> plot_standard_plots_one_station() + print('Not implemented!') + +#def summary_rank_histograms_all_stations(): + # Level 2.5 + # Generate rank histogram summary stats -> plot p-value map and summary rank histogram. + # -->> generate_summary_rank_histogram() + # -->> plot_summary_plot() + # print('Not implemented!') + +##################### The main function. ########################### +def main(variable, timestring, experiment, sim_infile, msd_file, quant_file, station_file, obs_monmean_file, figure_output): + #variable = '2t' + #timestring = '2010' + #experiment = 'a0gg' + #sim_infile='/projappl/project_465000454/data/OBSALL/SYNOP/output/ODB_data/a0gg/simulated_synop_data_2010.odb' + #msd_file='/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/bootstrap_statistics/167_1yr/MSD_bootstrap_167_1yr_all_stations.odb' + ###### Smaller test file that contains T2m quantiles for the first 10 stations only: + #quant_file='/projappl/project_465000454/lautuppi/playground/synop_stats/test_input_files/quantiles_167_10stations.odb' + ###### The "real" T2m quantile file for all 541 SYNOP stations: + #quant_file='/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/quantiles/quantiles_167_all_stations.odb' + #monthly_means_outfile='test_output_files/monthly_means_synop.odb' # Maybe we don't need to write these anywhere after all. + ###### A place for figures: + #figure_output='/projappl/project_465000454/lautuppi/playground/synop_stats/test_figures' # Experimental test figures + ###### A more appropriate place for figures: + #figure_output='/projappl/project_465000454/data/OBSALL/SYNOP/output/figures/'+experiment + print('Loading simulated SYNOP data for statistical computations ...') + sim_data=read_synop_odb(sim_infile) + print('... Done!') + station_list = sorted(sim_data['station@hdr'].head(541)) + ##read_stations() # Do I need this or do I get what I need from the ODB file??? + #write_monthly_means(monthly_means, monthly_means_outfile) # Maybe we just make the plots and that'll be it. + print('Loading MSD statistics and quantiles ...') + msd_stats=read_MSD_stats(msd_file) + quantiles=read_quantiles(quant_file) + print('... Done!') + print('Processing rank histograms for all stations ...') + rank_histograms_all_stations,p_vals_all_stations = produce_rank_histograms_all_stations(sim_data,msd_stats,quantiles,station_list,figure_output) + print('... Done!') + print('Plot all rank histogram p-values on a map ...') + # It is possible to extract the station coordinates from the dataframes revolving around as well, but I was too lazy this time. + #station_file = '/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/station_lists/station_list_167_183_17_1990-2023_thinned.txt' + station_data = np.loadtxt(station_file, skiprows=1) + plot_p_values(station_data,p_vals_all_stations,experiment,variable,timestring,figure_output) + print('... Done!') + print('Plot summary rank histograms and p-value histograms ...') + #np.save('test_output_files/rank_histograms_all_stations.npy',rank_histograms_all_stations) + plot_summary_rank_histograms(rank_histograms_all_stations,p_vals_all_stations,experiment,variable,timestring,figure_output) + print('... 
Done!') + print('Computing yearly means from simulated SYNOP data ...') # Yearly means for now. A foothold for expanding to monthly means remains. + yearly_means_sim=calculate_yearly_means_sim(sim_data) + print('... Done!') + print('Reading observed monthly means and computing yearly means from them ...') + #obs_monmean_file = '/projappl/project_465000454/data/OBSALL/SYNOP/input/STATDATASYNOP/HadISD/var167/monthly_means/monthly_means_all_stations_167.odb' + obs_monmeans = read_obs_monmeans(obs_monmean_file) + obs_yearmeans = calculate_yearly_means_obs(obs_monmeans) + obs_allyearmean = calculate_allyear_mean(obs_monmeans) + coords = sim_data[["station@hdr","longitude@hdr","latitude@hdr"]].drop_duplicates() + print('... Done!') + print('Compute stationwise biases of the selected yearly means relative to the observed climatology ...') + biases = calculate_bias(yearly_means_sim, obs_allyearmean, coords) + print('... Done!') + print('Plot biases on maps ...') + plot_bias(biases, figure_output, timestring) + print('... Done!') + print('Compute t-tests for the yearly means ...') + ttest_results = calculate_ttests(yearly_means_sim, obs_yearmeans, coords) + print('... Done!') + print('Plot p-value summary histograms of the yearly mean t-tests ...') + plot_ttest_summary_histograms(ttest_results, figure_output, timestring) + print('... Done!') + print('Plot the p-values of the yearly mean t-tests on maps ...') + plot_ttest_summary_maps(ttest_results, figure_output, timestring) + print('... Done!') + print('----- All steps of SYNOP statistical monitoring completed. -----') + #ttest_all_stations() + #produce_standard_plots_all_stations() + #summary_rank_histograms_all_stations() + +if __name__=='__main__': + variable = sys.argv[1] + timestring = sys.argv[2] + experiment = sys.argv[3] + sim_infile = sys.argv[4] + msd_file = sys.argv[5] + quant_file = sys.argv[6] + station_file = sys.argv[7] + obs_monmean_file = sys.argv[8] + figure_output = sys.argv[9] + main(variable, timestring, experiment, sim_infile, msd_file, quant_file, station_file, obs_monmean_file, figure_output) diff --git a/SYNOP/synop_stats.sh b/SYNOP/synop_stats.sh deleted file mode 100755 index 34ea1a55dffa27f0904f206168b9c7996037deb6..0000000000000000000000000000000000000000 --- a/SYNOP/synop_stats.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -echo "Bash version ${BASH_VERSION}..." 
-######################################################################## -# Author: Alexander Mahura : 2023-08-22 -######################################################################## - -# PRE-SETUP (TESTING FOR LIMITED ODB DATASET & LIMITED GEOGRAPHICAL AREA) -# FOR AIR TEMPERATURE AT 2 METRE -# ANY OTHER METEOROLOGICAL PARAMETER (FROM SYNOP LISTED) CAN BE ADDED -# -# Producing quantile rank histogram statistics and plots -# on example of 2 m temperature (using limited open odb datasets) -# -# Met.variable, start & end year, start & end month -varMETnum=$1 -b_yy_start=$2 -b_yy_end=$3 -b_mm_start=$4 -b_mm_end=$5 - -echo " varMETnum, b_yy_start, b_yy_end, b_mm_start, b_mm_end : $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end" - -echo "*****************************************************************" -echo " STATS - PRODUCING QUANTILE RANK HISTOGRAM STATISTICS AND PLOTS" -echo " ON EXAMPLE OF METEOROLOGICAL VARIABLE: $varMETnum" -echo " AT SYNOP STATIONS" -echo "*****************************************************************" - -cd STATS -pwd -#t exit - -echo "*****************************************************************" -echo " STEP STATS-1 - PRODUCE RANK HISTOGRAMS FOR ALL SYNOP STATIONS" -echo "*****************************************************************" - -#./produce_rank_histograms_all_stations.sh 39 2020 2020 1 12 -./produce_rank_histograms_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end - -echo "*****************************************************************" -echo " STEP STATS-2 - PRODUCE STANDARD PLOTS FOR EACH SYNOP STATION" -echo "*****************************************************************" - -#./produce_standard_plots_all_stations.sh 39 2020 2020 1 12 -./produce_standard_plots_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end - -echo "*****************************************************************" -echo " STEP STATS-3 - PRODUCE SUMMARY RANK HISTOGRAMS FOR ALL STATIONS" -echo "*****************************************************************" - -#./summary_rank_histograms_all_stations.sh 39 2020 2020 1 12 -./summary_rank_histograms_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end - -echo "*****************************************************************" -echo " ALL STATS-# STEP COMPLETED" -echo "*****************************************************************" - -exit diff --git a/quick_test_as_batchjob.bash b/quick_test_as_batchjob.bash index a61bf26d2a813445b2aab95214715c479e8a965d..9c5a67782c3ab3f487114473f0e9b4e190ffe9ca 100644 --- a/quick_test_as_batchjob.bash +++ b/quick_test_as_batchjob.bash @@ -1,11 +1,11 @@ #!/bin/bash -l -#SBATCH --job-name=a0gg2050 # Job name -#SBATCH --output=eo/a0gg_2050.eo # Name of stdout output file -#SBATCH --error=eo/a0gg_2050.eo# Name of stderr error file +#SBATCH --job-name=test_demo2 # Job name +#SBATCH --output=eo/test_demo2.eo # Name of stdout output file +#SBATCH --error=eo/test_demo2.eo# Name of stderr error file #SBATCH --partition=small # Partition (queue) name small, debug #SBATCH --ntasks=1 # One task (process) #SBATCH --cpus-per-task=1 # Number of cores (threads) -#SBATCH --mem-per-cpu=8000 +#SBATCH --mem-per-cpu=90000 #SBATCH --time=72:00:00 # Run time (hh:mm:ss) #SBATCH --account=project_465000454 # Project for billing @@ -17,9 +17,9 @@ module purge #export experiment='a002' export run_type='HadISD' # [test, FMI, HadISD] export rootdir=/projappl/project_465000454/data/OBSALL -export experiment='a0gg_2050' -export sdate=2050010100 
-export edate=2050123100
+export experiment='test_demo2'
+export sdate=2010010100
+export edate=2011010123
 export clean_scratch='true'
 export restart='false'
 export grid='0.1/0.1' # options: [1.0/1.0, 0.25/0.25, 0.1/0.1] # please add 0.05/0.05 also
@@ -28,3 +28,6 @@ export timestep='0100' # GSV timestep in the form of 'hhmm'
 #python run_obsall.py
 ./run_obsall.sh
+
+# IMPORTANT NOTES:
+# There is an ugly hack in main_synop.sh that makes it possible to process the next year after the monitoring stats have been computed with Juha's Python container, which conflicts with the other environment that provides the GSV Interface.
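+#
+# A possible cleanup for that hack (untested sketch): run the stats step in a
+# subshell so the container's PATH never leaks into the GSV environment, e.g.
+#   ( export PATH="/project/project_465000454/jlento/hadisd/hadisd-container-env/bin:$PATH"
+#     python3 synop_stats.py "$variable" "$b_yy" "$climate_simulation" ... )
+# This would make the explicit PATH_orig save/restore in main_synop.sh unnecessary.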