diff --git a/FIGSEXAMPLES/destine_obs_radsound_figs.jpg b/FIGSEXAMPLES/destine_obs_radsound_figs.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f13b27ff0be425022d6287be5fa87e8908c5e170 Binary files /dev/null and b/FIGSEXAMPLES/destine_obs_radsound_figs.jpg differ diff --git a/FIGSEXAMPLES/destine_obs_synop_figs.jpg b/FIGSEXAMPLES/destine_obs_synop_figs.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2e8c86487d1bd24b0ea77c307eb4a7e3aa5831ec Binary files /dev/null and b/FIGSEXAMPLES/destine_obs_synop_figs.jpg differ diff --git a/RADSOUND/GSVMODDATA/2t_r360x180.nc b/RADSOUND/GSVMODDATA/2t_r360x180.nc new file mode 100644 index 0000000000000000000000000000000000000000..f556b250b838b97192a98a0febc7c866ef3901f5 Binary files /dev/null and b/RADSOUND/GSVMODDATA/2t_r360x180.nc differ diff --git a/RADSOUND/GSVMODDATA/t850_r360x180.nc b/RADSOUND/GSVMODDATA/t850_r360x180.nc new file mode 100644 index 0000000000000000000000000000000000000000..c448a988044b0ccce15d7d4ce60ec46aa37f3e91 Binary files /dev/null and b/RADSOUND/GSVMODDATA/t850_r360x180.nc differ diff --git a/RADSOUND/STATRS/fortran-programs/fourier-quantiles.f95 b/RADSOUND/STATRS/fortran-programs/fourier-quantiles.f95 new file mode 100644 index 0000000000000000000000000000000000000000..a0eb2fd24425f68f765106e7035dcaa7aa1be9d5 --- /dev/null +++ b/RADSOUND/STATRS/fortran-programs/fourier-quantiles.f95 @@ -0,0 +1,657 @@ + PROGRAM fourierquantiles +! +! Estimate the quantiles of the probability distribution of a variable +! at a single location as a continuous Fourier series function of the time +! of the year. +! +! The input file (INFILE) contains the time series of observations +! The output file (OUTFILE) contains the quantiles from 1% to 99%, +! for each output UTC hour separately. +! +!------------------------------------------------------- +! Jouni Räisänen, University of Helsinki, July 2023 +!------------------------------------------------------- +! +! 
INPUT AND OUTPUT FILES +!------------------------------------------------------- +! +! INFILE (input): text file of input data, converted from .odb format +! using an 'odb sql select' command. +! +! The first line is a header. The other lines are assumed +! to include the time separated to four parts, and the +! actual data value of one parameter, i.e.: +! +! 1) year +! 2) month +! 3) day +! 4) hour +! 5) value +! +! OUTFILE: (output): text file giving quantiles for each calendar +! day and hour. This can be converted to .odb using +! an 'odb import' command. +! +! The first line is a header. The other lines include +! (all items separated by a space): +! +! 1) station code +! 2-3) station longitude and latitude +! 4-6) month, day and hour +! 7-105) quantiles from 1% to 99% +! +! OUTFILE_GRADS (output, optional) = GrADS output file (for easier plotting) +! +! Notes on files: + +! 1) Station code, longitude and latitude are given as +! namelist parameters. They are not included in INFILE +! 2) Year is not written in quantile output because the +! quantiles are assumed to be independent of year +! (true in a stationary climate). +! 3) OUTFILE_GRADS is not needed if python is used for plotting +! +!------------------------------------------------------------- +! NAMELIST PARAMETERS: +!------------------------------------------------------------- +! +! NWAVES = Number of Fourier waves in fitting the annual cycle. This +! is an important choice, because there is a tradeoff between +! sampling errors (too large NWAVES) and systematic errors +! (too small NWAVES). NWAVES = 4 is a first suggestion +! for 2m temperature but the optimum will depend on length +! of data record, variable etc. +! +! INFILE = input file with station time series (see above) +! OUTFILE = output file with quantiles (see above) +! OUTFILE_GRADS = output file in GrADS binary format (see above) +! GRADS_OUT : If .true., output is also written in a GrADS binary file +! for easier plotting. +! +! 
YEAR1 = first year taken into account in calculation of quantiles +! YEAR2 = last year taken into account in calculation of quantiles +! +! Observations outside [YEAR1,YEAR2] are ignored +! +! HOUR1 = first UTC hour in output +! HOUR2 = last UTC hour in output +! HOUR_STEP = interval of UTC hours in output +! L_ROUND_HOUR : if .true., hour rounded to the nearest output hour +! (otherwise, data for non-matching hours are ignored) +! +! L_CODE_IN_CHAR: if .true., the station code is assumed to be +! a 3-character string (as for GRUAN soundings). +! Otherwise, station code as integer is assumed. +! STATION_INT = station code in integer +! STATION_CHAR = station code as a 3-character string (GRUAN SOUNDINGS) +! LONGITUDE = station longitude +! LATITUDE = station latitude +! +! In practice, the station code and coordinates need to be found +! beforehand from a .odb file. +! +! MIN_N_OBS = minimum total number of valid observations required +! for calculation of quantiles (separately for each UTC hour) +! Default: 100 +! +! MISS = missing value code (in input and output). Default: -9.99e6. +! all observations with abs(f) >= abs(miss) are treated as missing. +! +! RRMAX = maximum number of iterations in solving fourier component amplitudes +! (default: 10000) +!--------------------------------------------------------------- + IMPLICIT NONE + INTEGER :: I,J,K,Q,R,D ! loop variables + INTEGER :: IMIN ! index with minimum penalty function in iteration + INTEGER :: RRMAX ! maximum number of iterations + INTEGER :: NDAYS ! number of days from beginning of YEAR1 to end of YEAR2 + INTEGER :: NWAVES ! number of Fourier components + INTEGER :: KK ! 2 x NWAVES + 1 = number of amplitudes to be solved + INTEGER,PARAMETER :: KMAX=50 ! maximum number of Fourier components + INTEGER,PARAMETER :: NDMAX=20000 ! maximum number of days in timeseries + INTEGER,PARAMETER :: NDIV=4 ! this is used in the iteration procedure + REAL,PARAMETER :: YRLEN=365.25 ! 
length of year in days + DOUBLE PRECISION :: A(2*KMAX+1),AA(2*KMAX+1) ! Fourier amplitudes + DOUBLE PRECISION :: PII ! 3.14159265.. + DOUBLE PRECISION :: FOUR(NDMAX,2*KMAX+1) ! daily values of Fourier components + DOUBLE PRECISION :: SFDER(2*KMAX+1) ! penalty function derivative + DOUBLE PRECISION :: F(NDMAX) ! daily values of variable for one UTC hour + DOUBLE PRECISION :: FF(NDMAX) ! a copy of the previous + REAL :: F_min,F_max ! minimum and maximum of F + INTEGER,PARAMETER :: NHOUR=24 ! Number of hours in days + REAL :: VALUES(NDMAX,nhour) ! Daily values of variable for all 24 UTC hours + INTEGER :: n_obs(nhour) ! number of observations available for each 24 UTC hours + INTEGER :: min_n_obs ! minimum number of observations required for calculating the quantiles + REAL :: F1 ! a single data value read from INFILE + DOUBLE PRECISION :: STEP0,STEP,MINSTEP,NORM ! iteration-related stuff + DOUBLE PRECISION :: SF,SF2,SFMIN,SFMINPREV ! iteration-related stuff + REAL :: MISS ! missing value code + CHARACTER*160 :: INFILE,OUTFILE ! input and output file + CHARACTER*160 :: OUTFILE_GRADS ! grads output just for testing + INTEGER, PARAMETER :: L_dataline=1700 ! max length of input or output text line + character*1700 :: dataline,headerline,emptyline ! strings for output + character*2 :: number_of_quantile ! from 01 to 99 + character*16 :: quantile_value ! for adding a quantile value to dataline + INTEGER, PARAMETER :: NQUANT=99 ! number of quantiles (from 1% to 99%) + REAL :: FR(365) ! daily values of a quantile + REAL :: GR(365) ! copy of the previou + REAL :: FFR(NQUANT,365) ! all quantiles collected to one variable + REAL :: QUANT(NQUANT) ! quantiles to be calculated (0.01 ... 0.99) + INTEGER :: yyear,mmonth,dday,hhour !year, month, day and hour as read from file + INTEGER :: year,month,day,hour ! year, month, day and hour after +! eventual rounding of hours + INTEGER :: year1,year2 ! first and last year in analysis + INTEGER :: day_number ! 
day number since beginning of year 1 + INTEGER :: station_int ! station code integer (for synop stations and RHARM soundings) + CHARACTER*3 :: station_char ! station with characters (for GRUAN soundings) + LOGICAL :: l_code_in_char ! .true. for station code in characters (GRUAN soundings) + REAL ::longitude,latitude ! station coordinates + LOGICAL ::GRADS_OUT ! Output also as GrADS binaries + INTEGER :: IREC ! record number in GrADS output + + INTEGER :: HOUR1,HOUR2,HOUR_STEP ! Hours included in output + INTEGER :: N_OUTPUT_HOURS ! Number of hours included in output + INTEGER :: HOUR_INDEX ! counter for output hours (only needed for GrADS) + LOGICAL :: L_ROUND_HOUR ! if .true., hour rounded to the nearest output hour + +!------------------------------------------------------------------------------- + + NAMELIST/param/NWAVES,INFILE,OUTFILE,& + MISS,YEAR1,YEAR2,& + STATION_INT,STATION_CHAR,LONGITUDE,LATITUDE,GRADS_OUT,OUTFILE_GRADS,& + L_CODE_IN_CHAR,HOUR1,HOUR2,HOUR_STEP,L_ROUND_HOUR,& + MIN_N_OBS,RRMAX + + PII=4.*ATAN(1.) + MISS=-9.99e6 + RRMAX=10000 + min_n_obs=100 + READ(*,NML=PARAM) + + KK = 2*NWAVES+1 + ndays=day_number(year2,12,31,year1) +! +! DAILY VALUES OF EACH COSINE AND SINE FOURIER COMPONENT +! + DO D=1,NDAYS + FOUR(D,1)=1. ! this is the constant component + DO K=1,KMAX + FOUR(D,2*K)=SIN(2.*PII/YRLEN*K*D) + FOUR(D,2*K+1)=COS(2.*PII/YRLEN*K*D) + ENDDO + ENDDO +! +! Quantiles from 1% to 99% +! + do i=1,nquant + quant(i)=i/(nquant+1.) + enddo +! +! Empty data line +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +!------------------------------------------------------- +! Read the contents of the input file to array F +!------------------------------------------------------ + values=miss + open(unit=1,form='formatted',status='old',file=infile) + read(1,*) ! This is the header line + do while(.true.) + read(1,*,err=1,end=1)yyear,mmonth,dday,hhour,f1 + ! 
find the number of day since 31 Dec Year1-1 + if(l_round_hour)then + call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step,nhour) + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif + + d=day_number(year,month,day,year1) + ! insert the read value to its place in table 'values'; records outside [YEAR1,YEAR2] or with an invalid hour are ignored (avoids out-of-bounds writes) + if(d.ge.1.and.d.le.ndays.and.hour.ge.0.and.hour.lt.nhour)values(d,hour+1)=f1 + enddo +1 continue + +! Calculate the number of non-missing observations for each UTC hour +! + n_obs=0 + do hour=hour1,hour2,hour_step + do d=1,ndays + if(abs(values(d,hour+1)).lt.abs(miss))n_obs(hour+1)=n_obs(hour+1)+1 + enddo + enddo +! +!-------------------------------------------------------------------- +! +! Open the output file and write its first line. +! + open(unit=2,form='formatted',status='unknown',file=outfile) + + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real month@hdr:integer day@hdr:integer hour@hdr:integer' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real month@hdr:integer day@hdr:integer hour@hdr:integer' + endif + + do j=1,nquant + write(number_of_quantile,'(i2.2)')j + headerline=trim(headerline)//' q'//number_of_quantile//'@body:real' + enddo + write(2,*)trim(headerline) +! +! Output file for GrADS data, for visualization only? +! + if(grads_out)then + open(unit=11,form='unformatted',file=outfile_grads,access='DIRECT',& + recl=4,status='unknown') + endif + +!***************************************************************************** +! Main loop: calculate the quantiles separately for each UTC hour +! + do hour=hour1,hour2,hour_step + + if(n_obs(hour+1).ge.min_n_obs)then ! enough of data to calculate the quantiles +!---------------------------------------------------------------------- + + do d=1,ndays + f(d)=values(d,hour+1) + enddo +! +! Find the minimum and maximum of f(d) to determine +! 
the initial step in iteration + + f_min=abs(miss) + f_max=-abs(miss) + do d=1,ndays + if(f(d).gt.f_max.and.abs(f(d)).lt.abs(miss))f_max=f(d) + if(f(d).lt.f_min.and.abs(f(d)).lt.abs(miss))f_min=f(d) + enddo + step0=(f_max-f_min)/50. ! initial step in iteration (arbitrary choice) + minstep=step0/2000. ! minimum step in iteration + +!-------------------------------------------------------------------------------- +! LOOP OVER QUANTILES BEGINS +! + DO Q=1,NQUANT + step=step0 +! +! Initial guesses for the Fourier component amplitudes +! + A(1)=(f_min+f_max)/2. + DO K=2,KK + A(K)=0. + ENDDO +! + SFMINPREV=9.99e8 ! previous minimum of the penalty function + SFMIN=SFMINPREV/2. ! arbitrary large value but smaller than SFMINPREV +! +!--------------------------------------------------------------------- +! +! Iterative solution of Fourier component amplitudes +! + R=0 +! +! Continue iteration as far as the penalty function decreases, +! but no more than RRMAX rounds +! + do while (R.lt.RRMAX.and.SFMIN.le.SFMINPREV) + R=R+1 +! +! Find out the penalty function and the direction of iteration +! + CALL PENALTY(F,FF,FOUR,KK,A,NDAYS,NDMAX,SF,SFDER,MISS,QUANT(Q),.true.) +! +! Amplitude of the penalty function derivative +! + NORM=0. + DO K=1,KK + NORM=NORM+SFDER(K)**2. + ENDDO + NORM=SQRT(NORM) +! +! Iteration. To find out a suitable down-gradient distance, +! search with intervals STEP/NDIV up to the distance STEP + + SFMIN=SF + IMIN=0 + DO I=1,NDIV + DO K=1,KK + AA(K)=A(K)-STEP*(SFDER(K)/NORM)*REAL(I)/NDIV + ENDDO + CALL PENALTY(F,FF,FOUR,KK,AA,NDAYS,NDMAX,SF2,SFDER,MISS,QUANT(Q),.false.) + IF(SF2.LE.SFMIN)THEN + SFMIN=SF2 + IMIN=I + ENDIF + ENDDO +! +! Update the iteration step +! + IF(IMIN.EQ.0)THEN + STEP=STEP/(2.*NDIV) + ELSE + STEP=(IMIN*STEP)/(1.*NDIV) + ENDIF + if(step.lt.minstep)step=minstep +! +! Update Fourier component amplitudes +! + DO K=1,KK + A(K)=A(K)-STEP*(SFDER(K)/NORM) + ENDDO + +! 
WRITE(*,*)'R,SFMIN,STEP',R,SFMIN,STEP + + IF(SFMIN.LT.SFMINPREV)then + SFMINPREV=SFMIN + ENDIF + + ENDDO ! END of the iteration loop +!-------------------------------------------------------------------------- +! Iteration ready. Calculate the quantile estimate for each day +! of the year from the last amplitudes (a(k)) of the Fourier components. +! + do d=1,365 + ffr(q,d)=0. + do k=1,kk + ffr(q,d)=ffr(q,d)+a(k)*four(d,k) + enddo + enddo + + ENDDO ! End of quantile loop (Q) +!--------------------------------------------------------------------------- +! +! In rare cases (some tens of a per cent), the calculated quantile +! values are not in ascending order. This is an artefact of the +! Fourier fitting. To avoid this, the quantiles are forcibly +! arranged to ascending order even if they were not. + + do d=1,365 + do q=1,nquant + fr(q)=ffr(q,d) + enddo + call order(fr,gr,nquant) + do q=1,nquant + ffr(q,d)=gr(q) + enddo + enddo +! +!--------------------------------------------------------------------- +! + else ! quantiles not calculated because there was too little data + ffr=miss + endif +! +!---------------------------------------------------------------------- +! +! Write the quantiles to file. +! + do d=1,365 + dataline=emptyline + call month_and_day_from_day_number(d,month,day) + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,1X,I2,1X,I2)')station_char,longitude,latitude,month,day,hour + else + write(dataline,'(I7,2F16.6,1X,I2,1X,I2,1X,I2)')station_int,longitude,latitude,month,day,hour + endif + + do q=1,nquant + write(quantile_value,'(F16.6)')ffr(q,d) + dataline=trim(dataline)//quantile_value + enddo + write(2,*)trim(dataline) + if(month.eq.2.and.day.eq.28)then ! 
copy Feb 28 values for Feb 29 (the extra row is written with day = 29) + dataline=emptyline + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,1X,I2,1X,I2)')station_char,longitude,latitude,month,day+1,hour + else + write(dataline,'(I7,2F16.6,1X,I2,1X,I2,1X,I2)')station_int,longitude,latitude,month,day+1,hour + endif + do q=1,nquant + write(quantile_value,'(F16.6)')ffr(q,d) + dataline=trim(dataline)//quantile_value + enddo + write(2,*)trim(dataline) + endif + if(grads_out)then ! February 29 skipped in GrADS output, although included in the text file + ! that will be converted to ODB. + n_output_hours=(hour2-hour1)/hour_step+1 + do q=1,nquant + hour_index=(hour-hour1)/hour_step + irec=1+hour_index+n_output_hours*(q-1)+(d-1)*n_output_hours*nquant + write(11,rec=irec)ffr(q,d) + enddo + endif + enddo ! end of day loop + + ENDDO ! end of hour loop + +!----------------------------------------------------------------- + + close(2) + if(grads_out)close(11) + + END program fourierquantiles +! +!----------------------------------------------------------------- +! + SUBROUTINE PENALTY & + (F,FF,FOUR,NFOUR,A,NDAYS,NDMAX,SF,SFDER,MISS,QUANT,LDER) +! +! Calculation of the penalty function and its derivatives +! wrt of the amplitude of each Fourier component +! +! The penalty function is defined as the sum of daily penalties +! +! If the value from the fourier series (FF) exceeds the actual data value (F), +! the daily penalty is +! +! (1-QUANT)*(FF-F) +! +! where QUANT is the quantile estimated (scale 0-1). In the opposite case (F > FF) +! the daily penalty is +! +! QUANT*(F-FF) + + IMPLICIT NONE + + integer :: nfour ! IN: number of fourier components used (cosine + sine) + integer :: ndays,ndmax ! IN: actual and maximum number of days in time series + real :: miss ! IN: missing value code + double precision :: f(ndays) ! IN: values of input for each day + double precision :: ff(ndays)! OUT: values calculated from fourier series + double precision :: four(ndmax,nfour)! 
IN: values of fourier components in each day + double precision :: sf,sfder(nfour) ! OUT: penalty function and its derivatives wrt. fourier amplitudes + double precision :: A(nfour) ! IN: previous amplitudes of fourier components + real :: quant ! IN: quantile to be estimated (scale 0-1) + logical :: lder ! If .true., the derivative of the penalty function is also calculate + integer :: i,k +! +! Initialize the penalty function and its derivatives to zero +! + sf=0. + if(lder)sfder=0. +! +! Sum the penalty function and its derivatives over all days in time series +! + do i=1,ndays + if(abs(f(i)).lt.abs(miss))then + +! Calculate the Fourier series estimate from the previous amplitudes + + ff(i)=0. + do k=1,nfour + ff(i)=ff(i)+a(k)*four(i,k) + enddo +! +! Add the daily contribution to the penalty function and its derivatives +! + if(f(i).lt.ff(i))then + sf=sf+(1.-quant)*(ff(i)-f(i)) + if(lder)then + do k=1,nfour + sfder(k)=sfder(k)+(1.-quant)*four(i,k) + enddo + endif + endif + if(f(i).gt.ff(i))then + sf=sf+quant*(f(i)-ff(i)) + if(lder)then + do k=1,nfour + sfder(k)=sfder(k)-quant*four(i,k) + enddo + endif + endif + + endif ! abs(f(i).lt.abs(miss)) + enddo + + RETURN + END subroutine penalty +! +!--------------------------------------------------------------------------------- +! + INTEGER FUNCTION DAY_NUMBER(year,month,day,year1) +! +! Day number of "year,month,day", where "year1,1,1" = 1. +! + implicit none + integer :: year,month,day,year1 + integer,parameter :: nmon=12 + integer :: days_before(nmon) ! this is for normal years. Leap days added separately + integer :: dnumber,n_leap_days,yr,yr_end + data days_before / 0,31,59,90,120,151,181,212,243,273,304,334 / + dnumber=365*(year-year1)+days_before(month)+day +! +! Calculate and add the number of leap days since beginning of year1 +! 
+ n_leap_days=0 + yr_end=year + if(month.le.2)yr_end=year-1 + do yr=year1,yr_end + if(mod(yr,4).eq.0.and.((mod(yr,400).eq.0).or.(mod(yr,100).gt.0)))then + n_leap_days=n_leap_days+1 + endif + enddo + day_number=dnumber+n_leap_days + return + end function DAY_NUMBER +! +!------------------------------------------------------------------------- +! + subroutine month_and_day_from_day_number(day_number,month,day) +! +! Convert day_number from beginning of the year (IN) to month and day (OUT) +! assuming a normal (non-leap) year +! + implicit none + integer :: day_number,month,day + integer,parameter :: nmon=12 + integer :: days_before(nmon+1) ! this is for normal years only + integer m + data days_before /0,31,59,90,120,151,181,212,243,273,304,334,365/ + do m=1,12 + if(day_number.gt.days_before(m)& + .and.day_number.le.(days_before(m+1)))then + month=m + day=day_number-days_before(m) + exit + endif + enddo + return + end subroutine month_and_day_from_day_number +! +!---------------------------------------------------------------------- +! + subroutine order(f,g,n) +! +! Ordering the values of f(1...n) in ascending order. Result in g +! Simple exachange ordering (inefficient for large n!) +! + implicit none + integer :: i,j,n + real :: f(n),g(n),g1 + g=f + do i=1,n-1 + do j=i+1,n + if(g(j).lt.g(i))then + g1=g(i) + g(i)=g(j) + g(j)=g1 + endif + enddo + enddo + + return + end subroutine order +! +!-------------------------------------------------------------------! +! +subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step,nhour) +! +! Rounding of time to the nearest hour in (hour1,hour1+hour_step,...,hour2) +! +! time in input: yyear,mmonth,dday,hhour +! 
time in output: year,month,day,hour + +implicit none +integer :: year,month,day,hour +integer :: yyear,mmonth,dday,hhour +integer :: hour1,hour2,hour_step +integer :: ndays_month(12) +integer :: hour_index +integer :: nhour +ndays_month=31 +ndays_month(4)=30 +ndays_month(6)=30 +ndays_month(9)=30 +ndays_month(11)=30 +ndays_month(2)=28 +if((mod(yyear,4).eq.0.and.mod(yyear,100).gt.0).or.(mod(yyear,400).eq.0))then + ndays_month(2)=29 +endif +year=yyear +month=mmonth +day=dday +! +! Round the hour to the nearest output hour +! +hour_index=nint(real(hhour-hour1)/hour_step) +hour=hour1+hour_step*hour_index + if(hour.ge.nhour)then ! hhour was rounded forward to next day + hour=hour1 + day=dday+1 + if(day.gt.ndays_month(month))then + day=1 + month=month+1 + if(month.gt.12)then + month=1 + year=yyear+1 + endif + endif + endif + if(hour.lt.0)then ! hhour was rounded backward to previous day + hour=hour2 + day=dday-1 + if(day.eq.0)then + month=month-1 + if(month.eq.0)then + month=12 + year=yyear-1 + endif + day=ndays_month(month) + endif + endif + +return +end subroutine round_hour + + diff --git a/RADSOUND/STATRS/fortran-programs/monthly_means_one_station.f95 b/RADSOUND/STATRS/fortran-programs/monthly_means_one_station.f95 new file mode 100644 index 0000000000000000000000000000000000000000..eea2adc67403501c7038711a8f3092e90af81284 --- /dev/null +++ b/RADSOUND/STATRS/fortran-programs/monthly_means_one_station.f95 @@ -0,0 +1,268 @@ +program monthly_means_one_station + ! + ! Calculation of monthly mean values of one variable for one station + ! + ! The monthly means are calculated from daily data for each UTC hour in + ! (hour1, hour1+hour_step, ..., hour2) separately + ! + ! If the number of observations is less than min_n_obs, the monthly means are + ! set as missing + ! + !------------------------------------------------ + ! Jouni Räisänen, July 2023 + !-------------------------------------------------- + ! + ! INPUT FILE: infile + ! + ! 
This is a text file produced by an odb sql select command + ! + ! The first line is a header. The remaining lines contain + ! + ! 1. station code + ! 2. year + ! 3. month + ! 4. day + ! 5. hour + ! 6. variable code + ! 7. data value + ! + ! OUTPUT FILE: outfile + ! + ! This is a text file that can be converted to .odb + ! + ! The first line is a header. The remaining lines contain + ! + ! 1. station code + ! 2. longitude + ! 3. latitude + ! 4. year + ! 5. month + ! 6. hour + ! 7. variable code + ! 8. number of observations + ! 9. data value +!--------------------------------------------------------- + ! + ! Notes on the output file: + ! + ! The longitude and latitude are given as namelist values + ! + ! station code may be either a 3-character string or an integer + ! (depending on the namelist parameter l_code_in_char + ! +!------------------------------------------------------------- + ! + ! namelist parameters: + ! + ! + ! infile = input file (see above) + ! outfile = output file (see above) + ! longitude = station longitude + ! latitude = station latitude + ! hour1 = first UTC hour for which monthly means are calculated + ! hour2 = last UTC hour --------------------------------------- + ! hour_step = interval of UTC hours --------------------------- + ! l_round_hour : if .true., hours are rounded to the nearest + ! output UTC hour (otherwise, non-match hours are ignored) + ! l_code_in_char : if .true., the station code is read and write as a 3-char string + ! Otherwise, it is assumed to be an integer. + ! min_n_obs = minimum number of observations required for calculating a monthly mean + ! (detault: 10) + ! miss = missing value code (default: -9.99e6). All observations f with + ! abs(f) >= abs(miss) are treated as missing + ! +!---------------------------------------------------------- + + implicit none + character*160 :: infile,outfile + character*200 :: headerline,dataline + integer,parameter :: nhour=24 ! 
number of UTC hours per day + integer,parameter :: ndays_max=31 ! maximum number of days per month + real :: f(nhour,ndays_max),fmean(nhour) ! individual observation and monthly means + integer :: nobs(nhour) ! number of observations per day + real :: miss ! missing value code + integer :: variable ! variable code + integer :: yyear,mmonth,dday,hhour ! time as read from infile + integer :: year,month,day,hour ! time after eventual rounding of hours + integer :: hour1,hour2,hour_step ! UTC hours used + logical :: l_round_hour ! rounding of hours to nearest output hour? + integer :: station_int ! station code as integer (synop stations & RHARM) + character*3 :: station_char ! station code as string (GRUAN soundings) + logical :: l_code_in_char ! choice of the type of station code + real :: longitude,latitude ! station coordinates (given in namelist) + integer :: prev_year,prev_month + real :: f1 ! individual data value read from file + integer :: i + integer :: min_n_obs ! minimum number of observations required per month + + namelist/param/infile,outfile,longitude,latitude,& + hour1,hour2,hour_step,l_round_hour,l_code_in_char,min_n_obs,miss + min_n_obs=10 + miss=-9.99e6 + read (*,nml=param) + ! + ! Open the input and output files + ! + open(unit=1,file=trim(infile),status='old',form='formatted') + open(unit=2,file=trim(outfile),form='formatted') + ! + ! Write the header to the output file + ! + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real year@hdr:integer'//& + ' month@hdr:integer hour@hdr:integer variable@hdr:integer n_data@body:integer'//& + ' value@body:real' +else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real year@hdr:integer'//& + ' month@hdr:integer hour@hdr:integer variable@hdr:integer n_data@body:integer'//& + ' value@body:real' +endif + + write(2,*)trim(headerline) +! +!-------------------------------------------------------------------------------------------- +! 
+ nobs=0 + fmean=0 + prev_year=0 + prev_month=0 + read(1,*) ! The first line in the input file is a header +! +! Reading the input file + writing the monthly means when a new month starts +! + do while (.true.) + if(l_code_in_char)then +1 read(1,*,err=1,end=3)station_char,yyear,mmonth,dday,hhour,variable,f1 + else +2 read(1,*,err=2,end=3)station_int,yyear,mmonth,dday,hhour,variable,f1 + endif +! +! Rounding of hours? +! + if(l_round_hour)then + call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step,nhour) + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif +! +! Calculation of monthly means when a new month starts (missing if fewer than min_n_obs observations) +! + if((month.gt.prev_month).or.(year.gt.prev_year))then + if(prev_month.gt.0)then + do i=hour1,hour2,hour_step + if(nobs(i+1).ge.max(min_n_obs,1))then + fmean(i+1)=fmean(i+1)/nobs(i+1) + else + fmean(i+1)=miss + endif + if(l_code_in_char)then + write(2,'(A3,2F16.6,1X,I4,1X,I2,1X,I2,1X,I3,1X,I4,F16.6)')& + station_char,longitude,latitude,prev_year,prev_month,i,variable,nobs(i+1),fmean(i+1) + else + write(2,'(I6,2F16.6,1X,I4,1X,I2,1X,I2,1X,I3,1X,I4,F16.6)')& + station_int,longitude,latitude,prev_year,prev_month,i,variable,nobs(i+1),fmean(i+1) + endif + enddo + endif + prev_year=year + prev_month=month + nobs=0 + fmean=0. + endif +! +! Updating the counters for sum and number of observations (abs(f1) >= abs(miss) means missing) +! + if(abs(f1).lt.abs(miss))then + nobs(hour+1)=nobs(hour+1)+1 + fmean(hour+1)=fmean(hour+1)+f1 + endif + enddo + +3 continue ! The program ends up here when the whole file has been read + ! + ! Write the data for the last month (same min_n_obs rule as for the other months) + ! 
+ do i=hour1,hour2,hour_step + if(nobs(i+1).ge.max(min_n_obs,1))then + fmean(i+1)=fmean(i+1)/nobs(i+1) + else + fmean(i+1)=miss + endif + if(l_code_in_char)then + write(2,'(A3,2F16.6,1X,I4,1X,I2,1X,I2,1X,I3,1X,I4,F16.6)')& + station_char,longitude,latitude,prev_year,prev_month,i,variable,nobs(i+1),fmean(i+1) + else + write(2,'(I6,2F16.6,1X,I4,1X,I2,1X,I2,1X,I3,1X,I4,F16.6)')& + station_int,longitude,latitude,prev_year,prev_month,i,variable,nobs(i+1),fmean(i+1) + endif + enddo + close (1) + close (2) + + end program monthly_means_one_station + +!-------------------------------------------------------------------! +! +subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step,nhour) +! +! Rounding of time to the nearest hour in (hour1,hour1+hour_step,...,hour2) +! +! time in input: yyear,mmonth,dday,hhour +! time in output: year,month,day,hour + +implicit none +integer :: year,month,day,hour +integer :: yyear,mmonth,dday,hhour +integer :: hour1,hour2,hour_step +integer :: ndays_month(12) +integer :: hour_index +integer :: nhour +ndays_month=31 +ndays_month(4)=30 +ndays_month(6)=30 +ndays_month(9)=30 +ndays_month(11)=30 +ndays_month(2)=28 +if((mod(yyear,4).eq.0.and.mod(yyear,100).gt.0).or.(mod(yyear,400).eq.0))then + ndays_month(2)=29 +endif +year=yyear +month=mmonth +day=dday +! +! Round the hour to the nearest output hour +! +hour_index=nint(real(hhour-hour1)/hour_step) +hour=hour1+hour_step*hour_index + if(hour.ge.nhour)then ! hhour was rounded forward to next day + hour=hour1 + day=dday+1 + if(day.gt.ndays_month(month))then + day=1 + month=month+1 + if(month.gt.12)then + month=1 + year=yyear+1 + endif + endif + endif + if(hour.lt.0)then ! 
hhour was rounded backward to previous day + hour=hour2 + day=dday-1 + if(day.eq.0)then + month=month-1 + if(month.eq.0)then + month=12 + year=yyear-1 + endif + day=ndays_month(month) + endif + endif + +return +end subroutine round_hour diff --git a/RADSOUND/STATRS/fortran-programs/plots_for_one_station.f95 b/RADSOUND/STATRS/fortran-programs/plots_for_one_station.f95 new file mode 100644 index 0000000000000000000000000000000000000000..b74cb22e5ba26d8a8466fe831b7f9dda84a6cc8d --- /dev/null +++ b/RADSOUND/STATRS/fortran-programs/plots_for_one_station.f95 @@ -0,0 +1,613 @@ +program plots_for_one_station + ! + ! For producing files that can be used to generate "one-station standard plots" + ! in python and in GrADS. However, these plots need to be generated separately, + ! i.e., python or GrADS is not called directly from this program. + ! + ! The following plots can be produced, depending on the details of the + ! python or GrADS script: + ! + ! 1) The time series of simulated data at one station location, + ! plotted against the background of the observed quantiles. + ! Separate plots for 00, 06, 12 and 18 UTC. The plots are + ! organized so that all the simulation data are projected + ! on a single annual cycle (normal year, leap days excluded) + ! regardless of the length of the simulation. + ! + ! 2) A quantile rank histogram, showing the normalized frequencies + ! for 100 percentile bins from 0-1% to 99-100%. The Mean + ! Square Deviation and its p-value are written as text in this plot. + ! + ! NB hard-coding: the program currently gives output only for 00, 06, 12 and 18 UTC. + ! + ! ---------------------------------------------------------------------- + ! Jouni Räisänen, University of Helsinki, August 2023 + !----------------------------------------------------------------------- + ! + ! INPUT FILES (all as text files retrieved from .odb format): + ! ------------------------------------------------------------ + ! + ! 
sim_file = simulated data from the station location of interest + ! + ! Each line of this file includes: + ! + ! 1) year + ! 2) month + ! 3) day + ! 4) hour + ! 5) data value + ! + ! quantile_file = observed quantiles for this station + ! + ! Each line of this file includes: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) month + ! 5) day + ! 6) hour + ! 7-105) quantiles from 1% to 99% + ! + ! rank_histogram_file = frequencies and MSD and P-values for plotting the rank histogram + ! + ! Each line of this file includes: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) UTC hour + ! 5) total number of observation for the UTC hour + ! 6) Mean squared deviation (MSD) of the quantile bin rank histogram wrt a flat histogram + ! 7) p-value of the MSD value, relative to the bootstrap sampling distribution + ! 8-107) frequencies for quantile bins 0-1%, 1-2%, ... ,98-99%, 99-100% + ! + ! ---------------------------------------------------------------------------------------- + ! + ! OUTPUT FILES in ODB compatible text format (for python) + ! + ! txt_data_time_series: individual data values from sim_file + ! + ! The first line is a header. The other lines include: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) year + ! 5) day of year (counted from Jan 1) + ! 6) UTC hour + ! 7) data value + ! + ! txt_data_quantiles: selected quantiles from the oberved distribution as a function of UTC hour and time of year + ! + ! The first line is a header. The other lines include: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) day of year (counted from Jan 1) + ! 5) UTC hour + ! 6-12) quantiles 1, 10, 25, 50, 75, 90 and 99% + ! + ! txt_data_rank_histogram = quantile bin frequencies (separately for each UTC hour) + ! + ! The first line is a header. The other lines include: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) UTC hour + ! 
5) Mean squared deviation (MSD) of the quantile bin rank histogram wrt a flat histogram + ! 6) p-value of the MSD value, relative to the bootstrap sampling distribution + ! 7-106) frequencies for quantile bins 0-1%, 1-2%, ... ,98-99%, 99-100% + ! + ! ---------------------------------------------------------------------------------------- + ! + ! OUTPUT FILES in GrADS binary format (for GrADS) + ! + ! grads_data_time_series = individual data values from sim_file + ! + values of selected quantiles as a function + ! of UTC hour and time of the year. + ! + ! grads_data_rank_histogram = quantile bin frequencies (separately for each UTC hour) + ! + ! + ! In addition, the program writes a few auxiliary text files with hard-coded names + ! for use in GrADS scripts: + ! + ! "vrange_commands" : ranges needed on the y axis of plots + ! "time_series_commands" : for plotting different years in time series with different colors + ! "coordinates" : for including coordinate values in GrADS plot + ! "msd_and_p-value_00" : for including 00 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value_06" : for including 06 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value_12" : for including 12 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value_18" : for including 18 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value" : for including all-UTC MSD and p-value in GrADS plot + ! + !------------------------------------------------------------------------------------------ + ! + ! NAMELIST PARAMETERS: + ! + ! a) input files (see above) + ! + ! sim_file + ! quantile_file + ! rank_histogram_file + ! + ! b) output files in ODB compatible text format (see above) + ! + ! txt_data_time_series + ! txt_data_rank_histogram + ! txt_data_quantiles + ! + ! c) output files in GrADS binary format + ! + ! grads_data_time_series + ! grads_data_rank_histogram + ! + ! d) other + ! + ! year1,month1 = beginning of the period in time series output + !
year2,month2 = end of the period in time series output + ! include_rank_histograms: if .true. (.false.), rank histogram data is included (excluded) in output + ! l_code_in_char : if .true., (.false.) station codes are assumed to be 3-character strings (integers) + ! l_round_6h : if .true., UTC hours are rounded to the closesest even 6-h (00, 06, 12 or 18 UTC). + ! if .false., data for non-matching UTC hours are excluded from output + ! + ! miss = missing value code. Default = -9.99e6. All data f with |f| >= |miss| are treated as missing + ! + !------------------------------------------------------------------------------------------- + ! + implicit none + character*160 :: sim_file ! see above + character*160 :: quantile_file ! see above + character*160 :: rank_histogram_file ! see above + character*160 :: grads_data_time_series ! see above + character*160 :: grads_data_rank_histogram ! see above + character*160 :: txt_data_time_series ! see above + character*160 :: txt_data_quantiles ! see above + character*160 :: txt_data_rank_histogram ! see above + integer :: year1,year2 ! first and last year of simulation + integer :: month1,month2 ! first and last month of simulation + integer :: nyear ! number of years = year2-year1+1 + ! + ! Data structures for input data + ! + integer,parameter :: nhour=4 ! only 00, 06, 12 and 18 UTC used + integer,parameter :: nmax_years=200 ! maximum length of a simulation in years + integer,parameter :: ndays_year=365 ! number of days in a non-leap year + + real :: f(nmax_years,ndays_year,nhour) ! the simulated values + real :: f1 ! a single simulated value + ! + integer,parameter :: nquant=99 ! number of quantiles + real :: quant(nquant,ndays_year,nhour) ! quantile values from quantile_file + real :: quant1(nquant) ! quantile values for a single day and UTC hour + ! + real :: freq(nquant+1,nhour+1) ! quantile bin frequencies from the rank histogram file + real :: freq1(nquant+1) ! 
frequencies for a single UTC hour + real :: msd(nhour+1) ! MSD value from the rank histogram file + real :: msd1 ! a single MSD value + real :: p_value(nhour+1) ! p-values from the rank histogram files + real :: p_value1 ! a single p-value + + integer :: yyear,mmonth,dday,hhour ! time as read from sim_file + integer :: year,month,day,hour ! time after eventual rounding to nearest 6-h + character*3 :: station_char ! station code as 3-char string + integer :: station_int ! station code as integer + logical :: l_code_in_char ! .true. for station codes as string + logical :: l_round_6h ! .true. for rounding time to nearest 6-h + real :: ntot ! total number of observations as read from rank_histogram_file + real :: lon,lat ! longitude and latitude + integer :: i,j,k ! loop variables + integer :: day_of_year ! day count from Jan 1 (in a non-leap year) + + real :: miss ! missing value code + + real :: fmin(nhour),fmax(nhour) ! minimum and maximum values for time series plots + real :: fmin_down(nhour),fmax_up(nhour) ! same, but extended outward + real :: freqmax(nhour+1) ! largest frequency for the rank histogram + + integer,parameter :: n_colors=13 ! number of colours for GrADS output + integer :: color(n_colors) ! colours for GrADS output + integer :: num_int_arr ! fix for Warning : Legacy Extension: REAL array index at (1) + + integer :: irec ! record number for GrADS output +! +! For text output in odb-compatible format: +! + INTEGER, PARAMETER :: L_dataline=1700 ! max length of input or output text line + character*1700 :: dataline,headerline,emptyline ! for reading / writing text files + character*2 :: number_of_bin ! quantile bin numbers for header in txt_data_quantiles + character*16 :: frequency_value ! frequency value written as character string +! + logical :: include_rank_histograms ! 
true for including rank histograms + + namelist/param/sim_file,quantile_file,rank_histogram_file,& + grads_data_time_series,grads_data_rank_histogram,& + year1,year2,month1,month2,& + txt_data_time_series,txt_data_rank_histogram,txt_data_quantiles,& + include_rank_histograms,l_code_in_char,l_round_6h,miss + + data color/9,14,4,11,5,13,3,10,7,12,8,2,6/ +! +! default values +! + miss=-9.99e6 + l_code_in_char=.false. + l_round_6h=.false. + include_rank_histograms=.true. +! +! Read the namelist and count the number of years for output +! + read(*,nml=param) + nyear=year2-year1+1 + + !************************************************************************************************ + ! Read the data from the input files. Only data for the period year1/month1-year2/month2 + ! will be included in the time series output, even if other data exist. + ! + ! 1) time series + ! + f=miss + open(unit=1,form='formatted',file=sim_file,status='old') + do while(.true.) +11 read(1,*,err=11,end=12) yyear,mmonth,dday,hhour,f1 + if(l_round_6h)then + call round_6h(yyear,mmonth,dday,hhour,year,month,day,hour) ! Rounding to nearest 6-h? + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif + if((year.gt.year1.or.(year.eq.year1.and.month.ge.month1)).and. & ! Is the time within the + (year.lt.year2.or.(year.eq.year2.and.month.le.month2))) then ! wanted range? + + if(.not.(month.eq.2.and.day.eq.29))then ! leap days are skipped from plotting + f(year-year1+1,day_of_year(month,day),1+hour/6)=f1 ! hard-coding to 6-hourly resolution + endif + endif + enddo +12 continue + close(1) + ! + ! 2) quantiles + ! + quant=miss + open(unit=1,form='formatted',file=quantile_file,status='old') + do while(.true.) 
+ if(l_code_in_char)then +21 read(1,*,err=21,end=23)station_char,lon,lat,month,day,hour,(quant1(i),i=1,nquant) + else +22 read(1,*,err=22,end=23)station_int,lon,lat,month,day,hour,(quant1(i),i=1,nquant) + endif + if((hour.eq.0.or.hour.eq.6.or.hour.eq.12.or.hour.eq.18)& + .and.(.not.(month.eq.2.and.day.eq.29))) then ! leap days are skipped from plotting + do i=1,nquant + quant(i,day_of_year(month,day),1+hour/6)=quant1(i) + enddo + endif + enddo +23 continue + close(1) + ! + ! 3) quantile bin frequencies and related MSD and p-value statistics + ! + freq=miss + if(include_rank_histograms)then + open(unit=1,form='formatted',file=rank_histogram_file,status='old') + do while(.true.) + if(l_code_in_char)then +31 read(1,*,err=31,end=33)station_char,lon,lat,hour,ntot,msd1,p_value1,(freq1(i),i=1,nquant+1) + else +32 read(1,*,err=32,end=33)station_int,lon,lat,hour,ntot,msd1,p_value1,(freq1(i),i=1,nquant+1) + endif + if((hour.eq.0.or.hour.eq.6.or.hour.eq.12.or.hour.eq.18.or.hour.eq.24))then + msd(1+hour/6)=msd1 + p_value(1+hour/6)=p_value1 + do i=1,nquant+1 + freq(i,1+hour/6)=freq1(i) + enddo + endif + enddo +33 continue + close(1) + endif + +!********************************************************************************************** +! Find the minimum and maximum values of f (or the 1st and 99th percentile) +! for defining the vranges for the GrADS time series plots + + do j=1,nhour + fmin(j)=quant(1,1,j) + fmax(j)=quant(nquant,1,j) + do i=1,ndays_year + if(quant(1,i,j).lt.fmin(j))fmin(j)=quant(1,i,j) + if(quant(nquant,i,j).gt.fmax(j))fmax(j)=quant(nquant,i,j) + do k=1,nyear + if(f(k,i,j).lt.fmin(j).and.abs(f(k,i,j)).lt.abs(miss))fmin(j)=f(k,i,j) + if(f(k,i,j).gt.fmax(j).and.f(k,i,j).lt.abs(miss))fmax(j)=f(k,i,j) + enddo + enddo + fmin_down(j)=fmin(j)-0.05*(fmax(j)-fmin(j)) + fmax_up(j)=fmax(j)+0.05*(fmax(j)-fmin(j)) + enddo +! +! Find the largest frequency from the rank histograms +! + freqmax=0. 
+ do j=1,nhour+1 + do i=1,nquant+1 + if(freq(i,j).gt.freqmax(j))freqmax(j)=freq(i,j) + enddo + enddo +! +! Write the commands to the auxiliary script files for Grads +! + open(unit=1,form='formatted',file='vrange_commands') + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(1),fmax_up(1) + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(2),fmax_up(2) + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(3),fmax_up(3) + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(4),fmax_up(4) + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(1) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(2) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(3) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(4) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(5) ! conversion to relative frequencies + close(1) + + open(unit=1,form='formatted',file='time_series_commands') + do i=1,nyear + write(1,'(A12)')'set cthick 4' + write(1,'(A12)')'set cstyle 0' + write(1,'(A11)')'set cmark 3' + write(1,'(A15)')'set digsiz 0.07' + if(nyear.eq.1)then + num_int_arr=nint(1.+n_colors)/2. + write(1,'(A11,I2)')'set ccolor ',color(num_int_arr) + !write(1,'(A11,I2)')'set ccolor ',color(nint(1.+n_colors)/2.) 
+ else + num_int_arr=nint(1+(i-1.)/(nyear-1.)*(n_colors-1.)) + write(1,'(A11,I2)')'set ccolor ',color(num_int_arr) + !write(1,'(A11,I2)')'set ccolor ',color(nint(1+(i-1.)/(nyear-1.)*(n_colors-1.))) + endif + write(1,'(A7,I2,A1)')'d &0(z=',i,')' + enddo + close(1) + + open(unit=1,form='formatted',file='coordinates') + write(1,'(F8.3)')lon + write(1,'(F8.3)')lat + close(1) + + if(include_rank_histograms)then + open(unit=1,form='formatted',file='msd_and_p-value_00') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(1) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(1) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value_06') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(2) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(2) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value_12') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(3) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(3) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value_18') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(4) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(4) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(5) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(5) + close(1) + endif +! +!*************************************************************** +! 
Write the GrADS binary files needed for the plots + + open(unit=1,form='unformatted',file=grads_data_time_series,access='DIRECT',& + recl=4,status='unknown') + do j=1,nhour + do i=1,ndays_year + irec=((i-1)+(j-1)*ndays_year)*(nyear+7) + do k=1,nyear + write(1,rec=irec+k)f(k,i,j) ! time series + enddo + write(1,rec=irec+nyear+1)quant(1,i,j) ! selected quantile + write(1,rec=irec+nyear+2)quant(10,i,j) + write(1,rec=irec+nyear+3)quant(25,i,j) + write(1,rec=irec+nyear+4)quant(50,i,j) + write(1,rec=irec+nyear+5)quant(75,i,j) + write(1,rec=irec+nyear+6)quant(90,i,j) + write(1,rec=irec+nyear+7)quant(99,i,j) + enddo + enddo + close(1) + + if(include_rank_histograms)then + open(unit=1,form='unformatted',file=grads_data_rank_histogram,access='DIRECT',& + recl=4,status='unknown') + do j=1,nhour+1 + irec=(nquant+1)*(j-1) + do i=1,nquant+1 + write(1,rec=irec+i)freq(i,j) !quantile bin frequencies + enddo + enddo + close(1) + endif + +!*************************************************************** +! Write the time series, quantiles and rank histogram data to text files + +! +! Empty data line +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +! +! Time series. 365 days per year, 4 times per day, from year1 to year2 +! 
+ open(unit=1,form='formatted',file=txt_data_time_series) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real year@hdr:integer'//& + ' day_of_year@hdr:integer hour@hdr:integer value@body:real' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real year@hdr:integer'//& + ' day_of_year@hdr:integer hour@hdr:integer value@body:real' + endif + write(1,*)trim(headerline) + do year=year1,year2 + do j=1,365 + do hour=0,18,6 + if(abs(f(year-year1+1,j,1+hour/6)).lt.abs(miss))then + if(l_code_in_char)then + write(1,'(A3,2F16.6,1X,I4,1X,I3,1X,I2,F16.6)')station_char,lon,lat,year,j,hour,f(year-year1+1,j,1+hour/6) + else + write(1,'(I6,2F16.6,1X,I4,1X,I3,1X,I2,F16.6)')station_int,lon,lat,year,j,hour,f(year-year1+1,j,1+hour/6) + endif + else + if(l_code_in_char)then + write(1,'(A3,2F16.6,1X,I4,1X,I3,1X,I2,A16)')station_char,lon,lat,year,j,hour,' NaN' + else + write(1,'(I6,2F16.6,1X,I4,1X,I3,1X,I2,A16)')station_int,lon,lat,year,j,hour,' NaN' + endif + endif + enddo + enddo + enddo + close(1) +! +! Selected quantiles of the observed distribution. 365 days per year, 4 time per day. +! 
+ open(unit=1,form='formatted',file=txt_data_quantiles) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer'//& + ' q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer'//& + ' q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real' + endif + write(1,*)trim(headerline) + do year=year1,year2 + do j=1,365 + do hour=0,18,6 + if(l_code_in_char)then + write(1,'(A3,2F16.6,1X,I3,1X,I2,7F16.6)')station_char,lon,lat,j,hour,& + quant(1,j,hour/6+1),quant(10,j,hour/6+1),quant(25,j,hour/6+1),quant(50,j,hour/6+1),& + quant(75,j,hour/6+1),quant(90,j,hour/6+1),quant(99,j,hour/6+1) + else + write(1,'(I6,2F16.6,1X,I3,1X,I2,7F16.6)')station_int,lon,lat,j,hour,& + quant(1,j,hour/6+1),quant(10,j,hour/6+1),quant(25,j,hour/6+1),quant(50,j,hour/6+1),& + quant(75,j,hour/6+1),quant(90,j,hour/6+1),quant(99,j,hour/6+1) + endif + enddo + enddo + enddo + close(1) +! +! Rank histogram of frequencies. 100 bins from 0-1% to 99-100. Before that, the MSD and p-values + ! 
+ if(include_rank_histograms)then + open(unit=1,form='formatted',file=txt_data_rank_histogram) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real hour@hdr:integer' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer' + endif + headerline=trim(headerline)//' msd@body:real p_value@body:real' + do j=0,nquant + write(number_of_bin,'(i2.2)')j + headerline=trim(headerline)//' f'//number_of_bin//'@body:real' + enddo + write(1,*)trim(headerline) + + do hour=0,24,6 + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,2F16.6)')& + station_char,lon,lat,hour,msd(1+hour/6),p_value(1+hour/6) + else + write(dataline,'(I6,2F16.6,1X,I2,2F16.6)')& + station_int,lon,lat,hour,msd(1+hour/6),p_value(1+hour/6) + endif + + do i=1,nquant+1 + write(frequency_value,'(F16.6)')freq(i,1+hour/6) + dataline=trim(dataline)//frequency_value + enddo + write(1,*)trim(dataline) + enddo + + close(1) + endif + + end program plots_for_one_station + + integer function day_of_year(month,day) +! +! Number of day 'day' in the 'month':th month from the beginning of a non-leap year +! + implicit none + integer :: year,month,day + integer,parameter :: nmon=12 + integer :: days_before(nmon) ! this is for normal years + integer :: dnumber + data days_before / 0,31,59,90,120,151,181,212,243,273,304,334 / + + day_of_year=days_before(month)+day + return + end function day_of_year + +! +! rounding the time to the nearest full 6 hours +! +subroutine round_6h(year,month,day,hour,year1,month1,day1,hour1) +implicit none +integer :: year,month,day,hour ! time before rounding +integer :: year1,month1,day1,hour1 ! time after rounding +integer :: ndays_month ! 
number of days in a month +ndays_month=31 +if(month.eq.4.or.month.eq.6.or.month.eq.9.or.month.eq.11)ndays_month=30 +if(month.eq.2)then + if((mod(year,4).eq.0.and.mod(year,100).gt.0).or.(mod(year,400).eq.0))then + ndays_month=29 + else + ndays_month=28 + endif +endif +year1=year +month1=month +day1=day +hour1=6*((hour+3)/6) +if(hour.ge.21)then ! Hour was rounded forward to 00 UTC -> increment day + hour1=0 + day1=day+1 + if(day1.gt.ndays_month)then ! increment month + day1=1 + month1=month1+1 + if(month1.gt.12)then ! increment year + month1=1 + year1=year+1 + endif + endif +endif +return +end subroutine round_6h + + + diff --git a/RADSOUND/STATRS/fortran-programs/rank_histogram_summary_statistics.f95 b/RADSOUND/STATRS/fortran-programs/rank_histogram_summary_statistics.f95 new file mode 100644 index 0000000000000000000000000000000000000000..46a8a608af26ddb7e8e1cd32fc327b9aac536c28 --- /dev/null +++ b/RADSOUND/STATRS/fortran-programs/rank_histogram_summary_statistics.f95 @@ -0,0 +1,380 @@ + program rank_histogram_summary_statistics +! +! Calculation of all-station rank histogram summary statistics: +! +! 1) distribution of p-values (in nbins_p bins) +! 2) Average frequencies in NQUANT+1 = 100 quantile bins +! (the quantile bin number is hard-coded!) +! +! The calculation is made for the UTC hours defined by +! the namelist parameters HOUR1, HOUR2, HOUR_STEP. It is assumed +! that the input file (INFILE) only includes data for these +! hours. +! +! Jouni Räisänen, University of Helsinki, August 2023 +! +!------------------------------------------------------------------------- +! +! INPUT FILE: +! +! infile: a text file including the rank histogram statistics for all stations +! +! The first line is a header. The other lines include: +! +! 1) station code (as a 3-character string, if l_code_in_char =.true. Else as an integer) +! 2) longitude +! 3) latitude +! 4) UTC hour (24 for 'all-UTC' statistics +! 5) total number of observations for this UTC hour +! 
6) Mean squared deviation (MSD) from a flat quantile frequency histogram +! 7) p-value for MSD (= probability of getting at least as large an MSD by chance) +! 8-107) frequencies in quantile space: 0-1%, 1-2% ... 98-99%, 99-100% +! +! Order of data assumed in the input file: +! +! line 1: header +! line 2: station 1, HOUR1 +! line 3: station 1, HOUR1 + HOUR_STEP +! line n = 1 + (HOUR2-HOUR1)/HOUR_STEP + 1 : station 1, HOUR2 +! line n+1: station 2, HOUR1 +! etc. +!-------------------------------------------------------------------------- +! +! OUTPUT FILES: +! +! outfile_p_values: p-values at individual stations (So far: only the all-UTC p-values!) +! This file can be used for plotting the p-values on a map in python +! +! The first line is a header for eventual conversion to ODB. The other lines include +! +! 1) Station code as integer. If the original code was a 3-char string (GRUAN), +! the ordinal number of the station is written (for plotting in python) +! 2) longitude +! 3) latitude +! 4) p-value for MSD +!---------------------------------------------------------------------------- +! outfile_p_freq: frequency histogram of p-values (for different UTC hours separately) +! This file can be used for plotting a histogram of the p-value frequencies in python! +! +! The first line is a header. The others include +! +! 1) UTC hour +! 2-nbins_p+1) frequencies of p-values in nbins_p bins +!----------------------------------------------------------------------------- +! outfile_q = histogram of all-station-mean quantile bin frequencies (for different UTC hours separately) +! +! The first line is a header. The others include +! +! 1) UTC hour +! 2-101) quantile bin frequencies for 0-1%, 1-2% ... 99-100%. +!------------------------------------------------------------------------------ +!
outfile_grads (only if grads_out=.true.): p-value frequencies and quantile bin frequencies in GrADS binary format +!------------------------------------------------------------------------------ +! text_output: frequencies of selected small p-values and some other data in free-text format, for each UTC hour separately. +!----------------------------------------------------------------------------------------------------------------------------- +! +! NAMELIST PARAMETERS: +! +! infile: see above +! outfile_p_values: see above +! outfile_p_freq: see above +! outfile_q: see above +! outfile_grads: see above +! text_output: see above +! nbins_p: number of p-value bins in outfile_p_freq +! grads_out: if .true. (.false.), outfile_grads is written (not written) +! +! hour1,hour2,hour_step: UTC hours in output = hour1, hour1+hour_step ... hour2 +! +! l_code_in_char: if .true., the station codes in infile are assumed to be 3-char string (else: integer) +! miss: missing value code. Default = -9.99e6. All values f with |f| >= |miss| are treated as missing. + + IMPLICIT NONE + INTEGER :: I,J ! loop variables + INTEGER,PARAMETER :: NHOUR=24 ! number of stations + INTEGER,PARAMETER :: NQUANT=99 ! number of quantiles from 1% to 99% + INTEGER,PARAMETER :: NPMAX=100 ! maximum bin number for frequency diagram of p-values + INTEGER,PARAMETER :: NSTATMAX=10000 ! maximum number of stations + REAL :: frequency_q1(nquant+1) ! quantile frequencies at a single station and UTC hour + REAL :: frequency_q(nquant+1,nhour+1) !all-station mean quantile frequencies for each UTC hour + REAL :: frequency_p(npmax,nhour+1) !frequencies of p-values for each UTC hours + REAL :: max_freq_p,max_freq_q ! maximum of p-value and quantile frequencies (for GrADS only) + REAL :: frequency_p01(nhour+1) ! frequency of p_values <= 0.1 % + REAL :: frequency_p1(nhour+1) ! same, <= 1 % + REAL :: frequency_p5(nhour+1) ! same, <= 5 % + character*2 :: number_of_bin ! 
two-digit code for quantiles for outfile_q and outfile_p_values headers + character*16 :: frequency_value ! frequency written in F16.6 format + character*160 :: infile,outfile_p_freq,outfile_p_values,outfile_q ! input and output files (see above) + character*160 :: outfile_grads,text_output ! output files (see above) + INTEGER,PARAMETER :: L_dataline=1700 ! maximum length of lines in text files + character*1700 :: dataline,headerline,emptyline ! string variables for input and output + + INTEGER :: nbins_p ! number of p-value bins in outfile_p_freq + INTEGER :: hour1,hour2,hour_step ! input / output UTC hours (see above) + INTEGER :: hour ! UTC hour + INTEGER :: n_line ! line count for lines read from infile + INTEGER :: n_station ! station count + INTEGER :: pbin ! index for p-value bin + + LOGICAL :: HOUR_USED(nhour+1) ! .true. for hours included in (hour1, hour1+hour_step,...,hour2) + INTEGER :: nhour_used ! number of UTC hours used + INTEGER :: station1_int ! the station code read from infile (RHARM, FMI) + CHARACTER*3 :: station1_char ! the station code read from infile (GRUAN) + LOGICAL :: l_code_in_char ! if .true., station codes in 3-char strings assumed + REAL :: longitude1,latitude1 !longitude and latitude + INTEGER :: station(nstatmax) !station codes of all stations + REAL :: longitude(nstatmax),latitude(nstatmax) ! station longitudes and latitudes + REAL :: p_value1, msd ! p-value and MSD read from infile + REAL :: p_value(nhour+1,nstatmax) ! p-values at individual stations for map + REAL :: total_number ! number of observations read from infile + + LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing + INTEGER :: IREC ! record number for GrADS output +! + REAL :: MISS ! Missing value code + INTEGER :: valid_stations(nhour+1) ! 
stations with valid data + + NAMELIST/param/infile,outfile_p_values,outfile_p_freq,outfile_q,& + outfile_grads,text_output,& + nbins_p,grads_out,& + hour1,hour2,hour_step,& + l_code_in_char,miss + + miss=-9.99e6 ! default for missing value + READ(*,NML=PARAM) + valid_stations=0 +! +! Which UTC hours are used and what is their total number? +! + hour_used=.false. + nhour_used=0. + do i=hour1,hour2,hour_step + hour_used(i+1)=.true. + nhour_used=nhour_used+1 + enddo +! +! Empty data line +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +!---------------------------------------------------------------------- +! +! Initialize counters etc. +! + frequency_p=0. + frequency_q=0. + frequency_p01=0. + frequency_p1=0. + frequency_p5=0. + n_station=0 + n_line=0 + p_value=miss +! +! Read the contents of the input file +! + open(unit=1,form='formatted',status='old',file=infile) + read(1,*) ! the first line in infile is a header + write(*,*)' Opened infile: ', infile + + do while(.true.) ! Loop continued until end of file reached + if(l_code_in_char)then + 1 read(1,*,err=1,end=3)& + station1_char,longitude1,latitude1,hour,total_number,msd,p_value1,(frequency_q1(i),i=1,nquant+1) + else + 2 read(1,*,err=2,end=3)& + station1_int,longitude1,latitude1,hour,total_number,msd,p_value1,(frequency_q1(i),i=1,nquant+1) + endif +! write(*,*)'n_station,Hour step:',n_station,hour_step + if(hour_used(hour+1))then ! Only use the statistics for the UTC hours define by hour1, hour2, hour_step + n_line=n_line+1 + if(mod(n_line,nhour_used).eq.1)then ! This assumes that all the required UTC hours are always included in infile + n_station=n_station+1 + if(l_code_in_char)then + station(n_station)=n_station + else + station(n_station)=station1_int + endif + longitude(n_station)=longitude1 + latitude(n_station)=latitude1 + endif + if(total_number.gt.0.and.abs(frequency_q1(1)).lt.abs(miss))then ! Only include stations with some valid data + p_value(hour+1,n_station)=p_value1 ! 
for map of p-values + valid_stations(hour+1)=valid_stations(hour+1)+1 + do i=1,nquant+1 + frequency_q(i,hour+1)=frequency_q(i,hour+1)+frequency_q1(i) ! update the quantile bin frequencies + enddo + pbin=min(1+(p_value1*nbins_p),real(nbins_p)) + frequency_p(pbin,hour+1)=frequency_p(pbin,hour+1)+1 ! update the p-value bin frequencies +! +! Frequencies of small p-values: +! + if(p_value1.le.0.001.and.abs(p_value1).lt.abs(miss))then + frequency_p01(hour+1)=frequency_p01(hour+1)+1. + endif + if(p_value1.le.0.01.and.abs(p_value1).lt.abs(miss))then + frequency_p1(hour+1)=frequency_p1(hour+1)+1. + endif + if(p_value1.le.0.05.and.abs(p_value1).lt.abs(miss))then + frequency_p5(hour+1)=frequency_p5(hour+1)+1. + endif + endif + + endif + enddo +3 continue + close(1) + write(*,*)' Read infile: # of lines, # of stations: ',n_line,n_station + +! if(n_line.ne.n_station*nhour_used)then +! write(*,*)'Something wrong !!!' +! stop +! endif +!--------------------------------------------------------------------- +! Divide all the frequencies by the number of stations +! + do hour=hour1,hour2,hour_step + do i=1,nquant+1 + frequency_q(i,hour+1)=frequency_q(i,hour+1)/valid_stations(hour+1) + enddo + do i=1,nbins_p + frequency_p(i,hour+1)=frequency_p(i,hour+1)/valid_stations(hour+1) + enddo + frequency_p01(hour+1)=frequency_p01(hour+1)/valid_stations(hour+1) + frequency_p1(hour+1)=frequency_p1(hour+1)/valid_stations(hour+1) + frequency_p5(hour+1)=frequency_p5(hour+1)/valid_stations(hour+1) + enddo +! +! Find the normalized maximum frequencies for GrADS axis limits +! + max_freq_p=0 + max_freq_q=0 + do i=1,nbins_p + if(frequency_p(i,nhour+1).gt.max_freq_p/nbins_p)then + max_freq_p=frequency_p(i,nhour+1)*nbins_p + endif + enddo + max_freq_p=1.02*max_freq_p + + do i=1,nquant+1 + if(frequency_q(i,nhour+1).gt.max_freq_q/(nquant+1))then + max_freq_q=frequency_q(i,nhour+1)*(nquant+1) + endif + enddo + max_freq_q=1.02*max_freq_q +! 
+!--------------------------------------------------------------------- +! +! Write the text output file: +! 1) number of stations +! 2) frequencies of selected small p-values + + open(unit=1,form='formatted',file=text_output) + write(1,'(A20,I3)')'Number of stations: ',n_station + write(1,'(A24,I3)')'Number of p-value bins: ',nbins_p + if(grads_out)then + write(1,'(A23,F6.3)')'Axis_limit_p_for_GrADS ',max_freq_p + write(1,'(A23,F6.3)')'Axis_limit_q_for_GrADS ',max_freq_q + endif + do hour=hour1,hour2,hour_step + write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.001 ',hour,' UTC: ',& + frequency_p01(hour+1) + write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.01 ',hour,' UTC: ',& + frequency_p1(hour+1) + write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.05 ',hour,' UTC: ',& + frequency_p5(hour+1) + enddo + close(1) +! +! Write the average quantile bin frequencies and p-value bin frequencies +! to ODB compatible text files + +!-------------------------------------------------------------------------- +! +! Open the quantile bin frequency output file and write its first line. +! + open(unit=2,form='formatted',status='unknown',file=outfile_q) + headerline=emptyline + headerline='hour@hdr:integer ' + do j=0,nquant + write(number_of_bin,'(i2.2)')j + headerline=trim(headerline)//' f'//number_of_bin//'@body:real' + enddo + write(2,*)trim(headerline) +! +! Write the data lines for each UTC hour used +! + do hour=hour1,hour2,hour_step + j=hour+1 + write(dataline,'(I6)')& + hour + do i=1,nquant+1 + write(frequency_value,'(F16.6)')frequency_q(i,j) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + enddo + close(2) +! +! Write the file giving the p-values for individual stations +! + open(unit=2,form='formatted',status='unknown',file=outfile_p_values) + headerline=' station@hdr:integer longitude@hdr:real latitude@hdr:real p_value@body:real' + write(2,*)trim(headerline) + do i=1,n_station + write(2,'(7X,I6,3F16.6)')& ! 
this far: just fwrite the station ordinal number, in case + ! the station code was a character string + station(i),longitude(i),latitude(i),p_value(nhour+1,i) + enddo + close(2) +! +! Open the p_value frequency output file and write its first line. +! + open(unit=2,form='formatted',status='unknown',file=outfile_p_freq) + headerline=emptyline + headerline='hour@hdr:integer ' + do j=1,nbins_p + write(number_of_bin,'(i2.2)')j-1 + headerline=trim(headerline)//' p'//number_of_bin//'@body:real' + enddo + write(2,*)trim(headerline) +! +! Write the data lines for each UTC hour used +! + do hour=hour1,hour2,hour_step + j=hour+1 + write(dataline,'(I6)')& + hour + do i=1,nbins_p + write(frequency_value,'(F16.6)')frequency_p(i,j) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + enddo + close(2) +! +!------------------------------------------------------------------------------- +! +! Open the file for GrADS output and write its contents, for easier visualisation? +! + if(grads_out)then + open(unit=11,form='unformatted',file=outfile_grads,access='DIRECT',& + recl=4,status='unknown') +! + do hour=hour1,hour2,hour_step + j=(hour-hour1)/hour_step+1 + do i=1,nbins_p + irec=(j-1)*(nbins_p+nquant+1)+i + write(11,rec=irec)frequency_p(i,hour+1) + enddo + do i=1,nquant+1 + irec=(j-1)*(nbins_p+nquant+1)+nbins_p+i + write(11,rec=irec)frequency_q(i,hour+1) +! write(*,*)hour,i,frequency_q(i,hour+1) + enddo + enddo + close(11) + + endif ! if (grads_out) + + END program rank_histogram_summary_statistics diff --git a/RADSOUND/STATRS/fortran-programs/rank_histograms_bootstrap.f95 b/RADSOUND/STATRS/fortran-programs/rank_histograms_bootstrap.f95 new file mode 100644 index 0000000000000000000000000000000000000000..3a075742b6542832c440963015abf3a2883af230 --- /dev/null +++ b/RADSOUND/STATRS/fortran-programs/rank_histograms_bootstrap.f95 @@ -0,0 +1,662 @@ + PROGRAM rank_histograms_bootstrap +! +! For estimating the sampling distribution of the Mean Square Deviation +! 
(MSD) statistics for quantile bin rank histogram frequencies. +! These distributions can be used to answer the following question: +! +! "If we have LENGTH_IN_YEARS years of model data and its rank histogram +! against observations has a given MSD value, what is the probability +! of getting such a large MSD just as a result of internal variability"? +! +! The sampling distribution is formed using a bootstrap: +! +! Each bootstrap realization calculates the rank histograms by selecting a total +! of LENGTH_IN_YEARS years of observed data randomly divided in chunks. Their +! length is defined as follows: +! +! 1) The number of chunks is at least MIN_CHUNK_NUMBER +! 2) If not restricted by (1) there are CHUNKS_PER_YEAR chunks for each year +! 3) The length is rounded to the nearest full number of days, so that the +! total length is LENGTH_IN_YEARS years (which may be non-integer) +! +! For each realization, the Mean Squared Deviation (MSD) of the resulting rank +! histogram relative to the theoretically expected flat histogram is calculated. +! +! Jouni Räisänen, July 2023 +! +!-------------------------------------------------------------------------------- +! Input files (text format, retrieved from ODB with the 'odb sql select' command) +!-------------------------------------------------------------------------------- +! +! INFILE: Observed time series at the station location. +! The first line is a header. The other lines include: +! +! 1) year +! 2) month +! 3) day +! 4) hour +! 5) data value +! +! QUANTILE_FILE: quantile values, separately for each day and UTC hour of the year +! The first line is a header. The other lines include: +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) month +! 5) day +! 6) hour +! 7-105) The 99 quantiles from 1 % to 99 % +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! 
+!-------------------------------------------------------------------------------- +! Output files +!-------------------------------------------------------------------------------- +! +! OUTFILE: MSD (mean squared deviation) distribution of quantile frequencies +! from bootstrap tests. +! +! The first line is a header. The other lines include +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) realization number +! 5 to N-1) MSD values for HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! N) MSD calculated from frequencies averaged over all the UTC hours within +! HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! +! OUTFILE_GRADS: MSD distribution of quantile frequencies in a GrADS binary file (if grads_out=.true.) +! +!------------------------------------------------------------------------------------------- +! Namelist parameters +!------------------------------------------------------------------------------------------- +! +! infile : file for observation time series (see above) +! quantile_file : file for pre-computed quantile values (see above) +! outfile : output file for bootstrap MSD statistics in ODB compatible text format +! outfile_grads : output file in GrADS binary format +! grads_out: if .true., outfile_grads is also written +! l_code_in_char: if .true., 3-character station codes assumed (else integer) +! lorder: if .true., the output files are ordered from smallest to largest MSD values +! +! hour1 = first UTC hour analysed +! hour2 = last UTC hour analysed +! hour_step = step of analysed UTC hours +! l_round_hour : if .true., hours are rounded to the nearest analysis hour. +! if .false., data for non-matching hours is ignored +! +! year1 = first year used (if data available) +! year2 = last year used (if data available) +! +! miss = missing value code. All input data f with abs(f) >= abs(miss) is treated as missing +! 
length_in_years = length of bootstrap samples in years (does not need to be an integer) +! +! min_chunk_number = minimum number of separate data "chunks" for one bootstrap realization +! chunks_per_year = number of bootstrap "chunks" per year, if not limited by min_chunk_number +! nreal = number of bootstrap realizations +! l_all_hours_required : if .true., only those days are used for which data for all the required +! UTC hours is available, even in forming the single-UTC-hour bootstrap +! samples. This should be set to .false. for those data sets (e.g., soundings) +! for which data is frequently missing for some UTC hours + + ! In addition to INFILE, pre-computed quantiles (in QUANTILE_FILE) are needed: +! +! INFILE: text file of the simulated or observed time series at the station location. +! 1st line is a header +! The other lines include +! year, month, day, hour and the data value +! +! QUANTILE_FILE: quantile values, separately for each day of the year +! 1st line is a header +! The other lines include station code, longitude and latitude, +! month, day and hour, and the 99 quantiles for each day+hour of the year +! +! Only data for the years YEAR1-YEAR2 are taken into account, even if other years +! are present in INFILE. +! +! The output is written in OUTFILE. +! 1st line is the header. +! The other lines include the station code, station coordinates, +! realization/rank number and the corresponding MSD values +! for each UTC hour + their combination. +! +! If (GRADS_OUT), output is also written in a GrADS binary file for easier plotting. +! +!--------------------------------------------------------------------------------------- +! +! LIMITATIONS of the bootstrap method: +! +! 1) An in-sample bootstrap will likely produce a narrower sampling distribution of +! quantile bin frequencies, and therefore lower MSD:s, than would be obtained +! if observations from a period not used for calculating the quantiles were used. + +! 
2) Autocorrelation on time scales exceeding 1/CHUNKS_PER_YEAR years is neglected +! This is also expected to reduce the sampling variability in quantile bin frequencies, +! resulting in a low bias in the MSD:s (particularly over sea areas for temperature?) +! +! For both reasons, there will be too many "false positives" when comparing +! a model simulation with observations. +! +!---------------------------------------------------------------------------------------- + + IMPLICIT NONE + INTEGER :: I,J,K,Q,R + INTEGER,PARAMETER :: NMON=12, NDAY=31, NHOUR=24, NQUANT=99 + REAL :: quantiles(nquant,nmon,nday,nhour) ! quantiles for each month, day and UTC hour + REAL :: q1(nquant) ! quantiles for one day + hour + REAL :: value1 ! individual data values read from the input file + INTEGER :: ntimes_hour(nhour) ! total number of times with data for an UTC hour + CHARACTER*160 :: infile,outfile,quantile_file ! file names (see above) + CHARACTER*160 :: outfile_grads ! grads output for testing? + INTEGER :: loutfile_grads ! length of outfile_grads name, to circumvent a memory leak!? (6.5.2013) + CHARACTER*2 :: hh ! hour code for ODB header + INTEGER,PARAMETER :: L_dataline=500 ! maximum length of data lines in output + character*500 dataline,headerline,emptyline ! for writing lines in output text files + character*16 msd_value ! MSD value written in F16.6 format + ! + INTEGER :: yyear,mmonth,dday,hhour ! year, month, day and hour as read from the input file + INTEGER :: year,month,day,hour ! year, month, day and hour after the optional rounding of hours + INTEGER :: day1 ! running number for the first day in a bootstrap chunk + INTEGER :: year1,year2 ! years of data used in forming the bootstrap samples + INTEGER :: hour1,hour2,hour_step ! list of hours analysed + LOGICAL :: L_ROUND_HOUR ! if .true., hour rounded to the nearest output hour + LOGICAL :: HOUR_USED(nhour) ! .true. for those UTC hours that are used + INTEGER :: nhour_used ! number of UTC hours used + INTEGER :: station_int ! 
station code read from quantile file, as integer (for synop stations) + CHARACTER*3 :: station_char ! ------------------------------ as characters (for soundings?) + LOGICAL :: l_code_in_char ! .true. for station code in characters + REAL :: longitude,latitude ! station longitude and latitude from the quantile file + + LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing + INTEGER :: IREC ! record number for GrADS output + LOGICAL :: LORDER ! if .true., MSD output in ascending order + + INTEGER,PARAMETER :: nmaxdays=100000,nmaxyears=250 + INTEGER :: rank1(nmaxyears,nmon,nday,nhour) ! quantile ranks (1-100) of the observed values + INTEGER :: rank(nmaxdays,nhour) ! quantile ranks (1-100) of the observed values in chronological order + INTEGER :: rank_all_UTC(nmaxdays,nhour) ! same for days in which data is available for all UTC hours + INTEGER :: NDAYS(nhour+1) ! number of days in the observed time series. "nhour+1" represents the "all-UTC" value. + LOGICAL :: L_all_hours_required ! if .true., all those days are omitted when data is missing for + ! at least one of the output UTC hours. Otherwise, all valid observations are included for each UTC hour separately, + ! but the all-UTC-hour statistics only use the days with data available for all UTC hours. + LOGICAL:: day_OK(nhour+1) ! will the day be included in bootstrap? + + REAL :: length_in_years ! total length of bootstrap samples in years (see above) + INTEGER :: min_chunk_number ! minimum number of chunks per one bootstrap sample + INTEGER :: chunks_per_year ! number of chunks per year + INTEGER :: number_of_chunks ! number of chunks per bootstrap sample + ! This is replaced by min_chunk_number if that is larger. + INTEGER :: chunk_length_UTC(nhour+1) ! chunk length separately for each UTC hour + INTEGER :: number_of_chunks_UTC(nhour+1) ! number of chunks separately for each UTC hour + INTEGER :: nreal ! number of bootstrap realizations + REAL :: rnd ! 
for random numbers + + INTEGER,PARAMETER :: NMAX_REAL=10000 ! maximum number of realizations + REAL :: MSD(nmax_real,nhour+1) ! nhour+1 to accommodate the all-UTC statistics + REAL :: MSD1(nmax_real) ! MSD realizations for one UTC hour (to facilitate easy ordering) + REAL :: frequency(nquant+1,nhour+1) ! quantile bin frequencies + REAL :: frequency_all_UTC(nquant+1,nhour+1) ! quantile bin frequencies for the days in the "all-UTC" sample + REAL :: expected_freq ! expected quantile bin frequency (1/100) + + REAL :: MISS ! missing value code +! +!----------------------------------------------------------------------------------------------------- +! + NAMELIST/param/infile,outfile,quantile_file,& + hour1,hour2,hour_step,year1,year2,grads_out,outfile_grads,lorder,& + length_in_years,min_chunk_number,chunks_per_year,nreal,& + l_code_in_char,l_round_hour,l_all_hours_required,miss + + expected_freq=1./(nquant+1) ! frequencies in a flat rank histogram + lorder=.true. ! by default, ascending order of MSDs in the output + min_chunk_number=2 ! default for minimum number of chunks + chunks_per_year=4 ! default. Decrease of this will reduce the autocorrelation + ! problem but it will also reduce the independence + ! (thus the variation) between the bootstrap samples + nreal=1000 ! default number of bootstrap realizations + miss=-9.99e6 ! default missing value code +! +! Defaults for the logical switches, so that they are defined even when omitted from the namelist +! + grads_out=.false. + l_code_in_char=.false. + l_round_hour=.false. + l_all_hours_required=.false. + READ(*,NML=PARAM) +! +! Check the namelist values against the fixed array dimensions before they are used +! as array indices or loop bounds. +! + if(nreal.lt.1.or.nreal.gt.nmax_real)then + write(*,*)'nreal must be between 1 and ',nmax_real + stop 1 + endif + if(year2.lt.year1.or.year2-year1+1.gt.nmaxyears)then + write(*,*)'year1 ... year2 must span between 1 and ',nmaxyears,' years' + stop 1 + endif + if(hour_step.lt.1.or.hour1.lt.0.or.hour2.gt.nhour-1.or.hour1.gt.hour2)then + write(*,*)'hour1, hour2 and hour_step must define UTC hours within 0 ... ',nhour-1 + stop 1 + endif +! +! Length of the GrADS output file name. This should not really be needed. +! + loutfile_grads=len(trim(outfile_grads)) +! +! Which UTC hours are used and what is their total number? +! + hour_used=.false. + nhour_used=0 + do i=hour1,hour2,hour_step + hour_used(i+1)=.true. + nhour_used=nhour_used+1 + enddo +! +! Empty data line for ODB-compatible output +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +! write(*,*)'Check 1',len(trim(outfile_grads)),loutfile_grads,outfile_grads +!---------------------------------------------------------------------- +! +! Read the contents of the quantile_file to array 'quantiles' + ! 
+ quantiles=miss + open(unit=1,form='formatted',status='old',file=quantile_file) + write(*,*)'Opened quantile file' + do while(.true.) + if(l_code_in_char)then +! The err=1 specifier ensures that header lines are skipped +1 read(1,*,err=1,end=3)& + station_char,longitude,latitude,month,day,hour,& + (q1(i),i=1,nquant) + else +! The err=2 specifier ensures that header lines are skipped +2 read(1,*,err=2,end=3)& + station_int,longitude,latitude,month,day,hour,& + (q1(i),i=1,nquant) + endif + + do i=1,nquant + quantiles(i,month,day,hour+1)=q1(i) + enddo +! copy quantiles from 28 February to 29 February +! (in case the latter is not in the quantile file as it should) + if(month.eq.2.and.day.eq.28)then + do i=1,nquant + quantiles(i,month,day+1,hour+1)=q1(i) + enddo + endif + enddo +3 continue + write(*,*)'Quantile file read' + close(1) +! +!---------------------------------------------------------------------- +! +! Open the input data file (i.e. the station observations for the variable of interest). +! The data must be in chronological order. + + open(unit=1,form='formatted',status='old',file=infile) + +! Read the data from the input file and find their ranks in the quantile distribution. +! The err=11 specifier ensures that header lines are skipped. + + rank1=miss + do while(.true.) +11 read(1,*,err=11,end=12)yyear,mmonth,dday,hhour,value1 + ! write(*,*)year,month,day,hour,value1 + + if(l_round_hour)then + call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step) + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif + + ! Data with abs(value1) >= abs(miss) are missing, so the comparison must be strict (.lt.). + ! The hour range is tested in a separate IF because Fortran does not guarantee + ! short-circuit evaluation, and hour_used(hour+1) must not be indexed out of bounds. + if(hour.ge.0.and.hour.le.nhour-1)then + if(year.ge.year1.and.year.le.year2.and.hour_used(hour+1).and.abs(value1).lt.abs(miss))then + call find_rank(value1,quantiles(1,month,day,hour+1),nquant,& + rank1(year-year1+1,month,day,hour+1),miss) + endif + endif + enddo +12 continue + close(1) +!--------------------------------------------------------------------------------------------------- +! 
Rewrite the ranks for easier implementation of the bootstrap (= omit missing values) +!--------------------------------------------------------------------------------------------------- + ndays=0 + do year=year1,year2 + do month=1,nmon + do day=1,nday + day_OK=.true. + do hour=hour1,hour2,hour_step + if(rank1(year-year1+1,month,day,hour+1).eq.miss)then + day_OK(hour+1)=.false. + day_OK(nhour+1)=.false. ! all-UTC: day excluded if any data is missing + endif + enddo + ! all data excluded if some of the UTC hours is missing and all hours are required. + if(l_all_hours_required.and..not.day_OK(nhour+1))then + day_OK=.false. + endif +! +! Form the list of valid days and calculate its length separately for each UTC hour: +! + do hour=hour1,hour2,hour_step + if(day_OK(hour+1))then + ndays(hour+1)=ndays(hour+1)+1 + rank(ndays(hour+1),hour+1)=rank1(year-year1+1,month,day,hour+1) + endif + enddo + ! + ! All-UTC list of days and ranks. Only days with valid data for all UTC hours are included. + ! + if(day_OK(nhour+1))then + ndays(nhour+1)=ndays(nhour+1)+1 + do hour=hour1,hour2,hour_step + rank_all_UTC(ndays(nhour+1),hour+1)=rank1(year-year1+1,month,day,hour+1) + enddo + endif + + enddo ! end of day-loop + enddo ! end of month-loop + enddo ! end of year-loop + + write(*,*)'ndays',(ndays(hour+1),hour=hour1,hour2,hour_step),ndays(nhour+1) + +!-------------------------------------------------------------------------------------- +! Bootstrap parameters (nreal realizations, each consisting of 'number_of_chunks' 'chunk_length'-day periods). +! +! These parameters must be defined for each UTC hour separately, +! because the length of the time series may be different +! +! The length of a "chunk" is not allowed to exceed half of the length of the period of data +! + number_of_chunks=nint(length_in_years*chunks_per_year) ! default number of chunks per realization + number_of_chunks=max(number_of_chunks,min_chunk_number) ! reset to minimum if needed +! +! 
Recalculate chunk lengths and numbers of chunks for each UTC hour, based on the real number of +! days that are available and the requirement that the chunk length must not exceed half of +! the total data sample. +! The chunk length is zero when fewer than two valid days exist; the number of chunks is then +! set to zero instead of dividing by zero. + + do hour=hour1,hour2,hour_step + chunk_length_UTC(hour+1)=min(nint((365.25*length_in_years)/number_of_chunks),ndays(hour+1)/2) + if(chunk_length_UTC(hour+1).gt.0)then + number_of_chunks_UTC(hour+1)=nint((365.25*length_in_years)/chunk_length_UTC(hour+1)) + else + number_of_chunks_UTC(hour+1)=0 + endif + enddo + chunk_length_UTC(nhour+1)=min(nint((365.25*length_in_years)/number_of_chunks),ndays(nhour+1)/2) + if(chunk_length_UTC(nhour+1).gt.0)then + number_of_chunks_UTC(nhour+1)=nint((365.25*length_in_years)/chunk_length_UTC(nhour+1)) + else + number_of_chunks_UTC(nhour+1)=0 + endif + + write(*,*)'number_of_chunks_UTC',(number_of_chunks_UTC(hour+1),hour=hour1,hour2,hour_step),number_of_chunks_UTC(nhour+1) + write(*,*)'chunk_length_UTC',(chunk_length_UTC(hour+1),hour=hour1,hour2,hour_step),chunk_length_UTC(nhour+1) +!************************************************************************************** +! Bootstrap begins +!************************************************************************************** + msd=0. + do r=1,nreal + frequency=0 + + +! Selection of chunks and calculation of rank frequencies (each UTC hour separately) + + do j=hour1,hour2,hour_step + + if(ndays(j+1).gt.1)then + + do k=1,number_of_chunks_UTC(j+1) + call random_number(rnd) + day1=1+(ndays(j+1)-chunk_length_UTC(j+1)+1)*rnd ! first day of the chunk: 1 ... ndays-chunk_length+1 + do i=day1,day1+chunk_length_UTC(j+1)-1 + frequency(rank(i,j+1),j+1)=& + frequency(rank(i,j+1),j+1)+1./(number_of_chunks_UTC(j+1)*chunk_length_UTC(j+1)) + enddo + enddo + + endif + + enddo +! +! Calculation of the MSD for each UTC hour separately. +! + do j=hour1,hour2,hour_step + if(ndays(j+1).gt.1)then + do i=1,nquant+1 + msd(r,j+1)=msd(r,j+1)+((frequency(i,j+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + else + msd(r,j+1)=miss + endif + enddo +! +! Selection of chunks and calculation of rank frequencies (all-UTC-hour case, +! only including data for the days for which all UTC hours are available) +! + frequency_all_UTC=0. 
+ if(ndays(nhour+1).gt.1)then + do k=1,number_of_chunks_UTC(nhour+1) + call random_number(rnd) ! one start day per chunk, shared by all UTC hours in the j loop below + day1=1+(ndays(nhour+1)-chunk_length_UTC(nhour+1)+1)*rnd + do i=day1,day1+chunk_length_UTC(nhour+1)-1 + do j=hour1,hour2,hour_step + frequency_all_UTC(rank_all_UTC(i,j+1),j+1)=& + frequency_all_UTC(rank_all_UTC(i,j+1),j+1)+1./(number_of_chunks_UTC(nhour+1)*chunk_length_UTC(nhour+1)) + enddo + enddo + enddo + endif +! +! Calculation of the all-UTC MSD from the unweighted mean of the per-hour frequencies +! + if(ndays(nhour+1).gt.1)then + do i=1,nquant+1 + do j=hour1,hour2,hour_step + frequency_all_UTC(i,nhour+1)=& + frequency_all_UTC(i,nhour+1)+frequency_all_UTC(i,j+1)/nhour_used + enddo + msd(r,nhour+1)=msd(r,nhour+1)+((frequency_all_UTC(i,nhour+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + else + msd(r,nhour+1)=miss ! too few days with data for all UTC hours + endif + +!********************************************************************* + enddo ! end of the bootstrap (r) loop +!********************************************************************* + +!-------------------------------------------------------------------------------------------------- +! +! Ordering of the MSD:s in ascending order (only if lorder is .true.) +! Again, first the individual UTC hours and then the all-UTC MSD:s + + if(lorder)then + do j=hour1,hour2,hour_step + do r=1,nreal + msd1(r)=msd(r,j+1) ! temporary copy used as the input of order() + enddo + call order(msd1,msd(1,j+1),nreal) ! sorted values are written back to column j+1 of msd + enddo + do r=1,nreal + msd1(r)=msd(r,nhour+1) + enddo + call order(msd1,msd(1,nhour+1),nreal) + endif +!------------------------------------------------------------------------------------------------ +! +! Open the output file and write its first line. +! 
+ open(unit=2,form='formatted',status='unknown',file=outfile) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real realization@hdr:integer' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real realization@hdr:integer' + endif + do j=hour1,hour2,hour_step + write(hh,'(i2.2)')j ! two-digit UTC hour for the column name + headerline=trim(headerline)//' msd'//hh//'@body:real' + enddo + headerline=trim(headerline)//' msd24'//'@body:real' ! msd24 = all-UTC statistics + write(2,*)trim(headerline) + ! + ! Write the MSD values to the output file: first for each + ! individual UTC hour, then the overall statistics. + ! The station code and coordinates are those last read from the quantile file. + do r=1,nreal + dataline=emptyline + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,I6)')station_char,longitude,latitude,r + else + write(dataline,'(I6,2F16.6,I6)')station_int,longitude,latitude,r + endif + do j=hour1,hour2,hour_step + write(msd_value,'(F16.6)')msd(r,j+1) + dataline=trim(dataline)//msd_value + enddo + write(msd_value,'(F16.6)')msd(r,nhour+1) + dataline=trim(dataline)//msd_value + write(2,*)trim(dataline) + if(r.eq.1.or.r.eq.500.or.r.eq.1000)write(*,*)r,trim(dataline) ! echo a few lines to standard output as a check + enddo + + close(2) +! ----------------------------------------------------------------- +! +! Output also as GrADS binaries, for visualisation in GrADS? +! + if(grads_out)then + open(unit=11,form='unformatted',file=outfile_grads(1:loutfile_grads),access='DIRECT',& + recl=4,status='unknown') + do hour=hour1,hour2,hour_step + j=1+(hour-hour1)/hour_step ! running index of the UTC hour (1, 2, ...) + do i=1,nreal + write(11,rec=(j-1)*nreal+i)msd(i,hour+1) + enddo + enddo + do i=1,nreal + write(11,rec=j*nreal+i)msd(i,nhour+1) ! j = index of the last UTC hour written above, so the all-UTC records come last + enddo + close(11) + endif + + END program rank_histograms_bootstrap +! +!-------------------------------------------------------------------------------------------- +! + subroutine find_rank & + (f,quantiles,nquant,rank1,miss) +! +! Find the rank of a data value within the quantile table (quantiles). +! +! f = individual data value (IN) +! quantiles = quantile values (IN) +! 
nquant = number of quantile values (IN) +! rank1 = rank of the data value, 1 ... nquant+1, or the missing value code (OUT) +! miss = missing value code (IN) + + implicit none + integer :: nquant + real :: quantiles(nquant) + integer :: rank1 + real :: f + integer :: i1,i2,i,ind ! bisection bounds and midpoint (ind is not used) + real :: miss + if(abs(quantiles(1)).ge.abs(miss))then + write(*,*)'Quantiles missing!?' ! This should never happen. + rank1=miss ! NOTE: miss is converted to integer here + else + if(f.lt.quantiles(1))then + rank1=1 ! below the lowest quantile + else + if(f.ge.quantiles(nquant))then + rank1=nquant+1 ! at or above the highest quantile + else + i1=1 + i2=nquant + do while (i2.gt.i1+1) ! bisection: keeps quantiles(i1) <= f < quantiles(i2) + i=(i1+i2)/2 + if(f.ge.quantiles(i))then + i1=(i1+i2)/2 + else + i2=(i1+i2)/2 + endif + enddo + rank1=i1+1 + endif + endif + endif + + return + end subroutine find_rank +! +!-------------------------------------------------------------------------------------------- +! + subroutine order(f,g,n) +! +! Ordering the values of f(1...n) in ascending order. Result in g; f is left unchanged. +! Simple exchange ordering (inefficient for large n!) +! + implicit none + integer :: i,j,n + real :: f(n),g(n),g1 + g=f + do i=1,n-1 + do j=i+1,n + if(g(j).lt.g(i))then + g1=g(i) + g(i)=g(j) + g(j)=g1 + endif + enddo + enddo + + return + end subroutine order +! +!----------------------------------------------------------------------- +! +subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step) +! +! Rounding the hour to the nearest hour within (hour1, hour1 + hour_step, ... hour2) +! +implicit none +integer :: yyear,mmonth,dday,hhour ! year, month, day and hour before rounding (IN) +integer :: year,month,day,hour ! year, month, day and hour after rounding (OUT) +integer :: hour1,hour2,hour_step ! target hours: hour1, hour1 + hour_step ... hour2 +integer :: ndays_month(12) ! 
number of days per month +integer :: hour_index +ndays_month=31 +ndays_month(4)=30 +ndays_month(6)=30 +ndays_month(9)=30 +ndays_month(11)=30 +ndays_month(2)=28 +if((mod(yyear,4).eq.0.and.mod(yyear,100).gt.0).or.(mod(yyear,400).eq.0))then + ndays_month(2)=29 +endif +year=yyear +month=mmonth +day=dday +! +! round the hour to the nearest output hour +! +hour_index=nint((hhour-hour1+0.)/hour_step) +hour=hour1+hour_step*hour_index + if(hour.ge.24)then ! hhour was rounded forward to next day + hour=hour1 + day=dday+1 + if(day.gt.ndays_month(month))then + day=1 + month=month+1 + if(month.gt.12)then + month=1 + year=yyear+1 + endif + endif + endif + if(hour.lt.0)then ! Hhour was rounded backward to previous day + hour=hour2 + day=dday-1 + if(day.eq.0)then + month=month-1 + if(month.eq.0)then + month=12 + year=yyear-1 + endif + day=ndays_month(month) + endif + endif + +return +end subroutine round_hour + + + + diff --git a/RADSOUND/STATRS/fortran-programs/rank_histograms_one_station.f95 b/RADSOUND/STATRS/fortran-programs/rank_histograms_one_station.f95 new file mode 100644 index 0000000000000000000000000000000000000000..c73168c0dff7d4dda4d61f01838b755aab9438e9 --- /dev/null +++ b/RADSOUND/STATRS/fortran-programs/rank_histograms_one_station.f95 @@ -0,0 +1,556 @@ + program rank_histograms_one_station +! +! Produce the rank histogram (i.e., the frequencies at which the simulated or +! observed data at one station fall between different quantiles of a pre-computed +! quantile distribution (below 1%, within 1-2%, ... above 99%) +! +! Jouni Räisänen, July 2015 +! +!---------------------------------------------------------------------- +! Input files (text format) +!---------------------------------------------------------------------- +! +! INFILE: simulated or observed time series at the station location. +! The first line is a header. The other lines include: +! +! 1) year +! 2) month +! 3) day +! 4) hour +! 5) data value +! +! 
QUANTILE_FILE: quantile values, separately for each day of the year! +! The first line is a header. The other lines include: +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) month +! 5) day +! 6) hour +! 7-105) The 99 quantiles from 1 % to 99 % +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! +! MSD_BOOTSTRAP_FILE: MSD (mean squared deviation) distribution of quantile frequencies +! from bootstrap tests. +! +! The first 1st line is a header. The other lines include +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) number of realization +! 5 to N-1) MSD values for HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! N) MSD calculated from frequencies averaged over all the UTC hours within +! HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! +!---------------------------------------------------------------------- +! Output files: +!---------------------------------------------------------------------- +! +! OUTFILE: frequencies of quantiles. +! The first 1st line is a header. The other lines include +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) hour +! 5) number of obervations for this (UTC) hour +! 6) MSD for the quantile frequencies +! 7) p-value of MSD (=fraction of bootstrap realizations with larger or equal MSD) +! 8-107) quantile frequencies (on scale 0-1) from 0-1 % ... 99-100% +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer. +! +! UTC hours included: HOUR1, HOUR1 + HOUR_STEP ... HOUR2 + all-UTC-statistics coded with HOUR=24 +! +! OUTFILE_GRADS: frequencies of quantiles in GrADS binary output (if grads_out=.true.) +! +! The resulting GraDS binary file includes 7 variables (1 level for 1-6, 100 levels for 7): +! +! 1) longitude +! 2) latitude +! 3) hour +! 
4) number of obervations for this (UTC) hour +! 5) MSD for the quantile frequencies +! 6) p-value of MSD (=fraction of bootstrap realizations with larger or equal MSD) +! 7) quantile frequencies (on scale 0-1) from 0-1 % ... 99-100% +! +!--------------------------------------------------------------- +! Namelist parameters +!--------------------------------------------------------------- +! +! NAMELIST/param/infile,outfile,quantile_file,msd_bootstrap_file,& +! hour1,hour2,hour_step,year1,year2,month1,month2,grads_out,outfile_grads,& +! l_code_in_char,l_round_hour,miss +! +! infile : file name for simulated or observed time series (see above) +! quantile_file : file name for pre-computed quantiles (see above) +! msd_bootstrap_file : file name for pre-computed bootstrap statistics (see above) +! outfile : name of output file (see above) +! outfile_grads : output file in GrADS binary format (see above) +! grads_out : if .true., outfile_grads is written +! l_code_in_char: if .true., 3-character station codes assumed (else integer) +! +! hour1 = first UTC hour analysed +! hour2 = last UTC hour analysed +! hour_step = step of analyzed UTC hours +! l_round_hour : if .true., hours are rounded to the nearest analysis hour. +! if .false., data for non-matching hours is ignored +! +! year1,month1 = beginning of the analysis period +! year2,month2 = end of the analysis period +! +! miss = missing value code. All input data f with abs(f) >= abs(miss) is treated as missing +! +!--------------------------------------------------------------- +! + IMPLICIT NONE + INTEGER :: I,J,K,Q + INTEGER,PARAMETER :: NMON=12, NDAY=31, NHOUR=24, NQUANT=99 + REAL :: quantiles(nquant,nmon,nday,nhour) ! array for quantile values + REAL :: q1(nquant) ! quantiles for one day + hour + REAL :: frequency(nquant+1,nhour+1) ! frequency of quantiles. NHOUR+1 for the all-UTC frequencies + REAL :: expected_freq ! expected frequency of quantiles (1/100) + REAL :: MSD(nhour+1) ! 
MSD statistics per UTC hour + REAL :: p_value(nhour+1) ! p values per UTC hour + REAL :: msd_bootstrap(nhour+1) ! msd values for one bootstrap realization + INTEGER :: n_bootstrap ! total number of bootstrap realizations + INTEGER :: realization ! number of bootstrap realization read from msd_boostrap_file + REAL :: value1 ! individual data values read from the input file + INTEGER :: ntimes_hour(nhour) ! total number of times with data for an UTC hour + INTEGER :: ntimes_hour_tot ! total number of times with data for all UTC hours + CHARACTER*160 :: infile,outfile,quantile_file,msd_bootstrap_file + CHARACTER*160 :: outfile_grads + INTEGER,PARAMETER :: L_dataline=1700 ! maximum length of lines in text files + character*1700 :: dataline,headerline,emptyline ! character strings for writing of output files + character*2 :: number_of_bin ! two-digit code for quantiles for outfile header + character*16 :: frequency_value ! frequency written in F16.6 format +! + INTEGER :: yyear,mmonth,dday,hhour ! time as read from the input file + INTEGER :: year,month,day,hour ! time after eventual rounding of hours + + INTEGER :: year1,year2,month1,month2 ! period used in calculations (see above) + INTEGER :: hour1,hour2,hour_step ! hours used in calculations (see above) + LOGICAL :: HOUR_USED(nhour) ! true if the UTC hour is used + INTEGER :: nhour_used ! number of UTC hours used + INTEGER :: station_int ! station code integer (for synop stations amd RHARM soundings) + CHARACTER*3 :: station_char ! station with characters (for GRUAN soundings) + LOGICAL :: l_code_in_char ! .true. for station code in characters + REAL :: longitude,latitude ! station longitude and latitude + + LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing + INTEGER :: IREC ! record number for GrADS output + + LOGICAL :: L_ROUND_HOUR ! if .true., hour rounded to the nearest output hour +! + REAL :: MISS ! missing value code +! 
+!----------------------------------------------------------------------- +! + NAMELIST/param/infile,outfile,quantile_file,msd_bootstrap_file,& + hour1,hour2,hour_step,year1,year2,month1,month2,grads_out,outfile_grads,& + l_code_in_char,l_round_hour,miss + + MISS=-9.99e6 + READ(*,NML=PARAM) +! +! Which UTC hours are used and what is their total number? +! + hour_used=.false. + nhour_used=0. + do i=hour1,hour2,hour_step + hour_used(i+1)=.true. + nhour_used=nhour_used+1 + enddo +! +! Empty data line +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +!---------------------------------------------------------------------- +! +! Read the contents of the quantile_file to array 'quantiles' +! + quantiles=miss + open(unit=1,form='formatted',status='old',file=quantile_file) + + do while(.true.) + if(l_code_in_char)then + ! The err=1 specifier ensures that header lines are skipped +1 read(1,*,err=1,end=3)& + station_char,longitude,latitude,month,day,hour,& + (q1(i),i=1,nquant) + else +2 read(1,*,err=2,end=3)& + station_int,longitude,latitude,month,day,hour,& + (q1(i),i=1,nquant) + endif + do i=1,nquant + quantiles(i,month,day,hour+1)=q1(i) + enddo +! copy quantiles from 28 February to 29 February + if(month.eq.2.and.day.eq.28)then + do i=1,nquant + quantiles(i,month,day+1,hour+1)=q1(i) + enddo + endif + enddo +3 continue + close(1) +! +!---------------------------------------------------------------------- +! +! Read the station observations and count the absolute quantile bin frequencies +! + frequency=0 + ntimes_hour=0 + ntimes_hour_tot=0 + + open(unit=1,form='formatted',status='old',file=infile) + + do while(.true.) +11 read(1,*,err=11,end=12)yyear,mmonth,dday,hhour,value1 +! +! Rounding of hours? +! + if(l_round_hour)then + call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step) + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif +! +! If the hour is used and the date is within th analyzed period, +! 
update the quantile bin frequencies + + if(hour_used(hour+1))then + if((year.gt.year1.or.(year.eq.year1.and.month.ge.month1)).and. & + (year.lt.year2.or.(year.eq.year2.and.month.le.month2)).and. & + abs(value1).lt.abs(miss))then + ntimes_hour(hour+1)=ntimes_hour(hour+1)+1 + ntimes_hour_tot=ntimes_hour_tot+1 + call update_frequencies & + (frequency(1,hour+1),value1,quantiles(1,month,day,hour+1),nquant,miss) + endif + endif + + enddo +12 continue + close(1) +! +!---------------------------------------------------------------------- +! +! Convert absolute frequencies to relative frequencies +! + do hour=hour1,hour2,hour_step + do i=1,nquant+1 + if(ntimes_hour(hour+1).gt.0)then + frequency(i,hour+1)=frequency(i,hour+1)/ntimes_hour(hour+1) + else + frequency(i,hour+1)=miss + endif + enddo + enddo +! +!-------------------------------------------------------------------------------- +! +! Calculation of MSD. Because it can be assumed that model data are always available, +! all UTC hours get the same weight in the calculation of the all-UTC MSD. +! + expected_freq=1./(nquant+1.) + do j=hour1,hour2,hour_step + if(frequency(1,j+1).eq.miss)then + msd(j+1)=miss + else + do i=1,nquant+1 + msd(j+1)=msd(j+1)+((frequency(i,j+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + endif + enddo + do i=1,nquant+1 + do j=hour1,hour2,hour_step + frequency(i,nhour+1)=& + frequency(i,nhour+1)+frequency(i,j+1)/nhour_used + enddo + msd(nhour+1)=msd(nhour+1)+((frequency(i,nhour+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + do j=hour1,hour2,hour_step + if(frequency(1,j+1).eq.miss)then + do i=1,nquant+1 + frequency(i,nhour+1)=miss + enddo + msd(nhour+1)=miss + endif + enddo + +!---------------------------------------------------------------------- +! +! Read the bootstrap MSD:s and calculate the fractions of them that exceed the actual value +! + n_bootstrap=0 + p_value=0. + + open(unit=1,form='formatted',status='old',file=msd_bootstrap_file) + do while(.true.) 
+ if(l_code_in_char)then +21 read(1,*,err=21,end=23)station_char,longitude,latitude,realization,& + (msd_bootstrap(j+1),j=hour1,hour2,hour_step),& + msd_bootstrap(nhour+1) + else +22 read(1,*,err=22,end=23)station_int,longitude,latitude,realization,& + (msd_bootstrap(j+1),j=hour1,hour2,hour_step),& + msd_bootstrap(nhour+1) + endif +! +! Update the p-value counters +! + n_bootstrap=n_bootstrap+1 + do hour=hour1,hour2,hour_step + if(msd_bootstrap(hour+1).ge.msd(hour+1))then + p_value(hour+1)=p_value(hour+1)+1. + endif + enddo + if(msd_bootstrap(nhour+1).ge.msd(nhour+1))then + p_value(nhour+1)=p_value(nhour+1)+1. + endif + enddo +23 continue +! +! Convert the p_values from absolute counts to relative frequencies +! + do hour=hour1,hour2,hour_step + p_value(hour+1)=p_value(hour+1)/n_bootstrap + enddo + p_value(nhour+1)=p_value(nhour+1)/n_bootstrap +! +!-------------------------------------------------------------------------- +! +! Open the output file and write its header line in ODB compatible text format +! + open(unit=2,form='formatted',status='unknown',file=outfile) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real hour@hdr:integer' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer' + endif + headerline=trim(headerline)//' total_number@body:real' + headerline=trim(headerline)//' msd@body:real' + headerline=trim(headerline)//' p_value@body:real' +! +! quantile bin frequency variable names: f00, f01 ... f99 +! + do j=0,nquant + write(number_of_bin,'(i2.2)')j + headerline=trim(headerline)//' f'//number_of_bin//'@body:real' + enddo + write(2,*)trim(headerline) +! +! Write the data lines for each UTC hour used +! 
+ do hour=hour1,hour2,hour_step + j=hour+1 + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,I7,2F16.6)')& + station_char,longitude,latitude,hour,ntimes_hour(j),msd(j),p_value(j) + else + write(dataline,'(I6,2F16.6,1X,I2,I7,2F16.6)')& + station_int,longitude,latitude,hour,ntimes_hour(j),msd(j),p_value(j) + endif + + do i=1,nquant+1 + write(frequency_value,'(F16.6)')frequency(i,j) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + enddo +! +! Write the data line for the all-UTC statistics +! + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,I7,2F16.6)')& + station_char,longitude,latitude,24,ntimes_hour_tot,msd(nhour+1),p_value(nhour+1) + else + write(dataline,'(I6,2F16.6,1X,I2,I7,2F16.6)')& + station_int,longitude,latitude,24,ntimes_hour_tot,msd(nhour+1),p_value(nhour+1) + endif + do i=1,nquant+1 + write(frequency_value,'(F16.6)')frequency(i,nhour+1) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + + close(2) +! +!--------------------------------------------------------------------------------- +! +! Open the file for GrADS output and write its contents, for visualisation in GrADS? +! + if(grads_out)then + open(unit=11,form='unformatted',file=outfile_grads,access='DIRECT',& + recl=4,status='unknown') +! +! Again, first each UTC hour separately +! + do hour=hour1,hour2,hour_step + j=(hour-hour1)/hour_step + write(11,rec=1+j*(nquant+6))longitude + write(11,rec=2+j*(nquant+6))latitude + write(11,rec=3+j*(nquant+6))real(ntimes_hour(hour+1)) + write(11,rec=4+j*(nquant+6))msd(hour+1) + write(11,rec=5+j*(nquant+6))p_value(hour+1) + do i=1,nquant+1 + write(11,rec=5+j*(nquant+6)+i)frequency(i,hour+1) + enddo + enddo + ! + ! The all-UTC statistics + ! 
+      write(11,rec=1+(j+1)*(nquant+6))longitude
+      write(11,rec=2+(j+1)*(nquant+6))latitude
+      write(11,rec=3+(j+1)*(nquant+6))real(ntimes_hour_tot)
+      write(11,rec=4+(j+1)*(nquant+6))msd(nhour+1)
+      write(11,rec=5+(j+1)*(nquant+6))p_value(nhour+1)
+      do i=1,nquant+1
+        write(11,rec=5+(j+1)*(nquant+6)+i)frequency(i,nhour+1)
+      enddo
+
+      close(11)
+
+    endif ! if (grads_out)
+
+    END program rank_histograms_one_station
+!
+!----------------------------------------------------------------
+!
+    subroutine update_frequencies &
+         (frequency,f,quantiles,nquant,miss)
+!
+! Increment the counter of the quantile bin into which one data value falls.
+!
+! The nquant quantile values (ascending order) define nquant+1 bins:
+! bin 1 receives values below quantiles(1), bin k+1 receives values in
+! [quantiles(k),quantiles(k+1)), and bin nquant+1 receives values at or
+! above quantiles(nquant).
+!
+! frequency (in/out) = absolute quantile bin frequencies (nquant+1 bins)
+! f (in) = individual data value
+! quantiles (in) = quantile values
+! nquant (in) = number of quantiles
+! miss (in) = missing value code
+!
+! The program is stopped if the first quantile is flagged as missing.
+
+    implicit none
+    integer :: nquant
+    real :: quantiles(nquant)
+    real :: frequency(nquant+1)
+    real :: f
+    real :: miss
+    integer :: lo,hi,mid   ! bracketing indices and midpoint of the bisection
+    integer :: bin         ! index of the bin that receives f
+!
+! Only the first quantile is tested: missing quantiles are expected
+! to concern the whole table at once.
+!
+    if(abs(quantiles(1)) >= abs(miss))then
+      write(*,*)'Quantiles missing!?'
+      stop
+    endif
+!
+! Locate the bin: the two open-ended bins first, then a bisection
+! that keeps quantiles(lo) <= f < quantiles(hi)
+!
+    if(f < quantiles(1))then
+      bin=1
+    else if(f >= quantiles(nquant))then
+      bin=nquant+1
+    else
+      lo=1
+      hi=nquant
+      do while (hi > lo+1)
+        mid=(lo+hi)/2
+        if(f >= quantiles(mid))then
+          lo=mid
+        else
+          hi=mid
+        endif
+      enddo
+      bin=lo+1
+    endif
+!
+! Count the value in its bin
+!
+    frequency(bin)=frequency(bin)+1.
+
+    return
+    end subroutine update_frequencies
+
+!----------------------------------------------------------------------
+
+subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,&
+     hour1,hour2,hour_step)
+!
+! Round an observation hour to the nearest target hour
+! (hour1, hour1 + hour_step, ... hour2) and shift the date by one day
+! when the rounding crosses midnight.
+!
+! yyear,mmonth,dday,hhour (in) = year, month, day and hour before rounding
+! year,month,day,hour (out)    = year, month, day and hour after rounding
+! hour1,hour2,hour_step (in)   = target hours: hour1, hour1 + hour_step ... hour2
+!
+implicit none
+integer :: yyear,mmonth,dday,hhour
+integer :: year,month,day,hour
+integer :: hour1,hour2,hour_step
+integer :: ndays_month(12)   ! number of days per month in year yyear
+integer :: hour_index        ! number of steps from hour1 to the nearest target hour
+logical :: leap_year         ! .true. if yyear is a leap year
+!
+! Month lengths; February follows the Gregorian leap-year rule
+!
+ndays_month=(/31,28,31,30,31,30,31,31,30,31,30,31/)
+leap_year=(mod(yyear,4) == 0 .and. mod(yyear,100) > 0) .or. mod(yyear,400) == 0
+if(leap_year)ndays_month(2)=29
+!
+! The date is returned unchanged unless the rounding crosses midnight
+!
+year=yyear
+month=mmonth
+day=dday
+!
+! Nearest target hour; the result may fall outside 0 ... 23
+!
+hour_index=nint(real(hhour-hour1)/hour_step)
+hour=hour1+hour_step*hour_index
+!
+! Rounded forward past midnight: first target hour of the next day
+!
+if(hour >= 24)then
+   hour=hour1
+   day=dday+1
+   if(day > ndays_month(month))then
+      day=1
+      month=month+1
+      if(month > 12)then
+         month=1
+         year=yyear+1
+      endif
+   endif
+endif
+!
+! Rounded backward past midnight: last target hour of the previous day
+!
+if(hour < 0)then
+   hour=hour2
+   day=dday-1
+   if(day == 0)then
+      month=month-1
+      if(month == 0)then
+         month=12
+         year=yyear-1
+      endif
+      day=ndays_month(month)
+   endif
+endif
+
+return
+end subroutine round_hour
+
+
+
diff --git a/RADSOUND/STATRS/fortran-programs/t-test.f95 b/RADSOUND/STATRS/fortran-programs/t-test.f95
new file mode 100644
index 0000000000000000000000000000000000000000..83a2b5bb42d87e3e02f858f95e3103eb19cb46e3
--- /dev/null
+++ b/RADSOUND/STATRS/fortran-programs/t-test.f95
@@ -0,0 +1,698 @@
+program t_test
+  !
+  ! t-test for difference between time mean values, using monthly mean data
+  !
+  ! The test is conducted separately for UTC hours 'hour1' ... 'hour2'
+  ! with interval 'step', as well as for the mean over all the selected
+  ! UTC hours.
+  !
+  ! Code somehow documented: Jouni Räisänen 4.7.2023
+  !
+  !----------------------------------------------------------------------
+  ! 
Input files (text format) + !---------------------------------------------------------------------- + ! files = monthly mean time series for the model simulation + ! fileo = ------------------------ for observations + ! + ! Each data line in the input files includes: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) year + ! 5) month + ! 6) hour + ! 7) variable + ! 8) data value + ! + !---------------------------------------------------------------------- + ! Output files + !---------------------------------------------------------------------- + ! + ! outfile = results from the t-test in odb compatible text format. Each + ! line of the file gives the following for one UTC hour: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) UTC hour + ! 5) code of variable + ! 6) number of degrees of freedom in t-test (assuming zero autocorrelation) + ! 7) t-value + ! 8) p-value of t-value (based on a simple look-up-table approach) + ! + ! outfile_grads = a second output file in GrADS binary format + ! (only if lgrads=.true.) + ! + !-------------------------------------------------------------------- + ! Namelist parameters: + !-------------------------------------------------------------------- + ! + ! files = monthly mean time series for the model simulation + ! fileo = ------------------------ for observations + ! year1s,month1s,year2s,month2s = period used from files (if available) + ! year1o,month1o,year2o,month2o = period used from fileo (if available) + ! + ! month1,month2 = range of months used in the t-test. + ! example: month1=12, month2=2 -> the test is based on + ! interannual variability of December-February mean values + ! + ! hour1 = first UTC hour tested + ! hour2 = last UTC hour tested + ! hour_step = step between tested UTC hours + ! + ! min_fraction_valid_data = fraction of available monthly means that + ! is required for calculating an annual value. + ! (default = 0.5) + ! + ! miss = missing value code (default = -9.99e6). + ! 
Note that all data with an absolute value equal to or larger than + ! abs(miss) will be treated as missing. + ! + ! outfile = output file (in ODB compatible text format) + ! + ! lgrads = if .true., output as a GrADS binary is also written. + ! + ! outfile_grads = output file (as a GrADS binary), if LGRADS=.true. + ! + ! l_code_in_char: if .true., the station code is assumed to be + ! a 3-character string (as for GRUAN soundings). + ! Otherwise, station code as integer is assumed. + ! + !--------------------------------------------------------------- + ! Definition of the 'annual' values used in the test: + !--------------------------------------------------------------- + ! + ! 1) If both data sets include at least 12 months of data, + ! then annual means include all months within the range month1-month2 + ! + ! 2) If the simulation includes fewer than 12 months of data + ! then annual means exclude the months not available for the simulation + ! + ! For both time series (s and o), the period used in the + ! test starts from the first calendar month of the simulation (month1s) + ! + ! 3) If the observed time series gives fewer than 2 annual means, + ! the test cannot be conducted and the program terminates. + ! + ! NB: it is assumed that the periods defined by + ! 'year1s,month1s,year2s,month2s' and + ! 'year1o,month1o,year2o,month2o' + ! are available in the data sets + ! + ! If monthly means of individual months are missing, the + ! annual mean for that year is calculated without these month(s) + ! (and may thus be biased). However, if the fraction of available + ! monthly means is less than min_fraction_valid_data, the + ! annual mean is set as missing. + ! + ! If an annual mean is missing, it is excluded from + ! the test (-> number of years -> number of degrees of freedom is also reduced) + ! + ! If the total number of degrees of freedom is less than 1, + ! the test cannot be conducted. + ! 
+ !-------------------------------------------------------------------------------- + + implicit none + +! ---------------- Namelist parameters ---- + character*160 :: files,fileo,outfile,outfile_grads + integer :: year1s,month1s,year2s,month2s + integer :: year1o,month1o,year2o,month2o + integer :: month1,month2 + integer :: hour1,hour2,hour_step + logical :: lgrads + logical :: l_code_in_char + real :: min_fraction_valid_data + real :: miss +! +!---- Arrays for monthly and annual mean values ---- + integer,parameter :: nhour=24,nmonth=12,nmax_year=100 + real :: sim(nhour+1,nmonth,nmax_year) + real :: obs(nhour+1,nmonth,nmax_year) + real :: sim_ann(nmax_year,nhour+1) + real :: obs_ann(nmax_year,nhour+1) +! +!---- t test statistics (for each UTC hour --------- +! + + real :: mean_sim(nhour+1),std_sim(nhour+1) + real :: mean_obs(nhour+1),std_obs(nhour+1) + integer :: n_sim(nhour+1),n_obs(nhour+1),dof(nhour+1) + real :: t(nhour+1) + real :: p_value(nhour+1) +! +!--- Miscellaneous +! + integer :: i,j,k + integer :: station_int ! station code as an integer (if l_code_in_char=.false.) + character*3 :: station_char ! station codes as 3-char string (if l_code_in_char=.true.) + real :: longitude,latitude + integer :: year,month,hour,variable ! values read from input data lines + real :: f1 ! individual data value read from input data line + integer :: nhour_anal ! number of UTC hours for which the test is conducted + logical :: valid ! This is used for deciding whether the "all-UTC" monthly mean can be calculated + logical :: month_used(nmonth) ! .true. for calendar months used in the t-test + logical :: month_sim(nmonth) ! .true. for calendar months available in the simulation + integer :: nmon_used ! number of calendar months used included in the annual varlues + integer :: month2_up,month2o_up,month2s_up ! for figuring out which months to use ... + integer :: nyear_sim,nyear_obs ! number of simulated and observed years that should be + ! 
available if there is no missing data + integer :: nmon_sim ! length of the simulation period in months (based on the namelist parameters) + integer :: mon,m1,m2 ! loop variables for calculating annual values + integer :: nmon_count ! count of total number of relevant months, just to find the number of years ... + integer :: first_month ! the month starting each "year" in the t-test + character*200 :: headerline,dataline + + namelist/param/files,year1s,month1s,year2s,month2s,& + fileo,year1o,month1o,year2o,month2o,& + month1,month2,hour1,hour2,hour_step,& + outfile,outfile_grads,lgrads,miss,& + l_code_in_char,& + min_fraction_valid_data + + miss=-9.99e6 + min_fraction_valid_data=0.5 + read(*,nml=param) + +! write(*,*)'year1s,month1s,year2s,month2s',year1s,month1s,year2s,month2s +! write(*,*)'year1o,month1o,year2o,month2o',year1o,month1o,year2o,month2o +! write(*,*)'month1,month2',month1,month2 + + nhour_anal=(hour2-hour1)/hour_step+1 +! +! Which calendar months will be used in the t test? +! + month_used=.false. + month_sim=.true. + + month2_up=month2 + if(month2.lt.month1)month2_up=month2+nmonth + month2s_up=month2s + if(month2s.lt.month1s)month2s_up=month2s+nmonth + ! + ! Calculate the length of the simulation in months. + ! + nmon_sim=(nmonth*(year2s-year1s)+month2s-month1s+1) + ! + ! If the simulation is less than one year: set month_sim to .false. for months + ! that are not included in the simulation + ! + if(nmon_sim.lt.nmonth)then + do mon=1,nmonth + if((year2s.eq.year1s).and.(mon.lt.month1s.or.mon.gt.month2s))month_sim(mon)=.false. + if((year2s.gt.year1s).and.(mon.gt.month2s.and.mon.lt.month1s))month_sim(mon)=.false. + enddo + endif + ! + ! Calculate the number of calendar months included in the annual values + ! 
+ nmon_used=0 + do month=month1,month2_up + mon=1+mod(month-1,nmonth) + month_used(mon)=month_sim(mon) + if(month_used(mon))nmon_used=nmon_used+1 + enddo + write(*,*)'Number of months used:',nmon_used + if(nmon_used.lt.1)then + write(*,*)'Nothing to calculate. Program terminates.' + stop + endif + +!------------------------------------------------------------------------ +! Read the observed and simulated time series from files +!------------------------------------------------------------------------ + + obs=miss + sim=miss + + open(unit=1,file=trim(fileo),form='formatted',status='old') + open(unit=2,file=trim(files),form='formatted',status='old') +! +! Read the monthly means from the observation file +! + read(1,*,end=3) ! the first line is a header + do while (.true.) + if(l_code_in_char)then +1 read(1,*,err=1,end=3)station_char,longitude,latitude,year,month,hour,& + variable,f1 + else +2 read(1,*,err=2,end=3)station_int,longitude,latitude,year,month,hour,& + variable,f1 + endif + if(abs(f1).lt.abs(miss))then + if((year.gt.year1o.or.(year.eq.year1o.and.month.ge.month1o)).and.& + (year.lt.year2o.or.(year.eq.year2o.and.month.le.month2o)))then + obs(hour+1,month,1+year-year1o)=f1 + endif + endif + enddo +3 continue + close (1) +!-------------------------------------------------------------------------- +! Read the monthly means from the simulation file +!-------------------------------------------------------------------------- + + read(2,*,end=13) ! the first line is a header + do while (.true.) 
+ if(l_code_in_char)then +11 read(2,*,err=11,end=13)station_char,longitude,latitude,year,month,hour,& + variable,f1 + else +12 read(2,*,err=12,end=13)station_int,longitude,latitude,year,month,hour,& + variable,f1 + endif + + if(abs(f1).lt.abs(miss))then + if((year.gt.year1s.or.(year.eq.year1s.and.month.ge.month1s)).and.& + (year.lt.year2s.or.(year.eq.year2s.and.month.le.month2s)))then + sim(hour+1,month,1+year-year1s)=f1 + endif + endif + enddo +13 continue + close (2) + +!---------------------------------------------------------------------- +! Calculate the all-UTC monthly means for observations +! (only when data is available for all the required hours!) +!---------------------------------------------------------------------- + do year=year1o,year2o + do month=1,nmonth + valid=.true. + f1=0. + do hour=hour1,hour2,hour_step + if(abs(obs(hour+1,month,1+year-year1o)).lt.abs(miss))then + f1=f1+obs(hour+1,month,1+year-year1o) + else + valid=.false. + endif + enddo + if(valid)then + obs(nhour+1,month,1+year-year1o)=f1/nhour_anal + endif + enddo + enddo +!---------------------------------------------------------------------- +! Calculate the all-UTC monthly means for the simulation +! (only when data is available for all the required hours!) +!---------------------------------------------------------------------- + do year=year1s,year2s + do month=1,nmonth + valid=.true. + f1=0. + do hour=hour1,hour2,hour_step + if(abs(sim(hour+1,month,1+year-year1s)).lt.abs(miss))then + f1=f1+sim(hour+1,month,1+year-year1s) + else + valid=.false. + endif + enddo + if(valid)then + sim(nhour+1,month,1+year-year1s)=f1/nhour_anal + endif + enddo + enddo + +!--------------------------------------------------------------------- +! Calculate the annual values for the observations and the simulations +!--------------------------------------------------------------------- + sim_ann=0 + obs_ann=0 +! +! How many years (or partial years) are availble for the simulation? +! +! 
+! Brute force calculation: loop over all months in the simulation period,
+! calculate the number of used months, and divide by the annual number of used months
+! (it should be possible to do this in a more intelligent way!)
+
+  nmon_count=0
+  do year=year1s,year2s
+     m1=1
+     m2=nmonth
+     if(year.eq.year1s)m1=month1s
+     if(year.eq.year2s)m2=month2s
+     do month=m1,m2
+        if(month_used(month))then
+           nmon_count=nmon_count+1
+           if(nmon_count.eq.1)first_month=month ! the month starting each year
+        endif
+     enddo
+  enddo
+  nyear_sim=nmon_count/nmon_used
+!
+! The same for the observations. However, those months in the first year
+! that precede the first month of a t-test year are excluded because
+! they do not belong to a full year
+!
+! NOTE(review): the test below compares the month with month1s (first month
+! of the simulation) although m1 already starts from first_month in the
+! first observed year - confirm that month1s is the intended limit.
+
+  nmon_count=0
+  do year=year1o,year2o
+     m1=1
+     m2=nmonth
+     if(year.eq.year1o)m1=max(month1o,first_month)
+     if(year.eq.year2o)m2=month2o
+     do month=m1,m2
+        if(year.gt.year1o.or.month.ge.month1s)then
+           if(month_used(month))nmon_count=nmon_count+1
+        endif
+     enddo
+  enddo
+  nyear_obs=nmon_count/nmon_used
+
+!
+! Calculate the 'annual' values used in the t-test
+! for each UTC hour analyzed
+!
+! Each annual value is the mean over nmon_used consecutive months that
+! start from calendar month first_month. The month counter may run past
+! nmonth (e.g. 12,13,14 for December-February); 'mon' is then the calendar
+! month and (month-1)/nmonth is the number of years (0 or 1) to add to
+! the starting year. The starting year itself is kept fixed while one
+! annual value is accumulated, so that all months after December are
+! taken from the same following year.
+
+  obs_ann=0.
+  sim_ann=0.
+  do j=1,nhour_anal+1 ! the all-UTC values are included in the same loop
+     if(j.le.nhour_anal)then
+        hour=hour1+(j-1)*hour_step
+     else
+        hour=nhour
+     endif
+!
+! Annual means for the observations
+!
+     do i=1,nyear_obs
+        year=year1o+(i-1)
+        if(first_month.lt.month1o)year=year+1
+        nmon_count=0 ! number of months in a year with non-missing data
+        do month=first_month,first_month+nmon_used-1
+           mon=1+mod(month-1,nmonth)
+           f1=obs(hour+1,mon,1+year+(month-1)/nmonth-year1o)
+           if(abs(f1).lt.abs(miss))then
+              obs_ann(i,hour+1)=obs_ann(i,hour+1)+f1
+              nmon_count=nmon_count+1
+           endif
+        enddo
+        ! too few valid months -> the annual value is set as missing
+        if(nmon_count.ge.(min_fraction_valid_data*nmon_used))then
+           obs_ann(i,hour+1)=obs_ann(i,hour+1)/nmon_count
+        else
+           obs_ann(i,hour+1)=miss
+        endif
+     enddo
+!
+! Annual means for the simulation
+!
+
+     do i=1,nyear_sim
+        year=year1s+(i-1)
+        if(first_month.lt.month1s)year=year+1
+        nmon_count=0 ! number of months in year with non-missing data
+        do month=first_month,first_month+nmon_used-1
+           mon=1+mod(month-1,nmonth)
+           f1=sim(hour+1,mon,1+year+(month-1)/nmonth-year1s)
+           if(abs(f1).lt.abs(miss))then
+              sim_ann(i,hour+1)=sim_ann(i,hour+1)+f1
+              nmon_count=nmon_count+1
+           endif
+        enddo
+        ! too few valid months -> the annual value is set as missing
+        if(nmon_count.ge.(min_fraction_valid_data*nmon_used))then
+           sim_ann(i,hour+1)=sim_ann(i,hour+1)/nmon_count
+        else
+           sim_ann(i,hour+1)=miss
+        endif
+     enddo
+
+  enddo ! end of the (j -> hour) loop
+
+!-----------------------------------------------------------------------
+! Conduct the t test
+!-----------------------------------------------------------------------
+
+  do j=1,nhour_anal+1 ! the all-UTC values are included in the same loop
+     if(j.le.nhour_anal)then
+        hour=hour1+(j-1)*hour_step
+     else
+        hour=nhour
+     endif
+
+     call test_statistic(t(hour+1),dof(hour+1),miss,&
+          sim_ann(1,hour+1),nyear_sim,obs_ann(1,hour+1),nyear_obs)
+     call pval(t(hour+1),p_value(hour+1),dof(hour+1),miss)
+
+  enddo
+
+!-----------------------------------------------------------------
+! 
+! Output to text file
+!-----------------------------------------------------------------
+
+  open(unit=1,file=trim(outfile),form='formatted')
+
+  if(l_code_in_char)then
+
+     headerline='station@hdr:string longitude@hdr:real latitude@hdr:real'//&
+          ' hour@hdr:integer variable@hdr:integer dof@body:integer'//&
+          ' t@body:real p_value@body:real'
+  else
+     headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real'//&
+          ' hour@hdr:integer variable@hdr:integer dof@body:integer'//&
+          ' t@body:real p_value@body:real'
+  endif
+
+  write(1,*)trim(headerline)
+  do j=1,nhour_anal+1
+     if(j.le.nhour_anal)then
+        hour=hour1+(j-1)*hour_step
+     else
+        hour=nhour
+     endif
+     if(l_code_in_char)then
+        write(1,'(A3,2F16.6,1X,I2,1X,I3,1X,I3,2F16.6)')&
+             station_char,longitude,latitude,hour,variable,dof(hour+1),t(hour+1),p_value(hour+1)
+     else
+        write(1,'(I6,2F16.6,1X,I2,1X,I3,1X,I3,2F16.6)')&
+             station_int,longitude,latitude,hour,variable,dof(hour+1),t(hour+1),p_value(hour+1)
+     endif
+
+  enddo
+  close(1)
+!-------------------------------------------------------------
+! Output to GrADS binary file?
+!-------------------------------------------------------------
+  if(lgrads)then
+     open(unit=1,form='unformatted',file=trim(outfile_grads),access='direct',recl=4)
+     if(l_code_in_char)station_int=0
+     write(1,rec=1)real(station_int) ! will be zero if station code is a character string
+     write(1,rec=2)longitude
+     write(1,rec=3)latitude
+     do j=1,nhour_anal+1
+        if(j.le.nhour_anal)then
+           hour=hour1+(j-1)*hour_step
+        else
+           hour=nhour
+        endif
+        write(1,rec=3+j)real(dof(hour+1))
+        write(1,rec=3+(nhour_anal+1)+j)t(hour+1)
+        write(1,rec=3+2*(nhour_anal+1)+j)p_value(hour+1)
+     enddo
+     close(1)
+  endif
+
+!----------------------------------------------------------------
+end program t_test
+!----------------------------------------------------------------
+
+subroutine test_statistic(t,dof,miss,f1,n1,f2,n2)
+  !
+  ! Calculation of test statistic and dof for standard
+  ! two-sample t test (pooled variance)
+  !
+  ! t (out)      = t value, (mean of f1 - mean of f2) divided by its
+  !                standard error; set to miss if the test cannot be made
+  ! dof (out)    = number of degrees of freedom (number of non-missing
+  !                values in f1 and f2 minus 2); set to 0 if t is missing
+  ! miss (in)    = missing value code; values with abs(f) >= abs(miss)
+  !                are excluded
+  ! f1,n1 (in)   = sample 1 and its length
+  ! f2,n2 (in)   = sample 2 and its length
+  !
+  ! The variances are calculated in two passes (mean first, then squared
+  ! deviations) to avoid the loss of accuracy of the one-pass formula
+  ! mean(f**2)-mean(f)**2 in single precision.
+  !
+  implicit none
+  real :: t ! t test value (out)
+  integer :: dof ! number of degrees of freedom (out)
+  real :: miss ! missing value code (in)
+  integer :: n1,n2 ! number of years in samples 1 and 2 (in)
+  real :: f1(n1),f2(n2) ! time series in samples 1 and 2 (in)
+  real :: mean1,mean2,var1,var2 ! mean values and variances
+  integer :: n1_real,n2_real ! numbers of years with non-missing data
+  integer :: i
+  mean1=0.
+  mean2=0.
+  var1=0.
+  var2=0.
+  n1_real=0
+  n2_real=0
+!
+! Calculate mean and variance for time series 1
+!
+  do i=1,n1
+     if(abs(f1(i)).lt.abs(miss))then
+        n1_real=n1_real+1
+        mean1=mean1+f1(i) ! division with number of years later
+     endif
+  enddo
+  if(n1_real.ge.1)then
+     mean1=mean1/n1_real
+     do i=1,n1
+        if(abs(f1(i)).lt.abs(miss))var1=var1+(f1(i)-mean1)**2
+     enddo
+     var1=var1/n1_real ! variance with n in denominator
+  endif                ! (cf. calculation of t later)
+!
+! Calculate mean and variance for time series 2
+!
+  do i=1,n2
+     if(abs(f2(i)).lt.abs(miss))then
+        n2_real=n2_real+1
+        mean2=mean2+f2(i)
+     endif
+  enddo
+  if(n2_real.ge.1)then
+     mean2=mean2/n2_real
+     do i=1,n2
+        if(abs(f2(i)).lt.abs(miss))var2=var2+(f2(i)-mean2)**2
+     enddo
+     var2=var2/n2_real
+  endif
+!
+! Number of degrees of freedom
+!
+  dof=n1_real+n2_real-2
+!
+! t value. n*var is the sum of squared deviations of a sample, so
+! (n1*var1+n2*var2)/dof is the pooled variance estimate. The test is
+! not made if either sample is empty or the pooled variance is zero.
+!
+  if(dof.ge.1.and.n1_real.ge.1.and.n2_real.ge.1.and.&
+       (n1_real*var1+n2_real*var2.gt.0))then
+     t=(mean1-mean2)/sqrt((n1_real*var1+n2_real*var2)/dof)&
+          /sqrt(1./n1_real+1./n2_real)
+  else
+     t=miss
+     dof=0
+  endif
+
+!  write(*,*)'f1:',(f1(i),i=1,n1)
+!  write(*,*)'f2:',(f2(i),i=1,n2)
+!  write(*,*)'mean1,var1,n1_real',mean1,var1,n1_real
+!  write(*,*)'mean2,var2,n2_real',mean2,var2,n2_real
+!  write(*,*)'t,dof',t,dof
+
+  return
+  end subroutine test_statistic
+
+  subroutine pval(t,p,dof,miss)
+  !
+  ! Approximate p-value table for t test, based on tabulated critical values.
+  !
+  ! plev holds the tabulated cumulative probabilities and tcrit(:,n) the
+  ! corresponding critical t values for n degrees of freedom. The result
+  ! is the highest tabulated probability whose critical value is exceeded
+  ! by abs(t), or 0.501 if abs(t) is below the smallest critical value.
+  ! For negative t the result is mirrored (p -> 1-p), so the values run
+  ! from near 0 (highly negative t) to near 1 (highly positive t).
+  !
+  ! Numbers of degrees of freedom above dof_max use the row for dof_max.
+  !
+  ! if the t value is missing, or dof is less than 1,
+  ! the p-value is set as missing
+
+  implicit none
+  real :: t ! t-value (in)
+  real :: p ! p-value (out)
+  integer :: dof ! number of degrees of freedom (in)
+  real :: miss ! missing value code (in)
+  integer :: dof2 ! dof truncated to dof_max
+  integer,parameter :: dof_max=120
+  integer,parameter :: n_threshold=11
+  real :: tcrit(n_threshold,dof_max)
+  real :: plev(n_threshold)
+  integer :: i,j
+  data plev /0.75,0.8,0.85,0.9,0.95,0.975,0.99,0.995,0.9975,0.999,0.9995/
+  data (tcrit(i,1),i=1,n_threshold)&
+       /1.000,1.376,1.963,3.078,6.314,12.706,31.821,63.657,127.321,318.309,636.619/
+  data (tcrit(i,2),i=1,n_threshold)&
+       /0.816,1.061,1.386,1.886,2.920,4.303,6.965,9.925,14.089,22.327,31.599/
+  data (tcrit(i,3),i=1,n_threshold)&
+       /0.765,0.978,1.250,1.638,2.353,3.182,4.541,5.841,7.453,10.215,12.924/
+  data (tcrit(i,4),i=1,n_threshold)&
+       /0.741,0.941,1.190,1.533,2.132,2.776,3.747,4.604,5.598,7.173,8.610/
+  data (tcrit(i,5),i=1,n_threshold)&
+       /0.727,0.920,1.156,1.476,2.015,2.571,3.365,4.032,4.773,5.893,6.869/
+  data (tcrit(i,6),i=1,n_threshold)&
+       /0.718,0.906,1.134,1.440,1.943,2.447,3.143,3.707,4.317,5.208,5.959/
+  data (tcrit(i,7),i=1,n_threshold)&
+       /0.711,0.896,1.119,1.415,1.895,2.365,2.998,3.499,4.029,4.785,5.408/
+  data (tcrit(i,8),i=1,n_threshold)&
+       /0.706,0.889,1.108,1.397,1.860,2.306,2.896,3.355,3.833,4.501,5.041/
+  data (tcrit(i,9),i=1,n_threshold)&
+       /0.703,0.883,1.100,1.383,1.833,2.262,2.821,3.250,3.690,4.297,4.781/
+  data (tcrit(i,10),i=1,n_threshold)&
+       /0.700,0.879,1.093,1.372,1.812,2.228,2.764,3.169,3.581,4.144,4.587/
+  data (tcrit(i,11),i=1,n_threshold)&
+       /0.697,0.876,1.088,1.363,1.796,2.201,2.718,3.106,3.497,4.025,4.437/
+  data (tcrit(i,12),i=1,n_threshold)&
+       /0.695,0.873,1.083,1.356,1.782,2.179,2.681,3.055,3.428,3.930,4.318/
+  data (tcrit(i,13),i=1,n_threshold)&
+       /0.694,0.870,1.079,1.350,1.771,2.160,2.650,3.012,3.372,3.852,4.221/
+  data (tcrit(i,14),i=1,n_threshold)&
+       /0.692,0.868,1.076,1.345,1.761,2.145,2.624,2.977,3.326,3.787,4.140/
+  data (tcrit(i,15),i=1,n_threshold)&
+       /0.691,0.866,1.074,1.341,1.753,2.131,2.602,2.947,3.286,3.733,4.073/
+  data (tcrit(i,16),i=1,n_threshold)&
+       /0.690,0.865,1.071,1.337,1.746,2.120,2.583,2.921,3.252,3.686,4.015/
+  data (tcrit(i,17),i=1,n_threshold)&
+       /0.689,0.863,1.069,1.333,1.740,2.110,2.567,2.898,3.222,3.646,3.965/
+  data (tcrit(i,18),i=1,n_threshold)&
+       /0.688,0.862,1.067,1.330,1.734,2.101,2.552,2.878,3.197,3.610,3.922/
+  data (tcrit(i,19),i=1,n_threshold)&
+       /0.688,0.861,1.066,1.328,1.729,2.093,2.539,2.861,3.174,3.579,3.883/
+  data (tcrit(i,20),i=1,n_threshold)&
+       /0.687,0.860,1.064,1.325,1.725,2.086,2.528,2.845,3.153,3.552,3.850/
+  data (tcrit(i,21),i=1,n_threshold)&
+       /0.686,0.859,1.063,1.323,1.721,2.080,2.518,2.831,3.135,3.527,3.819/
+  data (tcrit(i,22),i=1,n_threshold)&
+       /0.686,0.858,1.061,1.321,1.717,2.074,2.508,2.819,3.119,3.505,3.792/
+  data (tcrit(i,23),i=1,n_threshold)&
+       /0.685,0.858,1.060,1.319,1.714,2.069,2.500,2.807,3.104,3.485,3.767/
+  data (tcrit(i,24),i=1,n_threshold)&
+       /0.685,0.857,1.059,1.318,1.711,2.064,2.492,2.797,3.091,3.467,3.745/
+  data (tcrit(i,25),i=1,n_threshold)&
+       /0.684,0.856,1.058,1.316,1.708,2.060,2.485,2.787,3.078,3.450,3.725/
+  data (tcrit(i,26),i=1,n_threshold)&
+       /0.684,0.856,1.058,1.315,1.706,2.056,2.479,2.779,3.067,3.435,3.707/
+  data (tcrit(i,27),i=1,n_threshold)&
+       /0.684,0.856,1.057,1.314,1.703,2.052,2.473,2.771,3.057,3.421,3.690/
+  data (tcrit(i,28),i=1,n_threshold)&
+       /0.683,0.855,1.056,1.313,1.701,2.048,2.467,2.763,3.047,3.408,3.674/
+  data (tcrit(i,29),i=1,n_threshold)&
+       /0.683,0.854,1.055,1.311,1.699,2.045,2.462,2.756,3.038,3.396,3.659/
+  data (tcrit(i,30),i=1,n_threshold)&
+       /0.683,0.854,1.055,1.310,1.697,2.042,2.457,2.750,3.030,3.385,3.646/
+  data (tcrit(i,40),i=1,n_threshold)&
+       /0.681,0.851,1.050,1.303,1.684,2.021,2.423,2.704,2.971,3.307,3.551/
+  data (tcrit(i,50),i=1,n_threshold)&
+       /0.679,0.849,1.047,1.299,1.676,2.009,2.403,2.678,2.937,3.261,3.496/
+  data (tcrit(i,60),i=1,n_threshold)&
+       /0.679,0.848,1.045,1.296,1.671,2.000,2.390,2.660,2.915,3.232,3.460/
+  data (tcrit(i,80),i=1,n_threshold)&
+       /0.678,0.846,1.043,1.292,1.664,1.990,2.374,2.639,2.887,3.195,3.416/
+  data (tcrit(i,100),i=1,n_threshold)&
+       /0.677,0.845,1.042,1.290,1.660,1.984,2.364,2.626,2.871,3.174,3.390/
+  data (tcrit(i,120),i=1,n_threshold)&
+       /0.677,0.845,1.041,1.289,1.658,1.980,2.358,2.617,2.860,3.160,3.373/
+!
+! Linear interpolation of critical values for dof > 30
+! (only dof = 40, 50, 60, 80, 100 and 120 are tabulated above 30)
+!
+  do j=1,n_threshold
+     do i=31,39
+        tcrit(j,i)=tcrit(j,30)+(i-30.)/10.*(tcrit(j,40)-tcrit(j,30))
+     enddo
+     do i=41,49
+        tcrit(j,i)=tcrit(j,40)+(i-40.)/10.*(tcrit(j,50)-tcrit(j,40))
+     enddo
+     do i=51,59
+        tcrit(j,i)=tcrit(j,50)+(i-50.)/10.*(tcrit(j,60)-tcrit(j,50))
+     enddo
+     do i=61,79
+        tcrit(j,i)=tcrit(j,60)+(i-60.)/20.*(tcrit(j,80)-tcrit(j,60))
+     enddo
+     do i=81,99
+        tcrit(j,i)=tcrit(j,80)+(i-80.)/20.*(tcrit(j,100)-tcrit(j,80))
+     enddo
+     do i=101,119
+        tcrit(j,i)=tcrit(j,100)+(i-100.)/20.*(tcrit(j,120)-tcrit(j,100))
+     enddo
+  enddo
+
+  p=miss
+  if(abs(t).lt.abs(miss).and.dof.ge.1)then
+     ! the table is indexed with dof2, which never exceeds its last row
+     dof2=min(dof,dof_max)
+     p=0.501
+     ! critical values increase with i, so the last match is the highest level exceeded
+     do i=1,n_threshold
+        if(abs(t).gt.tcrit(i,dof2))p=plev(i)
+     enddo
+     if(t.lt.0)p=1-p
+!    write(*,*)'t,dof,p',t,dof,p
+  endif
+  return
+  end subroutine pval
+
+
diff --git a/RADSOUND/STATRS/fortran-programs/vertical_differences_soundings.f95 b/RADSOUND/STATRS/fortran-programs/vertical_differences_soundings.f95
new file mode 100644
index 0000000000000000000000000000000000000000..ad1dc4e14b659b72bec66d4f551252a948ceea62
--- /dev/null
+++ b/RADSOUND/STATRS/fortran-programs/vertical_differences_soundings.f95
@@ -0,0 +1,199 @@
+Program vertical_differences_soundings
+!
+! The programs reads time series data from two text files with soundings data
+! and writes the difference time series to the output file in a ODB compatible format
+!
+! Although the name hints calculation of vertical differences between two levels,
+! other types of differences can also be calculated.
+!
+! 
Jouni Räisänen, University of Helsinki, August 2023 +! +!---------------------------------------------------------------------- +! Input files (text format) +!---------------------------------------------------------------------- +! +! INFILE_1: The first data file (time series for one level and location) +! INFILE_2: The second data file (time series for one level and location) +! +! The differences INFILE_1 - INFILE_2 are calculated +! +! In both files, the first line is a header. The other lines include: +! +! 1) year +! 2) month +! 3) day +! 4) hour +! 5) longitude +! 6) latitude +! 7) station code +! 8) variable code +! 9) level +! 10) data value +! +! If (lgruan.eq.true), the station code is assumed to be a 3-character string +! Otherwise, it is assumed to be an integer. +! +!---------------------------------------------------------------------- +! Output file (ODB compatible text format) +!---------------------------------------------------------------------- +! +! OUTFILE: the difference time series (INFILE_1 minus INFILE_2) +! +! The first line is a header. The other lines include +! +! 1) year +! 2) month +! 3) day +! 4) hour +! 5) longitude +! 6) latitude +! 7) station code +! 8) variable code +! 9) data value +! +! (NB: no level value is written) +! +!---------------------------------------------------------------------- +! +! Namelist parameters: +! +! infile_1 = 1st input file +! infile_2 = 2nd input file +! outfile = output file +! miss = missing value code (default: -9.99e6) +! all data values f for which |f| >= |miss| are interpreted as missing +! lgruan = if .true., station code is assumed to be 3-character string +! if .false., station codes is assumed to be integer +!----------------------------------------------------------------- +! +implicit none +! +character*160 :: infile_1,infile_2 ! input files +character*160 :: outfile ! output file +character*300 :: line,line1,line2 ! 
lines read from / written to text file +integer :: year1,month1,day1,hour1 ! year,month,day and hour read from infile_1 +integer :: year2,month2,day2,hour2 ! year,month,day and hour read from infile_2 +integer :: time1 ! hour1+24*day1+31*24*month1+31*24*12*(year1-1800) +integer :: time2 ! hour2+24*day2+31*24*month2+31*24*12*(year2-1800) +integer :: timeprev ! the previous time in the difference time series +character*3 :: station_char ! station code as string (GRUAN?) +integer :: station_int ! station code as integer (RHARM?) +real :: longitude,latitude ! longitude and laitude +integer :: variable ! variable code +real :: level ! level as read from input files (not used) +real :: f1,f2,f ! single data values +real :: miss=-9.99e6 ! missing value codes +logical :: lgruan ! if .true., assume 3-char station code (otherwise integer) +! +namelist/param/infile_1,infile_2,outfile,miss,lgruan +! +read(*,nml=param) +! +! Open the input and output files: +! +open(unit=1,file=trim(infile_1),form='formatted') +open(unit=2,file=trim(infile_2),form='formatted') +open(unit=11,file=trim(outfile),form='formatted') +! +! Read the header lines from the two input files +! +read(1,*) +read(2,*) +! +! Write the header to the output file. There is no 'level' because the result is a single-level variable +! +if(lgruan)then +write(11,*)& + 'year@hdr:integer ','month@hdr:integer ','day@hdr:integer ','hour@hdr:integer ',& + 'longitude@hdr:real ','latitude@hdr:real ','station@hdr:string ','variable@body:integer ',& + 'value@body:real ' +else +write(11,*)& + 'year@hdr:integer ','month@hdr:integer ','day@hdr:integer ','hour@hdr:integer ',& + 'longitude@hdr:real ','latitude@hdr:real ','station@hdr:integer ','variable@body:integer ',& + 'value@body:real ' + +endif +! +! Main loop. Search the times for which there is data in both infile_1 and infile_2 +! and write the differences to outfile +! +time1=0 +time2=0 +timeprev=0 + +do while (.true.) 
+1 if(time1.le.timeprev.or.time1.lt.time2)then + read(1,'(A300)',err=10,end=10)line + call compress_line(line,line1,300) + write(*,*)'line=',line + write(*,*)'line1=',line1 + if(lgruan)then + read(line1,*)year1,month1,day1,hour1,longitude,latitude,station_char,& + variable,level,f1 + else + read(line1,*)year1,month1,day1,hour1,longitude,latitude,station_int,& + variable,level,f1 + endif + time1=hour1+24*day1+31*24*month1+31*24*12*(year1-1800) + goto 1 + endif +2 if(time2.lt.time1)then + read(2,'(A300)',err=10,end=10)line + call compress_line(line,line2,300) + if(lgruan)then + read(line2,*)year2,month2,day2,hour2,longitude,latitude,station_char,& + variable,level,f2 + else + read(line2,*)year2,month2,day2,hour2,longitude,latitude,station_int,& + variable,level,f2 + endif + time2=hour2+24*day2+31*24*month2+31*24*12*(year2-1800) + goto 1 + endif + if(time1.ne.time2)then ! Just a test. The previous code should ensure that this + ! line is only reached when time1=time1 + write(*,*)'Something wrong: time1,time2=',time1,time2 + stop + endif + timeprev=time1 + ! + ! Only write the output for those times when there is non-missing + ! data in both input files. + ! + if(max(abs(f1),abs(f2)).ge.abs(miss))then + write(*,*)'Data missing:',year1,month1,day1,hour1,f1,f2 + else + if(lgruan)then + write(11,'(I4,1X,I2,1X,I2,1X,I2,2F16.6,1X,A3,1X,I2,F16.6)')& + year1,month1,day1,hour1,longitude,latitude,station_char,variable,f1-f2 + else + write(11,'(I4,1X,I2,1X,I2,1X,I2,2F16.6,1X,I6,1X,I2,F16.6)')& + year1,month1,day1,hour1,longitude,latitude,station_int,variable,f1-f2 + endif + endif +enddo +10 continue +close(1) +close(2) + +end program vertical_differences_soundings +! +! Copy the string 'line' to 'line1', excluding +! hyphens. +! 
subroutine compress_line(line,line1,n)
  !
  ! Copy the n characters of 'line' to 'line1', leaving out every
  ! apostrophe (single-quote character; NB: not hyphens). The kept
  ! characters are packed to the left in their original order and the
  ! rest of 'line1' is filled with blanks.
  !
  !   line  (in)  : input string, treated as an array of n single characters
  !   line1 (out) : compressed string, same length
  !   n     (in)  : number of characters in 'line' and 'line1'
  !
  implicit none
  integer :: n
  character, dimension(n) :: line
  character, dimension(n) :: line1
  logical, dimension(n) :: keep   ! .true. for the characters that are copied
  integer :: nkept                ! number of characters that are copied

  line1=' '
  keep=(line.ne."'")
  nkept=count(keep)
  line1(1:nkept)=pack(line,keep)
end subroutine compress_line
419 432 +37011 39.033001 44.099998 893 690 +2185 22.114000 65.542999 759 521 +43150 83.300000 17.683000 1660 1408 +15614 23.383000 42.650000 3488 8503 +29698 99.032997 54.882999 8085 6037 +35229 57.150002 50.283001 2855 1280 +70414 174.100006 52.716999 270 263 +34467 44.333000 48.783001 1695 1075 +34172 46.039001 51.556999 4366 3119 +28275 68.250000 58.150002 978 980 +85799 -73.098000 -41.435001 77 910 +27038 39.925999 59.318001 2104 1318 +30372 118.266998 56.900002 5143 4600 +93844 168.317993 -46.410999 414 580 +30554 113.583000 54.466999 4151 4197 +23884 90.030998 61.581001 891 457 +32540 158.582993 53.083000 1781 2234 +44292 106.866997 47.917000 3997 2040 +23921 60.450000 60.683000 1841 1155 +48820 105.800003 21.033001 226 188 +89571 77.967000 -68.574000 6633 4216 +30230 108.067001 57.766998 3024 2500 +31736 135.233002 48.533001 2014 2439 +30935 108.750000 50.367001 5170 4596 +10739 9.200000 48.833000 10208 8242 +6181 12.533000 55.767000 921 844 +15420 26.077999 44.511002 2226 793 +2527 12.291000 57.657001 6143 4159 +91592 166.453000 -22.276000 422 91 +12425 16.881001 51.112999 3423 1051 +71801 -52.750000 47.667000 3390 3694 +2591 18.350000 57.650002 1224 1222 +47909 129.552994 28.393999 4715 4756 +68842 25.611000 -33.984000 260 192 +12982 20.100000 46.250000 2011 677 +23330 66.674004 66.531998 814 632 +68263 28.211000 -25.910000 6324 6020 +28445 61.067001 56.733002 7339 5301 +40375 36.599998 28.382999 9225 6493 +42182 77.200000 28.583000 3980 2965 +94975 147.503006 -42.834000 427 293 +50527 119.700000 49.250000 8566 8375 +4220 -52.852000 68.708000 2573 2810 +51431 81.333000 43.950000 8897 9159 +16320 17.957000 40.660000 315 252 +16044 13.188000 46.037000 2344 1325 +53845 109.450000 36.567000 3396 2781 +72265 -102.189000 31.942000 12450 14260 +1241 9.612000 63.706001 1430 1187 +4018 -22.595000 63.981000 3514 3401 +47678 139.779007 33.122002 2636 2514 +6011 -6.765000 62.023000 2023 2287 +4360 -37.637000 65.611000 4066 4225 +72274 -110.956000 32.228000 1332 1306 
+40179 34.817001 32.000000 178 116 +1415 5.665000 58.874001 1356 1056 +12374 20.955999 52.408001 1658 742 +7510 -0.691000 44.831000 445 309 +72572 -111.955000 40.772000 13107 14572 +72528 -78.719000 42.941000 12948 13135 +16080 9.283000 45.461000 140 96 +71934 -111.929001 60.026001 4651 7487 +6610 6.943000 46.812000 6526 6543 +11035 16.356000 48.249000 5536 4590 +71926 -96.000000 64.317001 1508 2012 +70133 -162.613000 66.886000 1926 2033 +72597 -122.882000 42.377000 9389 12215 +29634 82.949997 54.966999 3437 1606 +72364 -106.698000 31.873000 11480 13113 +10868 11.552000 48.243999 11163 10580 +72681 -116.211000 43.567000 11810 14498 +72293 -117.117000 32.833000 6144 6971 +1028 19.000999 74.503998 1693 1648 +42701 85.317000 23.317000 1010 268 +11520 14.447000 50.008000 8132 5874 +72662 -103.211000 44.073000 13070 14480 +57494 114.050000 30.600000 76 148 +72645 -88.112000 44.499000 8494 8853 +3953 -10.243000 51.938000 942 726 +10548 10.377000 50.562000 7981 7601 +12843 19.183000 47.433000 5107 3390 +47412 141.328995 43.060001 217 172 +72451 -99.969000 37.761000 3525 4331 +3005 -1.185000 60.139000 4501 4504 +72776 -111.385000 47.461000 13419 14601 +70200 -165.436000 64.512000 2852 2940 +72712 -68.014000 46.868000 12284 11995 +72317 -79.943000 36.098000 13056 13997 +72327 -86.563000 36.247000 11201 10797 +72562 -100.700000 41.133000 13232 14452 +72476 -108.525000 39.120000 7421 10685 +71913 -94.067001 58.733002 809 1064 +1010 16.131001 69.315002 303 294 +3808 -5.327000 50.218000 2609 2564 +71945 -122.599998 58.833000 8528 9591 +72558 -96.367000 41.320000 7562 8599 +71907 -78.083000 58.466999 1032 1176 +72456 -95.631000 39.072000 12959 13959 +91212 144.794000 13.477000 503 566 +72363 -101.709000 35.233000 12891 14476 +70026 -156.783000 71.289000 743 760 +2836 26.629000 67.366997 6852 5555 +72403 -77.486000 38.977000 2249 1840 +10410 6.969000 51.406000 4936 4183 +71081 -81.217003 68.766998 860 958 +71109 -127.375999 50.685001 470 545 +72340 -92.259000 34.835000 11376 
10831 +71909 -68.550003 63.750000 849 862 +71964 -135.097000 60.733002 6634 8056 +71722 -76.005997 46.301998 4417 6221 +11952 20.316999 49.033001 8980 7914 +70231 -155.598000 62.958000 8042 8483 +70361 -139.667000 59.517000 2472 2562 +72402 -75.483000 37.933000 307 275 +72694 -123.008000 44.909000 830 866 +70308 -170.217000 57.150000 4325 4263 +10035 9.550000 54.533001 2159 1822 +72797 -124.560000 47.934000 1212 1304 +70261 -147.877000 64.816000 9014 10036 +71924 -94.968002 74.704002 2191 2372 +72747 -93.397000 48.565000 9363 9613 +70326 -156.651000 58.681000 924 1008 +72518 -73.832000 42.693000 2800 2593 +71119 -114.108002 53.548000 7233 9964 +10393 14.117000 52.216999 714 455 +72764 -100.759000 46.772000 13328 14564 +71867 -101.099998 53.966999 5672 8045 +71836 -80.650002 51.266998 443 604 +72235 -90.080000 32.319000 1395 1033 +47582 140.098999 39.716999 155 104 +70398 -131.578000 55.039000 501 522 +70350 -152.493000 57.746000 3933 4110 +70219 -161.844000 60.778000 890 983 +72768 -106.626000 48.207000 6410 8121 +72786 -117.627000 47.681000 12270 14504 +72520 -80.217000 40.532000 13633 14178 +10184 13.408000 54.098000 1056 669 +71082 -62.333000 82.500000 3424 3371 +71917 -85.932999 79.983002 1743 1782 +71043 -126.750000 65.282997 3014 3848 +71816 -60.367001 53.299999 1128 1325 +71906 -68.417000 58.117001 1889 2380 +71925 -105.067001 69.133003 860 997 +71957 -133.516998 68.317001 4125 5275 +70273 -149.984000 61.156000 5146 5423 +74389 -70.254000 43.893000 6057 6107 +47600 136.895004 37.390999 81 160 +47646 140.126007 36.057999 256 263 +72261 -100.918000 29.374000 13500 14298 +71811 -66.250000 50.216999 1762 1705 +22522 34.650002 64.949997 1581 1041 +94998 158.937000 -54.499000 2414 1952 +22217 32.354000 67.168999 1548 1484 +41024 39.182999 21.700001 183 96 +29634 82.949997 54.966999 3437 1606 +14240 16.034000 45.821999 2032 854 +16716 23.742001 37.889999 157 208 +54161 125.217000 43.900000 2949 3077 +7110 -4.412000 48.444000 1042 785 +54374 126.883003 41.799999 
7863 8210 +11520 14.447000 50.008000 8132 5874 +53513 107.367000 40.733000 11461 11664 +72768 -106.626000 48.207000 6410 8121 +71915 -83.366997 64.199997 1132 1353 +56964 100.983000 22.767000 14082 14124 +51463 87.617000 43.783000 5211 2570 +7645 4.406000 43.857000 443 288 +54342 123.517000 41.733000 666 540 +8221 -3.580000 40.465000 9042 8487 +17220 27.167000 38.432999 398 391 +33345 30.566999 50.400002 3390 2566 +61223 -3.000000 16.717000 20 1766 +64650 18.517000 4.400000 2398 2928 +83288 -43.417000 -13.267000 1 2507 +82678 -43.016998 -6.767000 22 1186 +17064 29.150000 40.900002 207 214 +32477 155.917007 54.299999 546 387 +87715 -68.133003 -38.950001 124 3537 +40766 47.150002 34.349998 3495 4452 +89022 -26.218000 -75.445000 77 4263 +40265 36.250000 32.367000 200 2 +34122 39.250000 51.650002 2151 1151 +62414 32.820000 23.964001 3952 1178 +15120 23.570999 46.778000 1581 3 +61998 70.243000 -49.352000 27 1158 +87623 -64.266998 -36.567001 386 5145 +87344 -64.217000 -31.317000 661 5859 +28661 65.401001 55.457001 757 563 +87155 -59.049999 -27.450001 107 426 +61291 -7.950000 12.533000 5035 6602 +87860 -67.500000 -45.783001 215 1441 +83840 -49.167000 -25.517000 4298 7013 +7481 5.078000 45.726000 4363 3198 +82332 -59.983000 -3.150000 1160 383 +61052 2.167000 13.483000 1170 968 +85799 -73.098000 -41.435001 77 910 +87576 -58.533001 -34.817001 80 134 +17240 30.549999 37.750000 8265 6433 +17281 40.200001 37.900002 3638 2491 +68906 -9.880000 -40.350000 631 658 +17030 36.299999 41.283001 151 197 +12120 17.535000 54.754002 322 260 +62423 27.989000 27.058001 83 182 +40990 65.849998 31.500000 1994 1091 +91948 -134.965000 -23.130000 137 9 +85469 -109.427002 -27.160999 1710 51 +94821 140.774000 -37.747000 452 33 +82765 -47.466999 -7.333000 10 200 +82983 -40.483002 -9.383000 2 3948 +10304 7.333000 52.733000 401 0 +85934 -70.845001 -53.002998 200 3824 +24343 123.400002 66.766998 2105 1497 +61901 -5.667000 -15.942000 7 7797 +83612 -54.667000 -20.467000 4779 6336 +60680 5.433000 
22.800000 7932 7453 +10238 9.933000 52.817001 811 686 +10771 11.902000 49.428000 5664 6282 +10618 7.333000 49.700000 6881 6471 +61995 57.495000 -20.298000 1748 767 +40706 46.283001 38.083000 5085 1137 +40745 59.632999 36.266998 4648 1886 +3693.827000 51.555000 0 105 0 +71853 -99.633000 49.783000 78 1711 +71701 -66.433000 45.833000 9 265 +3502 -4.571000 52.139000 550 1181 +40848 52.599998 29.533001 2036 19 +72381 -117.900000 34.917000 118 0 +89611 110.523000 -66.283000 7983 5610 +3743 -1.806000 51.202000 15 1766 +94711 145.829000 -31.484000 1539 93 +94750 150.535000 -34.947000 264 0 +94461 128.301000 -25.034000 8684 607 +83378 -47.933000 -15.867000 5423 8243 +94996 167.941000 -29.039000 987 0 +41780 67.133000 24.900000 209 17 +48407 104.866997 15.250000 1727 125 +89642 140.001000 -66.663000 4807 220 +94332 139.488000 -20.678000 6253 1469 +89532 39.581001 -69.004997 3165 2918 +94638 121.893000 -33.830000 167 21 +94637 121.453000 -30.785000 7040 1015 +94711 145.829000 -31.484000 1539 93 +62378 31.349001 29.863001 2882 1498 +80447 -72.450000 7.850000 716 0 +94659 136.805000 -31.156000 5035 761 +48855 108.349998 16.066999 101 22 +38341 71.300003 42.849998 8358 5729 +95527 149.847000 -29.490000 1715 107 +40754 51.317000 35.683000 5782 5846 +94312 118.632000 -20.372000 392 38 +94802 117.802000 -34.941000 721 54 +94510 146.256000 -26.414000 7669 1584 +94910 147.457000 -35.158000 5240 448 +94326 133.889000 -23.795000 6897 99 +60571 -2.250000 31.500000 4453 3245 +47158 126.800003 35.117001 115 100 +72387 -116.028000 36.621000 7761 10338 +61202.983000 20.200001 0 142 0 +67774 31.017000 -17.833000 4758 5 +42724 91.250000 23.883000 450 336 +42314 95.017000 27.483000 2034 806 +42361 78.250000 26.233000 2941 1302 +41316 54.102001 17.044001 173 107 +56985 103.383003 23.382999 13430 13465 +43014 75.400000 19.850000 2950 1876 +41316 54.102001 17.044001 173 107 +40373 46.132999 28.316999 5646 4973 +40430 39.700000 24.550000 8584 8225 +4339 -21.951000 70.484000 3194 3399 +43041 
82.033000 19.083000 2313 598 +17607 33.396000 35.141000 1 4193 +41923 90.383000 23.767000 106 13 +14430 15.340000 44.097000 900 219 +70261 -147.877000 64.816000 9014 10036 +89055 -56.716999 -64.233002 36 2405 +40582 47.971001 29.243000 1527 1052 +43185 81.150000 16.200000 198 248 +42667 77.350000 23.283000 2380 1010 +40437 46.717000 24.933000 1942 1783 +89002 -8.250000 -70.667000 120 3561 +41169 51.608000 25.273000 200 0 +8430 -1.171000 38.002000 335 259 +94150 136.820000 -12.274000 495 43 +71823 -73.679001 53.757000 190 218 +40394 41.683000 27.433000 5875 4421 +80398 -69.949997 -4.167000 389 0 +16245 12.451000 41.670000 422 283 +68240 25.917000 -24.550000 474 257 +89662 164.092000 -74.696000 642 629 +82965 -56.099998 -9.867000 1844 4179 +93417 174.983994 -40.903999 121 123 +47185 126.163002 33.293999 814 878 +94302 114.097000 -22.241000 154 66 +42397 88.367000 26.667000 1238 763 +71603 -66.100000 43.867000 318 313 +40811 48.667000 31.333000 285 59 +40875 56.367000 27.217000 310 313 +40809 59.200001 32.867001 134 1652 +72357 -97.438000 35.181000 3706 4414 +88889 -58.448000 -51.820000 3618 2500 +72214 -84.299000 30.446000 118 94 +68032 23.417000 -19.983000 110 29 +82824 -63.917000 -8.767000 846 1549 +41640 74.333000 31.550000 686 14 +71845 -90.200000 51.450000 6162 6350 +74001 -86.633003 34.599998 737 0 +74004 -114.400000 32.836000 143 1508 +1004 11.922000 78.923000 36 762 +83827 -54.583000 -25.517000 3491 4674 +3882.317000 50.898998 589 557 0 +42701 85.317000 23.317000 1010 268 +42874 81.650000 21.233000 1609 371 +71203 -119.400000 49.941000 4830 4874 +72249 -97.299000 32.835000 3363 3729 +72230 -86.782000 33.179000 3180 3151 +83566 -43.966999 -19.617001 1789 2642 +61024 7.983000 16.966999 242 1689 +72215 -84.568000 33.527000 7815 8049 +72501 -72.865000 40.866000 392 380 +72632 -83.471000 42.697000 8255 8588 +72489 -119.797000 39.568000 1800 3506 +10113 7.153000 53.714001 570 570 +72659 -98.413000 45.456000 8046 8594 +94430 118.537000 -26.614000 4993 1278 +29572 
92.606003 56.181000 2888 1661 +74560 -89.338000 40.152000 6311 6476 +74455 -90.582000 41.611000 7338 7725 +72248 -93.841000 32.451000 1011 708 +72440 -93.401000 37.235000 8038 8428 +72649 -93.565000 44.850000 7453 8115 +94647 128.867004 -31.667000 591 248 +72582 -115.742000 40.860000 214 386 +72426 -83.822000 39.421000 7760 8079 +72318 -80.414000 37.204000 6973 7204 +4089 -14.400000 65.283000 30 388 +72634 -84.719000 44.908000 7315 7955 +83362 -56.099998 -15.650000 2149 2843 +59280 113.083000 23.700001 102 224 +60656 -8.167000 27.700001 7 3035 +11120 11.344000 47.259998 1866 89 +3354 -1.251000 53.006000 2644 1558 +40417 49.817000 26.450000 243 101 +94866 144.832000 -37.666000 449 401 +74646 -97.500000 36.600000 3101 3658 +71908 -122.790001 53.900002 4353 4433 +16113 7.613000 44.539000 3611 3596 +74626 -111.949997 33.450001 949 1565 +98433 121.369003 14.581000 3152 2145 +10468 12.067000 51.549999 154 93 +83779 -46.632999 -23.500000 3276 3522 +64870 13.567000 7.350000 1153 0 +3238 -1.882000 55.020000 1367 911 +83554 -57.667000 -19.000000 1346 1779 +72387 -116.028000 36.621000 7761 10338 +3918 -6.333000 54.500000 402 239 +62403 32.747002 26.200001 186 379 +62423 27.989000 27.058001 83 182 +47169 125.450996 34.687000 346 325 +11747 17.135000 49.452000 2023 991 +10954 10.872000 47.835999 1431 0 +83928 -57.033001 -29.783001 286 210 +82022 -60.700001 2.833000 543 1976 +82705 -72.766998 -7.583000 1282 2402 +82917 -67.800003 -10.000000 890 2405 +6458 4.763000 50.746000 113 8 +40821 54.283000 31.900000 852 1 +40856 60.883000 29.467000 3085 31 +65125 7.000000 9.250000 170 447 +82411 -69.932999 -4.250000 699 681 +82532 -61.283001 -5.817000 141 231 +83525 -48.216999 -18.882999 957 2789 +82026 -55.946999 2.224000 1455 3638 +8190 2.118000 41.384000 1238 1266 +82107 -66.967000 -0.117000 747 1128 +71802 -52.783000 47.517000 1657 1617 +40911 67.199997 36.700001 168 158 +47741 133.066000 35.457000 420 187 +47418 144.438000 42.953000 781 624 +68592 31.129999 -29.601999 1281 1220 
+72388 -115.183000 36.050000 2745 3274 +76903 -92.250000 14.917000 346 511 +10113 7.153000 53.714001 570 570 +16546 8.967000 39.346000 675 182 +48839 107.717003 20.132999 280 4 +98747 124.612000 8.409000 244 158 +1010 16.131000 69.315000 303 294 +17196 35.483002 38.682999 1066 521 +16045 13.059000 45.981000 386 94 +98233 121.753000 17.638000 162 158 diff --git a/RADSOUND/STATRS/list_of_valid_radsound_stations.txt b/RADSOUND/STATRS/list_of_valid_radsound_stations.txt new file mode 100644 index 0000000000000000000000000000000000000000..6ce18b2b45ce81ebdf405c3b12117fce2c21fda5 --- /dev/null +++ b/RADSOUND/STATRS/list_of_valid_radsound_stations.txt @@ -0,0 +1,218 @@ +30673 119.733002 53.750000 3052 2445 +42867 79.050000 21.100000 3270 1401 +42971 85.833000 20.250000 1295 1005 +42410 91.583000 26.100000 1741 1454 +33658 25.900000 48.367001 3499 1611 +89664 166.667007 -77.849998 5862 2902 +42339 73.017000 26.300000 1503 1303 +35671 67.717003 47.799999 7131 4806 +33041 30.962999 52.402000 2966 1238 +24908 102.293999 60.335999 4550 4024 +24641 121.616997 63.783001 2137 2326 +76394 -100.233002 25.867001 4893 5335 +37055 43.099998 44.216999 6373 5064 +67083 47.483000 -18.800000 6740 5010 +76225 -106.032997 28.667000 4691 7220 +30635 109.016998 53.417000 5401 4566 +28225 56.196999 57.958000 2471 1365 +24125 112.432999 68.500000 4908 4579 +24507 100.217003 64.268997 3970 2315 +13275 20.417000 44.766998 3089 1382 +36872 77.003998 43.362999 4047 3766 +43150 83.300000 17.683000 1660 1408 +15614 23.383000 42.650000 3488 8503 +29698 99.032997 54.882999 8085 6037 +35229 57.150002 50.283001 2855 1280 +34467 44.333000 48.783001 1695 1075 +34172 46.039001 51.556999 4366 3119 +27038 39.925999 59.318001 2104 1318 +30372 118.266998 56.900002 5143 4600 +30554 113.583000 54.466999 4151 4197 +32540 158.582993 53.083000 1781 2234 +44292 106.866997 47.917000 3997 2040 +23921 60.450000 60.683000 1841 1155 +89571 77.967000 -68.574000 6633 4216 +30230 108.067001 57.766998 3024 2500 +31736 
135.233002 48.533001 2014 2439 +30935 108.750000 50.367001 5170 4596 +10739 9.200000 48.833000 10208 8242 +2527 12.291000 57.657001 6143 4159 +12425 16.881001 51.112999 3423 1051 +71801 -52.750000 47.667000 3390 3694 +2591 18.350000 57.650002 1224 1222 +47909 129.552994 28.393999 4715 4756 +68263 28.211000 -25.910000 6324 6020 +28445 61.067001 56.733002 7339 5301 +40375 36.599998 28.382999 9225 6493 +42182 77.200000 28.583000 3980 2965 +50527 119.700000 49.250000 8566 8375 +4220 -52.852000 68.708000 2573 2810 +51431 81.333000 43.950000 8897 9159 +16044 13.188000 46.037000 2344 1325 +53845 109.450000 36.567000 3396 2781 +72265 -102.189000 31.942000 12450 14260 +1241 9.612000 63.706001 1430 1187 +4018 -22.595000 63.981000 3514 3401 +47678 139.779007 33.122002 2636 2514 +6011 -6.765000 62.023000 2023 2287 +4360 -37.637000 65.611000 4066 4225 +72274 -110.956000 32.228000 1332 1306 +1415 5.665000 58.874001 1356 1056 +72572 -111.955000 40.772000 13107 14572 +72528 -78.719000 42.941000 12948 13135 +71934 -111.929001 60.026001 4651 7487 +6610 6.943000 46.812000 6526 6543 +11035 16.356000 48.249000 5536 4590 +71926 -96.000000 64.317001 1508 2012 +70133 -162.613000 66.886000 1926 2033 +72597 -122.882000 42.377000 9389 12215 +29634 82.949997 54.966999 3437 1606 +72364 -106.698000 31.873000 11480 13113 +10868 11.552000 48.243999 11163 10580 +72681 -116.211000 43.567000 11810 14498 +72293 -117.117000 32.833000 6144 6971 +1028 19.000999 74.503998 1693 1648 +11520 14.447000 50.008000 8132 5874 +72662 -103.211000 44.073000 13070 14480 +72645 -88.112000 44.499000 8494 8853 +10548 10.377000 50.562000 7981 7601 +12843 19.183000 47.433000 5107 3390 +72451 -99.969000 37.761000 3525 4331 +3005 -1.185000 60.139000 4501 4504 +72776 -111.385000 47.461000 13419 14601 +70200 -165.436000 64.512000 2852 2940 +72712 -68.014000 46.868000 12284 11995 +72317 -79.943000 36.098000 13056 13997 +72327 -86.563000 36.247000 11201 10797 +72562 -100.700000 41.133000 13232 14452 +72476 -108.525000 
39.120000 7421 10685 +3808 -5.327000 50.218000 2609 2564 +71945 -122.599998 58.833000 8528 9591 +72558 -96.367000 41.320000 7562 8599 +71907 -78.083000 58.466999 1032 1176 +72456 -95.631000 39.072000 12959 13959 +72363 -101.709000 35.233000 12891 14476 +2836 26.629000 67.366997 6852 5555 +72403 -77.486000 38.977000 2249 1840 +10410 6.969000 51.406000 4936 4183 +72340 -92.259000 34.835000 11376 10831 +71964 -135.097000 60.733002 6634 8056 +71722 -76.005997 46.301998 4417 6221 +11952 20.316999 49.033001 8980 7914 +70231 -155.598000 62.958000 8042 8483 +70361 -139.667000 59.517000 2472 2562 +70308 -170.217000 57.150000 4325 4263 +10035 9.550000 54.533001 2159 1822 +72797 -124.560000 47.934000 1212 1304 +70261 -147.877000 64.816000 9014 10036 +71924 -94.968002 74.704002 2191 2372 +72747 -93.397000 48.565000 9363 9613 +72518 -73.832000 42.693000 2800 2593 +71119 -114.108002 53.548000 7233 9964 +72764 -100.759000 46.772000 13328 14564 +71867 -101.099998 53.966999 5672 8045 +72235 -90.080000 32.319000 1395 1033 +70350 -152.493000 57.746000 3933 4110 +72768 -106.626000 48.207000 6410 8121 +72786 -117.627000 47.681000 12270 14504 +72520 -80.217000 40.532000 13633 14178 +71082 -62.333000 82.500000 3424 3371 +71917 -85.932999 79.983002 1743 1782 +71043 -126.750000 65.282997 3014 3848 +71816 -60.367001 53.299999 1128 1325 +71906 -68.417000 58.117001 1889 2380 +71957 -133.516998 68.317001 4125 5275 +70273 -149.984000 61.156000 5146 5423 +74389 -70.254000 43.893000 6057 6107 +72261 -100.918000 29.374000 13500 14298 +71811 -66.250000 50.216999 1762 1705 +22522 34.650002 64.949997 1581 1041 +94998 158.937000 -54.499000 2414 1952 +22217 32.354000 67.168999 1548 1484 +29634 82.949997 54.966999 3437 1606 +54161 125.217000 43.900000 2949 3077 +54374 126.883003 41.799999 7863 8210 +11520 14.447000 50.008000 8132 5874 +53513 107.367000 40.733000 11461 11664 +72768 -106.626000 48.207000 6410 8121 +71915 -83.366997 64.199997 1132 1353 +56964 100.983000 22.767000 14082 14124 +51463 
87.617000 43.783000 5211 2570 +8221 -3.580000 40.465000 9042 8487 +33345 30.566999 50.400002 3390 2566 +64650 18.517000 4.400000 2398 2928 +40766 47.150002 34.349998 3495 4452 +34122 39.250000 51.650002 2151 1151 +62414 32.820000 23.964001 3952 1178 +61291 -7.950000 12.533000 5035 6602 +83840 -49.167000 -25.517000 4298 7013 +7481 5.078000 45.726000 4363 3198 +17240 30.549999 37.750000 8265 6433 +17281 40.200001 37.900002 3638 2491 +40990 65.849998 31.500000 1994 1091 +24343 123.400002 66.766998 2105 1497 +83612 -54.667000 -20.467000 4779 6336 +60680 5.433000 22.800000 7932 7453 +10771 11.902000 49.428000 5664 6282 +10618 7.333000 49.700000 6881 6471 +40706 46.283001 38.083000 5085 1137 +40745 59.632999 36.266998 4648 1886 +89611 110.523000 -66.283000 7983 5610 +83378 -47.933000 -15.867000 5423 8243 +94332 139.488000 -20.678000 6253 1469 +89532 39.581001 -69.004997 3165 2918 +94637 121.453000 -30.785000 7040 1015 +62378 31.349001 29.863001 2882 1498 +38341 71.300003 42.849998 8358 5729 +40754 51.317000 35.683000 5782 5846 +94510 146.256000 -26.414000 7669 1584 +60571 -2.250000 31.500000 4453 3245 +72387 -116.028000 36.621000 7761 10338 +42361 78.250000 26.233000 2941 1302 +56985 103.383003 23.382999 13430 13465 +43014 75.400000 19.850000 2950 1876 +40373 46.132999 28.316999 5646 4973 +40430 39.700000 24.550000 8584 8225 +4339 -21.951000 70.484000 3194 3399 +70261 -147.877000 64.816000 9014 10036 +40582 47.971001 29.243000 1527 1052 +42667 77.350000 23.283000 2380 1010 +40437 46.717000 24.933000 1942 1783 +40394 41.683000 27.433000 5875 4421 +82965 -56.099998 -9.867000 1844 4179 +72357 -97.438000 35.181000 3706 4414 +88889 -58.448000 -51.820000 3618 2500 +71845 -90.200000 51.450000 6162 6350 +83827 -54.583000 -25.517000 3491 4674 +71203 -119.400000 49.941000 4830 4874 +72249 -97.299000 32.835000 3363 3729 +72230 -86.782000 33.179000 3180 3151 +83566 -43.966999 -19.617001 1789 2642 +72215 -84.568000 33.527000 7815 8049 +72632 -83.471000 42.697000 8255 8588 +72489 
-119.797000 39.568000 1800 3506 +72659 -98.413000 45.456000 8046 8594 +94430 118.537000 -26.614000 4993 1278 +29572 92.606003 56.181000 2888 1661 +74560 -89.338000 40.152000 6311 6476 +74455 -90.582000 41.611000 7338 7725 +72440 -93.401000 37.235000 8038 8428 +72649 -93.565000 44.850000 7453 8115 +72426 -83.822000 39.421000 7760 8079 +72318 -80.414000 37.204000 6973 7204 +72634 -84.719000 44.908000 7315 7955 +83362 -56.099998 -15.650000 2149 2843 +3354 -1.251000 53.006000 2644 1558 +74646 -97.500000 36.600000 3101 3658 +71908 -122.790001 53.900002 4353 4433 +16113 7.613000 44.539000 3611 3596 +98433 121.369003 14.581000 3152 2145 +83779 -46.632999 -23.500000 3276 3522 +83554 -57.667000 -19.000000 1346 1779 +72387 -116.028000 36.621000 7761 10338 +82705 -72.766998 -7.583000 1282 2402 +82026 -55.946999 2.224000 1455 3638 +8190 2.118000 41.384000 1238 1266 +71802 -52.783000 47.517000 1657 1617 +68592 31.129999 -29.601999 1281 1220 +72388 -115.183000 36.050000 2745 3274 diff --git a/RADSOUND/STATRS/produce_rank_histograms_all_stations.sh b/RADSOUND/STATRS/produce_rank_histograms_all_stations.sh new file mode 100755 index 0000000000000000000000000000000000000000..011724462c721c54916c42fb588e91ed33c9621f --- /dev/null +++ b/RADSOUND/STATRS/produce_rank_histograms_all_stations.sh @@ -0,0 +1,269 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." 
+################################################################## +# +# Calculation of quantile space rank histograms and their +# summary statistics (MSD + p-value) for all stations in the +# RHARM data sample +# +# The following need to be given as arguments: +# +# - variable +# - first year of raw data +# - last year of raw data +# - first month of raw data (default 1) +# - last month of raw data (default 12) +# +# As the input for this script, the following files are needed: +# +# 1) List of stations +# 2) Raw simulation data at station coordinates +# (but here, the station observations are used as a surrogate of this file) +# 3) File including quantiles as a function of time of year +# 4) File including the rank histogram bootstrap MSD values +# +# Important notes: +# +# 0) It is assumed that all the files are in ODB format. However, +# their relevant parts are converted to text format for processing. +# 1) The list of station coordinates can be retrieved from the raw +# ODB file, but this part is commented here. +# 2) The rank histogram bootstrap MSD values must be available +# for the same length of simulation as given as arguments to this script. +# +# Execution (e.g): ./produce_rank_histograms_all_stations 2010 2010 1 12 +# +# Jouni Räisänen, July 2023 +# +################################################################## +# +# Arguments: +# +# 0. Variable +# +variable=T2-T850 +echo " Variable: $variable" +# +# 1.-2: First and last year +# +year1=$1 +year2=$2 +let nyears=year2-year1+1 +# +# 3.-4: First and last month +# +month1="${3:-1}" +month2="${4:-12}" +# +# Find the length of the simulation period. +# It this is 12 months or more, it is rounded to the nearest integer +# number of years. Otherwise, the number of months is recorded. 
+# +let nmonths=12*nyears+month2-month1-11 +echo 'Number of months' $nmonths +# +n=$( echo "$nmonths / 12" | bc -l ) +nyears_round=$( echo "($n + 0.5) / 1" | bc ) +echo 'Number of years rounded' ${nyears_round} +# +if [ $nmonths -ge 12 ] +then + period=${nyears_round}yr +else + period=${nmonths}mon +fi +echo $period +# +# Add zero to ahead of $month1 and $month2 for naming of files if needed? +# +if [ $month1 -le 9 ] +then + month1='0'${month1} +fi +if [ $month2 -le 9 ] +then + month2='0'${month2} +fi +# +################################################################## +###module load grads +# +# Add odb_api to $PATH +# +# On Puhti +#PATH="${PATH}":/projappl/project_2001011/odb_api/bin +# +# On Lumi +#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin +# +# On Lumi (asked Devaraju Narayanappa, CSC) +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# +################################################################## +# +# ----- File names. Hard-coded, at least this far ----- +# +# NB: it would be more efficient to have ODB files for individual +# stations instead for the ones that include everything for all stations! +# +# 1) Raw data from observations +echo " Directory with raw data from observations (pre-computed) ..." +#obs_dir=RHARM_vertical_differences_odb +obs_dir=/scratch/project_465000454/ama/STATDATARADSOUND/RHARM_vertical_differences_odb +echo " $obs_dir" + +# 2) Raw data from model simulation (this far, mimicked by observations) +echo " Directory with data from model simulations (this far, mimicked by observatios) ..." +#sim_dir=RHARM_vertical_differences_odb +sim_dir=/scratch/project_465000454/ama/STATDATARADSOUND/RHARM_vertical_differences_odb +echo " $sim_dir" + +# 3) Pre-computed quantiles +echo " Directory with pre-computed quantiles ..." 
+#quant_dir="quantiles" +quant_dir=/scratch/project_465000454/ama/STATDATARADSOUND/quantiles +echo " $quant_dir" + +# 4) Pre-computed bootstrap MSD statistics +echo " Directory with pre-computed bootstrap MSD statistics ..." +#bootstrap_dir=bootstrap_statistics/${variable}_${period} +bootstrap_dir=/scratch/project_465000454/ama/STATDATARADSOUND/bootstrap_statistics/${variable}_${period} +echo " $bootstrap_dir" + +# 5) Directory for results +echo " Directory for output results ..." +outdir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} + +#[ ! -d ${outdir} ] && mkdir ${outdir} +echo " $outdir" +if test -d "$outdir"; then + echo $outdir exists +else + echo $outdir does not exist + echo " Creating directory: $outdir" + mkdir -p $outdir +fi + +################################################################## +# It is assumed that the list of stations has been pre-produced +# +#echo " (Note) List includes all stations with obs/data points ..." +#station_list=list_of_all_radsound_stations.txt +echo " (Note) List includes stations with at least 1000 obs/data points ..." +station_list=list_of_valid_radsound_stations.txt +echo " stationID, longitude, latitude, obs00UTC, obs12UTC" +echo " List of radiosounding stations (with geographical coordinates): $station_list" + +################################################################ +echo " Compiling fortran-program to calculate rank histograms ..." 
+echo " fortran-programs/rank_histograms_one_station.f95" + + gfortran fortran-programs/rank_histograms_one_station.f95 -o rank_histograms_one_station + +################################################################# +# +n_lines=$(cat ${station_list} | wc | awk NF=1) +# +# Skip the first line which contains no station_ID +# +line_number=2 +# +# Loop over all stations +# +while [ ${line_number} -le `expr $n_lines` ] +do +# head -`expr ${line_number}` ${station_list} | tail -1 > input.txt +# read station longitude latitude < input.txt + head -`expr ${line_number}` ${station_list} | tail -1 > input.txt + read station longitude latitude n00 n12 < input.txt +echo " **********" +echo " Radiosounding stationID, longitude, latitude: ${station} ${longitude} ${latitude}" + +# +# File names +# +obs_file=${obs_dir}/station_${station}_var_2_diff_H_2_p_85000.odb +sim_file=${sim_dir}/station_${station}_var_2_diff_H_2_p_85000.odb +quant_file=${quant_dir}/quantiles_${station}_T2-T850.odb +msd_bootstrap_file=${bootstrap_dir}/MSD_bootstrap_T2-T850_${station}_${period}.odb +# +################################################################ +# Select the simulation data for the station (mimicked this far by observations!). +echo " Selecting the simulation data (mimicked this far by observations) for radiosounding station: ${station}" + +odb_command="odb sql 'select year,month,day,hour,value where station=${station} and (year>=${year1} and year<=${year2}) and (hour=21 or hour=22 or hour=23 or hour=0 or hour=1 or hour=2 or hour=9 or hour=10 or hour=11 or hour=12 or hour=13 or hour=14)' -i ${sim_file} -o sim_data" +eval ${odb_command} +# +############################################################### +# Select the quantiles for the nearest station. 
+echo " Selecting the quantiles for radiosounding station: ${station}" +# +rm quantile_selection +odb sql select \* -i ${quant_file} -o quantile_selection +# +################################################################ +# +# Get the distribution of pre-computed Mean Square Deviations from +# the msd_bootstrap_file. Note that these values depend on the length +# of the time period (included in the name of msd_bootstrap_file) +# and (to a smaller extent) the selected station. +# +odb sql select \* -i ${msd_bootstrap_file} -o msd_bootstrap_selection +# +############################################################### +# Produce the rank histogram for one station. +# Include data for 00 and 12 aUTC. +echo " Producing the rank histogram for 00 and 12 UTCs for one radiosounding station: ${station}" +# +############################################################### +# +echo " Checking existence of file: rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} ..." +outfile=${outdir}/rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} + +echo " $outfile" +if test -f "$outfile"; then + echo $outfile exists +else + echo $outfile does not exist + echo " Creating file: $outfile" + touch $outfile +fi + +# +./rank_histograms_one_station < ${outdir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations.odb + +###################################################################### +# Delete the fortran executable +# +rm rank_histograms_one_station diff --git a/RADSOUND/STATRS/produce_standard_plots_all_stations.sh b/RADSOUND/STATRS/produce_standard_plots_all_stations.sh new file mode 100755 index 0000000000000000000000000000000000000000..f78cf796f8f5a43cc7cb83d79c84ed4f840034d5 --- /dev/null +++ b/RADSOUND/STATRS/produce_standard_plots_all_stations.sh @@ -0,0 +1,295 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." 
+echo "Python version" +which python +########################################################################## +# +# Produce the following plots: +# +# (a) individual data values (from a model simulation or a real station) +# against the background of quantiles from the observed distribution +# for the same station +# (b) rank histogram for the same station +# +# for all stations in the RHARM data sample, for T(2 m) - T(850 hPa) +# +# The following need to be given as arguments: +# +# - first year of raw data +# - last year of raw data +# - first month of raw data (default 1) +# - last month of raw data (default 12) +# +# As the input for this script, the following files are needed: +# +# 1) List of stations +# 2) Raw simulation data at station coordinates +# (but here, the station observations are used as a surrogate of this file) +# 3) File including quantiles as a function of time of year +# 4) File including the rank histogram bootstrap MSD values +# +# Important notes: +# +# 1) It is assumed that all the files are in ODB format. However, +# their relevant parts are converted to text format for processing. +# 2) The rank histogram bootstrap MSD values must be available +# for the same length of simulation as given as arguments to this script. +# +# Execution (e.g): ./produce_standard_plot_all_stations 2010 2010 +# +# ORIGINAL: +# Jouni Räisänen, July 2023 +# MODIFIED: +# Alexander Mahura, Sep-Oct-Nov 2023 +# +################################################################## +# +# Arguments: +# +# The Variable is always the same +# +variable=T2-T850 +echo " Variable: $variable" +# +# 1.-2: First and last year +# +year1=$1 +year2=$2 +let nyears=year2-year1+1 +# +# 3.-4: First and last month +# +month1="${3:-1}" +month2="${4:-12}" +# +# Find the length of the simulation period. +# It this is 12 months or more, it is rounded to the nearest integer +# number of years. Otherwise, the number of months is recorded. 
+# +let nmonths=12*nyears+month2-month1-11 +echo 'Number of months' $nmonths +# +n=$( echo "$nmonths / 12" | bc -l ) +nyears_round=$( echo "($n + 0.5) / 1" | bc ) +echo 'Number of years rounded' ${nyears_round} +# +if [ $nmonths -ge 12 ] +then + period=${nyears_round}yr +else + period=${nmonths}mon +fi +echo $period +# +# Add zero to ahead of $month1 and $month2 for naming of files if needed? +# +if [ $month1 -le 9 ] +then + month1='0'${month1} +fi +if [ $month2 -le 9 ] +then + month2='0'${month2} +fi +# +################################################################## +# +# Add odb_api to $PATH +# On Puhti +#PATH="${PATH}":/projappl/project_2001011/odb_api/bin +# On Lumi +#export PATH="/projappl/project_465000454/ama/software/odb_api/bin:$PATH" +# +# On Lumi (asked Devaraju Narayanappa, CSC) +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module load odb_api/0.18.1-cpeCray-23.03.lua +# module load python-climatedt/3.11.3-cpeCray-23.03.lua +# +# +################################################################## +# +# ----- Directory names. Hard-coded, at least this far ----- +# +# 1) Raw data from observations +echo " Directory with raw data from observations (pre-computed) ..." +#obs_dir=RHARM_vertical_differences_odb +obs_dir=/scratch/project_465000454/ama/STATDATARADSOUND/RHARM_vertical_differences_odb +echo " $obs_dir" + +# 2) Raw data from model simulation (this far, mimicked by observations) +echo " Directory with data from model simulations (this far, mimicked by observatios) ..." +#sim_dir=RHARM_vertical_differences_odb +sim_dir=/scratch/project_465000454/ama/STATDATARADSOUND/RHARM_vertical_differences_odb +echo " $sim_dir" + +# 3) Pre-computed quantiles +echo " Directory for pre-computed quantiles ..." 
+#quant_dir=quantiles +quant_dir=/scratch/project_465000454/ama/STATDATARADSOUND/quantiles +echo " $quant_dir" + +# 3) Pre-computed rank histogram data +#rh_dir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} +echo " Directory for computed rank histogram data ..." +rh_dir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} +echo " $rh_dir" + +# 4) Directory for figures +echo " Directory for figures ..." +figure_dir=figures/standard_plots_${variable}_${year1}${month1}-${year2}${month2} +echo " $figure_dir" +if test -d "$figure_dir"; then + echo $figure_dir exists +else + echo $figure_dir does not exist + echo " Creating directory: $figure_dir" + mkdir -p $figure_dir +fi + +################################################################## +# It is assumed that the list of stations has been pre-produced +# +#echo " (Note) List includes all stations with obs/data points ..." +#station_list=list_of_all_radsound_stations.txt +echo " (Note) List includes stations with at least 1000 obs/data points ..." +station_list=list_of_valid_radsound_stations.txt +echo " stationID, longitude, latitude, obs00UTC, obs12UTC" +echo " List of radiosounding stations (with geographical coordinates): $station_list" + +################################################################ +# +echo " Compiling the Fortran program needed for creating the figures ..." 
+echo " fortran-programs/plots_for_one_station.f95" + + gfortran fortran-programs/plots_for_one_station.f95 -o plots_for_one_station + +################################################################# +# +# Count the number of lines in the station list +# +n_lines=$(cat ${station_list} | wc | awk NF=1) +# +# Skip the first line which contains no station_ID +# +line_number=2 +# +# Loop over all stations +# +while [ ${line_number} -le `expr $n_lines` ] +do + head -`expr ${line_number}` ${station_list} | tail -1 > input.txt + read station longitude latitude n00 n12 < input.txt +echo " **********" +echo " Radiosounding stationID, longitude, latitude: ${station} ${longitude} ${latitude}" +# +sim_file=${sim_dir}/station_${station}_var_2_diff_H_2_p_85000.odb +quant_file=${quant_dir}/quantiles_${station}_T2-T850.odb +rh_file=${rh_dir}/rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2}.odb +# +################################################################ +# Select the simulation data for the station (mimicked this far by observations!) 
+echo " Selecting the simulation data (mimicked this far by observations) for radiosounding station: ${station}" +# +odb_command="odb sql 'select year,month,day,hour,value where (year>=${year1} and year<=${year2})' -i ${sim_file} -o sim_data" +eval ${odb_command} + +################################################################ +# Select the rank histogram data for the station +echo " Selecting rank histogram data for radiosounding station: ${station}" +# +odb sql select \* where station=${station} -i ${rh_file} -o rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} +# +############################################################### +# Select the quantiles for the station +# 00 and 12 UTC are picked separately, since +# select \* does not allow for parentheses (very strange) +echo " Selecting the quantiles for radiosounding station: ${station}" +# +rm quantile_selection +odb sql select \* -i ${quant_file} -o quantile_selection +#rm quantile_selection_* +#odb sql select \* where hour=0 and station=${station} -i ${quant_file} -o quantile_selection_00 +#odb sql select \* where hour=12 and station=${station} -i ${quant_file} -o quantile_selection_12 +#cat quantile_selection_* > quantile_selection +# +################################################################ +# +# Run the fortran progam that prepares the plotting +# +############################################################### +# +./plots_for_one_station < standard_plot.cfg +[data] +variable=${variable} +station_id=${station} +longitude=${longitude} +latitude=${latitude} +year_beg=${year1} +year_end=${year2} +month_beg=${month1} +month_end=${month2} + +[input] +sim_data=time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt +quantile_sel=quantiles_${variable}_${station}.txt +rank_hists=rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt + +[output] 
+fig_name=${figure_dir}/standard_plot_${variable}_${station}_${year1}${month1}-${year2}${month2}.png +EOF + +################################################################ +# +echo " Calling python to plot quantiles rank histogram for radiosounding station: ${station}" +python3 python/plot_quantiles_rankhist_00_12.py standard_plot.cfg + +################################################################ +# Remove unnecessary files +echo " Removing unnecessary temporary files ..." + +rm input.txt +rm vrange_* +rm msd_and_p-value +rm quantile_selection +rm rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} +rm sim_data +##rm standard_plot_one_station +rm time_series_commands +rm time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.grads +rm rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.grads +rm standard_plot.cfg +rm time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt +rm quantiles_${variable}_${station}.txt +rm rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt +# +((line_number++)) +done + +################################################################ +# The Fortran executable is not needed any more: +# +rm rank_histogram.ctl +rm time_series.ctl +rm quantile_selection_* +rm msd_and_p-val* +rm plots_for_one_station +rm coordinates diff --git a/RADSOUND/STATRS/python/plot_p_values_map.py b/RADSOUND/STATRS/python/plot_p_values_map.py new file mode 100644 index 0000000000000000000000000000000000000000..12442d9428dadd2cdfc1a0f540c5afbdd58a7b48 --- /dev/null +++ b/RADSOUND/STATRS/python/plot_p_values_map.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +''' +Written by Madeleine Ekblom, 2023-04-06 + +Based on Jouni Räisänen's script for plotting p values on the map of Finland + +Plots p values on a map restricted to an area limited by lons (19, 32) and lats (59, 71) + +Example: +$ python3 plot_p_values_map.py p_values_as_text_file +$ python3 
def read_data(p_values_file):
    '''
    Description: Reads in file containing p values at different stations
    Input: text file (or any source accepted by np.loadtxt) with one header
           row followed by the columns stationID (sid), longitude (lon),
           latitude (lat), and p value (p)
    Output: 1-D structured numpy array with fields sid, lon, lat, p
    '''
    # ndmin=1: a file with a single station would otherwise come back as a
    # 0-d record, which cannot be sliced like the multi-station case.
    return np.loadtxt(
        p_values_file,
        skiprows=1,
        ndmin=1,
        dtype={'names': ('sid', 'lon', 'lat', 'p'),
               'formats': ('i4', 'f4', 'f4', 'f4')},
    )


def p_value_bin_masks(p, limits):
    '''
    Description: sorts p values into the classes delimited by limits
    Input: p = numpy array of p values, limits = increasing class edges
    Output: list of boolean masks, one per class (len(limits) - 1 entries);
            class n covers limits[n] < p <= limits[n+1], except that the
            first class also contains p == limits[0]
    '''
    masks = []
    for n, (lower, upper) in enumerate(zip(limits[:-1], limits[1:])):
        # The lowest class is closed at both ends; with a strict '>' a
        # station whose p value is exactly limits[0] (p = 0) would fall
        # into no class at all and silently vanish from the map.
        above = (p >= lower) if n == 0 else (p > lower)
        masks.append(above & (p <= upper))
    return masks


def plot_p_values(p_values, timestring=None):
    '''
    Description: plot p values as colour-coded dots on a global map
                 (longitudes -180..180, latitudes -90..90)
    Input: p_values = structured numpy array with fields lon, lat, p
           timestring = text appended to the title after "in"; when None,
                        a module-level variable named timestring is used
                        if one exists, otherwise the title has no period
    Output: saves the figure as p_values.png in the running directory
    '''
    if timestring is None:
        # Backward compatibility: earlier versions read a module-level
        # ``timestring`` that only existed when run as a script.
        timestring = globals().get('timestring', '')

    lon_min, lon_max = -180, 180
    lat_min, lat_max = -90, 90

    # one colour per class; limits holds the len(colors) + 1 class edges
    colors = ['darkviolet', 'blue', 'skyblue', 'lime', 'yellow', 'orange', 'red', 'grey']
    limits = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1]

    fig = plt.figure(figsize=(12, 4.5))
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
    ax.coastlines()
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.gridlines(draw_labels=True)

    # one scatter call and one legend entry per p-value class
    legend_elements = []
    masks = p_value_bin_masks(p_values['p'], limits)
    for n, (color, p_ind) in enumerate(zip(colors, masks)):
        ax.scatter(p_values['lon'][p_ind], p_values['lat'][p_ind], color=color)
        legend_elements.append(
            Line2D([0], [0], marker='o', color='w', markerfacecolor=color,
                   label=f'{limits[n]}-{limits[n + 1]}'))

    # legend table with customized values, placed to the right of the map
    ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.1, 0.5))

    title = 'P-values for T2-T850 quantiles'
    if timestring:
        title += ' in ' + timestring
    ax.set_title(title, fontsize=20)

    fig.savefig('p_values.png', dpi=300)


def main(p_values_file, timestring=None):
    '''
    Description: reads the p values and plots them on the map
    Input: p_values_file = text file with the p values (see read_data)
           timestring = optional period text for the figure title
    '''
    p_values_data = read_data(p_values_file)

    plot_p_values(p_values_data, timestring)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("Usage: plot_p_values_map.py p_values_as_text_file [timestring]")
    p_values_file = sys.argv[1]
    # The period text is optional; the documented one-argument call used
    # to fail with an IndexError here.
    timestring = sys.argv[2] if len(sys.argv) > 2 else ''

    main(p_values_file, timestring)
def plot_data(quantiles_data, rank_data, time_series_data, fig_title, fig_name, year_beg, year_end, month_beg, month_end, savefig=False):
    '''
    Draws a 3x2 figure: four panels (00, 06, 12, 18 UTC) with the quantile
    curves as black lines and the time series values as dots (one scatter
    call per year), and below them one rank histogram spanning both columns.

    quantiles_data:   structured array with fields hour, q01 ... q99
    rank_data:        2-D array; columns 4 and 5 are used as MSD and p-value
                      and columns 6.. as the histogram frequencies
    time_series_data: structured array with fields hour and value
    fig_title:        figure super-title
    fig_name:         output file name, used only when savefig is True
    year_beg, year_end: first and last year; only their difference is used
    month_beg, month_end: accepted for interface compatibility, not used
    savefig:          if True the figure is written to fig_name (dpi=300)

    Raises IndexError if rank_data has no row 4 or no histogram columns.
    '''
    days_per_year = 365

    # x axis: the 365 days of a fixed non-leap year (2001), so every year of
    # data is drawn on the same January-December axis
    day1 = datetime.date(2001, 1, 1)
    day2 = datetime.date(2002, 1, 1)
    dates = drange(day1, day2, datetime.timedelta(days=1))

    # quantile names, axes titles, hours to plot
    q_names = ['q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99']
    sub_titles = ['00 UTC', '06 UTC', '12 UTC', '18 UTC']
    hours = [0, 6, 12, 18]

    nyears = year_end - year_beg + 1

    # The histogram is taken from row index 4.
    # NOTE(review): presumably the "24UTC" combination of all hours that the
    # module docstring mentions - confirm against the file producer.
    rank_row = 4
    rank_data = np.atleast_2d(rank_data)
    if rank_data.shape[0] <= rank_row or rank_data.shape[1] <= 6:
        raise IndexError('rank histogram data must have at least %d rows and '
                         'more than 6 columns, got shape %s'
                         % (rank_row + 1, rank_data.shape))

    # set up figure
    fig = plt.figure(figsize=(10,10), layout='constrained')
    spec = fig.add_gridspec(3,2)
    fig.suptitle(fig_title, fontsize=20)

    # quantiles/time series panels, filled row by row
    for c, (hour, sub_title) in enumerate(zip(hours, sub_titles)):
        i, j = divmod(c, 2)
        ax = fig.add_subplot(spec[i,j])
        ax.set_title(sub_title)
        ax.set_xlim(dates[0], dates[-1])
        ax.xaxis.set_major_locator(MonthLocator())
        ax.xaxis.set_major_formatter(DateFormatter('%b'))

        # quantile curves for this hour; the quantiles hold one year of data
        qh_ind = (quantiles_data['hour'] == hour)
        for q in q_names:
            q_data = quantiles_data[q][qh_ind][:days_per_year]
            # dates is cut to the data length so that a short series is
            # drawn instead of raising a size-mismatch error
            ax.plot(dates[:q_data.size], q_data, 'k-')

        # time series values for this hour, all years in file order; the
        # n-th run of 365 values is treated as the n-th year
        th_ind = (time_series_data['hour'] == hour)
        t_data = time_series_data['value'][th_ind]
        for n in range(nyears):
            year_values = t_data[n*days_per_year:(n+1)*days_per_year]
            ax.scatter(dates[:year_values.size], year_values, marker='.')

    # rank histogram: frequencies are multiplied by 100 and compared with
    # the reference line at 1
    ax2 = fig.add_subplot(spec[2,:])
    ax2.set_title('Rank histogram\n' + 'MSD:' + str(rank_data[rank_row,4]) + ' p-value:' + str(rank_data[rank_row,5]))
    ax2.bar(np.arange(100), rank_data[rank_row,6:]*100)
    ax2.set_xlim(0,100)
    ax2.axhline(y=1)

    if savefig:
        fig.savefig(fig_name, dpi=300)


def read_quantiles_data(quantile_sel_file):
    '''
    Reads quantile data from text file and returns a structured numpy array
    Input: text file with one header row
    Output: 1-D structured numpy array: sid, lon, lat, day_of_year, hour, q01, q10, q25, q50, q75, q90, q99
    '''
    # Header line contains:
    # station@hdr:integer longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real
    names = ('sid', 'lon', 'lat', 'day_of_year', 'hour', 'q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99')
    formats = ('i4', 'f4', 'f4', 'i4', 'i4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4')

    # ndmin=1 keeps a one-row file indexable like any other
    return np.loadtxt(quantile_sel_file, skiprows=1, ndmin=1, dtype={'names': names, 'formats': formats})


def read_rank_data(rank_hists_file):
    '''
    Reads rank histogram data binned into a 100 bins: 0-1%, 1-2%, ..., 99-100%
    Input: text file with one header row
    Output: 2-D numpy array (unstructured), one row per line of the file; first columns contain station id, longitude, latitude, hour, msd, p_value, and then follows the bins f00 = 0-1%, ..., f99=99-100%
    '''
    # Header line contains:
    # station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer msd@body:real p_value@body:real f00@body:real f01@body:real ... f99@body:real

    # ndmin=2 so that a file with a single histogram still supports the
    # [row, column] indexing used by plot_data
    return np.loadtxt(rank_hists_file, skiprows=1, ndmin=2)


def read_time_series_data(sim_data_file):
    '''
    Reads time series data
    Input: text file with one header row
    Output: 1-D structured numpy array: sid, lon, lat, year, day_of_year, hour, value
    '''
    # Header line contains
    # station@hdr:integer longitude@hdr:real latitude@hdr:real year@hdr:integer day_of_year@hdr:integer hour@hdr:integer value@body:real
    names = ('sid', 'lon', 'lat', 'year', 'day_of_year', 'hour', 'value')
    formats = ('i4', 'f4', 'f4', 'i4', 'i4', 'i4', 'f4')

    return np.loadtxt(sim_data_file, skiprows=1, ndmin=1, dtype={'names': names, 'formats': formats})


def main(config_file):
    '''
    Main:
    reads the config file
    reads input data: time_series_data, quantiles_data, rank_hist_data
    calls plotting function and saves figure with figure name given in config file

    Raises FileNotFoundError if the config file cannot be read.
    '''
    ## Read from config file ##
    # A parser local to this call: nothing is carried over between calls.
    parser = configparser.ConfigParser()
    if not parser.read(config_file):
        # read() skips unreadable files silently, which would otherwise
        # surface later as a confusing KeyError on the first section
        raise FileNotFoundError('config file not found or not readable: ' + str(config_file))

    # data to be plotted
    data = parser['data']
    station_id = data['station_id']
    longitude = data['longitude']
    latitude = data['latitude']
    year_beg = int(data['year_beg'])
    year_end = int(data['year_end'])
    month_beg = data['month_beg']
    month_end = data['month_end']

    # input files
    sim_file = parser['input']['sim_data']
    quantile_file = parser['input']['quantile_sel']
    rank_hist_file = parser['input']['rank_hists']

    # output files
    fig_name = parser['output']['fig_name']

    ## Read input data ##
    time_series_data = read_time_series_data(sim_file)
    quantiles_data = read_quantiles_data(quantile_file)
    rank_hist_data = read_rank_data(rank_hist_file)

    ## Plot data ##
    fig_title = f'Station: {station_id}, {year_beg}{month_beg}-{year_end}{month_end}\nLat={latitude} Lon={longitude}'
    plot_data(quantiles_data, rank_hist_data, time_series_data, fig_title, fig_name, year_beg, year_end, month_beg, month_end, True)


if __name__=='__main__':
    if (len(sys.argv) < 2):
        sys.exit("Error: config file must be added as 2nd argument")
    elif (len(sys.argv) > 2):
        sys.exit("Error: only add config file as argument")
    config_file = sys.argv[1]

    main(config_file)
def plot_data(quantiles_data, rank_data, time_series_data, fig_title, fig_name, year_beg, year_end, month_beg, month_end, savefig=False):
    '''
    Draws a 3x2 figure: two panels in the top row (00 and 12 UTC) with the
    quantile curves as black lines and the time series values as dots (one
    scatter call per year), and below them two rank histograms, each
    spanning both columns.

    quantiles_data:   structured array with fields hour, q01 ... q99
    rank_data:        2-D array; columns 4 and 5 are used as MSD and p-value
                      and columns 6.. as the histogram frequencies
    time_series_data: structured array with fields hour and value
    fig_title:        figure super-title
    fig_name:         output file name, used only when savefig is True
    year_beg, year_end: first and last year; only their difference is used
    month_beg, month_end: accepted for interface compatibility, not used
    savefig:          if True the figure is written to fig_name (dpi=300)

    Raises IndexError if rank_data has no row 2 or no histogram columns.
    '''
    days_per_year = 365

    # x axis: the 365 days of a fixed non-leap year (2001), so every year of
    # data is drawn on the same January-December axis
    day1 = datetime.date(2001, 1, 1)
    day2 = datetime.date(2002, 1, 1)
    dates = drange(day1, day2, datetime.timedelta(days=1))

    q_names = ['q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99']

    # (hour, panel title) of the two panels in the top row
    panels = [(0, '00 UTC'), (12, '12 UTC')]

    nyears = year_end - year_beg + 1

    # Rows of rank_data drawn in grid rows 1 and 2.
    # NOTE(review): presumably the 00 and 12 UTC histograms - confirm the
    # row order written by the program that produces the file.
    rank_rows = (0, 2)
    rank_data = np.atleast_2d(rank_data)
    if rank_data.shape[0] <= max(rank_rows) or rank_data.shape[1] <= 6:
        raise IndexError('rank histogram data must have at least %d rows and '
                         'more than 6 columns, got shape %s'
                         % (max(rank_rows) + 1, rank_data.shape))

    # set up figure
    fig = plt.figure(figsize=(10,10), layout='constrained')
    spec = fig.add_gridspec(3,2)
    fig.suptitle(fig_title, fontsize=20)

    # quantiles/time series panels
    for j, (hour, sub_title) in enumerate(panels):
        ax = fig.add_subplot(spec[0,j])
        ax.set_title(sub_title)
        ax.set_xlim(dates[0], dates[-1])
        ax.xaxis.set_major_locator(MonthLocator())
        ax.xaxis.set_major_formatter(DateFormatter('%b'))

        # quantile curves for this hour; the quantiles hold one year of data
        qh_ind = (quantiles_data['hour'] == hour)
        for q in q_names:
            q_data = quantiles_data[q][qh_ind][:days_per_year]
            # dates is cut to the data length so that a short series is
            # drawn instead of raising a size-mismatch error
            ax.plot(dates[:q_data.size], q_data, 'k-')

        # time series values for this hour, all years in file order; the
        # n-th run of 365 values is treated as the n-th year
        th_ind = (time_series_data['hour'] == hour)
        t_data = time_series_data['value'][th_ind]
        for n in range(nyears):
            year_values = t_data[n*days_per_year:(n+1)*days_per_year]
            ax.scatter(dates[:year_values.size], year_values, marker='.')

    # rank histograms: frequencies are multiplied by 100 and compared with
    # the reference line at 1
    for grid_row, data_row in zip((1, 2), rank_rows):
        ax2 = fig.add_subplot(spec[grid_row,:])
        ax2.set_title('Rank histogram\n' + 'MSD:' + str(rank_data[data_row,4]) + ' p-value:' + str(rank_data[data_row,5]))
        ax2.bar(np.arange(100), rank_data[data_row,6:]*100)
        ax2.set_xlim(0,100)
        ax2.axhline(y=1)

    if savefig:
        fig.savefig(fig_name, dpi=300)


def read_quantiles_data(quantile_sel_file):
    '''
    Reads quantile data from text file and returns a structured numpy array
    Input: text file with one header row
    Output: 1-D structured numpy array: sid, lon, lat, day_of_year, hour, q01, q10, q25, q50, q75, q90, q99
    '''
    # Header line contains:
    # station@hdr:integer longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real
    names = ('sid', 'lon', 'lat', 'day_of_year', 'hour', 'q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99')
    formats = ('i4', 'f4', 'f4', 'i4', 'i4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4')

    # ndmin=1 keeps a one-row file indexable like any other
    return np.loadtxt(quantile_sel_file, skiprows=1, ndmin=1, dtype={'names': names, 'formats': formats})


def read_rank_data(rank_hists_file):
    '''
    Reads rank histogram data binned into a 100 bins: 0-1%, 1-2%, ..., 99-100%
    Input: text file with one header row
    Output: 2-D numpy array (unstructured), one row per line of the file; first columns contain station id, longitude, latitude, hour, msd, p_value, and then follows the bins f00 = 0-1%, ..., f99=99-100%
    '''
    # Header line contains:
    # station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer msd@body:real p_value@body:real f00@body:real f01@body:real ... f99@body:real

    # ndmin=2 so that a file with a single histogram still supports the
    # [row, column] indexing used by plot_data
    return np.loadtxt(rank_hists_file, skiprows=1, ndmin=2)


def read_time_series_data(sim_data_file):
    '''
    Reads time series data
    Input: text file with one header row
    Output: 1-D structured numpy array: sid, lon, lat, year, day_of_year, hour, value
    '''
    # Header line contains
    # station@hdr:integer longitude@hdr:real latitude@hdr:real year@hdr:integer day_of_year@hdr:integer hour@hdr:integer value@body:real
    names = ('sid', 'lon', 'lat', 'year', 'day_of_year', 'hour', 'value')
    formats = ('i4', 'f4', 'f4', 'i4', 'i4', 'i4', 'f4')

    return np.loadtxt(sim_data_file, skiprows=1, ndmin=1, dtype={'names': names, 'formats': formats})


def main(config_file):
    '''
    Main:
    reads the config file
    reads input data: time_series_data, quantiles_data, rank_hist_data
    calls plotting function and saves figure with figure name given in config file

    Raises FileNotFoundError if the config file cannot be read.
    '''
    ## Read from config file ##
    # A parser local to this call: nothing is carried over between calls.
    parser = configparser.ConfigParser()
    if not parser.read(config_file):
        # read() skips unreadable files silently, which would otherwise
        # surface later as a confusing KeyError on the first section
        raise FileNotFoundError('config file not found or not readable: ' + str(config_file))

    # data to be plotted
    data = parser['data']
    station_id = data['station_id']
    longitude = data['longitude']
    latitude = data['latitude']
    year_beg = int(data['year_beg'])
    year_end = int(data['year_end'])
    month_beg = data['month_beg']
    month_end = data['month_end']

    # input files
    sim_file = parser['input']['sim_data']
    quantile_file = parser['input']['quantile_sel']
    rank_hist_file = parser['input']['rank_hists']

    # output files
    fig_name = parser['output']['fig_name']

    ## Read input data ##
    time_series_data = read_time_series_data(sim_file)
    quantiles_data = read_quantiles_data(quantile_file)
    rank_hist_data = read_rank_data(rank_hist_file)

    ## Plot data ##
    fig_title = f'Station: {station_id}, {year_beg}{month_beg}-{year_end}{month_end}\nLat={latitude} Lon={longitude}'
    plot_data(quantiles_data, rank_hist_data, time_series_data, fig_title, fig_name, year_beg, year_end, month_beg, month_end, True)
main(config_file) diff --git a/RADSOUND/STATRS/python/plot_rank_hist_sum_all_stations.py b/RADSOUND/STATRS/python/plot_rank_hist_sum_all_stations.py new file mode 100644 index 0000000000000000000000000000000000000000..2648455dd508a423e076d0a39ddcfa76d5106da0 --- /dev/null +++ b/RADSOUND/STATRS/python/plot_rank_hist_sum_all_stations.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python + +''' +Written by Madeleine Ekblom, 2023-09-12 + +Based on Jouni Räisänen's script for plotting rank histogram summart statistics + +Example: +$ python3 plot_rank_hist_sum_all_stations.py rh_summary_file nstat p01 p1 p5 max_freq_p max_freq_q + +''' + +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.ticker import (MultipleLocator, AutoMinorLocator) +import sys + +#def plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, figname): +#def plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q): +def plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, utc_term): + ''' + Input: p_freq, q_freq, number of stations, p01, p1, p5, max freq p, max freq q, figname + Description: Plots p and q frequencies and saves the figure with figname + ''' + if utc_term == 'UTC00': + title='00 UTC : Rank Histogram Summary Statistics\n Number of stations: ' + str(number_of_stations) + '\nFrequency, p<0.001: ' + str(p01) + '\nFrequency, p<0.01:' + str(p1) + '\nFrequency, p<0.05:' + str(p5) + else: + title='12 UTC : Rank Histogram Summary Statistics\n Number of stations: ' + str(number_of_stations) + '\nFrequency, p<0.001: ' + str(p01) + '\nFrequency, p<0.01:' + str(p1) + '\nFrequency, p<0.05:' + str(p5) + + # can also be input arguments if the number of bins are not constant + number_of_p_bins=20 + number_of_q_bins=100 + + fig = plt.figure(figsize=(8,12)) + gs = fig.add_gridspec(2, hspace=0.4) + ax1, ax2 = gs.subplots(sharex=False, sharey=False) + + # Plot p values + ax1.set_title('Normalized p-value 
frequencies') + ax1.set_xlabel('p-value') + ax1.set_ylim([0,max_freq_p]) + ax1.bar(np.arange(number_of_p_bins)+0.5, number_of_p_bins*p_freq) + ax1.xaxis.set_major_locator(MultipleLocator(2)) + ax1.xaxis.set_minor_locator(MultipleLocator(1)) + p_xlabels = ax1.get_xticks() + ax1.set_xticks(p_xlabels, p_xlabels/number_of_p_bins) + ax1.set_xlim([0,number_of_p_bins]) + + # Plot q values + ax2.set_title('Normalized quantile frequencies') + ax2.set_xlabel('Quantile (%)') + ax2.bar(np.arange(number_of_q_bins)+0.5, number_of_q_bins*q_freq) + ax2.xaxis.set_major_locator(MultipleLocator(10)) + ax2.xaxis.set_minor_locator(MultipleLocator(5)) + ax2.set_ylim([0,max_freq_q]) + ax2.set_xlim([0,number_of_q_bins]) + + fig.suptitle(title) + figname='rank_hist_sumstats.png' + plt.savefig(figname, dpi=300) + +def read_data(rh_summary_file): + ''' + Input: path/to/rh_summary_file + Output: p_freq, q_freq + Description: reads in p frequencies and q frequencies + ''' + + # first line contains a header and first column contains hour + p_freq_all = np.loadtxt(rh_summary_file + '_p-freq.txt', skiprows=1) + q_freq_all = np.loadtxt(rh_summary_file + '_q-freq.txt', skiprows=1) + + # p_freq and q_freq contain data 00, 06, 12, 18, 24 (the last is used in these plots) + p_freq = p_freq_all[-1,1:] + q_freq = q_freq_all[-1,1:] + + return p_freq, q_freq + +if __name__=='__main__': + rh_summary_file = sys.argv[1] + number_of_stations = sys.argv[2] + p01 = sys.argv[3] + p1 = sys.argv[4] + p5 = sys.argv[5] + max_freq_p = float(sys.argv[6]) + max_freq_q = float(sys.argv[7]) + utc_term = sys.argv[8] + #figname = sys.argv[8] + + p_freq, q_freq = read_data(rh_summary_file) + #plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, figname) + #plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q) + plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, utc_term) + diff --git 
a/RADSOUND/STATRS/summary_rank_histograms_all_stations.sh b/RADSOUND/STATRS/summary_rank_histograms_all_stations.sh new file mode 100755 index 0000000000000000000000000000000000000000..ba90835e3883937f404d40ff01b9c9612addad8a --- /dev/null +++ b/RADSOUND/STATRS/summary_rank_histograms_all_stations.sh @@ -0,0 +1,214 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +echo "Python version" +which python +################################################################## +# +# Calculation and plotting of summary statistics for quantile +# space rank histograms: +# +# - a map of p-values +# - bar plot of p-value distribution + rank histogram +# averaged over all stations +# +# The following need to be given as arguments: +# +# - variable +# - first year of raw data +# - last year of raw data +# - first month of raw data (default 1) +# - last month of raw data (default 12) +# +# As the input for this script, the following files are needed: +# +# 1) Rank histogram statistics files for all available stations +# (for the time range specified by the script arguments) +# +# Execution (e.g): ./summary_rank_histograms_all_stations 2010 2010 1 12 + +# ORIGINAL: +# Jouni Räisänen, Aug 2023 +# MODIFIED: +# Alexander Mahura, Sep-Oct-Nov 2023 +# +################################################################## +# +# Arguments: +# +# 0. Variable code (not needed: always T2-T850) +# +variable=T2-T850 +echo " Variable: $variable" +# +# 1.-2: First and last year +# +year1=$1 +year2=$2 +let nyears=year2-year1+1 +# +# 3.-4: First and last month +# +month1="${3:-1}" +month2="${4:-12}" +# +# Add zero to ahead of $month1 and $month2 for naming of files if needed? 
+# +if [ $month1 -le 9 ] +then + month1='0'${month1} +fi +if [ $month2 -le 9 ] +then + month2='0'${month2} +fi +# +################################################################## +# +# Add odb_api to $PATH +# +# On Puhti +#PATH="${PATH}":/projappl/project_2001011/odb_api/bin +# On Lumi +#export PATH="/projappl/project_465000454/ama/software/odb_api/bin:$PATH" + +# On Lumi +#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin +# +# On Lumi (asked Devaraju Narayanappa, CSC) +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module load odb_api/0.18.1-cpeCray-23.03.lua +# module load python-climatedt/3.11.3-cpeCray-23.03.lua +# +################################################################## +# +echo " Compiling the Fortran program that produces the rank histogram summary statistics ..." +echo " fortran-programs/rank_histogram_summary_statistics.f95" +# +gfortran fortran-programs/rank_histogram_summary_statistics.f95 -o rank_histogram_summary_statistics + +################################################################## +# ----- File names. Hard-coded, at least this far ----- +# +# NB: it would be more efficient to have ODB files for individual +# stations instead for the ones that include everything for all stations! +# +# 1) Rank histogram directory and input name file name without extension +echo " Directory for rank histograms ..." +#rh_dir=/scratch/project_2001011/RHARM_to_Alexander_120923/rank_histograms/${variable}_${year1}${month1}-${year2}${month2} +rh_dir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} +echo " $rh_dir" +echo " Input file with rank histograms for all stations ..." +rh_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations +echo " $rh_file" +echo " Input file with sammy rank histograms ..." 
+rh_summary_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_summary +echo " $rh_summary_file" + +# 2) Name of output file(s) without extension +echo " Name of output file without extension ..." +out_file=${rh_dir}/rh_summary_${variable}_${year1}${month1}-${year2}${month2} +echo " $out_file" + +# 3) Directory for figures +echo " Directory for figures ..." +figure_dir=figures +echo " $figure_dir" + +################################################################## +echo " Converting the all-station ODB format rank histogram file to txt ..." +echo " (it is format for reading in Fortran)" +# +odb sql select \* -i ${rh_file}.odb -o ${rh_file}.txt +# +################################################################## +echo " Calculating the rank histogram summary statistics ..." +# +./rank_histogram_summary_statistics < line_1 +read v1 v2 v3 number_of_stations < line_1 +head -2 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 v2 v3 v4 number_of_p_bins < line_1 +head -3 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 max_freq_p < line_1 +head -4 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 max_freq_q < line_1 +head -5 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 v2 v3 v4 v5 v6 p01_00 < line_1 +head -6 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 v2 v3 v4 v5 v6 p1_00 < line_1 +head -7 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 v2 v3 v4 v5 v6 p5_00 < line_1 +head -8 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 v2 v3 v4 v5 v6 p01_12 < line_1 +head -9 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 v2 v3 v4 v5 v6 p1_12 < line_1 +head -10 ${rh_summary_file}_text_output | tail -1 > line_1 +read v1 v2 v3 v4 v5 v6 p5_12 < line_1 +numbers_of_p_bins_up=$( echo "(${number_of_p_bins} + 0.5)" | bc ) +echo $( echo "(${number_of_p_bins} + 0.5)" | bc ) > line_1 +read number_of_p_bins_up < line_1 +rm line_1 + 
+###################################################################### +# +echo " (1) Plotting - P-Values Summary on 2D map ..." +# +###################################################################### + +python_script=python/plot_p_values_map.py + +python3 ${python_script} ${rh_summary_file}_p-values.txt ${year1}${month1}-${year2}${month2} +mv p_values.png ${figure_dir}/p-value_map_${variable}_${year1}${month1}-${year2}${month2}.png +echo " Output file: ${figure_dir}/p-value_map_${variable}_${year1}${month1}-${year2}${month2}.png" + +##################################################################### +# +echo " (2) Plotting - Rank Histogram Summary Statistics for 00 and 12 UTCs ..." +# +##################################################################### +utc_term="UTC00" +echo " for 00 UTC - variable : ${variable}" +python_script=python/plot_rank_hist_sum_all_stations.py +echo " number_of_stations : ${number_of_stations}" +echo " p01_00, p1_00, p5_00, max_freq_p, max_freq_q : ${p01_00} ${p1_00} ${p5_00} ${max_freq_p} ${max_freq_q}" + +python3 ${python_script} ${rh_summary_file} ${number_of_stations} ${p01_00} ${p1_00} ${p5_00} ${max_freq_p} ${max_freq_q} ${utc_term} ${figname} + +mv rank_hist_sumstats.png ${figure_dir}/rank-hist-00utc-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png +echo " Output file for 00 UTC: ${figure_dir}/rank-hist-00utc-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png" + +##################################################################### +utc_term="UTC12" +echo " for 12 UTC - variable : ${variable}" +python_script=python/plot_rank_hist_sum_all_stations.py +echo " number_of_stations : ${number_of_stations}" +echo " p01_12, p1_12, p5_12, max_freq_p, max_freq_q : ${p01_12} ${p1_12} ${p5_12} ${max_freq_p} ${max_freq_q}" + +python3 ${python_script} ${rh_summary_file} ${number_of_stations} ${p01_12} ${p1_12} ${p5_12} ${max_freq_p} ${max_freq_q} ${utc_term} ${figname} + +mv rank_hist_sumstats.png 
${figure_dir}/rank-hist-12utc-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png +echo " Output file for 12 UTC: ${figure_dir}/rank-hist-12utc-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png" + +###################################################################### +# Delete files that are not needed any more + +rm rank_histogram_summary_statistics + diff --git a/RADSOUND/graph_mod_obs.py b/RADSOUND/graph_mod_obs.py new file mode 100644 index 0000000000000000000000000000000000000000..e5c48406c58598245d4dbf958f65f530f8f85866 --- /dev/null +++ b/RADSOUND/graph_mod_obs.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python3 +# Author: Alexander Mahura : 2023-06 +# +############################################################################# +# LOADING LIBRARIES +############################################################################# +import sys +import numpy as np +import os +import copy +import time +from datetime import datetime as dt +import warnings +import matplotlib.pyplot as plt + +############################################################################# +# FUNCTIONS +############################################################################# + +#---------------------------------------------------------------------------- +# Function to read file with observations extracted from .odb-file +# note: (YYYY MM DD HH MM SS) +#---------------------------------------------------------------------------- + +def readobs(type_obs, path_tmp_obs, tmpfilename): + """ + Read extracted observations from .odb-file for ground-based (SYNOP), + radiosounding (TEMP), and satellite (AMSU-A) data + Input + ------- + type_obs: synop, radsound, satellite + Acronyms in names of files with temporarily extracted observations + Output + ------- + ds: dataset with extracted observations + """ + + if type_obs=='synop': + #filename = path_tmp_obs + "OBSSYNOP/" + tmpfilename # Puhti + filename = path_tmp_obs + tmpfilename # Lumi + print('obs synop tmp-filename is:', 
filename) + if os.path.isfile(filename)==True: + # 2023-08-25 - no need to skip 1st row with header ! skiprows=1 + ds = np.genfromtxt(filename) + #, dtype={'names': ('year', 'month', 'day', + # 'hour','minute','second','longitude','latitude','variable','value','station'), + # 'formats': ('i4', 'i2', 'i2', 'i2', 'i2', 'i2', 'f8', 'f8', 'i4', 'f8', 'S6')}) + #print('OBS at Synop stations\n',ds) + else: + print('OBS: The specified synop file does not exist !!!') + elif type_obs=='radsound': + #filename = path_tmp_obs + "OBSRADSOUND/" + tmpfilename # Puhti + filename = path_tmp_obs + tmpfilename # Lumi + print('obs radsound tmp-filename is:', filename) + if os.path.isfile(filename)==True: + # 2023-08-25 - no need to skip 1st row with header ! skiprows=1 + ds = np.genfromtxt(filename) + #ds = np.loadtxt(filename, dtype={'names': ('year', 'month', 'day', + # 'hour','minute','second','longitude','latitude','variable','value','station'), + # 'formats': ('i4', 'i2', 'i2', 'i2', 'i2', 'i2', 'f8', 'f8', 'i4', 'f8', 'S6')}) + print('OBS from Radiosounding\n',ds) + else: + print('OBS: The specified radsound file does not exist !!!') + else: + #filename = path_tmp_obs + "OBSSATELLITE/" + tmpfilename # Puhti + filename = path_tmp_obs + tmpfilename # Lumi + print('obs satellite tmp-filename is:', filename) + if os.path.isfile(filename)==True: + # 2023-08-25 - no need to skip 1st row with header ! 
skiprows=1 + ds = np.genfromtxt(filename) + #ds = np.loadtxt(filename, dtype={'names': ('year', 'month', 'day', + # 'hour','minute','second','longitude','latitude','variable','value','station'), + # 'formats': ('i4', 'i2', 'i2', 'i2', 'i2', 'i2', 'f8', 'f8', 'i4', 'f8', 'S6')}) + print('OBS from Satellite\n',ds) + else: + print('OBS: The specified satellite file does not exist !!!') + return ds + +#---------------------------------------------------------------------------- +# Function to read file with modeled results extracted +# (& interpolated to points of observatio) from gsv-file +# (YYYY MM DD HH MM SS) +#---------------------------------------------------------------------------- + +def readmod(type_mod, path_tmp_mod, tmpfilename): + """ + Read extracted & interpolated to points of observatiobs the modelled results + from gsv-file for ground-based (SYNOP), radiosounding (TEMP), and + satellite (AMSU-A) data + Input + ------- + type_mod: synop, radsound, satellite + Acronyms in names of files with temporarily extracted modelled results + Output + ------- + ds: dataset with extracted & interpolated modelled results + """ + + if type_mod=='synop': + #filename = path_tmp_mod + "MODSYNOP/" + tmpfilename # Puhti + filename = path_tmp_mod + tmpfilename # Lumi + print('mod synop tmp-filename is:', filename) + if os.path.isfile(filename)==True: + # 2023-08-30 - no need to skip 1st row with header ! 
skiprows=1 + # 2020-01-20 00:00:00 24.4916 60.999199 273.143829345703 101150 39 + # 2020 01 20 00 00 00 24.4916 60.999199 273.143829345703 39 101150 + ds = np.genfromtxt(filename) + #, dtype={'names': ('year-month-day', + # 'hour-minute-second','longitude','latitude','value','station','variable'), + # 'formats': ('S10', 'S8', 'f8', 'f8', 'f8', 'S6', 'i4')}) # v#1 + #, dtype={'names': ('year', 'month', 'day', 'hour', 'minute', 'second', + # 'longitude','latitude','value','variable','station'), + # 'formats': ('S10', 'S8', 'f8', 'f8', 'f8', 'S6', 'i4')}) # v#2 + #print('MOD for Synop stations\n',ds) + else: + print('MOD: The specified synop file does not exist !!!') + elif type_obs=='radsound': + #filename = path_tmp_mod + "MODRADSOUND/" + tmpfilename # Puhti + filename = path_tmp_mod + tmpfilename # Lumi + print('mod radsound tmp-filename is:', filename) + if os.path.isfile(filename)==True: + # 2023-08-30 - no need to skip 1st row with header ! skiprows=1 + ds = np.loadtxt(filename, dtype={'names': ('year', 'month', 'day', + 'hour','minute','second','longitude','latitude','value','variable','station'), + 'formats': ('i4', 'i2', 'i2', 'i2', 'i2', 'i2', 'f8', 'f8', 'f8', 'S6', 'i4')}) + print('MOD for Radiosounding\n',ds) + else: + print('MOD: The specified radsound file does not exist !!!') + else: + #filename = path_tmp_mod + "MODSATELLITE/" + tmpfilename # Puhti + filename = path_tmp_mod + tmpfilename # Lumi + print('mod satellite tmp-filename is:', filename) + if os.path.isfile(filename)==True: + # 2023-08-30 - no need to skip 1st row with header ! 
skiprows=1 + ds = np.loadtxt(filename, dtype={'names': ('year', 'month', 'day', + 'hour','minute','second','longitude','latitude','value','variable','station'), + 'formats': ('i4', 'i2', 'i2', 'i2', 'i2', 'i2', 'f8', 'f8', 'f8', 'S6', 'i4')}) + print('MOD for Satellite\n',ds) + else: + print('MOD: The specified satellite file does not exist !!!') + return ds + +############################################################################# +# MAIN PROGRAM +############################################################################# +# Passing parameters to program +try: + fnamedate = sys.argv[1]; met_param = sys.argv[2] + print (fnamedate) + print (met_param) + print ('Command line: py-script-name YYYYMMDDHH met_param-acronym') +except: + print ("Usage:",sys.argv[0], "fnamedate met_aparm"); sys.exit(1) + +##fnamedate = '2020012000' +# list to list meteorological variables (add here): +##met_param="2t" + +# Paths to temporarily storing of extracted OBS and MOD data ... +# ----- on Puhti +##path_tmp_obs='/scratch/project_2000640/wrk/ama/DEOBS/OBSDATA/' +##path_tmp_mod='/scratch/project_2000640/wrk/ama/DEOBS/MODDATA/' +# ----- on Lumi +#path_tmp_obs='/projappl/project_465000454/ama/DEOBS/SYNOP/DATAOBS/' +#path_tmp_mod='/projappl/project_465000454/ama/DEOBS/SYNOP/DATAMOD/' +path_tmp_obs='DATAOBS/' +path_tmp_mod='DATAMOD/' + +print ('Path to dir with extacted OBS data: ', path_tmp_obs) +print ('Path to dir with extacted MOD data: ', path_tmp_mod) + + +# Constructing exisitng names of files with OBS & MOD data +# for SYNOP (fx: synop_2020012000 and mod_synop_2020012000_2t.dat) +tmpfname_obs_synop = 'obs_synop_'+fnamedate+'.dat' +tmpfname_mod_synop = 'mod_synop_'+fnamedate+'_'+met_param+'.dat' +print ('Name of file with synop OBS data: ', tmpfname_obs_synop) +print ('Name of file with synop MOD data: ', tmpfname_mod_synop) +#tmpfname_obs_synop='synop_2020012000.dat' +#tmpfname_mod_synop='mod_synop_2020012000_2t.dat' +#print (tmpfname_obs_synop, tmpfname_mod_synop) + +# for 
RADSOUND +tmpfname_obs_radsound = 'obs_radsound_'+fnamedate+'.dat' +tmpfname_mod_radsound = 'mod_radsound_'+fnamedate+'_'+met_param+'.dat' +# for SATELLITE +tmpfname_obs_satellite = 'obs_satellite_'+fnamedate+'.dat' +tmpfname_mod_satellite = 'mod_satellite_'+fnamedate+'_'+met_param+'.dat' + +############################################################################## +# Observations ... +for ii in range(0,1): #(0,3) # testing on SYNOP obs data + if ii == 0: + type_obs='synop' + tmpfilename=tmpfname_obs_synop + ds_obs_synop=readobs(type_obs, path_tmp_obs, tmpfilename) + rec_ds_obs_synop=len(ds_obs_synop) + #t print('OBS from Synop stations: ',rec_ds_obs_synop,' records \n',ds_obs_synop) + elif ii == 1: + type_obs='radsound' + tmpfilename=tmpfname_obs_radsound + ds_obs_radsound=readobs(type_obs, path_tmp_obs, tmpfilename) + rec_ds_obs_radsound=len(ds_obs_radsound) + ##print('OBS from Radiosounding: ',rec_ds_obs_radsound,' records \n',ds_obs_radsound) + else: + type_obs='satellite' + tmpfilename=tmpfname_obs_satellite + ds_obs_satellite=readobs(type_obs, path_tmp_obs, tmpfilename) + rec_ds_obs_satellite=len(ds_obs_satellite) + ##print('OBS from Satellites: ',rec_ds_obs_satellite,' records \n',ds_obs_satellite) + +############################################################################## +# Modelled results ... 
+for jj in range(0,1): #(0,3) # testing on SYNOP mod data + if jj == 0: + type_mod='synop' + tmpfilename=tmpfname_mod_synop + ds_mod_synop=readmod(type_mod, path_tmp_mod, tmpfilename) + rec_ds_mod_synop=len(ds_mod_synop) + #t print('MOD for Synop stations: ',rec_ds_mod_synop,' records \n',ds_mod_synop) + elif jj == 1: + type_mod='radsound' + tmpfilename=tmpfname_mod_radsound + ds_mod_radsound=readobs(type_mod, path_tmp_mod, tmpfilename) + rec_ds_mod_radsound=len(ds_mod_radsound) + ##print('MOD for Radiosounding: ',rec_ds_mod_radsound,' records \n',ds_mod_radsound) + else: + type_mod='satellite' + tmpfilename=tmpfname_mod_satellite + ds_mod_satellite=readobs(type_mod, path_tmp_mod, tmpfilename) + rec_ds_mod_satellite=len(ds_mod_satellite) + ##print('MOD for Satellites: ',rec_ds_mod_satellite,' records \n',ds_mod_satellite) + +############################################################################## +# Calculate mismatch/difference/bias between synop OBS and MOD + +#OBS: ('year', 'month', 'day', 'hour','minute','second','longitude','latitude','value','variable','station') +#MOD: ('year-month-day', 'hour-minute-second','longitude','latitude','value','variable','station') + +# Checking number of records (obs & mod) => must be the same +nr_obs = ds_obs_synop.shape[0] +nr_mod = ds_mod_synop.shape[0] +#t print(nr_obs, nr_mod) + +val_obs_synop=np.zeros(nr_obs) +val_mod_synop=np.zeros(nr_mod) + +# Converting to modelled values to same scale as observed ... +val_obs_synop = np.array(ds_obs_synop [0:nr_obs, 9]) # synop obs (2t in deg C) +#t print(val_obs_synop) + +if met_param == '2t': + val_mod_synop = np.array(ds_mod_synop [0:nr_mod, 8] - 273.15) # synop mod (in deg K -> convert to T-273.15) +else: + val_mod_synop = np.array(ds_mod_synop [0:nr_mod, 8]) # no need to convert modelled data + +#t print(val_mod_synop) + +# Calculating a mismatch/difference/bias ... 
+dif_synop = val_obs_synop - val_mod_synop + +#t print(val_obs_synop, val_mod_synop, dif_synop) + +# Calculating statistcis on synop obs & mod & difference data (self-control & a possibility of flagging) +# Maximum +obs_max = max(val_obs_synop); mod_max = max(val_mod_synop); dif_synop_max = max(dif_synop) +# Minimum +obs_min = min(val_obs_synop); mod_min = min(val_mod_synop); dif_synop_min = min(dif_synop) +# Mean +obs_mean = np.mean(val_obs_synop); mod_mean = np.mean(val_mod_synop); dif_synop_mean = np.mean(dif_synop) +# Median +obs_med = np.median(val_obs_synop); mod_med = np.median(val_mod_synop); dif_synop_med = np.median(dif_synop) +# Variance +obs_var = np.var(val_obs_synop); mod_var = np.var(val_mod_synop); dif_synop_var = np.var(dif_synop) + +# Correl. coefficient - (!) not needed for climate simulations +#r_cor = np.corrcoef(val_obs_synop, val_mod_synop) +#cor_obs_mod = r_cor[1, 0] +#t print(cor_obs_mod) + +#t print (obs_max, mod_max, obs_min, mod_min, obs_mean, mod_mean, obs_med, mod_med, obs_var, mod_var) +#t print (dif_synop_max, dif_synop_min, dif_synop_mean, dif_synop_median, dif_synop_var) + +# Time slot for which stats is calculated (used in plotting fraph) +val_yy = str(int(np.array(ds_obs_synop [1,0]))) +val_mm = str(int(np.array(ds_obs_synop [1,1]))) +val_dd = str(int(np.array(ds_obs_synop [1,2]))) +val_hh = str(int(np.array(ds_obs_synop [1,3]))) +print ('Time slot:', val_yy, val_mm, val_dd, val_hh) + +print (' OBS MOD DIF') +print ('Max {:8.3f} {:8.3f} {:8.3f}'.format(obs_max, mod_max, dif_synop_max)) +print ('Min {:8.3f} {:8.3f} {:8.3f}'.format(obs_min, mod_min, dif_synop_min)) +print ('Mean {:8.3f} {:8.3f} {:8.3f}'.format(obs_mean, mod_mean, dif_synop_mean)) +print ('Med {:8.3f} {:8.3f} {:8.3f}'.format(obs_med, mod_med, dif_synop_med)) +print ('Var {:8.3f} {:8.3f} {:8.3f}'.format(obs_var, mod_var, dif_synop_var)) + +emsp=' ' # empty space for text arrangement on graphs +rec_max=emsp +str('{:8.3f}'.format(obs_max)) 
+str('{:8.3f}'.format(mod_max)) +str('{:8.3f}'.format(dif_synop_max)) +' Max' +rec_min=emsp +str('{:8.3f}'.format(obs_min)) +str('{:8.3f}'.format(mod_min)) +str('{:8.3f}'.format(dif_synop_min)) +' Min' +rec_mean=emsp+str('{:8.3f}'.format(obs_mean))+str('{:8.3f}'.format(mod_mean))+str('{:8.3f}'.format(dif_synop_mean))+' Mean' +rec_med=emsp +str('{:8.3f}'.format(obs_med)) +str('{:8.3f}'.format(mod_med)) +str('{:8.3f}'.format(dif_synop_med)) +' Median' +rec_var=emsp +str('{:8.3f}'.format(obs_var)) +str('{:8.3f}'.format(mod_var)) +str('{:8.3f}'.format(dif_synop_var)) +' Variance' +rec_nr_obs=emsp+emsp+emsp+str(int('{:5d}'.format(nr_obs)))+' - # of stations' + +#print ('{:8.3f} {:8.3f} {:8.3f} {:8.3f} {:8.3f} {:8.3f} {:8.3f} {:8.3f}'. +# format(obs_max, mod_max, obs_min, mod_min, obs_mean, mod_mean, obs_med, mod_med)) + +# Plotting : scatter plot for observed vs. modelled values +# Selecting type of plot +if type_obs == 'synop': + type_title = 'SYNOP' +elif type_obs == 'radsound': + type_title = 'RADIOSOUNDING' +else: + type_title = 'SATELLITE' + +txt_time_slot='Time slot (YYYY, MM, DD, HH): '+val_yy+'-'+val_mm+'-'+val_dd+'-'+val_hh +txt_title=type_title+': Meteorological Variable: '+ met_param + +fig=plt.figure() + +plt.scatter(val_mod_synop, val_obs_synop) +plt.xlabel('Modelled Value (MOD)') +plt.ylabel('Observed Value (OBS)') +plt.suptitle(txt_title, fontsize=16, weight='bold') +plt.title(txt_time_slot, fontsize=12) +plt.text(mod_max,obs_max,' OBS MOD DIF', weight='bold') +plt.text(mod_max,obs_max-0.5, rec_max) +plt.text(mod_max,obs_max-1.0, rec_min) +plt.text(mod_max,obs_max-1.5, rec_mean) +plt.text(mod_max,obs_max-2.0, rec_med) +plt.text(mod_max,obs_max-2.5, rec_var) +plt.text(mod_max,obs_max-3.0, rec_nr_obs) + +##plt.show() + +# Saving figure to png-format-file +fnamesave=type_obs+'_'+fnamedate+'_'+met_param+'.png' +plt.savefig(fnamesave, dpi = 300, bbox_inches="tight") + +print('Output graphical file is: ', fnamesave) + +#exit() diff --git 
a/RADSOUND/gsv_radsound_mod_data.sh b/RADSOUND/gsv_radsound_mod_data.sh new file mode 100755 index 0000000000000000000000000000000000000000..a2f2d1a55d046ef68e570ee0527b0ef933f47c89 --- /dev/null +++ b/RADSOUND/gsv_radsound_mod_data.sh @@ -0,0 +1,103 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +######################################################################## +# DestinE Climate DT Project +# Author: Alexander Mahura : 2023-09-21 +######################################################################## + +# STEP 1 - PRE-PROCESSING MOD DATA EXTRACTED WITH GSV_INTERFACE + +# 2020012008_2t_r360x180.nc +# Specify: +# (1) name of gsv extracted files at 2 m & 850 hPa (now at 700 from fake-FDB) +gsv_extr_filename_2t="2t_r360x180.nc" +gsv_extr_filename_t850="t850_r360x180.nc" + +# (2) path to dir where such file is placed +#path_gsv_mod_data="/projappl/project_465000454/ama/DEOBS/SYNOP/GSVMODDATA/" +path_gsv_mod_data="GSVMODDATA/" + +cd $path_gsv_mod_data; pwd + +# -------------------------------------------------------------------------------- +echo " Checking if files with gsv extracted mod data (2t & t850) are available ..." 

# --------------------------------------------------------------------------------
# Wait for the two GSV-extracted input files, compute their difference with CDO,
# and split the result into one file per time step.
#
# Reads : $gsv_extr_filename_2t, $gsv_extr_filename_t850, $path_gsv_mod_data
#         (all set earlier in this script; the current directory is already
#         $path_gsv_mod_data at this point)
# Sets  : $dif_gsv_extr_filename, $tmp_file_list_timestamp
# Writes: $dif_gsv_extr_filename, $tmp_file_list_timestamp, split_*.nc
# --------------------------------------------------------------------------------

# Block until file $1 exists in the current directory, polling every 10 s.
# NOTE(review): there is deliberately no timeout here, as in the original
# script; the wait never ends if the file is never delivered.
wait_for_file() {
    until [ -f "$1" ]
    do
        echo " File is not available in dir: $path_gsv_mod_data"
        sleep 10
    done
    echo " File found: $1"
}

wait_for_file "$gsv_extr_filename_2t"
wait_for_file "$gsv_extr_filename_t850"

# Both files exist once the waits above have returned, so the former
# "does not exist" branches could never be taken and have been dropped.
echo $gsv_extr_filename_2t exists
echo $gsv_extr_filename_t850 exists

echo " Name of input files with extracted mod data using GSV : "
echo " for 2t : $gsv_extr_filename_2t"
echo " for t850 : $gsv_extr_filename_t850"

# --------------------------------------------------------------------------------
echo " CDO calculating difference between temperatures at 2 m and 850 hPa ..."

dif_gsv_extr_filename="2t_t850_r360x180.nc"

# The difference (2t minus t850) is stored in $dif_gsv_extr_filename under the
# variable name "2t".
if ! cdo setname,2t -sub "$gsv_extr_filename_2t" "$gsv_extr_filename_t850" "$dif_gsv_extr_filename"; then
    echo " ERROR: cdo could not calculate the 2t - t850 difference" >&2
    exit 1
fi

echo " and saving to file: $dif_gsv_extr_filename"

# --------------------------------------------------------------------------------
echo " CDO extacting timestamps from file : $dif_gsv_extr_filename"

# temporary file for saving time slices
tmp_file_list_timestamp="list_timestamp.txt"

# Stop here if either step fails: the renaming step that follows pairs each
# split_* file with one timestamp, and with a missing or empty timestamp list
# every split file would be moved onto the same target name.
if ! cdo showtimestamp "$dif_gsv_extr_filename" > "$tmp_file_list_timestamp" ||
   ! cdo -splitsel,1 "$dif_gsv_extr_filename" split_; then
    echo " ERROR: cdo could not extract timestamps / split $dif_gsv_extr_filename" >&2
    exit 1
fi

# --------------------------------------------------------------------------------
echo " Creating names of output files using timestamps ..."
echo " Saving individual files by time-slices ..."
+for ii in 1 2 3 4 5 6 7 8 +do + sed -i 's/-//' $tmp_file_list_timestamp +done +for jj in 1 2 3 4 +do + sed -i 's/:00:00//' $tmp_file_list_timestamp + sed -i 's/T//' $tmp_file_list_timestamp +done + +filenames=($(cat $tmp_file_list_timestamp)) + +x=0; +for f in $(ls split_*.nc); +#do mv $f ${filenames[$x]}_2t_r360x180.nc; +do mv $f ${filenames[$x]}_$dif_gsv_extr_filename; +let x=$x+1; +done + +rm $tmp_file_list_timestamp +###############tmp test +### rm -rf 20200120* + +exit + diff --git a/RADSOUND/list_of_all_radsound_stations.txt b/RADSOUND/list_of_all_radsound_stations.txt new file mode 100644 index 0000000000000000000000000000000000000000..6ce18b2b45ce81ebdf405c3b12117fce2c21fda5 --- /dev/null +++ b/RADSOUND/list_of_all_radsound_stations.txt @@ -0,0 +1,218 @@ +30673 119.733002 53.750000 3052 2445 +42867 79.050000 21.100000 3270 1401 +42971 85.833000 20.250000 1295 1005 +42410 91.583000 26.100000 1741 1454 +33658 25.900000 48.367001 3499 1611 +89664 166.667007 -77.849998 5862 2902 +42339 73.017000 26.300000 1503 1303 +35671 67.717003 47.799999 7131 4806 +33041 30.962999 52.402000 2966 1238 +24908 102.293999 60.335999 4550 4024 +24641 121.616997 63.783001 2137 2326 +76394 -100.233002 25.867001 4893 5335 +37055 43.099998 44.216999 6373 5064 +67083 47.483000 -18.800000 6740 5010 +76225 -106.032997 28.667000 4691 7220 +30635 109.016998 53.417000 5401 4566 +28225 56.196999 57.958000 2471 1365 +24125 112.432999 68.500000 4908 4579 +24507 100.217003 64.268997 3970 2315 +13275 20.417000 44.766998 3089 1382 +36872 77.003998 43.362999 4047 3766 +43150 83.300000 17.683000 1660 1408 +15614 23.383000 42.650000 3488 8503 +29698 99.032997 54.882999 8085 6037 +35229 57.150002 50.283001 2855 1280 +34467 44.333000 48.783001 1695 1075 +34172 46.039001 51.556999 4366 3119 +27038 39.925999 59.318001 2104 1318 +30372 118.266998 56.900002 5143 4600 +30554 113.583000 54.466999 4151 4197 +32540 158.582993 53.083000 1781 2234 +44292 106.866997 47.917000 3997 2040 +23921 60.450000 
60.683000 1841 1155 +89571 77.967000 -68.574000 6633 4216 +30230 108.067001 57.766998 3024 2500 +31736 135.233002 48.533001 2014 2439 +30935 108.750000 50.367001 5170 4596 +10739 9.200000 48.833000 10208 8242 +2527 12.291000 57.657001 6143 4159 +12425 16.881001 51.112999 3423 1051 +71801 -52.750000 47.667000 3390 3694 +2591 18.350000 57.650002 1224 1222 +47909 129.552994 28.393999 4715 4756 +68263 28.211000 -25.910000 6324 6020 +28445 61.067001 56.733002 7339 5301 +40375 36.599998 28.382999 9225 6493 +42182 77.200000 28.583000 3980 2965 +50527 119.700000 49.250000 8566 8375 +4220 -52.852000 68.708000 2573 2810 +51431 81.333000 43.950000 8897 9159 +16044 13.188000 46.037000 2344 1325 +53845 109.450000 36.567000 3396 2781 +72265 -102.189000 31.942000 12450 14260 +1241 9.612000 63.706001 1430 1187 +4018 -22.595000 63.981000 3514 3401 +47678 139.779007 33.122002 2636 2514 +6011 -6.765000 62.023000 2023 2287 +4360 -37.637000 65.611000 4066 4225 +72274 -110.956000 32.228000 1332 1306 +1415 5.665000 58.874001 1356 1056 +72572 -111.955000 40.772000 13107 14572 +72528 -78.719000 42.941000 12948 13135 +71934 -111.929001 60.026001 4651 7487 +6610 6.943000 46.812000 6526 6543 +11035 16.356000 48.249000 5536 4590 +71926 -96.000000 64.317001 1508 2012 +70133 -162.613000 66.886000 1926 2033 +72597 -122.882000 42.377000 9389 12215 +29634 82.949997 54.966999 3437 1606 +72364 -106.698000 31.873000 11480 13113 +10868 11.552000 48.243999 11163 10580 +72681 -116.211000 43.567000 11810 14498 +72293 -117.117000 32.833000 6144 6971 +1028 19.000999 74.503998 1693 1648 +11520 14.447000 50.008000 8132 5874 +72662 -103.211000 44.073000 13070 14480 +72645 -88.112000 44.499000 8494 8853 +10548 10.377000 50.562000 7981 7601 +12843 19.183000 47.433000 5107 3390 +72451 -99.969000 37.761000 3525 4331 +3005 -1.185000 60.139000 4501 4504 +72776 -111.385000 47.461000 13419 14601 +70200 -165.436000 64.512000 2852 2940 +72712 -68.014000 46.868000 12284 11995 +72317 -79.943000 36.098000 13056 13997 
+72327 -86.563000 36.247000 11201 10797 +72562 -100.700000 41.133000 13232 14452 +72476 -108.525000 39.120000 7421 10685 +3808 -5.327000 50.218000 2609 2564 +71945 -122.599998 58.833000 8528 9591 +72558 -96.367000 41.320000 7562 8599 +71907 -78.083000 58.466999 1032 1176 +72456 -95.631000 39.072000 12959 13959 +72363 -101.709000 35.233000 12891 14476 +2836 26.629000 67.366997 6852 5555 +72403 -77.486000 38.977000 2249 1840 +10410 6.969000 51.406000 4936 4183 +72340 -92.259000 34.835000 11376 10831 +71964 -135.097000 60.733002 6634 8056 +71722 -76.005997 46.301998 4417 6221 +11952 20.316999 49.033001 8980 7914 +70231 -155.598000 62.958000 8042 8483 +70361 -139.667000 59.517000 2472 2562 +70308 -170.217000 57.150000 4325 4263 +10035 9.550000 54.533001 2159 1822 +72797 -124.560000 47.934000 1212 1304 +70261 -147.877000 64.816000 9014 10036 +71924 -94.968002 74.704002 2191 2372 +72747 -93.397000 48.565000 9363 9613 +72518 -73.832000 42.693000 2800 2593 +71119 -114.108002 53.548000 7233 9964 +72764 -100.759000 46.772000 13328 14564 +71867 -101.099998 53.966999 5672 8045 +72235 -90.080000 32.319000 1395 1033 +70350 -152.493000 57.746000 3933 4110 +72768 -106.626000 48.207000 6410 8121 +72786 -117.627000 47.681000 12270 14504 +72520 -80.217000 40.532000 13633 14178 +71082 -62.333000 82.500000 3424 3371 +71917 -85.932999 79.983002 1743 1782 +71043 -126.750000 65.282997 3014 3848 +71816 -60.367001 53.299999 1128 1325 +71906 -68.417000 58.117001 1889 2380 +71957 -133.516998 68.317001 4125 5275 +70273 -149.984000 61.156000 5146 5423 +74389 -70.254000 43.893000 6057 6107 +72261 -100.918000 29.374000 13500 14298 +71811 -66.250000 50.216999 1762 1705 +22522 34.650002 64.949997 1581 1041 +94998 158.937000 -54.499000 2414 1952 +22217 32.354000 67.168999 1548 1484 +29634 82.949997 54.966999 3437 1606 +54161 125.217000 43.900000 2949 3077 +54374 126.883003 41.799999 7863 8210 +11520 14.447000 50.008000 8132 5874 +53513 107.367000 40.733000 11461 11664 +72768 -106.626000 48.207000 
6410 8121 +71915 -83.366997 64.199997 1132 1353 +56964 100.983000 22.767000 14082 14124 +51463 87.617000 43.783000 5211 2570 +8221 -3.580000 40.465000 9042 8487 +33345 30.566999 50.400002 3390 2566 +64650 18.517000 4.400000 2398 2928 +40766 47.150002 34.349998 3495 4452 +34122 39.250000 51.650002 2151 1151 +62414 32.820000 23.964001 3952 1178 +61291 -7.950000 12.533000 5035 6602 +83840 -49.167000 -25.517000 4298 7013 +7481 5.078000 45.726000 4363 3198 +17240 30.549999 37.750000 8265 6433 +17281 40.200001 37.900002 3638 2491 +40990 65.849998 31.500000 1994 1091 +24343 123.400002 66.766998 2105 1497 +83612 -54.667000 -20.467000 4779 6336 +60680 5.433000 22.800000 7932 7453 +10771 11.902000 49.428000 5664 6282 +10618 7.333000 49.700000 6881 6471 +40706 46.283001 38.083000 5085 1137 +40745 59.632999 36.266998 4648 1886 +89611 110.523000 -66.283000 7983 5610 +83378 -47.933000 -15.867000 5423 8243 +94332 139.488000 -20.678000 6253 1469 +89532 39.581001 -69.004997 3165 2918 +94637 121.453000 -30.785000 7040 1015 +62378 31.349001 29.863001 2882 1498 +38341 71.300003 42.849998 8358 5729 +40754 51.317000 35.683000 5782 5846 +94510 146.256000 -26.414000 7669 1584 +60571 -2.250000 31.500000 4453 3245 +72387 -116.028000 36.621000 7761 10338 +42361 78.250000 26.233000 2941 1302 +56985 103.383003 23.382999 13430 13465 +43014 75.400000 19.850000 2950 1876 +40373 46.132999 28.316999 5646 4973 +40430 39.700000 24.550000 8584 8225 +4339 -21.951000 70.484000 3194 3399 +70261 -147.877000 64.816000 9014 10036 +40582 47.971001 29.243000 1527 1052 +42667 77.350000 23.283000 2380 1010 +40437 46.717000 24.933000 1942 1783 +40394 41.683000 27.433000 5875 4421 +82965 -56.099998 -9.867000 1844 4179 +72357 -97.438000 35.181000 3706 4414 +88889 -58.448000 -51.820000 3618 2500 +71845 -90.200000 51.450000 6162 6350 +83827 -54.583000 -25.517000 3491 4674 +71203 -119.400000 49.941000 4830 4874 +72249 -97.299000 32.835000 3363 3729 +72230 -86.782000 33.179000 3180 3151 +83566 -43.966999 -19.617001 
1789 2642 +72215 -84.568000 33.527000 7815 8049 +72632 -83.471000 42.697000 8255 8588 +72489 -119.797000 39.568000 1800 3506 +72659 -98.413000 45.456000 8046 8594 +94430 118.537000 -26.614000 4993 1278 +29572 92.606003 56.181000 2888 1661 +74560 -89.338000 40.152000 6311 6476 +74455 -90.582000 41.611000 7338 7725 +72440 -93.401000 37.235000 8038 8428 +72649 -93.565000 44.850000 7453 8115 +72426 -83.822000 39.421000 7760 8079 +72318 -80.414000 37.204000 6973 7204 +72634 -84.719000 44.908000 7315 7955 +83362 -56.099998 -15.650000 2149 2843 +3354 -1.251000 53.006000 2644 1558 +74646 -97.500000 36.600000 3101 3658 +71908 -122.790001 53.900002 4353 4433 +16113 7.613000 44.539000 3611 3596 +98433 121.369003 14.581000 3152 2145 +83779 -46.632999 -23.500000 3276 3522 +83554 -57.667000 -19.000000 1346 1779 +72387 -116.028000 36.621000 7761 10338 +82705 -72.766998 -7.583000 1282 2402 +82026 -55.946999 2.224000 1455 3638 +8190 2.118000 41.384000 1238 1266 +71802 -52.783000 47.517000 1657 1617 +68592 31.129999 -29.601999 1281 1220 +72388 -115.183000 36.050000 2745 3274 diff --git a/RADSOUND/list_of_radsound_stations.txt b/RADSOUND/list_of_radsound_stations.txt new file mode 100644 index 0000000000000000000000000000000000000000..ccbac197c838f5536bda78c72cc2972786c8bb08 --- /dev/null +++ b/RADSOUND/list_of_radsound_stations.txt @@ -0,0 +1,6 @@ +30673 119.733002 53.750000 3052 2445 +42867 79.050000 21.100000 3270 1401 +42971 85.833000 20.250000 1295 1005 +42410 91.583000 26.100000 1741 1454 +33658 25.900000 48.367001 3499 1611 +89664 166.667007 -77.849998 5862 2902 diff --git a/RADSOUND/main_radsound.sh b/RADSOUND/main_radsound.sh new file mode 100755 index 0000000000000000000000000000000000000000..1b43a8c213336af20dc7352ae22406b762ac1f05 --- /dev/null +++ b/RADSOUND/main_radsound.sh @@ -0,0 +1,222 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." 
########################################################################
# Author: Alexander Mahura - 2023-09-15
# Updated: 2023-09; 2023-10
########################################################################
#
# Modules that have to be loaded before this script is run:
#   module load LUMI/23.03
#   module load partition/C
#   module load PrgEnv-gnu
#   module load odb_api/0.18.1-cpeCray-23.03.lua
#   module load python-climatedt/3.11.3-cpeCray-23.03.lua
# NOTE: these are part of
#   /appl/AS/AUTOSUBMIT_DATA/expid/proj/workflow/lib/LUMI/config.sh
#
########################################################################
# Pre-setup (testing for a limited ODB dataset & a limited geographical
# area) for air temperature at 2 metre. Any other meteorological parameter
# (from the SYNOP list) can be added.

printf '%s\n' \
    "====================================================================" \
    " DestinE Digital Twin --- OBSALL Apps for RADIOSOUNDING OBS DATA:" \
    " TESTING/ RUNING FOR: " \
    " -- LIMITED RADIOSOUNDING OBS DATA ODB DATASET" \
    " -- LIMITED GEOGRAPHICAL AREA" \
    " -- FOR AIR TEMPERATURE AT 2 METRE & AT 850 HPA LEVEL" \
    "====================================================================" \
    " "

# Variable code / name and the longitude-latitude box used by the later steps
varMETnum="39"
varMETstr="2t"
lon_val_min="20"
lon_val_max="25"
lat_val_min="60"
lat_val_max="65"
echo "varMETnum varMETstr lon_val_min lon_val_max lat_val_min lat_val_max:"
echo "$varMETnum $varMETstr $lon_val_min $lon_val_max $lat_val_min $lat_val_max"


printf '%s\n' \
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" \
    " STEP #0 : PRE-CHEKING EXISTANCE OF NECESSARY DIRECTORIES ..." \
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
pwd

# Print the directory name $1, report whether it exists, and create it
# (single level, relative to the current directory) when it does not.
ensure_dir() {
    echo "$1"
    if [ ! -d "$1" ]; then
        echo "$1 does not exist"
        echo " Creating directory: $1"
        mkdir $1
        return
    fi
    echo "$1 exists"
}

echo " Directory for pre-processing mod data extracted with gsv ..."
inp_dir_gsvmoddata="GSVMODDATA"
ensure_dir "$inp_dir_gsvmoddata"

echo " Directory for processing observation data ..."
inp_dir_dataobs="DATAOBS"
ensure_dir "$inp_dir_dataobs"

echo " Directory for processing modeled data ..."
inp_dir_datamod="DATAMOD"
ensure_dir "$inp_dir_datamod"

echo " Directory for saving graphical output ..."
inp_dir_graphs="GRAPHS"
ensure_dir "$inp_dir_graphs"

printf '%s\n' \
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" \
    " ..... STEP 0 - COMPLETED ....." \
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

printf '%s\n' \
    " " \
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" \
    " STEP #1 : PRE-PROCESSING MOD DATA EXTRACTED WITH GSV_INTERFACE ..." \
    " SCRIPT --- gsv_radsound_mod_data.sh" \
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

# STEP 1 is currently switched off; re-enable by uncommenting the call below.
#######
#######./gsv_radsound_mod_data.sh


echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo " ..... STEP 1 - COMPLETED ....."
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

echo "===================================================================="
echo " RADSOUND : START LOOP OVER ALL AVAILABLE TIME-SLICES "
echo "===================================================================="

# Build the work list: one record per split model file (one file per
# time-slice) found in $inp_dir_gsvmoddata. The first 10 characters of each
# record are read below as YYYYMMDDHH.
tmp_file_tstamps="file_with_tstamps.txt"
need_files="*_2t_t850_*"
cd "$inp_dir_gsvmoddata" || exit 1
# $need_files is left unquoted on purpose so that the shell expands the pattern
ls $need_files > "$tmp_file_tstamps"

# Print the last record and remove it from the list. This is done
# synchronously; the former "tee >(wc -c | xargs truncate ...)" construct ran
# in the background and could still be running when the list was moved below.
# NOTE(review): why the last time-slice is left out is not stated in this
# script - confirm that it is intended.
tail -n 1 "$tmp_file_tstamps"
sed -i '$d' "$tmp_file_tstamps"

mv "$tmp_file_tstamps" ../; cd ..; pwd
nrec_file=$(sed -n '$=' "$tmp_file_tstamps")
echo "nrec_file: $nrec_file"

# ${nrec_file:-0}: sed prints nothing for an empty list, which would otherwise
# be an arithmetic syntax error in the loop header.
for (( nnrec=1; nnrec<=${nrec_file:-0}; nnrec++ ))
do
    # The first record of the list is always the time-slice to process; it is
    # removed from the list once STEP 3 has finished (see below).
    tstamp_rec=$(head -n 1 "$tmp_file_tstamps")
    b_yy=${tstamp_rec:0:4}
    b_mm=${tstamp_rec:4:2}
    b_dd=${tstamp_rec:6:2}
    b_hh=${tstamp_rec:8:2}
    echo "b_yy, b_mm, b_dd, b_hh : $b_yy $b_mm $b_dd $b_hh"
    echo "===================================================================="
    echo " START CALCULATIONS FOR TIME-SLICE: $b_yy $b_mm $b_dd $b_hh "
    echo "===================================================================="

    echo " "
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    echo " STEP #2 : EXTRACTING RADSOUND OBS DATA FROM ODB ..."
    echo " SCRIPT --- radsound_obs.sh"
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    # Examples:
    #./radsound_obs.sh 2020 01 20 00 00 00 39 21 24 61 64
    #./radsound_obs.sh 2020 01 20 15 00 00 $varMETnum $lon_val_min $lon_val_max $lat_val_min $lat_val_max
    b_min="00"
    b_sec="00"
    # STEP 2 is currently switched off; re-enable by uncommenting the call below.
    #######
    #######./radsound_obs.sh $b_yy $b_mm $b_dd $b_hh $b_min $b_sec $varMETnum $lon_val_min $lon_val_max $lat_val_min $lat_val_max

    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    echo " ..... STEP 2 - COMPLETED ....."
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    echo " STEP #3 : EXTRACTING AND INTERPOLATING MOD DATA TO RADIOSOUNDING STATIONS "
    echo " GEOGRAPHICAL COORDINATES/POSITIONS"
    echo " AND ADDING SUCH INTERPOLATED MOD DATA TO ODB ..."
    echo " SCRIPT --- radsound_mod.sh"
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    # Examples:
    #./radsound_mod.sh 2020012000_2t_r360x180.nc 2020012000 2t
    #./radsound_mod.sh 2020012015_2t_r360x180.nc 2020012015 $varMETstr
    b_2t="_2t_t850_r360x180.nc"
    b_yyyymmddhh=$b_yy$b_mm$b_dd$b_hh
    echo "b_2t, b_yyyymmddhh = $b_2t $b_yyyymmddhh"
    b_fname_yyyymmddhh_2t=$b_yyyymmddhh$b_2t
    echo "b_fname_yyyymmddhh_2t = $b_fname_yyyymmddhh_2t"
    #######
    ./radsound_mod.sh "$b_fname_yyyymmddhh_2t" "$b_yyyymmddhh" "$varMETstr"

    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    echo " ..... STEP 3 - COMPLETED ....."
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

    echo "===================================================================="
    echo " END CALCULATIONS FOR TIME-SLICE: $b_yy $b_mm $b_dd $b_hh "
    echo "===================================================================="

    # Remove the record of the completed time-slice from the list so that the
    # next iteration picks up the following one.
    tail -n +2 "$tmp_file_tstamps" > "$tmp_file_tstamps.tmp" && mv "$tmp_file_tstamps.tmp" "$tmp_file_tstamps"

    echo " "
    echo "===================================================================="
    echo " Checking if STATS should be run ..."
    echo " STATS - PRODUCING QUANTILE RANK HISTOGRAM STATISTICS AND PLOTS"
    echo " SCRIPT --- radsound_stats.sh"
    echo " STATS is run on a monthly basis - i.e., done on 2nd day of a month"
    echo " ... BUT NOW : $b_yy $b_mm $b_dd $b_hh"

    # NOTE(review): the message above says "2nd day of a month", but the active
    # condition triggers on day 31 at 00 UTC (the day-02 variant is kept below).
    ##if [ "$b_dd" == "02" ] && [ "$b_hh" == "00" ]; then
    if [ "$b_dd" == "31" ] && [ "$b_hh" == "00" ]; then
        b_mm_start="1"
        b_mm_end="12"
        echo " ... CALCULATING NOW : $b_yy $b_mm $b_dd $b_hh"
        echo "===================================================================="
        echo " b_yy, b_mm, b_dd, b_hh, b_mm_start, b_mm_end : $b_yy $b_mm $b_dd $b_hh $b_mm_start $b_mm_end"
        echo " varMETnum, b_yy, b_yy, b_mm_start, b_mm_end : $varMETnum, $b_yy, $b_yy, $b_mm_start, $b_mm_end"
        #######
        #./radsound_stats.sh $varMETnum $b_yy $b_yy $b_mm_start $b_mm_end
        #./radsound_stats.sh $b_yy $b_yy $b_mm_start $b_mm_end
        # NOTE(review): the year passed to STATS is pinned to 2010 regardless
        # of the time-slice being processed - confirm before production use.
        b_yy="2010"
        ./radsound_stats.sh $b_yy $b_yy $b_mm_start $b_mm_end
    fi

    echo " Checking size of file with time stamps: $tmp_file_tstamps ..."

    file_actualsize=$(wc -c <"$tmp_file_tstamps")
    if [ "$file_actualsize" == "0" ]; then
        echo size is equal to zero bytes
        echo "===================================================================="
        echo " RADSOUND : END LOOP OVER ALL AVAILABLE TIME-SLICES "
        echo "===================================================================="
        rm "$tmp_file_tstamps"
        # All time-slices processed: this is the normal end of the run, so
        # report success (the script used to return status 1 here).
        exit 0
    else
        echo size is over zero bytes
    fi

done

# Normal completion (previously "exit 1", which callers see as a failure)
exit 0
diff --git a/RADSOUND/radsound_mod.sh b/RADSOUND/radsound_mod.sh new file mode 100755 index 0000000000000000000000000000000000000000..22e9ac61e0349cf9702c458cdd6195b5cc39b685 --- /dev/null +++ b/RADSOUND/radsound_mod.sh @@ -0,0 +1,213 @@
#!/bin/bash
echo "Bash version ${BASH_VERSION}..."
########################################################################
# DestinE Climate DT Project
#
# Author:
# ORIGINAL:
#   Alexander Mahura: 2023-09
# UPDATED/ADDED:
#   2023-09-25
#     Converting/adding extracted and interpolated radsound mod data into odb
#   2023-09-29
#     Creating unstructured grid based on number of radiosounding stations with obs
#     Calculating weights for unstructured grid
########################################################################
#
# STEP 3 - EXTRACTING AND INTERPOLATING MOD DATA TO RADIOSOUNDING STATIONS
#          GEOGRAPHICAL COORDINATES/POSITIONS
#          AND ADDING SUCH INTERPOLATED MOD DATA TO ODB
#
# Reads the gsv modelled data extracted over the global domain for a selected
# time-slot (00 & 12 UTC); the "variable" is the already calculated difference
# between air temperature at 2 m and at 850 hPa. The data are interpolated to
# the geographical positions of the radiosounding stations.
# ---------------------------------------------------------------------
# Arguments:
#   $1  name of the nc-file (inside GSVMODDATA/) with modelled data
#   $2  time stamp (YYYYMMDDHH) used to build the output file name
#   $3  name of the meteorological parameter extracted with gsv
# Execution (e.g.):
#   ./radsound_mod.sh 2020012000_2t_r360x180.nc 2020012000 2t
#   ./radsound_mod.sh 2020012015_2t_r360x180.nc 2020012015 $varMETstr

echo " Input path to location of files with extracted (using gsv_interface) modelled data:"
#inp_path_mod="/projappl/project_465000454/ama/DEOBS/RADSOUND/GSVMODDATA/"
inp_path_mod="GSVMODDATA/"
echo " $inp_path_mod"

echo " Input path to location of files with extracted (using sql) observed data:"
#inp_path_obs="/projappl/project_465000454/ama/DEOBS/RADSOUND/DATAOBS/"
inp_path_obs="DATAOBS/"
echo " $inp_path_obs"

echo " Output path to location of modelled data interpolated to coordinates of radiosounding stations:"
#out_path_mod="/projappl/project_465000454/ama/DEOBS/RADSOUND/DATAMOD/"
out_path_mod="DATAMOD/"
echo " $out_path_mod"

# All three arguments are needed to build the input and output file names
if [ $# -lt 3 ]; then
    echo " Usage: $0 <nc-file> <YYYYMMDDHH> <variable name>" >&2
    exit 1
fi

# Input name of nc-file with modelled data extracted with gsv_interface
ncinfile=$1
ncinfile=$inp_path_mod$ncinfile

# Input time for making filename
timefilename=$2
# Input name for meteorological parameter extracted with gsv
varMETstr=$3

## (!) Commented out, because obs data at time-slices are not needed here ...
##input radsound file: e.g. obs_radsound_2020012102.dat
##var1a="obs_radsound_"; var1b=".dat"
##infile=$var1a$timefilename$var1b
##echo " Name of input file with radsound obs data: $infile"
##infile=$inp_path_obs$infile

#######cp list_of_all_radsound_stations.txt list_of_radsound_stations.txt
infile="list_of_radsound_stations.txt"

echo " Name of file with fixed geographical coordinates of radiosounding staions:"
echo " $infile"

# Name of output file with interpolated modelled data to positions of radiosounding stations
vardef="_"
outfile="mod_radsound$vardef$timefilename$vardef$varMETstr.dat"

echo " Name of output file for interpolated mod data: $outfile"

##############################################################
# Creating unstructured grid based on number of radiosounding stations

echo " Path and Name of input file with coordinates of radiosounding stations:"
echo " $infile"

echo " Counting number of records in file with geographical coordinates of radiosounding stations ..."
# Station list format (one record per line, whitespace separated):
#   station  longitude  latitude  UTC00-obs-count  UTC12-obs-count
#
# Only non-blank records are counted, so that "gridsize" below always matches
# the number of coordinates written to the grid file. awk also handles a last
# record without a trailing newline, which a plain "while read" loop drops.
numrec=$(awk 'NF > 0 { n++ } END { print n + 0 }' "$infile")
echo " Number of records (numrec) in file: $numrec"

echo " Creating unstructured grid for 2D geographical locations of radiosounding stations ..."

unstr_grid_file="grid_unstructured_radsound.txt"
echo " Name of file with unstructured grid: $unstr_grid_file"

# CDO grid description: type, number of points, then all longitudes (x)
# followed by all latitudes (y), one value per line.
{
    echo "gridtype = unstructured"
    echo "gridsize = $numrec"
    echo "xvals ="
    awk 'NF > 0 { print $2 }' "$infile"
    echo "yvals ="
    awk 'NF > 0 { print $3 }' "$infile"
} > "$unstr_grid_file"

##############################################################
# Calculating weights for unstructured grid ...

echo " CDO calculating weights for unstructured grid ... "
# Bilinear weights for the station grid, then the model field remapped with
# them. Stop on failure: without tmpfile_radsound_new.nc every station below
# would yield an empty record.
if ! cdo genbil,"$unstr_grid_file" -selname,"$varMETstr" "$ncinfile" weights_radsound.nc ||
   ! cdo remap,"$unstr_grid_file",weights_radsound.nc "$ncinfile" tmpfile_radsound_new.nc; then
    echo " ERROR: cdo could not remap $ncinfile to the radiosounding stations" >&2
    rm -f tmpfile_radsound_new.nc weights_radsound.nc "$unstr_grid_file"
    exit 1
fi

echo " IMPORTNAT NOTE (!!!): "
echo " in radiosounding observations for variable air temperature the code is 2"
variable="2"

# One output record per station:
#   YYYY MM DD HH MM SS lon lat value variable station
# The default IFS is used so that the list is split exactly as awk splits it
# above; "|| [ -n ... ]" keeps a last record that has no trailing newline.
while read -r station longitude latitude _ || [ -n "$station" ]
do
    # Blank records are skipped (they are not counted in numrec either)
    [ -n "$station" ] || continue

    echo " CDO interpolating mod data to geographical coordinates of radiosounding stations ..."
    echo " $station $longitude $latitude $variable"

    # cdo prints a header line followed by "YYYY-MM-DD HH:MM:SS lon lat value".
    # sed drops the header, turns the date/time separators into blanks
    # (sql-odb style) and appends the variable and station IDs.
    # Only the FIRST two "-" and ":" are replaced - never use the "g" flag
    # here, negative longitudes/latitudes/values must keep their sign.
    # stdin comes from /dev/null so that cdo can never consume the station
    # list this loop is reading.
    cdo -s -outputtab,date,time,lon,lat,value \
        -remapnn,"lon=${longitude}_lat=${latitude}" \
        -selvar,"${varMETstr}" tmpfile_radsound_new.nc < /dev/null |
        sed -e '1d' \
            -e 's/-/ /' -e 's/-/ /' \
            -e 's/:/ /' -e 's/:/ /' \
            -e "s/\$/ ${variable} ${station}/" >> "$outfile"

done < "$infile"

# Removing extra temporary files ...
rm -f tmpfile_radsound_new.nc weights_radsound.nc "$unstr_grid_file"


###############################################################################
# Converting radsound mod data file into odb (with import command)
# Opt1: saving/adding records to ODB (for mod data at radsound stations locations)
# Opt2: saving/adding records to ODB (for odb already having obs radsound data)
#
# To be modified/updated (& unified) according to the different types of obs data:
#   SYNOP (only at ground),
#   RADSOUND (at 3D - lat/lon/alt)
#   SATELLITE (at 3D - area within lat/lon/alt)

echo " Converting/adding/saving records with radsound mod data into ODB ..."
# ..............................................................................
# RADSOUND data ................................................................
# Defining header for adding mod radsound data to odb file ...
# Column names/types for "odb import", in the order of the fields of each
# record in $outfile.
header_mod_radsound_data="year@descr:integer month@descr:integer day@descr:integer hour@descr:integer minute@descr:integer second@descr:integer longitude@descr:real latitude@descr:real value@descr:real variable@descr:integer station@descr:string"

# Nothing to import if no record was produced; "sed 1i" would also silently
# insert no header into an empty file.
if [ ! -s "$outfile" ]; then
    echo " ERROR: no interpolated mod data found in file: $outfile" >&2
    exit 1
fi

# Inserting header line as 1st line into mod_radsound_YYYYMMDDHH.dat file ...
sed -i "1i ${header_mod_radsound_data}" "$outfile"

# Adding data to odb file ...
mod_radsound_odb="/scratch/project_465000454/ama/open_data_ODB/radsound_mod_data_2010-2021.odb" # Opt1 - new
##mod_radsound_odb="/scratch/project_465000454/ama/open_data_ODB/radsound_open_data_2010-2021.odb" # Opt2 - original
echo " Path and Name of odb-file with saved records:"
echo " $mod_radsound_odb"

# Append to the shared ODB file only when the import succeeded, so that a
# failed or partial import can never be added to it.
status=0
if odb import -d ' ' "$outfile" tmp_mod_radsound_odb.odb; then
    cat tmp_mod_radsound_odb.odb >> "$mod_radsound_odb" || status=1
else
    echo " ERROR: odb import failed for file: $outfile" >&2
    status=1
fi
rm -f tmp_mod_radsound_odb.odb

# Deleting header line as 1st line in mod_radsound_YYYYMMDDHH.dat file ...
sed -i -e "1d" "$outfile"

# Moving file (with interpolated modelled data) to output dir
mv "$outfile" "$out_path_mod" || status=1

exit $status
diff --git a/RADSOUND/radsound_obs.sh b/RADSOUND/radsound_obs.sh new file mode 100755 index 0000000000000000000000000000000000000000..8c91bfbc3ec93657ee9cc97802dcdf49623329ef --- /dev/null +++ b/RADSOUND/radsound_obs.sh @@ -0,0 +1,171 @@
#!/bin/bash
echo "Bash version ${BASH_VERSION}..."
+######################################################################## +# DestinE Climate DT Project +# +# Author: +# ORIGINAL: +# Jouni Räisänen: 2022-11 +# UPDATED +# Alexander Mahura: 2023-04-20, 2023-05, 2023-08 +######################################################################## +# +# STEP 2 - EXTRACTING RADSOUND OBS DATA FROM ODB +# +# Prototype for finding radiosounding observations from openly available +# radiosounding (TEMP) data for a given hour +# using files converted to ODB format +# +# --------------------------------------------------------------------- +# old - Execution (e.g.): ./odb-filename 20190728 11 39 20 25 60 65 +# new - Execution (e.g.): ./odb-filename 2019 07 28 11 00 00 39 20 25 60 65 +# --------------------------------------------------------------------- +# Arguments: +# --------------------------------------------------------------------- +# 1. Time in format: year, month, day +year=$1 +month=$2 +day=$3 +#t echo " year, month, day = $year $month $day" +yyyymmdd=$1$2$3 + +# 2. Time in format: hour, minute, second +hour=$4 +minute=$5 +second=$6 +#t echo " hour, minute, second = $hour $minute $second" +hhmmss=$4$5$6 + +echo " year, month, day, hour, minute, second = $year $month $day $hour $minute $second" + +# 3. Meteorological variable code (see list below) +variable=$7 + +echo " Meteorological variable code = $variable" + +# List of included meteorological variables: +# 39 '2m temperature' +# ### - add here 850 hPa + + +# 4.-5. Minimum and maximum longitude +# If minimum > maximum, search for stations outside the interval maximum ... -minimum +lon_min="${8:--180}" +lon_max="${9:-180}" + +# 6.-7. Minimum and maximum latitude +lat_min="${10:--90}" +lat_max="${11:-90}" + +# Note: if you do not give values for the last three arguments, +# all stations will be included in the output, +# regardless of their coordinates. 
+# +################################################################ +# +# Increase lon_max by 360 degrees if it is smaller than lon_min +# +if [ "$(echo "$lon_max < $lon_min" | bc)" = 1 ]; then + lon_max=$((${lon_max}+360)) +fi + +# --- Year-month-day part of "date" in sql access / old +#t echo " year, monh, day (yyyymmdd=):" $yyyymmdd +# --- Hour-minute-second-part of "time" in sql access / old +#t echo " hour, minute, second (hhmmss=):" $hhmmss + +################################################################## +# +# ----- File names +# +################################################################## +# On Lumi (ODB as a single file) + +echo " Path to dir where ODB file is placed:" +in_dir=/scratch/project_465000454/ama/open_data_ODB +echo " $in_dir" + +echo " Name of ODB file containing all radiosounding obs data:" +in_file=radsound_open_data_2010-2021.odb +echo " $in_file" + +echo " Name of file for extracted radiosounding obs data at geographical coordinates of radiosounding stations:" +out_file=obs_radsound_${yyyymmdd}${hour} +echo " $out_file" +rm -f ${out_file} + +################################################################## +# +# Add odb_api to $PATH +# On Puhti +# PATH="${PATH}":/projappl/project_2001011/odb_api/bin +# +# On Lumi +#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin +# +# On Lumi (asked Devaraju Narayanappa, CSC) +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# +################################################################## +# +# Input file name(s): + +in_file=${in_dir}/${in_file} +# echo $in_file + if test -f "$in_file"; then + echo $in_file exists +# +# This would show the file headers +# +# odb header ${in_file} +# +# This would retrieve all the contents of the input file to the output file +# +# odb sql select \* -i ${in_file} -o ${out_file} +# +# This retrieves data from the desired date, time and variable, 
for all stations +# within a given area. The treatment of the longitude bounds is slightly +# complicated because there are two possibilities. +# +# The 'eval' structure is required because odb_api does not directly recognize the bash variables. + +echo " Extracting (with odb sql) radsound obs data at geographical coordinates of radiosounding stations ..." +# --------------------------------------- +# On Puhti +# --------------------------------------- +# echo " date, time: $date $time" +# +# odb_command="odb sql 'select date,time,longitude,latitude,variable,value where time=${hhmmss} and variable=${variable} and ((longitude>=${lon_min} and longitude<=${lon_max}) or (longitude+360>=${lon_min} and longitude+360<=${lon_max})) and latitude>=${lat_min} and latitude<=${lat_max}' -i ${in_file} -o ${out_file}" +# eval ${odb_command} +# + +# --------------------------------------- +# On Lumi +# --------------------------------------- +# Note, in overall combined .odb-file "date" became as year,month,day & "time" became as hour,minute,second +# +echo " year, month, day, hour, minute, second : $year $month $day $hour $minute $second" + + odb_command="odb sql 'select year,month,day,hour,minute,second,longitude,latitude,variable,value,station where year=${year} and month=${month} and day=${day} and hour=${hour} and minute=${minute} and second=${second} and variable=${variable} and ((longitude>=${lon_min} and longitude<=${lon_max}) or (longitude+360>=${lon_min} and longitude+360<=${lon_max})) and latitude>=${lat_min} and latitude<=${lat_max}' -i ${in_file} -o ${out_file}" + eval ${odb_command} + +# Deleting header line (example below) in a newly created output file ... +# year@hdr month@hdr day@hdr hour@hdr minute@hdr second@hdr longitude@hdr latitude@hdr variable@body value@body station@hdr +# 2020 1 20 0 0 0 24.956800 60.326698 39 3.500000 100968 + +echo " Cleaning and moving extracted from odb radsound obs data into separate folder DATAOBS ..." 
+cat ${out_file} | sed '1,1d' > ${out_file}.dat +rm -rf ${out_file} +pwd +mv ${out_file}.dat DATAOBS/ + +# Some more 'odb sql' examples: +# +# https://confluence.ecmwf.int/display/ODBAPI/Examples +# + else + echo $in_file does not exist + fi + +exit diff --git a/RADSOUND/radsound_stats.sh b/RADSOUND/radsound_stats.sh new file mode 100755 index 0000000000000000000000000000000000000000..38450aef63955e5655631c9e0daebcb3cec9afd6 --- /dev/null +++ b/RADSOUND/radsound_stats.sh @@ -0,0 +1,58 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +######################################################################## +# Author: Alexander Mahura : 2023-08-22 +######################################################################## + +# PRE-SETUP (TESTING FOR LIMITED ODB DATASET & LIMITED GEOGRAPHICAL AREA) +# FOR AIR TEMPERATURE AT 2 METRE +# ANY OTHER METEOROLOGICAL PARAMETER (FROM SYNOP LISTED) CAN BE ADDED +# +# Producing quantile rank histogram statistics and plots +# on example of 2 m temperature (using limited open odb datasets) +# +# Met.variable, start & end year, start & end month +varMETnum=$1 +b_yy_start=$2 +b_yy_end=$3 +b_mm_start=$4 +b_mm_end=$5 + +echo " varMETnum, b_yy_start, b_yy_end, b_mm_start, b_mm_end : $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end" + +echo "*****************************************************************" +echo " STATS - PRODUCING QUANTILE RANK HISTOGRAM STATISTICS AND PLOTS" +echo " ON EXAMPLE OF METEOROLOGICAL VARIABLE: $varMETnum" +echo " AT RADIOSOUNDING STATIONS" +echo "*****************************************************************" + +cd STATRS +pwd +#t exit + +echo "*****************************************************************" +echo " STEP STATS-1 - PRODUCE RANK HISTOGRAMS FOR ALL RADIOSOUNDING STATIONS" +echo "*****************************************************************" + +#./produce_rank_histograms_all_stations.sh 39 2020 2020 1 12 +./produce_rank_histograms_all_stations.sh $varMETnum $b_yy_start 
$b_yy_end $b_mm_start $b_mm_end + +echo "*****************************************************************" +echo " STEP STATS-2 - PRODUCE STANDARD PLOTS FOR EACH RADIOSOUNDING STATION" +echo "*****************************************************************" + +#./produce_standard_plots_all_stations.sh 39 2020 2020 1 12 +./produce_standard_plots_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end + +echo "*****************************************************************" +echo " STEP STATS-3 - PRODUCE SUMMARY RANK HISTOGRAMS FOR ALL STATIONS" +echo "*****************************************************************" + +#./summary_rank_histograms_all_stations.sh 39 2020 2020 1 12 +./summary_rank_histograms_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end + +echo "*****************************************************************" +echo " ALL STATS-# STEP COMPLETED" +echo "*****************************************************************" + +exit diff --git a/README.md b/README.md index c128ce7f7c891aa9d681776eed00305161f7a499..371c09201f8fd617008f07ecfa78b0f9dd1bf823 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,247 @@ -# OBS Apps -This repository contains the scripts related to the Observation Apps (components for ground-based (SYNOP), radiosounding (TEMP), and satellite (AMSU-A) obs data) of the Climate Adaptation Digital Twin (Climate DT). The development is done in the frameworks of the Destination Earth initiative. +# OBSALL Apps +This repository contains the scripts related to the Observation Apps - components for ground-based/surface (SYNOP), radiosounding (TEMP), and satellite (AMSU-A) obs data) of the Climate Adaptation Digital Twin (Climate DT). The development is done in the frameworks of the Destination Earth initiative. 
# Description -The OBS application is currently being developed as a set of scripts (written in bash, fortran, python): -(1) for pre-processing gsv_extracted modeled data (over latitude-longitude domain) for selected meteorological variables into hourly time-slices (00...23 UTCs); -(2) for extracting (using sql with odp_api software) selected meteorological variables at hourly time-slices for observations from: (i) ground-based synoptical stations at fixed at the surface geographical locations/points (i.e., 2D: latitude, longitude); (ii) vertical radiosounding of the atmosphere at changing locations/points (3D: latitude, longitude, single pressure level), and (iii) satellite at changing locations/ points (3D: latitude, longitude, multiple pressure levels); -(3) for extracting and interpolating (using cdo software - for synop; & expecting/using polytope - for radiosounding and satellite) modeled data for the same time-slices for same selected meteorological variables into corresponding locations/points of (synop, radiosounding, satellite) observations, and adding (using sql) such data to ODB; -(4) for calculating/ producing relevant statistics such as quantile rank histogram statistics and plots (t-test and others to be added)This is a readme ini file for Observationa applciation +The OBSALL application is currently being developed as a set of scripts (written in `bash`, `fortran`, `python`): +1. for pre-processing (with operators of `cdo`, Climate Data Operators, software) gsv_extracted modeled data (over latitude-longitude domain) for selected meteorological variables into hourly time-slices (00...23 UTCs); +2. 
for extracting (using `sql` with `odb_api` software) selected meteorological variables at hourly time-slices for observations from: (i) ground-based synoptical stations at fixed at the surface geographical locations/points (i.e., 2D: latitude, longitude); (ii) vertical radiosounding of the atmosphere at changing locations/points (3D: latitude, longitude, single pressure level), and (iii) satellite at changing locations/ points (3D: latitude, longitude, multiple pressure levels); +3. for extracting and interpolating (using `cdo` operators - for synop; & expecting/using `polytope` - for radiosounding and satellite) modeled data for the same time-slices for same selected meteorological variables into corresponding locations/points of (synop, radiosounding, satellite) observations, and adding (using `import` with `odb_api`) such data to ODB; +4. for calculating/ producing relevant statistics such as quantile rank histogram statistics and plots (t-test and others to be added) -# Disclaimers -The OBS package is in a developement phase by the University of Helsinki team, led by Heikki Järvinen (heikki.j.jarvinen@helsinki.fi). The team includes also Jouni Räisänen (jouni.raisanen@helsinki.fi), Lauri Tuppi (lauri.tuppi@helsinki.fi), Madeleine Ekblom (madeleine.ekblom@helsinki.fi), and Alexander Mahura (alexander.mahura@helsinki.fi). Some features are still not implemented and you may expect bugs. For feedback and issue reporting, feel free to open an issue in: https://earth.bsc.es/gitlab/digital-twins/de_340/obs/-/issues +# Part SYNOP (ground-based/surface observations) +1. `main_synop.sh` - main bash-script to run Apps for observational data for synoptical stations. +2. `gsv_mod_data.sh` - bash-script to run pre-processing (using `cdo` operators) of modelled data extracted with gsv interface over global domain for selected meteorological variables (measured at synoptical station) into hourly time-slices and saving (temporary) to separate hourly nc-files. +3. 
`synop_obs.sh` - bash-script to run reading and extraction (using `sql` with `odb_api` software) of selected meteorological variables at hourly time-slices for observations from ground-based synoptical stations at fixed at the surface geographical (latitude, longitude) locations over selected geographical domain (or over the globe) and saving (temporary) to dat-file. Note, that list of available meteorological variables in open multi-year data ODB includes: 91 - total amount of clouds; 108 - sea level pressure; 80 - 1-hour precipitation; 58 - relative humidity; 999 - 10-minute precipitation intensity; 71 - snow depth; 39 - 2m temperature; 40 - dew point temperature; 62 - visibility; 111 - wind direction; 261 - maximum wind gust in last 10 minutes; 221 - surface wind speed. +4. `synop_mod.sh` - bash-script to run reading, extraction and interpolation (using `cdo` operators) of modelled data for selected meteorological variables at the same hourly time-slices to geographical (latitude, longitude) locations of synoptical stations, and saving (temporary) to dat-file, and adding (using `import` with `odb_api`) such data to ODB. Interpolation is preceded by constructing unstructured grid based on locations of synop stations and calculating weights for such grid. +5. `graph_mod_obs.py` - python-script to run an internal self-control in calculating differences between observed and modelled data for locations of synoptical stations +6. 
`synop_stats.sh` - bash-script to run (additional `bash`, `fortran`, `python` scripts) calculating/producing: +- (i) rank histograms for all synop stations for 00, 06, 12 and 18 UTCs (with `produce_rank_histograms_all_stations.sh`, using `rank_histograms_one_station.f95` and `sql/import` with `odb_api`), +- (ii) standard plots for each synop station (with `produce_standard_plots_all_stations.sh` using `plot_quantiles_rankhist.py` and `sql` with `odb_api`; see an example of such plot in **Figure 1a**), and +- (iii) summary rank histograms for all stations (`summary_rank_histograms_all_stations.sh` using `rank_histogram_summary_statistics.f95`, `plot_p_values_map.py`, `plot_rank_hist_sum_all_stations.py` and `sql` with `odb_api`; see examples of such plots in **Figure 1b** and **Figure 1c**). +- Note, needed input includes: list of synop stations with geographical coordinates, modelled data at stations coordinates, pre-computed quantiles as a function of time of year and rank histogram bootstrap mean square deviation (MSD) values. + - ![Semantic description of image](FIGSEXAMPLES/destine_obs_synop_figs.jpg "Plots for 2 m air temperature ")* Plots for 2 metre air temperature (T2m) for period of 1 Jan - 31 Dec 2020: **Figure 1a** - Quantiles/ time series data (00, 06, 12, and 18 UTCs) and rank histogram (combination of 00, 06, 12, and 18 UTCs) for synop station 126736; **Figure 1b** - P-values for T2m quantiles; **Figure 1c** - Rank histogram summary statistics: normalized p-value frequencies and normalized quantile frequencies* -# Supports +# Part TEMP (radiosounding observations) +1. `main_radsound.sh` - main bash-script to run Apps for observational data for radiosounding. +2. 
`gsv_radsound_mod_data.sh` - bash-script to run pre-processing (using `cdo` operators) of modelled data extracted with gsv interface over global domain for air temperature at 2 metre (T2) and at 850 hPa pressure level (T850) into time-slices at 00 and 12 UTCs, calculating difference (T2-T850) between these two temperatures, and saving (temporary) to separate nc-files at 00 and 12 UTC time-slices. +3. `radsound_obs.sh` - bash-script to run reading and extraction (using `sql` with `odb_api` software) of above-mentioned air temperatures at 12 hourly time-slices for radiosounding observations at surface and at altitude at geographical (latitude, longitude, pressure level) locations of radiosounding stations over selected geographical domain (or over the globe), calculating difference between two temperatures, and saving (temporary) to dat-file. +4. `radsound_mod.sh` - bash-script to run reading, extraction and interpolation (using polytope) of modelled data for air temperature at surface and at altitude at the same time-slices to geographical (latitude, longitude, pressure level) locations of radiosounding stations, calculating difference between two temperatures, and saving (temporary) to dat-file, and adding (using `import` with `odb_api`) such data to ODB. +5. 
`radsound_stats.sh` - bash-script to run (additional `bash`, `fortran`, `python` scripts) calculating/producing: +- (i) rank histograms for all radiosounding stations for 00 and 12 UTCs (with `produce_rank_histograms_all_stations.sh`, using `rank_histograms_one_station.f95` and `sql/import` with `odb_api`), +- (ii) standard plots for each radiosounding station (with `produce_standard_plots_all_stations.sh` using `plot_quantiles_rankhist.py` and `sql` with `odb_api`; see an example of such plot in **Figure 2a**), and +- (iii) summary rank histograms for all radiosounding stations (`summary_rank_histograms_all_stations.sh` using `rank_histogram_summary_statistics.f95`, `plot_p_values_map.py`, `plot_rank_hist_sum_all_stations.py` and `sql` with `odb_api`; see examples of such plots in **Figure 2b** and **Figure 2c**). +- Note, needed input includes: list of radiosounding stations with geographical coordinates, modelled data at stations coordinates at 2 metre height and at 850 hPa pressure level, pre-computed quantiles as a function of time of year and rank histogram bootstrap mean square deviation (MSD) values. +- ![Semantic description of image](FIGSEXAMPLES/destine_obs_radsound_figs.jpg "Plots for difference between air temperature at 2 m and 850 hPa pressure level: T2-T850")* Plots for difference in air temperatures at 2 metre and at 850 hPa pressure level (T2-T850) for period of 1 Jan - 31 Dec 2010: **Figure 2a** - Quantiles/ time series data and rank histogram (00 and 12 UTCs) for radiosounding station 51431; **Figure 2b** - P-values for T2-T850 quantiles; **Figure 2c** - Rank histogram summary statistics for 00 and 12 UTCs: normalized p-value frequencies and normalized quantile frequencies* + +# Part AMSU-A (satellite observations) - in development +1. 
`main_amsua.sh` - main bash-script to run Apps for observational data for satellite-based AMSU-A measurements & `set_env.sh`- bash-script which defines variables and functions, and required modules to be loaded and used in the workflow. +2. `gsv_mod_data.sh` - bash-script to run pre-processing (using `cdo` operators; in later versions - `polytope`) of 2D on surface (`sfc`) and 3D modelled data on pressure levels (`pl`) extracted with gsv interface over global domain. The `Radiance Simulator` (using `convert_pl2ml.py`) interpolates modelled data from `pl`-levels to “model”-levels (note, workflow will initially be demonstrated on a single selected location i.e., around a point of 70.0N, 45.0E in the Barents Sea; and later geographically expanded using `polytope`) and preprocessed data saved in (temporary) hourly GRIB-files. Required 3D variables are: `t` - temperature (130), `q` - specific humidity (133), `clwc` - specific liquid water content (246); and 2D variables are: `skt` - skin temperature (235), `ci` - sea ice fraction (31), `lsm` - land-sea mask (172),`sp` - surface pressure (134), `10u` and `10v` - U and V components of wind speed at 10 m (165 and 166), `2t` - air temperature at 2 m (167), `2d` - dewpoint temperature at 2 m (168), and `z` - geopotential (for orography) (129). +3. `amsua_obs.sh` - bash-script to prepare a metadata file for computation of AMSU-A model counterparts from preprocessed hourly GSV data. It runs depending on the type of climate simulation: (i) reality following data assimilation-like, and (ii) free running climate simulation. For (i) the one-to-one model counterparts can be produced, so actual metadata from AMSU-A observations in the vicinity of the selected location are extracted from ODB. This is done with `filter_data_v3.py` For (ii) the model output can be compared to observations only in climatological terms, so metadata is written only for the centre point of the selected area. +4. 
`amsua_mod.sh` - bash-script to run the `Radiance Simulator` tool with pre-described control configuration parameters and to produce a model counterpart (file in netcdf-format), which (using `convert_netcdf2txt.py`) is converted to (temporary) dat-file and added (using `import` with `odb_api`) to ODB. +5. The monitoring for AMSU-A is under development. Initially it was done on Puhti HPC of CSC, and then, to be migrated, tested, and implemented into workflow on Lumi HPC of CSC. The required polytope tool is expected to be released in December 2023. For AMSU-A monitoring, `amsua_stats.sh` bash-script is used to run (additional `bash`, `fortran`, `python` scripts) calculating/producing similar statistics as for TEMP, using daily mean values inside the selected area. + +# Installation +**Git**: to copy the "obsall" repository to your local directory via Git using: +``` +cd your_local_directory +git clone https://earth.bsc.es/gitlab/digital-twins/de_340/obsall.git +cd obsall +git checkout newbranch +``` +This will clone the "newbranch"-branch of the "obsall" repository. + +**Modules**: to set environment in order to run OBSALL application in LUMI: +``` +# Load modules for OBSALL Apps +module load LUMI/23.03 +module load partition/C +module load PrgEnv-gnu +module load ecCodes/2.32.0-cpeCray-23.03.lua +module load odb_api/0.18.1-cpeCray-23.03.lua +module load python-climatedt/3.11.3-cpeCray-23.03.lua +``` +**Data request (sfc):** file `request.yml` is needed for extracting selected modeled data +(example is for SYNOP Part, where only data at `sfc` is needed) +
+ +``` +EXPERIMENT: + DATELIST: 20200201 #Startdate + MEMBERS: fc0 + CHUNKSIZEUNIT: day #unit length of the chunk (hour, day, month, year) + CHUNKSIZE: 1 #number of unit lengths per chunk + NUMCHUNKS: 1 #chunkc to be run + CALENDAR: standard #including/ excluding leap years + +GSVREQUEST: #raw data + domain: g + class: rd + type: fc + expver: hz9n + stream: lwda + anoffset: 9 + levtype: "sfc" + date: "%CHUNK_START_DATE%/to/%CHUNK_SECOND_TO_LAST_DATE%" + time: 0000/to/2300/by/0100 # every hour + step: ["0"] # Irrelevant. step will be recalculated in workflow + param: #["2t"] + grid: 1/1 + method: nn + +APP: + other: "empty" + +OPAREQUEST: + NSTATS: "1" # number of variables to request + 1: + variable: "2t" + stat: "hourly" + stat_freq: "hourly" + output_freq: "daily" + time_step: 60 # in minutes, 60*timestep length in hours TODO: do that automatically + save: True + checkpoint: True + checkpoint_filepath: "/scratch/project_465000454/tmp/%DEFAULT.EXPID%/" + out_filepath: "/scratch/project_465000454/tmp/%DEFAULT.EXPID%/" + +``` +
+ +Note, for TEMP (radiosounding) and AMSU-A (satellite) Parts - modeled data at both `sfc` and `pl` levels are simultaneously required (such joint request is in development by BSC) + +**Data request (sfc+pl):** file `request.yml` is needed for extracting selected modeled data +(example below is for `2t` and `t` (it is in development by BSC) where modeled data at both `sfc` and `pl` levels are needed; note all other required modeled data are to be added) +
+ +``` +APP: + project_details: "Test for the app-workflow streaming" + +DATA: + 1: + GSVREQUEST: + param: "2t" + domain: g + class: rd + type: fc + expver: hz9n + stream: lwda + anoffset: 9 + levtype: "sfc" + date: split_day #will be specified in the dn template + time: 0000/to/2300/by/0100 + step: ["0"] # Irrelevant. Step will be recalculated in workflow + grid: 1/1 + method: nn + OPAREQUEST: + stat: "raw" + stat_freq: "hourly" + output_freq: "daily" + time_step: 60 # in minutes, 60*timestep length in hours TODO: do that automatically + save: True + checkpoint: True + checkpoint_filepath: "%APP.OUTPATH%" + save_filepath: "%APP.OUTPATH%" + 2: + GSVREQUEST: + param: "t" + domain: g + class: rd + type: fc + expver: hz9n + stream: lwda + anoffset: 9 + levtype: "pl" + levelist: [850] # Set of levels as int + date: split_day #will be specified in the dn template + time: 0000/to/2300/by/1200 + step: ["0"] # Irrelevant. Step will be recalculated in workflow + grid: 1/1 + method: nn + OPAREQUEST: + stat: "raw" + stat_freq: "hourly" + output_freq: "daily" + time_step: 720 # in minutes, 60*timestep length in hours TODO: do that automatically + save: True + checkpoint: True + checkpoint_filepath: "%APP.OUTPATH%" + save_filepath: "%APP.OUTPATH%" + +``` +
+ +**How to run:** runscript `run_obsall.py` is used to execute OBSALL application: +
+ +``` +#!/scratch/project_465000454/devaraju/SW/LUMI-23.03/C/python-climatedt/bin/python +# OBSALL Apps (3 parts: SYNOP, TEMP, AMSU-A observations) + +# Import required libraries +import sys +import subprocess + +# IN IMPLEMENTATION +# --- Processing ground-based observations (SYNOP) +print('**********************************************************') +print('DestinE Climate Digital Twin - OBSALL Apps') +print('--- Processing ground-based observations (SYNOP)') +print('**********************************************************') +command_synop_run = "cd SYNOP; pwd; ./main_synop.sh; exit 0" +subprocess.run(command_synop_run, shell=True, check=True, executable="/bin/bash") + +# IN IMPLEMENTATION +# --- Processing radiosounding observations (TEMP) +print('**********************************************************') +print('DestinE Climate Digital Twin - OBSALL Apps') +print('--- Processing radiosounding-based observations (TEMP)') +print('**********************************************************') +command_radsound_run = "cd RADSOUND; pwd; ./main_radsound.sh; exit 0" +subprocess.run(command_radsound_run, shell=True, check=True, executable="/bin/bash") + +# IN DEVELOPMENT +# Processing satellite observations (AMSU-A) +#command_satellite_run = "cd SATELLITE; pwd; ./main_amsua.sh; exit 0" +#subprocess.run(command_satellite_run, shell=True, check=True, executable="/bin/bash") + +sys.exit(0) + +``` +
+ +**Video-recording of installation and demo-run:** ... (coming soon) ... + +
+ +``` +... + +``` +
+ +# Disclaimer +The OBSALL package is in a development phase by the University of Helsinki team, led by Heikki Järvinen (heikki.j.jarvinen@helsinki.fi). The team includes also Jouni Räisänen (jouni.raisanen@helsinki.fi), Lauri Tuppi (lauri.tuppi@helsinki.fi), Madeleine Ekblom (madeleine.ekblom@helsinki.fi), and Alexander Mahura (alexander.mahura@helsinki.fi). Some features are still not implemented and you may expect bugs. For feedback and issue reporting, feel free to open an issue in: https://earth.bsc.es/gitlab/digital-twins/de_340/obsall/-/issues + +# Support For all feedback, comments, and issues, feel free to open a relevant issue or email alexander.mahura@helsinki.fi +# Citing OBSALL package +If you use the OBSALL package in your research or publications, please cite using the following format: +
+ +``` +@software{obsall, + author = {Heikki Järvinen, Jouni Räisänen, Lauri Tuppi, Madeleine Ekblom, Alexander Mahura}, + title = {ObsAll: Processing ground-based, radiosounding and satellite observational data for streamed climate modeled data}, + year = {2023}, + publisher = {GitLab}, + journal = {Barcelona Supercomputing Center, Earth Sciences GitLab}, + howpublished = {\url{https://earth.bsc.es/gitlab/digital-twins/de_340/obsall/newbranch}}, +} +``` +
+ +# Acknowledgments +We would like to acknowledge many contributors who have helped develop, test, and maintain this package, along with their highly valued advice. +Development of OBSALL package is supported by European Union Contract DE_340_CSC - Destination Earth Programme Climate Adaptation Digital Twin (Climate DT). diff --git a/SYNOP/GSVMODDATA/2t_r360x180.nc b/SYNOP/GSVMODDATA/2t_r360x180.nc new file mode 100644 index 0000000000000000000000000000000000000000..44b6d956844151d329aee52bf72ac901e1a8e119 Binary files /dev/null and b/SYNOP/GSVMODDATA/2t_r360x180.nc differ diff --git a/SYNOP/STATS/fortran-programs/plots_for_one_station.f95 b/SYNOP/STATS/fortran-programs/plots_for_one_station.f95 new file mode 100644 index 0000000000000000000000000000000000000000..b74cb22e5ba26d8a8466fe831b7f9dda84a6cc8d --- /dev/null +++ b/SYNOP/STATS/fortran-programs/plots_for_one_station.f95 @@ -0,0 +1,613 @@ +program plots_for_one_station + ! + ! For producing files that can be used to generate "one-station standard plots" + ! in python and in GrADS. However, these plots need to be generated separately, + ! i.e., python or GrADS is not called directly from this program. + ! + ! The following plots can be produced, depending on the details of the + ! python or GrADS script: + ! + ! 1) The time series of simulated data at one station location, + ! plotted against the background of the observed quantiles. + ! Separate plots for 00, 06, 12 and 18 UTC. The plots are + ! organized so that all the simulation data are projected + ! on a single annual cycle (normal year, leap days excluded) + ! regardless of the length of the simulation. + ! + ! 2) A quantile rank histogram, showing the normalized frequencies + ! for 100 percentile bins from 0-1% to 99-100%. The Mean + ! Square Deviation and its p-value are written as text in this plot. + ! + ! NB hard-coding: the program currently gives output only for 00, 06, 12 and 18 UTC. + ! + ! 
---------------------------------------------------------------------- + ! Jouni Räisänen, University of Helsinki, August 2023 + !----------------------------------------------------------------------- + ! + ! INPUT FILES (all as text files retrieved from .odb format): + ! ------------------------------------------------------------ + ! + ! sim_file = simulated data from the station location of interest + ! + ! Each line of this file includes: + ! + ! 1) year + ! 2) month + ! 3) day + ! 4) hour + ! 5) data value + ! + ! quantile_file = observed quantiles for this station + ! + ! Each line of this file includes: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) month + ! 5) day + ! 6) hour + ! 7-105) quantiles from 1% to 99% + ! + ! rank_histogram_file = frequencies and MSD and P-values for plotting the rank histogram + ! + ! Each line of this file includes: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) UTC hour + ! 5) total number of observation for the UTC hour + ! 6) Mean squared deviation (MSD) of the quantile bin rank histogram wrt a flat histogram + ! 7) p-value of the MSD value, relative to the bootstrap sampling distribution + ! 8-107) frequencies for quantile bins 0-1%, 1-2%, ... ,98-99%, 99-100% + ! + ! ---------------------------------------------------------------------------------------- + ! + ! OUTPUT FILES in ODB compatible text format (for python) + ! + ! txt_data_time_series: individual data values from sim_file + ! + ! The first line is a header. The other lines include: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) year + ! 5) day of year (counted from Jan 1) + ! 6) UTC hour + ! 7) data value + ! + ! txt_data_quantiles: selected quantiles from the oberved distribution as a function of UTC hour and time of year + ! + ! The first line is a header. The other lines include: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) day of year (counted from Jan 1) + ! 5) UTC hour + ! 
6-12) quantiles 1, 10, 25, 50, 75, 90 and 99% + ! + ! txt_data_rank_histogram = quantile bin frequencies (separately for each UTC hour) + ! + ! The first line is a header. The other lines include: + ! + ! 1) station code + ! 2) longitude + ! 3) latitude + ! 4) UTC hour + ! 5) Mean squared deviation (MSD) of the quantile bin rank histogram wrt a flat histogram + ! 6) p-value of the MSD value, relative to the bootstrap sampling distribution + ! 7-106) frequencies for quantile bins 0-1%, 1-2%, ... ,98-99%, 99-100% + ! + ! ---------------------------------------------------------------------------------------- + ! + ! OUTPUT FILES in GrADS binary format (for GrADS) + ! + ! grads_data_time_series = individual data values from sim_file + ! + values of selected quantiles as a fuction + ! of UTC hour and time of the year. + ! + ! grads_data_rank_histogram = quantile bin frequencies (separately for each UTC hour) + ! + ! + ! In addition, the program writes a few auxilliary texts files with hard-coded names + ! for use in GrADS scripts: + ! + ! "vrange_commands" : ranges needed on the y axis of plots + ! "time_series_commands" : for plotting different years in time series with different colors + ! "coordinates" : for including coordinate values in GrADS plot + ! "msd_and_p-value_00" : for including 00 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value_06" : for including 06 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value_12" : for including 12 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value_18" : for including 18 UTC MSD and p-value in GrADS plot + ! "msd_and_p-value" : for including all-UTC MSD and p-value in GrADS plot + ! + !------------------------------------------------------------------------------------------ + ! + ! NAMELIST PARAMETERS: + ! + ! a) input files (see above) + ! + ! sim_file + ! quantile_file + ! rank_histogram_file + ! + ! b) output files in ODB compatible text format (see above) + ! + ! txt_data_time_series + ! 
txt_data_rank_histogram + ! txt_data_quantiles + ! + ! c) output files in GrADS binary format + ! + ! grads_data_time_series + ! grads_data_rank_histogram + ! + ! d) other + ! + ! year1,month1 = beginning of the period in time series output + ! year2,month2 = end of the period in time series output + ! include_rank_histograms: if .true. (.false.), rank histogram data is included (excluded) in output + ! l_code_in_char : if .true., (.false.) station codes are assumed to be 3-character strings (integers) + ! l_round_6h : if .true., UTC hours are rounded to the closesest even 6-h (00, 06, 12 or 18 UTC). + ! if .false., data for non-matching UTC hours are excluded from output + ! + ! miss = missing value code. Default = -9.99e6. All data f with |f| >= |miss| are treated as missing + ! + !------------------------------------------------------------------------------------------- + ! + implicit none + character*160 :: sim_file ! see above + character*160 :: quantile_file ! see above + character*160 :: rank_histogram_file ! see above + character*160 :: grads_data_time_series ! see above + character*160 :: grads_data_rank_histogram ! see above + character*160 :: txt_data_time_series ! see above + character*160 :: txt_data_quantiles ! see above + character*160 :: txt_data_rank_histogram ! see above + integer :: year1,year2 ! first and last year of simulation + integer :: month1,month2 ! first and last month of simulation + integer :: nyear ! number of years = year2-year1+1 + ! + ! Data structures for input data + ! + integer,parameter :: nhour=4 ! only 00, 06, 12 and 18 UTC used + integer,parameter :: nmax_years=200 ! maximum length of a simulation in years + integer,parameter :: ndays_year=365 ! number of days in a non-leap year + + real :: f(nmax_years,ndays_year,nhour) ! the simulated values + real :: f1 ! a single simulated value + ! + integer,parameter :: nquant=99 ! number of quantiles + real :: quant(nquant,ndays_year,nhour) ! 
quantile values from quantile_file + real :: quant1(nquant) ! quantile values for a single day and UTC hour + ! + real :: freq(nquant+1,nhour+1) ! quantile bin frequencies from the rank histogram file + real :: freq1(nquant+1) ! frequencies for a single UTC hour + real :: msd(nhour+1) ! MSD value from the rank histogram file + real :: msd1 ! a single MSD value + real :: p_value(nhour+1) ! p-values from the rank histogram files + real :: p_value1 ! a single p-value + + integer :: yyear,mmonth,dday,hhour ! time as read from sim_file + integer :: year,month,day,hour ! time after eventual rounding to nearest 6-h + character*3 :: station_char ! station code as 3-char string + integer :: station_int ! station code as integer + logical :: l_code_in_char ! .true. for station codes as string + logical :: l_round_6h ! .true. for rounding time to nearest 6-h + real :: ntot ! total number of observations as read from rank_histogram_file + real :: lon,lat ! longitude and latitude + integer :: i,j,k ! loop variables + integer :: day_of_year ! day count from Jan 1 (in a non-leap year) + + real :: miss ! missing value code + + real :: fmin(nhour),fmax(nhour) ! minimum and maximum values for time series plots + real :: fmin_down(nhour),fmax_up(nhour) ! same, but extended outward + real :: freqmax(nhour+1) ! largest frequency for the rank histogram + + integer,parameter :: n_colors=13 ! number of colours for GrADS output + integer :: color(n_colors) ! colours for GrADS output + integer :: num_int_arr ! fix for Warning : Legacy Extension: REAL array index at (1) + + integer :: irec ! record number for GrADS output +! +! For text output in odb-compatible format: +! + INTEGER, PARAMETER :: L_dataline=1700 ! max length of input or output text line + character*1700 :: dataline,headerline,emptyline ! for reading / writing text files + character*2 :: number_of_bin ! quantile bin numbers for header in txt_data_quantiles + character*16 :: frequency_value ! 
frequency value written as character string +! + logical :: include_rank_histograms ! true for including rank histograms + + namelist/param/sim_file,quantile_file,rank_histogram_file,& + grads_data_time_series,grads_data_rank_histogram,& + year1,year2,month1,month2,& + txt_data_time_series,txt_data_rank_histogram,txt_data_quantiles,& + include_rank_histograms,l_code_in_char,l_round_6h,miss + + data color/9,14,4,11,5,13,3,10,7,12,8,2,6/ +! +! default values +! + miss=-9.99e6 + l_code_in_char=.false. + l_round_6h=.false. + include_rank_histograms=.true. +! +! Read the namelist and count the number of years for output +! + read(*,nml=param) + nyear=year2-year1+1 + + !************************************************************************************************ + ! Read the data from the input files. Only data for the period year1/month1-year2/month2 + ! will be included in the time series output, even if other data exist. + ! + ! 1) time series + ! + f=miss + open(unit=1,form='formatted',file=sim_file,status='old') + do while(.true.) +11 read(1,*,err=11,end=12) yyear,mmonth,dday,hhour,f1 + if(l_round_6h)then + call round_6h(yyear,mmonth,dday,hhour,year,month,day,hour) ! Rounding to nearest 6-h? + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif + if((year.gt.year1.or.(year.eq.year1.and.month.ge.month1)).and. & ! Is the time within the + (year.lt.year2.or.(year.eq.year2.and.month.le.month2))) then ! wanted range? + + if(.not.(month.eq.2.and.day.eq.29))then ! leap days are skipped from plotting + f(year-year1+1,day_of_year(month,day),1+hour/6)=f1 ! hard-coding to 6-hourly resolution + endif + endif + enddo +12 continue + close(1) + ! + ! 2) quantiles + ! + quant=miss + open(unit=1,form='formatted',file=quantile_file,status='old') + do while(.true.) 
+ if(l_code_in_char)then +21 read(1,*,err=21,end=23)station_char,lon,lat,month,day,hour,(quant1(i),i=1,nquant) + else +22 read(1,*,err=22,end=23)station_int,lon,lat,month,day,hour,(quant1(i),i=1,nquant) + endif + if((hour.eq.0.or.hour.eq.6.or.hour.eq.12.or.hour.eq.18)& + .and.(.not.(month.eq.2.and.day.eq.29))) then ! leap days are skipped from plotting + do i=1,nquant + quant(i,day_of_year(month,day),1+hour/6)=quant1(i) + enddo + endif + enddo +23 continue + close(1) + ! + ! 3) quantile bin frequencies and related MSD and p-value statistics + ! + freq=miss + if(include_rank_histograms)then + open(unit=1,form='formatted',file=rank_histogram_file,status='old') + do while(.true.) + if(l_code_in_char)then +31 read(1,*,err=31,end=33)station_char,lon,lat,hour,ntot,msd1,p_value1,(freq1(i),i=1,nquant+1) + else +32 read(1,*,err=32,end=33)station_int,lon,lat,hour,ntot,msd1,p_value1,(freq1(i),i=1,nquant+1) + endif + if((hour.eq.0.or.hour.eq.6.or.hour.eq.12.or.hour.eq.18.or.hour.eq.24))then + msd(1+hour/6)=msd1 + p_value(1+hour/6)=p_value1 + do i=1,nquant+1 + freq(i,1+hour/6)=freq1(i) + enddo + endif + enddo +33 continue + close(1) + endif + +!********************************************************************************************** +! Find the minimum and maximum values of f (or the 1st and 99th percentile) +! for defining the vranges for the GrADS time series plots + + do j=1,nhour + fmin(j)=quant(1,1,j) + fmax(j)=quant(nquant,1,j) + do i=1,ndays_year + if(quant(1,i,j).lt.fmin(j))fmin(j)=quant(1,i,j) + if(quant(nquant,i,j).gt.fmax(j))fmax(j)=quant(nquant,i,j) + do k=1,nyear + if(f(k,i,j).lt.fmin(j).and.abs(f(k,i,j)).lt.abs(miss))fmin(j)=f(k,i,j) + if(f(k,i,j).gt.fmax(j).and.f(k,i,j).lt.abs(miss))fmax(j)=f(k,i,j) + enddo + enddo + fmin_down(j)=fmin(j)-0.05*(fmax(j)-fmin(j)) + fmax_up(j)=fmax(j)+0.05*(fmax(j)-fmin(j)) + enddo +! +! Find the largest frequency from the rank histograms +! + freqmax=0. 
+ do j=1,nhour+1 + do i=1,nquant+1 + if(freq(i,j).gt.freqmax(j))freqmax(j)=freq(i,j) + enddo + enddo +! +! Write the commands to the auxiliary script files for Grads +! + open(unit=1,form='formatted',file='vrange_commands') + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(1),fmax_up(1) + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(2),fmax_up(2) + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(3),fmax_up(3) + write(1,'(A11,2F10.3)')'set vrange ',fmin_down(4),fmax_up(4) + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(1) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(2) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(3) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(4) ! conversion to relative frequencies + write(1,'(A13,F7.3)')'set vrange 0 ',1.1*(nquant+1)*freqmax(5) ! conversion to relative frequencies + close(1) + + open(unit=1,form='formatted',file='time_series_commands') + do i=1,nyear + write(1,'(A12)')'set cthick 4' + write(1,'(A12)')'set cstyle 0' + write(1,'(A11)')'set cmark 3' + write(1,'(A15)')'set digsiz 0.07' + if(nyear.eq.1)then + num_int_arr=nint(1.+n_colors)/2. + write(1,'(A11,I2)')'set ccolor ',color(num_int_arr) + !write(1,'(A11,I2)')'set ccolor ',color(nint(1.+n_colors)/2.) 
+ else + num_int_arr=nint(1+(i-1.)/(nyear-1.)*(n_colors-1.)) + write(1,'(A11,I2)')'set ccolor ',color(num_int_arr) + !write(1,'(A11,I2)')'set ccolor ',color(nint(1+(i-1.)/(nyear-1.)*(n_colors-1.))) + endif + write(1,'(A7,I2,A1)')'d &0(z=',i,')' + enddo + close(1) + + open(unit=1,form='formatted',file='coordinates') + write(1,'(F8.3)')lon + write(1,'(F8.3)')lat + close(1) + + if(include_rank_histograms)then + open(unit=1,form='formatted',file='msd_and_p-value_00') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(1) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(1) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value_06') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(2) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(2) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value_12') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(3) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(3) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value_18') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(4) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(4) + close(1) + open(unit=1,form='formatted',file='msd_and_p-value') + write(1,'(A15)')'set strsiz 0.17' + write(1,'(A18)')'set string 1 l 4 0' + write(1,'(A25,F5.3)')'draw string 1.3 &0 MSD=',msd(5) + write(1,'(A29,F5.3)')'draw string 3.0 &0 p-value=',p_value(5) + close(1) + endif +! +!*************************************************************** +! 
Write the GrADS binary files needed for the plots + + open(unit=1,form='unformatted',file=grads_data_time_series,access='DIRECT',& + recl=4,status='unknown') + do j=1,nhour + do i=1,ndays_year + irec=((i-1)+(j-1)*ndays_year)*(nyear+7) + do k=1,nyear + write(1,rec=irec+k)f(k,i,j) ! time series + enddo + write(1,rec=irec+nyear+1)quant(1,i,j) ! selected quantile + write(1,rec=irec+nyear+2)quant(10,i,j) + write(1,rec=irec+nyear+3)quant(25,i,j) + write(1,rec=irec+nyear+4)quant(50,i,j) + write(1,rec=irec+nyear+5)quant(75,i,j) + write(1,rec=irec+nyear+6)quant(90,i,j) + write(1,rec=irec+nyear+7)quant(99,i,j) + enddo + enddo + close(1) + + if(include_rank_histograms)then + open(unit=1,form='unformatted',file=grads_data_rank_histogram,access='DIRECT',& + recl=4,status='unknown') + do j=1,nhour+1 + irec=(nquant+1)*(j-1) + do i=1,nquant+1 + write(1,rec=irec+i)freq(i,j) !quantile bin frequencies + enddo + enddo + close(1) + endif + +!*************************************************************** +! Write the time series, quantiles and rank histogram data to text files + +! +! Empty data line +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +! +! Time series. 365 days per year, 4 times per day, from year1 to year2 +! 
+ open(unit=1,form='formatted',file=txt_data_time_series) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real year@hdr:integer'//& + ' day_of_year@hdr:integer hour@hdr:integer value@body:real' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real year@hdr:integer'//& + ' day_of_year@hdr:integer hour@hdr:integer value@body:real' + endif + write(1,*)trim(headerline) + do year=year1,year2 + do j=1,365 + do hour=0,18,6 + if(abs(f(year-year1+1,j,1+hour/6)).lt.abs(miss))then + if(l_code_in_char)then + write(1,'(A3,2F16.6,1X,I4,1X,I3,1X,I2,F16.6)')station_char,lon,lat,year,j,hour,f(year-year1+1,j,1+hour/6) + else + write(1,'(I6,2F16.6,1X,I4,1X,I3,1X,I2,F16.6)')station_int,lon,lat,year,j,hour,f(year-year1+1,j,1+hour/6) + endif + else + if(l_code_in_char)then + write(1,'(A3,2F16.6,1X,I4,1X,I3,1X,I2,A16)')station_char,lon,lat,year,j,hour,' NaN' + else + write(1,'(I6,2F16.6,1X,I4,1X,I3,1X,I2,A16)')station_int,lon,lat,year,j,hour,' NaN' + endif + endif + enddo + enddo + enddo + close(1) +! +! Selected quantiles of the observed distribution. 365 days per year, 4 time per day. +! 
+ open(unit=1,form='formatted',file=txt_data_quantiles) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer'//& + ' q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real day_of_year@hdr:integer hour@hdr:integer'//& + ' q01@body:real q10@body:real q25@body:real q50@body:real q75@body:real q90@body:real q99@body:real' + endif + write(1,*)trim(headerline) + do year=year1,year2 + do j=1,365 + do hour=0,18,6 + if(l_code_in_char)then + write(1,'(A3,2F16.6,1X,I3,1X,I2,7F16.6)')station_char,lon,lat,j,hour,& + quant(1,j,hour/6+1),quant(10,j,hour/6+1),quant(25,j,hour/6+1),quant(50,j,hour/6+1),& + quant(75,j,hour/6+1),quant(90,j,hour/6+1),quant(99,j,hour/6+1) + else + write(1,'(I6,2F16.6,1X,I3,1X,I2,7F16.6)')station_int,lon,lat,j,hour,& + quant(1,j,hour/6+1),quant(10,j,hour/6+1),quant(25,j,hour/6+1),quant(50,j,hour/6+1),& + quant(75,j,hour/6+1),quant(90,j,hour/6+1),quant(99,j,hour/6+1) + endif + enddo + enddo + enddo + close(1) +! +! Rank histogram of frequencies. 100 bins from 0-1% to 99-100. Before that, the MSD and p-values + ! 
+ if(include_rank_histograms)then + open(unit=1,form='formatted',file=txt_data_rank_histogram) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real hour@hdr:integer' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer' + endif + headerline=trim(headerline)//' msd@body:real p_value@body:real' + do j=0,nquant + write(number_of_bin,'(i2.2)')j + headerline=trim(headerline)//' f'//number_of_bin//'@body:real' + enddo + write(1,*)trim(headerline) + + do hour=0,24,6 + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,2F16.6)')& + station_char,lon,lat,hour,msd(1+hour/6),p_value(1+hour/6) + else + write(dataline,'(I6,2F16.6,1X,I2,2F16.6)')& + station_int,lon,lat,hour,msd(1+hour/6),p_value(1+hour/6) + endif + + do i=1,nquant+1 + write(frequency_value,'(F16.6)')freq(i,1+hour/6) + dataline=trim(dataline)//frequency_value + enddo + write(1,*)trim(dataline) + enddo + + close(1) + endif + + end program plots_for_one_station + + integer function day_of_year(month,day) +! +! Returns the ordinal day of the year (Jan 1 = 1) for day 'day' of month 'month' in a non-leap year; 'month' is not range-checked +! + implicit none + integer :: year,month,day ! 'year' is declared but never used + integer,parameter :: nmon=12 + integer :: days_before(nmon) ! days elapsed before the 1st of each month in a non-leap year + integer :: dnumber ! declared but never used + data days_before / 0,31,59,90,120,151,181,212,243,273,304,334 / + + day_of_year=days_before(month)+day + return + end function day_of_year + +! +! Rounds a date and hour to the nearest full 6 hours (00, 06, 12 or 18); hours 21-23 roll over to 00 of the next day +! +subroutine round_6h(year,month,day,hour,year1,month1,day1,hour1) +implicit none +integer :: year,month,day,hour ! time before rounding +integer :: year1,month1,day1,hour1 ! time after rounding +integer :: ndays_month ! 
number of days in a month +ndays_month=31 ! default; 30-day months and February are handled below +if(month.eq.4.or.month.eq.6.or.month.eq.9.or.month.eq.11)ndays_month=30 +if(month.eq.2)then + if((mod(year,4).eq.0.and.mod(year,100).gt.0).or.(mod(year,400).eq.0))then + ndays_month=29 + else + ndays_month=28 + endif +endif +year1=year +month1=month +day1=day +hour1=6*((hour+3)/6) ! integer division: nearest multiple of 6 (may give 24, reset to 0 below) +if(hour.ge.21)then ! Hour was rounded forward to 00 UTC -> increment day + hour1=0 + day1=day+1 + if(day1.gt.ndays_month)then ! increment month + day1=1 + month1=month1+1 + if(month1.gt.12)then ! increment year + month1=1 + year1=year+1 + endif + endif +endif +return +end subroutine round_6h + + + diff --git a/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics.f95 b/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics.f95 new file mode 100644 index 0000000000000000000000000000000000000000..46a8a608af26ddb7e8e1cd32fc327b9aac536c28 --- /dev/null +++ b/SYNOP/STATS/fortran-programs/rank_histogram_summary_statistics.f95 @@ -0,0 +1,380 @@ + program rank_histogram_summary_statistics +! +! Calculation of all-station rank histogram summary statistics: +! +! 1) distribution of p-values (in nbins_p bins) +! 2) Average frequencies in NQUANT+1 = 100 quantile bins +! (the quantile bin number is hard-coded!) +! +! The calculation is made for the UTC hours defined by +! the namelist parameters HOUR1, HOUR2, HOUR_STEP. It is assumed +! that the input file (INFILE) only includes data for these +! hours. +! +! Jouni Räisänen, University of Helsinki, August 2023 +! +!------------------------------------------------------------------------- +! +! INPUT FILE: +! +! infile: a text file including the rank histogram statistics for all stations +! +! The first line is a header. The other lines include: +! +! 1) station code (as a 3-character string, if l_code_in_char =.true. Else as an integer) +! 2) longitude +! 3) latitude +! 4) UTC hour (24 for 'all-UTC' statistics) +! 5) total number of observations for this UTC hour +! 
6) Mean squared deviation (MSD) from a flat quantile frequency histogram +! 7) p-value for MSD (= probability for getting at least as large MSD by chance) +! 8-107) frequencies in quantile space: 0-1%, 1-2% ... 98-99%, 99-100% +! +! Order of data assumed in the input file: +! +! line 1: header +! line 2: station 1, HOUR1 +! line 3: station 1, HOUR1 + HOUR_STEP +! line n = 1 + (HOUR2-HOUR1)/HOUR_STEP + 1 : station 1, HOUR2 +! line n+1: station 2, HOUR1 +! etc. +!-------------------------------------------------------------------------- +! +! OUTPUT FILES: +! +! outfile_p_values: p-values at individual stations (This far: only the all-UTC p-values!) +! This file can be used for plotting the p-values on a map in python +! +! The first line is a header for eventual conversion to ODB. The other lines include +! +! 1) Station code as integer. If the original code was a 3-char string (GRUAN), +! the ordinal number of the station is written (for plotting in python) +! 2) longitude +! 3) latitude +! 4) p-value for MSD +!---------------------------------------------------------------------------- +! outfile_p_freq: frequency histogram of p-values (for different UTC hours separately) +! This file can be used for plotting a histogram of the p-value frequencies in python! +! +! The first line is a header. The others include +! +! 1) UTC hour +! 2-nbins_p+1) frequencies of p-values in nbins_p bins +!----------------------------------------------------------------------------- +! outfile_q = histogram of all-station-mean quantile bin frequencies (for different UTC hours separately) +! +! The first line is a header. The others include +! +! 1) UTC hour +! 2-101) quantile bin frequencies for 0-1%, 1-2% ... 99-100%. +!------------------------------------------------------------------------------ +! 
outfile_grads (only if grads_out=.true.): p-value frequencies and quantile bin frequencies in GrADS binary format +!------------------------------------------------------------------------------ +! text_output: frequencies of selected small p-values and some other data in free-text format, for each UTC hour separately. +!----------------------------------------------------------------------------------------------------------------------------- +! +! NAMELIST PARAMETERS: +! +! infile: see above +! outfile_p_values: see above +! outfile_p_freq: see above +! outfile_q: see above +! outfile_grads: see above +! text_output: see above +! nbins_p: number of p-value bins in outfile_p_freq +! grads_out: if .true. (.false.), outfile_grads is written (not written) +! +! hour1,hour2,hour_step: UTC hours in output = hour1, hour1+hour_step ... hour2 +! +! l_code_in_char: if .true., the station codes in infile are assumed to be 3-char string (else: integer) +! miss: missing value code. Default = -9.99e6. All values f with |f| >= |miss| are treated as missing. + + IMPLICIT NONE + INTEGER :: I,J ! loop variables + INTEGER,PARAMETER :: NHOUR=24 ! number of UTC hours (0-23); array slot NHOUR+1 holds the all-UTC (hour 24) statistics + INTEGER,PARAMETER :: NQUANT=99 ! number of quantiles from 1% to 99% + INTEGER,PARAMETER :: NPMAX=100 ! maximum bin number for frequency diagram of p-values + INTEGER,PARAMETER :: NSTATMAX=10000 ! maximum number of stations + REAL :: frequency_q1(nquant+1) ! quantile frequencies at a single station and UTC hour + REAL :: frequency_q(nquant+1,nhour+1) !all-station mean quantile frequencies for each UTC hour + REAL :: frequency_p(npmax,nhour+1) !frequencies of p-values for each UTC hour + REAL :: max_freq_p,max_freq_q ! maximum of p-value and quantile frequencies (for GrADS only) + REAL :: frequency_p01(nhour+1) ! frequency of p_values <= 0.1 % + REAL :: frequency_p1(nhour+1) ! same, <= 1 % + REAL :: frequency_p5(nhour+1) ! same, <= 5 % + character*2 :: number_of_bin ! 
two-digit code for quantiles for outfile_q and outfile_p_values headers + character*16 :: frequency_value ! frequency written in F16.6 format + character*160 :: infile,outfile_p_freq,outfile_p_values,outfile_q ! input and output files (see above) + character*160 :: outfile_grads,text_output ! output files (see above) + INTEGER,PARAMETER :: L_dataline=1700 ! maximum length of lines in text files + character*1700 :: dataline,headerline,emptyline ! string variables for input and output + + INTEGER :: nbins_p ! number of p-value bins in outfile_p_freq + INTEGER :: hour1,hour2,hour_step ! input / output UTC hours (see above) + INTEGER :: hour ! UTC hour + INTEGER :: n_line ! line count for lines read from infile + INTEGER :: n_station ! station count + INTEGER :: pbin ! index for p-value bin + + LOGICAL :: HOUR_USED(nhour+1) ! .true. for hours included in (hour1, hour1+hour_step,...,hour2) + INTEGER :: nhour_used ! number of UTC hours used + INTEGER :: station1_int ! the station code read from infile (RHARM, FMI) + CHARACTER*3 :: station1_char ! the station code read from infile (GRUAN) + LOGICAL :: l_code_in_char ! if .true., station codes in 3-char strings assumed + REAL :: longitude1,latitude1 !longitude and latitude + INTEGER :: station(nstatmax) !station codes of all stations + REAL :: longitude(nstatmax),latitude(nstatmax) ! station longitudes and latitudes + REAL :: p_value1, msd ! p-value and MSD read from infile + REAL :: p_value(nhour+1,nstatmax) ! p-values at individual stations for map + REAL :: total_number ! number of observations read from infile + + LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing + INTEGER :: IREC ! record number for GrADS output +! + REAL :: MISS ! Missing value code + INTEGER :: valid_stations(nhour+1) ! 
stations with valid data + + NAMELIST/param/infile,outfile_p_values,outfile_p_freq,outfile_q,& + outfile_grads,text_output,& + nbins_p,grads_out,& + hour1,hour2,hour_step,& + l_code_in_char,miss + + miss=-9.99e6 ! default for missing value + READ(*,NML=PARAM) + valid_stations=0 +! +! Which UTC hours are used and what is their total number? +! + hour_used=.false. + nhour_used=0. + do i=hour1,hour2,hour_step + hour_used(i+1)=.true. + nhour_used=nhour_used+1 + enddo +! +! Empty data line +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +!---------------------------------------------------------------------- +! +! Initialize counters etc. +! + frequency_p=0. + frequency_q=0. + frequency_p01=0. + frequency_p1=0. + frequency_p5=0. + n_station=0 + n_line=0 + p_value=miss +! +! Read the contents of the input file +! + open(unit=1,form='formatted',status='old',file=infile) + read(1,*) ! the first line in infile is a header + write(*,*)' Opened infile: ', infile + + do while(.true.) ! Loop continued until end of file reached + if(l_code_in_char)then + 1 read(1,*,err=1,end=3)& + station1_char,longitude1,latitude1,hour,total_number,msd,p_value1,(frequency_q1(i),i=1,nquant+1) + else + 2 read(1,*,err=2,end=3)& + station1_int,longitude1,latitude1,hour,total_number,msd,p_value1,(frequency_q1(i),i=1,nquant+1) + endif +! write(*,*)'n_station,Hour step:',n_station,hour_step + if(hour_used(hour+1))then ! Only use the statistics for the UTC hours define by hour1, hour2, hour_step + n_line=n_line+1 + if(mod(n_line,nhour_used).eq.1)then ! This assumes that all the required UTC hours are always included in infile + n_station=n_station+1 + if(l_code_in_char)then + station(n_station)=n_station + else + station(n_station)=station1_int + endif + longitude(n_station)=longitude1 + latitude(n_station)=latitude1 + endif + if(total_number.gt.0.and.abs(frequency_q1(1)).lt.abs(miss))then ! Only include stations with some valid data + p_value(hour+1,n_station)=p_value1 ! 
for map of p-values + valid_stations(hour+1)=valid_stations(hour+1)+1 + do i=1,nquant+1 + frequency_q(i,hour+1)=frequency_q(i,hour+1)+frequency_q1(i) ! update the quantile bin frequencies + enddo + pbin=int(max(1.,min(1+(p_value1*nbins_p),real(nbins_p)))) ! p-value bin, clamped to 1..nbins_p + if(abs(p_value1).lt.abs(miss))frequency_p(pbin,hour+1)=frequency_p(pbin,hour+1)+1 ! skip missing p-values +! +! Frequencies of small p-values: +! + if(p_value1.le.0.001.and.abs(p_value1).lt.abs(miss))then + frequency_p01(hour+1)=frequency_p01(hour+1)+1. + endif + if(p_value1.le.0.01.and.abs(p_value1).lt.abs(miss))then + frequency_p1(hour+1)=frequency_p1(hour+1)+1. + endif + if(p_value1.le.0.05.and.abs(p_value1).lt.abs(miss))then + frequency_p5(hour+1)=frequency_p5(hour+1)+1. + endif + endif + + endif + enddo +3 continue + close(1) + write(*,*)' Read infile: # of lines, # of stations: ',n_line,n_station + +! if(n_line.ne.n_station*nhour_used)then +! write(*,*)'Something wrong !!!' +! stop +! endif +!--------------------------------------------------------------------- +! Divide all the frequencies by the number of stations +! + do hour=hour1,hour2,hour_step ! NOTE(review): valid_stations(hour+1)=0 is not guarded against + do i=1,nquant+1 + frequency_q(i,hour+1)=frequency_q(i,hour+1)/valid_stations(hour+1) + enddo + do i=1,nbins_p + frequency_p(i,hour+1)=frequency_p(i,hour+1)/valid_stations(hour+1) + enddo + frequency_p01(hour+1)=frequency_p01(hour+1)/valid_stations(hour+1) + frequency_p1(hour+1)=frequency_p1(hour+1)/valid_stations(hour+1) + frequency_p5(hour+1)=frequency_p5(hour+1)/valid_stations(hour+1) + enddo +! +! Find the normalized maximum frequencies for GrADS axis limits (taken from the all-UTC slot nhour+1 only) +! + max_freq_p=0 + max_freq_q=0 + do i=1,nbins_p + if(frequency_p(i,nhour+1).gt.max_freq_p/nbins_p)then + max_freq_p=frequency_p(i,nhour+1)*nbins_p + endif + enddo + max_freq_p=1.02*max_freq_p + + do i=1,nquant+1 + if(frequency_q(i,nhour+1).gt.max_freq_q/(nquant+1))then + max_freq_q=frequency_q(i,nhour+1)*(nquant+1) + endif + enddo + max_freq_q=1.02*max_freq_q +! 
+!--------------------------------------------------------------------- +! +! Write the text output file: +! 1) number of stations +! 2) frequencies of selected small p-values + + open(unit=1,form='formatted',file=text_output) + write(1,'(A20,I5)')'Number of stations: ',n_station ! I5: n_station may reach NSTATMAX=10000 (I3 printed *** from 1000 up) + write(1,'(A24,I3)')'Number of p-value bins: ',nbins_p + if(grads_out)then + write(1,'(A23,F6.3)')'Axis_limit_p_for_GrADS ',max_freq_p + write(1,'(A23,F6.3)')'Axis_limit_q_for_GrADS ',max_freq_q + endif + do hour=hour1,hour2,hour_step + write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.001 ',hour,' UTC: ',& + frequency_p01(hour+1) + write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.01 ',hour,' UTC: ',& + frequency_p1(hour+1) + write(1,'(A22,I2.2,A6,F5.3)')'Frequency, p <= 0.05 ',hour,' UTC: ',& + frequency_p5(hour+1) + enddo + close(1) +! +! Write the average quantile bin frequencies and p-value bin frequencies +! to ODB compatible text files + +!-------------------------------------------------------------------------- +! +! Open the quantile bin frequency output file and write its first line. +! + open(unit=2,form='formatted',status='unknown',file=outfile_q) + headerline=emptyline + headerline='hour@hdr:integer ' + do j=0,nquant + write(number_of_bin,'(i2.2)')j + headerline=trim(headerline)//' f'//number_of_bin//'@body:real' + enddo + write(2,*)trim(headerline) +! +! Write the data lines for each UTC hour used +! + do hour=hour1,hour2,hour_step + j=hour+1 + write(dataline,'(I6)')& + hour + do i=1,nquant+1 + write(frequency_value,'(F16.6)')frequency_q(i,j) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + enddo + close(2) +! +! Write the file giving the p-values for individual stations +! + open(unit=2,form='formatted',status='unknown',file=outfile_p_values) + headerline=' station@hdr:integer longitude@hdr:real latitude@hdr:real p_value@body:real' + write(2,*)trim(headerline) + do i=1,n_station + write(2,'(7X,I6,3F16.6)')& ! 
this far: just fwrite the station ordinal number, in case + ! the station code was a character string + station(i),longitude(i),latitude(i),p_value(nhour+1,i) + enddo + close(2) +! +! Open the p_value frequency output file and write its first line. +! + open(unit=2,form='formatted',status='unknown',file=outfile_p_freq) + headerline=emptyline + headerline='hour@hdr:integer ' + do j=1,nbins_p + write(number_of_bin,'(i2.2)')j-1 + headerline=trim(headerline)//' p'//number_of_bin//'@body:real' + enddo + write(2,*)trim(headerline) +! +! Write the data lines for each UTC hour used +! + do hour=hour1,hour2,hour_step + j=hour+1 + write(dataline,'(I6)')& + hour + do i=1,nbins_p + write(frequency_value,'(F16.6)')frequency_p(i,j) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + enddo + close(2) +! +!------------------------------------------------------------------------------- +! +! Open the file for GrADS output and write its contents, for easier visualisation? +! + if(grads_out)then + open(unit=11,form='unformatted',file=outfile_grads,access='DIRECT',& + recl=4,status='unknown') +! + do hour=hour1,hour2,hour_step + j=(hour-hour1)/hour_step+1 + do i=1,nbins_p + irec=(j-1)*(nbins_p+nquant+1)+i + write(11,rec=irec)frequency_p(i,hour+1) + enddo + do i=1,nquant+1 + irec=(j-1)*(nbins_p+nquant+1)+nbins_p+i + write(11,rec=irec)frequency_q(i,hour+1) +! write(*,*)hour,i,frequency_q(i,hour+1) + enddo + enddo + close(11) + + endif ! if (grads_out) + + END program rank_histogram_summary_statistics diff --git a/SYNOP/STATS/fortran-programs/rank_histograms_bootstrap.f95 b/SYNOP/STATS/fortran-programs/rank_histograms_bootstrap.f95 new file mode 100644 index 0000000000000000000000000000000000000000..3a075742b6542832c440963015abf3a2883af230 --- /dev/null +++ b/SYNOP/STATS/fortran-programs/rank_histograms_bootstrap.f95 @@ -0,0 +1,662 @@ + PROGRAM rank_histograms_bootstrap +! +! For estimating the sampling distribution of the Mean Square Deviation +! 
(MSD) statistics for quantile bin rank histogram frequencies. +! These distributions can be used to answer the following question: +! +! "If we have LENGTH_IN_YEARS years of model data and its rank histogram +! against observations has a given MSD value, what is the probability +! of getting such a large MSD just as a result of internal variability"? +! +! The sampling distribution is formed using a bootstrap: +! +! Each bootstrap realization calculates the rank histograms by selecting a total +! of LENGTH_IN_YEARS years of observed data randomly divided in chunks. Their +! length is defined as follows: +! +! 1) The number of chunks is at least MIN_CHUNK_NUMBER +! 2) If not restricted by (1) there are CHUNKS_PER_YEAR chunks for each year +! 3) The length is rounded to the nearest full number of days, so that the +! total length is LENGTH_IN_YEARS years (which may be non-integer) +! +! For each realization, the Mean Squared Deviation (MSD) of the resulting rank +! histogram relative to the theoretically expected flat histogram is calculated. +! +! Jouni Räisänen, July 2023 +! +!-------------------------------------------------------------------------------- +! Input files (text format, retrieved from ODB with the 'odb sql select' command) +!-------------------------------------------------------------------------------- +! +! INFILE: Observed time series at the station location. +! The first line is a header. The other lines include: +! +! 1) year +! 2) month +! 3) day +! 4) hour +! 5) data value +! +! QUANTILE_FILE: quantile values, separately for each day of the year! +! The first line is a header. The other lines include: +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) month +! 5) day +! 6) hour +! 7-105) The 99 quantiles from 1 % to 99 % +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! 
+!-------------------------------------------------------------------------------- +! Output files +!-------------------------------------------------------------------------------- +! +! OUTFILE: MSD (mean squared deviation) distribution of quantile frequencies +! from bootstrap tests. +! +! The first line is a header. The other lines include +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) number of realization +! 5 to N-1) MSD values for HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! N) MSD calculated from frequencies averaged over all the UTC hours within +! HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! +! OUTFILE_GRADS: MSD distribution of quantile frequencies in a GrADS binary file (if grads_out=.true.) +! +!------------------------------------------------------------------------------------------- +! Namelist parameters +!------------------------------------------------------------------------------------------- +! +! infile : file for observation time series (see above) +! quantile_file : file for pre-computed quantile values (see above) +! outfile : output file for bootstrap MSD statistics in ODB compatible text format +! outfile_grads : output file in GrADS binary format +! grads_out: if .true., outfile_grads is also written +! l_code_in_char: if .true., 3-character station codes assumed (else integer) +! lorder: if .true., the output files are ordered from smallest to largest MSD values +! +! hour1 = first UTC hour analysed +! hour2 = last UTC hour analysed +! hour_step = step of analysed UTC hours +! l_round_hour : if .true., hours are rounded to the nearest analysis hour. +! if .false., data for non-matching hours is ignored +! +! year1 = first year used (if data available) +! year2 = last year used (if data available) +! +! miss = missing value code. All input data f with abs(f) >= abs(miss) is treated as missing +! +!
length_in_years = length of bootstrap samples in years (does not need to be an integer) +! +! min_chunk_number = minimum number of separate data "chunks" for one bootstrap realization +! chunks_per_year = number of bootstrap "chunks" per year, if not limited by min_chunk_number +! nreal = number of bootstrap realizations +! l_all_hours_required : if .true., only those days are used for which data for all the required +! UTC hours is available, even in forming the single-UTC-hour bootstrap +! samples. This should be set to .false. for those data sets (e.g., soundings) +! for which data is frequently missing for some UTC hours + + ! (in QUANTILE_FILE) are needed: +! +! INFILE: text file of the simulated or observed time series at the station location. +! 1st line for each UTC hour is a header +! The other lines include year, month, day, hour and the data value +! +! QUANTILE_FILE: quantile values, separately for each day of the year +! 1st line is a header +! The other lines include station code, longitude and latitude, +! month, day and hour, and the 99 quantiles for each day+hour of the year +! +! Only data for the years YEAR1-YEAR2 are taken into account, even if other years +! are present in INFILE. +! +! The output is written in OUTFILE. +! 1st line is the header. +! The other lines include the station code, station coordinates, +! realization/rank number and the corresponding MSD values +! for each UTC hour + their combination. +! +! If (GRADS_OUT), output is also written in a GrADS binary file for easier plotting. +! +!--------------------------------------------------------------------------------------- +! +! LIMITATIONS of the bootstrap method: +! +! 1) An in-sample bootstrap will likely produce a narrower sampling distribution of +! quantile bin frequencies, and therefore lower MSD:s, than would be obtained +! if observations from a period not used for calculating the quantiles were used. + +!
2) Autocorrelation on time scales exceeding 1/CHUNKS_PER_YEAR years is neglected +! This is also expected to reduce the sampling variability in quantile bin frequencies, +! resulting in a low bias in the MSD:s (particularly over sea areas for temperature?) +! +! For both reasons, there will be too many "false positives" when comparing +! a model simulation with observations. +! +!---------------------------------------------------------------------------------------- + + IMPLICIT NONE + INTEGER :: I,J,K,Q,R + INTEGER,PARAMETER :: NMON=12, NDAY=31, NHOUR=24, NQUANT=99 + REAL :: quantiles(nquant,nmon,nday,nhour) ! quantiles for each month, day and UTC hour (last index = UTC hour + 1) + REAL :: q1(nquant) ! quantiles for one day + hour + REAL :: value1 ! individual data values read from the input file + INTEGER :: ntimes_hour(nhour) ! total number of times with data for an UTC hour + CHARACTER*160 :: infile,outfile,quantile_file ! file names (see above) + CHARACTER*160 :: outfile_grads ! grads output for testing? + INTEGER :: loutfile_grads ! length of outfile_grads name, to circumvent a memory leak!? (6.5.2013) + CHARACTER*2 :: hh ! hour code for ODB header + INTEGER,PARAMETER :: L_dataline=500 ! maximum length of data lines in output + character*500 dataline,headerline,emptyline ! for writing lines in output text files + character*16 msd_value ! MSD value written in F16.6 format + ! + INTEGER :: yyear,mmonth,dday,hhour ! year, month, day and hour from file + INTEGER :: year,month,day,hour ! year, month, day and hour after eventual rounding of hours + INTEGER :: day1 ! running number for the first day in a bootstrap chunk + INTEGER :: year1,year2 ! years of data used in forming the bootstrap samples + INTEGER :: hour1,hour2,hour_step ! list of hours analysed + LOGICAL :: L_ROUND_HOUR ! if .true., hour rounded to the nearest output hour + LOGICAL :: HOUR_USED(nhour) ! .true. for those UTC hours that are used + INTEGER :: nhour_used ! number of UTC hours used + INTEGER :: station_int !
station code read from quantile file, as integer (for synop stations) + CHARACTER*3 :: station_char ! ------------------------------ as characters (for soundings?) + LOGICAL :: l_code_in_char ! .true. for station code in characters + REAL :: longitude,latitude ! station longitude and latitude from the quantile file + + LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing + INTEGER :: IREC ! record number for GrADS output + LOGICAL :: LORDER ! if .true., MSD output in ascending order + + INTEGER,PARAMETER :: nmaxdays=100000,nmaxyears=250 ! array limits: valid days per UTC hour, and years (year2-year1+1) + INTEGER :: rank1(nmaxyears,nmon,nday,nhour) ! quantile ranks (1-100) of the observed values + INTEGER :: rank(nmaxdays,nhour) ! quantile ranks (1-100) of the observed values in chronological order + INTEGER :: rank_all_UTC(nmaxdays,nhour) ! same for days in which data is available for all UTC hours + INTEGER :: NDAYS(nhour+1) ! number of days in the observed time series. "nhour+1" represents the "all-UTC" value. + LOGICAL :: L_all_hours_required ! if .true., all those days are omitted when data is missing for + ! at least one of the output UTC hours. Otherwise, all valid observations are included for each UTC hour separately, + ! but the all-UTC-hour statistics only uses data for all UTC hours available. + LOGICAL:: day_OK(nhour+1) ! will the day be included in bootstrap? + + REAL :: length_in_years ! total length of bootstrap samples in years (see above) + INTEGER :: min_chunk_number ! minimum number of chunks per one bootstrap sample + INTEGER :: chunks_per_year ! number of chunks per year + INTEGER :: number_of_chunks ! number of chunks per bootstrap sample + ! This is replaced by min_chunk_number if it is larger. + INTEGER :: chunk_length_UTC(nhour+1) ! chunk length separately for each UTC hour + INTEGER :: number_of_chunks_UTC(nhour+1) ! number of chunks separately for each UTC hour + INTEGER :: nreal ! number of bootstrap realizations + REAL :: rnd !
for random numbers + + INTEGER,PARAMETER :: NMAX_REAL=10000 ! maximum number of realizations + REAL :: MSD(nmax_real,nhour+1) ! nhour+1 to accommodate the all-UTC statistics + REAL :: MSD1(nmax_real) ! MSD realizations for one UTC hour (to facilitate easy ordering) + REAL :: frequency(nquant+1,nhour+1) ! quantile bin frequencies + REAL :: frequency_all_UTC(nquant+1,nhour+1) ! quantile bin frequencies for the days in the "all-UTC" sample + REAL :: expected_freq ! expected quantile bin frequency (1/100) + + REAL :: MISS ! missing value code +! +!----------------------------------------------------------------------------------------------------- +! + NAMELIST/param/infile,outfile,quantile_file,& + hour1,hour2,hour_step,year1,year2,grads_out,outfile_grads,lorder,& + length_in_years,min_chunk_number,chunks_per_year,nreal,& + l_code_in_char,l_round_hour,l_all_hours_required,miss + + expected_freq=1./(nquant+1) ! frequencies in a flat rank histogram +! +! Defaults for the optional namelist parameters. The logical switches and the GrADS file +! name get explicit defaults so that they are never used undefined when they are omitted +! from the namelist. +! + lorder=.true. ! by default, ascending order of MSDs in the output + grads_out=.false. ! by default, no GrADS output + outfile_grads=' ' + l_code_in_char=.false. ! by default, integer station codes + l_round_hour=.false. ! by default, data for non-matching hours is ignored + l_all_hours_required=.false. ! by default, each UTC hour uses all of its own valid data + min_chunk_number=2 ! default for minimum number of chunks + chunks_per_year=4 ! default. Decrease of this will reduce the autocorrelation + ! problem but it will also reduce the independence + ! (thus the variation) between the bootstrap samples + nreal=1000 ! default number of bootstrap realizations + miss=-9.99e6 + READ(*,NML=PARAM) +! +! Stop with a message if the request does not fit in the fixed-size arrays +! (msd/msd1 hold nmax_real realizations, rank1 holds nmaxyears years) or if +! hour_step is zero (it is used as a loop step and as a divisor). +! + if(nreal.gt.nmax_real)then + write(*,*)'nreal exceeds nmax_real:',nreal,nmax_real + stop 1 + endif + if(year2-year1+1.gt.nmaxyears)then + write(*,*)'year1...year2 spans more than nmaxyears years:',year1,year2,nmaxyears + stop 1 + endif + if(hour_step.eq.0)then + write(*,*)'hour_step must not be zero' + stop 1 + endif +! +! Length of the GrADS output file name. This should not really be needed. +! + loutfile_grads=len(trim(outfile_grads)) +! +! Which UTC hours are used and what is their total number? +! + hour_used=.false. + nhour_used=0 + do i=hour1,hour2,hour_step + if(i.lt.0.or.i.gt.nhour-1)then ! hour_used is dimensioned for UTC hours 0...23 only + write(*,*)'UTC hour outside 0-23 requested:',i + stop 1 + endif + hour_used(i+1)=.true. + nhour_used=nhour_used+1 + enddo +! +! Empty data line for ODB-compatible output +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +! write(*,*)'Check 1',len(trim(outfile_grads)),loutfile_grads,outfile_grads +!---------------------------------------------------------------------- +! +! Read the contents of the quantile_file to array 'quantiles' + !
+ quantiles=miss + open(unit=1,form='formatted',status='old',file=quantile_file) + write(*,*)'Opened quantile file' + do while(.true.) + if(l_code_in_char)then +! The err=1 specifier ensures that header lines are skipped +1 read(1,*,err=1,end=3)& + station_char,longitude,latitude,month,day,hour,& + (q1(i),i=1,nquant) + else +! The err=2 specifier ensures that header lines are skipped +2 read(1,*,err=2,end=3)& + station_int,longitude,latitude,month,day,hour,& + (q1(i),i=1,nquant) + endif + + do i=1,nquant + quantiles(i,month,day,hour+1)=q1(i) + enddo +! copy quantiles from 28 February to 29 February +! (in case the latter is not in the quantile file as it should) + if(month.eq.2.and.day.eq.28)then + do i=1,nquant + quantiles(i,month,day+1,hour+1)=q1(i) + enddo + endif + enddo +3 continue + write(*,*)'Quantile file read' + close(1) +! +!---------------------------------------------------------------------- +! +! Open the input data file (i.e. the station observations for the variable of interest). +! The data must be in chronological order. + + open(unit=1,form='formatted',status='old',file=infile) + +! Read the data from the input file and find their ranks in the quantile distribution. +! rank1 is pre-filled with the missing value code, so that every (year,month,day,hour) +! slot without a valid observation stays flagged as missing. + + rank1=miss + do while(.true.) +! The err=11 specifier ensures that header lines are skipped +11 read(1,*,err=11,end=12)yyear,mmonth,dday,hhour,value1 + ! write(*,*)year,month,day,hour,value1 + + if(l_round_hour)then + call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step) + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif +! +! Data with abs(value) >= abs(miss) is missing: the comparison must be strict (.lt.), +! otherwise a value equal to the missing value code would be ranked as a valid observation. +! + if(year.ge.year1.and.year.le.year2.and.hour_used(hour+1).and.abs(value1).lt.abs(miss))then + call find_rank(value1,quantiles(1,month,day,hour+1),nquant,& + rank1(year-year1+1,month,day,hour+1),miss) + endif + enddo +12 continue + close(1) +!--------------------------------------------------------------------------------------------------- + !
Rewrite the ranks for easier implementation of the bootstrap (= omit missing values) +!--------------------------------------------------------------------------------------------------- +! rank1 was initialised to the missing value code before the observations were read, so every +! slot that did not receive a valid rank (including calendar days that do not exist) still +! equals miss and is left out of the compact lists built below. + ndays=0 + do year=year1,year2 + do month=1,nmon + do day=1,nday + day_OK=.true. + do hour=hour1,hour2,hour_step + if(rank1(year-year1+1,month,day,hour+1).eq.miss)then + day_OK(hour+1)=.false. + day_OK(nhour+1)=.false. ! all-UTC: day excluded if any data is missing + endif + enddo + ! all data excluded if some of the UTC hours is missing and all hours are required. + if(l_all_hours_required.and..not.day_OK(nhour+1))then + day_OK=.false. + endif +! +! Form the list of valid days and calculate its length separately for each UTC hour: +! + do hour=hour1,hour2,hour_step + if(day_OK(hour+1))then + ndays(hour+1)=ndays(hour+1)+1 + rank(ndays(hour+1),hour+1)=rank1(year-year1+1,month,day,hour+1) + endif + enddo + ! + ! All-UTC list of days and ranks. Only days with valid data for all UTC hours are included. + ! + if(day_OK(nhour+1))then + ndays(nhour+1)=ndays(nhour+1)+1 + do hour=hour1,hour2,hour_step + rank_all_UTC(ndays(nhour+1),hour+1)=rank1(year-year1+1,month,day,hour+1) + enddo + endif + + enddo ! end of day-loop + enddo ! end of month-loop + enddo ! end of year-loop + + write(*,*)'ndays',(ndays(hour+1),hour=hour1,hour2,hour_step),ndays(nhour+1) + +!-------------------------------------------------------------------------------------- +! Bootstrap parameters (nreal realizations, each consisting of 'number_of_chunks' 'chunk_length'-day periods). +! +! These parameters must be defined for each UTC hour separately, +! because the length of the time series may be different +! +! The length of a "chunk" is not allowed to exceed half of the length of the period of data +! + number_of_chunks=nint(length_in_years*chunks_per_year) ! default number of chunks per realization + number_of_chunks=max(number_of_chunks,min_chunk_number) ! reset to minimum if needed +! +!
Recalculate chunk lengths and numbers of chunks for each UTC hour, based on the real number of +! days that are available and the requirement that the chunk length must not exceed half of +! the total data sample. +! +! The chunk length becomes zero when fewer than two valid days exist for an UTC hour. +! In that case the number of chunks is set to zero instead of dividing by zero. + + do hour=hour1,hour2,hour_step + chunk_length_UTC(hour+1)=min(nint((365.25*length_in_years)/number_of_chunks),ndays(hour+1)/2) + if(chunk_length_UTC(hour+1).gt.0)then + number_of_chunks_UTC(hour+1)=nint((365.25*length_in_years)/chunk_length_UTC(hour+1)) + else + number_of_chunks_UTC(hour+1)=0 + endif + enddo + chunk_length_UTC(nhour+1)=min(nint((365.25*length_in_years)/number_of_chunks),ndays(nhour+1)/2) + if(chunk_length_UTC(nhour+1).gt.0)then + number_of_chunks_UTC(nhour+1)=nint((365.25*length_in_years)/chunk_length_UTC(nhour+1)) + else + number_of_chunks_UTC(nhour+1)=0 + endif + + write(*,*)'number_of_chunks_UTC',(number_of_chunks_UTC(hour+1),hour=hour1,hour2,hour_step),number_of_chunks_UTC(nhour+1) + write(*,*)'chunk_length_UTC',(chunk_length_UTC(hour+1),hour=hour1,hour2,hour_step),chunk_length_UTC(nhour+1) +!************************************************************************************** +! Bootstrap begins +!************************************************************************************** + msd=0. + do r=1,nreal + frequency=0 + + +! Selection of chunks and calculation of rank frequencies (each UTC hour separately) +! Each selected day adds 1/(number of chunks * chunk length) to the frequency of its rank bin, +! so that the frequencies of one realization sum to one. + + do j=hour1,hour2,hour_step + + if(ndays(j+1).gt.1)then + + do k=1,number_of_chunks_UTC(j+1) + call random_number(rnd) + day1=1+(ndays(j+1)-chunk_length_UTC(j+1)+1)*rnd + do i=day1,day1+chunk_length_UTC(j+1)-1 + frequency(rank(i,j+1),j+1)=& + frequency(rank(i,j+1),j+1)+1./(number_of_chunks_UTC(j+1)*chunk_length_UTC(j+1)) + enddo + enddo + + endif + + enddo +! +! Calculation of the MSD for each UTC hour separately. +! + do j=hour1,hour2,hour_step + if(ndays(j+1).gt.1)then + do i=1,nquant+1 + msd(r,j+1)=msd(r,j+1)+((frequency(i,j+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + else + msd(r,j+1)=miss + endif + enddo +! +! Selection of chunks and calculation of rank frequencies (all-UTC-hour case, +! only including data for the days for which all UTC hours are available) +! + frequency_all_UTC=0.
+ if(ndays(nhour+1).gt.1)then + do k=1,number_of_chunks_UTC(nhour+1) + call random_number(rnd) +! the same randomly chosen chunk of days is used for all UTC hours + day1=1+(ndays(nhour+1)-chunk_length_UTC(nhour+1)+1)*rnd + do i=day1,day1+chunk_length_UTC(nhour+1)-1 + do j=hour1,hour2,hour_step + frequency_all_UTC(rank_all_UTC(i,j+1),j+1)=& + frequency_all_UTC(rank_all_UTC(i,j+1),j+1)+1./(number_of_chunks_UTC(nhour+1)*chunk_length_UTC(nhour+1)) + enddo + enddo + enddo + endif +! +! Calculation of the all-UTC MSD +! The bin frequencies are first averaged over the UTC hours used (equal weight for each hour), +! and the MSD is then calculated from the averaged frequencies. +! + if(ndays(nhour+1).gt.1)then + do i=1,nquant+1 + do j=hour1,hour2,hour_step + frequency_all_UTC(i,nhour+1)=& + frequency_all_UTC(i,nhour+1)+frequency_all_UTC(i,j+1)/nhour_used + enddo + msd(r,nhour+1)=msd(r,nhour+1)+((frequency_all_UTC(i,nhour+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + else + msd(r,nhour+1)=miss + endif + +!********************************************************************* + enddo ! end of the bootstrap (r) loop +!********************************************************************* + +!-------------------------------------------------------------------------------------------------- +! +! Ordering of the MSD:s in ascending order? +! Again, first the individual UTC hours and then the all-UTC MSD:s +! Each column of msd is copied to msd1, and subroutine order writes the sorted values +! back to the same column. After this, the r:th output line holds the r:th smallest MSD +! of each column, not the values of a single bootstrap realization. + + if(lorder)then + do j=hour1,hour2,hour_step + do r=1,nreal + msd1(r)=msd(r,j+1) + enddo + call order(msd1,msd(1,j+1),nreal) + enddo + do r=1,nreal + msd1(r)=msd(r,nhour+1) + enddo + call order(msd1,msd(1,nhour+1),nreal) + endif +!------------------------------------------------------------------------------------------------ +! +! Open the output file and write its first line. +!
+ open(unit=2,form='formatted',status='unknown',file=outfile) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real realization@hdr:integer' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real realization@hdr:integer' + endif +! one MSD column per UTC hour used (msd00, msd06, ...), followed by the all-UTC column msd24 + do j=hour1,hour2,hour_step + write(hh,'(i2.2)')j + headerline=trim(headerline)//' msd'//hh//'@body:real' + enddo + headerline=trim(headerline)//' msd24'//'@body:real' + write(2,*)trim(headerline) + ! + ! Write the MSD values to the output file: first for each + ! individual UTC hour, then the overall statistics + ! + do r=1,nreal + dataline=emptyline + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,I6)')station_char,longitude,latitude,r + else + write(dataline,'(I6,2F16.6,I6)')station_int,longitude,latitude,r + endif + do j=hour1,hour2,hour_step + write(msd_value,'(F16.6)')msd(r,j+1) + dataline=trim(dataline)//msd_value + enddo + write(msd_value,'(F16.6)')msd(r,nhour+1) + dataline=trim(dataline)//msd_value + write(2,*)trim(dataline) +! a few sample lines are echoed to standard output + if(r.eq.1.or.r.eq.500.or.r.eq.1000)write(*,*)r,trim(dataline) + enddo + + close(2) +! ----------------------------------------------------------------- +! +! Output also as GrADS binaries, for visualisation in GrADS? +! One value per direct-access record: nreal records for each UTC hour used, +! followed by nreal records for the all-UTC MSD. +! NOTE(review): recl=4 assumes that the record length is given in bytes; this unit is compiler-dependent. +! + if(grads_out)then + open(unit=11,form='unformatted',file=outfile_grads(1:loutfile_grads),access='DIRECT',& + recl=4,status='unknown') + do hour=hour1,hour2,hour_step + j=1+(hour-hour1)/hour_step + do i=1,nreal + write(11,rec=(j-1)*nreal+i)msd(i,hour+1) + enddo + enddo +! here j still holds the index of the last UTC hour written, so the all-UTC records follow the hourly ones + do i=1,nreal + write(11,rec=j*nreal+i)msd(i,nhour+1) + enddo + close(11) + endif + + END program rank_histograms_bootstrap +! +!-------------------------------------------------------------------------------------------- +! + subroutine find_rank & + (f,quantiles,nquant,rank1,miss) +! +! Find the rank of a data value within the quantile table (quantiles). +! +! f = individual data value (IN) +! quantiles = quantile values (IN) +!
nquant = number of quantile values (IN) +! rank1 = rank of the data value (OUT): 1 if f is below the first quantile, +! nquant+1 if f is at or above the last quantile, +! and the missing value code if the quantiles are missing +! miss = missing value code (IN) + implicit none + integer :: nquant + real :: quantiles(nquant) + integer :: rank1 + real :: f + integer :: i1,i2,i,ind ! ind is not used + real :: miss + if(abs(quantiles(1)).ge.abs(miss))then + write(*,*)'Quantiles missing!?' ! This should never happen. + rank1=miss + else + if(f.lt.quantiles(1))then + rank1=1 + else + if(f.ge.quantiles(nquant))then + rank1=nquant+1 + else +! bisection: quantiles(i1) <= f < quantiles(i2) holds throughout the loop + i1=1 + i2=nquant + do while (i2.gt.i1+1) + i=(i1+i2)/2 + if(f.ge.quantiles(i))then + i1=(i1+i2)/2 + else + i2=(i1+i2)/2 + endif + enddo + rank1=i1+1 + endif + endif + endif + + return + end subroutine find_rank +! +!-------------------------------------------------------------------------------------------- +! + subroutine order(f,g,n) +! +! Ordering the values of f(1...n) in ascending order. Result in g +! Simple exchange ordering (inefficient for large n!) +! +! f = values to be ordered (IN, not modified) +! g = the same values in ascending order (OUT) +! n = number of values (IN) +! + implicit none + integer :: i,j,n + real :: f(n),g(n),g1 + g=f + do i=1,n-1 + do j=i+1,n + if(g(j).lt.g(i))then + g1=g(i) + g(i)=g(j) + g(j)=g1 + endif + enddo + enddo + + return + end subroutine order +! +!----------------------------------------------------------------------- +! +subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step) +! +! Rounding the hour to the nearest hour within (hour1, hour1 + hour_step, ... hour2) +! +implicit none +integer :: yyear,mmonth,dday,hhour ! year, month, day and hour before rounding (IN) +integer :: year,month,day,hour ! year, month, day and hour after rounding (OUT) +integer :: hour1,hour2,hour_step ! target hours: hour1, hour1 + hour_step ... hour2 +integer :: ndays_month(12) !
number of days per month +integer :: hour_index +ndays_month=31 +ndays_month(4)=30 +ndays_month(6)=30 +ndays_month(9)=30 +ndays_month(11)=30 +ndays_month(2)=28 +! Gregorian leap year rule +if((mod(yyear,4).eq.0.and.mod(yyear,100).gt.0).or.(mod(yyear,400).eq.0))then + ndays_month(2)=29 +endif +year=yyear +month=mmonth +day=dday +! +! round the hour to the nearest output hour +! (adding 0. makes the division real, so that nint can round it) +! +hour_index=nint((hhour-hour1+0.)/hour_step) +hour=hour1+hour_step*hour_index + if(hour.ge.24)then ! hhour was rounded forward to next day + hour=hour1 + day=dday+1 + if(day.gt.ndays_month(month))then + day=1 + month=month+1 + if(month.gt.12)then + month=1 + year=yyear+1 + endif + endif + endif + if(hour.lt.0)then ! Hhour was rounded backward to previous day + hour=hour2 + day=dday-1 + if(day.eq.0)then + month=month-1 + if(month.eq.0)then + month=12 + year=yyear-1 + endif + day=ndays_month(month) ! last day of the previous month + endif + endif + +return +end subroutine round_hour + + + + diff --git a/SYNOP/STATS/fortran-programs/rank_histograms_one_station.f95 b/SYNOP/STATS/fortran-programs/rank_histograms_one_station.f95 new file mode 100644 index 0000000000000000000000000000000000000000..2540ae9b0a0018c1acfb1c82f815b5a670be2f41 --- /dev/null +++ b/SYNOP/STATS/fortran-programs/rank_histograms_one_station.f95 @@ -0,0 +1,552 @@ + program rank_histograms_one_station +! +! Produce the rank histogram (i.e., the frequencies at which the simulated or +! observed data at one station fall between different quantiles of a pre-computed +! quantile distribution (below 1%, within 1-2%, ... above 99%) +! +! Jouni Räisänen, July 2015 +! +!---------------------------------------------------------------------- +! Input files (text format) +!---------------------------------------------------------------------- +! +! INFILE: simulated or observed time series at the station location. +! The first line is a header. The other lines include: +! +! 1) year +! 2) month +! 3) day +! 4) hour +! 5) data value +! +!
QUANTILE_FILE: quantile values, separately for each day and UTC hour of the year. +! The first line is a header. The other lines include: +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) month +! 5) day +! 6) hour +! 7-105) The 99 quantiles from 1 % to 99 % +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! +! MSD_BOOTSTRAP_FILE: MSD (mean squared deviation) distribution of quantile frequencies +! from bootstrap tests. +! +! The first line is a header. The other lines include +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) number of realization +! 5 to N-1) MSD values for HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! N) MSD calculated from frequencies averaged over all the UTC hours within +! HOUR1, HOUR1 + HOUR_STEP ... HOUR2 +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer +! +!---------------------------------------------------------------------- +! Output files: +!---------------------------------------------------------------------- +! +! OUTFILE: frequencies of quantiles. +! The first line is a header. The other lines include +! +! 1) station code +! 2) longitude +! 3) latitude +! 4) hour +! 5) number of observations for this (UTC) hour +! 6) MSD for the quantile frequencies +! 7) p-value of MSD (=fraction of bootstrap realizations with larger or equal MSD) +! 8-107) quantile frequencies (on scale 0-1) from 0-1 % ... 99-100% +! +! if (l_code_in_char) is .true., the station code is assumed to be a 3-char string. +! Otherwise, it is assumed to be an integer. +! +! UTC hours included: HOUR1, HOUR1 + HOUR_STEP ... HOUR2 + all-UTC-statistics coded with HOUR=24 +! +! OUTFILE_GRADS: frequencies of quantiles in GrADS binary output (if grads_out=.true.) +! +! The resulting GrADS binary file includes 7 variables (1 level for 1-6, 100 levels for 7): +! +! 1) longitude +! 2) latitude +! 3) hour +!
4) number of obervations for this (UTC) hour +! 5) MSD for the quantile frequencies +! 6) p-value of MSD (=fraction of bootstrap realizations with larger or equal MSD) +! 7) quantile frequencies (on scale 0-1) from 0-1 % ... 99-100% +! +!--------------------------------------------------------------- +! Namelist parameters +!--------------------------------------------------------------- +! +! NAMELIST/param/infile,outfile,quantile_file,msd_bootstrap_file,& +! hour1,hour2,hour_step,year1,year2,month1,month2,grads_out,outfile_grads,& +! l_code_in_char,l_round_hour,miss +! +! infile : file name for simulated or observed time series (see above) +! quantile_file : file name for pre-computed quantiles (see above) +! msd_bootstrap_file : file name for pre-computed bootstrap statistics (see above) +! outfile : name of output file (see above) +! outfile_grads : output file in GrADS binary format (see above) +! grads_out : if .true., outfile_grads is written +! l_code_in_char: if .true., 3-character station codes assumed (else integer) +! +! hour1 = first UTC hour analysed +! hour2 = last UTC hour analysed +! hour_step = step of analyzed UTC hours +! l_round_hour : if .true., hours are rounded to the nearest analysis hour. +! if .false., data for non-matching hours is ignored +! +! year1,month1 = beginning of the analysis period +! year2,month2 = end of the analysis period +! +! miss = missing value code. All input data f with abs(f) >= abs(miss) is treated as missing +! +!--------------------------------------------------------------- +! + IMPLICIT NONE + INTEGER :: I,J,K,Q + INTEGER,PARAMETER :: NMON=12, NDAY=31, NHOUR=24, NQUANT=99 + REAL :: quantiles(nquant,nmon,nday,nhour) ! array for quantile values + REAL :: q1(nquant) ! quantiles for one day + hour + REAL :: frequency(nquant+1,nhour+1) ! frequency of quantiles. NHOUR+1 for the all-UTC frequencies + REAL :: expected_freq ! expected frequency of quantiles (1/100) + REAL :: MSD(nhour+1) ! 
MSD statistics per UTC hour + REAL :: p_value(nhour+1) ! p values per UTC hour + REAL :: msd_bootstrap(nhour+1) ! msd values for one bootstrap realization + INTEGER :: n_bootstrap ! total number of bootstrap realizations + INTEGER :: realization ! number of bootstrap realization read from msd_boostrap_file + REAL :: value1 ! individual data values read from the input file + INTEGER :: ntimes_hour(nhour) ! total number of times with data for an UTC hour + INTEGER :: ntimes_hour_tot ! total number of times with data for all UTC hours + CHARACTER*160 :: infile,outfile,quantile_file,msd_bootstrap_file + CHARACTER*160 :: outfile_grads + INTEGER,PARAMETER :: L_dataline=1700 ! maximum length of lines in text files + character*1700 :: dataline,headerline,emptyline ! character strings for writing of output files + character*2 :: number_of_bin ! two-digit code for quantiles for outfile header + character*16 :: frequency_value ! frequency written in F16.6 format +! + INTEGER :: yyear,mmonth,dday,hhour ! time as read from the input file + INTEGER :: year,month,day,hour ! time after eventual rounding of hours + + INTEGER :: year1,year2,month1,month2 ! period used in calculations (see above) + INTEGER :: hour1,hour2,hour_step ! hours used in calculations (see above) + LOGICAL :: HOUR_USED(nhour) ! true if the UTC hour is used + INTEGER :: nhour_used ! number of UTC hours used + INTEGER :: station_int ! station code integer (for synop stations amd RHARM soundings) + CHARACTER*3 :: station_char ! station with characters (for GRUAN soundings) + LOGICAL :: l_code_in_char ! .true. for station code in characters + REAL :: longitude,latitude ! station longitude and latitude + + LOGICAL :: GRADS_OUT ! Output also as GrADS binaries, just for testing + INTEGER :: IREC ! record number for GrADS output + + LOGICAL :: L_ROUND_HOUR ! if .true., hour rounded to the nearest output hour +! + REAL :: MISS ! missing value code +! 
+!----------------------------------------------------------------------- +! + NAMELIST/param/infile,outfile,quantile_file,msd_bootstrap_file,& + hour1,hour2,hour_step,year1,year2,month1,month2,grads_out,outfile_grads,& + l_code_in_char,l_round_hour,miss + + MISS=-9.99e6 + READ(*,NML=PARAM) +! +! Which UTC hours are used and what is their total number? +! + hour_used=.false. + nhour_used=0. + do i=hour1,hour2,hour_step + hour_used(i+1)=.true. + nhour_used=nhour_used+1 + enddo +! +! Empty data line +! + do i=1,L_dataline + emptyline(i:i)=' ' + enddo +!---------------------------------------------------------------------- +! +! Read the contents of the quantile_file to array 'quantiles' +! + quantiles=miss + open(unit=1,form='formatted',status='old',file=quantile_file) + + do while(.true.) + if(l_code_in_char)then + ! The err=1 specifier ensures that header lines are skipped +1 read(1,*,err=1,end=3)& + station_char,longitude,latitude,month,day,hour,(q1(i),i=1,nquant) + else +2 read(1,*,err=2,end=3)& + station_int,longitude,latitude,month,day,hour,(q1(i),i=1,nquant) + endif + do i=1,nquant + quantiles(i,month,day,hour+1)=q1(i) + enddo +! copy quantiles from 28 February to 29 February + if(month.eq.2.and.day.eq.28)then + do i=1,nquant + quantiles(i,month,day+1,hour+1)=q1(i) + enddo + endif + enddo +3 continue + close(1) +! +!---------------------------------------------------------------------- +! +! Read the station observations and count the absolute quantile bin frequencies +! + frequency=0 + ntimes_hour=0 + ntimes_hour_tot=0 + + open(unit=1,form='formatted',status='old',file=infile) + + do while(.true.) +11 read(1,*,err=11,end=12) yyear,mmonth,dday,hhour,value1 +! +! Rounding of hours? +! + if(l_round_hour)then + call round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step) + else + year=yyear + month=mmonth + day=dday + hour=hhour + endif +! +! If the hour is used and the date is within th analyzed period, +! 
update the quantile bin frequencies + + if(hour_used(hour+1))then + if((year.gt.year1.or.(year.eq.year1.and.month.ge.month1)).and. & + (year.lt.year2.or.(year.eq.year2.and.month.le.month2)).and. & + abs(value1).lt.abs(miss))then + ntimes_hour(hour+1)=ntimes_hour(hour+1)+1 + ntimes_hour_tot=ntimes_hour_tot+1 + call update_frequencies & + (frequency(1,hour+1),value1,quantiles(1,month,day,hour+1),nquant,miss) + endif + endif + + enddo +12 continue + close(1) +! +!---------------------------------------------------------------------- +! +! Convert absolute frequencies to relative frequencies +! + do hour=hour1,hour2,hour_step + do i=1,nquant+1 + if(ntimes_hour(hour+1).gt.0)then + frequency(i,hour+1)=frequency(i,hour+1)/ntimes_hour(hour+1) + else + frequency(i,hour+1)=miss + endif + enddo + enddo +! +!-------------------------------------------------------------------------------- +! +! Calculation of MSD. Because it can be assumed that model data are always available, +! all UTC hours get the same weight in the calculation of the all-UTC MSD. +! + expected_freq=1./(nquant+1.) + do j=hour1,hour2,hour_step + if(frequency(1,j+1).eq.miss)then + msd(j+1)=miss + else + do i=1,nquant+1 + msd(j+1)=msd(j+1)+((frequency(i,j+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + endif + enddo + do i=1,nquant+1 + do j=hour1,hour2,hour_step + frequency(i,nhour+1)=& + frequency(i,nhour+1)+frequency(i,j+1)/nhour_used + enddo + msd(nhour+1)=msd(nhour+1)+((frequency(i,nhour+1)/expected_freq-1.)**2.)/(nquant+1.) + enddo + do j=hour1,hour2,hour_step + if(frequency(1,j+1).eq.miss)then + do i=1,nquant+1 + frequency(i,nhour+1)=miss + enddo + msd(nhour+1)=miss + endif + enddo + +!---------------------------------------------------------------------- +! +! Read the bootstrap MSD:s and calculate the fractions of them that exceed the actual value +! + n_bootstrap=0 + p_value=0. + + open(unit=1,form='formatted',status='old',file=msd_bootstrap_file) + do while(.true.) 
+ if(l_code_in_char)then +21 read(1,*,err=21,end=23)station_char,longitude,latitude,realization,& + (msd_bootstrap(j+1),j=hour1,hour2,hour_step),msd_bootstrap(nhour+1) + else +22 read(1,*,err=22,end=23)station_int,longitude,latitude,realization,& + (msd_bootstrap(j+1),j=hour1,hour2,hour_step),msd_bootstrap(nhour+1) + endif +! +! Update the p-value counters +! + n_bootstrap=n_bootstrap+1 + do hour=hour1,hour2,hour_step + if(msd_bootstrap(hour+1).ge.msd(hour+1))then + p_value(hour+1)=p_value(hour+1)+1. + endif + enddo + if(msd_bootstrap(nhour+1).ge.msd(nhour+1))then + p_value(nhour+1)=p_value(nhour+1)+1. + endif + enddo +23 continue +! +! Convert the p_values from absolute counts to relative frequencies +! + do hour=hour1,hour2,hour_step + p_value(hour+1)=p_value(hour+1)/n_bootstrap + enddo + p_value(nhour+1)=p_value(nhour+1)/n_bootstrap +! +!-------------------------------------------------------------------------- +! +! Open the output file and write its header line in ODB compatible text format +! + open(unit=2,form='formatted',status='unknown',file=outfile) + headerline=emptyline + if(l_code_in_char)then + headerline='station@hdr:string longitude@hdr:real latitude@hdr:real hour@hdr:integer' + else + headerline='station@hdr:integer longitude@hdr:real latitude@hdr:real hour@hdr:integer' + endif + headerline=trim(headerline)//' total_number@body:real' + headerline=trim(headerline)//' msd@body:real' + headerline=trim(headerline)//' p_value@body:real' +! +! quantile bin frequency variable names: f00, f01 ... f99 +! + do j=0,nquant + write(number_of_bin,'(i2.2)')j + headerline=trim(headerline)//' f'//number_of_bin//'@body:real' + enddo + write(2,*)trim(headerline) +! +! Write the data lines for each UTC hour used +! 
+ do hour=hour1,hour2,hour_step + j=hour+1 + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,I7,2F16.6)')& + station_char,longitude,latitude,hour,ntimes_hour(j),msd(j),p_value(j) + else + write(dataline,'(I6,2F16.6,1X,I2,I7,2F16.6)')& + station_int,longitude,latitude,hour,ntimes_hour(j),msd(j),p_value(j) + endif + + do i=1,nquant+1 + write(frequency_value,'(F16.6)')frequency(i,j) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + enddo +! +! Write the data line for the all-UTC statistics +! + if(l_code_in_char)then + write(dataline,'(A3,2F16.6,1X,I2,I7,2F16.6)')& + station_char,longitude,latitude,24,ntimes_hour_tot,msd(nhour+1),p_value(nhour+1) + else + write(dataline,'(I6,2F16.6,1X,I2,I7,2F16.6)')& + station_int,longitude,latitude,24,ntimes_hour_tot,msd(nhour+1),p_value(nhour+1) + endif + do i=1,nquant+1 + write(frequency_value,'(F16.6)')frequency(i,nhour+1) + dataline=trim(dataline)//frequency_value + enddo + write(2,*)trim(dataline) + + close(2) +! +!--------------------------------------------------------------------------------- +! +! Open the file for GrADS output and write its contents, for visualisation in GrADS? +! + if(grads_out)then + open(unit=11,form='unformatted',file=outfile_grads,access='DIRECT',& + recl=4,status='unknown') +! +! Again, first each UTC hour separately +! + do hour=hour1,hour2,hour_step + j=(hour-hour1)/hour_step + write(11,rec=1+j*(nquant+6))longitude + write(11,rec=2+j*(nquant+6))latitude + write(11,rec=3+j*(nquant+6))real(ntimes_hour(hour+1)) + write(11,rec=4+j*(nquant+6))msd(hour+1) + write(11,rec=5+j*(nquant+6))p_value(hour+1) + do i=1,nquant+1 + write(11,rec=5+j*(nquant+6)+i)frequency(i,hour+1) + enddo + enddo + ! + ! The all-UTC statistics + ! 
+ write(11,rec=1+(j+1)*(nquant+6))longitude + write(11,rec=2+(j+1)*(nquant+6))latitude + write(11,rec=3+(j+1)*(nquant+6))real(ntimes_hour_tot) + write(11,rec=4+(j+1)*(nquant+6))msd(nhour+1) + write(11,rec=5+(j+1)*(nquant+6))p_value(nhour+1) + do i=1,nquant+1 + write(11,rec=5+(j+1)*(nquant+6)+i)frequency(i,nhour+1) + enddo + + close(11) + + endif ! if (grads_out) + + END program rank_histograms_one_station +! +!---------------------------------------------------------------- +! + subroutine update_frequencies & + (frequency,f,quantiles,nquant,miss) +! +! Find the location of the data value (f) within the +! quantile value table (quantiles, ascending order) +! and update the table of absolute quantile bin +! frequencies (frequency) +! +! frequency (in/out) = quantile bin frequencies +! f (in) = individual data value +! quantiles (in) = quantile values +! nquant (in) = number of quantiles +! miss (in) = missing value code + + implicit none + integer :: nquant + real :: quantiles(nquant) + real :: frequency(nquant+1) + real :: f + integer :: i1,i2,i,ind + real :: miss +! +! Quantiles should never be missing, but if they are, +! this should concern all quantiles simultaneously. +! Therefore, check just the 1st quantile + + if(abs(quantiles(1)).ge.abs(miss))then + write(*,*)'Quantiles missing!?' + stop + endif +! +! Find the position of f (ind) in the quantile table +! + if(f.lt.quantiles(1))then + ind=1 + else + if(f.ge.quantiles(nquant))then + ind=nquant+1 + else + i1=1 + i2=nquant + do while (i2.gt.i1+1) + i=(i1+i2)/2 + if(f.ge.quantiles(i))then + i1=(i1+i2)/2 + else + i2=(i1+i2)/2 + endif + enddo + ind=i1+1 + endif + endif +! +! Update the frequency table +! + frequency(ind)=frequency(ind)+1. + + return + end subroutine update_frequencies + +!---------------------------------------------------------------------- + +subroutine round_hour(yyear,mmonth,dday,hhour,year,month,day,hour,& + hour1,hour2,hour_step) +! +! 
Rounding the hour to the nearest hour within (hour1, hour1 + hour_step, ... hour2) +! +implicit none +integer :: yyear,mmonth,dday,hhour ! year, month, day and hour before rounding (IN) +integer :: year,month,day,hour ! year, month, day and hour after rounding (OUT) +integer :: hour1,hour2,hour_step ! target hours: hour1, hour1 + hour_step ... hour2 +integer :: ndays_month(12) ! number of days per month +integer :: hour_index +ndays_month=31 +ndays_month(4)=30 +ndays_month(6)=30 +ndays_month(9)=30 +ndays_month(11)=30 +ndays_month(2)=28 +if((mod(yyear,4).eq.0.and.mod(yyear,100).gt.0).or.(mod(yyear,400).eq.0))then + ndays_month(2)=29 +endif +year=yyear +month=mmonth +day=dday +! +! round the hour to the nearest output hour +! +hour_index=nint((hhour-hour1+0.)/hour_step) +hour=hour1+hour_step*hour_index + if(hour.ge.24)then ! hhour was rounded forward to next day + hour=hour1 + day=dday+1 + if(day.gt.ndays_month(month))then + day=1 + month=month+1 + if(month.gt.12)then + month=1 + year=yyear+1 + endif + endif + endif + if(hour.lt.0)then ! 
Hhour was rounded backward to previous day + hour=hour2 + day=dday-1 + if(day.eq.0)then + month=month-1 + if(month.eq.0)then + month=12 + year=yyear-1 + endif + day=ndays_month(month) + endif + endif + +return +end subroutine round_hour + + + diff --git a/SYNOP/STATS/list_of_stations_39.txt b/SYNOP/STATS/list_of_stations_39.txt new file mode 100644 index 0000000000000000000000000000000000000000..603bc1a79bc5b7386f84a9081dde2c0fcffefae3 --- /dev/null +++ b/SYNOP/STATS/list_of_stations_39.txt @@ -0,0 +1,4 @@ + station@hdr longitude@hdr latitude@hdr + 102019 23.576000 68.602997 + 102017 20.850901 69.042801 + 101695 25.423300 64.142601 diff --git a/SYNOP/STATS/orig_list_of_stations_39.txt b/SYNOP/STATS/orig_list_of_stations_39.txt new file mode 100644 index 0000000000000000000000000000000000000000..7313d190e7cbfb7761eb489b5a75fd794f0b376a --- /dev/null +++ b/SYNOP/STATS/orig_list_of_stations_39.txt @@ -0,0 +1,90 @@ + station@hdr longitude@hdr latitude@hdr + 102019 23.576000 68.602997 + 102017 20.850901 69.042801 + 101695 25.423300 64.142601 + 101776 24.727600 65.019699 + 101784 24.561199 65.039703 + 101150 24.491600 60.999199 + 100919 19.131399 60.300999 + 101196 26.049700 61.200100 + 100968 24.956800 60.326698 + 101130 24.802999 60.595901 + 101651 30.974701 62.769402 + 101649 31.044500 63.141701 + 102026 25.736500 68.903000 + 102033 27.419001 68.613297 + 102047 27.265699 69.140900 + 102055 28.894100 69.582497 + 102052 28.299101 68.849297 + 102009 28.301201 68.477997 + 102006 27.444099 68.433899 + 102005 27.413300 68.415100 + 101315 24.804600 61.855499 + 101608 29.636101 62.657001 + 101104 23.498199 60.813999 + 100917 19.986900 60.178200 + 101609 29.233999 63.232399 + 101317 24.287001 61.845901 + 101339 25.670900 62.397598 + 100934 22.551800 60.386902 + 100971 24.944599 60.175201 + 101503 23.043600 63.120300 + 101783 24.095699 65.385101 + 101950 27.159100 66.715599 + 101840 24.582800 65.789398 + 102016 20.813801 69.039001 + 101986 24.855400 67.691704 + 101994 25.783199 
68.169601 + 101194 26.811100 60.696499 + 101662 23.143600 63.726601 + 101773 30.170601 64.297600 + 101586 27.633301 62.892601 + 101887 29.311899 66.370003 + 101897 29.151800 66.166100 + 101247 28.208700 61.057499 + 101636 30.045799 63.321098 + 100974 24.049500 60.244499 + 101982 24.033300 68.062798 + 101149 24.653700 60.508701 + 101799 25.372999 64.936996 + 101794 25.393200 65.006401 + 100908 21.374800 59.779099 + 101914 23.962999 66.772797 + 101064 21.783199 61.478901 + 101028 25.607300 60.391701 + 101805 26.963100 65.400101 + 101441 29.315100 61.801899 + 101831 28.056700 64.664001 + 101775 23.896700 64.611801 + 101873 26.367599 65.977203 + 101933 26.010900 66.579399 + 101920 25.838100 66.562401 + 101928 25.708799 66.498299 + 101959 28.688299 66.816597 + 101966 29.177700 67.162300 + 102012 29.611300 67.748596 + 100955 23.112900 60.373901 + 101952 28.178600 67.282997 + 101932 26.629000 67.366600 + 102001 27.184999 68.084198 + 101826 28.746700 64.931297 + 101885 28.217699 65.574898 + 101124 23.746799 61.465599 + 100949 22.178699 60.454399 + 102035 27.006800 69.756401 + 102036 27.896500 70.082001 + 101800 26.423901 64.501404 + 101464 21.068600 63.435101 + 101421 27.907900 62.322201 + 101726 27.220600 63.840900 + 101537 25.858601 63.082199 + 101231 27.672701 60.527199 + 101908 24.649599 66.529503 + 101690 24.724701 64.050301 + 101486 22.488800 62.938099 + 101958 27.218399 67.022003 + 101103 22.346100 61.252899 + 101485 21.639400 63.098701 + 126736 27.751900 64.216797 + 101705 25.705700 63.735901 + 101690 24.724700 64.050300 diff --git a/SYNOP/STATS/produce_rank_histograms_all_stations.sh b/SYNOP/STATS/produce_rank_histograms_all_stations.sh new file mode 100755 index 0000000000000000000000000000000000000000..214fe17334fb01c9ab3fb529acc481a1964839c5 --- /dev/null +++ b/SYNOP/STATS/produce_rank_histograms_all_stations.sh @@ -0,0 +1,286 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." 
+################################################################## +# +# Calculation of quantile space rank histograms and their +# summary statistics (MSD + p-value) for all stations in the +# FMI open data sample +# +# The following need to be given as arguments: +# +# - variable +# - first year of raw data +# - last year of raw data +# - first month of raw data (default 1) +# - last month of raw data (default 12) +# +# As the input for this script, the following files are needed: +# +# 1) List of stations +# 2) Raw simulation data at station coordinates +# (but here, the station observations are used as a surrogate for this file) +# 3) File including quantiles as a function of time of year +# 4) File including the rank histogram bootstrap MSD values +# +# Important notes: +# +# 1) It is assumed that all the files are in ODB format. However, +# their relevant parts are converted to text format for processing. +# 2) The rank histogram bootstrap MSD values must be available +# for the same length of simulation as given as arguments to this script. +# +# Execution (e.g.): ./produce_rank_histograms_all_stations.sh 39 2020 2020 1 12 +# +# ORIGINAL: +# Jouni Räisänen, Aug 2023 +# MODIFIED: +# Alexander Mahura, Sep-Oct-Nov 2023 +# +################################################################## +# +# Arguments: +# +# 1. 
Variable code +# +variable=$1 +echo " Meteorological variable code = $1" +# +# The following variables are included: +# +# 91 'total amount of clouds' +# 108 'sea level pressure' +# 80 '1-hour precipitation' +# 58 'relative humidity' +# 999 '10-minute precipitation intensity' +# 71 'snow depth' +# 39 '2m temperature' +# 40 'dew point temperature' +# 62 'visibility' +# 111 'wind direction' +# 261 'maximum wind gust in last 10 minutes' +# 221 'surface wind speed' +# +# 2.-3: First and last year +# +year1=$2 +year2=$3 +let nyears=year2-year1+1 +# +# 4.-5: First and last month +# +month1="${4:-1}" +month2="${5:-12}" +# +# Find the length of the simulation period. +# If this is 12 months or more, it is rounded to the nearest integer +# number of years. Otherwise, the number of months is recorded. +# +let nmonths=12*nyears+month2-month1-11 +echo ' Number of months' $nmonths +# +n=$( echo "$nmonths / 12" | bc -l ) +nyears_round=$( echo "($n + 0.5) / 1" | bc ) +echo ' Number of years rounded' ${nyears_round} +# +if [ $nmonths -ge 12 ] +then + period=${nyears_round}yr +else + period=${nmonths}mon +fi +echo $period +# +# Add a leading zero to $month1 and $month2 for naming of files, if needed +# +if [ $month1 -le 9 ] +then + month1='0'${month1} +fi +if [ $month2 -le 9 ] +then + month2='0'${month2} +fi +# +################################################################## +# +# Add odb_api to $PATH +# +# On Puhti +#PATH="${PATH}":/projappl/project_2001011/jouni/to_alexander_220823/odb_api/bin +# +# On Lumi +#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin +# +# On Lumi (asked Devaraju Narayanappa, CSC) +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# +################################################################## +# +# ----- File names. 
Hard-coded, at least this far ----- +# +# NB: it would be more efficient to have ODB files for individual +# stations instead for the ones that include everything for all stations! +# +# 1) Raw data from model simulation (this far, mimicked by observations) +echo " Raw data from model simulation (this far, mimicked by observations) ..." +sim_dir="/scratch/project_465000454/ama/open_data_ODB" +sim_file=${sim_dir}/FMI_open_data_2010-2021.odb +echo " $sim_file" + +# 2) Pre-computed quantiles +echo " Directory with pre-computed quantiles ..." +#quant_dir=quantiles +quant_dir=/scratch/project_465000454/ama/STATDATASYNOP/quantiles +echo " $quant_dir" +quant_file=${quant_dir}/quant_all_stations_${variable}_4.odb + +# 3) Pre-computed bootstrap MSD statistics +echo " Directory with pre-computed bootstrap MSD statistics ..." +#bootstrap_dir=bootstrap_statistics/${variable}_${period} +bootstrap_dir=/scratch/project_465000454/ama/STATDATASYNOP/bootstrap_statistics/${variable}_${period} +echo " $bootstrap_dir" +msd_bootstrap_file=${bootstrap_dir}/MSD_bootstrap_${variable}_${period}_all_stations.odb + +# 4) Directory for results +echo " Directory for results ..." +outdir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} +echo " $outdir" +if test -d "$outdir"; then + echo $outdir exists +else + echo $outdir does not exist + echo " Creating directory: $outdir" + mkdir -p $outdir +fi +# +################################################################## +# +# It is assumed that the list of stations has been pre-produced +# and is available as a .txt file. +# +station_list=list_of_stations_${variable}.txt +echo " List of synop stations (with geographical coordinates): $station_list" +# +# +################################################################ +# +# Compile the Fortran program that calculates the rank histograms +# +echo " Compiling fortran-program to calculate rank histograms ..." 
+echo " fortran-programs/rank_histograms_one_station.f95" + +gfortran fortran-programs/rank_histograms_one_station.f95 -o rank_histograms_one_station + +################################################################# +# +# Count the number of lines in the station list: +# +n_lines=$(cat ${station_list} | wc | awk NF=1) +# +# Skip the first line which contains no station_ID +# +line_number=2 +# +# Loop over all stations +# +while [ ${line_number} -le `expr $n_lines` ] +do + head -`expr ${line_number}` ${station_list} | tail -1 > input.txt + read station longitude latitude < input.txt +echo " **********" +echo " Synop station ID, longitude, latitude: ${station} ${longitude} ${latitude}" + +#echo ${station} +#echo ${longitude} +#echo ${latitude} +# +################################################################ +# Select the simulation data for the station (mimicked this far by observations!). +# When simulation data are available, a means to retrieve the correct data points from it must be designed +echo " Selecting the simulation data (mimicked this far by observations) for synop station: ${station}" +# +odb_command="odb sql 'select year,month,day,hour,value where station=${station} and (year>=${year1} and year<=${year2}) and (hour=0 or hour=6 or hour=12 or hour=18) and variable=${variable}' -i ${sim_file} -o sim_data" +eval ${odb_command} +# +############################################################### +# Select the quantiles for the station: +echo " Selecting the quantiles for synop station: ${station}" +# 00, 06, 12 and 18 UTC are picked separately, since +# select \* does not allow for parentheses (very strange) +# +rm quantile_selection_* +odb sql select \* where hour=0 and station=${station} -i ${quant_file} -o quantile_selection_00 +odb sql select \* where hour=6 and station=${station} -i ${quant_file} -o quantile_selection_06 +odb sql select \* where hour=12 and station=${station} -i ${quant_file} -o quantile_selection_12 +odb sql select \* where 
hour=18 and station=${station} -i ${quant_file} -o quantile_selection_18 +cat quantile_selection_* > quantile_selection +# +################################################################ +# +# Get the distribution of pre-computed Mean Square Deviations from +# the msd_bootstrap_file. Note that these values depend on the length +# of the time period (included in the name of msd_bootstrap_file) +# and (to a smaller extent) the selected station. +# +odb sql select \* where station=${station} -i ${msd_bootstrap_file} -o msd_bootstrap_selection +# +############################################################### +# Produce the rank histogram for one station. +# Include data for 00, 06, 12 and 18 UTC. +echo " Producing the rank histogram for one synop station: ${station}" +# +############################################################## +# +echo " Checking existence of file: rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} ..." +outfile=${outdir}/rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} + +echo " $outfile" +if test -f "$outfile"; then + echo $outfile exists +else + echo $outfile does not exist + echo " Creating file: $outfile" + touch $outfile +fi + + +# +./rank_histograms_one_station < ${outdir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations.odb +###################################################################### +# +# Delete the fortran executable +# +rm rank_histograms_one_station diff --git a/SYNOP/STATS/produce_standard_plots_all_stations.sh b/SYNOP/STATS/produce_standard_plots_all_stations.sh new file mode 100755 index 0000000000000000000000000000000000000000..6ceb2315a1be99e16129f782fcefed241d1f48dc --- /dev/null +++ b/SYNOP/STATS/produce_standard_plots_all_stations.sh @@ -0,0 +1,310 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." 
+echo "Python version" +which python +########################################################################## +# +# Produce the following plots: +# +# (a) individual data values (from a model simulation or a real station) +# against the background of quantiles from the observed distribution +# for the same station +# (b) rank histogram for the same station +# +# for all stations in the FMI open data sample +# +# The following need to be given as arguments: +# +# - variable +# - first year of raw data +# - last year of raw data +# - first month of raw data (default 1) +# - last month of raw data (default 12) +# +# As the input for this script, the following files are needed: +# +# 1) List of stations +# 2) Raw simulation data at station coordinates +# (but here, the station observations are used as a surrogate for this file) +# 3) File including quantiles as a function of time of year +# 4) File including the rank histogram bootstrap MSD values +# +# Important note: +# +# 1) It is assumed that all the files are in ODB format. However, +# their relevant parts are converted to text format for processing. +# +# Execution (e.g.): ./produce_standard_plots_all_stations.sh 39 2020 2020 1 12 +# ORIGINAL: +# Jouni Räisänen, August 2023 +# MODIFIED: +# Alexander Mahura, Sep-Oct 2023 +# +################################################################## +# +# Arguments: +# +# 1. 
Variable code +# +variable=$1 +echo " Meteorological variable code = $1" + +# The following variables are included: +# +# 91 'total amount of clouds' +# 108 'sea level pressure' +# 80 '1-hour precipitation' +# 58 'relative humidity' +# 999 '10-minute precipitation intensity' +# 71 'snow depth' +# 39 '2m temperature' +# 40 'dew point temperature' +# 62 'visibility' +# 111 'wind direction' +# 261 'maximum wind gust in last 10 minutes' +# 221 'surface wind speed' +# +# 2.-3: First and last year +# +year1=$2 +year2=$3 +let nyears=year2-year1+1 +# +# 4.-5: First and last month +# +month1="${4:-1}" +month2="${5:-12}" +# +# Find the length of the simulation period. +# If this is 12 months or more, it is rounded to the nearest integer +# number of years. Otherwise, the number of months is recorded. +# +let nmonths=12*nyears+month2-month1-11 +echo 'Number of months' $nmonths +# +n=$( echo "$nmonths / 12" | bc -l ) +nyears_round=$( echo "($n + 0.5) / 1" | bc ) +echo 'Number of years rounded' ${nyears_round} +# +if [ $nmonths -ge 12 ] +then + period=${nyears_round}yr +else + period=${nmonths}mon +fi +echo $period +# +# Add a leading zero to $month1 and $month2 for naming of files, if needed 
+# +if [ $month1 -le 9 ] +then + month1='0'${month1} +fi +if [ $month2 -le 9 ] +then + month2='0'${month2} +fi +# +################################################################## +# +# Add odb_api to $PATH +# On Puhti +#PATH="${PATH}":/projappl/project_2001011/jouni/to_alexander_220823/odb_api/bin +# On Lumi +#export PATH="/projappl/project_465000454/ama/software/odb_api/bin:$PATH" +# +# On Lumi (asked Devaraju Narayanappa, CSC) +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# +# Define the python environment +# On Puhti +#export PATH="/projappl/project_2001011/madeleine/python_envs/post-pp/bin:$PATH" +# On Lumi +# it has been activated in .bash_profile with --- module load python-climatedt/3.11.3-cpeCray-22.08 +# On Lumi +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# +################################################################## +# +# ----- File names. Hard-coded, at least this far ----- +# +# 1) Raw data from model simulation (this far, mimicked by observations) +echo " Raw data from model simulation (this far, mimicked by observations) ..." +sim_dir="/scratch/project_465000454/ama/open_data_ODB" +sim_file=${sim_dir}/FMI_open_data_2010-2021.odb +echo " $sim_file" + +# 2) Pre-computed quantiles +echo " Directory for pre-computed quantiles ..." +#quant_dir=quantiles +quant_dir=/scratch/project_465000454/ama/STATDATASYNOP/quantiles +echo " $quant_dir" +quant_file=${quant_dir}/quant_all_stations_${variable}_4.odb + +# 3) Pre-computed rank histogram data +echo " Directory for pre-computed rank histogram data ..." +rh_dir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2} +echo " $rh_dir" +rh_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations.odb + +# 4) Directory for figures +echo " Directory for figures ..." 
+figure_dir=figures/standard_plots_${variable}_${year1}${month1}-${year2}${month2} +echo " $figure_dir" +if test -d "$figure_dir"; then + echo $figure_dir exists +else + echo $figure_dir does not exist + echo " Creating directory: $figure_dir" + mkdir -p $figure_dir +fi + +# +################################################################## +# +# It is assumed that the list of stations has been pre-produced +# and is available as a .txt file. +# +station_list=list_of_stations_${variable}.txt +echo " List of synop stations (with geographical coordinates): $station_list" +# +# +################################################################ +# +echo " Compiling the Fortran program needed for creating the figures ..." +echo " fortran-programs/plots_for_one_station.f95" + +gfortran fortran-programs/plots_for_one_station.f95 -o plots_for_one_station + +################################################################# +# +# Count the number of lines in the station list +# +n_lines=$(cat ${station_list} | wc | awk NF=1) +# +# Skip the first line which contains no station_ID +# +line_number=2 +# +# Loop over all stations +# +while [ ${line_number} -le `expr $n_lines` ] +do + head -`expr ${line_number}` ${station_list} | tail -1 > input.txt + read station longitude latitude < input.txt +echo " **********" +echo " Synop station ID, longitude, latitude: ${station} ${longitude} ${latitude}" +# +################################################################ +# Select the simulation data for the station (mimicked this far by observations!) +# When simulation data are available, a means to retrieve the correct data points from it must be designed. 
+echo " Selecting the simulation data (mimicked this far by observations) for synop station: ${station}" +# +odb_command="odb sql 'select year,month,day,hour,value where station=${station} and (year>=${year1} and year<=${year2}) and (hour=0 or hour=6 or hour=12 or hour=18) and variable=${variable}' -i ${sim_file} -o sim_data" +eval ${odb_command} + +################################################################ +# Select the rank histogram data for the station +echo " Selecting rank histogram data for synop station: ${station}" +# +odb sql select \* where station=${station} -i ${rh_file} -o rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} +# +############################################################### +# Select the quantiles for the station +echo " Selecting the quantiles for synop station: ${station}" +# +# 00, 06, 12 and 18 UTC are picked separately, since +# select \* does not allow for parentheses (very strange) +# +rm quantile_selection_* +odb sql select \* where hour=0 and station=${station} -i ${quant_file} -o quantile_selection_00 +odb sql select \* where hour=6 and station=${station} -i ${quant_file} -o quantile_selection_06 +odb sql select \* where hour=12 and station=${station} -i ${quant_file} -o quantile_selection_12 +odb sql select \* where hour=18 and station=${station} -i ${quant_file} -o quantile_selection_18 +cat quantile_selection_* > quantile_selection +# +############################################################## +# +# Produce the plots for one station +# 1) Run the fortran program that produces the required text files +# 2) Run the python script that produces the actual plot +# +# NB: the fortran program also produces some files for generation of the same plots +# in GrADS, but plotting in GrADS is omitted in this script. 
+# +############################################################### +./plots_for_one_station < standard_plot.cfg +[data] +variable=${variable} +station_id=${station} +longitude=${longitude} +latitude=${latitude} +year_beg=${year1} +year_end=${year2} +month_beg=${month1} +month_end=${month2} + +[input] +sim_data=time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt +quantile_sel=quantiles_${variable}_${station}.txt +rank_hists=rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt + +[output] +fig_name=${figure_dir}/standard_plot_${variable}_${station}_${year1}${month1}-${year2}${month2}_python.png +EOF + +################################################################ +# +echo " Calling python to plot quantiles rank histogram for synop station: ${station}" +python3 python/plot_quantiles_rankhist.py standard_plot.cfg + +################################################################ +# Remove unnecessary files +echo " Removing unnecessary temporary files ..." 
+ +rm input.txt +rm vrange_* +rm msd_and_p-value +rm quantile_selection +rm rank_histograms_${variable}_${station}_${year1}${month1}-${year2}${month2} +rm sim_data +###rm msd_bootstrap_selection +###rm standard_plot_one_station +rm time_series_commands +rm time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.grads +rm rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.grads +rm standard_plot.cfg +rm time_series_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt +rm quantiles_${variable}_${station}.txt +rm rank_histogram_${variable}_${station}_${year1}${month1}-${year2}${month2}.txt +# +((line_number++)) +done + +################################################################ +# The Fortran executable is not needed any more: +# +rm plots_for_one_station +rm quantile_selection_* +rm msd_and_p-value_* +rm coordinates diff --git a/SYNOP/STATS/python/plot_p_values_map.py b/SYNOP/STATS/python/plot_p_values_map.py new file mode 100644 index 0000000000000000000000000000000000000000..7b6e3527bb32cf416e20330327ff36769ca5f2bb --- /dev/null +++ b/SYNOP/STATS/python/plot_p_values_map.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +''' +Written by Madeleine Ekblom, 2023-04-06 + +Based on Jouni Räisänen's script for plotting p values on the map of Finland + +Plots p values on a map restricted to an area limited by lons (19, 32) and lats (59, 71) + +Example: +$ python3 plot_p_values_map.py p_values_as_text_file +$ python3 plot_p_values_map.py p-values + +p_values_as_text_file is a text file containing stationID, longitude, latitude, and p value with one header row +''' + +import numpy as np +import matplotlib.pyplot as plt +import sys +import cartopy.crs as ccrs +from matplotlib.lines import Line2D + +def read_data(p_values_file): + ''' + Description: Reads in file containing p values at different stations + Input: file containing p values with stationID (sid), longitude (lon), latitude (lat), and p value (p). Header = 1 row. 
+ Output: structure numpy array with sid, lon, lat, p + ''' + + p_values_data = np.loadtxt(p_values_file, skiprows=1, dtype={'names': ('sid', 'lon', 'lat', 'p'), 'formats':('i4', 'f4', 'f4', 'f4')}) + + return p_values_data + + +def plot_p_values(p_values): + ''' + Description: plot p values on a map restricted to an area limited by lons (19, 32) and lats (59, 71) + Input: numpy array + Output: saves a figure in the running directory + ''' + + lon_min=19 + lon_max=32 + lat_min=59 + lat_max=71 + + colors = ['darkviolet', 'blue', 'skyblue', 'lime', 'yellow', 'orange', 'red', 'grey'] + limits = [0,0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,1] + + fig = plt.figure(figsize=(12,8)) + ax = fig.add_subplot(1,1,1,projection=ccrs.PlateCarree()) + ax.coastlines() + ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree()) + ax.gridlines(draw_labels=True) + + # basic plot, not changing colors of p: + #ax.scatter(p_values['lon'], p_values['lat'], c=p_values['p']) + + #loop over limits: + legend_elements = [] + for n in range(8): + p_ind = ((p_values['p'] > limits[n]) & (p_values['p'] <= limits[n+1])) + ax.scatter(p_values['lon'][p_ind], p_values['lat'][p_ind], c=colors[n]) + legend_elements.append(Line2D([0], [0], marker='o', color='w',markerfacecolor=colors[n], label=str(limits[n])+'-'+str(limits[n+1]))) + + # legend table with customized values + + #print(legend_elements) + + ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.1, 0.5)) + ax.set_title('P-values for T2m quantiles in ' + timestring, fontsize=20) + + plt.savefig('p_values.png', dpi=300) +# plt.show() + + +def main(p_values_file): + + p_values_data = read_data(p_values_file) + + plot_p_values(p_values_data) + + +if __name__=='__main__': + p_values_file = sys.argv[1] + timestring = sys.argv[2] + + main(p_values_file) diff --git a/SYNOP/STATS/python/plot_quantiles_rankhist.py b/SYNOP/STATS/python/plot_quantiles_rankhist.py new file mode 100644 index 
config = configparser.ConfigParser()


def _year_chunk(values, year_index, days_per_year=365):
    '''Return the part of values that belongs to the year with 0-based index year_index.'''
    start = year_index * days_per_year
    return values[start:start + days_per_year]


def plot_data(quantiles_data, rank_data, time_series_data, fig_title, fig_name, year_beg, year_end, month_beg, month_end, savefig=False):
    '''
    Plots four quantile/time-series panels (00, 06, 12, 18 UTC) and one rank histogram panel.

    quantiles data give the lines and the time series data give the dots.
    Input: quantiles_data   - structured array with field hour and fields q01 ... q99
           rank_data        - 2-D array; row index 4 is used, columns 4 and 5 are
                              shown as MSD and p-value, columns 6.. are the bins
           time_series_data - structured array with fields hour and value
           fig_title, fig_name - figure title and output file name
           year_beg, year_end  - integers, used to get the number of years
           month_beg, month_end - not used inside this function
           savefig          - if True, the figure is saved as fig_name
    '''
    # The x axis is one fixed non-leap year (365 days), shared by all years
    day1 = datetime.date(2001, 1, 1)
    day2 = datetime.date(2002, 1, 1)
    delta = datetime.timedelta(days=1)
    dates = drange(day1, day2, delta)

    # arrays for quantile names, axes titles, hours to plot
    q_names = ['q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99']
    sub_titles = ['00 UTC', '06 UTC', '12 UTC', '18 UTC']
    hours = [0, 6, 12, 18]

    # calculate number of years:
    nyears = year_end - year_beg + 1

    # set up figure
    fig = plt.figure(figsize=(10,10), layout='constrained')
    spec = fig.add_gridspec(3,2)
    fig.suptitle(fig_title, fontsize=20)

    # plot quantiles/time series for times 00, 06, 12, 18 on a 2 x 2 grid, row by row:
    for c, hour in enumerate(hours):
        # set up axis: title, xaxis
        ax = fig.add_subplot(spec[c // 2, c % 2])
        ax.set_title(sub_titles[c])
        ax.set_xlim(dates[0], dates[-1])
        ax.xaxis.set_major_locator(MonthLocator())
        ax.xaxis.set_major_formatter(DateFormatter('%b'))

        # quantile data for this hour (only the first year of rows is drawn)
        qh_ind = (quantiles_data['hour'][:] == hour)
        for q in q_names:
            q_year = _year_chunk(quantiles_data[q][qh_ind], 0)
            # x is cut to the data length so a short record does not raise
            ax.plot(dates[:len(q_year)], q_year, 'k-')

        # time series data for this hour, all years in file order
        th_ind = (time_series_data['hour'][:] == hour)
        t_data = time_series_data['value'][th_ind]
        for n in range(nyears):
            t_year = _year_chunk(t_data, n)
            # a year with fewer than 365 values is plotted from 1 January on
            ax.scatter(dates[:len(t_year)], t_year, marker='.')

    # plot rank histogram data:
    # NOTE(review): row 4 is presumably the combined "24 UTC" row - confirm
    ax2 = fig.add_subplot(spec[2,:])
    ax2.set_title('Rank histogram\n' + 'MSD:' + str(rank_data[4,4]) + ' p-value:' + str(rank_data[4,5]))
    ax2.bar(np.arange(100), rank_data[4,6:]*100)
    ax2.set_xlim(0,100)
    ax2.axhline(y=1)

    if savefig:
        plt.savefig(fig_name, dpi=300)


def read_quantiles_data(quantile_sel_file):
    '''
    Reads quantile data from text file and returns a structured numpy array
    Input: text file with one header row
    Output: a structured numpy array: sid, lon, lat, day_of_year, hour, q01, q10, q25, q50, q75, q90, q99
    '''
    names = ('sid', 'lon', 'lat', 'day_of_year', 'hour',
             'q01', 'q10', 'q25', 'q50', 'q75', 'q90', 'q99')
    formats = ('i4', 'f4', 'f4', 'i4', 'i4',
               'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4')

    quantile_data = np.loadtxt(quantile_sel_file, skiprows=1, dtype={'names': names, 'formats': formats})

    return quantile_data


def read_rank_data(rank_hists_file):
    '''
    Reads rank histogram data binned into a 100 bins: 0-1%, 1-2%, ..., 99-100%
    Input: text file with one header row
    Output: numpy array (unstructured); the first columns contain station id, longitude,
            latitude, hour, msd, p_value, and then follow the bins f00 = 0-1%, ..., f99 = 99-100%
    '''
    rank_hists = np.loadtxt(rank_hists_file, skiprows=1)

    return rank_hists


def read_time_series_data(sim_data_file):
    '''
    Reads time series data
    Input: text file with one header row
    Output: structured numpy array: sid, lon, lat, year, day_of_year, hour, value
    '''
    names = ('sid', 'lon', 'lat', 'year', 'day_of_year', 'hour', 'value')
    formats = ('i4', 'f4', 'f4', 'i4', 'i4', 'i4', 'f4')

    time_series_data = np.loadtxt(sim_data_file, skiprows=1, dtype={'names': names, 'formats': formats})

    return time_series_data


def main(config_file):
    '''
    Main:
    reads config files
    reads input data: time_series_data, quantiles_data, rank_hist_data
    calls plotting function and saves figure with figure name given in config file
    '''
    ## Read from config file ##
    config.read(config_file)

    # data to be plotted (all kept as strings except the years)
    station_id = config['data']['station_id']
    longitude = config['data']['longitude']
    latitude = config['data']['latitude']
    year_beg = int(config['data']['year_beg'])
    year_end = int(config['data']['year_end'])
    month_beg = config['data']['month_beg']
    month_end = config['data']['month_end']

    # input files
    sim_file = config['input']['sim_data']
    quantile_file = config['input']['quantile_sel']
    rank_hist_file = config['input']['rank_hists']

    # output files
    fig_name = config['output']['fig_name']

    ## Read input data ##
    time_series_data = read_time_series_data(sim_file)
    quantiles_data = read_quantiles_data(quantile_file)
    rank_hist_data = read_rank_data(rank_hist_file)

    ## Plot data ##
    fig_title = 'Station: ' + station_id + ', ' + str(year_beg) + month_beg + '-' + str(year_end) + month_end + '\nLat=' + latitude + ' Lon=' + longitude
    plot_data(quantiles_data, rank_hist_data, time_series_data, fig_title, fig_name, year_beg, year_end, month_beg, month_end, True)


if __name__=='__main__':
    if (len(sys.argv) < 2):
        sys.exit("Error: config file must be added as 2nd argument")
    elif (len(sys.argv) > 2):
        sys.exit("Error: only add config file as argument")
    config_file = sys.argv[1]

    main(config_file)
def plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, figname='rank_hist_sumstats.png'):
    '''
    Input: p_freq, q_freq, number of stations, p01, p1, p5, max freq p, max freq q, figname
    Description: Plots p and q frequencies and saves the figure with figname
                 (default: rank_hist_sumstats.png in the running directory)
    '''
    title = ('Rank Histogram Summary Statistics\n Number of stations: ' + str(number_of_stations)
             + '\nFrequency, p<0.001: ' + str(p01)
             + '\nFrequency, p<0.01: ' + str(p1)
             + '\nFrequency, p<0.05: ' + str(p5))

    # can also be input arguments if the number of bins are not constant
    number_of_p_bins = 20
    number_of_q_bins = 100

    fig = plt.figure(figsize=(8,12))
    gs = fig.add_gridspec(2, hspace=0.4)
    ax1, ax2 = gs.subplots(sharex=False, sharey=False)

    # Plot p values; frequencies are multiplied by the number of bins before plotting
    ax1.set_title('Normalized p-value frequencies')
    ax1.set_xlabel('p-value')
    ax1.set_ylim([0,max_freq_p])
    ax1.bar(np.arange(number_of_p_bins)+0.5, number_of_p_bins*p_freq)
    ax1.xaxis.set_major_locator(MultipleLocator(2))
    ax1.xaxis.set_minor_locator(MultipleLocator(1))
    # bars are drawn at bin indices; relabel the ticks as index / number of bins
    p_xlabels = ax1.get_xticks()
    ax1.set_xticks(p_xlabels, p_xlabels/number_of_p_bins)
    ax1.set_xlim([0,number_of_p_bins])

    # Plot q values
    ax2.set_title('Normalized quantile frequencies')
    ax2.set_xlabel('Quantile (%)')
    ax2.bar(np.arange(number_of_q_bins)+0.5, number_of_q_bins*q_freq)
    ax2.xaxis.set_major_locator(MultipleLocator(10))
    ax2.xaxis.set_minor_locator(MultipleLocator(5))
    ax2.set_ylim([0,max_freq_q])
    ax2.set_xlim([0,number_of_q_bins])

    fig.suptitle(title)
    plt.savefig(figname, dpi=300)


def read_data(rh_summary_file):
    '''
    Input: path/to/rh_summary_file (without the _p-freq.txt / _q-freq.txt endings)
    Output: p_freq, q_freq
    Description: reads in p frequencies and q frequencies
    '''

    # first line contains a header and first column contains hour;
    # ndmin=2 keeps the row/column layout also when a file has a single data row
    p_freq_all = np.loadtxt(rh_summary_file + '_p-freq.txt', skiprows=1, ndmin=2)
    q_freq_all = np.loadtxt(rh_summary_file + '_q-freq.txt', skiprows=1, ndmin=2)

    # p_freq and q_freq contain data 00, 06, 12, 18, 24 (the last is used in these plots)
    p_freq = p_freq_all[-1,1:]
    q_freq = q_freq_all[-1,1:]

    return p_freq, q_freq


if __name__=='__main__':
    if len(sys.argv) < 8:
        sys.exit('Usage: ' + sys.argv[0] + ' rh_summary_file nstat p01 p1 p5 max_freq_p max_freq_q [figname]')
    rh_summary_file = sys.argv[1]
    number_of_stations = sys.argv[2]
    p01 = sys.argv[3]
    p1 = sys.argv[4]
    p5 = sys.argv[5]
    max_freq_p = float(sys.argv[6])
    max_freq_q = float(sys.argv[7])
    # the figure name is optional; without it the default file name is used
    figname = sys.argv[8] if len(sys.argv) > 8 else 'rank_hist_sumstats.png'

    p_freq, q_freq = read_data(rh_summary_file)
    plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, figname)
def plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, figname):
    '''
    Draws the p-value frequencies (upper panel) and the quantile frequencies
    (lower panel) as bar plots and writes the figure to figname.

    Input: p_freq, q_freq, number of stations, p01, p1, p5, max freq p, max freq q, figname
    '''
    title_parts = [
        'Rank histogram summary statistics\n Number of stations: ' + str(number_of_stations),
        'Frequency, p<0.001: ' + str(p01),
        'Frequency, p<0.01: ' + str(p1),
        'Frequency, p<0.05: ' + str(p5),
    ]
    title = '\n'.join(title_parts)

    # bin counts are fixed here; they could become arguments if they ever vary
    n_p = 20
    n_q = 100

    fig = plt.figure(figsize=(8, 12))
    grid = fig.add_gridspec(2, hspace=0.4)
    upper, lower = grid.subplots(sharex=False, sharey=False)

    # upper panel: p values
    upper.set_title('Normalized p-value frequencies')
    upper.set_xlabel('p-value')
    upper.set_ylim([0, max_freq_p])
    p_centres = np.arange(n_p) + 0.5
    upper.bar(p_centres, n_p * p_freq)
    upper.xaxis.set_major_locator(MultipleLocator(2))
    upper.xaxis.set_minor_locator(MultipleLocator(1))
    tick_positions = upper.get_xticks()
    upper.set_xticks(tick_positions, tick_positions / n_p)
    upper.set_xlim([0, n_p])

    # lower panel: quantiles
    lower.set_title('Normalized quantile frequencies')
    lower.set_xlabel('Quantile (%)')
    q_centres = np.arange(n_q) + 0.5
    lower.bar(q_centres, n_q * q_freq)
    lower.xaxis.set_major_locator(MultipleLocator(10))
    lower.xaxis.set_minor_locator(MultipleLocator(5))
    lower.set_ylim([0, max_freq_q])
    lower.set_xlim([0, n_q])

    fig.suptitle(title)
    plt.savefig(figname)


def read_data(rh_summary_file):
    '''
    Reads the p and q frequency tables that belong to one summary file stem.

    Input: path/to/rh_summary_file
    Output: p_freq, q_freq (last row of each table, without its first column)
    '''
    last_rows = []
    for ending in ('_p-freq.txt', '_q-freq.txt'):
        # one header line; the first column holds the hour
        table = np.loadtxt(rh_summary_file + ending, skiprows=1)
        # rows are 00, 06, 12, 18, 24; the plots use the last one
        last_rows.append(table[-1, 1:])

    p_freq, q_freq = last_rows
    return p_freq, q_freq


if __name__=='__main__':
    args = sys.argv
    rh_summary_file = args[1]
    number_of_stations, p01, p1, p5 = args[2], args[3], args[4], args[5]
    max_freq_p = float(args[6])
    max_freq_q = float(args[7])
    figname = args[8]

    p_freq, q_freq = read_data(rh_summary_file)
    plot_rank_hist(p_freq, q_freq, number_of_stations, p01, p1, p5, max_freq_p, max_freq_q, figname)
#!/bin/bash
echo "Bash version ${BASH_VERSION}..."
echo "Python version"
which python
##################################################################
#
# Calculation and plotting of summary statistics for quantile
# space rank histograms:
#
# - a map of p-values
# - bar plot of p-value distribution + rank histogram
#   averaged over all stations
#
# The following need to be given as arguments:
#
# - variable
# - first year of raw data
# - last year of raw data
# - first month of raw data (default 1)
# - last month of raw data (default 12)
#
# As the input for this script, only the following is needed:
#
# 1) Rank histogram statistics files for all stations in a single file
#    (for the time range specified by the script arguments)
#
# Execution (e.g): ./summary_rank_histograms_all_stations 39 2020 2020 1 12
# ORIGINAL:
# Jouni Räisänen, August 2023
# MODIFIED:
# Alexander Mahura, Sep-Oct 2023
#
##################################################################
#
# Arguments:
#
# 1. Meteorological variable code
#
variable=$1
echo " Meteorological variable code = $1"
#
# The following variables are included:
#
# 91 'total amount of clouds'
# 108 'sea level pressure'
# 80 '1-hour precipitation'
# 58 'relative humidity'
# 999 '10-minute precipitation intensity'
# 71 'snow depth'
# 39 '2m temperature'
# 40 'dew point temperature'
# 62 'visibility'
# 111 'wind direction'
# 261 'maximum wind gust in last 10 minutes'
# 221 'surface wind speed'
#
# 2.-3: First and last year
#
year1=$2
year2=$3
let nyears=year2-year1+1
#
# 4.-5: First and last month
#
month1="${4:-1}"
month2="${5:-12}"
#
# Zero-pad the months to two digits for the file names.
# 10# forces base 10, so that an already padded value such as 08 is
# accepted and 01 stays 01 instead of becoming 001.
#
month1=$(printf '%02d' "$((10#${month1}))")
month2=$(printf '%02d' "$((10#${month2}))")
#
##################################################################
#
# Add odb_api to $PATH
# On Puhti
#PATH="${PATH}":/projappl/project_2001011/jouni/to_alexander_220823/odb_api/bin
#
# On Lumi
#export PATH="/projappl/project_465000454/ama/software/odb_api/bin:$PATH"
#
# On Lumi
#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin
#
# On Lumi (asked Devaraju Narayanappa, CSC)
# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C
# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C

# Define the python environment
# On Puhti
#export PATH="/projappl/project_2001011/madeleine/python_envs/post-pp/bin:$PATH"
# On Lumi
# module use /project/project_465000454/devaraju/modules/LUMI/23.03/
#
##################################################################
#
echo " Compiling the Fortran program that produces the rank histogram summary statistics ..."
echo " fortran-programs/rank_histogram_summary_statistics.f95"

gfortran fortran-programs/rank_histogram_summary_statistics.f95 -o rank_histogram_summary_statistics

##################################################################
#
# ----- File names. Hard-coded, at least this far -----
#
# 1) Rank histogram directory and input file name without extension
echo " Directory for rank histogram ..."
rh_dir=rank_histograms/${variable}_${year1}${month1}-${year2}${month2}
echo " $rh_dir"
rh_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_all_stations
rh_summary_file=${rh_dir}/rank_histograms_${variable}_${year1}${month1}-${year2}${month2}_summary

# 2) Name of output file(s) without extension
out_file=${rh_dir}/rh_summary_${variable}_${year1}${month1}-${year2}${month2}

# 3) Directory for figures (created if missing, so that the mv commands below work)
echo " Directory for figures ..."
figure_dir=figures
echo " $figure_dir"
mkdir -p "${figure_dir}"

##################################################################
#
# Convert the all-station ODB format rank histogram file to txt
# format for reading in Fortran
#
odb sql select \* -i ${rh_file}.odb -o ${rh_file}.txt
#
##################################################################
#
echo " Calculating the rank histogram summary statistics ..."
#
# NOTE(review): the next command is incomplete in this copy of the script.
# Everything between "<" and ">" appears to have been lost; presumably it
# was a here-document with the Fortran namelist, followed by
# "head -1 ${rh_summary_file}_text_output > line_1" (compare the head/tail
# commands below). TODO: restore from the upstream script before running.
./rank_histogram_summary_statistics < line_1
read v1 v2 v3 number_of_stations < line_1
head -2 ${rh_summary_file}_text_output | tail -1 > line_1
read v1 v2 v3 v4 number_of_p_bins < line_1
head -3 ${rh_summary_file}_text_output | tail -1 > line_1
read v1 max_freq_p < line_1
head -4 ${rh_summary_file}_text_output | tail -1 > line_1
read v1 max_freq_q < line_1
tail -3 ${rh_summary_file}_text_output | head -1 > line_1
read v1 v2 v3 v4 v5 v6 p01 < line_1
tail -2 ${rh_summary_file}_text_output | head -1 > line_1
read v1 v2 v3 v4 v5 v6 p1 < line_1
tail -1 ${rh_summary_file}_text_output > line_1
read v1 v2 v3 v4 v5 v6 p5 < line_1
# Upper x-axis limit for the p-value bars (not used by the python plots below)
number_of_p_bins_up=$( echo "(${number_of_p_bins} + 0.5)" | bc )
rm line_1
#
######################################################################
#
# Plotting in Python:
# Madeleine converted Jouni's grads-scripts to python
# 1 - Madeleine Ekblom, 2023-04-06
#     python/plot_p_values_map.py
# 2 - Madeleine Ekblom, 2023-09-12
#     python/plot_rank_hist_sum_all_stations.py
#
######################################################################
#
echo " (1) Plotting - P-Values Summary on 2D map ..."
#
######################################################################

python_script=python/plot_p_values_map.py

python3 ${python_script} ${rh_summary_file}_p-values.txt ${year1}${month1}-${year2}${month2}

mv p_values.png ${figure_dir}/p-value_map_${variable}_${year1}${month1}-${year2}${month2}.png
echo " Output file: ${figure_dir}/p-value_map_${variable}_${year1}${month1}-${year2}${month2}.png"

#####################################################################
#
echo " (2) Plotting - Rank Histogram Summary Statistics ..."
#
#####################################################################

python_script=python/plot_rank_hist_sum_all_stations.py

# The python script writes rank_hist_sumstats.png into the running directory
python3 ${python_script} ${rh_summary_file} ${number_of_stations} ${p01} ${p1} ${p5} ${max_freq_p} ${max_freq_q}

mv rank_hist_sumstats.png ${figure_dir}/rank-hist-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png
echo " Output file: ${figure_dir}/rank-hist-sumstats_${variable}_${year1}${month1}-${year2}${month2}.png"

######################################################################
#
# Delete files that are not needed any more.
# NOT NEEDED as converted to usage of python instead of grads
#
#####################################################################
#rm summary_plot_in_grads
#rm summary_statistics.ctl
#----------------------------------------------------------------------------
# Shared helpers for reading the temporarily extracted text files
#----------------------------------------------------------------------------

# Column layout used by readmod for radiosounding and satellite files.
# NOTE(review): 'variable' is read as a string (S6) and 'station' as an
# integer (i4), the reverse of the observation layout - confirm intended.
_MOD_RECORD_DTYPE = {
    'names': ('year', 'month', 'day', 'hour', 'minute', 'second',
              'longitude', 'latitude', 'value', 'variable', 'station'),
    'formats': ('i4', 'i2', 'i2', 'i2', 'i2', 'i2', 'f8', 'f8', 'f8', 'S6', 'i4')}


def _data_kind(type_name):
    """Map the type acronym to synop/radsound; anything else is satellite."""
    return type_name if type_name in ('synop', 'radsound') else 'satellite'


def _require_file(filename, missing_message):
    """Print missing_message and raise FileNotFoundError if filename is not a file."""
    if not os.path.isfile(filename):
        print(missing_message)
        raise FileNotFoundError(filename)


#----------------------------------------------------------------------------
# Function to read file with observations extracted from .odb-file
# note: (YYYY MM DD HH MM SS)
#----------------------------------------------------------------------------

def readobs(type_obs, path_tmp_obs, tmpfilename):
    """
    Read extracted observations from .odb-file for ground-based (SYNOP),
    radiosounding (TEMP), and satellite (AMSU-A) data
    Input
    -------
    type_obs: synop, radsound, satellite (any other value is treated as satellite)
    path_tmp_obs: directory of the file, including the trailing separator
    tmpfilename: name of the file with temporarily extracted observations
    Output
    -------
    ds: dataset with extracted observations (plain float array, no header row skipped)
    Raises
    -------
    FileNotFoundError: if the file does not exist (after printing a message)
    """
    kind = _data_kind(type_obs)
    filename = path_tmp_obs + tmpfilename
    print('obs ' + kind + ' tmp-filename is:', filename)
    _require_file(filename, 'OBS: The specified ' + kind + ' file does not exist !!!')

    ds = np.genfromtxt(filename)
    # the synop dataset is not echoed
    if kind == 'radsound':
        print('OBS from Radiosounding\n', ds)
    elif kind == 'satellite':
        print('OBS from Satellite\n', ds)
    return ds

#----------------------------------------------------------------------------
# Function to read file with modeled results extracted
# (& interpolated to points of observation) from gsv-file
# (YYYY MM DD HH MM SS)
#----------------------------------------------------------------------------

def readmod(type_mod, path_tmp_mod, tmpfilename):
    """
    Read extracted & interpolated to points of observations the modelled results
    from gsv-file for ground-based (SYNOP), radiosounding (TEMP), and
    satellite (AMSU-A) data
    Input
    -------
    type_mod: synop, radsound, satellite (any other value is treated as satellite)
    path_tmp_mod: directory of the file, including the trailing separator
    tmpfilename: name of the file with temporarily extracted modelled results
    Output
    -------
    ds: dataset with extracted & interpolated modelled results; a plain float
        array for synop, a structured array for radsound and satellite
    Raises
    -------
    FileNotFoundError: if the file does not exist (after printing a message)
    """
    # the branch is chosen from this function's own argument, not from a global
    kind = _data_kind(type_mod)
    filename = path_tmp_mod + tmpfilename
    print('mod ' + kind + ' tmp-filename is:', filename)
    _require_file(filename, 'MOD: The specified ' + kind + ' file does not exist !!!')

    if kind == 'synop':
        # example record:
        # 2020 01 20 00 00 00 24.4916 60.999199 273.143829345703 39 101150
        return np.genfromtxt(filename)

    ds = np.loadtxt(filename, dtype=_MOD_RECORD_DTYPE)
    if kind == 'radsound':
        print('MOD for Radiosounding\n', ds)
    else:
        print('MOD for Satellite\n', ds)
    return ds
#############################################################################
# MAIN PROGRAM
#############################################################################
# Passing parameters to program: date-hour of the files and variable acronym
try:
    fnamedate = sys.argv[1]
    met_param = sys.argv[2]
except IndexError:
    print("Usage:", sys.argv[0], "fnamedate met_aparm")
    sys.exit(1)
print(fnamedate)
print(met_param)
print('Command line: py-script-name YYYYMMDDHH met_param-acronym')

# Paths (relative to the running directory) to the temporarily stored
# extracted OBS and MOD data
path_tmp_obs = 'DATAOBS/'
path_tmp_mod = 'DATAMOD/'

print('Path to dir with extacted OBS data: ', path_tmp_obs)
print('Path to dir with extacted MOD data: ', path_tmp_mod)

# Constructing existing names of files with OBS & MOD data
# for SYNOP (fx: obs_synop_2020012000.dat and mod_synop_2020012000_2t.dat)
tmpfname_obs_synop = 'obs_synop_'+fnamedate+'.dat'
tmpfname_mod_synop = 'mod_synop_'+fnamedate+'_'+met_param+'.dat'
print('Name of file with synop OBS data: ', tmpfname_obs_synop)
print('Name of file with synop MOD data: ', tmpfname_mod_synop)

# for RADSOUND
tmpfname_obs_radsound = 'obs_radsound_'+fnamedate+'.dat'
tmpfname_mod_radsound = 'mod_radsound_'+fnamedate+'_'+met_param+'.dat'
# for SATELLITE
tmpfname_obs_satellite = 'obs_satellite_'+fnamedate+'.dat'
tmpfname_mod_satellite = 'mod_satellite_'+fnamedate+'_'+met_param+'.dat'

##############################################################################
# Observations ...
for ii in range(0,1): #(0,3) # testing on SYNOP obs data
    if ii == 0:
        type_obs = 'synop'
        tmpfilename = tmpfname_obs_synop
        ds_obs_synop = readobs(type_obs, path_tmp_obs, tmpfilename)
        rec_ds_obs_synop = len(ds_obs_synop)
    elif ii == 1:
        type_obs = 'radsound'
        tmpfilename = tmpfname_obs_radsound
        ds_obs_radsound = readobs(type_obs, path_tmp_obs, tmpfilename)
        rec_ds_obs_radsound = len(ds_obs_radsound)
    else:
        type_obs = 'satellite'
        tmpfilename = tmpfname_obs_satellite
        ds_obs_satellite = readobs(type_obs, path_tmp_obs, tmpfilename)
        rec_ds_obs_satellite = len(ds_obs_satellite)

##############################################################################
# Modelled results ... (always read with readmod, also for radsound/satellite)
for jj in range(0,1): #(0,3) # testing on SYNOP mod data
    if jj == 0:
        type_mod = 'synop'
        tmpfilename = tmpfname_mod_synop
        ds_mod_synop = readmod(type_mod, path_tmp_mod, tmpfilename)
        rec_ds_mod_synop = len(ds_mod_synop)
    elif jj == 1:
        type_mod = 'radsound'
        tmpfilename = tmpfname_mod_radsound
        ds_mod_radsound = readmod(type_mod, path_tmp_mod, tmpfilename)
        rec_ds_mod_radsound = len(ds_mod_radsound)
    else:
        type_mod = 'satellite'
        tmpfilename = tmpfname_mod_satellite
        ds_mod_satellite = readmod(type_mod, path_tmp_mod, tmpfilename)
        rec_ds_mod_satellite = len(ds_mod_satellite)

##############################################################################
# Calculate mismatch/difference/bias between synop OBS and MOD

# A file with a single record is read as a 1-D array; make both 2-D
# (records x columns) so that the column indexing below always works
ds_obs_synop = np.atleast_2d(ds_obs_synop)
ds_mod_synop = np.atleast_2d(ds_mod_synop)

# Checking number of records (obs & mod) => must be the same
nr_obs = ds_obs_synop.shape[0]
nr_mod = ds_mod_synop.shape[0]
if nr_obs != nr_mod:
    sys.exit('Number of OBS records (' + str(nr_obs) + ') and MOD records ('
             + str(nr_mod) + ') differ')

# Observed value is column 9 of the OBS file, modelled value column 8 of the MOD file
val_obs_synop = ds_obs_synop[:, 9]   # synop obs (2t in deg C)

# Converting modelled values to same scale as observed ...
if met_param == '2t':
    val_mod_synop = ds_mod_synop[:, 8] - 273.15   # synop mod (deg K -> deg C)
else:
    val_mod_synop = ds_mod_synop[:, 8]            # no need to convert modelled data

# Calculating a mismatch/difference/bias ...
dif_synop = val_obs_synop - val_mod_synop

# Calculating statistics on synop obs & mod & difference data (self-control & a possibility of flagging)
# Maximum
obs_max = max(val_obs_synop); mod_max = max(val_mod_synop); dif_synop_max = max(dif_synop)
# Minimum
obs_min = min(val_obs_synop); mod_min = min(val_mod_synop); dif_synop_min = min(dif_synop)
# Mean
obs_mean = np.mean(val_obs_synop); mod_mean = np.mean(val_mod_synop); dif_synop_mean = np.mean(dif_synop)
# Median
obs_med = np.median(val_obs_synop); mod_med = np.median(val_mod_synop); dif_synop_med = np.median(dif_synop)
# Variance
obs_var = np.var(val_obs_synop); mod_var = np.var(val_mod_synop); dif_synop_var = np.var(dif_synop)

# Correl. coefficient - (!) not needed for climate simulations

# Time slot for which stats is calculated (used in plotting graph);
# taken from the first record so that a one-record file works too
val_yy = str(int(ds_obs_synop[0,0]))
val_mm = str(int(ds_obs_synop[0,1]))
val_dd = str(int(ds_obs_synop[0,2]))
val_hh = str(int(ds_obs_synop[0,3]))
print('Time slot:', val_yy, val_mm, val_dd, val_hh)

print(' OBS MOD DIF')
print('Max {:8.3f} {:8.3f} {:8.3f}'.format(obs_max, mod_max, dif_synop_max))
print('Min {:8.3f} {:8.3f} {:8.3f}'.format(obs_min, mod_min, dif_synop_min))
print('Mean {:8.3f} {:8.3f} {:8.3f}'.format(obs_mean, mod_mean, dif_synop_mean))
print('Med {:8.3f} {:8.3f} {:8.3f}'.format(obs_med, mod_med, dif_synop_med))
print('Var {:8.3f} {:8.3f} {:8.3f}'.format(obs_var, mod_var, dif_synop_var))

# Text records shown next to the scatter plot
emsp = ' ' # empty space for text arrangement on graphs
row_fmt = '{:8.3f}{:8.3f}{:8.3f}'
rec_max = emsp + row_fmt.format(obs_max, mod_max, dif_synop_max) + ' Max'
rec_min = emsp + row_fmt.format(obs_min, mod_min, dif_synop_min) + ' Min'
rec_mean = emsp + row_fmt.format(obs_mean, mod_mean, dif_synop_mean) + ' Mean'
rec_med = emsp + row_fmt.format(obs_med, mod_med, dif_synop_med) + ' Median'
rec_var = emsp + row_fmt.format(obs_var, mod_var, dif_synop_var) + ' Variance'
rec_nr_obs = emsp + emsp + emsp + str(nr_obs) + ' - # of stations'

# Plotting : scatter plot for observed vs. modelled values
# Selecting type of plot
if type_obs == 'synop':
    type_title = 'SYNOP'
elif type_obs == 'radsound':
    type_title = 'RADIOSOUNDING'
else:
    type_title = 'SATELLITE'

txt_time_slot = 'Time slot (YYYY, MM, DD, HH): '+val_yy+'-'+val_mm+'-'+val_dd+'-'+val_hh
txt_title = type_title+': Meteorological Variable: '+ met_param

fig = plt.figure()

plt.scatter(val_mod_synop, val_obs_synop)
plt.xlabel('Modelled Value (MOD)')
plt.ylabel('Observed Value (OBS)')
plt.suptitle(txt_title, fontsize=16, weight='bold')
plt.title(txt_time_slot, fontsize=12)
# The statistics table starts at the largest (MOD, OBS) values and goes
# down in steps of 0.5 data units on the OBS axis
plt.text(mod_max, obs_max, ' OBS MOD DIF', weight='bold')
table_rows = [rec_max, rec_min, rec_mean, rec_med, rec_var, rec_nr_obs]
for row_number, row_text in enumerate(table_rows, start=1):
    plt.text(mod_max, obs_max - 0.5*row_number, row_text)

# Saving figure to png-format-file
fnamesave = type_obs+'_'+fnamedate+'_'+met_param+'.png'
plt.savefig(fnamesave, dpi = 300, bbox_inches="tight")

print('Output graphical file is: ', fnamesave)
b/SYNOP/gsv_mod_data.sh new file mode 100755 index 0000000000000000000000000000000000000000..e711b932d646f3eb993818ad1c627e7d46ffba97 --- /dev/null +++ b/SYNOP/gsv_mod_data.sh @@ -0,0 +1,75 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +######################################################################## +# DestinE Climate DT Project +# Author: Alexander Mahura : 2023-09-21 +######################################################################## + +# STEP 1 - PRE-PROCESSING MOD DATA EXTRACTED WITH GSV_INTERFACE + +# 2020012008_2t_r360x180.nc +# Specify: +# (1) name of gsv extracted file +gsv_extr_filename="2t_r360x180.nc" + +# (2) path to dir where such file is placed +#path_gsv_mod_data="/projappl/project_465000454/ama/DEOBS/SYNOP/GSVMODDATA/" +path_gsv_mod_data="GSVMODDATA/" + +cd $path_gsv_mod_data; pwd + +# -------------------------------------------------------------------------------- +echo " Checking if file with gsv extracted mod data is available ..." + +until [ -f $gsv_extr_filename ] +do + echo " File is not available in dir: $path_gsv_mod_data" + sleep 10 +done +echo " File found: $gsv_extr_filename" + +if test -f "$gsv_extr_filename"; then + echo $gsv_extr_filename exists +else + echo $gsv_extr_filename does not exist +fi + +echo " Name of input file with extracted mod data using GSV : $gsv_extr_filename" + +# -------------------------------------------------------------------------------- +echo " CDO extacting timestamps from file : $gsv_extr_filename" + +# temporary file for saving time slices +tmp_file_list_timestamp="list_timestamp.txt" + +cdo showtimestamp $gsv_extr_filename > $tmp_file_list_timestamp +cdo -splitsel,1 $gsv_extr_filename split_ + +# -------------------------------------------------------------------------------- +echo " Creating names of output files using timestamps ..." +echo " Saving individual files by time-slices ..." 
+# Strip the date/time separators from every timestamp in the list, e.g. +# 2020-01-20T08:00:00 -> 2020012008 +# The 'g' flag removes all occurrences on a line, so any number of +# time-slices is handled. The former fixed-count loops (8x '-', 4x ':00:00', +# 4x 'T') cleaned at most four timestamps when they share one line, which is +# how 'cdo showtimestamp' prints them; later ones would stay unconverted. +sed -i 's/-//g' "$tmp_file_list_timestamp" +sed -i 's/:00:00//g' "$tmp_file_list_timestamp" +sed -i 's/T//g' "$tmp_file_list_timestamp" + +filenames=($(cat $tmp_file_list_timestamp)) + +x=0; +for f in $(ls split_*.nc); +#do mv $f ${filenames[$x]}_2t_r360x180.nc; +do mv $f ${filenames[$x]}_$gsv_extr_filename; +let x=$x+1; +done + +rm $tmp_file_list_timestamp +###############tmp test +### rm -rf 20200120* + +exit + diff --git a/SYNOP/main_synop.sh b/SYNOP/main_synop.sh new file mode 100755 index 0000000000000000000000000000000000000000..03005c1d4da1204e1364e21ea1b41b529ea1382c --- /dev/null +++ b/SYNOP/main_synop.sh @@ -0,0 +1,224 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +######################################################################## +# Author: Alexander Mahura - 2023-08-03 +# UPDATED: +# 2023-08; 2023-09; +######################################################################## +# +# NEEDED MODULES LOADED ... 
+# module load LUMI/23.03 +# module load partition/C +# module load PrgEnv-gnu +# module load odb_api/0.18.1-cpeCray-23.03.lua +# module load python-climatedt/3.11.3-cpeCray-23.03.lua +# NOTE: +# these are part of /appl/AS/AUTOSUBMIT_DATA/expid/proj/workflow/lib/LUMI/config.sh +# +######################################################################### +# PRE-SETUP (TESTING FOR LIMITED ODB DATASET & LIMITED GEOGRAPHICAL AREA) +# FOR AIR TEMPERATURE AT 2 METRE +# ANY OTHER METEOROLOGICAL PARAMETER (FROM SYNOP LISTED) CAN BE ADDED + +echo "====================================================================" +echo " DestinE Digital Twin --- OBSALL Apps for SYNOP OBS DATA:" +echo " TESTING/ RUNING FOR: " +echo " -- LIMITED SYNOP OBS DATA ODB DATASET" +echo " -- LIMITED GEOGRAPHICAL AREA" +echo " -- FOR AIR TEMPERATURE AT 2 METRE" +echo "====================================================================" +echo " " + +varMETnum="39" +varMETstr="2t" +lon_val_min="20" #20 +lon_val_max="25" #25 +lat_val_min="60" #60 +lat_val_max="65" #65 +echo "varMETnum varMETstr lon_val_min lon_val_max lat_val_min lat_val_max:" +echo "$varMETnum $varMETstr $lon_val_min $lon_val_max $lat_val_min $lat_val_max" + + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " STEP #0 : PRE-CHEKING EXISTANCE OF NECESSARY DIRECTORIES ..." +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +pwd +echo " Directory for pre-processing mod data extracted with gsv ..." +inp_dir_gsvmoddata="GSVMODDATA" +echo "$inp_dir_gsvmoddata" +if test -d "$inp_dir_gsvmoddata"; then + echo $inp_dir_gsvmoddata exists +else + echo $inp_dir_gsvmoddata does not exist + echo " Creating directory: $inp_dir_gsvmoddata" + mkdir $inp_dir_gsvmoddata +fi + +echo " Directory for processing observation data ..." 
+inp_dir_dataobs="DATAOBS" +echo "$inp_dir_dataobs" +if test -d "$inp_dir_dataobs"; then + echo $inp_dir_dataobs exists +else + echo $inp_dir_dataobs does not exist + echo " Creating directory: $inp_dir_dataobs" + mkdir $inp_dir_dataobs +fi + +echo " Directory for processing modeled data ..." +inp_dir_datamod="DATAMOD" +echo "$inp_dir_datamod" +if test -d "$inp_dir_datamod"; then + echo $inp_dir_datamod exists +else + echo $inp_dir_datamod does not exist + echo " Creating directory: $inp_dir_datamod" + mkdir $inp_dir_datamod +fi + +echo " Directory for saving graphical output ..." +inp_dir_graphs="GRAPHS" +echo "$inp_dir_graphs" +if test -d "$inp_dir_graphs"; then + echo $inp_dir_graphs exists +else + echo $inp_dir_graphs does not exist + echo " Creating directory: $inp_dir_graphs" + mkdir $inp_dir_graphs +fi + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " ..... STEP 0 - COMPLETED ....." +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + +echo " " +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " STEP #1 : PRE-PROCESSING MOD DATA EXTRACTED WITH GSV_INTERFACE ..." +echo " SCRIPT --- gsv_mod_data.sh" +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + +./gsv_mod_data.sh + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " ..... STEP 1 - COMPLETED ....." 
+echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + +echo "====================================================================" +echo " SYNOP : START LOOP OVER ALL AVAILABLE TIME-SLICES " +echo "====================================================================" + +# List all available splitted files (by time-slices) in dir GSVMODDATA/ +# Define YYYY MM DD HH to start calculations for time-slices + +tmp_file_tstamps="file_with_tstamps.txt" +#ls GSVMODDATA/ > $tmp_file_tstamps +ls $inp_dir_gsvmoddata > $tmp_file_tstamps +tail -n 1 "$tmp_file_tstamps" | tee >(wc -c | xargs -I {} truncate "$tmp_file_tstamps" -s -{}) + +nrec_file=$( sed -n '$=' $tmp_file_tstamps) +echo "nrec_file: $nrec_file" +for (( nnrec=1; nnrec<=$nrec_file; nnrec++ )) +do + b_yy=$( head -n 1 $tmp_file_tstamps | cut -c 1-4 ) + b_mm=$( head -n 1 $tmp_file_tstamps | cut -c 5-6 ) + b_dd=$( head -n 1 $tmp_file_tstamps | cut -c 7-8 ) + b_hh=$( head -n 1 $tmp_file_tstamps | cut -c 9-10 ) + #echo "b_yy, b_mm, b_dd, b_hh : $b_yy $b_mm $b_dd $b_hh" + #tail -n +2 "$tmp_file_tstamps" > "$tmp_file_tstamps.tmp" && mv "$tmp_file_tstamps.tmp" "$tmp_file_tstamps" + # see line above (it is added at the bottom of the script) + echo "====================================================================" + echo " START CALCULATIONS FOR TIME-SLICE: $b_yy $b_mm $b_dd $b_hh " + echo "====================================================================" + +echo " " +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " STEP #2 : EXTRACTING SYNOP OBS DATA FROM ODB ..." 
+echo " SCRIPT --- synop_obs.sh" +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +#./synop_obs.sh 2020 01 20 00 00 00 39 21 24 61 64 +#./synop_obs.sh 2020 01 20 15 00 00 $varMETnum $lon_val_min $lon_val_max $lat_val_min $lat_val_max +b_min="00" +b_sec="00" +./synop_obs.sh $b_yy $b_mm $b_dd $b_hh $b_min $b_sec $varMETnum $lon_val_min $lon_val_max $lat_val_min $lat_val_max + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " ..... STEP 2 - COMPLETED ....." +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " STEP #3 : EXTRACTING AND INTERPOLATING MOD DATA TO SYNOP STATIONS " +echo " GEOGRAPHICAL COORDINATES/POSITIONS" +echo " AND ADDING SUCH INTERPOLATED MOD DATA TO ODB ..." +echo " SCRIPT --- synop_mod.sh" +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +#./synop_mod.sh 2020012000_2t_r360x180.nc 2020012000 2t +#./synop_mod.sh 2020012015_2t_r360x180.nc 2020012015 $varMETstr +b_2t="_2t_r360x180.nc" +b_yyyymmddhh=$b_yy$b_mm$b_dd$b_hh +echo "b_2t, b_yyyymmddhh = $b_2t $b_yyyymmddhh" +b_fname_yyyymmddhh_2t=$b_yyyymmddhh$b_2t +echo "b_fname_yyyymmddhh_2t = $b_fname_yyyymmddhh_2t" +./synop_mod.sh $b_fname_yyyymmddhh_2t $b_yyyymmddhh $varMETstr + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " ..... STEP 3 - COMPLETED ....." +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " STEP #4 : SELF-CONTROL BY PLOTTING DIFFEENCE OBS VS MOD ..." 
+echo " SCRIPT --- graph_mod_obs.py" +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +#python graph_mod_obs.py 2020012000 2t +#python graph_mod_obs.py 2020012015 $varMETstr +python graph_mod_obs.py $b_yyyymmddhh $varMETstr + +mv *.png GRAPHS/ + +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo " ..... STEP 4 - COMPLETED ....." +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + +echo "====================================================================" +echo " END CALCULATIONS FOR TIME-SLICE: $b_yy $b_mm $b_dd $b_hh " +echo "====================================================================" + +# Removing record with completed timestamp from file ... +# in order to start next time-slice calculations +tail -n +2 "$tmp_file_tstamps" > "$tmp_file_tstamps.tmp" && mv "$tmp_file_tstamps.tmp" "$tmp_file_tstamps" + +echo " " +echo "====================================================================" +echo " Checking if STATS should be run ..." +echo " STATS - PRODUCING QUANTILE RANK HISTOGRAM STATISTICS AND PLOTS" +echo " SCRIPT --- synop_stats.sh" +echo " STATS is run on a monthly basis - i.e., done on 2nd day of a month" +echo " ... BUT NOW : $b_yy $b_mm $b_dd $b_hh" + +##if [ $b_dd == "02" ] && [ $b_hh == "00" ]; then +if [ $b_dd == "21" ] && [ $b_hh == "01" ]; then + b_mm_start="1" + b_mm_end="12" + echo " ... CALCULATING NOW : $b_yy $b_mm $b_dd $b_hh" + echo "====================================================================" + echo " b_yy, b_mm, b_dd, b_hh, b_mm_start, b_mm_end : $b_yy $b_mm $b_dd $b_hh $b_mm_start $b_mm_end" + echo " varMETnum, b_yy, b_yy, b_mm_start, b_mm_end : $varMETnum, $b_yy, $b_yy, $b_mm_start, $b_mm_end" +./synop_stats.sh $varMETnum $b_yy $b_yy $b_mm_start $b_mm_end +fi + +echo " Checking size of file with time stamps: $tmp_file_tstamps ..." 
+ +file_actualsize=$(wc -c <"$tmp_file_tstamps") +if [ $file_actualsize == "0" ]; then + echo size is equal to zero bytes + echo "====================================================================" + echo " SYNOP : END LOOP OVER ALL AVAILABLE TIME-SLICES " + echo "====================================================================" + rm $tmp_file_tstamps + exit 0 # normal completion: no time-slices left to process (non-zero would signal failure) +else + echo size is over zero bytes +fi + +done + +exit 0 diff --git a/SYNOP/synop_mod.sh b/SYNOP/synop_mod.sh new file mode 100755 index 0000000000000000000000000000000000000000..f5a0cd903186bf02572959a2704a0ef55c40ee06 --- /dev/null +++ b/SYNOP/synop_mod.sh @@ -0,0 +1,217 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +######################################################################## +# DestinE Climate DT Project +# +# Author: +# ORIGINAL: +# Alexander Mahura: 2023-08, 2023-09 +# UPDATED/ADDED: +# 2023-08-14 +# Converting/adding extracted and interpolated synop mod data into odb +# 2023-09-19 +# Creating unstructured grid based on number of synop stations with obs +# Calculating weights for unstructured grid +######################################################################## +# +# STEP 3 - EXTRACTING AND INTERPOLATING MOD DATA TO SYNOP STATIONS +# GEOGRAPHICAL COORDINATES/POSITIONS +# AND ADDING SUCH INTERPOLATED MOD DATA TO ODB +# +# Reading extracted over global domain gsv modelled data +# for selected time-slot for selected variable & +# interpolating to geographical positions of synop stations +# where observations are available for selected time-slot +# --------------------------------------------------------------------- +# Execution (fx.): +# ./synop_mod_read.run 2020012000_2t_r360x180.nc 2020012000 2t +# ./synop_mod_read.run 2020012015_2t_r360x180.nc 2020012015 $varMETstr + +echo " Input path to location of files with extracted (using gsv_interface) modelled data:" +#inp_path_mod="/projappl/project_465000454/ama/DEOBS/SYNOP/GSVMODDATA/" +inp_path_mod="GSVMODDATA/" +echo 
" $inp_path_mod" + +echo " Input path to location of files with extracted (using sql) observed data:" +#inp_path_obs="/projappl/project_465000454/ama/DEOBS/SYNOP/DATAOBS/" +inp_path_obs="DATAOBS/" +echo " $inp_path_obs" + +echo " Output path to location of modelled data interpolated to coordinates of synop stations:" +#out_path_mod="/projappl/project_465000454/ama/DEOBS/SYNOP/DATAMOD/" +out_path_mod="DATAMOD/" +echo " $out_path_mod" + +# Input name of nc-file with modelled data extracted with gsv_interface +ncinfile=$1 +ncinfile=$inp_path_mod$ncinfile + +# Input time for making filename +timefilename=$2 +# Input name for meteo.parameter etracted with gsv +varMETstr=$3 + +#input synop file: fx. obs_synop_2020012102.dat +var1a="obs_synop_"; var1b=".dat" +infile=$var1a$timefilename$var1b +echo " Name of input file with synop obs data: $infile" + +infile=$inp_path_obs$infile + +# Name of output file with interpolated modelled data to positions of synop stations +vardef="_" +outfile="mod_synop$vardef$timefilename$vardef$varMETstr.dat" + +echo " Name of output file for interpolated mod data: $outfile" + +############################################################## +# Creating unstructured grid based on number of synop stations with observations + +echo " Path and Name of input file with synop obs data:" +echo " $infile" + +echo " Counting number of records in obs data file ..." +numrec=0 +while read +do + ((numrec=$numrec+1)) +done < $infile +echo " Number of records (numrec) in obs data file: $numrec" + +# example: obs_synop_2020012001.dat +# 2020 1 20 1 0 0 24.956800 60.326698 39 3.500000 100968 +#t while read ayear amonth aday ahour aminute asecond alongitude alatitude avariable avalue astation +#t do +#t echo "$ayear, $amonth, $aday, $ahour, $aminute, $asecond, $alongitude, $alatitude, $avariable, $avalue, $astation" +#t done < $inpfile + +echo " Creating unstructured grid for 2D geographical locations of synop stations ..." 
+ +unstr_grid_file="grid_unstructured_synop.txt" +echo " Name of file with unstructured grid: $unstr_grid_file" + +echo "gridtype = unstructured" > $unstr_grid_file # adding grid type +echo "gridsize = $numrec" >> $unstr_grid_file # adding number of grids +echo "xvals =" >> $unstr_grid_file +while read ayear amonth aday ahour aminute asecond alongitude alatitude avariable avalue astation +do +echo "$alongitude" >> $unstr_grid_file # adding longitude values (along x) +done < $infile +echo "yvals =" >> $unstr_grid_file +while read ayear amonth aday ahour aminute asecond alongitude alatitude avariable avalue astation +do +echo "$alatitude" >> $unstr_grid_file # adding latitude values (along y) +done < $infile + +############################################################## +# Calculating weights for unstructured grid ... + +echo " CDO calculating weights for unstructured grid ... " +#cdo genbil,$unstr_grid_file -selname,2t $ncinfile weights_synop.nc +cdo genbil,$unstr_grid_file -selname,$varMETstr $ncinfile weights_synop.nc +cdo remap,$unstr_grid_file,weights_synop.nc $ncinfile tmpfile_synop_new.nc + +while IFS=' ' read -r f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 +do + year=$((f1)) # converting integer $((f#)) + month=$((f2)) + day=$((f3)) + hour=$((f4)) + minute=$((f5)) + second=$((f6)) + longitude=$f7 + latitude=$f8 + variable=$((f9)) + value=$f10 + station=$f11 + + longitude=`echo $longitude` # excluding possible empty spaces in names + latitude=`echo $latitude` + + namevar="lon="${longitude}_"lat="${latitude} + #t echo "namevar: $namevar" + echo " CDO interpolating synop mod data to geographical coordinates of observational points ..." + echo " $longitude $latitude $variable $value $station" + # without weights - original ... 
+ #cdo -s -outputtab,date,time,lon,lat,value -remapnn,"lon=${longitude}_lat=${latitude}" -selvar,${varMETstr} $ncinfile > tmpout_${station}_${variable}.txt + # with weight - modified + cdo -s -outputtab,date,time,lon,lat,value -remapnn,"lon=${longitude}_lat=${latitude}" -selvar,${varMETstr} tmpfile_synop_new.nc > tmpout_${station}_${variable}.txt + cat tmpout_${station}_${variable}.txt | sed '1,1d' > tmpoutrev_${station}_${variable}.txt # deliting header line + sed -i s/$/" "${variable}/ tmpoutrev_${station}_${variable}.txt # adding variable ID + sed -i s/$/" "${station}/ tmpoutrev_${station}_${variable}.txt # adding station ID + cat tmpoutrev_${station}_${variable}.txt >> $outfile # adding record to summary file + rm -rf tmpout_*.txt tmpoutrev_*.txt # cleaning tmp-files + +done < $infile + +# Fixing time-slice structure from cdo processesed (YYYY-MM-DD HH-MM-SS) to sql-odb (YYYY MM DD HH MM SS) +sed -i 's/-/ /' $outfile +sed -i 's/-/ /' $outfile +sed -i 's/:/ /' $outfile +sed -i 's/:/ /' $outfile + +# Removing extra temporary files ... +rm -rf tmpfile_synop_new.nc weights_synop.nc +rm -rf grid_unstructured_synop.txt + + +############################################################################### +# Converting synop mod data file into odb (with import command) +# Opt1: saving/adding records to ODB (for mod data at synop stations locations) +# Opt2: saving adding records to ODB (for odb already having obs synop data) +# +# modify-update (& unify) according to different types of obs data: +# SYNOP (only at ground), +# RADSOUND (at 3D - lat/lon/alt) +# SATELLITE (at 3D - area within lat/lon/alt) + +echo " Converting/adding/saving records with synop mod data into ODB ..." +# .............................................................................. +# SYNOP data ................................................................... +# Defining header for adding mod synop data to odb file ... 
+header_mod_synop_data="year@descr:integer month@descr:integer day@descr:integer hour@descr:integer minute@descr:integer second@descr:integer longitude@descr:real latitude@descr:real value@descr:real variable@descr:integer station@descr:string" + +# Inserting header line as 1st line into mod_synop_YYYYMMDDHH.dat file ... +sed -i "1i ${header_mod_synop_data}" $outfile + +# Adding data to odb file ... +mod_synop_odb="/scratch/project_465000454/ama/open_data_ODB/synop_mod_data_2010-2021.odb" # Opt1 - new +##mod_synop_odb="/scratch/project_465000454/ama/open_data_ODB/synop_open_data_2010-2021.odb" # Opt2 - original +echo " Path and Name of odb-file with saved records:" +echo " $mod_synop_odb" + +odb import -d ' ' $outfile tmp_mod_synop_odb.odb +cat tmp_mod_synop_odb.odb >> $mod_synop_odb +rm -rf tmp_mod_synop_odb.odb + +# Deleting header line as 1st line in mod_synop_YYYYMMDDHH.dat file ... +sed -i -e "1d" $outfile + +# Moving file (with interpolated modelled data) to output dir +mv $outfile $out_path_mod + +# .............................................................................. +# RADSOUND data ................................................................ + +# Defining header for adding mod radsound data to odb file ... +# header_mod_radsound_data= + +# Inserting header line as 1st line into mod_radsound_YYYYMMDDHH.dat file ... + +# Adding data to odb file ... + +# Deleting header line as 1st line in mod_radsound_YYYYMMDDHH.dat file ... + +# .............................................................................. +# SATELLITE data ............................................................... + +# Defining header for adding mod satellite data to odb file ... +# header_mod_satellite_data= + +# Inserting header line as 1st line into mod_satellite_YYYYMMDDHH.dat file ... + +# Adding data to odb file ... + +# Deleting header line as 1st line in mod_satellite_YYYYMMDDHH.dat file ... 
+ +exit diff --git a/SYNOP/synop_obs.sh b/SYNOP/synop_obs.sh new file mode 100755 index 0000000000000000000000000000000000000000..10d59e7a2a66cea3ba0ddd0a0435dde2ac9f48ab --- /dev/null +++ b/SYNOP/synop_obs.sh @@ -0,0 +1,180 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +######################################################################## +# DestinE Climate DT Project +# +# Author: +# ORIGINAL: +# Jouni Räisänen: 2022-11 +# UPDATED +# Alexander Mahura: 2023-04-20, 2023-05, 2023-08 +######################################################################## +# +# STEP 2 - EXTRACTING SYNOP OBS DATA FROM ODB +# +# Prototype for finding surface observations from openly available +# ground-based (SYNOP) data for a given hour +# using files converted to ODB format +# +# --------------------------------------------------------------------- +# old - Execution (e.g.): ./odb-filename 20190728 11 39 20 25 60 65 +# new - Execution (e.g.): ./odb-filename 2019 07 28 11 00 00 39 20 25 60 65 +# --------------------------------------------------------------------- +# Arguments: +# --------------------------------------------------------------------- +# 1. Time in format: year, month, day +year=$1 +month=$2 +day=$3 +#t echo " year, month, day = $year $month $day" +yyyymmdd=$1$2$3 + +# 2. Time in format: hour, minute, second +hour=$4 +minute=$5 +second=$6 +#t echo " hour, minute, second = $hour $minute $second" +hhmmss=$4$5$6 + +echo " year, month, day, hour, minute, second = $year $month $day $hour $minute $second" + +# 3. 
Meteorological variable code (see list below) +variable=$7 + +echo " Meteorological variable code = $variable" + +# List of included meteorological variables: +# 91 'total amount of clouds' +# 108 'sea level pressure' +# 80 '1-hour precipitation' +# 58 'relative humidity' +# 999 '10-minute precipitation intensity' +# 71 'snow depth' +# 39 '2m temperature' +# 40 'dew point temperature' +# 62 'visibility' +# 111 'wind direction' +# 261 'maximum wind gust in last 10 minutes' +# 221 'surface wind speed' + +# 4.-5. Minimum and maximum longitude +# If minimum > maximum, search for stations outside the interval maximum ... -minimum +lon_min="${8:--180}" +lon_max="${9:-180}" + +# 6.-7. Minimum and maximum latitude +lat_min="${10:--90}" +lat_max="${11:-90}" + +# Note: if you do not give values for the last four arguments (lon/lat bounds), +# all stations will be included in the output, +# regardless of their coordinates. +# +################################################################ +# +# Increase lon_max by 360 degrees if it is smaller than lon_min +# (bc does both the test and the sum: bash $(( )) is integer-only and fails on decimal bounds) +if [ "$(echo "$lon_max < $lon_min" | bc)" = 1 ]; then + lon_max=$(echo "${lon_max} + 360" | bc) +fi + +# --- Year-month-day part of "date" in sql access / old +#t echo " year, monh, day (yyyymmdd=):" $yyyymmdd +# --- Hour-minute-second-part of "time" in sql access / old +#t echo " hour, minute, second (hhmmss=):" $hhmmss + +################################################################## +# +# ----- File names +# +################################################################## +# On Lumi (ODB as a single file) + +echo " Path to dir where ODB file is placed:" +in_dir=/scratch/project_465000454/ama/open_data_ODB +echo " $in_dir" + +echo " Name of ODB file containing all synop obs data:" +in_file=synop_open_data_2010-2021.odb +echo " $in_file" + +echo " Name of file for extracted synop obs data at geographical coordinates of stations:" +out_file=obs_synop_${yyyymmdd}${hour} +echo " $out_file" +rm -f ${out_file} + 
+################################################################## +# +# Add odb_api to $PATH +# On Puhti +# PATH="${PATH}":/projappl/project_2001011/odb_api/bin +# +# On Lumi +#PATH="${PATH}":/projappl/project_465000454/ama/software/odb_api/bin +# +# On Lumi (asked Devaraju Narayanappa, CSC) +# to make in /project/project_465000454/devaraju/modules/LUMI/23.03/C +# module use /project/project_465000454/devaraju/modules/LUMI/23.03/C +# +################################################################## +# +# Input file name(s): + +in_file=${in_dir}/${in_file} +# echo $in_file + if test -f "$in_file"; then + echo $in_file exists +# +# This would show the file headers +# +# odb header ${in_file} +# +# This would retrieve all the contents of the input file to the output file +# +# odb sql select \* -i ${in_file} -o ${out_file} +# +# This retrieves data from the desired date, time and variable, for all stations +# within a given area. The treatment of the longitude bounds is slightly +# complicated because there are two possibilities. +# +# The 'eval' structure is required because odb_api does not directly recognize the bash variables. + +echo " Extracting (with odb sql) synop obs data at geographical coordinates of stations ..." 
+# --------------------------------------- +# On Puhti +# --------------------------------------- +# echo " date, time: $date $time" +# +# odb_command="odb sql 'select date,time,longitude,latitude,variable,value where time=${hhmmss} and variable=${variable} and ((longitude>=${lon_min} and longitude<=${lon_max}) or (longitude+360>=${lon_min} and longitude+360<=${lon_max})) and latitude>=${lat_min} and latitude<=${lat_max}' -i ${in_file} -o ${out_file}" +# eval ${odb_command} +# + +# --------------------------------------- +# On Lumi +# --------------------------------------- +# Note, in overall combined .odb-file "date" became as year,month,day & "time" became as hour,minute,second +# +echo " year, month, day, hour, minute, second : $year $month $day $hour $minute $second" + + odb_command="odb sql 'select year,month,day,hour,minute,second,longitude,latitude,variable,value,station where year=${year} and month=${month} and day=${day} and hour=${hour} and minute=${minute} and second=${second} and variable=${variable} and ((longitude>=${lon_min} and longitude<=${lon_max}) or (longitude+360>=${lon_min} and longitude+360<=${lon_max})) and latitude>=${lat_min} and latitude<=${lat_max}' -i ${in_file} -o ${out_file}" + eval ${odb_command} + +# Deleting header line (example below) in a newly created output file ... +# year@hdr month@hdr day@hdr hour@hdr minute@hdr second@hdr longitude@hdr latitude@hdr variable@body value@body station@hdr +# 2020 1 20 0 0 0 24.956800 60.326698 39 3.500000 100968 + +echo " Cleaning and moving extracted from odb synop obs data into separate folder DATAOBS ..." 
+cat ${out_file} | sed '1,1d' > ${out_file}.dat +rm -rf ${out_file} +pwd +mv ${out_file}.dat DATAOBS/ + +# Some more 'odb sql' examples: +# +# https://confluence.ecmwf.int/display/ODBAPI/Examples +# + else + echo $in_file does not exist + fi + +exit diff --git a/SYNOP/synop_stats.sh b/SYNOP/synop_stats.sh new file mode 100755 index 0000000000000000000000000000000000000000..34ea1a55dffa27f0904f206168b9c7996037deb6 --- /dev/null +++ b/SYNOP/synop_stats.sh @@ -0,0 +1,58 @@ +#!/bin/bash +echo "Bash version ${BASH_VERSION}..." +######################################################################## +# Author: Alexander Mahura : 2023-08-22 +######################################################################## + +# PRE-SETUP (TESTING FOR LIMITED ODB DATASET & LIMITED GEOGRAPHICAL AREA) +# FOR AIR TEMPERATURE AT 2 METRES +# ANY OTHER METEOROLOGICAL PARAMETER (FROM SYNOP LISTED) CAN BE ADDED +# +# Produces quantile rank histogram statistics and plots, +# using 2 m temperature as an example (with limited open ODB datasets) +# +# Positional arguments: meteorological variable, start & end year, start & end month +varMETnum=$1 +b_yy_start=$2 +b_yy_end=$3 +b_mm_start=$4 +b_mm_end=$5 + +echo " varMETnum, b_yy_start, b_yy_end, b_mm_start, b_mm_end : $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end" + +echo "*****************************************************************" +echo " STATS - PRODUCING QUANTILE RANK HISTOGRAM STATISTICS AND PLOTS" +echo " ON EXAMPLE OF METEOROLOGICAL VARIABLE: $varMETnum" +echo " AT SYNOP STATIONS" +echo "*****************************************************************" + +cd STATS +pwd +#t exit + +echo "*****************************************************************" +echo " STEP STATS-1 - PRODUCE RANK HISTOGRAMS FOR ALL SYNOP STATIONS" +echo "*****************************************************************" + +#./produce_rank_histograms_all_stations.sh 39 2020 2020 1 12 +./produce_rank_histograms_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end + +echo 
"*****************************************************************" +echo " STEP STATS-2 - PRODUCE STANDARD PLOTS FOR EACH SYNOP STATION" +echo "*****************************************************************" + +#./produce_standard_plots_all_stations.sh 39 2020 2020 1 12 +./produce_standard_plots_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end + +echo "*****************************************************************" +echo " STEP STATS-3 - PRODUCE SUMMARY RANK HISTOGRAMS FOR ALL STATIONS" +echo "*****************************************************************" + +#./summary_rank_histograms_all_stations.sh 39 2020 2020 1 12 +./summary_rank_histograms_all_stations.sh $varMETnum $b_yy_start $b_yy_end $b_mm_start $b_mm_end + +echo "*****************************************************************" +echo " ALL STATS-# STEP COMPLETED" +echo "*****************************************************************" + +exit diff --git a/request_synop.yml b/request_synop.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc27bb7dffe58f24636cfd1b39e2516a7ad88df5 --- /dev/null +++ b/request_synop.yml @@ -0,0 +1,39 @@ +EXPERIMENT: + DATELIST: 20200201 #Startdate + MEMBERS: fc0 + CHUNKSIZEUNIT: day #unit length of the chunk (hour, day, month, year) + CHUNKSIZE: 1 #number of unit lengths per chunk + NUMCHUNKS: 1 #number of chunks to be run + CALENDAR: standard #including/ excluding leap years + +GSVREQUEST: #raw data + domain: g + class: rd + type: fc + expver: hz9n + stream: lwda + anoffset: 9 + levtype: "sfc" + date: "%CHUNK_START_DATE%/to/%CHUNK_SECOND_TO_LAST_DATE%" + time: 0000/to/2300/by/0100 # every hour + step: ["0"] # Irrelevant. 
step will be recalculated in workflow + param: #["2t"] + grid: 1/1 + method: nn + +APP: + other: "empty" + +OPAREQUEST: + NSTATS: "1" # number of variables to request + 1: + variable: "2t" + stat: "hourly" + stat_freq: "hourly" + output_freq: "daily" + time_step: 60 # in minutes, 60*timestep length in hours TODO: do that automatically + save: True + checkpoint: True + checkpoint_filepath: "/scratch/project_465000454/tmp/%DEFAULT.EXPID%/" + out_filepath: "/scratch/project_465000454/tmp/%DEFAULT.EXPID%/" + diff --git a/run_obsall.py b/run_obsall.py new file mode 100644 index 0000000000000000000000000000000000000000..a5b9ae02f463546c4615fa3147280560c89601a7 --- /dev/null +++ b/run_obsall.py @@ -0,0 +1,27 @@ +#!/scratch/project_465000454/devaraju/SW/LUMI-23.03/C/python-climatedt/bin/python +# OBSALL Apps (3 parts: SYNOP, TEMP, AMSU-A observations) + +# Import required libraries +import sys +import subprocess + +# IN IMPLEMENTATION +# --- Processing ground-based observations (SYNOP); steps are chained with '&&' and no trailing 'exit 0', so check=True raises CalledProcessError when the cd or the script fails +print('**********************************************************') +print('DestinE Climate Digital Twin - OBSALL Apps') +print('--- Processing ground-based observations (SYNOP)') +print('**********************************************************') +command_synop_run = "cd SYNOP && pwd && ./main_synop.sh" +subprocess.run(command_synop_run, shell=True, check=True, executable="/bin/bash") + +# IN DEVELOPMENT +# --- Processing radiosounding observations (TEMP) +#command_radsound_run = "cd RADSOUND; pwd; ./main_radsound.sh; exit 0" +#subprocess.run(command_radsound_run, shell=True, executable="/bin/bash") + +# IN DEVELOPMENT +# Processing satellite observations (AMSU-A) +#command_satellite_run = "cd SATELLITE; pwd; ./main_satellite.sh; exit 0" +#subprocess.run(command_satellite_run, shell=True, executable="/bin/bash") + +sys.exit(0)