diff --git a/CHANGELOG b/CHANGELOG index f9ec87144f0c1867e289afff9c619ec0dc07b2fe..44862577f1883a32bdf965cb9fc8f62950fef4d1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,3 +9,7 @@ - Fixing bug in the in-situ cams formatter, error with the concatenating of dataframes - Also changed where the run_nord3.sh points pathtopy points for testing - Updating some of the prints + +## 0.0.3 - 2025/05/20 + +- Adding remove-countries functionality so that we can remove the data of countries we deem to be wrong diff --git a/setup.py b/setup.py index abb3a91c23e720285c0dba8e93027cbbc894c40e..156ee30e48d1dbab4df943b9f57849c2a7b17964 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ from setuptools import find_packages from setuptools import setup # Could update this using versioneer -version="0.0.2" +version="0.0.3" setup( name="monarch-DA-preprocessor", diff --git a/src/in-situ/camsFormatter.py b/src/in-situ/camsFormatter.py index ea67b0c6cb3fef7b62ffc98ea364fd70cae00e55..afd437bd2238c87e0d7849a16ef6871125ab5151 100755 --- a/src/in-situ/camsFormatter.py +++ b/src/in-situ/camsFormatter.py @@ -14,12 +14,14 @@ import json import numpy as np import pandas as pd +from ast import literal_eval from datetime import timedelta from numpy import f2py from os import listdir, makedirs, walk from os.path import isfile, join, exists, getsize from CAMS_standards import standard_parameters + #import module from fortran # path to the .so fortran path_to_so = '../libs/' @@ -115,7 +117,7 @@ def datemap(iDate, fDate, lafrec_slice, filespath, lafrec = 'D'): return allfiles_slice -def read_and_process_data(iDate, fDate, dataDir, elements, an_re, unit): +def read_and_process_data(iDate, fDate, dataDir, elements, an_re, unit, remove_countries): element_analysis = {'o3':'O3', 'no2':'NO2', 'so2':'SO2', 'co':'CO', 'pm10':'PM10', 'pm2p5':'PM25'} allfiles = datemap(iDate, fDate, 'D', dataDir) print("Files to be downloaded") @@ -139,6 +141,16 @@ def read_and_process_data(iDate, fDate, dataDir, elements, an_re, 
unit): # date e.g. 2021, 05, 07, 00 to 2021050700 df['date'] = df[['YEAR', 'MONTH', 'DAY', 'HOUR']].agg(''.join, axis=1) + # Create a countries column to cut later + df["COUNTRY"] = [station[:2] for station in df["STATION"]] + # Remove countries if they are in remove countries list + print("REMOVING COUNTRIES: {}".format(remove_countries)) + print(len(df)) + df = df[~df["COUNTRY"].isin(remove_countries)] + print(len(df)) + # Save the index of the data removed + index_removed = df[df["COUNTRY"].isin(remove_countries)].index + # Add all observational events for each specific date and element for e in elements: @@ -147,6 +159,7 @@ def read_and_process_data(iDate, fDate, dataDir, elements, an_re, unit): print(date) cut_df = df[(df.element_con == e) & (df.date == date)] + # Apply boundaries for the values cut_df = cut_df[cut_df['CONCENTRATION'].between( standard_parameters[e]['extreme_lower_limit_{}'.format(an_re)], @@ -214,6 +227,7 @@ def getOptions(args=sys.argv[1:]): parser.add_argument("-i", "--startdate", help="Initial date in YYYY-MM-DD format.") parser.add_argument("-t", "--enddate", help="End date in YYYY-MM-DD format.") parser.add_argument("-r", "--anre", help="If the obs are for analysis or reanalysis") + parser.add_argument("-rc", "--remove_countries", help="Remove stations in this list") options = parser.parse_args(args) return options @@ -237,6 +251,7 @@ if __name__ == "__main__": an_re = options.anre iDate = options.startdate fDate = options.enddate + remove_countries = literal_eval(options.remove_countries) # Convert from string to list # Creating output directories if they don't already exist if not exists(obs_no_rotated_save): @@ -282,7 +297,7 @@ if __name__ == "__main__": ### Read in dataand preprocess it adding to obs dictionary - read_and_process_data(iDate, fDate, dataDir, elements, an_re, unit) + read_and_process_data(iDate, fDate, dataDir, elements, an_re, unit, remove_countries) print("Final result from processing of data:") print(obsnorot) diff 
--git a/src/in-situ/elwriterdaf90.cpython-37m-x86_64-linux-gnu.so b/src/in-situ/elwriterdaf90.cpython-37m-x86_64-linux-gnu.so index 975f32768dc6c44331663aba4a1e8f61e663c254..d58c8adaf9aa60ff00300f296091e0b36db306d8 100755 Binary files a/src/in-situ/elwriterdaf90.cpython-37m-x86_64-linux-gnu.so and b/src/in-situ/elwriterdaf90.cpython-37m-x86_64-linux-gnu.so differ diff --git a/src/in-situ/run_nord3.sh b/src/in-situ/run_nord3.sh index 193000879e1fa23fc04658750b3c3b095e1cd64e..c6f6e48efb88293e3863fb8366f26220a11b92cb 100755 --- a/src/in-situ/run_nord3.sh +++ b/src/in-situ/run_nord3.sh @@ -9,7 +9,8 @@ #SBATCH -J monarch-DA-cams #SBATCH -e /esarchive/scratch/cmeikle/Projects/monarch-DA/out-logs/format-cams-monarch-DA-data-%j.err #SBATCH -o /esarchive/scratch/cmeikle/Projects/monarch-DA/out-logs/format-cams-monarch-DA-data-%j.out -#BSUB -x +#SBATCH --qos=bsc_es +#SBATCH --account=bsc32 module purge @@ -56,6 +57,8 @@ southbound='-27.0' uncb='{"O3":0.01,"CO":0.01,"NO2":0.005,"SO2":0.005,"PM10":0.0001,"PM25":0.0005}' uncc='{"O3":0.001,"CO":0.001,"NO2":0.005,"SO2":0.0005,"PM10":0.00001,"PM25":0.00005}' +remove_countries='["RO"]' + #start date iDate='202503310000' @@ -75,7 +78,7 @@ echo ${pathtopy} f2py3 -m elwriterdaf90 -c ../libs/elwriterdaf90.f90 -time python camsFormatter.py -d ${dataDir} -u ${unitChangeFile_anre} -e ${element} -x ${fortranexec} -o ${obsnorotatedsave} -f ${obsfinalsave} -a ${save_all_obs_in_one} -m ${cenlon} -l ${cenlat} -w ${westbound} -q ${southbound} -b ${uncb} -c ${uncc} -i ${iDate} -t ${fDate} -r ${anre} +time python camsFormatter.py -d ${dataDir} -u ${unitChangeFile_anre} -e ${element} -x ${fortranexec} -o ${obsnorotatedsave} -f ${obsfinalsave} -a ${save_all_obs_in_one} -m ${cenlon} -l ${cenlat} -w ${westbound} -q ${southbound} -b ${uncb} -c ${uncc} -i ${iDate} -t ${fDate} -r ${anre} -rc ${remove_countries} chmod -R 775 ${obsfinalsave} exit