......@@ -171,20 +171,21 @@ def download_metadata(n_max_tries, max_time_per_dl):
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
"""# create json from original metadata file
# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/CAPMoN/metadata/network_provided/CAPMoN_META.csv', 'r', encoding='ISO-8859-1') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
key = row['ID']+'_'+row['Measurements_Mesures'].replace('"', '')[:3]
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/CAPMoN/metadata/processed/CAPMoN_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))"""
f.write(json.dumps(json_metadata, indent=4))
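For reference, the processed metadata JSON built here nests every CSV column under a values/update_time pair so later runs can append revisions. A minimal sketch of that shape, with hypothetical station and column names (not taken from a real CAPMoN file):

# hypothetical example of the processed metadata structure
json_metadata = {
    "CAPMCA0001_SO2": {  # key: row['ID'] + '_' + first three characters of the measurement field
        "ID": {"values": ["CAPMCA0001"], "update_time": ["2024-03-01"]},
        "SiteName_NomDuSite": {"values": ["Example Site"], "update_time": ["2024-03-01"]},
    }
}
# a later run that detects a change appends to both lists, keeping the history:
json_metadata["CAPMCA0001_SO2"]["SiteName_NomDuSite"]["values"].append("Example Site (renamed)")
json_metadata["CAPMCA0001_SO2"]["SiteName_NomDuSite"]["update_time"].append("2024-06-01")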
# create json in desired shape from current metadata file
......@@ -193,7 +194,7 @@ def download_metadata(n_max_tries, max_time_per_dl):
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
key = row['ID']+'_'+row['Measurements_Mesures'].replace('"', '')[:3]
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
......@@ -218,11 +219,18 @@ def download_metadata(n_max_tries, max_time_per_dl):
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
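To make the new-station and new-parameter branches above concrete, a hypothetical before/after (station and column names are invented, same values/update_time shape as elsewhere in this file):

# hypothetical inputs for the merge loop above
json_metadata = {"STN_A": {"ID": {"values": ["STN_A"], "update_time": ["2024-01-01"]}}}
json_metadata_now = {
    "STN_A": {"ID": {"values": ["STN_A"], "update_time": ["2024-06-01"]},
              "Elevation": {"values": ["120"], "update_time": ["2024-06-01"]}},  # new column
    "STN_B": {"ID": {"values": ["STN_B"], "update_time": ["2024-06-01"]}},       # new station
}
# after the loop: "STN_B" is copied over in full and "Elevation" is added under "STN_A";
# unchanged parameters keep their existing history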
# save
......
......@@ -5,10 +5,11 @@ from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
import requests
import time
......@@ -23,14 +24,16 @@ import zipfile
import shutil
import os
import re
import csv
import json
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
# paths and variables
variables_website = ['MP 10','MP 2,5', 'MP 10discreto', 'SO2', 'NO2', 'NOX', 'NO', 'CO', 'O3', 'Pb', 'As', 'Cu'] # complete list later
variables_website = ['MP 10','MP 2,5', 'MP 10discreto', 'SO2', 'NO2', 'NOX', 'NO', 'CO', 'O3', 'Pb', 'As', 'Cu']
variables_text = ["Material particulado MP 10", "Material particulado MP 2,5", "Material particulado 10 micrometros discreto", 'Dióxido de azufre', 'Dióxido de nitrógeno', 'Óxidos de nitrógeno', 'Monóxido de nitrógeno', 'Monóxido de carbono', 'Ozono', 'Plomo', 'Arsénico', 'Cobre']
time_resolutions_website = ["registro diario", "registro horario"] # complete later
time_resolutions_website = ["registro diario", "registro horario"]
variables_ghost = ['PM10', 'PM2.5', 'PM10disc', 'SO2', 'NO2', 'NOX', 'NO','CO', 'O3','Pb', 'As', 'Cu'] # needs to be same order as variables_website!
time_resolution_ghost = ['daily', 'hourly']
......@@ -39,210 +42,345 @@ def scraper(mode, version):
baseurl = 'https://sinca.mma.gob.cl/index.php/'
# only for nrt
bdate = "240101"
edate = date.today().strftime('%Y%m%d')[2:]
print(edate)
# create download directory
os.makedirs('/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/temp/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/temp/'.format(version)
# set up driver
options = Options()
prefs = {'download.default_directory' : download_location}
options.add_experimental_option('prefs', prefs)
options.add_argument("--no-sandbox")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
# open url
driver.get(baseurl)
WebDriverWait(driver, 60).until(EC.presence_of_all_elements_located((By.XPATH, '//a[contains(@href, "/index.php/region/index/id/")]'))) # wait till loaded
# navigate to regions
html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
regions = soup.find_all("a", href=re.compile(r"^/index.php/region/index/id/"))
regions = soup.find_all("a", {"href": "/index.php/region/index/id/II"}) # Antofagasta
for region in regions:
print("Region is "+region.getText())
driver.get("https://sinca.mma.gob.cl/"+region.get("href"))
time.sleep(3)
# navigate to station and component
html_region = driver.page_source
soup_region = BeautifulSoup(html_region, features="html.parser")
a_titles = [a.get("title") for a in soup_region.find_all("a", title=True)] # all links
# from all the links, choose only the components
stations_components = []
for a_title in a_titles:
for variable_text in variables_text:
if variable_text in a_title:
stations_components.append(soup_region.find("a", {"title": a_title}))
# loop through all stations and components of the region
for station_component in stations_components:
print(station_component.get("title"))
station = station_component.get("title").split("| ", 1)[1]
component = [x for x in variables_text if x in station_component.get("title")][0] # get component name on website
component_choose_time_res = variables_website[variables_text.index(component)] # get component name for choosing time resolution
component_ghost = variables_ghost[variables_text.index(component)] # get component name accordingly in ghost
# create storage directory
try:
station_id = metadata15["station_reference"][metadata15["station_name"] == station].iloc[0]
os.makedirs('/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/{}/'.format(version, station_id), exist_ok=True)
storage_location = '/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/{}/'.format(version, station_id)
except:
print("{} added since 1.5, no station_id found in metadata_1.5, did not download data from new station".format(station))
continue
# go to data on website
driver.get('https:'+station_component.get("href"))
time.sleep(5)
if mode == 'all':
# create download directory
os.makedirs('/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/temp/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/temp/'.format(version)
elif mode == 'nrt':
bdate = date(date.today().year, 1, 1).strftime('%Y%m%d')[2:] #"240101"
edate = date.today().strftime('%Y%m%d')[2:]
print(edate)
# create download directory
version = mode
os.makedirs('/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/temp/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/temp/'.format(version)
else:
print('time mode inapplicable')
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
try:
# set up driver
options = Options()
prefs = {'download.default_directory' : download_location}
options.add_experimental_option('prefs', prefs)
options.add_argument("--no-sandbox")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
if n_tries > 0:
options.add_argument("--headless")
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
driver.get(baseurl)
WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.XPATH, '//a[contains(@href, "/index.php/region/index/id/")]'))) # wait till loaded
# navigate to regions
html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
regions = soup.find_all("a", href=re.compile(r"^/index.php/region/index/id/"))
regions = soup.find_all("a", {"href": "/index.php/region/index/id/II"}) # Antofagasta
driver.switch_to.frame("left")
# select time resolution
dropdown_element = driver.find_element(By.ID, 'ic')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
i=0
for time_resolution in time_resolutions_website:
#select time resolution if existent!
if (component_choose_time_res+' - '+time_resolution) in options:
select.select_by_visible_text(component_choose_time_res+' - '+time_resolution)
#print("Time resolution is: {}".format(time_resolution_ghost[i]))
for region in regions:
print("Region is "+region.getText())
driver.get("https://sinca.mma.gob.cl/"+region.get("href"))
time.sleep(3)
# navigate to station and component
html_region = driver.page_source
soup_region = BeautifulSoup(html_region, features="html.parser")
a_titles = [a.get("title") for a in soup_region.find_all("a", title=True)] # all links
# from all the links, choose only the components
stations_components = []
for a_title in a_titles:
for variable_text in variables_text:
if variable_text in a_title:
stations_components.append(soup_region.find("a", {"title": a_title}))
# loop through all stations and components of the region
for station_component in stations_components:
print(station_component.get("title"))
station = station_component.get("title").split("| ", 1)[1]
component = [x for x in variables_text if x in station_component.get("title")][0] # get component name on website
component_choose_time_res = variables_website[variables_text.index(component)] # get component name for choosing time resolution
component_ghost = variables_ghost[variables_text.index(component)] # get component name accordingly in ghost
# create storage directory
try:
station_id = metadata15["station_reference"][metadata15["station_name"] == station].iloc[0]
os.makedirs('/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/{}/'.format(version, station_id), exist_ok=True)
storage_location = '/esarchive/obs/ghost/CHILE_SINCA/original_files/{}/{}/'.format(version, station_id)
except:
print("{} added since 1.5, no station_id found in metadata_1.5, did not download data from new station".format(station))
continue
# go to data on website
driver.get('https:'+station_component.get("href"))
time.sleep(5)
driver.switch_to.frame("left")
if mode == "all":
start_date = driver.find_element(By.ID, "from").get_attribute("value")
end_date = driver.find_element(By.ID, "to").get_attribute("value")
if mode == "nrt": # updating dates difficult
start_date = driver.find_element(By.ID, "from")
driver.execute_script("arguments[0].value = {};".format(bdate), start_date)
end_date = driver.find_element(By.ID, "to")
driver.execute_script("arguments[0].value = {};".format(edate), end_date)
time.sleep(10)
# select time resolution
dropdown_element = driver.find_element(By.ID, 'ic')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
i=0
for time_resolution in time_resolutions_website:
#select time resolution if existent!
if (component_choose_time_res+' - '+time_resolution) in options:
select.select_by_visible_text(component_choose_time_res+' - '+time_resolution)
#print("Time resolution is: {}".format(time_resolution_ghost[i]))
time.sleep(5)
driver.switch_to.default_content()
driver.switch_to.frame("right")
if mode == "all":
start_date = driver.find_element(By.ID, "from").get_attribute("value")
end_date = driver.find_element(By.ID, "to").get_attribute("value")
if mode == "nrt": # updating dates difficult
start_date = driver.find_element(By.ID, "from")
driver.execute_script("arguments[0].value = {};".format(bdate), start_date)
end_date = driver.find_element(By.ID, "to")
driver.execute_script("arguments[0].value = {};".format(edate), end_date)
time.sleep(10)
WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.LINK_TEXT, "Excel CSV"))) # wait till loaded
time.sleep(10)
driver.find_element(By.LINK_TEXT, "Excel CSV").click()
driver.switch_to.default_content()
driver.switch_to.frame("right")
# wait until download finished
while not os.path.exists("{}/datos_{}_{}.csv".format(download_location, start_date, end_date)):
time.sleep(1)
time.sleep(10)
WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.LINK_TEXT, "Excel CSV"))) # wait till loaded
if os.path.isfile("{}/datos_{}_{}.csv".format(download_location, start_date, end_date)):
print('{} {} download successful'.format(station_component.get("title"), time_resolution_ghost[i]))
shutil.copyfile("{}/datos_{}_{}.csv".format(download_location, start_date, end_date), storage_location+component_ghost+'_'+time_resolution_ghost[i]+'.csv')
driver.find_element(By.LINK_TEXT, "Excel CSV").click()
os.remove("{}/datos_{}_{}.csv".format(download_location, start_date, end_date))
# wait until download finished
while not os.path.exists("{}/datos_{}_{}.csv".format(download_location, start_date, end_date)):
time.sleep(1)
driver.switch_to.default_content()
driver.switch_to.frame("left")
i=i+1
if os.path.isfile("{}/datos_{}_{}.csv".format(download_location, start_date, end_date)):
print('{} {} download successful'.format(station_component.get("title"), time_resolution_ghost[i]))
shutil.copyfile("{}/datos_{}_{}.csv".format(download_location, start_date, end_date), storage_location+component_ghost+'_'+time_resolution_ghost[i]+'.csv')
os.remove("{}/datos_{}_{}.csv".format(download_location, start_date, end_date))
driver.close()
driver.switch_to.default_content()
driver.switch_to.frame("left")
i=i+1
driver.close()
errcode = 200
except WebDriverException as e:
print(e)
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
def scraper_metadata(mode, version):
if n_tries == n_max_tries:
print('Failed downloading CHILE_SINCA data {} times in {} seconds'.format(n_tries, max_time_per_dl))
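The try/except WebDriverException loop above is the retry pattern reused for every Selenium-driven network in this PR. A condensed sketch of that pattern as a standalone helper, purely illustrative (the helper name and the scrape_once callable are not part of the proposed code):

from selenium.common.exceptions import WebDriverException

def run_with_retries(scrape_once, n_max_tries):
    # call scrape_once() until it succeeds or the attempt budget is used up
    n_tries = 0
    errcode = 999
    while (n_tries < n_max_tries) and (errcode != 200):
        try:
            scrape_once()   # expected to open the driver, download, and close the driver
            errcode = 200
        except WebDriverException as e:
            print(e)
            n_tries = n_tries + 1
            print("Number of tries: {}".format(n_tries))
    if n_tries == n_max_tries:
        print('Failed after {} tries'.format(n_tries))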
baseurl = 'https://sinca.mma.gob.cl/index.php/'
metadata15 = pd.read_csv("/esarchive/obs/ghost/CHILE_SINCA/metadata/network_provided/temp/CHILE_SINCA_META_temp.txt")
print(metadata15)
# set up driver
options = Options()
#prefs = {'download.default_directory' : download_location}
#options.add_experimental_option('prefs', prefs)
options.add_argument("--no-sandbox")
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
# open url
driver.get(baseurl)
WebDriverWait(driver, 60).until(EC.presence_of_all_elements_located((By.XPATH, '//a[contains(@href, "/index.php/region/index/id/")]'))) # wait till loaded
# navigate to regions
html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
regions = soup.find_all("a", href=re.compile(r"^/index.php/region/index/id/"))
regions = soup.find_all("a", {"href": "/index.php/region/index/id/II"}) # Antofagasta
for region in regions:
print("Region is "+region.getText())
def download_metadata(n_max_tries, max_time_per_dl):
driver.get("https://sinca.mma.gob.cl/"+region.get("href"))
time.sleep(3)
# paths and variables
variables_website = ['MP 10','MP 2,5', 'MP 10discreto', 'SO2', 'NO2', 'NOX', 'NO', 'CO', 'O3', 'Pb', 'As', 'Cu']
variables_text = ["Material particulado MP 10", "Material particulado MP 2,5", "Material particulado 10 micrometros discreto", 'Dióxido de azufre', 'Dióxido de nitrógeno', 'Óxidos de nitrógeno', 'Monóxido de nitrógeno', 'Monóxido de carbono', 'Ozono', 'Plomo', 'Arsénico', 'Cobre']
time_resolutions_website = ["registro diario", "registro horario"]
html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
stations = soup.find_all("a", href=re.compile(r"^/index.php/estacion/index/id/"))
variables_ghost = ['PM10', 'PM2.5', 'PM10disc', 'SO2', 'NO2', 'NOX', 'NO','CO', 'O3','Pb', 'As', 'Cu'] # needs to be same order as variables_website!
time_resolution_ghost = ['daily', 'hourly']
for station in stations:
baseurl = 'https://sinca.mma.gob.cl/index.php/'
today = date.today()
station_name = station.getText()
print(station_name)
driver.get("https://sinca.mma.gob.cl/"+station.get("href"))
time.sleep(3)
# get meta info
metadata15 = pd.read_csv("/esarchive/obs/ghost/CHILE_SINCA/metadata/network_provided/temp/CHILE_SINCA_META_temp.txt")
#print(metadata15)
instruments_old = ['O3_instrument','NO_instrument','NO2_instrument','CO_instrument','CH4_instrument','SO2_instrument','NMHC_instrument','HC_instrument','PM10_instrument','PM2.5_instrument','As_instrument','Cu_instrument','Pb_instrument']
"""
with open('/esarchive/obs/ghost/CHILE_SINCA/metadata/processed/CHILE_SINCA_META.json', 'r', encoding='utf-8') as f:
metadata_old = json.loads(f.read())
n_tries = 0
errcode = 999
metadata_new = {}
while (n_tries < n_max_tries) and (errcode != 200):
try:
# set up driver
options = Options()
#prefs = {'download.default_directory' : download_location}
#options.add_experimental_option('prefs', prefs)
options.add_argument("--no-sandbox")
options.add_argument("--headless")
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
# open url
driver.get(baseurl)
WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.XPATH, '//a[contains(@href, "/index.php/region/index/id/")]'))) # wait till loaded
# navigate to regions
html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
region = soup.find("th", text="Región").find_next_sibling().getText()
province = soup.find("th", text="Provincia").find_next_sibling().getText()
commune = soup.find("th", text="Comuna").find_next_sibling().getText()
UTM_coordinates = soup.find("th", text="Coordenadas UTM").find_next_sibling().getText()
timezone = soup.find("th", text="Huso horario").find_next_sibling().getText()
regions = soup.find_all("a", href=re.compile(r"^/index.php/region/index/id/"))
#regions = soup.find_all("a", {"href": "/index.php/region/index/id/II"}) # Antofagasta
scraped_metadata = [station_reference, station_name, region, province, commune, UTM_coordinates, timezone]
for region in regions:
metadata15_per_station = metadata15.loc[metadata15["station_name"] == station_name]
print(region)
print(metadata15_per_station)
print(metadata15_per_station["region"].iloc[0])
print("Region is "+region.getText())
i=0
for column in metadata15_per_station.head():
print(column)
driver.get("https://sinca.mma.gob.cl/"+region.get("href"))
time.sleep(1)
if metadata15_per_station[column].iloc[0] == scraped_metadata[i]:
print("ok!")
else:
print("not ok")
html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
stations = soup.find_all("a", href=re.compile(r"^/index.php/estacion/index/id/"))
i=i+1
for station in stations:
station_name_new = station.getText()
print(station_name_new)
driver.get("https://sinca.mma.gob.cl/"+station.get("href"))
time.sleep(3)
# get meta info
html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
region = soup.find("th", text="Región").find_next_sibling().getText()
province = soup.find("th", text="Provincia").find_next_sibling().getText()
commune = soup.find("th", text="Comuna").find_next_sibling().getText()
UTM_coordinates = soup.find("th", text="Coordenadas UTM").find_next_sibling().getText().replace(' ', '')
lon = UTM_coordinates.split('E')[0]+'E'
lat = UTM_coordinates.split('E')[1].split("\n")[0]
timezone = soup.find("th", text="Huso horario").find_next_sibling().getText().replace(' ', '')
ins_table = soup.find('table', id="medicion")
if ins_table is not None: # check if there are instruments for air pollution at this station
instruments = ins_table.find_all("td", {"class": "helpTecnica center"})
instruments_per_component = {}
else:
continue
for instrument in instruments:
component = instrument.find_parent().find('a').getText()
try: # rename
component = variables_ghost[variables_text.index(component)]
except:
try:
component = variables_ghost[variables_website.index(component)]
except:
pass
if 'Ozono.-' in component:
component = 'O3'
#======
if "No informado" in instrument.getText():
instruments_per_component[component] = ''
else:
instrument_name = re.sub(' +', ' ', instrument.getText())
instrument_name = instrument_name.split("\n")[-1]
instruments_per_component[component] = instrument_name
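A quick illustration of the instrument-name cleanup above; the cell text is hypothetical, real SINCA table cells may differ:

import re

cell_text = "Monitor:\nFotometría   UV"   # invented <td class="helpTecnica center"> content
name = re.sub(' +', ' ', cell_text)       # collapse repeated spaces -> "Monitor:\nFotometría UV"
name = name.split("\n")[-1]               # keep only the last line  -> "Fotometría UV"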
for station_reference in metadata_old:
if metadata_old[station_reference]['station_name']['values'][0] == station_name_new: # match station with previously referenced station reference from old file
i=0
metadata_new[station_reference] = {} # create inner dictionary
scraped_metadata = [station_reference, station_name_new, region, province, commune, lon, lat, timezone]
for parameter in metadata_old[station_reference]: # loop through the meta parameters
if ("instrument" not in parameter) and ("comments" not in parameter): # go through all that are not instruments
metadata_new[station_reference][parameter] = {"values": [scraped_metadata[i]], "update_time": [today.strftime('%Y-%m-%d')]}
i=i+1
elif "comments" == parameter:
metadata_new[station_reference][parameter] = {"values": [''], "update_time": [today.strftime('%Y-%m-%d')]}
else: # go through the instruments
for component in instruments_per_component:
if component in parameter:
metadata_new[station_reference][parameter] = {"values": [instruments_per_component[component]], "update_time": [today.strftime('%Y-%m-%d')]}
else:
metadata_new[station_reference][parameter] = {"values": [''], "update_time": [today.strftime('%Y-%m-%d')]}
# save
with open('/esarchive/obs/ghost/CHILE_SINCA/metadata/network_provided/CHILE_SINCA_META_{}.json'.format(today.strftime('%Y%m%d')), 'w', encoding='utf-8') as f:
f.write(json.dumps(metadata_new, indent=4, ensure_ascii=False))
driver.close()
errcode = 200
except WebDriverException as e:
print(e)
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
if n_tries == n_max_tries:
print('Failed downloading CHILE_SINCA metadata {} times in {} seconds'.format(n_tries, max_time_per_dl))"""
driver.close()
"""
# create json from original metadata file =====================================================================================
json_metadata = {}
with open('/esarchive/obs/ghost/CHILE_SINCA/metadata/network_provided/CHILE_SINCA_META.txt', 'r') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['station_reference']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/CHILE_SINCA/metadata/processed/CHILE_SINCA_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
# read newly scraped file
with open('/esarchive/obs/ghost/CHILE_SINCA/metadata/network_provided/CHILE_SINCA_META_{}.json'.format(today.strftime('%Y%m%d')), 'r', encoding='utf-8') as f:
json_metadata_now = json.loads(f.read())
# read standardised file to compare!
with open('/esarchive/obs/ghost/CHILE_SINCA/metadata/processed/CHILE_SINCA_META.json', 'r', encoding='utf-8') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# save
with open('/esarchive/obs/ghost/CHILE_SINCA/metadata/processed/CHILE_SINCA_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
\ No newline at end of file
......@@ -15,14 +15,30 @@ import zipfile
import os.path
import os
import pandas as pd
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
url = 'https://monitoring.eanet.asia/document/menu/index#publicData'
download_location = "/esarchive/obs/ghost/EANET/original_files/{}/".format(version)
today = date.today().strftime('%Y%m%d') #+ timedelta(days = 1)).strftime('%Y%m%d') # problem with timezones???
#print(today)
if mode == 'all':
bdate = date(1980, 1, 1) #date(1960, 1, 1) # date before record starts
edate = date.today()
os.makedirs('/esarchive/obs/ghost/EANET/original_files/{}/'.format(version), exist_ok=True)
download_location = "/esarchive/obs/ghost/EANET/original_files/{}/".format(version)
elif mode == 'nrt':
print("EANET no nrt")
exit()
else:
print('time mode inapplicable')
exit()
options = Options()
prefs = {'download.default_directory' : download_location}
......@@ -32,17 +48,17 @@ def scraper(mode, version):
driver = webdriver.Chrome(service=svc, options=options)
driver.get(url)
time.sleep(2)
time.sleep(max_time_per_dl)
# login
email = driver.find_element(By.ID, "email")
email.send_keys("raphael.grodofzig@bsc.es")
passwd = driver.find_element(By.ID, "passwd")
passwd.send_keys("274s9QZ5")
time.sleep(2)
time.sleep(max_time_per_dl)
driver.find_element(By.NAME, "submitBtn").click()
time.sleep(3)
time.sleep(max_time_per_dl)
# find countries
dropdown_element = driver.find_element(By.ID, 'countryCd')
......@@ -104,4 +120,109 @@ def scraper(mode, version):
i=i+1
driver.close()
\ No newline at end of file
driver.close()
def download_metadata(n_max_tries, max_time_per_dl):
url_metadata = 'https://www.eanet.asia/wp-content/uploads/2024/01/Site_Information_Acid_Deposition_Monitoring_NMP2023_1117.xlsm'
download_location = "/esarchive/obs/ghost/EANET/metadata/network_provided/EANET_META_{}.csv"
Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, timeout=max_time_per_dl, headers=Headers)
if r.status_code == 200:
with open(download_location.format(today.strftime('%Y%m%d')), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a lil more every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
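The same requests-based retry appears for every HTTP-downloaded network in this PR (EANET, MEXICO_CDMX, MITECO, NOAA_ISD). A minimal sketch of the behaviour it implements (quadratic back-off sleep, doubled timeout, 404 treated as "nothing to download"); the helper name is illustrative and not part of the proposed code:

import time
import requests

def fetch_with_retries(url, n_max_tries, max_time_per_dl, headers=None):
    # condensed version of the retry loops in this PR, for illustration only
    n_tries = 0
    errcode = 999
    content = None
    while (n_tries < n_max_tries) and (errcode != 200):
        r = requests.get(url, timeout=max_time_per_dl, headers=headers)
        if r.status_code == 200:
            content = r.content
            errcode = 200
        elif r.status_code == 404:
            errcode = 200                           # nothing to download, stop retrying
        else:
            n_tries += 1
            max_time_per_dl = max_time_per_dl * 2   # give the next attempt more time
            time.sleep(n_tries ** 2)                # wait a little longer each attempt
    return content, errcode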
metadata = pd.read_excel(download_location.format(today.strftime('%Y%m%d')), engine='pyxlsb').fillna('')
print(metadata)
"""
# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/EANET/metadata/network_provided/EANET_META.csv', 'r', encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/EANET/metadata/processed/EANET_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/EANET/metadata/processed/EANET_META.json', 'r', encoding='ISO-8859-1') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if parameter in json_metadata_now[station].keys(): # check if column of csv exists in new file
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('{} not in new metadata file'.format(parameter))
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
# save
with open('/esarchive/obs/ghost/EANET/metadata/processed/EANET_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))"""
\ No newline at end of file
import requests
import time
from datetime import date
from datetime import timedelta
import zipfile
import urllib
import os.path
import os
import pandas as pd
def scraper(mode):
url = 'https://monitoring.eanet.asia/document/menu/index#publicData'
download_url = 'https://www.eanet.asia/wp-content/uploads/2024/01/Site_Information_Acid_Deposition_Monitoring_NMP2023_1117.xlsm'
download_location = "/esarchive/obs/ghost/EANET/metadata/network_provided/"
today = date.today().strftime('%Y%m%d') #+ timedelta(days = 1)).strftime('%Y%m%d') # problem with timezones???
"""
Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
r = requests.get(download_url, timeout=120, headers=Headers)
print(r.status_code)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+"downloaded_metadata.xlsm")
print('Downloaded metadata')
else:
print('url status not ok')"""
# open file
metadata = pd.read_excel(download_location+"downloaded_metadata.xlsm", engine='pyxlsb').fillna('')
print(metadata)
......@@ -8,20 +8,20 @@ import urllib
import tarfile
import shutil
import gzip
import csv
import json
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
base_url = 'http://www.aire.cdmx.gob.mx/opendata/anuales_horarios_gz/contaminantes_{}.csv.gz'
if mode == 'all':
bdate = date(1980, 1, 1) #date(1960, 1, 1) # date before record starts
edate = date.today()
os.makedirs('/esarchive/obs/ghost/MEXICO_CDMX/original_files/{}/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/MEXICO_CDMX/original_files/'+version+'/contaminantes_{}.csv.gz'
elif mode == 'nrt':
......@@ -39,21 +39,143 @@ def scraper(mode, version):
# download
for year in years:
url = base_url.format(year)
r = requests.get(url, timeout=120)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location.format(year))
print('Downloaded {}'.format(url))
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url, timeout=max_time_per_dl)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location.format(year))
print('Downloaded {}'.format(url))
# unzip
with gzip.open(download_location.format(year), 'rb') as f_in:
with open(download_location.format(year)[:-3], 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
# remove files
os.remove(download_location.format(year))
errcode = r.status_code
elif r.status_code == 404:
print("No data found, error 404, year {}".format(year))
errcode = 200
elif r.status_code == 403:
print("Permission denied for {}".format(year))
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a lil more every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# unzip
with gzip.open(download_location.format(year), 'rb') as f_in:
with open(download_location.format(year)[:-3], 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
def download_metadata(n_max_tries, max_time_per_dl):
# remove files
os.remove(download_location.format(year))
url_metadata = 'http://www.aire.cdmx.gob.mx/opendata/catalogos/cat_estacion.csv'
download_location = "/esarchive/obs/ghost/MEXICO_CDMX/metadata/network_provided/MEXICO_CDMX_META_{}.csv"
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, timeout=max_time_per_dl)
if r.status_code == 200:
with open(download_location.format('_unformatted'), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
print('No {}'.format(url))
time.sleep(1)
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a lil more every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# import it as pandas to clean header
meta_file = pd.read_csv(download_location.format('_unformatted'), header=[1], encoding='ISO-8859-1')
meta_file.to_csv(download_location.format(today.strftime('%Y%m%d')), index=False)
os.remove(download_location.format('_unformatted'))
# create json from original metadata file
"""json_metadata = {}
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/network_provided/MEXICO_CDMX_META.csv', 'r', encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/processed/MEXICO_CDMX_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/processed/MEXICO_CDMX_META.json', 'r', encoding='ISO-8859-1') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if parameter in json_metadata_now[station].keys(): # check if column of csv exists in new file
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('{} not in new metadata file'.format(parameter))
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
# save
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/processed/MEXICO_CDMX_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
\ No newline at end of file
......@@ -22,22 +22,18 @@ from selenium.webdriver.support import expected_conditions as EC
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
baseurl = 'https://www.miteco.gob.es/es/calidad-y-evaluacion-ambiental/temas/atmosfera-y-calidad-del-aire/calidad-del-aire/evaluacion-datos/datos/datos-2001-2021.html'
if mode == 'all':
bdate = date(2001, 1, 1) #date(1960, 1, 1) # date before record starts
edate = date.today()
os.makedirs('/esarchive/obs/ghost/MITECO/original_files/{}/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/MITECO/original_files/{}/'.format(version)
elif mode == 'nrt':
bdate = date(2024, 3, 2) #date.today() - timedelta(days = 1) # if code is run after 2 am, data from previous day will be available
edate = date(2024, 3, 3) #date.today() - timedelta(days = 1)
print("nrt not available")
download_location = '/esarchive/obs/ghost/MITECO/original_files/nrt/'
else:
......@@ -70,23 +66,40 @@ def scraper(mode, version):
for zip_link in zip_links:
filename = zip_link.get("href").rpartition('/')[-1]
url = 'https://www.miteco.gob.es/{}'.format(zip_link.get("href"))
n_tries = 0
errcode = 999
r = requests.get(url, timeout=120)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url, timeout=max_time_per_dl)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
else:
print('No {}'.format(url))
errcode = r.status_code
elif r.status_code == 404:
print("No data found, error 404")
errcode = 200
elif r.status_code == 403:
print("Permission denied for {}".format(url))
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2
time.sleep(n_tries ** 2) # wait a lil more every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# go to hyperlinks
......@@ -118,21 +131,41 @@ def scraper(mode, version):
os.remove(zip_file)
continue
n_tries = 0
errcode = 999
r = requests.get(url, timeout=120)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
else:
print('No {}'.format(url))
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url, timeout=max_time_per_dl)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
errcode = r.status_code
elif r.status_code == 404:
print("No data found, error 404")
errcode = 200
elif r.status_code == 403:
print("Permission denied for {}".format(url))
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2
time.sleep(n_tries ** 2) # wait a lil more every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# delete metadata
......@@ -152,3 +185,96 @@ def scraper(mode, version):
driver.close()
def download_metadata(n_max_tries, max_time_per_dl):
url_metadata = 'https://www.miteco.gob.es/content/dam/miteco/es/calidad-y-evaluacion-ambiental/sgalsi/atm%C3%B3sfera-y-calidad-del-aire/evaluaci%C3%B3n-2022/Metainformacion2022.xlsx'
download_location = "/esarchive/obs/ghost/MITECO/metadata/network_provided/MITECO_META_{}.xlsx"
Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, headers=Headers, timeout=max_time_per_dl) # issue the request inside the retry loop so every attempt re-fetches
if r.status_code == 200:
with open(download_location.format(today.strftime('%Y%m%d')), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
time.sleep(n_tries ** 2) # wait a lil more every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# convert to csv
file = pd.read_excel(download_location.format(today.strftime('%Y%m%d')))
file.to_csv('/esarchive/obs/ghost/MITECO/metadata/network_provided/MITECO_META_{}.csv'.format(today.strftime('%Y%m%d')), index=False, header=True)
"""# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/MITECO/metadata/network_provided/MITECO_META.csv', 'r', encoding='ISO-8859-1') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/MITECO/metadata/processed/MITECO_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='ISO-8859-1') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/MITECO/metadata/processed/MITECO_META.json', 'r', encoding='ISO-8859-1') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# save
with open('/esarchive/obs/ghost/MITECO/metadata/processed/MITECO_META.json', 'w', encoding='ISO-8859-1') as f:
f.write(json.dumps(json_metadata, indent=4))"""
......@@ -7,20 +7,21 @@ import re
import os
from datetime import date
from datetime import timedelta
import requests
import csv
import json
import time
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
if mode == 'all':
start_year = 1971
end_year = 2024
elif mode == 'nrt':
start_year = date.today().strftime('%Y')
end_year = (date.today() + timedelta(days=365)).strftime('%Y')
version = mode
else:
......@@ -36,7 +37,7 @@ def scraper(mode, version):
read_url = False
while read_url == False:
try:
link_data = re.findall("href=[\"\'](.*?)[\"\']", urlopen(link_url, timeout=15, cafile=certifi.where()).read().decode('utf-8-sig'))
link_data = re.findall("href=[\"\'](.*?)[\"\']", urlopen(link_url, timeout=max_time_per_dl, cafile=certifi.where()).read().decode('utf-8-sig'))
read_url = True
except HTTPError as error:
print('Data not retrieved because %s\nURL: %s'%(error, link_url))
......@@ -57,11 +58,10 @@ def scraper(mode, version):
# handles the issue of the server sporadically hanging for 3 minutes
# try downloading each link a certain number of times before giving up
n_tries_limit = 3
for link in link_list:
n_tries = 0
errcode = 999
while (n_tries < n_tries_limit) & (errcode != 0):
while (n_tries < n_max_tries) & (errcode != 0):
if n_tries == 0:
print('Checking/Downloading %s'%(link))
else:
......@@ -91,4 +91,103 @@ def scraper(mode, version):
cmd = 'rm {}/{}'.format(specific_directory,lnk)
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', shell=True)
status = process.communicate()[0]
errcode = process.returncode
\ No newline at end of file
errcode = process.returncode
def download_metadata(n_max_tries, max_time_per_dl):
url_metadata = 'https://www.ncei.noaa.gov/pub/data/noaa/isd-history.csv'
download_location = "/esarchive/obs/ghost/NOAA_ISD/metadata/network_provided/NOAA_ISD_META_{}.csv"
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, timeout=max_time_per_dl)
if r.status_code == 200:
with open(download_location.format(today.strftime('%Y%m%d')), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a lil more every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
"""
# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/network_provided/NOAA_ISD_META.csv', 'r', encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['USAF']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/processed/NOAA_ISD_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['USAF']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/processed/NOAA_ISD_META.json', 'r', encoding='utf-8') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if parameter in json_metadata_now[station].keys(): # check if column of csv exists in new file
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('{} not in new metadata file'.format(parameter))
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
# save
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/processed/NOAA_ISD_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
\ No newline at end of file
......@@ -47,15 +47,14 @@ def download_data(mode, version, n_max_tries, max_time_per_dl):
options.add_argument("--no-sandbox")
#options.add_argument("--headless")
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
try:
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
# open url
driver.get(baseurl)
WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.ID, 'sites-list'))) # wait till loaded
......@@ -105,14 +104,11 @@ def download_data(mode, version, n_max_tries, max_time_per_dl):
print("Number of tries: {}".format(n_tries))
continue
driver.close()
driver.close()
if n_tries == n_max_tries:
print('Failed downloading US_NADP_AMNet data {} times in {} seconds'.format(n_tries, max_time_per_dl))
print(os.path.split(download_location[:-5]))
os.rename("{}AMNET-ALL-h.csv".format(download_location), "/esarchive/obs/ghost/US_NADP_AMNet/original_files/{}/AMNET-ALL-h.csv".format(version))
......@@ -229,7 +225,7 @@ def download_metadata(n_max_tries, max_time_per_dl):
for parameter in json_metadata[station]:
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append the standardised metadata file
print("new {} --- old {}".format(json_metadata_now[station][parameter]['values'][0], json_metadata[station][parameter]['values'][-1]))
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
......
......@@ -8,9 +8,9 @@ import pandas
import os.path
import urllib
import time
import ssl
import zipfile
from compare_two_files import compare_files
import json
import csv
from chromedriver_py import binary_path
from selenium.webdriver.chrome.options import Options
......@@ -22,22 +22,16 @@ from selenium.webdriver.support import expected_conditions as EC
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
baseurl = 'https://nadp.slh.wisc.edu/networks/ammonia-monitoring-network/'
if mode == 'all':
bdate = date(2013, 12, 1) #date(1960, 1, 1) # date before record starts
edate = date(2024, 1, 1) #date.today() - timedelta(days = 1)
os.makedirs('/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/'.format(version)
os.makedirs('/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/temp/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/temp/'.format(version)
elif mode == 'nrt':
bdate = date(2024, 1, 1) #date.today() - timedelta(days = 1)
edate = date.today() - timedelta(days = 1)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/nrt/'
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/nrt/temp/'
else:
print('time mode inapplicable')
......@@ -52,38 +46,198 @@ def scraper(mode, version):
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
try:
# open url
driver.get(baseurl)
WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.ID, 'sites-list'))) # wait till loaded
dropdown_element = driver.find_element(By.ID, 'data-type')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
select.select_by_visible_text("Bi-weekly")
time.sleep(max_time_per_dl)
dropdown_element = driver.find_element(By.ID, 'sites-list')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
select.select_by_visible_text("All Sites")
time.sleep(max_time_per_dl)
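# the 'invalid' checkbox presumably controls whether invalidated samples are included in the export (assumption based on the element id)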
invalid_box = driver.find_element(By.ID, 'invalid')
invalid_box.click()
time.sleep(max_time_per_dl)
# download
driver.find_element(By.ID, 'generate-button-text').click()
# wait until download finished
while not os.path.exists("{}AMoN-ALL-W-i.csv".format(download_location)):
time.sleep(1)
if os.path.isfile("{}AMoN-ALL-W-i.csv".format(download_location)):
print('AMoN-ALL-W-i.csv download successful')
errcode = 200
continue
except TimeoutException as e:
print(e)
max_time_per_dl = max_time_per_dl*2 # set waiting time to double
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
except WebDriverException as e:
print(e)
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
except Exception as e:
print("Unknown error: {}".format(e))
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
driver.close()
if n_tries == n_max_tries:
print('Failed downloading US_NADP_AMoN data {} times in {} seconds'.format(n_tries, max_time_per_dl))
os.rename("{}AMoN-ALL-W-i.csv".format(download_location), "/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/AMoN-ALL-W-i.csv".format(version))
def download_metadata(n_max_tries, max_time_per_dl):
# open url
driver.get(baseurl)
WebDriverWait(driver, 60).until(EC.presence_of_all_elements_located((By.ID, 'sites-list'))) # wait till loaded
dropdown_element = driver.find_element(By.ID, 'data-type')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
print(options)
select.select_by_visible_text("Bi-weekly")
time.sleep(3)
dropdown_element = driver.find_element(By.ID, 'sites-list')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
print(options)
select.select_by_visible_text("All Sites")
time.sleep(3)
invalid_box = driver.find_element(By.ID, 'invalid')
invalid_box.click()
time.sleep(3)
# download
driver.find_element(By.ID, 'generate-button-text').click()
baseurl = 'https://nadp.slh.wisc.edu/networks/ammonia-monitoring-network/'
#os.makedirs('/esarchive/obs/ghost/US_NADP_AMoN/metadata/network_provided/', exist_ok=True)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/metadata/network_provided/'
today = date.today()
# wait until download finished
while not os.path.exists("{}AMoN-ALL-W-i.csv".format(download_location)):
time.sleep(1)
# set up driver
options = Options()
prefs = {'download.default_directory' : download_location}
options.add_experimental_option('prefs', prefs)
options.add_argument("--no-sandbox")
options.add_argument("--headless")
if os.path.isfile("{}AMoN-ALL-W-i.csv".format(download_location)):
print('AMoN-ALL-W-i.csv download successful')
n_tries = 0
errcode = 999
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
driver.maximize_window()
while (n_tries < n_max_tries) and (errcode != 200):
try:
# open url
driver.get(baseurl)
#WebDriverWait(driver, max_time_per_dl).until(EC.element_to_be_clickable((By.ID, 'invalid'))) # wait till loaded
time.sleep(max_time_per_dl)
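# clicks below go through execute_script rather than .click(), which tends to be more reliable in headless mode when an element is not scrolled into view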
invalid_box = driver.find_element(By.ID, 'download-show-inactive')
driver.execute_script("arguments[0].click()", invalid_box)
# download
#WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.ID, 'generate-button-text'))) # wait till loaded
time.sleep(max_time_per_dl)
bttn = driver.find_element(By.ID, 'network-data-submit')
driver.execute_script("arguments[0].click()", bttn)
# wait until download finished
while not os.path.exists(download_location+'amon.csv'):
time.sleep(1)
if os.path.isfile(download_location+'amon.csv'):
print('AMoN metadata download successful')
errcode = 200
continue
except TimeoutException as e:
print(e)
max_time_per_dl = max_time_per_dl*2 # set waiting time to double
n_tries = n_tries+1
continue
except WebDriverException as e:
print(e)
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
except Exception as e:
print("Unknown error: {}".format(e))
max_time_per_dl = max_time_per_dl*2 # set waiting time to double
n_tries = n_tries+1
continue
if n_tries == n_max_tries:
print('Failed downloading AMoN metadata {} times in {} seconds'.format(n_tries, max_time_per_dl))
driver.close()
os.rename(download_location+'amon.csv', download_location+'US_NADP_AMoN_META_{}.csv'.format(today.strftime('%Y%m%d')))
# create json from original metadata file =====================================================================================
"""json_metadata = {}
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/network_provided/US_NADP_AMoN_META.csv', 'r') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['siteId']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/processed/US_NADP_AMoN_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location+'US_NADP_AMoN_META_{}.csv'.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['siteId']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/processed/US_NADP_AMoN_META.json', 'r', encoding='utf-8') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if the value differs, append it to the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# save
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/processed/US_NADP_AMoN_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
driver.close()
\ No newline at end of file
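# The compare-and-append pattern above is repeated almost verbatim for every network scraper;
# a minimal sketch of how it could be shared (helper name and signature are illustrative only):
def merge_metadata(json_metadata, json_metadata_now):
    """Fold freshly scraped metadata into the standardised record, keeping the value history."""
    for station, parameters in json_metadata.items():
        if station not in json_metadata_now:
            print('Station {} was abolished'.format(station))
            continue
        for parameter, record in parameters.items():
            new = json_metadata_now[station][parameter]
            if record['values'][-1] != new['values'][0]:
                print("old {} --- new {}".format(record['values'][-1], new['values'][0]))
                record['values'].append(new['values'][0])
                record['update_time'].append(new['update_time'][0])
    for station in json_metadata_now:  # any station not seen before is added wholesale
        if station not in json_metadata:
            print('New station {}'.format(station))
            json_metadata[station] = json_metadata_now[station]
    return json_metadata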
import hashlib # hashing works for all types of data
import requests
def request_download(url, file, year, n_max_tries, max_time_per_dl, download_location):
    # url: full download url; file: filename template with a '{}' placeholder filled with year
    Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
    n_tries = 0
    errcode = 999
    while (n_tries < n_max_tries) and (errcode != 200):
        r = requests.get(url, headers=Headers, timeout=max_time_per_dl)  # request anew on every attempt
        if r.status_code == 200:
            open(download_location + file.format(year), "wb").write(r.content)
            print('Downloaded {}'.format(file.format(year)))
            errcode = r.status_code
        elif r.status_code == 404:
            print("No ozone data found, error 404")
            errcode = 200  # nothing to retry
        elif r.status_code == 403:
            print("Permission denied for {}".format(file.format(year)))
            errcode = 200  # nothing to retry
        else:
            # try again
            print('Response error {}, attempt {}'.format(r.status_code, n_tries))
            errcode = r.status_code
            n_tries += 1
            time.sleep(n_tries ** 2)  # wait a little longer after every attempt
    if n_tries == n_max_tries:
        print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
    time.sleep(1)
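# Illustrative call (URL, filename template and path are placeholders, not real endpoints):
# request_download('https://example.org/o3/2023.zip', 'o3_{}.zip', 2023,
#                  n_max_tries=3, max_time_per_dl=30,
#                  download_location='/esarchive/obs/ghost/<network>/original_files/1.6/')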
# check if files are different
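# A minimal sketch of a hash-based file comparison (e.g. to decide whether a re-downloaded
# file actually changed); the helper name is illustrative, not part of the original script:
def files_are_identical(path_a, path_b):
    """Compare two files by their SHA-256 digests, reading in chunks to keep memory use low."""
    def digest(path):
        h = hashlib.sha256()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(8192), b''):
                h.update(chunk)
        return h.hexdigest()
    return digest(path_a) == digest(path_b)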
......
......@@ -61,7 +61,13 @@ if __name__ == "__main__":
'CNEMC': {'max_time_dl': 3},
'CANADA_NAPS': {'max_time_dl': 5},
'CAPMoN': {'max_time_dl': 5},
'US_NADP_AMNet': {'max_time_dl': 10}}
'US_NADP_AMNet': {'max_time_dl': 10},
'US_NADP_AMoN': {'max_time_dl': 7},
'MEXICO_CDMX': {'max_time_dl': 10},
'NOAA_ISD': {'max_time_dl': 15},
'MITECO': {'max_time_dl': 10},
'EANET': {'max_time_dl': 5},
'CHILE_SINCA': {'max_time_dl': 30}}
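# Sketch of how a per-network setting above might be consumed further down (the dictionary
# name 'settings' is an assumption; only the download_data signature is taken from the script):
# max_time_per_dl = settings['CHILE_SINCA']['max_time_dl']
# CHILE_SINCA_download.download_data(mode, version, n_max_tries, max_time_per_dl)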
# download data
......@@ -71,7 +77,7 @@ if __name__ == "__main__":
dl_metadata = True
# networks you want to download
networks = [US_NADP_AMNet_download]
networks = [CHILE_SINCA_download]
# download all networks
#networks = ['all']
......@@ -107,8 +113,6 @@ if __name__ == "__main__":
if dl_data == True:
network.download_data(mode, version, n_max_tries, max_time_per_dl)
pass
if dl_metadata == True:
network.download_metadata(n_max_tries, max_time_per_dl)
pass