......@@ -171,20 +171,21 @@ def download_metadata(n_max_tries, max_time_per_dl):
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
"""# create json from original metadata file
# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/CAPMoN/metadata/network_provided/CAPMoN_META.csv', 'r', encoding='ISO-8859-1') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
key = row['ID']+'_'+row['Measurements_Mesures'].replace('"', '')[:3]
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/CAPMoN/metadata/processed/CAPMoN_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))"""
f.write(json.dumps(json_metadata, indent=4))
# create json in desired shape from current metadata file
......@@ -193,7 +194,7 @@ def download_metadata(n_max_tries, max_time_per_dl):
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
key = row['ID']+'_'+row['Measurements_Mesures'].replace('"', '')[:3]
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
......@@ -218,11 +219,18 @@ def download_metadata(n_max_tries, max_time_per_dl):
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
# save
......
......@@ -15,14 +15,30 @@ import zipfile
import os.path
import os
import pandas as pd
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
url = 'https://monitoring.eanet.asia/document/menu/index#publicData'
download_location = "/esarchive/obs/ghost/EANET/original_files/{}/".format(version)
today = date.today().strftime('%Y%m%d') #+ timedelta(days = 1)).strftime('%Y%m%d') # problem with timezones???
#print(today)
if mode == 'all':
bdate = date(1980, 1, 1) #date(1960, 1, 1) # date before record starts
edate = date.today()
os.makedirs('/esarchive/obs/ghost/EANET/original_files/{}/'.format(version), exist_ok=True)
download_location = "/esarchive/obs/ghost/EANET/original_files/{}/".format(version)
elif mode == 'nrt':
print("EANET no nrt")
exit()
else:
print('time mode inapplicable')
exit()
options = Options()
prefs = {'download.default_directory' : download_location}
......@@ -32,17 +48,17 @@ def scraper(mode, version):
driver = webdriver.Chrome(service=svc, options=options)
driver.get(url)
time.sleep(2)
time.sleep(max_time_per_dl)
# login
email = driver.find_element(By.ID, "email")
email.send_keys("raphael.grodofzig@bsc.es")
passwd = driver.find_element(By.ID, "passwd")
passwd.send_keys("274s9QZ5")
time.sleep(2)
time.sleep(max_time_per_dl)
driver.find_element(By.NAME, "submitBtn").click()
time.sleep(3)
time.sleep(max_time_per_dl)
# find countries
dropdown_element = driver.find_element(By.ID, 'countryCd')
......@@ -104,4 +120,109 @@ def scraper(mode, version):
i=i+1
driver.close()
\ No newline at end of file
driver.close()
def download_metadata(n_max_tries, max_time_per_dl):
url_metadata = 'https://www.eanet.asia/wp-content/uploads/2024/01/Site_Information_Acid_Deposition_Monitoring_NMP2023_1117.xlsm'
download_location = "/esarchive/obs/ghost/EANET/metadata/network_provided/EANET_META_{}.csv"
Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, timeout=max_time_per_dl, headers=Headers)
if r.status_code == 200:
with open(download_location.format(today.strftime('%Y%m%d')), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
metadata = pd.read_excel(download_location.format(today.strftime('%Y%m%d')), engine='pyxlsb').fillna('')
print(metadata)
"""
# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/EANET/metadata/network_provided/EANET_META.csv', 'r', encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/EANET/metadata/processed/EANET_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/EANET/metadata/processed/EANET_META.json', 'r', encoding='ISO-8859-1') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if parameter in json_metadata_now[station].keys(): # check if column of csv exists in new file
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append to the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('{} not in new metadata file'.format(parameter))
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
# save
with open('/esarchive/obs/ghost/EANET/metadata/processed/EANET_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))"""
\ No newline at end of file
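Most of the requests-based downloads in this changeset repeat the same retry pattern: a bounded attempt counter, a doubled timeout, and a quadratically growing sleep. A minimal sketch of how that pattern could be factored into a shared helper; retry_get and its parameters are assumptions for illustration, not part of this patch.
import time
import requests
def retry_get(url, n_max_tries, max_time_per_dl, headers=None):
    # return the successful response, or None once the retry budget is exhausted
    n_tries = 0
    while n_tries < n_max_tries:
        try:
            r = requests.get(url, headers=headers, timeout=max_time_per_dl)
            if r.status_code == 200:
                return r
            if r.status_code in (403, 404):
                return None  # treated as final in the scrapers, no retry
            print('Response error {}, attempt {}'.format(r.status_code, n_tries))
        except requests.RequestException as e:
            print('Request failed: {}'.format(e))
        n_tries += 1
        max_time_per_dl = max_time_per_dl * 2  # give the server more time on the next attempt
        time.sleep(n_tries ** 2)               # wait a little longer every round
    print('Failed downloading {} {} times'.format(url, n_tries))
    return None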
import requests
import time
from datetime import date
from datetime import timedelta
import zipfile
import urllib
import os.path
import os
import pandas as pd
def scraper(mode):
url = 'https://monitoring.eanet.asia/document/menu/index#publicData'
download_url = 'https://www.eanet.asia/wp-content/uploads/2024/01/Site_Information_Acid_Deposition_Monitoring_NMP2023_1117.xlsm'
download_location = "/esarchive/obs/ghost/EANET/metadata/network_provided/"
today = date.today().strftime('%Y%m%d') #+ timedelta(days = 1)).strftime('%Y%m%d') # problem with timezones???
"""
Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
r = requests.get(download_url, timeout=120, headers=Headers)
print(r.status_code)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+"downloaded_metadata.xlsm")
print('Downloaded metadata')
else:
print('url status not ok')"""
# open file
metadata = pd.read_excel(download_location+"downloaded_metadata.xlsm", engine='pyxlsb').fillna('')
print(metadata)
......@@ -8,20 +8,20 @@ import urllib
import tarfile
import shutil
import gzip
import csv
import json
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
base_url = 'http://www.aire.cdmx.gob.mx/opendata/anuales_horarios_gz/contaminantes_{}.csv.gz'
if mode == 'all':
bdate = date(1980, 1, 1) #date(1960, 1, 1) # date before record starts
edate = date.today()
os.makedirs('/esarchive/obs/ghost/MEXICO_CDMX/original_files/{}/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/MEXICO_CDMX/original_files/'+version+'/contaminantes_{}.csv.gz'
elif mode == 'nrt':
......@@ -39,21 +39,143 @@ def scraper(mode, version):
# download
for year in years:
url = base_url.format(year)
r = requests.get(url, timeout=120)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location.format(year))
print('Downloaded {}'.format(url))
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url, timeout=max_time_per_dl)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location.format(year))
print('Downloaded {}'.format(url))
# unzip
with gzip.open(download_location.format(year), 'rb') as f_in:
with open(download_location.format(year)[:-3], 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
# remove files
os.remove(download_location.format(year))
errcode = r.status_code
elif r.status_code == 404:
print("No data found, error 404, year {}".format(year))
errcode = 200
elif r.status_code == 403:
print("Permission denied for {}".format(year))
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# unzip
with gzip.open(download_location.format(year), 'rb') as f_in:
with open(download_location.format(year)[:-3], 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
def download_metadata(n_max_tries, max_time_per_dl):
# remove files
os.remove(download_location.format(year))
url_metadata = 'http://www.aire.cdmx.gob.mx/opendata/catalogos/cat_estacion.csv'
download_location = "/esarchive/obs/ghost/MEXICO_CDMX/metadata/network_provided/MEXICO_CDMX_META_{}.csv"
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, timeout=max_time_per_dl)
if r.status_code == 200:
with open(download_location.format('_unformatted'), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
print('No {}'.format(url))
time.sleep(1)
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# read it with pandas to clean the header
meta_file = pd.read_csv(download_location.format('_unformatted'), header=[1], encoding='ISO-8859-1')
meta_file.to_csv(download_location.format(today.strftime('%Y%m%d')), index=False)
os.remove(download_location.format('_unformatted'))
# create json from original metadata file
"""json_metadata = {}
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/network_provided/MEXICO_CDMX_META.csv', 'r', encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/processed/MEXICO_CDMX_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['cve_estac']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/processed/MEXICO_CDMX_META.json', 'r', encoding='ISO-8859-1') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if parameter in json_metadata_now[station].keys(): # check if column of csv exists in new file
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append to the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('{} not in new metadata file'.format(parameter))
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
# save
with open('/esarchive/obs/ghost/MEXICO_CDMX/metadata/processed/MEXICO_CDMX_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
\ No newline at end of file
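The processed *_META.json files handled by these download_metadata functions all share one shape: a station key maps to the CSV columns, and each column keeps parallel lists of historical values and the dates they changed. A minimal sketch of that structure and of the append step, with illustrative station and column names; append_if_changed is a hypothetical helper, not part of this patch.
# illustrative shape of the processed metadata JSON
json_metadata = {
    'STATION_1': {                            # station key, e.g. row['cve_estac']
        'name': {'values': ['Old name'], 'update_time': ['2024-03-01']},
        'longitude': {'values': ['-99.20'], 'update_time': ['2024-03-01']},
    }
}
def append_if_changed(old_entry, new_entry):
    # old_entry holds the full history; new_entry holds one freshly scraped value
    if old_entry['values'][-1] != new_entry['values'][0]:
        old_entry['values'].append(new_entry['values'][0])
        old_entry['update_time'].append(new_entry['update_time'][0])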
......@@ -22,22 +22,18 @@ from selenium.webdriver.support import expected_conditions as EC
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
baseurl = 'https://www.miteco.gob.es/es/calidad-y-evaluacion-ambiental/temas/atmosfera-y-calidad-del-aire/calidad-del-aire/evaluacion-datos/datos/datos-2001-2021.html'
if mode == 'all':
bdate = date(2001, 1, 1) #date(1960, 1, 1) # date before record starts
edate = date.today()
os.makedirs('/esarchive/obs/ghost/MITECO/original_files/{}/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/MITECO/original_files/{}/'.format(version)
elif mode == 'nrt':
bdate = date(2024, 3, 2) #date.today() - timedelta(days = 1) # if code is run after 2 am, data from previous day will be available
edate = date(2024, 3, 3) #date.today() - timedelta(days = 1)
print("nrt not available")
download_location = '/esarchive/obs/ghost/MITECO/original_files/nrt/'
else:
......@@ -70,23 +66,40 @@ def scraper(mode, version):
for zip_link in zip_links:
filename = zip_link.get("href").rpartition('/')[-1]
url = 'https://www.miteco.gob.es/{}'.format(zip_link.get("href"))
n_tries = 0
errcode = 999
r = requests.get(url, timeout=120)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url, timeout=max_time_per_dl)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
else:
print('No {}'.format(url))
errcode = r.status_code
elif r.status_code == 404:
print("No data found, error 404")
errcode = 200
elif r.status_code == 403:
print("Permission denied for {}".format(url))
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# go to hyperlinks
......@@ -118,21 +131,41 @@ def scraper(mode, version):
os.remove(zip_file)
continue
n_tries = 0
errcode = 999
r = requests.get(url, timeout=120)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
else:
print('No {}'.format(url))
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url, timeout=max_time_per_dl)
if r.status_code == 200:
urllib.request.urlretrieve(url, download_location+filename)
print('Downloaded {}'.format(filename))
# unzip
with zipfile.ZipFile(download_location+filename, 'r') as zip_ref:
zip_ref.extractall(download_location)
os.remove(download_location+filename)
errcode = r.status_code
elif r.status_code == 404:
print("No data found, error 404")
errcode = 200
elif r.status_code == 403:
print("Permission denied for {}".format(url))
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# delete metadata
......@@ -152,3 +185,96 @@ def scraper(mode, version):
driver.close()
def download_metadata(n_max_tries, max_time_per_dl):
url_metadata = 'https://www.miteco.gob.es/content/dam/miteco/es/calidad-y-evaluacion-ambiental/sgalsi/atm%C3%B3sfera-y-calidad-del-aire/evaluaci%C3%B3n-2022/Metainformacion2022.xlsx'
download_location = "/esarchive/obs/ghost/MITECO/metadata/network_provided/MITECO_META_{}.xlsx"
Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, headers=Headers, timeout=max_time_per_dl)
if r.status_code == 200:
with open(download_location.format(today.strftime('%Y%m%d')), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# convert to csv
file = pd.read_excel(download_location.format(today.strftime('%Y%m%d')))
file.to_csv('/esarchive/obs/ghost/MITECO/metadata/network_provided/MITECO_META_{}.csv'.format(today.strftime('%Y%m%d')), index=False, header=True)
"""# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/MITECO/metadata/network_provided/MITECO_META.csv', 'r', encoding='ISO-8859-1') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/MITECO/metadata/processed/MITECO_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='ISO-8859-1') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['SiteName_NomDuSite']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/MITECO/metadata/processed/MITECO_META.json', 'r', encoding='ISO-8859-1') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append to the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# save
with open('/esarchive/obs/ghost/MITECO/metadata/processed/MITECO_META.json', 'w', encoding='ISO-8859-1') as f:
f.write(json.dumps(json_metadata, indent=4))"""
......@@ -7,20 +7,21 @@ import re
import os
from datetime import date
from datetime import timedelta
import requests
import csv
import json
import time
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
if mode == 'all':
start_year = 1971
end_year = 2024
elif mode == 'nrt':
start_year = date.today().strftime('%Y')
end_year = (date.today() + timedelta(days=365)).strftime('%Y')
version = mode
else:
......@@ -36,7 +37,7 @@ def scraper(mode, version):
read_url = False
while read_url == False:
try:
link_data = re.findall("href=[\"\'](.*?)[\"\']", urlopen(link_url, timeout=15, cafile=certifi.where()).read().decode('utf-8-sig'))
link_data = re.findall("href=[\"\'](.*?)[\"\']", urlopen(link_url, timeout=max_time_per_dl, cafile=certifi.where()).read().decode('utf-8-sig'))
read_url = True
except HTTPError as error:
print('Data not retrieved because %s\nURL: %s'%(error, link_url))
......@@ -57,11 +58,10 @@ def scraper(mode, version):
# handles issue of server hanging for 3 minutes sporadically
#try downloading each link a certain number of times before giving up
n_tries_limit = 3
for link in link_list:
n_tries = 0
errcode = 999
while (n_tries < n_tries_limit) & (errcode != 0):
while (n_tries < n_max_tries) & (errcode != 0):
if n_tries == 0:
print('Checking/Downloading %s'%(link))
else:
......@@ -91,4 +91,103 @@ def scraper(mode, version):
cmd = 'rm {}/{}'.format(specific_directory,lnk)
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', shell=True)
status = process.communicate()[0]
errcode = process.returncode
\ No newline at end of file
errcode = process.returncode
def download_metadata(n_max_tries, max_time_per_dl):
url_metadata = 'https://www.ncei.noaa.gov/pub/data/noaa/isd-history.csv'
download_location = "/esarchive/obs/ghost/NOAA_ISD/metadata/network_provided/NOAA_ISD_META_{}.csv"
n_tries = 0
errcode = 999
today = date.today()
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url_metadata, timeout=max_time_per_dl)
if r.status_code == 200:
with open(download_location.format(today.strftime('%Y%m%d')), 'wb') as outfile:
outfile.write(r.content)
print('Downloaded metadata')
errcode = r.status_code
elif r.status_code == 404:
print("No metadata found, error 404")
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
max_time_per_dl = max_time_per_dl*2 # increase waiting time
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url_metadata, n_tries, max_time_per_dl, errcode))
time.sleep(1)
"""
# create json from original metadata file
json_metadata = {}
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/network_provided/NOAA_ISD_META.csv', 'r', encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['USAF']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/processed/NOAA_ISD_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['USAF']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/processed/NOAA_ISD_META.json', 'r', encoding='utf-8') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if parameter in json_metadata_now[station].keys(): # check if column of csv exists in new file
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append to the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('{} not in new metadata file'.format(parameter))
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
for parameter in json_metadata_now[station]: # loop through all the parameters
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# is there a new parameter that wasn't in the old file?
if parameter in json_metadata[station].keys():
pass # parameter (column) is already there
else:
print('{} is new'.format(parameter))
json_metadata[station].update({parameter: json_metadata_now[station][parameter]})
# save
with open('/esarchive/obs/ghost/NOAA_ISD/metadata/processed/NOAA_ISD_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
\ No newline at end of file
......@@ -47,15 +47,14 @@ def download_data(mode, version, n_max_tries, max_time_per_dl):
options.add_argument("--no-sandbox")
#options.add_argument("--headless")
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
try:
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
# open url
driver.get(baseurl)
WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.ID, 'sites-list'))) # wait till loaded
......@@ -105,14 +104,11 @@ def download_data(mode, version, n_max_tries, max_time_per_dl):
print("Number of tries: {}".format(n_tries))
continue
driver.close()
driver.close()
if n_tries == n_max_tries:
print('Failed downloading US_NADP_AMNet data {} times in {} seconds'.format(n_tries, max_time_per_dl))
print(os.path.split(download_location[:-5]))
os.rename("{}AMNET-ALL-h.csv".format(download_location), "/esarchive/obs/ghost/US_NADP_AMNet/original_files/{}/AMNET-ALL-h.csv".format(version))
......@@ -229,7 +225,7 @@ def download_metadata(n_max_tries, max_time_per_dl):
for parameter in json_metadata[station]:
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append to the standardised metadata file
print("new {} --- old {}".format(json_metadata_now[station][parameter]['values'][0], json_metadata[station][parameter]['values'][-1]))
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
......
......@@ -8,9 +8,9 @@ import pandas
import os.path
import urllib
import time
import ssl
import zipfile
from compare_two_files import compare_files
import json
import csv
from chromedriver_py import binary_path
from selenium.webdriver.chrome.options import Options
......@@ -22,22 +22,16 @@ from selenium.webdriver.support import expected_conditions as EC
def scraper(mode, version):
def download_data(mode, version, n_max_tries, max_time_per_dl):
baseurl = 'https://nadp.slh.wisc.edu/networks/ammonia-monitoring-network/'
if mode == 'all':
bdate = date(2013, 12, 1) #date(1960, 1, 1) # date before record starts
edate = date(2024, 1, 1) #date.today() - timedelta(days = 1)
os.makedirs('/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/'.format(version)
os.makedirs('/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/temp/'.format(version), exist_ok=True)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/temp/'.format(version)
elif mode == 'nrt':
bdate = date(2024, 1, 1) #date.today() - timedelta(days = 1)
edate = date.today() - timedelta(days = 1)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/nrt/'
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/original_files/nrt/temp/'
else:
print('time mode inapplicable')
......@@ -52,38 +46,198 @@ def scraper(mode, version):
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
try:
# open url
driver.get(baseurl)
WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.ID, 'sites-list'))) # wait till loaded
dropdown_element = driver.find_element(By.ID, 'data-type')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
select.select_by_visible_text("Bi-weekly")
time.sleep(max_time_per_dl)
dropdown_element = driver.find_element(By.ID, 'sites-list')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
select.select_by_visible_text("All Sites")
time.sleep(max_time_per_dl)
invalid_box = driver.find_element(By.ID, 'invalid')
invalid_box.click()
time.sleep(max_time_per_dl)
# download
driver.find_element(By.ID, 'generate-button-text').click()
# wait until download finished
while not os.path.exists("{}AMoN-ALL-W-i.csv".format(download_location)):
time.sleep(1)
if os.path.isfile("{}AMoN-ALL-W-i.csv".format(download_location)):
print('AMoN-ALL-W-i.csv download successful')
errcode = 200
continue
except TimeoutException as e:
print(e)
max_time_per_dl = max_time_per_dl*2 # double the waiting time
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
except WebDriverException as e:
print(e)
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
except:
print("Unknown error")
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
driver.close()
if n_tries == n_max_tries:
print('Failed downloading US_NADP_AMoN data {} times in {} seconds'.format(n_tries, max_time_per_dl))
os.rename("{}AMoN-ALL-W-i.csv".format(download_location), "/esarchive/obs/ghost/US_NADP_AMoN/original_files/{}/AMoN-ALL-W-i.csv".format(version))
def download_metadata(n_max_tries, max_time_per_dl):
# open url
driver.get(baseurl)
WebDriverWait(driver, 60).until(EC.presence_of_all_elements_located((By.ID, 'sites-list'))) # wait till loaded
dropdown_element = driver.find_element(By.ID, 'data-type')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
print(options)
select.select_by_visible_text("Bi-weekly")
time.sleep(3)
dropdown_element = driver.find_element(By.ID, 'sites-list')
select = Select(dropdown_element)
options = [opt.get_attribute("text") for opt in select.options]
print(options)
select.select_by_visible_text("All Sites")
time.sleep(3)
invalid_box = driver.find_element(By.ID, 'invalid')
invalid_box.click()
time.sleep(3)
# download
driver.find_element(By.ID, 'generate-button-text').click()
baseurl = 'https://nadp.slh.wisc.edu/networks/ammonia-monitoring-network/'
#os.makedirs('/esarchive/obs/ghost/US_NADP_AMNet/metadata/network_provided/', exist_ok=True)
download_location = '/esarchive/obs/ghost/US_NADP_AMoN/metadata/network_provided/'
today = date.today()
# wait until download finished
while not os.path.exists("{}AMoN-ALL-W-i.csv".format(download_location)):
time.sleep(1)
# set up driver
options = Options()
prefs = {'download.default_directory' : download_location}
options.add_experimental_option('prefs', prefs)
options.add_argument("--no-sandbox")
options.add_argument("--headless")
if os.path.isfile("{}AMoN-ALL-W-i.csv".format(download_location)):
print('AMoN-ALL-W-i.csv download successful')
n_tries = 0
errcode = 999
svc = webdriver.ChromeService(executable_path=binary_path)
driver = webdriver.Chrome(service=svc, options=options)
driver.maximize_window()
while (n_tries < n_max_tries) and (errcode != 200):
try:
# open url
driver.get(baseurl)
#WebDriverWait(driver, max_time_per_dl).until(EC.element_to_be_clickable((By.ID, 'invalid'))) # wait till loaded
time.sleep(max_time_per_dl)
invalid_box = driver.find_element(By.ID, 'download-show-inactive')
driver.execute_script("arguments[0].click()", invalid_box)
# download
#WebDriverWait(driver, max_time_per_dl).until(EC.presence_of_all_elements_located((By.ID, 'generate-button-text'))) # wait till loaded
time.sleep(max_time_per_dl)
bttn = driver.find_element(By.ID, 'network-data-submit')
driver.execute_script("arguments[0].click()", bttn)
# wait until download finished
while not os.path.exists(download_location+'amon.csv'):
time.sleep(1)
if os.path.isfile(download_location+'amon.csv'):
print('Amon metadata download successful')
errcode = 200
continue
except TimeoutException as e:
print(e)
max_time_per_dl = max_time_per_dl*2 # double the waiting time
n_tries = n_tries+1
continue
except WebDriverException as e:
print(e)
n_tries = n_tries+1
print("Number of tries: {}".format(n_tries))
continue
except:
print("Unknown error")
max_time_per_dl = max_time_per_dl*2 # double the waiting time
n_tries = n_tries+1
continue
if n_tries == n_max_tries:
print('Failed downloading AMoN metadata {} times in {} seconds'.format(n_tries, max_time_per_dl))
driver.close()
os.rename(download_location+'amon.csv', download_location+'US_NADP_AMoN_META_{}.csv'.format(today.strftime('%Y%m%d')))
# create json from original metadata file =====================================================================================
"""json_metadata = {}
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/network_provided/US_NADP_AMoN_META.csv', 'r') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['siteId']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata[key] = row
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/processed/US_NADP_AMoN_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
"""
# create json in desired shape from current metadata file
json_metadata_now = {}
with open(download_location+'US_NADP_AMoN_META_{}.csv'.format(today.strftime('%Y%m%d')), encoding='utf-8') as file:
csv_filedata = csv.DictReader(file)
for row in csv_filedata:
key = row['siteId']
update_date = today.strftime('%Y-%m-%d')
for parameter in row:
row[parameter] = {'values': [row[parameter]], 'update_time': [update_date]} # create inner dictionary for every parameter
json_metadata_now[key] = row
# read standardised file to compare!
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/processed/US_NADP_AMoN_META.json', 'r', encoding='utf-8') as f:
json_metadata = json.loads(f.read())
for station in json_metadata: # loop through all the old stations
if station in json_metadata_now.keys(): # if station is in current meta data, go on
for parameter in json_metadata[station]:
if json_metadata[station][parameter]['values'][-1] != json_metadata_now[station][parameter]['values'][0]: # compare last entry in standardised file to value in new file
# if different value, append to the standardised metadata file
print("old {} --- new {}".format(json_metadata[station][parameter]['values'][-1], json_metadata_now[station][parameter]['values'][0]))
json_metadata[station][parameter]['values'].append(json_metadata_now[station][parameter]['values'][0])
json_metadata[station][parameter]['update_time'].append(json_metadata_now[station][parameter]['update_time'][0])
else:
pass
else:
print('Station {} was abolished'.format(station))
for station in json_metadata_now: # loop through all the new stations
if station in json_metadata.keys(): # if station is in old meta data
pass # comparison was done before
else: # new station appeared!
print('New station {}'.format(station))
json_metadata.update({station: json_metadata_now[station]})
# save
with open('/esarchive/obs/ghost/US_NADP_AMoN/metadata/processed/US_NADP_AMoN_META.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(json_metadata, indent=4))
driver.close()
\ No newline at end of file
import hashlib # works for all types of data
import requests
import time
def request_download(url, n_max_tries, max_time_per_dl, download_location, file, year):
# n_max_tries, file and year are used below, so they are passed in explicitly
Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
n_tries = 0
errcode = 999
while (n_tries < n_max_tries) and (errcode != 200):
r = requests.get(url, headers=Headers, timeout=max_time_per_dl)
if r.status_code == 200:
open(download_location + file.format(year), "wb").write(r.content)
print('Downloaded {}'.format(file.format(year)))
errcode = r.status_code
elif r.status_code == 404:
print("No ozone l data found, error 404")
errcode = 200
elif r.status_code == 403:
print("Permission denied for {}".format(file.format(year)))
errcode = 200
else:
# try again
print('Response error {}, attempt {}'.format(r.status_code, n_tries))
errcode = r.status_code
n_tries += 1
time.sleep(n_tries ** 2) # wait a little longer every time
if n_tries == n_max_tries:
print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
time.sleep(1)
# check if files are different
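A minimal sketch of the comparison the comment above points at, assuming the intent is to hash the freshly downloaded file against the previously stored copy (the hashlib import at the top of this file suggests as much); files_are_identical and both path arguments are hypothetical names, not part of this patch.
import hashlib
def files_are_identical(path_a, path_b, chunk_size=65536):
    # hash both files in chunks so large downloads never need to fit in memory
    digests = []
    for path in (path_a, path_b):
        h = hashlib.sha256()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                h.update(chunk)
        digests.append(h.hexdigest())
    return digests[0] == digests[1]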
......
......@@ -61,7 +61,13 @@ if __name__ == "__main__":
'CNEMC': {'max_time_dl': 3},
'CANADA_NAPS': {'max_time_dl': 5},
'CAPMoN': {'max_time_dl': 5},
'US_NADP_AMNet': {'max_time_dl': 10}}
'US_NADP_AMNet': {'max_time_dl': 10},
'US_NADP_AMoN': {'max_time_dl': 7},
'MEXICO_CDMX': {'max_time_dl': 10},
'NOAA_ISD': {'max_time_dl': 15},
'MITECO': {'max_time_dl': 10},
'EANET': {'max_time_dl': 5},
'CHILE_SINCA': {'max_time_dl': 30}}
# download data
......@@ -71,7 +77,7 @@ if __name__ == "__main__":
dl_metadata = True
# networks you want to download
networks = [US_NADP_AMNet_download]
networks = [CHILE_SINCA_download]
# download all networks
#networks = ['all']
......@@ -107,8 +113,6 @@ if __name__ == "__main__":
if dl_data == True:
network.download_data(mode, version, n_max_tries, max_time_per_dl)
pass
if dl_metadata == True:
network.download_metadata(n_max_tries, max_time_per_dl)
pass