compare_two_files.py 2.14 KB
Newer Older
RAPHAEL GRODOFZIG's avatar
RAPHAEL GRODOFZIG committed
import hashlib  # works for all types of data
RAPHAEL GRODOFZIG's avatar
RAPHAEL GRODOFZIG committed
import requests



def request_download(url, max_time_per_dl, download_location, *,
                     filename=None, n_max_tries=3):
    """Download *url* into *download_location*, retrying transient failures.

    A fresh GET request is issued on every attempt. A 200 response is
    written to disk; 404 and 403 are reported and not retried; any other
    status code, or a request-level failure such as a timeout, is retried
    up to *n_max_tries* times with a growing pause between attempts.

    Args:
        url: Address of the resource to fetch.
        max_time_per_dl: Timeout in seconds handed to ``requests.get``.
        download_location: Directory the downloaded file is written to.
        filename: Name of the file to create inside *download_location*.
            When omitted, the last path segment of *url* is used.
        n_max_tries: Maximum number of request attempts.

    Raises:
        ValueError: If *filename* is omitted and *url* has no usable last
            path segment to derive a name from.
    """
    # Local imports: the module header only imports hashlib and requests.
    import os
    import posixpath
    import time
    from urllib.parse import unquote, urlsplit

    if filename is None:
        filename = unquote(posixpath.basename(urlsplit(url).path))
    if not filename:
        raise ValueError(
            'Cannot derive a file name from {!r}; pass filename='.format(url))
    target_path = os.path.join(download_location, filename)

    headers = {
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) '
                      'AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    }
    n_tries = 0
    errcode = 999  # sentinel: no HTTP response received

    while n_tries < n_max_tries:
        try:
            r = requests.get(url, headers=headers, timeout=max_time_per_dl)
        except requests.exceptions.RequestException as exc:
            # Timeouts / connection problems are transient: retry them.
            errcode = 999
            print('Request error {}, attempt {}'.format(exc, n_tries))
        else:
            errcode = r.status_code
            if errcode == 200:
                with open(target_path, "wb") as out_file:
                    out_file.write(r.content)
                print('Downloaded {}'.format(filename))
                break
            if errcode == 404:
                # Retrying cannot make a missing resource appear.
                print("No ozone l data found, error 404")
                break
            if errcode == 403:
                print("Permission denied for {}".format(filename))
                break
            print('Response error {}, attempt {}'.format(errcode, n_tries))

        n_tries += 1
        if n_tries < n_max_tries:
            time.sleep(n_tries ** 2)  # wait a little longer every time
    else:
        # Reached only when every attempt was used up without a `break`.
        print('Failed downloading {} {} times in {} seconds, error code {}'.format(
            url, n_tries, max_time_per_dl, errcode))

    # Short pause after each call, kept from the original implementation.
    time.sleep(1)



RAPHAEL GRODOFZIG's avatar
RAPHAEL GRODOFZIG committed


# check if files are different
def _sha256_of_file(path, chunk_size=65536):
    """Return the SHA-256 digest of the file at *path*, read in chunks."""
    digest = hashlib.sha256()
    with open(path, 'rb') as stream:
        # Feed the hash piece by piece so large files never have to fit
        # in memory all at once.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.digest()


def compare_files(f1, f2):
    """Report whether two files have identical contents.

    Both files are opened in binary mode and compared through their
    SHA-256 digests, so any kind of data can be compared. Only equality is
    reported; the function does not determine which parts differ.

    Args:
        f1: Path of the first file.
        f2: Path of the second file.

    Returns:
        True if the contents are identical, False otherwise. A short
        message describing the outcome is printed in both cases.
    """
    if _sha256_of_file(f1) == _sha256_of_file(f2):
        print("Files are the same: no new data")
        return True

    print("Files are not the same: new data to process")
    return False