import hashlib # works for all type of data import requests def request_download(url, max_time_per_dl, download_location): Headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'} n_tries = 0 errcode = 999 r = requests.get(url, headers=Headers, timeout=max_time_per_dl) while (n_tries < n_max_tries) and (errcode != 200): if r.status_code == 200: open(download_location + file.format(year), "wb").write(r.content) print('Downloaded {}'.format(file.format(year))) errcode = r.status_code elif r.status_code == 404: print("No ozone l data found, error 404") errcode = 200 elif r.status_code == 403: print("Permission denied for {}".format(file.format(year))) errcode = 200 else: # try again print('Response error {}, attempt {}'.format(r.status_code, n_tries)) errcode = r.status_code n_tries += 1 time.sleep(n_tries ** 2) # wait a lil more every time if n_tries == n_max_tries: print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode)) time.sleep(1) # check if files are different def compare_files(f1, f2): with open(f1, 'rb') as t1, open(f2, 'rb') as t2: fileA_hash = hashlib.sha256(t1.read()).digest() fileB_hash = hashlib.sha256(t2.read()).digest() if fileA_hash == fileB_hash: print("Files are the same: no new data") return True else: print("Files are not the same: new data to process") return False """with open(f1, 'r') as t1, open(f2, 'r') as t2: # open again to read data not binary fileA = t1.readlines() fileB = t2.readlines() with open(out_path+'update.csv', 'w') as outFile: for line in fileB: if line not in fileA: print("different line detected") outFile.write(line)"""