Newer
Older
import requests
def request_download(url, max_time_per_dl, download_location):
    """Download *url* and save the response body to disk, retrying on errors.

    The target path is ``download_location + file.format(year)``; ``file``,
    ``year`` and ``n_max_tries`` are module-level names defined outside this
    function (NOTE(review): confirm they are set before this is called).

    Args:
        url: Address to fetch.
        max_time_per_dl: Passed to ``requests.get`` as ``timeout`` (seconds).
        download_location: Path prefix the file name is appended to by plain
            string concatenation, so it must already end with a separator.

    Returns:
        None. The outcome is reported through ``print`` only.

    Exceptions raised by ``requests.get`` (e.g. timeouts, connection errors)
    or by writing the file are not caught here and propagate to the caller.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}
    n_tries = 0
    errcode = 999  # placeholder reported if no response was ever received

    while n_tries < n_max_tries:
        # The request lives inside the loop so that every retry really
        # contacts the server again instead of re-checking a stale response.
        r = requests.get(url, headers=headers, timeout=max_time_per_dl)
        errcode = r.status_code

        if errcode == 200:
            # Context manager guarantees the file is flushed and closed.
            with open(download_location + file.format(year), "wb") as out_file:
                out_file.write(r.content)
            print('Downloaded {}'.format(file.format(year)))
            break
        if errcode == 404:
            # Retrying will not help: stop without counting it as a failure.
            print("No ozone l data found, error 404")
            break
        if errcode == 403:
            # Same as 404: not retried.
            print("Permission denied for {}".format(file.format(year)))
            break

        # Any other status: try again
        print('Response error {}, attempt {}'.format(errcode, n_tries))
        n_tries += 1
        time.sleep(n_tries ** 2)  # wait a lil more every time

    if n_tries == n_max_tries:
        print('Failed downloading {} {} times in {} seconds, error code {}'.format(url, n_tries, max_time_per_dl, errcode))
    # Short fixed pause before returning (presumably to space out successive
    # downloads - confirm with the caller).
    time.sleep(1)
# check if files are different
def _sha256_of_file(path, chunk_size=65536):
    """Return the SHA-256 digest (bytes) of the file at *path*.

    The file is read in ``chunk_size``-byte pieces so that memory use stays
    bounded regardless of the file size.
    """
    hasher = hashlib.sha256()
    with open(path, 'rb') as handle:
        # iter(callable, sentinel) keeps reading until read() returns b''.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.digest()


def compare_files(f1, f2):
    """Report whether two files have identical content.

    Both files are hashed with SHA-256 (binary mode) and the digests are
    compared; a one-line result message is printed.

    Args:
        f1: Path of the first file.
        f2: Path of the second file.

    Returns:
        True if the digests match, False otherwise.

    Raises:
        OSError: If either file cannot be opened or read.
    """
    if _sha256_of_file(f1) == _sha256_of_file(f2):
        print("Files are the same: no new data")
        return True
    print("Files are not the same: new data to process")
    return False


# Disabled draft kept for reference (was an unreachable string literal):
# writes every line of f2 that does not occur in f1 to out_path + 'update.csv'.
#
# with open(f1, 'r') as t1, open(f2, 'r') as t2:  # open again to read data not binary
#     fileA = t1.readlines()
#     fileB = t2.readlines()
# with open(out_path+'update.csv', 'w') as outFile:
#     for line in fileB:
#         if line not in fileA:
#             print("different line detected")
#             outFile.write(line)