Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import hashlib
from datetime import date
from datetime import timedelta
import pandas
import os.path
import urllib
import time
import ssl
import zipfile
from compare_two_files import compare_files
def _download(url, destination, headers):
    """Fetch *url* and, on HTTP 200, write the response body to *destination*.

    Returns the HTTP status code of the response, or ``None`` when the
    request itself raised (timeout, connection error, ...), so that one
    failing request does not abort the remaining downloads.
    """
    try:
        response = requests.get(url, headers=headers, timeout=120)
    except requests.exceptions.RequestException as err:
        print("Request for {} failed: {}".format(url, err))
        return None

    if response.status_code == 200:
        # Context manager guarantees the file handle is closed/flushed.
        with open(destination, "wb") as outfile:
            outfile.write(response.content)
    return response.status_code


def scraper(mode, version):
    """Download the CAPMoN original files into the GHOST archive tree.

    Parameters
    ----------
    mode : str
        ``'all'`` downloads every file. ``'nrt'`` prints a notice and exits
        (no near-real-time data for this network). Any other value prints
        an error and exits.
    version : str
        Version label used to build the download directories under
        ``/esarchive/obs/ghost/CAPMoN/original_files/``.

    Raises
    ------
    SystemExit
        When ``mode`` is not ``'all'``.

    For ``mode == 'all'`` the function fetches, per year, the ground-level
    ozone CSV and the precipitation major-ions CSV, and finally the single
    particulate-metals CSV. Failures are reported with ``print`` and do not
    stop the remaining downloads.
    """
    ozone_url = "https://data-donnees.az.ec.gc.ca/api/file?path=/air/monitor/monitoring-of-atmospheric-gases/ground-level-ozone/"
    ozone_file = "AtmosphericGases-GroundLevelOzone-CAPMoN-AllSites-{}.csv"
    baseurl_ions = 'https://data-donnees.az.ec.gc.ca/api/file?path=/air/monitor/monitoring-of-atmospheric-precipitation-chemistry/major-ions/AtmosphericPrecipitationChemistry-MajorIons-CAPMoN-AllSites-{}.csv'
    metals_url = "https://data-donnees.az.ec.gc.ca/api/file?path=/air/monitor/monitoring-of-atmospheric-particles/particulate-metals/"
    metals_file = "AtmosphericParticles-ParticulateMetals-GLBM-MultipleSites-1988_2017.csv"

    components = ['O3', 'Particulate_Metals']

    if mode == 'nrt':
        print("No nrt data for CAPMoN network.")
        raise SystemExit
    if mode != 'all':
        print('time mode inapplicable')
        raise SystemExit

    bdate = date(1980, 1, 1)  # date before record starts
    edate = date.today() + timedelta(days=365)

    # All download directories derive from the same version-specific root.
    ozone_dir = '/esarchive/obs/ghost/CAPMoN/original_files/{}/{}/'.format(version, 'O3')
    wetdep_dir = '/esarchive/obs/ghost/CAPMoN/original_files/{}/precip/major-ions/'.format(version)
    # Use the requested version here too (was hard-coded to 1.6), so the
    # file lands in the directory created just below.
    metals_dir = '/esarchive/obs/ghost/CAPMoN/original_files/{}/{}/'.format(version, 'Particulate_Metals')

    # create download directories
    for component in components:
        os.makedirs('/esarchive/obs/ghost/CAPMoN/original_files/{}/{}/'.format(version, component), exist_ok=True)
    os.makedirs(wetdep_dir, exist_ok=True)

    # create date array, per year
    # NOTE(review): the 'Y' alias is reported as deprecated in recent pandas
    # releases in favour of 'YE' - confirm against the installed version.
    years = pandas.date_range(bdate, edate, freq='Y').strftime('%Y').tolist()
    print(years)

    # The same browser-like User-Agent is sent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}

    for year in years:
        # ozone
        ozone_name = ozone_file.format(year)
        status = _download(ozone_url + ozone_name, ozone_dir + ozone_name, headers)
        if status == 200:
            print('Downloaded {}'.format(ozone_name))
        elif status == 403:
            print("Permission denied for {}".format(ozone_name))
        else:
            print(ozone_name + " download failed or no data")
        time.sleep(1)  # pause between consecutive requests

        # major ions in wetdep
        ions_url = baseurl_ions.format(year)
        ions_name = os.path.basename(ions_url)
        # wetdep_dir already ends in precip/major-ions/; do not append it again.
        status = _download(ions_url, wetdep_dir + ions_name, headers)
        time.sleep(1)
        if status == 200:
            print('Downloaded {}'.format(ions_name))
        elif status == 404:
            print("No major ions data in {}".format(year))
        else:
            print("Problem with major ions download")

    # particulate metals (single multi-year file)
    status = _download(metals_url + metals_file, metals_dir + metals_file, headers)
    if status == 200:
        print('Downloaded ' + metals_file)
    elif status == 403:
        print("Permission denied for {}".format(metals_file))
    else:
        print(metals_file + " download failed or no data")