Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# coding=utf-8
from autosubmit.config.log import Log
from earthdiagnostics.diagnostic import Diagnostic
from earthdiagnostics.utils import Utils, TempFile
from earthdiagnostics.variable import Domain
import numpy as np
class ClimatologicalPercentile(Diagnostic):
    """
    Calculates the climatological percentiles for the given leadtimes

    The values of every startdate/member file are discretized into a common set of
    equally spaced bins, the per-point histograms are accumulated, and the requested
    percentiles are read from the cumulative distribution at each grid point.

    :param data_manager: data management object
    :type data_manager: DataManager
    :param domain: domain of the variable
    :type domain: Domain
    :param variable: variable to compute the percentiles on
    :type variable: str
    :param leadtimes: leadtimes to compute the percentiles on
    :type leadtimes: list[int]
    :param experiment_config:
    :type experiment_config: ExperimentConfig
    """

    alias = 'climpercent'
    "Diagnostic alias for the configuration file"

    def __init__(self, data_manager, domain, variable, leadtimes, experiment_config):
        Diagnostic.__init__(self, data_manager)
        self.variable = variable
        self.domain = domain
        self.leadtimes = leadtimes
        self.experiment_config = experiment_config

        # Global value range over all files; filled in by _get_value_interval
        self.min_value = None
        self.max_value = None
        # Number of realizations shared by all files; filled in by _get_realizations_present
        self.realizations = None
        # Grid coordinates and sizes; filled in by _get_var_size from the first file
        self.lat = None
        self.lon = None
        self.lat_len = None
        self.lon_len = None

        self.num_bins = 1000
        self._bins = None
        self.percentiles = np.array([0.1, 0.25, 0.5, 0.75, 0.9])

    def __eq__(self, other):
        # Comparing against an unrelated object must not raise AttributeError
        if not isinstance(other, ClimatologicalPercentile):
            return False
        return (self.domain == other.domain and self.variable == other.variable and
                self.leadtimes == other.leadtimes)

    def __str__(self):
        return 'Climatological percentile Variable: {0}:{1} Leadtimes: {2}'.format(self.domain, self.variable,
                                                                                 self.leadtimes)

    @classmethod
    def generate_jobs(cls, diags, options):
        """
        Creates the job that computes the diagnostic

        :param diags: Diagnostics manager class
        :type diags: Diags
        :param options: diagnostic name, domain, variable, leadtimes (separated by '-')
        :type options: list[str]
        :return: list with the single job created
        :rtype: list[ClimatologicalPercentile]
        :raises Exception: if the number of parameters given is not exactly three
        """
        # options[0] is the diagnostic name itself, so it does not count as a parameter
        num_options = len(options) - 1
        if num_options < 3:
            raise Exception('You must specify the variable (and its domain) and the leadtimes you want to compute '
                            'the percentiles on')
        if num_options > 3:
            raise Exception('You must specify three parameters for the climatological percentiles')

        domain = Domain(options[1])
        variable = options[2]
        leadtimes = [int(i) for i in options[3].split('-')]

        return [ClimatologicalPercentile(diags.data_manager, domain, variable, leadtimes,
                                         diags.config.experiment)]

    def compute(self):
        """
        Runs the diagnostic
        """
        # First pass: retrieve every file and collect the global value range,
        # the number of realizations and the grid description
        member_files = list()
        for startdate, member in self.experiment_config.get_member_list():
            Log.debug('Retrieving startdate {0}', startdate)
            memberfile = self.data_manager.get_leadtimes(self.domain, self.variable, startdate, member,
                                                         self.leadtimes)
            Log.debug('Getting data for startdate {0}', startdate)
            handler = Utils.openCdf(memberfile)
            try:
                self._get_value_interval(handler)
                self._get_realizations_present(handler)
                self._get_var_size(handler)
            finally:
                handler.close()
            member_files.append(memberfile)

        # num_bins intervals need num_bins + 1 edges
        self._bins = np.linspace(self.min_value, self.max_value, num=(self.num_bins + 1))

        # Second pass: accumulate the histogram of every realization of every file
        distribution = None
        for memberfile in member_files:
            Log.debug('Discretizing file {0}', memberfile)
            handler = Utils.openCdf(memberfile)
            try:
                for realization in range(self.realizations):
                    if distribution is None:
                        distribution = self._calculate_distribution(handler, realization)
                    else:
                        distribution += self._calculate_distribution(handler, realization)
            finally:
                handler.close()

        Log.debug('Calculating percentiles')

        def calculate_percentiles(point_distribution):
            # Locate the first bin where the cumulative count reaches each requested
            # fraction of the total and return the center of that bin
            cs = np.cumsum(point_distribution)
            total = cs[-1]
            percentile_values = self.percentiles * total
            index = np.searchsorted(cs, percentile_values)
            return (self._bins[index + 1] + self._bins[index]) / 2

        distribution = np.apply_along_axis(calculate_percentiles, 0, distribution)

        temp = TempFile.get()
        handler = Utils.openCdf(temp, 'w')
        try:
            handler.createDimension('percentile', len(self.percentiles))
            percentile_var = handler.createVariable('percentile', float, ('percentile',))
            percentile_var[:] = self.percentiles

            handler.createDimension('lat', self.lat_len)
            lat_var = handler.createVariable('lat', float, ('lat',))
            lat_var[:] = self.lat

            handler.createDimension('lon', self.lon_len)
            lon_var = handler.createVariable('lon', float, ('lon',))
            lon_var[:] = self.lon

            percent_var = handler.createVariable('percent', float, ('percentile', 'lat', 'lon'))
            percent_var[...] = distribution
        finally:
            handler.close()

        self.send_file(temp, self.domain, self.variable + 'percent', None, None, frequency='clim',
                       rename_var='percent')

    def _get_realizations_present(self, handler):
        """
        Updates the number of realizations to use with the ones present in the file

        If the files disagree, the smallest number found is kept and a warning is logged.

        :param handler: open handler of the file to check
        """
        realizations = 1
        if 'realization' in handler.dimensions:
            realizations = handler.dimensions['realization'].size
        # If both dimensions are present, 'ensemble' takes precedence
        if 'ensemble' in handler.dimensions:
            realizations = handler.dimensions['ensemble'].size

        if self.realizations is None:
            self.realizations = realizations
        elif realizations != self.realizations:
            self.realizations = min(self.realizations, realizations)
            Log.warning('Different number of realizations in the data used by diagnostic {0}', self)

    def _get_value_interval(self, handler):
        """
        Extends the global [min_value, max_value] interval with the values of the file

        :param handler: open handler of the file to check
        """
        values = handler.variables[self.variable][:]
        file_max = np.amax(values)
        file_min = np.amin(values)

        if self.max_value is None:
            self.max_value = file_max
        else:
            self.max_value = max(self.max_value, file_max)

        if self.min_value is None:
            self.min_value = file_min
        else:
            self.min_value = min(self.min_value, file_min)

    def _calculate_distribution(self, handler, realization):
        """
        Computes the histogram of the time series at every point for one realization

        :param handler: open handler of the file to discretize
        :param realization: index of the realization to discretize
        :type realization: int
        :return: bin counts, with the bins along the first axis
        """
        Log.debug('Discretizing realization {0}', realization)

        def calculate_histogram(time_series):
            return np.histogram(time_series, bins=self._bins)[0]

        var = handler.variables[self.variable]
        # NOTE(review): assumes the second dimension of the variable is the realization one;
        # confirm for files that have no realization/ensemble dimension
        return np.apply_along_axis(calculate_histogram, 0, var[:, realization, ...])

    def _get_var_size(self, handler):
        """
        Reads the latitude and longitude values and sizes, only from the first file received

        :param handler: open handler of the file to read the grid from
        """
        if self.lat_len is not None:
            return
        self.lat = handler.variables['latitude'][:]
        self.lon = handler.variables['longitude'][:]
        self.lat_len = handler.dimensions['latitude'].size
        self.lon_len = handler.dimensions['longitude'].size

    def create_empty_distribution(self):
        """
        Creates a zero-filled integer array of shape (lat_len, lon_len, num_bins)

        :return: empty distribution
        :rtype: numpy.ndarray
        """
        return np.zeros((self.lat_len, self.lon_len, self.num_bins), int)