-
Notifications
You must be signed in to change notification settings - Fork 3
/
hadcrut5lib.py
executable file
·288 lines (238 loc) · 10.5 KB
/
hadcrut5lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
#!/usr/bin/python3
# Copyright (c) 2020-2024 Davide Madrisan <[email protected]>
# SPDX-License-Identifier: GPL-3.0-or-later
"""
HadCRUT5 class and library functions for parsing the HadCRUT5 temperature
datasets. See: https://www.metoffice.gov.uk/hadobs/hadcrut5/
"""
import argparse
import json
import logging
import sys
from typing import Dict, List, Mapping, Tuple
import numpy as np
import requests
# pylint: disable=E0611
from netCDF4 import Dataset as nc_Dataset
# pylint: enable=E0611
__author__ = "Davide Madrisan"
__copyright__ = "Copyright (C) 2020-2024 Davide Madrisan"
__license__ = "GNU General Public License v3.0"
__version__ = "2024.1"
__email__ = "[email protected]"
__status__ = "stable"
def copyleft(descr: str) -> str:
"""Print the Copyright message and License"""
return (
f"{descr} v{__version__} ({__status__})\n"
"{__copyright__} <{__email__}>\nLicense: {__license__}"
)
def argparser(descr: str, examples: List[str]) -> argparse.ArgumentParser:
"""Return a new ArgumentParser object"""
return argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=copyleft(descr),
epilog="examples:\n " + "\n ".join(examples),
)
# pylint: disable=R0902
class HadCRUT5:
"""Class for parsing and plotting HadCRUT5 datasets"""
_DATASET_VERSION = "5.0.2.0"
"""current dataset version"""
_DEFAULT_DATATYPE = "annual"
_VALID_DATATYPES = [_DEFAULT_DATATYPE, "monthly"]
"""list of all the available data types"""
_DEFAULT_PERIOD = "1961-1990"
_VALID_PERIODS = [_DEFAULT_PERIOD, "1850-1900", "1880-1920"]
"""list of all the valid periods"""
GLOBAL_REGION = "Global"
NORTHERN_REGION = "Northern Hemisphere"
SOUTHERN_REGION = "Southern Hemisphere"
def __init__(
self,
period=_DEFAULT_PERIOD,
datatype=_DEFAULT_DATATYPE,
regions=(True, False, False),
smoother=1,
verbose=False,
):
if datatype not in self._VALID_DATATYPES:
raise ValueError(f'Unsupported time series type "{datatype}"')
if period not in self._VALID_PERIODS:
raise ValueError(f'Unsupported reference period: "{period}"')
# will be populated by datasets_load()
self._datasets = {}
# will be populated by datasets_normalize()
self._datasets_normalized = {}
self._datatype = datatype
(self._enable_global, self._enable_northern, self._enable_southern) = regions
self._period = period
self._smoother = smoother
self._verbose = verbose
self._global_filename = (
f"HadCRUT.{self._DATASET_VERSION}.analysis.summary_series.global.{datatype}.nc"
)
self._northern_hemisphere_filename = (
f"HadCRUT.{self._DATASET_VERSION}."
f"analysis.summary_series.northern_hemisphere.{datatype}.nc"
)
self._southern_hemisphere_filename = (
f"HadCRUT.{self._DATASET_VERSION}."
f"analysis.summary_series.southern_hemisphere.{datatype}.nc"
)
self._logging_level = logging.DEBUG if self._verbose else logging.INFO
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
self._logger = logging.getLogger(__name__)
def datasets_download(self):
"""Download the required HadCRUT5 datasets"""
if self._enable_global:
self._wget_dataset_file(self._global_filename)
if self._enable_northern:
self._wget_dataset_file(self._northern_hemisphere_filename)
if self._enable_southern:
self._wget_dataset_file(self._southern_hemisphere_filename)
def datasets_load(self) -> None:
"""Load all the netCDFv4 datasets"""
def dataset_load(dataset_filename: str) -> Dict:
"""Load the data provided by the netCDFv4 file 'dataset_filename'"""
dataset = nc_Dataset(dataset_filename)
return {
"dimensions": dataset.dimensions,
"metadata": dataset.__dict__,
"variables": dataset.variables,
}
def dataset_metadata_dump(dataset_name: str, dataset) -> None:
metadata = dataset["metadata"]
self.logging_debug(
(f'Metadata for "{dataset_name}" dataset:\n{json.dumps(metadata, indent=2)}'),
)
if self._enable_global:
region = self.GLOBAL_REGION
self._datasets[region] = dataset_load(self._global_filename)
dataset_metadata_dump(region, self._datasets[region])
if self._enable_northern:
region = self.NORTHERN_REGION
self._datasets[region] = dataset_load(self._northern_hemisphere_filename)
dataset_metadata_dump(region, self._datasets[region])
if self._enable_southern:
region = self.SOUTHERN_REGION
self._datasets[region] = dataset_load(self._southern_hemisphere_filename)
dataset_metadata_dump(region, self._datasets[region])
def datasets_normalize(self) -> None:
"""
Normalize the temperature means to the required time period.
Set _datasets_normalized with a tuple containing lower, mean, and upper
temperatures for every enabled region
"""
def normalization_value(temperatures: List[float]) -> float:
"""
Return the value to be substracted to temperatures in order to
obtain a mean-centered dataset for the required period
"""
if self._period == "1961-1990":
# No changes required: the original dataset is based on the
# reference period 1961-1990
return 0
factor = 12 if self.is_monthly_dataset else 1
if self._period == "1850-1900":
# The dataset starts from 1850-01-01 00:00:00
# so we calculate the mean of the first 50 years
norm_temp = np.mean(temperatures[: 50 * factor])
elif self._period == "1880-1920":
# We have to skip the first 30 years here
norm_temp = np.mean(temperatures[30 * factor : 70 * factor + 1])
else:
# this should never happen...
raise ValueError(f'Unsupported period "{self._period}"')
self.logging_debug("The mean anomaly in {self._period} is about {norm_temp:.8f}°C")
return float(norm_temp)
for region, data in self._datasets.items():
mean = data["variables"]["tas_mean"]
self.logging_debug(f"dataset ({region}): mean ({len(mean)} entries) \\\n{mean[:]}")
lower = data["variables"]["tas_lower"]
upper = data["variables"]["tas_upper"]
norm_temp = normalization_value(mean)
self._datasets_normalized[region] = {
"lower": np.array(lower) - norm_temp,
"mean": np.array(mean) - norm_temp,
"upper": np.array(upper) - norm_temp,
}
self.logging_debug(
f"normalized dataset ({region}): mean \\\n{np.array(mean) - norm_temp}"
)
def datasets_regions(self) -> Mapping[str, object]:
"""Return the dataset regions set by the user at command-line"""
return self._datasets.keys()
def logging(self, message: str) -> None:
"""Print a message"""
logging.info(message)
def logging_debug(self, message: str) -> None:
"""Print a message when in verbose mode only"""
if self._verbose:
logging.info(message)
def _hadcrut5_data_url(self, filename: str) -> str:
site = "https://www.metoffice.gov.uk"
path = f"/hadobs/hadcrut5/data/HadCRUT.{self._DATASET_VERSION}/analysis/diagnostics/"
url = f"{site}{path}{filename}"
return url
def _wget_dataset_file(self, filename: str) -> None:
"""Download a netCDFv4 HadCRUT5 file if not already found locally"""
try:
with open(filename, encoding="utf-8"):
if self._verbose:
logging.info("Using the local dataset file: %s", filename)
except IOError:
if self._verbose:
logging.info("Downloading %s ...", filename)
url_dataset = self._hadcrut5_data_url(filename)
try:
response = requests.get(url_dataset, stream=True, timeout=10)
except requests.exceptions.RequestException as e:
logging.error(str(e))
sys.exit(1)
# Throw an error for bad status codes
response.raise_for_status()
with open(filename, "wb") as handle:
for block in response.iter_content(1024):
handle.write(block)
@property
def dataset_datatype(self) -> str:
"""Return the datatype string"""
return self._datatype
@property
def dataset_history(self) -> str:
"""Return the datatype history from metadata"""
# The datasets have all the same length so choose the first one
region = list(self._datasets.keys())[0]
metadata = self._datasets[region]["metadata"]
return metadata.get("history")
def dataset_normalized_data(self, region) -> Tuple[List[float], List[float], List[float]]:
"""Return the dataset data normalized for the specified region"""
lower = self._datasets_normalized[region]["lower"]
mean = self._datasets_normalized[region]["mean"]
upper = self._datasets_normalized[region]["upper"]
return (lower, mean, upper)
@property
def dataset_period(self) -> str:
"""Return the dataset period as a string"""
return self._period
def dataset_years(self) -> List[int | float]:
"""
Return an array of years corresponding of the loaded datasets.
If the original dataset packages monthly data, the returning vector
will contain float values (year plus a decimal part for the month).
"""
# The datasets have all the same length so choose the first one
region = list(self._datasets.keys())[0]
mean = self._datasets[region]["variables"]["tas_mean"][:]
factor = 1 / 12 if self.is_monthly_dataset else 1
years = [1850 + (y * factor) for y in range(len(mean))]
self.logging_debug(f"years: \\\n{np.array(years)}")
return years
@property
def is_monthly_dataset(self) -> bool:
"""
Return True if the loaded dataset provide monthly data,
False otherwise
"""
return self._datatype == "monthly"