-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jan Griesfeller
committed
Sep 3, 2024
1 parent
3f28569
commit 37a7970
Showing
3 changed files
with
49 additions
and
242 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,12 @@ | ||
import csv | ||
import logging | ||
import tomllib | ||
from io import BytesIO | ||
from urllib.parse import urlparse | ||
from urllib.request import urlopen | ||
from zipfile import BadZipFile, ZipFile | ||
|
||
from geocoder_reverse_natural_earth import ( | ||
Geocoder_Reverse_NE, | ||
Geocoder_Reverse_Exception, | ||
) | ||
import numpy as np | ||
import requests | ||
import json | ||
from pyaro.timeseries import ( | ||
AutoFilterReaderEngine, | ||
Data, | ||
|
@@ -19,71 +16,33 @@ | |
) | ||
from tqdm import tqdm | ||
import datetime | ||
import xarray as xr | ||
import os | ||
|
||
# default URL | ||
BASE_URL = "https://aeronet.gsfc.nasa.gov/data_push/V3/All_Sites_Times_Daily_Averages_AOD20.zip" | ||
# number of lines to read before the reading is handed to Python's csv reader | ||
HEADER_LINE_NO = 7 | ||
DELIMITER = "," | ||
# | ||
NAN_VAL = -999.0 | ||
# update progress bar every N lines... | ||
PG_UPDATE_LINES = 100 | ||
# main variables to store | ||
LAT_NAME = "Site_Latitude(Degrees)" | ||
LON_NAME = "Site_Longitude(Degrees)" | ||
ALT_NAME = "Site_Elevation(m)" | ||
SITE_NAME = "AERONET_Site_Name" | ||
DATE_NAME = "Date(dd:mm:yyyy)" | ||
TIME_NAME: str = "Time(hh:mm:ss)" | ||
AOD500_NAME = "AOD_500nm" | ||
ANG4487_NAME = "440-870_Angstrom_Exponent" | ||
AOD440_NAME = "AOD_440nm" | ||
AOD870_NAME = "AOD_870nm" | ||
AOD550_NAME = "AOD_550nm" | ||
|
||
DATA_VARS = [AOD500_NAME, ANG4487_NAME, AOD440_NAME, AOD870_NAME] | ||
COMPUTED_VARS = [AOD550_NAME] | ||
# The computed variables have to be named after the read ones, otherwise the calculation will fail! | ||
DATA_VARS.extend(COMPUTED_VARS) | ||
|
||
FILL_COUNTRY_FLAG = False | ||
logger = logging.getLogger(__name__) | ||
|
||
TS_TYPE_DIFFS = { | ||
"daily": np.timedelta64(12, "h"), | ||
"instantaneous": np.timedelta64(0, "s"), | ||
"points": np.timedelta64(0, "s"), | ||
"monthly": np.timedelta64(15, "D"), | ||
} | ||
# default API URL base | ||
# BASE_API_URL = "https://prod-actris-md.nilu.no/Vocabulary/categories" | ||
BASE_API_URL = "https://prod-actris-md.nilu.no/" | ||
# base URL to query for data for a certain variable | ||
VAR_QUERY_URL = f"{BASE_API_URL}/content/" | ||
# basename of definitions.toml which connects the pyaerocom variable names with the ACTRIS variable names | ||
DEFINITION_FILE_BASENAME = "definitions.toml" | ||
|
||
DEFINITION_FILE = os.path.join( | ||
os.path.dirname(os.path.realpath(__file__)), | ||
DEFINITION_FILE_BASENAME) | ||
|
||
class ActrisEbasTimeSeriesReader(AutoFilterReaderEngine.AutoFilterReader): | ||
def __init__( | ||
self, | ||
filename, | ||
filters=[], | ||
fill_country_flag: bool = FILL_COUNTRY_FLAG, | ||
var_name="ozone mass concentration", | ||
tqdm_desc: str | None = None, | ||
ts_type: str = "daily", | ||
): | ||
"""open a new Aeronet timeseries-reader | ||
:param filename: str | ||
:param filters: | ||
:param fill_country_flag: | ||
:param tqdm_desc: | ||
:param filename_or_obj_or_url: path-like object to csv-file | ||
input file looks like this (daily file; times noted are middle times): | ||
AERONET Version 3; | ||
Cuiaba | ||
Version 3: AOD Level 2.0 | ||
The following data are automatically cloud cleared and quality assured with pre-field and post-field calibration applied. | ||
Contact: PI=Pawan Gupta and Elena Lind; PI [email protected] and [email protected] | ||
Daily Averages,UNITS can be found at,,, https://aeronet.gsfc.nasa.gov/new_web/units.html | ||
AERONET_Site,Date(dd:mm:yyyy),Time(hh:mm:ss),Day_of_Year,AOD_1640nm,AOD_1020nm,AOD_870nm,AOD_865nm,AOD_779nm,AOD_675nm,AOD_667nm,AOD_620nm,AOD_560nm,AOD_555nm,AOD_551nm,AOD_532nm,AOD_531nm,AOD_510nm,AOD_500nm,AOD_490nm,AOD_443nm,AOD_440nm,AOD_412nm,AOD_400nm,AOD_380nm,AOD_340nm,Precipitable_Water(cm),AOD_681nm,AOD_709nm,AOD_Empty,AOD_Empty,AOD_Empty,AOD_Empty,AOD_Empty,440-870_Angstrom_Exponent,380-500_Angstrom_Exponent,440-675_Angstrom_Exponent,500-870_Angstrom_Exponent,340-440_Angstrom_Exponent,440-675_Angstrom_Exponent[Polar],N[AOD_1640nm],N[AOD_1020nm],N[AOD_870nm],N[AOD_865nm],N[AOD_779nm],N[AOD_675nm],N[AOD_667nm],N[AOD_620nm],N[AOD_560nm],N[AOD_555nm],N[AOD_551nm],N[AOD_532nm],N[AOD_531nm],N[AOD_510nm],N[AOD_500nm],N[AOD_490nm],N[AOD_443nm],N[AOD_440nm],N[AOD_412nm],N[AOD_400nm],N[AOD_380nm],N[AOD_340nm],N[Precipitable_Water(cm)],N[AOD_681nm],N[AOD_709nm],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[440-870_Angstrom_Exponent],N[380-500_Angstrom_Exponent],N[440-675_Angstrom_Exponent],N[500-870_Angstrom_Exponent],N[340-440_Angstrom_Exponent],N[440-675_Angstrom_Exponent[Polar]],Data_Quality_Level,AERONET_Instrument_Number,AERONET_Site_Name,Site_Latitude(Degrees),Site_Longitude(Degrees),Site_Elevation(m) | ||
Cuiaba,16:06:1993,12:00:00,167,-999.,0.081800,0.088421,-999.,-999.,0.095266,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.117581,-999.,-999.,-999.,0.149887,2.487799,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.424234,-999.,0.497630,-999.,0.924333,-999.,0,3,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,6,0,0,0,0,0,0,0,3,0,3,0,3,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000 | ||
Cuiaba,17:06:1993,12:00:00,168,-999.,0.092246,0.099877,-999.,-999.,0.110915,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.144628,-999.,-999.,-999.,0.187276,2.592902,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.547807,-999.,0.628609,-999.,0.988320,-999.,0,16,16,0,0,16,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,16,32,0,0,0,0,0,0,0,16,0,16,0,16,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000 | ||
""" | ||
""" | ||
self._filename = filename | ||
self._stations = {} | ||
|
@@ -92,112 +51,11 @@ def __init__( | |
self._header = [] | ||
_laststatstr = "" | ||
self._revision = datetime.datetime.min | ||
# check if file is a URL | ||
if self.is_valid_url(self._filename): | ||
# try to open as zipfile | ||
try: | ||
r = requests.get(self._filename) | ||
zip_ref = ZipFile(BytesIO(r.content)) | ||
for file in zip_ref.namelist(): | ||
with zip_ref.open(file) as response: | ||
lines = [line.decode("utf-8") for line in response.readlines()] | ||
# read only 1st file here | ||
break | ||
except BadZipFile: | ||
response = urlopen(self._filename) | ||
lines = [line.decode("utf-8") for line in response.readlines()] | ||
|
||
else: | ||
with open(self._filename, newline="") as csvfile: | ||
lines = csvfile.readlines() | ||
|
||
for _hidx in range(HEADER_LINE_NO - 1): | ||
self._header.append(lines.pop(0)) | ||
# get fields from header line although csv can do that as well since we might want to adjust these names | ||
self._fields = lines.pop(0).strip().split(",") | ||
|
||
gcd = Geocoder_Reverse_NE() | ||
crd = csv.DictReader(lines, fieldnames=self._fields, delimiter=DELIMITER) | ||
bar = tqdm(desc=tqdm_desc, total=len(lines)) | ||
for _ridx, row in enumerate(crd): | ||
bar.update(1) | ||
if row[SITE_NAME] != _laststatstr: | ||
_laststatstr = row[SITE_NAME] | ||
# new station | ||
station = row[SITE_NAME] | ||
lon = float(row[LON_NAME]) | ||
lat = float(row[LAT_NAME]) | ||
alt = float(row["Site_Elevation(m)"]) | ||
if fill_country_flag: | ||
try: | ||
country = gcd.lookup(lat, lon)["ISO_A2_EH"] | ||
except Geocoder_Reverse_Exception: | ||
country = "NN" | ||
else: | ||
country = "NN" | ||
|
||
# units of Aeronet data are always 1 | ||
units = "1" | ||
if not station in self._stations: | ||
self._stations[station] = Station( | ||
{ | ||
"station": station, | ||
"longitude": lon, | ||
"latitude": lat, | ||
"altitude": alt, | ||
"country": country, | ||
"url": "", | ||
"long_name": station, | ||
} | ||
) | ||
# every line contains all variables, sometimes filled with NaNs though | ||
if _ridx == 0: | ||
for variable in DATA_VARS: | ||
if variable in self._data: | ||
da = self._data[variable] | ||
if da.units != units: | ||
raise Exception( | ||
f"unit change from '{da.units}' to 'units'" | ||
) | ||
else: | ||
da = NpStructuredData(variable, units) | ||
self._data[variable] = da | ||
# read config file | ||
self._def_data = self._read_definitions(file=DEFINITION_FILE) | ||
|
||
day, month, year = row[DATE_NAME].split(":") | ||
datestring = "-".join([year, month, day]) | ||
datestring = "T".join([datestring, row[TIME_NAME]]) | ||
self._revision = max( | ||
[ | ||
self._revision, | ||
datetime.datetime.strptime(datestring, "%Y-%m-%dT%H:%M:%S"), | ||
] | ||
) | ||
time_dummy = np.datetime64(datestring) | ||
start = time_dummy - TS_TYPE_DIFFS[ts_type] | ||
end = time_dummy + TS_TYPE_DIFFS[ts_type] | ||
|
||
ts_dummy_data = {} | ||
for variable in DATA_VARS: | ||
try: | ||
value = float(row[variable]) | ||
if value == NAN_VAL: | ||
value = np.nan | ||
# store value in ts_dummy_data, so we don't need to perform the nan check | ||
# for each component of calculated values again | ||
ts_dummy_data[variable] = value | ||
except KeyError: | ||
# computed variable | ||
if variable == AOD550_NAME: | ||
value = self.compute_od_from_angstromexp( | ||
0.55, | ||
ts_dummy_data[AOD440_NAME], | ||
0.44, | ||
ts_dummy_data[ANG4487_NAME], | ||
) | ||
self._data[variable].append( | ||
value, station, lat, lon, alt, start, end, Flag.VALID, np.nan | ||
) | ||
bar.close() | ||
# bar = tqdm(desc=tqdm_desc, total=len(lines)) | ||
# bar.close() | ||
|
||
def metadata(self):
    """Return reader metadata.

    The revision is the latest observation timestamp encountered while
    reading, rendered as a compact 'yymmddHHMMSS' string.
    """
    stamp = datetime.datetime.strftime(self._revision, "%y%m%d%H%M%S")
    return {"revision": stamp}
|
@@ -214,46 +72,12 @@ def _unfiltered_variables(self) -> list[str]: | |
def close(self):
    """No-op: this reader keeps no open resources that need releasing."""
    return None
|
||
def compute_od_from_angstromexp(
    self, to_lambda: float, od_ref: float, lambda_ref: float, angstrom_coeff: float
) -> float:
    """Shift an AOD value to another wavelength via the Angstrom exponent.

    Given a reference AOD measured at ``lambda_ref``, applies the Angstrom
    power law to estimate the AOD at wavelength ``to_lambda``.

    Parameters
    ----------
    to_lambda : float or ndarray
        target wavelength for which the AOD is wanted
    od_ref : float or ndarray
        AOD at the reference wavelength
    lambda_ref : float or ndarray
        reference wavelength belonging to ``od_ref``
    angstrom_coeff : float or ndarray
        Angstrom exponent used for the spectral extrapolation

    Returns
    -------
    float or ndarray
        AOD(s) at the target wavelength
    """
    wavelength_ratio = lambda_ref / to_lambda
    return od_ref * wavelength_ratio**angstrom_coeff
|
||
def calc_angstroem_coeff(
    self, od1: float, od2: float, wl1: float, wl2: float
) -> float:
    """Calculate the Angstrom exponent from two AOD / wavelength pairs.

    :param od1: AOD at wavelength ``wl1``
    :param od2: AOD at wavelength ``wl2``
    :param wl1: first wavelength
    :param wl2: second wavelength
    :return: the Angstrom exponent ``-ln(od1/od2) / ln(wl1/wl2)``
    """
    od_ratio = od1 / od2
    wl_ratio = wl1 / wl2
    return -np.log(od_ratio) / np.log(wl_ratio)
def _read_definitions(self, file=DEFINITION_FILE):
    """Load the TOML definitions file and return its parsed content.

    The file maps pyaerocom variable names to the ACTRIS and EBAS
    vocabularies; the EBAS part will hopefully become unnecessary with
    the next EBAS version.

    :param file: path to the definitions TOML file
    :return: parsed TOML content as a dict
    """
    with open(file, "rb") as toml_fh:
        return tomllib.load(toml_fh)
|
||
def is_valid_url(self, url): | ||
try: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# connection between pyaerocom and ACTRIS-EBAS variables | ||
[variables] | ||
#prototype | ||
# the order in the list below is preserved and determines the order the data is read | ||
#[variables.<var name>] | ||
#actris_variable = [<list of actris variables>] | ||
#actris_matrix = [<list of actris matrixes>] | ||
#ebas_component = [<list of ebas component names>] | ||
#ebas_matrix = [<list of ebas matrix names>] | ||
|
||
[variables.vmro3] | ||
actris_variable = ["ozone mass concentration", "ozone amount fraction"] | ||
actris_matrix = ["gas phase"] | ||
ebas_component = ["ozone"] | ||
ebas_matrix = ["air"] | ||
|
||
[variables.concso4] | ||
actris_variable = ["aerosol particle sulphate mass concentration"] | ||
actris_matrix = ["aerosol particle phase", "PM10", "PM2.5"] | ||
ebas_component = ["sulphate_corrected", "sulphate_total"] | ||
ebas_matrix = ["aerosol", "pm10", "pm25"] |
This file was deleted.
Oops, something went wrong.