Skip to content

Commit

Permalink
WIP: start simple
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Griesfeller committed Sep 3, 2024
1 parent 3f28569 commit 37a7970
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 242 deletions.
232 changes: 28 additions & 204 deletions src/pyaro_readers/actrisebas/ActrisEbasReader.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import csv
import logging
import tomllib
from io import BytesIO
from urllib.parse import urlparse
from urllib.request import urlopen
from zipfile import BadZipFile, ZipFile

from geocoder_reverse_natural_earth import (
Geocoder_Reverse_NE,
Geocoder_Reverse_Exception,
)
import numpy as np
import requests
import json
from pyaro.timeseries import (
AutoFilterReaderEngine,
Data,
Expand All @@ -19,71 +16,33 @@
)
from tqdm import tqdm
import datetime
import xarray as xr
import os

# default URL
BASE_URL = "https://aeronet.gsfc.nasa.gov/data_push/V3/All_Sites_Times_Daily_Averages_AOD20.zip"
# number of lines to read before the reading is handed to Python's csv reader
HEADER_LINE_NO = 7
DELIMITER = ","
#
NAN_VAL = -999.0
# update progress bar every N lines...
PG_UPDATE_LINES = 100
# main variables to store
LAT_NAME = "Site_Latitude(Degrees)"
LON_NAME = "Site_Longitude(Degrees)"
ALT_NAME = "Site_Elevation(m)"
SITE_NAME = "AERONET_Site_Name"
DATE_NAME = "Date(dd:mm:yyyy)"
TIME_NAME: str = "Time(hh:mm:ss)"
AOD500_NAME = "AOD_500nm"
ANG4487_NAME = "440-870_Angstrom_Exponent"
AOD440_NAME = "AOD_440nm"
AOD870_NAME = "AOD_870nm"
AOD550_NAME = "AOD_550nm"

DATA_VARS = [AOD500_NAME, ANG4487_NAME, AOD440_NAME, AOD870_NAME]
COMPUTED_VARS = [AOD550_NAME]
# The computed variables have to be named after the read ones, otherwise the calculation will fail!
DATA_VARS.extend(COMPUTED_VARS)

FILL_COUNTRY_FLAG = False
logger = logging.getLogger(__name__)

TS_TYPE_DIFFS = {
"daily": np.timedelta64(12, "h"),
"instantaneous": np.timedelta64(0, "s"),
"points": np.timedelta64(0, "s"),
"monthly": np.timedelta64(15, "D"),
}
# default API URL base
# BASE_API_URL = "https://prod-actris-md.nilu.no/Vocabulary/categories"
BASE_API_URL = "https://prod-actris-md.nilu.no/"
# base URL to query for data for a certain variable
# NOTE: BASE_API_URL already ends with "/", so no separator is inserted here;
# the previous f"{BASE_API_URL}/content/" produced a double slash in the URL.
VAR_QUERY_URL = f"{BASE_API_URL}content/"
# basename of definitions.toml which connects the pyaerocom variable names with the ACTRIS variable names
DEFINITION_FILE_BASENAME = "definitions.toml"

# absolute path to the definitions file, resolved relative to this module
DEFINITION_FILE = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    DEFINITION_FILE_BASENAME,
)

class ActrisEbasTimeSeriesReader(AutoFilterReaderEngine.AutoFilterReader):
def __init__(
self,
filename,
filters=[],
fill_country_flag: bool = FILL_COUNTRY_FLAG,
var_name="ozone mass concentration",
tqdm_desc: str | None = None,
ts_type: str = "daily",
):
"""open a new Aeronet timeseries-reader
:param filename: str
:param filters:
:param fill_country_flag:
:param tqdm_desc:
:param filename_or_obj_or_url: path-like object to csv-file
input file looks like this (daily file; times noted are middle times):
AERONET Version 3;
Cuiaba
Version 3: AOD Level 2.0
The following data are automatically cloud cleared and quality assured with pre-field and post-field calibration applied.
Contact: PI=Pawan Gupta and Elena Lind; PI [email protected] and [email protected]
Daily Averages,UNITS can be found at,,, https://aeronet.gsfc.nasa.gov/new_web/units.html
AERONET_Site,Date(dd:mm:yyyy),Time(hh:mm:ss),Day_of_Year,AOD_1640nm,AOD_1020nm,AOD_870nm,AOD_865nm,AOD_779nm,AOD_675nm,AOD_667nm,AOD_620nm,AOD_560nm,AOD_555nm,AOD_551nm,AOD_532nm,AOD_531nm,AOD_510nm,AOD_500nm,AOD_490nm,AOD_443nm,AOD_440nm,AOD_412nm,AOD_400nm,AOD_380nm,AOD_340nm,Precipitable_Water(cm),AOD_681nm,AOD_709nm,AOD_Empty,AOD_Empty,AOD_Empty,AOD_Empty,AOD_Empty,440-870_Angstrom_Exponent,380-500_Angstrom_Exponent,440-675_Angstrom_Exponent,500-870_Angstrom_Exponent,340-440_Angstrom_Exponent,440-675_Angstrom_Exponent[Polar],N[AOD_1640nm],N[AOD_1020nm],N[AOD_870nm],N[AOD_865nm],N[AOD_779nm],N[AOD_675nm],N[AOD_667nm],N[AOD_620nm],N[AOD_560nm],N[AOD_555nm],N[AOD_551nm],N[AOD_532nm],N[AOD_531nm],N[AOD_510nm],N[AOD_500nm],N[AOD_490nm],N[AOD_443nm],N[AOD_440nm],N[AOD_412nm],N[AOD_400nm],N[AOD_380nm],N[AOD_340nm],N[Precipitable_Water(cm)],N[AOD_681nm],N[AOD_709nm],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[AOD_Empty],N[440-870_Angstrom_Exponent],N[380-500_Angstrom_Exponent],N[440-675_Angstrom_Exponent],N[500-870_Angstrom_Exponent],N[340-440_Angstrom_Exponent],N[440-675_Angstrom_Exponent[Polar]],Data_Quality_Level,AERONET_Instrument_Number,AERONET_Site_Name,Site_Latitude(Degrees),Site_Longitude(Degrees),Site_Elevation(m)
Cuiaba,16:06:1993,12:00:00,167,-999.,0.081800,0.088421,-999.,-999.,0.095266,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.117581,-999.,-999.,-999.,0.149887,2.487799,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.424234,-999.,0.497630,-999.,0.924333,-999.,0,3,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,6,0,0,0,0,0,0,0,3,0,3,0,3,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000
Cuiaba,17:06:1993,12:00:00,168,-999.,0.092246,0.099877,-999.,-999.,0.110915,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.144628,-999.,-999.,-999.,0.187276,2.592902,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0.547807,-999.,0.628609,-999.,0.988320,-999.,0,16,16,0,0,16,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,16,32,0,0,0,0,0,0,0,16,0,16,0,16,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000
"""
"""
self._filename = filename
self._stations = {}
Expand All @@ -92,112 +51,11 @@ def __init__(
self._header = []
_laststatstr = ""
self._revision = datetime.datetime.min
# check if file is a URL
if self.is_valid_url(self._filename):
# try to open as zipfile
try:
r = requests.get(self._filename)
zip_ref = ZipFile(BytesIO(r.content))
for file in zip_ref.namelist():
with zip_ref.open(file) as response:
lines = [line.decode("utf-8") for line in response.readlines()]
# read only 1st file here
break
except BadZipFile:
response = urlopen(self._filename)
lines = [line.decode("utf-8") for line in response.readlines()]

else:
with open(self._filename, newline="") as csvfile:
lines = csvfile.readlines()

for _hidx in range(HEADER_LINE_NO - 1):
self._header.append(lines.pop(0))
# get fields from header line although csv can do that as well since we might want to adjust these names
self._fields = lines.pop(0).strip().split(",")

gcd = Geocoder_Reverse_NE()
crd = csv.DictReader(lines, fieldnames=self._fields, delimiter=DELIMITER)
bar = tqdm(desc=tqdm_desc, total=len(lines))
for _ridx, row in enumerate(crd):
bar.update(1)
if row[SITE_NAME] != _laststatstr:
_laststatstr = row[SITE_NAME]
# new station
station = row[SITE_NAME]
lon = float(row[LON_NAME])
lat = float(row[LAT_NAME])
alt = float(row["Site_Elevation(m)"])
if fill_country_flag:
try:
country = gcd.lookup(lat, lon)["ISO_A2_EH"]
except Geocoder_Reverse_Exception:
country = "NN"
else:
country = "NN"

# units of Aeronet data are always 1
units = "1"
if not station in self._stations:
self._stations[station] = Station(
{
"station": station,
"longitude": lon,
"latitude": lat,
"altitude": alt,
"country": country,
"url": "",
"long_name": station,
}
)
# every line contains all variables, sometimes filled with NaNs though
if _ridx == 0:
for variable in DATA_VARS:
if variable in self._data:
da = self._data[variable]
if da.units != units:
raise Exception(
f"unit change from '{da.units}' to 'units'"
)
else:
da = NpStructuredData(variable, units)
self._data[variable] = da
# read config file
self._def_data = self._read_definitions(file=DEFINITION_FILE)

day, month, year = row[DATE_NAME].split(":")
datestring = "-".join([year, month, day])
datestring = "T".join([datestring, row[TIME_NAME]])
self._revision = max(
[
self._revision,
datetime.datetime.strptime(datestring, "%Y-%m-%dT%H:%M:%S"),
]
)
time_dummy = np.datetime64(datestring)
start = time_dummy - TS_TYPE_DIFFS[ts_type]
end = time_dummy + TS_TYPE_DIFFS[ts_type]

ts_dummy_data = {}
for variable in DATA_VARS:
try:
value = float(row[variable])
if value == NAN_VAL:
value = np.nan
# store value in ts_dummy_data, so we don't need to perform the nan check
# for each component of calculated values again
ts_dummy_data[variable] = value
except KeyError:
# computed variable
if variable == AOD550_NAME:
value = self.compute_od_from_angstromexp(
0.55,
ts_dummy_data[AOD440_NAME],
0.44,
ts_dummy_data[ANG4487_NAME],
)
self._data[variable].append(
value, station, lat, lon, alt, start, end, Flag.VALID, np.nan
)
bar.close()
# bar = tqdm(desc=tqdm_desc, total=len(lines))
# bar.close()

def metadata(self):
return dict(revision=datetime.datetime.strftime(self._revision, "%y%m%d%H%M%S"))
Expand All @@ -214,46 +72,12 @@ def _unfiltered_variables(self) -> list[str]:
def close(self):
    """Release resources held by the reader.

    Nothing to release here; the method exists to satisfy the
    pyaro reader interface.
    """

def compute_od_from_angstromexp(
    self, to_lambda: float, od_ref: float, lambda_ref: float, angstrom_coeff: float
) -> float:
    """Shift an AOD value to another wavelength via the Angstrom exponent.

    Applies the Angstrom power law to translate a reference AOD,
    measured at ``lambda_ref``, to the wavelength ``to_lambda``.

    Parameters
    ----------
    to_lambda : :obj:`float` or :obj:`ndarray`
        wavelength for which AOD is calculated
    od_ref : :obj:`float` or :obj:`ndarray`
        reference AOD
    lambda_ref : :obj:`float` or :obj:`ndarray`
        wavelength corresponding to reference AOD
    angstrom_coeff : :obj:`float` or :obj:`ndarray`
        Angstrom coefficient

    Returns
    -------
    :obj:`float` or :obj:`ndarray`
        AOD(s) at shifted wavelength
    """
    wavelength_ratio = lambda_ref / to_lambda
    return od_ref * wavelength_ratio**angstrom_coeff

def calc_angstroem_coeff(
    self, od1: float, od2: float, wl1: float, wl2: float
) -> float:
    """Calculate the Angstrom coefficient from two AOD/wavelength pairs.

    :param od1: AOD at wavelength ``wl1``
    :param od2: AOD at wavelength ``wl2``
    :param wl1: first wavelength
    :param wl2: second wavelength
    :return: the Angstrom coefficient
    """
    od_ratio = od1 / od2
    wl_ratio = wl1 / wl2
    return -np.log(od_ratio) / np.log(wl_ratio)
def _read_definitions(self, file=DEFINITION_FILE):
    """Load the variable-definition mapping from a TOML file.

    The definitions file links pyaerocom variable names to the ACTRIS
    vocabulary and the EBAS vocabulary (the EBAS part will hopefully
    not be necessary in the next EBAS version anymore).

    :param file: path to the TOML definitions file
    :return: parsed TOML content as a dict
    """
    with open(file, "rb") as toml_handle:
        return tomllib.load(toml_handle)

def is_valid_url(self, url):
try:
Expand Down
21 changes: 21 additions & 0 deletions src/pyaro_readers/actrisebas/definitions.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# connection between pyaerocom and ACTRIS-EBAS variables
[variables]
#prototype
# the order in the list below is preserved and determines the order the data is read
#[variables.<var name>]
#actris_variable = [<list of actris variables>]
#actris_matrix = [<list of actris matrixes>]
#ebas_component = [<list of ebas component names>]
#ebas_matrix = [<list of ebas matrix names>]

[variables.vmro3]
actris_variable = ["ozone mass concentration", "ozone amount fraction"]
actris_matrix = ["gas phase"]
ebas_component = ["ozone"]
ebas_matrix = ["air"]

[variables.concso4]
actris_variable = ["aerosol particle sulphate mass concentration"]
actris_matrix = ["aerosol particle phase", "PM10", "PM2.5"]
ebas_component = ["sulphate_corrected", "sulphate_total"]
ebas_matrix = ["aerosol", "pm10", "pm25"]
38 changes: 0 additions & 38 deletions src/pyaro_readers/actrisebas/definitions.yaml

This file was deleted.

0 comments on commit 37a7970

Please sign in to comment.