Skip to content

Commit

Permalink
1st working reader
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Jurgen Griesfeller committed May 23, 2024
1 parent e85480a commit 8eaf8ac
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 38 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = pyaro_readers
version = 0.0.8dev
version = 0.0.8dev1
author = MET Norway
description = implementations of pyaerocom reading plugins using pyaro as interface
long_description = file: README.md
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,36 +21,40 @@

# default URL
# BASE_URL = "https://secondary-data-archive.nilu.no/ebas/gen.h8ds-8596/EIMPs_winter2017-2018_data.zip"
BASE_URL = "/lustre/storeB/project/fou/kl/emep/People/danielh/projects/pyaerocom/obs/nilu_pmf/cameo_2024/EIMPs_winter2017-2018_data/"
ABSORB_FOLDER = "EIMPs_winter_2017_2018_absorption/"
LEVO_FOLDER = "EIMPs_winter_2017_2018_ECOC_Levo/"
METADATA_FILE = "Sites_EBC-campaign.xlsx"
# BASE_URL = "/lustre/storeB/project/fou/kl/emep/People/danielh/projects/pyaerocom/obs/nilu_pmf/cameo_2024/EIMPs_winter2017-2018_data/"
# ABSORB_FOLDER = "EIMPs_winter_2017_2018_absorption/"
# LEVO_FOLDER = "EIMPs_winter_2017_2018_ECOC_Levo/"
# METADATA_FILE = "Sites_EBC-campaign.xlsx"
# number of lines to read before the reading is handed to Python's csv reader
HEADER_LINE_NO = 7
DELIMITER = ","
#
NAN_VAL = -999.0
# NAN_VAL = -999.0
# update progress bar every N lines...
PG_UPDATE_LINES = 100
# PG_UPDATE_LINES = 100
# main variables to store
LAT_NAME = "Station latitude"
LON_NAME = "Station longitude"
ALT_NAME = "Station altitude"
STAT_CODE = "Station code"
STAT_NAME = "Station name"
DATE_NAME = "Date(dd:mm:yyyy)"
TIME_NAME: str = "Time(hh:mm:ss)"
# LAT_NAME = "Station latitude"
# LON_NAME = "Station longitude"
# ALT_NAME = "Station altitude"
# STAT_CODE = "Station code"
# STAT_NAME = "Station name"
# DATE_NAME = "Date(dd:mm:yyyy)"
# TIME_NAME: str = "Time(hh:mm:ss)"

BABAS_BB_NAME = "Babs_bb"
BABAS_FF_NAME = "Babs_ff"
EBC_BB_NAME = "eBC_bb"
EBC_FF_NAME = "eBC_ff"


# in principle this has to be read from the file since it's allowed to vary over the
# different variables. But since these NASA-AMES files are not compatible with
# EBAS NASA-AMES files we stick to this for now
NAN_CODE = 999.9999
NAN_EPS = 1e-2


# in principle WRONG since line indices are not absolute in NASA-AMES files
# But since these NASA-AMES files are not compatible with EBAS NASA-AMES files
# we stick to this for now
INDECIES = dict(
PI=1,
DATES=6,
Expand All @@ -60,20 +64,20 @@
EBC_BB_UNIT=15,
EBC_FF_UNIT=16,
START=17,
CODE=18,
NAME=19,
LAT=20,
LON=21,
ALT=22,
NAME=18,
LAT=19,
LON=20,
ALT=21,
)

FILE_MASK = "*.nas"

DATA_VARS = [BABAS_BB_NAME, BABAS_FF_NAME, EBC_BB_NAME, EBC_FF_NAME]
COMPUTED_VARS = []
# The computed variables have to be named after the read ones, otherwise the calculation will fail!
DATA_VARS.extend(COMPUTED_VARS)

FILL_COUNTRY_FLAG = False
FILL_COUNTRY_FLAG = True

TS_TYPE_DIFFS = {
"daily": np.timedelta64(12, "h"),
Expand All @@ -90,6 +94,7 @@ def __init__(
filters=[],
fill_country_flag: bool = FILL_COUNTRY_FLAG,
tqdm_desc: [str, None] = None,
file_mask: str = FILE_MASK,
ts_type: str = "hourly",
):
self._stations = {}
Expand All @@ -99,11 +104,10 @@ def __init__(

if Path(filename).is_file():
self._filename = filename
self._process_file(self._filename)
self._process_file(self._filename, fill_country_flag)

elif Path(filename).is_dir():
self._filename = filename + ABSORB_FOLDER
files_pathlib = Path(self._filename).glob("*.nas")
files_pathlib = Path(filename).glob(file_mask)
files = [x for x in files_pathlib if x.is_file()]

if len(files) == 0:
Expand All @@ -113,19 +117,20 @@ def __init__(
bar = tqdm(desc=tqdm_desc, total=len(files))
for file in files:
bar.update(1)
self._process_file(file)
self._process_file(file, fill_country_flag)
else:
raise ValueError(f"Given filename {filename} is neither a folder or a file")

def _process_file(self, file: Path):
def _process_file(self, file: Path, fill_country_flag: bool = FILL_COUNTRY_FLAG):
with open(file, newline="") as f:
lines = f.readlines()
self._process_open_file(lines, file)
self._process_open_file(lines, file, fill_country_flag)

def _process_open_file(self, lines: list[str], file: Path) -> None:
def _process_open_file(
self, lines: list[str], file: Path, fill_country_flag: bool = FILL_COUNTRY_FLAG
) -> None:
line_index = 0
data_start_line = int(lines[line_index].split()[0])
station = lines[INDECIES["CODE"]].split(":")[1].strip()
data_start_line = int(lines[line_index].replace(",", "").split()[0])
long_name = lines[INDECIES["NAME"]].split(":")[1].strip()

station = long_name
Expand All @@ -136,15 +141,18 @@ def _process_open_file(self, lines: list[str], file: Path) -> None:
lon = float(lines[INDECIES["LON"]].split(":")[1].strip())
lat = float(lines[INDECIES["LAT"]].split(":")[1].strip())
alt = float(lines[INDECIES["ALT"]].split(":")[1].strip()[:-1])
print(station)
country = "NN"
if not station in self._stations:
if fill_country_flag:
country = self._lookup_function()(lat, lon)

self._stations[station] = Station(
{
"station": station,
"longitude": lon,
"latitude": lat,
"altitude": alt,
"country": self._lookup_function()(lat, lon),
"country": country,
"url": str(file),
"long_name": station,
}
Expand Down Expand Up @@ -235,8 +243,3 @@ def description(self):

def url(self):
return "https://github.com/metno/pyaro-readers"


if __name__ == "__main__":
file_name = "/lustre/storeB/project/fou/kl/emep/People/danielh/projects/pyaerocom/obs/nilu_pmf/cameo_2024/EIMPs_winter2017-2018_data/"
reader = NILUPMFAbsorptionReader(filename=file_name)
42 changes: 42 additions & 0 deletions tests/test_NILUPMFAbsorptionReader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import unittest
import os

import pyaro
import pyaro.timeseries


class TestPMFEBASTimeSeriesReader(unittest.TestCase):
    """Smoke tests for the ``nilupmfabsorption`` pyaro timeseries engine.

    The tests open the sample NASA-AMES (``.nas``) data shipped under
    ``testdata/PMF_Absorption`` next to this module, once as a single file
    and once as a directory, and check that only known variables are
    exposed.

    NOTE(review): the numeric prefixes in the test method names presumably
    exist to fix the execution order (unittest runs methods in alphabetical
    order) -- confirm before renaming.
    """

    # Name under which the reader is looked up via pyaro.
    engine = "nilupmfabsorption"

    # Directory containing the sample files, resolved relative to this
    # module so the tests do not depend on the current working directory.
    testdata_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "testdata", "PMF_Absorption"
    )

    # Single sample file used by the single-file test; built from
    # testdata_dir so the two paths cannot drift apart.
    file = os.path.join(
        testdata_dir,
        "Zeppelin_absorption_20171201_3mo_PMF_lev3.nas",
    )

    # Every variable reported by the reader must be one of these.
    test_vars = ["Babs_bb", "Babs_ff", "eBC_bb", "eBC_ff"]

    def test_0engine(self):
        """The engine name is listed among pyaro's timeseries engines."""
        self.assertIn(self.engine, pyaro.list_timeseries_engines())

    def test_1open_single_file(self):
        """A single file yields known variables and exactly one station."""
        with pyaro.open_timeseries(self.engine, self.file, filters=[]) as ts:
            variables = ts.variables()
            self.assertGreaterEqual(len(variables), 1)
            for var in variables:
                # assertIn instead of a bare assert: it is not stripped
                # under ``python -O`` and reports the offending value.
                self.assertIn(var, self.test_vars)
            self.assertEqual(len(ts.stations()), 1)

    def test_2open_directory(self):
        """A directory of files yields at least one known variable."""
        with pyaro.open_timeseries(self.engine, self.testdata_dir, filters=[]) as ts:
            variables = ts.variables()
            self.assertGreaterEqual(len(variables), 1)
            for var in variables:
                self.assertIn(var, self.test_vars)


# Allow running this test module directly (``python <this file>``) in
# addition to discovery through a test runner.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 8eaf8ac

Please sign in to comment.