From 9904d0f1f62e4288fdcd1420552e210a9b303b37 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 4 Jul 2024 18:20:38 +1000 Subject: [PATCH 01/10] now varlist and template are 1 step see #150 --- src/mopdb/mopdb.py | 125 ++++++++++++++++++++++++++------------- src/mopdb/mopdb_utils.py | 54 +++++++++++++---- 2 files changed, 129 insertions(+), 50 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 892b4cb..b0935ff 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -43,18 +43,47 @@ def mopdb_catch(): sys.exit(1) +def require_date(ctx, param, value): + """Changes startdate option in template command from optional to + required if fpath is a directory. + """ + if Path(value).is_dir(): + ctx.command.params[1].required = True + return value + + def db_args(f): - """Define database click arguments + """Define database click options """ constraints = [ click.option('--fname', '-f', type=str, required=True, - help='Input file: used to update db table (mapping/cmor),' + - 'or to pass output model variables (list)'), + help='Input file: used to update db table (mapping/cmor)'), click.option('--dbname', type=str, required=False, default='default', help='Database relative path by default is package access.db'), - click.option('--alias', '-a', type=str, required=False, default=None, - help='Table alias to use when updating cmor var table or creating map template with list' + - ' to keep track of variable definition origin. 
If none passed uses input filename')] + click.option('--alias', '-a', type=str, required=False, default='', + help='Table alias to track definitions origin in cmorvar table.')] + for c in reversed(constraints): + f = c(f) + return f + + +def map_args(f): + """Define mapping click options for varlist and template commands""" + constraints = [ + click.option('--fpath', '-f', type=str, required=True, + callback=require_date, + help='Model output directory or varlist for the same'), + click.option('--startdate', '-d', type=str, required=False, + help='Start date of model run as YYYYMMDD'), + click.option('--version', '-v', required=True, + type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), + show_default=True, + help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2'), + click.option('--dbname', type=str, required=False, default='default', + help='Database relative path by default is package access.db'), + click.option('--alias', '-a', type=str, required=False, default='', + help='''Alias to use to keep track of variable definition origin. + If none passed uses input filename''')] for c in reversed(constraints): f = c(f) return f @@ -118,8 +147,10 @@ def check_cmor(ctx, dbname): @mopdb.command(name='table') @db_args @click.option('--label', '-l', required=False, default='CMIP6', - type=click.Choice(['CMIP6', 'AUS2200', 'CM2']), show_default=True, - help='Label indicating origin of CMOR variable definitions. Currently only CMIP6, AUS2200 and CM2') + type=click.Choice(['CMIP6', 'AUS2200', 'CM2', 'OM2']), + show_default=True, + help='''Label indicating origin of CMOR variable definitions. + Currently only CMIP6, AUS2200, CM2 and OM2''') @click.pass_context def cmor_table(ctx, dbname, fname, alias, label): """Create CMIP style table containing new variable definitions @@ -136,7 +167,7 @@ def cmor_table(ctx, dbname, fname, alias, label): fname : str Mapping file??? alias : str - not used here + ??? it is used so what's ahppenw hen not passed? 
label : str Label indicating preferred cmor variable definitions """ @@ -184,7 +215,7 @@ def cmor_table(ctx, dbname, fname, alias, label): if len(v[4].split()) != len(record[9].split()): mopdb_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") var_list.append(definition) - write_cmor_table(var_list, alias, mopdb_log) + write_cmor_table(var_list, alias) conn.close() return @@ -206,15 +237,15 @@ def update_cmor(ctx, dbname, fname, alias): fname : str Name of json input file with records to add alias : str - Indicates origin of records to add, if None json filename - base is used instead + Indicates origin of records to add, if '' (default) json + filename base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + if alias == '': alias = fname.split("/")[-1] alias = alias.replace('.json', '') mopdb_log.info(f"Adding {alias} to variable name to track origin") @@ -260,35 +291,46 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') -@db_args -@click.option('--version', '-v', required=True, - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def map_template(ctx, dbname, fname, alias, version): +def map_template(ctx, fpath, startdate, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. If not tries to partially match them. + It can get as input the directory containing the output in + which case it will first call model_vars() (varlist command) + or the file output of the same if already available. 
+ Parameters ---------- ctx : obj Click context object + fpath : str + Path of csv input file with output variables to map or + of directory containing output files to scan + startdate : str + Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) - fname : str - Name of csv input file with output variables to map - alias : str - Indicates origin of records to add, if None csv filename - base is used instead version : str Version of ACCESS model used to generate variables + alias : str + Indicates origin of records to add, if '' csv filename + base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + # work out if fpath is varlist or path to output + fpath = Path(fpath) + if fpath.is_file(): + fname = fpath.name + else: + mopdb_log.debug(f"Calling model_vars() from template: {fpath}") + fname = model_vars(fpath, startdate, dbname, version, alias) + if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit if dbname == 'default': @@ -298,6 +340,7 @@ def map_template(ctx, dbname, fname, alias, version): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) + check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, @@ -340,7 +383,7 @@ def update_map(ctx, dbname, fname, alias): fname : str Name of csv input file with mapping records alias : str - Indicates origin of records to add, if None csv filename + Indicates origin of records to add, if '' csv filename base is used instead Returns @@ -373,26 +416,23 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') -@click.option('--indir', '-i', type=str, required=True, - help='Converted model output directory') 
-@click.option('--startdate', '-d', type=str, required=True, - help='Start date of model run as YYYYMMDD') -@click.option('--dbname', type=str, required=False, default='default', - help='Database relative path by default is package access.db') -@click.option('--version', '-v', required=False, default='CM2', - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def model_vars(ctx, indir, startdate, dbname, version): +def list_vars(ctx, fpath, startdate, dbname, version, alias): + """Calls model_vars to generate list of variables""" + fname = model_vars(fpath, startdate, dbname, version, alias) + + +@click.pass_context +def model_vars(ctx, fpath, startdate, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file - alias is not used so far Parameters ---------- ctx : obj Click context object - indir : str + fpath : str Path for model output files startdate : str Date or other string to match to individuate one file per type @@ -400,18 +440,24 @@ def model_vars(ctx, indir, startdate, dbname, version): Database relative path (default is data/access.db) version : str Version of ACCESS model to use as preferred mapping + alias : str + Used for output filename: 'varlist_'. 
If '', + 'varlist_mopdb' is used instead Returns ------- + fname : str + Name of output varlist file """ + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - write_varlist(conn, indir, startdate, version) + fname = write_varlist(conn, fpath, startdate, version, alias) conn.close() - return + return fname @mopdb.command(name='del') @@ -456,4 +502,3 @@ def remove_record(ctx, dbname, table, pair): # select, confirm, delete record/s delete_record(conn, table, col, pair) return - diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index d4de94f..b9e0c4d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -523,7 +523,7 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version): +def write_varlist(conn, indir, startdate, version, alias): """Based on model output files create a variable list and save it to a csv file. 
Main attributes needed to map output are provided for each variable @@ -533,6 +533,14 @@ def write_varlist(conn, indir, startdate, version): files = list_files(indir, sdate) mopdb_log.debug(f"Found files: {files}") patterns = [] + if alias == '': + alias = 'mopdb' + fname = f"varlist_{alias}.csv" + fcsv = open(fname, 'w') + fwriter = csv.writer(fcsv, delimiter=';') + fwriter.writerow(["name", "cmor_var", "units", "dimensions", + "frequency", "realm", "cell_methods", "cmor_table", "vtype", + "size", "nsteps", "filename", "long_name", "standard_name"]) for fpath in files: # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") @@ -545,12 +553,7 @@ def write_varlist(conn, indir, startdate, version): pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) mopdb_log.debug(f"File pattern: {fpattern}") - fcsv = open(f"{fpattern}.csv", 'w') - fwriter = csv.writer(fcsv, delimiter=';') - fwriter.writerow(["name", "cmor_var", "units", "dimensions", - "frequency", "realm", "cell_methods", "cmor_table", - "vtype", "size", "nsteps", "filename", "long_name", - "standard_name"]) + fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables realm = get_realm(fpath, version) ds = xr.open_dataset(fpath, decode_times=False) @@ -587,9 +590,9 @@ def write_varlist(conn, indir, startdate, version): nsteps, fpattern, attrs.get('long_name', ""), attrs.get('standard_name', "")] fwriter.writerow(line) - fcsv.close() mopdb_log.info(f"Variable list for {fpattern} successfully written") - return + fcsv.close() + return fname def read_map_app4(fname): @@ -644,7 +647,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is None: + if alias is '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list @@ -883,6 +886,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, cell_methods, positive, cmor_table, version, vtype, size, nsteps, filename, long_name, 
standard_name """ + mopdb_log = logging.getLogger('mopdb_log') keys = ['cmor_var', 'input_vars', 'calculation', 'units', 'dimensions', 'frequency', 'realm', 'cell_methods', @@ -919,6 +923,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): """ """ + mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: @@ -938,6 +943,7 @@ def check_realm_units(conn, var): """Checks that realm and units are consistent with values in cmor table. """ + mopdb_log = logging.getLogger('mopdb_log') vname = f"{var['cmor_var']}-{var['cmor_table']}" if var['cmor_table'] is None or var['cmor_table'] == "": @@ -965,6 +971,7 @@ def check_realm_units(conn, var): def get_realm(fpath, version): '''Return realm for variable in files or NArealm''' + mopdb_log = logging.getLogger('mopdb_log') if version == 'AUS2200': realm = 'atmos' @@ -980,3 +987,30 @@ def get_realm(fpath, version): mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm + + +def check_varlist(rows, fname): + """Checks that varlist written to file has sensible information for frequency and realm + to avoid incorrect mapping to be produced. + + At the moment we're checking only frequency and realm as they can be missed or wrong + depending on the file structure. + + Parameters + ---------- + rows : list(dict) + list of variables to match + """ + + mopdb_log = logging.getLogger('mopdb_log') + frq_list = ['min', 'hr', 'day', 'mon', 'yr'] + realm_list = ['ice', 'ocean', 'atmos', 'land'] + for row in rows: + if row['name'][0] == "#" or row['name'] == 'name': + continue + elif (not any( x in row['frequency'] for x in frq_list) + or row['realm'] not in realm_list): + mopdb_log.error(f""" Check frequency and realm in {fname}. 
+ Some values might be invalid and need fixing""") + sys.exit() + return From 29567a35e821a59f9a7a861f65e2ea9198600129 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 15:08:39 +1000 Subject: [PATCH 02/10] moved options #151, #113 --- conda/meta.yaml | 2 +- docs/gettingstarted.rst | 62 ++++++++++++++---------------- docs/mopdb_command.rst | 49 ++++++++++++++---------- src/mopdb/mopdb.py | 8 ++-- src/mopper/mop_setup.py | 8 ++++ src/mopper/mopper.py | 80 ++++++++++++++++++++++++++++----------- src/mopper/setup_utils.py | 7 ++-- 7 files changed, 132 insertions(+), 84 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index b0cb321..f20a79a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "0.6.1" %} +{% set version = "1.0.0" %} package: name: mopper version: {{ version }} diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 397652e..666e89a 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -3,46 +3,38 @@ Starting with MOPPeR A typical workflow to post-process an ACCESS or UM model output requires three steps. -Step1: get a list of variables from the raw output --------------------------------------------------- - - *mopdb varlist -i -d * - -`mopdb varlist` will output one or more `csv` files with a detailed list of variables, one list for each pattern of output files. - -.. code-block:: console - - $ mopdb varlist -i /scratch/../exp -d 20120101 - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Variable list for ocean_scalar.nc- successfully written - Variable list for ocean_month.nc- successfully written - Variable list for ocean_daily.nc- successfully written - -.. csv-table:: Example of varlist output - :file: varlist_example.csv - :delim: ; - -The argument is used to reduce the number of files to check. The tool will recognise anyway a repeated pattern and only add a list of variable for the same pattern once. 
- -Step2: create a template for a mapping file +Step1: create a template for a mapping file ------------------------------------------- - *mopdb template -i -v -a * + *mopdb template -f -v -a * .. code-block:: console - $ mopdb template -f ocean.csv -v OM2 -a ocnmon - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Derived variables: {'msftyrho', 'msftmrho', 'hfds', 'msftmz', 'msftyz'} - Changing advectsweby-CM2_mon units from Watts/m^2 to W m-2 - Changing areacello-CMIP6_Ofx units from m^2 to m2 - Variable difvho-CM2_Omon not found in cmor table + $ mopdb template -f /scratch/.../exp1/atmos -m 095101 -v CM2 -a exp1 + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Found more than 1 definition for fld_s16i222: + [('psl', 'AUS2200', 'AUS2200_A10min', '10minPt'), ('psl', 'AUS2200', 'AUS2200_A1hr', '1hr')] + Using psl from AUS2200_A10min + Variable list for cw323a.pm successfully written + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Derived variables: {'treeFracBdlEvg', 'grassFracC4', 'shrubFrac', 'prc', 'mrsfl', 'landCoverFrac', 'mmrbc', 'mmrso4', 'theta24', 'sftgif', 'treeFracNdlEvg', 'snw', 'rtmt', 'nwdFracLut', 'sifllatstop', 'prw', 'mrfso', 'rlus', 'mrsll', 'baresoilFrac', 'c4PftFrac', 'wetlandFrac', 'mrro', 'c3PftFrac', 'treeFracBdlDcd', 'od550lt1aer', 'treeFracNdlDcd', 'residualFrac', 'wetss', 'sbl', 'vegFrac', 'rsus', 'cropFrac', 'mmrdust', 'grassFrac', 'mmrss', 'od550aer', 'hus24', 'dryss', 'fracLut', 'mrlso', 'mc', 'od440aer', 'grassFracC3', 'nep', 'mmroa', 'cropFracC3', 'snm', 'agesno'} + Changing cl-CMIP6_Amon units from 1 to % + Changing cli-CMIP6_Amon units from 1 to kg kg-1 + Changing clt-CMIP6_Amon units from 1 to % + Changing clw-CMIP6_Amon units from 1 to kg kg-1 + Variable husuvgrid-CM2_mon not found in cmor table + ... 
`mopdb template` takes as input: - * the output/s of `varlist` - To get one template for the all variable concatenate the output on `varlist` into one file first. - * the access version to use as preferred - * an optional alias, if omitted the varlist filename will be used. Based on the example: `map_ocnmon.csv` or `map_ocean.csv` if omitted. + * -f/--fpath : the path to the model output + * -m/--match : used to identify files' patterns. The tool will only add a list of variables for the same pattern once. + * -v/--version : the access version to use as preferred mapping. ESM1.5, CM2, OM2 and AUS2200 are currently available. + * -a/--alias : an optional alias, if omitted default names will be used for the output files. + +Alternatively a list of variables can be created separately using the *varlist* command and this can be passed directly to template using the *fpath* option. + + *mopdb template -f -v -a * It produces a csv file with a list of all the variables from raw output mapped to cmip style variables. These mappings also take into account the frequency and include variables that can be potentially calculated with the listed fields. The console output lists these, as shown above. @@ -51,18 +43,20 @@ The mappings can be different between different version and/or configurations of Starting with version 0.6 the list includes matches based on the standard_name, as these rows often list more than one option per field, it's important to either edit or remove these rows before using the mapping file. The :doc:`Customing section ` covers what to do for an experiment using a new configuration which is substantially different from the ones which are available. +It also provides an intermediate varlist_.csv file that shows the information derived directly from the files. This can be useful to debug in case of issues with the mapping. 
This file is checked before the mapping step to make sure the tool has detected sensible frequency and realm, if the check fails the mapping won't proceed but the varlist file can be edited appropriately. .. warning:: Always check that the resulting template is mapping the variables correctly. This is particularly true for derived variables. Comment lines are inserted to give some information on what assumptions were done for each group of mappings. + The se -Step3: Set up the working environment +Step2: Set up the working environment ------------------------------------- *mop -c setup* .. code-block:: console - +https://climate-cms.org/posts/2023-05-31-vscode-are.html $ mop -c exp_conf.yaml setup Simulation to process: cy286 Setting environment and creating working directory diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 32d712c..421f39c 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -54,29 +54,17 @@ e.g. use aus2200 for mappings related to the AUS2200 configuration: A user that wants to create a mapping table for another AUS2200 simulation can use this value to select appropriate mappings (see how to do that below). -Get a list of variables from the model output ---------------------------------------------- +Create a mapping file +--------------------- .. code-block:: - mopdb varlist -i -d - -this will create for each output file a list of variables with useful attributes -These can be concatenated into one or used to create separate mappings. - -.. _varlist example: -.. 
dropdown:: Example output of varlist +This can be done by providing the model output path and a pattern to match or directly a varlist file - name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name - fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature - fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity - fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature - fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask - fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction - ... +From output path: + + mopdb template -f -m -v -Create a mapping file starting from variable list -------------------------------------------------- -.. code-block:: +From varlist file: mopdb template -f -v @@ -119,6 +107,29 @@ The other groups of records require checking, as either the version or the frequ ... +Get a list of variables from the model output +--------------------------------------------- +.. code-block:: + + mopdb varlist -f -m + +this will create a list of variables with useful attributes + +.. _varlist example: +.. 
dropdown:: Example output of varlist + + name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name + #cw323a.pm + fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature + fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity + fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature + fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask + fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction + ... + +Doing this step separately can be useful if the model output is using a random directory structure, as it's more likely in such a case that important attributes like frequency and realm which are used for the mapping might be incorrect or missing. In such a case it might be more efficient processing different kind of files separately first, making sure frequency and realm are correct and then combining them into one file to pass to template. +The template command will stop execution if detects potentially wrong values for these fields and save + Check which variables aren't yet defined ---------------------------------------- .. 
code-block:: console diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index b0935ff..7a2e744 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -72,9 +72,11 @@ def map_args(f): constraints = [ click.option('--fpath', '-f', type=str, required=True, callback=require_date, - help='Model output directory or varlist for the same'), - click.option('--startdate', '-d', type=str, required=False, - help='Start date of model run as YYYYMMDD'), + help=(''''Path for model output files. For "template" + command can also be file generated by varlist step''')), + click.option('--match', '-m', type=str, required=False, + help=('''String to match output files. Most often + the timestamp from one of the output files''')), click.option('--version', '-v', required=True, type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 7040270..90ba47e 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -196,6 +196,14 @@ def setup_env(ctx): else: cdict['tables_path'] = appdir / cdict['tables_path'] cdict['ancils_path'] = appdir / cdict['ancils_path'] + # conda env to run job + if cdict['conda_env'] == 'default': + cdict['conda_env'] = '' + else: + path = Path(cdict['conda_env']) + if not path.is_absolute(): + path = appdir / path + cdict['conda_env'] = f"source {str(path)}" # Output subdirectories outpath = cdict['outpath'] cdict['maps'] = outpath / "maps" diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 5418309..6313edd 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -53,13 +53,22 @@ def mop_catch(): sys.exit(1) +def mop_args(f): + """Define common click options + """ + constraints = [ + click.option('--debug', is_flag=True, default=False, + help="Show debug info"), + click.option('--cfile', '-c', type=str, required=True, + help='Experiment configuration as yaml file')] + for c in reversed(constraints): + f = c(f) + return f + + 
@click.group(context_settings=dict(help_option_names=['-h', '--help'])) -@click.option('--cfile', '-c', type=str, required=True, - help='Experiment configuration as yaml file') -@click.option('--debug', is_flag=True, default=False, - help="Show debug info") @click.pass_context -def mop(ctx, cfile, debug): +def mop(ctx): """Main command with 2 sub-commands: - setup to setup the job to run - run to execute the post-processing @@ -68,33 +77,39 @@ def mop(ctx, cfile, debug): ---------- ctx : obj Click context object + """ + #ctx.obj = {} + pass + + +@mop.command(name='run') +@mop_args +#@click.option('--cfile', '-c', type=str, required=True, +# help='Experiment configuration as yaml file') +@click.pass_context +def mop_run(ctx, cfile, debug): + """Subcommand that executes the processing. + + Use the configuration yaml file created in setup step as input. + + Parameters + ---------- cfile : str Name of yaml configuration file, run sub-command uses the configuration created by setup debug : bool If true set logging level to debug """ + + # load config file with open(cfile, 'r') as yfile: cfg = yaml.safe_load(yfile) ctx.obj = cfg['cmor'] ctx.obj['attrs'] = cfg['attrs'] - # set up main mop log - if ctx.invoked_subcommand == 'setup': - mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) - else: - mop_log = config_log(debug, ctx.obj['appdir']) + # set up logger + mop_log = config_log(debug, ctx.obj['appdir']) ctx.obj['debug'] = debug mop_log.info(f"Simulation to process: {ctx.obj['exp']}") - - -@mop.command(name='run') -@click.pass_context -def mop_run(ctx): - """Subcommand that executes the processing. - - Use the configuration yaml file created in setup step as input. 
- """ - mop_log = logging.getLogger('mop_log') # Open database and retrieve list of files to create conn = db_connect(ctx.obj['database']) c = conn.cursor() @@ -117,11 +132,12 @@ def mop_run(ctx): return +@mop.command(name='setup') +@mop_args @click.option('--update', is_flag=True, default=False, help="Update current settings, keeping db and logs") -@mop.command(name='setup') @click.pass_context -def mop_setup(ctx, update): +def mop_setup(ctx, cfile, debug, update): """Setup of mopper processing job and working environment. * Defines and creates paths @@ -131,8 +147,26 @@ def mop_setup(ctx, update): * creates/updates database filelist table to list files to create * finalises configuration and save in new yaml file * writes job executable file and submits (optional) to queue + + Parameters + ---------- + cfile : str + Name of yaml configuration file, run sub-command uses the + configuration created by setup + debug : bool + If True set logging level to debug + update : bool + If True update current workding directory (default is False) """ - mop_log = logging.getLogger('mop_log') + + # load config file + with open(cfile, 'r') as yfile: + cfg = yaml.safe_load(yfile) + ctx.obj = cfg['cmor'] + ctx.obj['attrs'] = cfg['attrs'] + ctx.obj['debug'] = debug + # set up logger + mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) # then add setup_env to config mop_log.info("Setting environment and creating working directory") ctx.obj['update'] = update diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 68c60dd..e0341fa 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -700,8 +700,6 @@ def define_template(ctx, flag, nrows): cdict : dict Dictionary with cmor settings for experiment """ - # temporarily removing this as it only works for conda envs - #{os.path.dirname(sys.executable)}/mop -c {ctx.obj['exp']}_config.yaml run template = f"""#!/bin/bash #PBS -P {ctx.obj['project']} #PBS -q {ctx.obj['queue']} @@ 
-717,9 +715,10 @@ def define_template(ctx, flag, nrows): # for a list of packages module use /g/data/hh5/public/modules -module load conda/analysis3 +module load conda/analysis3-unstable +{ctx.obj['conda_env']} cd {ctx.obj['appdir']} -mop -c {ctx.obj['exp']}_config.yaml run +mop run -c {ctx.obj['exp']}_config.yaml echo 'APP completed for exp {ctx.obj['exp']}.'""" return template From f8b1a24a4b96d781f4fd5c33bf1a4a3d5d8e76ac Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 16:40:28 +1000 Subject: [PATCH 03/10] minor fix to actions and solved #150 --- .github/workflows/mopper-conda.yaml | 8 ++++---- src/mopdb/mopdb.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 98ea1a7..b4ecaa5 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -1,11 +1,11 @@ -name: xmhw-conda-install-test +name: mopper-conda-install-test #on: [push] on: push: branches: - main - - newrelease + - prerelease pull_request: branches: - main @@ -38,8 +38,8 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # - name: Install package - # run: | - # conda run python setup.py install + run: | + conda build conda/meta.yaml - name: Test with pytest run: | conda install pytest coverage codecov diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 7a2e744..4a63bba 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -44,7 +44,7 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes startdate option in template command from optional to + """Changes match option in template command from optional to required if fpath is a directory. 
""" if Path(value).is_dir(): @@ -295,7 +295,7 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') @map_args @click.pass_context -def map_template(ctx, fpath, startdate, dbname, version, alias): +def map_template(ctx, fpath, match, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. If not tries to partially match them. @@ -311,7 +311,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fpath : str Path of csv input file with output variables to map or of directory containing output files to scan - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -331,7 +331,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit @@ -420,13 +420,13 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') @map_args @click.pass_context -def list_vars(ctx, fpath, startdate, dbname, version, alias): +def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) @click.pass_context -def model_vars(ctx, fpath, startdate, dbname, version, alias): +def model_vars(ctx, fpath, match, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -436,7 +436,7 @@ def model_vars(ctx, fpath, startdate, dbname, version, alias): Click context object fpath : str Path for model output 
files - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -457,7 +457,7 @@ def model_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - fname = write_varlist(conn, fpath, startdate, version, alias) + fname = write_varlist(conn, fpath, match, version, alias) conn.close() return fname From ab3cd299d3604d3d3fe559905363cfe4f254cd52 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Sat, 6 Jul 2024 18:31:26 +1000 Subject: [PATCH 04/10] progress in detecting realm and frequency --- src/mopdb/mopdb_utils.py | 84 ++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9e0c4d..f723025 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -426,50 +426,67 @@ def delete_record(conn, table, col, pairs): def list_files(indir, match): """Returns list of files matching input directory and match""" mopdb_log = logging.getLogger('mopdb_log') - files = [x for x in Path(indir).rglob(f"{match}") if x.is_file()] - mopdb_log.debug(f"{indir}/**/*{match}*") + mopdb_log.debug(f"Pattern to list files: {indir}/**/*{match}*") + files = [x for x in Path(indir).rglob(f"{match}") if x.is_file() + and '.nc' in str(x)] + files.sort(key=lambda x:x.name) + mopdb_log.debug(f"Files after sorting: {files}") return files -def build_umfrq(time_axs, ds): +def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive timesteps with expected interval at a given frequency. Order time_axis so ones with only one step are last, so we can use file frequency (interval_file) inferred from other time axes.
+ This is called if there are more than one time axis in file + (usually only UM) or if frequency can be guessed from filename. """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} + # retrieve all time axes + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - mopdb_log.debug(f"in build_umfrq, time_axs: {time_axs}") + mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") + max_len = len(ds[time_axs[0]]) + # if all time axes have only 1 timestep we cannot infer frequency + # so we open also next file but get only time axs + if max_len == 1: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values / np.timedelta64(1, 'D') -#astype('timedelta64[m]') / 1440.0 - interval_file = (ds[t][-1] -ds[t][0]).values / np.timedelta64(1, 'D') + interval = (ds[t][1]-ds[t][0]).values #/ np.timedelta64(1, 'D') + interval_file = (ds[t][-1] -ds[t][0]).values #/ np.timedelta64(1, 'D') else: interval = interval_file mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - mopdb_log.debug(f"interval entire file {t}: {interval_file}") + #mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): - umfrq[t] = k + frq[t] = k break - return umfrq + return frq -def get_frequency(realm, fname, ds): +def get_frequency(realm, fname, ds, fnext): """Return 
frequency based on realm and filename For UM files checks if more than one time axis is present and if so returns dictionary with frequency: variable list """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq_dict = {} frequency = 'NAfrq' if realm == 'atmos': fbits = fname.split("_") @@ -479,14 +496,8 @@ def get_frequency(realm, fname, ds): frequency = fix_frq[frequency] else: frequency = frequency.replace('hPt', 'hrPt') - # retrieve all time axes and check their frequency - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - if len(time_axs_len) == 1: - umfrq = {} - else: - umfrq = build_umfrq(time_axs, ds) - mopdb_log.debug(f"umfrq: {umfrq}") + frq_dict = get_file_frq(ds, fnext) + mopdb_log.debug(f"frq_dict: {frq_dict}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -498,8 +509,13 @@ def get_frequency(realm, fname, ds): frequency = 'mon' elif '_d.' in fname: frequency = 'day' + if frequency == 'NAfrq': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + frequency = frq_dict.popitem()[1] mopdb_log.debug(f"Frequency: {frequency}") - return frequency, umfrq + return frequency, frq_dict def get_cell_methods(attrs, dims): @@ -523,15 +539,13 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version, alias): +def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. 
Main attributes needed to map output are provided for each variable """ mopdb_log = logging.getLogger('mopdb_log') - sdate = f"*{startdate}*" - files = list_files(indir, sdate) - mopdb_log.debug(f"Found files: {files}") + files = list_files(indir, f"*{match}*") patterns = [] if alias == '': alias = 'mopdb' @@ -541,10 +555,10 @@ def write_varlist(conn, indir, startdate, version, alias): fwriter.writerow(["name", "cmor_var", "units", "dimensions", "frequency", "realm", "cell_methods", "cmor_table", "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) - for fpath in files: + for i, fpath in enumerate(files): # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") - fpattern = fpath.name.split(startdate)[0] + fpattern = fpath.name.split(match)[0] # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: @@ -555,10 +569,12 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"File pattern: {fpattern}") fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables - realm = get_realm(fpath, version) - ds = xr.open_dataset(fpath, decode_times=False) + ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) + realm = get_realm(fpath, version, ds) coords = [c for c in ds.coords] + ['latitude_longitude'] - frequency, umfrq = get_frequency(realm, fpath.name, ds) + #pass next file in case of 1 timestep per file and no frq in name + fnext = str(pattern_list[1]) + frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) multiple_frq = False if umfrq != {}: multiple_frq = True @@ -569,7 +585,7 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0] + nsteps = nfiles * v.shape[0]/2 # assign specific 
frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: @@ -969,7 +985,7 @@ def check_realm_units(conn, var): return var -def get_realm(fpath, version): +def get_realm(fpath, version, ds): '''Return realm for variable in files or NArealm''' mopdb_log = logging.getLogger('mopdb_log') @@ -978,7 +994,7 @@ def get_realm(fpath, version): else: realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] if x in fpath.parts][0] - if realm == 'atm': + if realm == 'atm' or 'um_version' in ds.attrs.keys(): realm = 'atmos' elif realm == 'ocn': realm = 'ocean' From d24bea219d88ca30cb3b0f2be9675154215b9be6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Mon, 8 Jul 2024 12:36:52 +1000 Subject: [PATCH 05/10] removed unneccessary adjustment to variable size from mopdb_utils.py --- src/mopdb/mopdb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index f723025..18bc48f 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -585,7 +585,7 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0]/2 + nsteps = nfiles * v.shape[0] # assign specific frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: From 704d607e104abd0e6a2f630107e8e9dff515b4e6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 15:12:23 +1000 Subject: [PATCH 06/10] minor adjustment to conftest and action --- .github/workflows/mopper-conda.yaml | 2 +- src/mopdb/mopdb_utils.py | 10 ++++++++++ tests/conftest.py | 9 --------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index b4ecaa5..c232518 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -4,11 +4,11 @@ name: 
mopper-conda-install-test on: push: branches: - - main - prerelease pull_request: branches: - main + - prerelease jobs: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 18bc48f..a14ca5e 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -539,6 +539,15 @@ def get_cell_methods(attrs, dims): return val, frqmod +def identify_patterns(files): + """Return patterns of files + """ + i = 0 + while present is True: + + + return patterns + def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided @@ -546,6 +555,7 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') files = list_files(indir, f"*{match}*") + patterns = identify_patterns(files) patterns = [] if alias == '': alias = 'mopdb' diff --git a/tests/conftest.py b/tests/conftest.py index 9f2f190..9a60849 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,15 +30,6 @@ TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) TESTS_DATA = os.path.join(TESTS_HOME, "testdata") -# setting up loggers for both mopdb and mop -@pytest.fixture -def moplog(): - return logging.getLogger('mop_log') - - -@pytest.fixture -def mopdblog(): - return logging.getLogger('mopdb_log') # setting up fixtures for databases:a ccess.db and mopper.db @pytest.fixture From 7c45897cf8403591e1449289ae522a967a938c61 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:17:25 +1000 Subject: [PATCH 07/10] some imporvements to tests --- tests/conftest.py | 16 ++++++++++++---- tests/test_calculations.py | 8 ++++---- tests/test_mop_utils.py | 22 +++++++++++----------- tests/test_mopdb.py | 6 +++--- tests/test_mopdb_utils.py | 7 +++---- tests/testdata/varlist_ex.csv | 3 +++ 6 files changed, 36 insertions(+), 26 deletions(-) create mode 100644 tests/testdata/varlist_ex.csv diff --git a/tests/conftest.py 
b/tests/conftest.py index 9a60849..0dd6c56 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,7 @@ import pandas as pd import datetime import logging +import csv from mopdb.mopdb_utils import mapping_sql, cmorvar_sql from mopper.setup_utils import filelist_sql @@ -72,12 +73,19 @@ def test_check_timestamp(caplog): @pytest.fixture def varlist_rows(): - lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", - "fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;AUS2200_A1hr;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction", -"fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu"] - rows = [l.split(";") for l in lines] + # read list of vars from iexample file + with open('testdata/varlist_ex.csv', 'r') as csvfile: + reader = csv.DictReader(csvfile, delimiter=';') + rows = list(reader) return rows +@pytest.fixture +def matches(): + matches = [("tas", "fld_s03i236", "", "1hr", "atmos", "AUS2200", "AUS2200_A1hr", "", "K"), + ("siconca", "fld_s00i031", "", "mon", "ocean", "CM2", "CMIP6_OImon", "", "1"), + ("hfls", "fld_s03i234", "", "mon", "atmos", "CM2", "CMIP6_Amon", "up", "W/m2")] + return matches + @pytest.fixture def add_var_out(): vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': '' diff --git a/tests/test_calculations.py b/tests/test_calculations.py index dcd6398..9037b53 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -23,10 +23,9 @@ import logging from mopper.calculations import * -logger = logging.getLogger('var_log') ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr', 'var_log': logger}) + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) def 
create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): @@ -68,8 +67,9 @@ def test_calc_topsoil(): xrtest.assert_allclose(out, expected, rtol=1e-05) -def test_overturn_stream(): - global ctx, logger +def test_overturn_stream(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='varlog_1') # set up input dims = ['time', 'depth', 'lat', 'lon'] time = pd.date_range("2014-09-06", periods=1) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index f177f21..4889274 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -19,7 +19,6 @@ import numpy as np import pandas as pd from mopper.mop_utils import * -from conftest import moplog #try: # import unittest.mock as mock @@ -28,24 +27,24 @@ ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr'}) -#logger = logging.getLogger('mop_log') + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) -def test_check_timestamp(caplog, ctx): - moplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_check_timestamp(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') + caplog.set_level(logging.DEBUG, logger='varlog_1') # test atmos files files = [f'obj_198302{d}T{str(h).zfill(2)}01_1hr.nc' for d in ['17','18','19'] for h in range(24)] - print(files) inrange = files[6:37] with ctx: - out1 = check_timestamp(files, logger) + out1 = check_timestamp(files) assert out1 == inrange # get only first file is frequency is fx ctx.obj['frequency'] = 'fx' inrange = [files[0]] with ctx: - out2 = check_timestamp(files, logger) + out2 = check_timestamp(files) assert out2 == inrange # test ocn files ctx.obj['frequency'] = 'day' @@ -53,12 +52,13 @@ def test_check_timestamp(caplog, ctx): files = [f'ocn_daily.nc-198302{str(d).zfill(2)}' for d in range(1,29)] inrange = files[16:18] with ctx: - out3 = check_timestamp(files, logger) + out3 = check_timestamp(files) assert out3 == inrange 
-def test_get_cmorname(caplog, ctx): - caplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_get_cmorname(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') # axis_name t ctx.obj['calculation'] = "plevinterp(var[0], var[1], 24)" ctx.obj['variable_id'] = "ta24" diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index 0eddc58..e570fdb 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -30,7 +30,7 @@ def test_mopdb(command, subcommand, runner): result = runner.invoke(mopdb, [subcommand, '--help']) assert result.exit_code == 0 -@pytest.mark.usefixtures("setup_db") # 1 +@pytest.mark.usefixtures("setup_access_db") # 1 def test_template(session): runner = CliRunner() @@ -45,8 +45,8 @@ def test_template(session): result = runner.invoke(mopdb, ['template', '-f varlist.txt', '-vCM2']) #assert result.exit_code == 0 - assert 'Opened database successfully' in result.output - assert 'Definable cmip var' in result.output + assert 'Opened database ' in result.output + #assert 'Definable cmip var' in result.output #Pass temp_dir to control where the temporary directory is created. The directory will not be removed by Click in this case. This is useful to integrate with a framework like Pytest that manages temporary files. 
#def test_keep_dir(tmp_path): diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 9737c52..ebc8be0 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -27,12 +27,11 @@ @pytest.mark.parametrize('idx', [0,1,2]) -def test_add_var(varlist_rows, idx, caplog): +def test_add_var(varlist_rows, matches, idx, caplog): caplog.set_level(logging.DEBUG, logger='mopdb_log') vlist = [] - match = [("tas", "", "K"), ("siconca", "", ""), ("hfls", "", "")] - vlist = add_var(vlist, varlist_rows[idx], match[idx]) - assert vlist[idx]['cmor_var'] == match[idx][0] + vlist = add_var(vlist, varlist_rows[idx], matches[idx]) + assert vlist[0]['cmor_var'] == matches[idx][0] def test_build_umfrq(um_multi_time, caplog): diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv new file mode 100644 index 0000000..154729f --- /dev/null +++ b/tests/testdata/varlist_ex.csv @@ -0,0 +1,3 @@ +name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name +fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature +fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu From 63f3b380ede007c88b08b646d4f0a9eae73a6122 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:30:57 +1000 Subject: [PATCH 08/10] updated install instruction to current situation in docs --- docs/overview.rst | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/docs/overview.rst b/docs/overview.rst index 908db06..f074224 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -1,25 +1,16 @@ Install ======= -You can 
install the latest version of `mopper` directly from conda (accessnri channel):: +We are planning to release ACCESS-MOPPeR in conda soon and then it will be available at NCI on our conda environments. +In the meantime, you can create a custom conda environment and install mopper following these steps: - conda install -c accessnri mopper +1. module load conda/analysis3 +2. python -m venv mopper_env --system-site-packages +3. source /mopper_env/bin/activate +4. pip install git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@main + +The source command will activate the conda env you just created. +Any time you want to use the tool in a new session repeat the first and third steps. -If you want to install an unstable version or a different branch: +The `pip` command above will install from the main branch, you can also indicate a different branch. - * git clone - * git checkout (if installing a a different branch from master) - * cd mopper - * pip install ./ - use --user flag if you want to install it in ~/.local - -Working on the NCI server -------------------------- - -MOPPeR is pre-installed into a Conda environment at NCI. Load it with:: - - module use /g/data3/hh5/public/modules - module load conda/analysis3-unstable - -.. note:: - You need to be a member of the hh5 project to load the modules.
From fc8bb02d7688e7b0671981bba099a73a0b81e320 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:43:04 +1000 Subject: [PATCH 09/10] removed partial pattern function --- src/mopdb/mopdb_utils.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index a14ca5e..80565b4 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -261,7 +261,7 @@ def query(conn, sql, tup=(), first=True): def get_columns(conn, table): - """Gets list of columns form db table + """Gets list of columns from db table """ mopdb_log = logging.getLogger('mopdb_log') sql = f'PRAGMA table_info({table});' @@ -539,15 +539,6 @@ def get_cell_methods(attrs, dims): return val, frqmod -def identify_patterns(files): - """Return patterns of files - """ - i = 0 - while present is True: - - - return patterns - def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided From cacdd9570acb0526012880c1c7348ab89665381d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:46:03 +1000 Subject: [PATCH 10/10] removed partial pattern function 2 --- src/mopdb/mopdb_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 80565b4..b9875c7 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -546,7 +546,6 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') files = list_files(indir, f"*{match}*") - patterns = identify_patterns(files) patterns = [] if alias == '': alias = 'mopdb'