From 9904d0f1f62e4288fdcd1420552e210a9b303b37 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 4 Jul 2024 18:20:38 +1000 Subject: [PATCH 01/10] now varlist and template are 1 step see #150 --- src/mopdb/mopdb.py | 125 ++++++++++++++++++++++++++------------- src/mopdb/mopdb_utils.py | 54 +++++++++++++---- 2 files changed, 129 insertions(+), 50 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 892b4cb..b0935ff 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -43,18 +43,47 @@ def mopdb_catch(): sys.exit(1) +def require_date(ctx, param, value): + """Changes startdate option in template command from optional to + required if fpath is a directory. + """ + if Path(value).is_dir(): + ctx.command.params[1].required = True + return value + + def db_args(f): - """Define database click arguments + """Define database click options """ constraints = [ click.option('--fname', '-f', type=str, required=True, - help='Input file: used to update db table (mapping/cmor),' + - 'or to pass output model variables (list)'), + help='Input file: used to update db table (mapping/cmor)'), click.option('--dbname', type=str, required=False, default='default', help='Database relative path by default is package access.db'), - click.option('--alias', '-a', type=str, required=False, default=None, - help='Table alias to use when updating cmor var table or creating map template with list' + - ' to keep track of variable definition origin. 
If none passed uses input filename')] + click.option('--alias', '-a', type=str, required=False, default='', + help='Table alias to track definitions origin in cmorvar table.')] + for c in reversed(constraints): + f = c(f) + return f + + +def map_args(f): + """Define mapping click options for varlist and template commands""" + constraints = [ + click.option('--fpath', '-f', type=str, required=True, + callback=require_date, + help='Model output directory or varlist for the same'), + click.option('--startdate', '-d', type=str, required=False, + help='Start date of model run as YYYYMMDD'), + click.option('--version', '-v', required=True, + type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), + show_default=True, + help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2'), + click.option('--dbname', type=str, required=False, default='default', + help='Database relative path by default is package access.db'), + click.option('--alias', '-a', type=str, required=False, default='', + help='''Alias to use to keep track of variable definition origin. + If none passed uses input filename''')] for c in reversed(constraints): f = c(f) return f @@ -118,8 +147,10 @@ def check_cmor(ctx, dbname): @mopdb.command(name='table') @db_args @click.option('--label', '-l', required=False, default='CMIP6', - type=click.Choice(['CMIP6', 'AUS2200', 'CM2']), show_default=True, - help='Label indicating origin of CMOR variable definitions. Currently only CMIP6, AUS2200 and CM2') + type=click.Choice(['CMIP6', 'AUS2200', 'CM2', 'OM2']), + show_default=True, + help='''Label indicating origin of CMOR variable definitions. + Currently only CMIP6, AUS2200, CM2 and OM2''') @click.pass_context def cmor_table(ctx, dbname, fname, alias, label): """Create CMIP style table containing new variable definitions @@ -136,7 +167,7 @@ def cmor_table(ctx, dbname, fname, alias, label): fname : str Mapping file??? alias : str - not used here + ??? it is used so what's ahppenw hen not passed? 
label : str Label indicating preferred cmor variable definitions """ @@ -184,7 +215,7 @@ def cmor_table(ctx, dbname, fname, alias, label): if len(v[4].split()) != len(record[9].split()): mopdb_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") var_list.append(definition) - write_cmor_table(var_list, alias, mopdb_log) + write_cmor_table(var_list, alias) conn.close() return @@ -206,15 +237,15 @@ def update_cmor(ctx, dbname, fname, alias): fname : str Name of json input file with records to add alias : str - Indicates origin of records to add, if None json filename - base is used instead + Indicates origin of records to add, if '' (default) json + filename base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + if alias == '': alias = fname.split("/")[-1] alias = alias.replace('.json', '') mopdb_log.info(f"Adding {alias} to variable name to track origin") @@ -260,35 +291,46 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') -@db_args -@click.option('--version', '-v', required=True, - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def map_template(ctx, dbname, fname, alias, version): +def map_template(ctx, fpath, startdate, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. If not tries to partially match them. + It can get as input the directory containing the output in + which case it will first call model_vars() (varlist command) + or the file output of the same if already available. 
+ Parameters ---------- ctx : obj Click context object + fpath : str + Path of csv input file with output variables to map or + of directory containing output files to scan + startdate : str + Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) - fname : str - Name of csv input file with output variables to map - alias : str - Indicates origin of records to add, if None csv filename - base is used instead version : str Version of ACCESS model used to generate variables + alias : str + Indicates origin of records to add, if '' csv filename + base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + # work out if fpath is varlist or path to output + fpath = Path(fpath) + if fpath.is_file(): + fname = fpath.name + else: + mopdb_log.debug(f"Calling model_vars() from template: {fpath}") + fname = model_vars(fpath, startdate, dbname, version, alias) + if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit if dbname == 'default': @@ -298,6 +340,7 @@ def map_template(ctx, dbname, fname, alias, version): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) + check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, @@ -340,7 +383,7 @@ def update_map(ctx, dbname, fname, alias): fname : str Name of csv input file with mapping records alias : str - Indicates origin of records to add, if None csv filename + Indicates origin of records to add, if '' csv filename base is used instead Returns @@ -373,26 +416,23 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') -@click.option('--indir', '-i', type=str, required=True, - help='Converted model output directory') 
-@click.option('--startdate', '-d', type=str, required=True, - help='Start date of model run as YYYYMMDD') -@click.option('--dbname', type=str, required=False, default='default', - help='Database relative path by default is package access.db') -@click.option('--version', '-v', required=False, default='CM2', - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def model_vars(ctx, indir, startdate, dbname, version): +def list_vars(ctx, fpath, startdate, dbname, version, alias): + """Calls model_vars to generate list of variables""" + fname = model_vars(fpath, startdate, dbname, version, alias) + + +@click.pass_context +def model_vars(ctx, fpath, startdate, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file - alias is not used so far Parameters ---------- ctx : obj Click context object - indir : str + fpath : str Path for model output files startdate : str Date or other string to match to individuate one file per type @@ -400,18 +440,24 @@ def model_vars(ctx, indir, startdate, dbname, version): Database relative path (default is data/access.db) version : str Version of ACCESS model to use as preferred mapping + alias : str + Used for output filename: 'varlist_'. 
If '', + 'varlist_mopdb' is used instead Returns ------- + fname : str + Name of output varlist file """ + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - write_varlist(conn, indir, startdate, version) + fname = write_varlist(conn, fpath, startdate, version, alias) conn.close() - return + return fname @mopdb.command(name='del') @@ -456,4 +502,3 @@ def remove_record(ctx, dbname, table, pair): # select, confirm, delete record/s delete_record(conn, table, col, pair) return - diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index d4de94f..b9e0c4d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -523,7 +523,7 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version): +def write_varlist(conn, indir, startdate, version, alias): """Based on model output files create a variable list and save it to a csv file. 
Main attributes needed to map output are provided for each variable @@ -533,6 +533,14 @@ def write_varlist(conn, indir, startdate, version): files = list_files(indir, sdate) mopdb_log.debug(f"Found files: {files}") patterns = [] + if alias == '': + alias = 'mopdb' + fname = f"varlist_{alias}.csv" + fcsv = open(fname, 'w') + fwriter = csv.writer(fcsv, delimiter=';') + fwriter.writerow(["name", "cmor_var", "units", "dimensions", + "frequency", "realm", "cell_methods", "cmor_table", "vtype", + "size", "nsteps", "filename", "long_name", "standard_name"]) for fpath in files: # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") @@ -545,12 +553,7 @@ def write_varlist(conn, indir, startdate, version): pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) mopdb_log.debug(f"File pattern: {fpattern}") - fcsv = open(f"{fpattern}.csv", 'w') - fwriter = csv.writer(fcsv, delimiter=';') - fwriter.writerow(["name", "cmor_var", "units", "dimensions", - "frequency", "realm", "cell_methods", "cmor_table", - "vtype", "size", "nsteps", "filename", "long_name", - "standard_name"]) + fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables realm = get_realm(fpath, version) ds = xr.open_dataset(fpath, decode_times=False) @@ -587,9 +590,9 @@ def write_varlist(conn, indir, startdate, version): nsteps, fpattern, attrs.get('long_name', ""), attrs.get('standard_name', "")] fwriter.writerow(line) - fcsv.close() mopdb_log.info(f"Variable list for {fpattern} successfully written") - return + fcsv.close() + return fname def read_map_app4(fname): @@ -644,7 +647,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is None: + if alias is '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list @@ -883,6 +886,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, cell_methods, positive, cmor_table, version, vtype, size, nsteps, filename, long_name, 
standard_name """ + mopdb_log = logging.getLogger('mopdb_log') keys = ['cmor_var', 'input_vars', 'calculation', 'units', 'dimensions', 'frequency', 'realm', 'cell_methods', @@ -919,6 +923,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): """ """ + mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: @@ -938,6 +943,7 @@ def check_realm_units(conn, var): """Checks that realm and units are consistent with values in cmor table. """ + mopdb_log = logging.getLogger('mopdb_log') vname = f"{var['cmor_var']}-{var['cmor_table']}" if var['cmor_table'] is None or var['cmor_table'] == "": @@ -965,6 +971,7 @@ def check_realm_units(conn, var): def get_realm(fpath, version): '''Return realm for variable in files or NArealm''' + mopdb_log = logging.getLogger('mopdb_log') if version == 'AUS2200': realm = 'atmos' @@ -980,3 +987,30 @@ def get_realm(fpath, version): mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm + + +def check_varlist(rows, fname): + """Checks that varlist written to file has sensible information for frequency and realm + to avoid incorrect mapping to be produced. + + At the moment we're checking only frequency and realm as they can be missed or wrong + depending on the file structure. + + Parameters + ---------- + rows : list(dict) + list of variables to match + """ + + mopdb_log = logging.getLogger('mopdb_log') + frq_list = ['min', 'hr', 'day', 'mon', 'yr'] + realm_list = ['ice', 'ocean', 'atmos', 'land'] + for row in rows: + if row['name'][0] == "#" or row['name'] == 'name': + continue + elif (not any( x in row['frequency'] for x in frq_list) + or row['realm'] not in realm_list): + mopdb_log.error(f""" Check frequency and realm in {fname}. 
+ Some values might be invalid and need fixing""") + sys.exit() + return From 29567a35e821a59f9a7a861f65e2ea9198600129 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 15:08:39 +1000 Subject: [PATCH 02/10] moved options #151, #113 --- conda/meta.yaml | 2 +- docs/gettingstarted.rst | 62 ++++++++++++++---------------- docs/mopdb_command.rst | 49 ++++++++++++++---------- src/mopdb/mopdb.py | 8 ++-- src/mopper/mop_setup.py | 8 ++++ src/mopper/mopper.py | 80 ++++++++++++++++++++++++++++----------- src/mopper/setup_utils.py | 7 ++-- 7 files changed, 132 insertions(+), 84 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index b0cb321..f20a79a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "0.6.1" %} +{% set version = "1.0.0" %} package: name: mopper version: {{ version }} diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 397652e..666e89a 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -3,46 +3,38 @@ Starting with MOPPeR A typical workflow to post-process an ACCESS or UM model output requires three steps. -Step1: get a list of variables from the raw output --------------------------------------------------- - - *mopdb varlist -i -d * - -`mopdb varlist` will output one or more `csv` files with a detailed list of variables, one list for each pattern of output files. - -.. code-block:: console - - $ mopdb varlist -i /scratch/../exp -d 20120101 - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Variable list for ocean_scalar.nc- successfully written - Variable list for ocean_month.nc- successfully written - Variable list for ocean_daily.nc- successfully written - -.. csv-table:: Example of varlist output - :file: varlist_example.csv - :delim: ; - -The argument is used to reduce the number of files to check. The tool will recognise anyway a repeated pattern and only add a list of variable for the same pattern once. 
- -Step2: create a template for a mapping file +Step1: create a template for a mapping file ------------------------------------------- - *mopdb template -i -v -a * + *mopdb template -f -v -a * .. code-block:: console - $ mopdb template -f ocean.csv -v OM2 -a ocnmon - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Derived variables: {'msftyrho', 'msftmrho', 'hfds', 'msftmz', 'msftyz'} - Changing advectsweby-CM2_mon units from Watts/m^2 to W m-2 - Changing areacello-CMIP6_Ofx units from m^2 to m2 - Variable difvho-CM2_Omon not found in cmor table + $ mopdb template -f /scratch/.../exp1/atmos -m 095101 -v CM2 -a exp1 + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Found more than 1 definition for fld_s16i222: + [('psl', 'AUS2200', 'AUS2200_A10min', '10minPt'), ('psl', 'AUS2200', 'AUS2200_A1hr', '1hr')] + Using psl from AUS2200_A10min + Variable list for cw323a.pm successfully written + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Derived variables: {'treeFracBdlEvg', 'grassFracC4', 'shrubFrac', 'prc', 'mrsfl', 'landCoverFrac', 'mmrbc', 'mmrso4', 'theta24', 'sftgif', 'treeFracNdlEvg', 'snw', 'rtmt', 'nwdFracLut', 'sifllatstop', 'prw', 'mrfso', 'rlus', 'mrsll', 'baresoilFrac', 'c4PftFrac', 'wetlandFrac', 'mrro', 'c3PftFrac', 'treeFracBdlDcd', 'od550lt1aer', 'treeFracNdlDcd', 'residualFrac', 'wetss', 'sbl', 'vegFrac', 'rsus', 'cropFrac', 'mmrdust', 'grassFrac', 'mmrss', 'od550aer', 'hus24', 'dryss', 'fracLut', 'mrlso', 'mc', 'od440aer', 'grassFracC3', 'nep', 'mmroa', 'cropFracC3', 'snm', 'agesno'} + Changing cl-CMIP6_Amon units from 1 to % + Changing cli-CMIP6_Amon units from 1 to kg kg-1 + Changing clt-CMIP6_Amon units from 1 to % + Changing clw-CMIP6_Amon units from 1 to kg kg-1 + Variable husuvgrid-CM2_mon not found in cmor table + ... 
`mopdb template` takes as input: - * the output/s of `varlist` - To get one template for the all variable concatenate the output on `varlist` into one file first. - * the access version to use as preferred - * an optional alias, if omitted the varlist filename will be used. Based on the example: `map_ocnmon.csv` or `map_ocean.csv` if omitted. + * -f/--fpath : the path to the model output + * -m/--match : used to identify files' patterns. The tool will only add a list of variables for the same pattern once. + * -v/--version : the access version to use as preferred mapping. ESM1.5, CM2, OM2 and AUS2200 are currently available. + * -a/--alias : an optional alias, if omitted default names will be used for the output files. + +Alternatively a list of variables can be created separately using the *varlist* command and this can be passed directly to template using the *fpath* option. + + *mopdb template -f -v -a * It produces a csv file with a list of all the variables from raw output mapped to cmip style variables. These mappings also take into account the frequency and include variables that can be potentially calculated with the listed fields. The console output lists these, as shown above. @@ -51,18 +43,20 @@ The mappings can be different between different version and/or configurations of Starting with version 0.6 the list includes matches based on the standard_name, as these rows often list more than one option per field, it's important to either edit or remove these rows before using the mapping file. The :doc:`Customing section ` covers what to do for an experiment using a new configuration which is substantially different from the ones which are available. +It also provides an intermediate varlist_.csv file that shows the information derived directly from the files. This can be useful to debug in case of issues with the mapping. 
This file is checked before the mapping step to make sure the tool has detected sensible frequency and realm, if the check fails the mapping won't proceed but the varlist file can be edited appropriately. .. warning:: Always check that the resulting template is mapping the variables correctly. This is particularly true for derived variables. Comment lines are inserted to give some information on what assumptions were done for each group of mappings. + The se -Step3: Set up the working environment +Step2: Set up the working environment ------------------------------------- *mop -c setup* .. code-block:: console - +https://climate-cms.org/posts/2023-05-31-vscode-are.html $ mop -c exp_conf.yaml setup Simulation to process: cy286 Setting environment and creating working directory diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 32d712c..421f39c 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -54,29 +54,17 @@ e.g. use aus2200 for mappings related to the AUS2200 configuration: A user that wants to create a mapping table for another AUS2200 simulation can use this value to select appropriate mappings (see how to do that below). -Get a list of variables from the model output ---------------------------------------------- +Create a mapping file +--------------------- .. code-block:: - mopdb varlist -i -d - -this will create for each output file a list of variables with useful attributes -These can be concatenated into one or used to create separate mappings. - -.. _varlist example: -.. 
dropdown:: Example output of varlist +This can be done by providing the model output path and a pattern to match or directly a varlist file - name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name - fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature - fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity - fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature - fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask - fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction - ... +From output path: + + mopdb template -f -m -v -Create a mapping file starting from variable list -------------------------------------------------- -.. code-block:: +From varlist file: mopdb template -f -v @@ -119,6 +107,29 @@ The other groups of records require checking, as either the version or the frequ ... +Get a list of variables from the model output +--------------------------------------------- +.. code-block:: + + mopdb varlist -f -m + +this will create a list of variables with useful attributes + +.. _varlist example: +.. 
dropdown:: Example output of varlist + + name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name + #cw323a.pm + fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature + fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity + fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature + fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask + fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction + ... + +Doing this step separately can be useful if the model output is using a random directory structure, as it's more likely in such a case that important attributes like frequency and realm which are used for the mapping might be incorrect or missing. In such a case it might be more efficient processing different kind of files separately first, making sure frequency and realm are correct and then combining them into one file to pass to template. +The template command will stop execution if detects potentially wrong values for these fields and save + Check which variables aren't yet defined ---------------------------------------- .. 
code-block:: console diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index b0935ff..7a2e744 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -72,9 +72,11 @@ def map_args(f): constraints = [ click.option('--fpath', '-f', type=str, required=True, callback=require_date, - help='Model output directory or varlist for the same'), - click.option('--startdate', '-d', type=str, required=False, - help='Start date of model run as YYYYMMDD'), + help=(''''Path for model output files. For "template" + command can also be file generated by varlist step''')), + click.option('--match', '-m', type=str, required=False, + help=('''String to match output files. Most often + the timestamp from one of the output files''')), click.option('--version', '-v', required=True, type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 7040270..90ba47e 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -196,6 +196,14 @@ def setup_env(ctx): else: cdict['tables_path'] = appdir / cdict['tables_path'] cdict['ancils_path'] = appdir / cdict['ancils_path'] + # conda env to run job + if cdict['conda_env'] == 'default': + cdict['conda_env'] = '' + else: + path = Path(cdict['conda_env']) + if not path.is_absolute(): + path = appdir / path + cdict['conda_env'] = f"source {str(path)}" # Output subdirectories outpath = cdict['outpath'] cdict['maps'] = outpath / "maps" diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 5418309..6313edd 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -53,13 +53,22 @@ def mop_catch(): sys.exit(1) +def mop_args(f): + """Define common click options + """ + constraints = [ + click.option('--debug', is_flag=True, default=False, + help="Show debug info"), + click.option('--cfile', '-c', type=str, required=True, + help='Experiment configuration as yaml file')] + for c in reversed(constraints): + f = c(f) + return f + + 
@click.group(context_settings=dict(help_option_names=['-h', '--help'])) -@click.option('--cfile', '-c', type=str, required=True, - help='Experiment configuration as yaml file') -@click.option('--debug', is_flag=True, default=False, - help="Show debug info") @click.pass_context -def mop(ctx, cfile, debug): +def mop(ctx): """Main command with 2 sub-commands: - setup to setup the job to run - run to execute the post-processing @@ -68,33 +77,39 @@ def mop(ctx, cfile, debug): ---------- ctx : obj Click context object + """ + #ctx.obj = {} + pass + + +@mop.command(name='run') +@mop_args +#@click.option('--cfile', '-c', type=str, required=True, +# help='Experiment configuration as yaml file') +@click.pass_context +def mop_run(ctx, cfile, debug): + """Subcommand that executes the processing. + + Use the configuration yaml file created in setup step as input. + + Parameters + ---------- cfile : str Name of yaml configuration file, run sub-command uses the configuration created by setup debug : bool If true set logging level to debug """ + + # load config file with open(cfile, 'r') as yfile: cfg = yaml.safe_load(yfile) ctx.obj = cfg['cmor'] ctx.obj['attrs'] = cfg['attrs'] - # set up main mop log - if ctx.invoked_subcommand == 'setup': - mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) - else: - mop_log = config_log(debug, ctx.obj['appdir']) + # set up logger + mop_log = config_log(debug, ctx.obj['appdir']) ctx.obj['debug'] = debug mop_log.info(f"Simulation to process: {ctx.obj['exp']}") - - -@mop.command(name='run') -@click.pass_context -def mop_run(ctx): - """Subcommand that executes the processing. - - Use the configuration yaml file created in setup step as input. 
- """ - mop_log = logging.getLogger('mop_log') # Open database and retrieve list of files to create conn = db_connect(ctx.obj['database']) c = conn.cursor() @@ -117,11 +132,12 @@ def mop_run(ctx): return +@mop.command(name='setup') +@mop_args @click.option('--update', is_flag=True, default=False, help="Update current settings, keeping db and logs") -@mop.command(name='setup') @click.pass_context -def mop_setup(ctx, update): +def mop_setup(ctx, cfile, debug, update): """Setup of mopper processing job and working environment. * Defines and creates paths @@ -131,8 +147,26 @@ def mop_setup(ctx, update): * creates/updates database filelist table to list files to create * finalises configuration and save in new yaml file * writes job executable file and submits (optional) to queue + + Parameters + ---------- + cfile : str + Name of yaml configuration file, run sub-command uses the + configuration created by setup + debug : bool + If True set logging level to debug + update : bool + If True update current workding directory (default is False) """ - mop_log = logging.getLogger('mop_log') + + # load config file + with open(cfile, 'r') as yfile: + cfg = yaml.safe_load(yfile) + ctx.obj = cfg['cmor'] + ctx.obj['attrs'] = cfg['attrs'] + ctx.obj['debug'] = debug + # set up logger + mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) # then add setup_env to config mop_log.info("Setting environment and creating working directory") ctx.obj['update'] = update diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 68c60dd..e0341fa 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -700,8 +700,6 @@ def define_template(ctx, flag, nrows): cdict : dict Dictionary with cmor settings for experiment """ - # temporarily removing this as it only works for conda envs - #{os.path.dirname(sys.executable)}/mop -c {ctx.obj['exp']}_config.yaml run template = f"""#!/bin/bash #PBS -P {ctx.obj['project']} #PBS -q {ctx.obj['queue']} @@ 
-717,9 +715,10 @@ def define_template(ctx, flag, nrows): # for a list of packages module use /g/data/hh5/public/modules -module load conda/analysis3 +module load conda/analysis3-unstable +{ctx.obj['conda_env']} cd {ctx.obj['appdir']} -mop -c {ctx.obj['exp']}_config.yaml run +mop run -c {ctx.obj['exp']}_config.yaml echo 'APP completed for exp {ctx.obj['exp']}.'""" return template From f8b1a24a4b96d781f4fd5c33bf1a4a3d5d8e76ac Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 16:40:28 +1000 Subject: [PATCH 03/10] minor fix to actions and solved #150 --- .github/workflows/mopper-conda.yaml | 8 ++++---- src/mopdb/mopdb.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 98ea1a7..b4ecaa5 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -1,11 +1,11 @@ -name: xmhw-conda-install-test +name: mopper-conda-install-test #on: [push] on: push: branches: - main - - newrelease + - prerelease pull_request: branches: - main @@ -38,8 +38,8 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # - name: Install package - # run: | - # conda run python setup.py install + run: | + conda build conda/meta.yaml - name: Test with pytest run: | conda install pytest coverage codecov diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 7a2e744..4a63bba 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -44,7 +44,7 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes startdate option in template command from optional to + """Changes match option in template command from optional to required if fpath is a directory. 
""" if Path(value).is_dir(): @@ -295,7 +295,7 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') @map_args @click.pass_context -def map_template(ctx, fpath, startdate, dbname, version, alias): +def map_template(ctx, fpath, match, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. If not tries to partially match them. @@ -311,7 +311,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fpath : str Path of csv input file with output variables to map or of directory containing output files to scan - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -331,7 +331,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit @@ -420,13 +420,13 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') @map_args @click.pass_context -def list_vars(ctx, fpath, startdate, dbname, version, alias): +def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) @click.pass_context -def model_vars(ctx, fpath, startdate, dbname, version, alias): +def model_vars(ctx, fpath, match, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -436,7 +436,7 @@ def model_vars(ctx, fpath, startdate, dbname, version, alias): Click context object fpath : str Path for model output 
files - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -457,7 +457,7 @@ def model_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - fname = write_varlist(conn, fpath, startdate, version, alias) + fname = write_varlist(conn, fpath, match, version, alias) conn.close() return fname From ab3cd299d3604d3d3fe559905363cfe4f254cd52 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Sat, 6 Jul 2024 18:31:26 +1000 Subject: [PATCH 04/10] progress in detecting realm and frequency --- src/mopdb/mopdb_utils.py | 84 ++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9e0c4d..f723025 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -426,50 +426,67 @@ def delete_record(conn, table, col, pairs): def list_files(indir, match): """Returns list of files matching input directory and match""" mopdb_log = logging.getLogger('mopdb_log') - files = [x for x in Path(indir).rglob(f"{match}") if x.is_file()] - mopdb_log.debug(f"{indir}/**/*{match}*") + mopdb_log.debug(f"Pattern to list files: {indir}/**/*{match}*") + files = [x for x in Path(indir).rglob(f"{match}") if x.is_file() + and '.nc' in str(x)] + files.sort(key=lambda x:x.name) + mopdb_log.debug(f"Files after sorting: {files}") return files -def build_umfrq(time_axs, ds): +def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive timesteps with expected interval at a given frequency. Order time_axis so ones with only one step are last, so we can use file frequency (interval_file) inferred from other time axes.
+ This is called if there are more than one time axis in file + (usually only UM) or if frequency can be guessed from filename. """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} + # retrieve all time axes + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - mopdb_log.debug(f"in build_umfrq, time_axs: {time_axs}") + mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") + max_len = len(ds[time_axs[0]]) + # if all time axes have only 1 timestep we cannot infer frequency + # so we open also next file but get only time axs + if max_len == 1: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values / np.timedelta64(1, 'D') -#astype('timedelta64[m]') / 1440.0 - interval_file = (ds[t][-1] -ds[t][0]).values / np.timedelta64(1, 'D') + interval = (ds[t][1]-ds[t][0]).values #/ np.timedelta64(1, 'D') + interval_file = (ds[t][-1] -ds[t][0]).values #/ np.timedelta64(1, 'D') else: interval = interval_file mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - mopdb_log.debug(f"interval entire file {t}: {interval_file}") + #mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): - umfrq[t] = k + frq[t] = k break - return umfrq + return frq -def get_frequency(realm, fname, ds): +def get_frequency(realm, fname, ds, fnext): """Return 
frequency based on realm and filename For UM files checks if more than one time axis is present and if so returns dictionary with frequency: variable list """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq_dict = {} frequency = 'NAfrq' if realm == 'atmos': fbits = fname.split("_") @@ -479,14 +496,8 @@ def get_frequency(realm, fname, ds): frequency = fix_frq[frequency] else: frequency = frequency.replace('hPt', 'hrPt') - # retrieve all time axes and check their frequency - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - if len(time_axs_len) == 1: - umfrq = {} - else: - umfrq = build_umfrq(time_axs, ds) - mopdb_log.debug(f"umfrq: {umfrq}") + frq_dict = get_file_frq(ds, fnext) + mopdb_log.debug(f"frq_dict: {frq_dict}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -498,8 +509,13 @@ def get_frequency(realm, fname, ds): frequency = 'mon' elif '_d.' in fname: frequency = 'day' + if frequency == 'NAfrq': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + frequency = frq_dict.popitem()[1] mopdb_log.debug(f"Frequency: {frequency}") - return frequency, umfrq + return frequency, frq_dict def get_cell_methods(attrs, dims): @@ -523,15 +539,13 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version, alias): +def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. 
Main attributes needed to map output are provided for each variable """ mopdb_log = logging.getLogger('mopdb_log') - sdate = f"*{startdate}*" - files = list_files(indir, sdate) - mopdb_log.debug(f"Found files: {files}") + files = list_files(indir, f"*{match}*") patterns = [] if alias == '': alias = 'mopdb' @@ -541,10 +555,10 @@ def write_varlist(conn, indir, startdate, version, alias): fwriter.writerow(["name", "cmor_var", "units", "dimensions", "frequency", "realm", "cell_methods", "cmor_table", "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) - for fpath in files: + for i, fpath in enumerate(files): # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") - fpattern = fpath.name.split(startdate)[0] + fpattern = fpath.name.split(match)[0] # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: @@ -555,10 +569,12 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"File pattern: {fpattern}") fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables - realm = get_realm(fpath, version) - ds = xr.open_dataset(fpath, decode_times=False) + ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) + realm = get_realm(fpath, version, ds) coords = [c for c in ds.coords] + ['latitude_longitude'] - frequency, umfrq = get_frequency(realm, fpath.name, ds) + #pass next file in case of 1 timestep per file and no frq in name + fnext = str(pattern_list[1]) + frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) multiple_frq = False if umfrq != {}: multiple_frq = True @@ -569,7 +585,7 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0] + nsteps = nfiles * v.shape[0]/2 # assign specific 
frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: @@ -969,7 +985,7 @@ def check_realm_units(conn, var): return var -def get_realm(fpath, version): +def get_realm(fpath, version, ds): '''Return realm for variable in files or NArealm''' mopdb_log = logging.getLogger('mopdb_log') @@ -978,7 +994,7 @@ def get_realm(fpath, version): else: realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] if x in fpath.parts][0] - if realm == 'atm': + if realm == 'atm' or 'um_version' in ds.attrs.keys(): realm = 'atmos' elif realm == 'ocn': realm = 'ocean' From d24bea219d88ca30cb3b0f2be9675154215b9be6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Mon, 8 Jul 2024 12:36:52 +1000 Subject: [PATCH 05/10] removed unneccessary adjustment to variable size from mopdb_utils.py --- src/mopdb/mopdb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index f723025..18bc48f 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -585,7 +585,7 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0]/2 + nsteps = nfiles * v.shape[0] # assign specific frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: From 704d607e104abd0e6a2f630107e8e9dff515b4e6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 15:12:23 +1000 Subject: [PATCH 06/10] minor adjustment to conftest and action --- .github/workflows/mopper-conda.yaml | 2 +- src/mopdb/mopdb_utils.py | 10 ++++++++++ tests/conftest.py | 9 --------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index b4ecaa5..c232518 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -4,11 +4,11 @@ name: 
mopper-conda-install-test on: push: branches: - - main - prerelease pull_request: branches: - main + - prerelease jobs: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 18bc48f..a14ca5e 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -539,6 +539,15 @@ def get_cell_methods(attrs, dims): return val, frqmod +def identify_patterns(files): + """Return patterns of files + """ + i = 0 + while present is True: + + + return patterns + def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided @@ -546,6 +555,7 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') files = list_files(indir, f"*{match}*") + patterns = identify_patterns(files) patterns = [] if alias == '': alias = 'mopdb' diff --git a/tests/conftest.py b/tests/conftest.py index 9f2f190..9a60849 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,15 +30,6 @@ TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) TESTS_DATA = os.path.join(TESTS_HOME, "testdata") -# setting up loggers for both mopdb and mop -@pytest.fixture -def moplog(): - return logging.getLogger('mop_log') - - -@pytest.fixture -def mopdblog(): - return logging.getLogger('mopdb_log') # setting up fixtures for databases:a ccess.db and mopper.db @pytest.fixture From 7c45897cf8403591e1449289ae522a967a938c61 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:17:25 +1000 Subject: [PATCH 07/10] some imporvements to tests --- tests/conftest.py | 16 ++++++++++++---- tests/test_calculations.py | 8 ++++---- tests/test_mop_utils.py | 22 +++++++++++----------- tests/test_mopdb.py | 6 +++--- tests/test_mopdb_utils.py | 7 +++---- tests/testdata/varlist_ex.csv | 3 +++ 6 files changed, 36 insertions(+), 26 deletions(-) create mode 100644 tests/testdata/varlist_ex.csv diff --git a/tests/conftest.py 
b/tests/conftest.py index 9a60849..0dd6c56 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,7 @@ import pandas as pd import datetime import logging +import csv from mopdb.mopdb_utils import mapping_sql, cmorvar_sql from mopper.setup_utils import filelist_sql @@ -72,12 +73,19 @@ def test_check_timestamp(caplog): @pytest.fixture def varlist_rows(): - lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", - "fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;AUS2200_A1hr;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction", -"fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu"] - rows = [l.split(";") for l in lines] + # read list of vars from iexample file + with open('testdata/varlist_ex.csv', 'r') as csvfile: + reader = csv.DictReader(csvfile, delimiter=';') + rows = list(reader) return rows +@pytest.fixture +def matches(): + matches = [("tas", "fld_s03i236", "", "1hr", "atmos", "AUS2200", "AUS2200_A1hr", "", "K"), + ("siconca", "fld_s00i031", "", "mon", "ocean", "CM2", "CMIP6_OImon", "", "1"), + ("hfls", "fld_s03i234", "", "mon", "atmos", "CM2", "CMIP6_Amon", "up", "W/m2")] + return matches + @pytest.fixture def add_var_out(): vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': '' diff --git a/tests/test_calculations.py b/tests/test_calculations.py index dcd6398..9037b53 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -23,10 +23,9 @@ import logging from mopper.calculations import * -logger = logging.getLogger('var_log') ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr', 'var_log': logger}) + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) def 
create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): @@ -68,8 +67,9 @@ def test_calc_topsoil(): xrtest.assert_allclose(out, expected, rtol=1e-05) -def test_overturn_stream(): - global ctx, logger +def test_overturn_stream(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='varlog_1') # set up input dims = ['time', 'depth', 'lat', 'lon'] time = pd.date_range("2014-09-06", periods=1) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index f177f21..4889274 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -19,7 +19,6 @@ import numpy as np import pandas as pd from mopper.mop_utils import * -from conftest import moplog #try: # import unittest.mock as mock @@ -28,24 +27,24 @@ ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr'}) -#logger = logging.getLogger('mop_log') + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) -def test_check_timestamp(caplog, ctx): - moplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_check_timestamp(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') + caplog.set_level(logging.DEBUG, logger='varlog_1') # test atmos files files = [f'obj_198302{d}T{str(h).zfill(2)}01_1hr.nc' for d in ['17','18','19'] for h in range(24)] - print(files) inrange = files[6:37] with ctx: - out1 = check_timestamp(files, logger) + out1 = check_timestamp(files) assert out1 == inrange # get only first file is frequency is fx ctx.obj['frequency'] = 'fx' inrange = [files[0]] with ctx: - out2 = check_timestamp(files, logger) + out2 = check_timestamp(files) assert out2 == inrange # test ocn files ctx.obj['frequency'] = 'day' @@ -53,12 +52,13 @@ def test_check_timestamp(caplog, ctx): files = [f'ocn_daily.nc-198302{str(d).zfill(2)}' for d in range(1,29)] inrange = files[16:18] with ctx: - out3 = check_timestamp(files, logger) + out3 = check_timestamp(files) assert out3 == inrange 
-def test_get_cmorname(caplog, ctx): - caplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_get_cmorname(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') # axis_name t ctx.obj['calculation'] = "plevinterp(var[0], var[1], 24)" ctx.obj['variable_id'] = "ta24" diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index 0eddc58..e570fdb 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -30,7 +30,7 @@ def test_mopdb(command, subcommand, runner): result = runner.invoke(mopdb, [subcommand, '--help']) assert result.exit_code == 0 -@pytest.mark.usefixtures("setup_db") # 1 +@pytest.mark.usefixtures("setup_access_db") # 1 def test_template(session): runner = CliRunner() @@ -45,8 +45,8 @@ def test_template(session): result = runner.invoke(mopdb, ['template', '-f varlist.txt', '-vCM2']) #assert result.exit_code == 0 - assert 'Opened database successfully' in result.output - assert 'Definable cmip var' in result.output + assert 'Opened database ' in result.output + #assert 'Definable cmip var' in result.output #Pass temp_dir to control where the temporary directory is created. The directory will not be removed by Click in this case. This is useful to integrate with a framework like Pytest that manages temporary files. 
#def test_keep_dir(tmp_path): diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 9737c52..ebc8be0 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -27,12 +27,11 @@ @pytest.mark.parametrize('idx', [0,1,2]) -def test_add_var(varlist_rows, idx, caplog): +def test_add_var(varlist_rows, matches, idx, caplog): caplog.set_level(logging.DEBUG, logger='mopdb_log') vlist = [] - match = [("tas", "", "K"), ("siconca", "", ""), ("hfls", "", "")] - vlist = add_var(vlist, varlist_rows[idx], match[idx]) - assert vlist[idx]['cmor_var'] == match[idx][0] + vlist = add_var(vlist, varlist_rows[idx], matches[idx]) + assert vlist[0]['cmor_var'] == matches[idx][0] def test_build_umfrq(um_multi_time, caplog): diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv new file mode 100644 index 0000000..154729f --- /dev/null +++ b/tests/testdata/varlist_ex.csv @@ -0,0 +1,3 @@ +name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name +fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature +fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu From 63f3b380ede007c88b08b646d4f0a9eae73a6122 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:30:57 +1000 Subject: [PATCH 08/10] updated install instruction to current situation in docs --- docs/overview.rst | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/docs/overview.rst b/docs/overview.rst index 908db06..f074224 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -1,25 +1,16 @@ Install ======= -You can 
install the latest version of `mopper` directly from conda (accessnri channel):: +We are planning to release ACCESS-MOPPeR in conda soon and then it will be available at NCI on our conda environments. +In the meantime, you can create a custom conda environment and install mopper following these steps: - conda install -c accessnri mopper +1. module load conda/analysis3 +2. python -m venv mopper_env --system-site-packages +3. source /mopper_env/bin/activate +4. pip install git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@main + +The source command will activate the conda env you just created. +Any time you want to use the tool in a new session repeat the first and third steps. -If you want to install an unstable version or a different branch: +The `pip` command above will install from the main branch, you can also indicate a different branch. - * git clone - * git checkout (if installing a a different branch from master) - * cd mopper - * pip install ./ - use --user flag if you want to install it in ~/.local - -Working on the NCI server -------------------------- - -MOPPeR is pre-installed into a Conda environment at NCI. Load it with:: - - module use /g/data3/hh5/public/modules - module load conda/analysis3-unstable - -.. note:: - You need to be a member of the hh5 project to load the modules.
From fc8bb02d7688e7b0671981bba099a73a0b81e320 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:43:04 +1000 Subject: [PATCH 09/10] removed partial pattern function --- src/mopdb/mopdb_utils.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index a14ca5e..80565b4 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -261,7 +261,7 @@ def query(conn, sql, tup=(), first=True): def get_columns(conn, table): - """Gets list of columns form db table + """Gets list of columns from db table """ mopdb_log = logging.getLogger('mopdb_log') sql = f'PRAGMA table_info({table});' @@ -539,15 +539,6 @@ def get_cell_methods(attrs, dims): return val, frqmod -def identify_patterns(files): - """Return patterns of files - """ - i = 0 - while present is True: - - - return patterns - def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided From cacdd9570acb0526012880c1c7348ab89665381d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:46:03 +1000 Subject: [PATCH 10/10] removed partial pattern function 2 --- src/mopdb/mopdb_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 80565b4..b9875c7 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -546,7 +546,6 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') files = list_files(indir, f"*{match}*") - patterns = identify_patterns(files) patterns = [] if alias == '': alias = 'mopdb'