Skip to content

Commit

Permalink
Improve carryover dependency handling
Browse files Browse the repository at this point in the history
  • Loading branch information
forsyth2 committed Oct 16, 2024
1 parent d5c8005 commit 5d388cf
Show file tree
Hide file tree
Showing 5 changed files with 390 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# Test carryover dependency handling

# Things to check for in the output:
# 1. Check that `grep -v "OK" *status` returns nothing (i.e., all jobs worked)
# 2. Check that .settings files include `dependencies` listed out,
# using `grep -A 5 "'dependencies'" *settings`.
# (Change the -A number to include enough lines to see the entire dependencies list)
# Check that these look correct.


# These tasks can have carryover dependencies:
# 1. mpas_analysis.py

# These tasks have dependencies, but no carryover:
# 1. e3sm_diags.py
# 2. global_time_series.py
# 3. ilamb.py
# 4. tc_analysis.py

# These tasks don't have dependencies:
# 1. climo.py
# 2. ts.py

[default]
case = "v3.LR.historical_0051"
constraint = ""
dry_run = "False"
environment_commands = ""
input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051
input_subdir = archive/atm/hist
mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc"
output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_carryover_dependencies_output/unique_id/v3.LR.historical_0051"
partition = "debug"
qos = "regular"
www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_carryover_dependencies_www/unique_id"
years = "1985:1989:2",

[climo]
active = True
walltime = "00:30:00"

[[ atm_monthly_180x360_aave ]]
frequency = "monthly"
input_files = "eam.h0"
input_subdir = "archive/atm/hist"
vars = ""

[[ atm_monthly_diurnal_8xdaily_180x360_aave ]]
frequency = "diurnal_8xdaily"
input_files = "eam.h3"
input_subdir = "archive/atm/hist"
vars = "PRECT"

[[ land_monthly_climo ]]
frequency = "monthly"
input_files = "elm.h0"
input_subdir = "archive/lnd/hist"
mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
vars = ""

[ts]
active = True
e3sm_to_cmip_environment_commands = ""
walltime = "00:30:00"

[[ atm_monthly_180x360_aave ]]
frequency = "monthly"
input_files = "eam.h0"
input_subdir = "archive/atm/hist"
ts_fmt = "cmip"

[[ rof_monthly ]]
extra_vars = 'areatotal2'
frequency = "monthly"
input_files = "mosart.h0"
input_subdir = "archive/rof/hist"
mapping_file = ""
vars = "RIVER_DISCHARGE_OVER_LAND_LIQ"

[[ atm_monthly_glb ]]
# Note global average won't work for 3D variables.
frequency = "monthly"
input_files = "eam.h0"
input_subdir = "archive/atm/hist"
mapping_file = "glb"
years = "1985:1995:5",

[[ lnd_monthly_glb ]]
frequency = "monthly"
input_files = "elm.h0"
input_subdir = "archive/lnd/hist"
mapping_file = "glb"
vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
years = "1985:1995:5",

[[ land_monthly ]]
extra_vars = "landfrac"
frequency = "monthly"
input_files = "elm.h0"
input_subdir = "archive/lnd/hist"
mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
ts_fmt = "cmip"
vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"

# TODO: Add "tc_analysis" back in after empty dat is resolved.
# [tc_analysis]
# # The second run should run in parallel with the first run.
# active = True
# scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_carryover_dependencies_scratch/unique_id/v3.LR.historical_0051"
# walltime = "00:30:00"

[e3sm_diags]
active = True
environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015"
grid = '180x360_aave'
multiprocessing = True
num_workers = 8
partition = "compute"
qos = "regular"
ref_final_yr = 1986
ref_start_yr = 1985
ref_years = "1985-1986",
short_name = "v3.LR.historical_0051"
ts_num_years = 2
walltime = "5:00:00"
years = "1987:1989:2"

[[ atm_monthly_180x360_aave ]]
# TODO: Add "tc_analysis" back in after empty dat is resolved.
# This task will depend on the tc_analysis task
climo_diurnal_frequency = "diurnal_8xdaily"
climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave"
climo_subsection = "atm_monthly_180x360_aave"
dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology'
# TODO: Add "tc_analysis" back in after empty dat is resolved.
#sets = "lat_lon","tc_analysis"
sets = "lat_lon",

[[ atm_monthly_180x360_aave_mvm ]]
# This task will not depend on the tc_analysis task
# Test model-vs-model using the same files as the reference
climo_subsection = "atm_monthly_180x360_aave"
diff_title = "Difference"
partition = "compute"
qos = "regular"
ref_name = "v3.LR.historical_0051"
reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_carryover_dependencies_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim"
run_type = "model_vs_model"
sets = "lat_lon",
short_ref_name = "same simulation"
swap_test_ref = False
tag = "model_vs_model"
ts_num_years_ref = 2
ts_subsection = "atm_monthly_180x360_aave"

[mpas_analysis]
# The second run should depend on the first run.
# NOTE(review): the "runs" presumably correspond to the two entries in each of
# ts_years / climo_years / enso_years below. The second ts_years range
# (1985-1995) starts in the same year as the first (1985-1989) -- confirm that
# this overlap is what exercises the carryover dependency being tested.
active = True
anomalyRefYear = 1985
climo_years = "1985-1989", "1990-1995",
enso_years = "1985-1989", "1990-1995",
mesh = "IcoswISC30E3r5"
parallelTaskCount = 6
partition = "compute"
qos = "regular"
shortTermArchive = True
ts_years = "1985-1989", "1985-1995",
walltime = "00:30:00"

[global_time_series]
active = True
climo_years = "1985-1989", "1990-1995",
experiment_name = "v3.LR.historical_0051"
figstr = "v3.LR.historical_0051"
moc_file=mocTimeSeries_1985-1995.nc
plots_lnd = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
ts_num_years = 5
ts_years = "1985-1989", "1985-1995",
walltime = "00:30:00"
years = "1985-1995",

[ilamb]
active = True
nodes = 8
partition = "compute"
short_name = "v3.LR.historical_0051"
ts_num_years = 2
walltime = "2:00:00"
years = "1985:1989:4"
189 changes: 189 additions & 0 deletions tests/integration/template_min_case_carryover_dependencies.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# Test carryover dependency handling

# Things to check for in the output:
# 1. Check that `grep -v "OK" *status` returns nothing (i.e., all jobs worked)
# 2. Check that .settings files include `dependencies` listed out,
# using `grep -A 5 "'dependencies'" *settings`.
# (Change the -A number to include enough lines to see the entire dependencies list)
# Check that these look correct.


# These tasks can have carryover dependencies:
# 1. mpas_analysis.py

# These tasks have dependencies, but no carryover:
# 1. e3sm_diags.py
# 2. global_time_series.py
# 3. ilamb.py
# 4. tc_analysis.py

# These tasks don't have dependencies:
# 1. climo.py
# 2. ts.py

[default]
case = "#expand case_name#"
constraint = "#expand constraint#"
dry_run = "#expand dry_run#"
environment_commands = "#expand environment_commands#"
input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name#
input_subdir = archive/atm/hist
mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc"
output = "#expand user_output#zppy_min_case_carryover_dependencies_output/#expand unique_id#/#expand case_name#"
partition = "#expand partition_short#"
qos = "#expand qos_short#"
www = "#expand user_www#zppy_min_case_carryover_dependencies_www/#expand unique_id#"
years = "1985:1989:2",

[climo]
active = True
walltime = "00:30:00"

[[ atm_monthly_180x360_aave ]]
frequency = "monthly"
input_files = "eam.h0"
input_subdir = "archive/atm/hist"
vars = ""

[[ atm_monthly_diurnal_8xdaily_180x360_aave ]]
frequency = "diurnal_8xdaily"
input_files = "eam.h3"
input_subdir = "archive/atm/hist"
vars = "PRECT"

[[ land_monthly_climo ]]
frequency = "monthly"
input_files = "elm.h0"
input_subdir = "archive/lnd/hist"
mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
vars = ""

[ts]
active = True
e3sm_to_cmip_environment_commands = "#expand e3sm_to_cmip_environment_commands#"
walltime = "00:30:00"

[[ atm_monthly_180x360_aave ]]
frequency = "monthly"
input_files = "eam.h0"
input_subdir = "archive/atm/hist"
ts_fmt = "cmip"

[[ rof_monthly ]]
extra_vars = 'areatotal2'
frequency = "monthly"
input_files = "mosart.h0"
input_subdir = "archive/rof/hist"
mapping_file = ""
vars = "RIVER_DISCHARGE_OVER_LAND_LIQ"

[[ atm_monthly_glb ]]
# Note global average won't work for 3D variables.
frequency = "monthly"
input_files = "eam.h0"
input_subdir = "archive/atm/hist"
mapping_file = "glb"
years = "1985:1995:5",

[[ lnd_monthly_glb ]]
frequency = "monthly"
input_files = "elm.h0"
input_subdir = "archive/lnd/hist"
mapping_file = "glb"
vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
years = "1985:1995:5",

[[ land_monthly ]]
extra_vars = "landfrac"
frequency = "monthly"
input_files = "elm.h0"
input_subdir = "archive/lnd/hist"
mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
ts_fmt = "cmip"
vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"

# TODO: Add "tc_analysis" back in after empty dat is resolved.
# [tc_analysis]
# # The second run should run in parallel with the first run.
# active = True
# scratch = "#expand scratch#zppy_min_case_carryover_dependencies_scratch/#expand unique_id#/#expand case_name#"
# walltime = "00:30:00"

[e3sm_diags]
active = True
environment_commands = "#expand diags_environment_commands#"
grid = '180x360_aave'
multiprocessing = True
num_workers = 8
partition = "#expand partition_long#"
qos = "#expand qos_long#"
ref_final_yr = 1986
ref_start_yr = 1985
ref_years = "1985-1986",
short_name = "#expand case_name#"
ts_num_years = 2
walltime = "#expand diags_walltime#"
years = "1987:1989:2"

[[ atm_monthly_180x360_aave ]]
# TODO: Add "tc_analysis" back in after empty dat is resolved.
# This task will depend on the tc_analysis task
climo_diurnal_frequency = "diurnal_8xdaily"
climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave"
climo_subsection = "atm_monthly_180x360_aave"
dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology'
# TODO: Add "tc_analysis" back in after empty dat is resolved.
#sets = "lat_lon","tc_analysis"
sets = "lat_lon",

[[ atm_monthly_180x360_aave_mvm ]]
# This task will not depend on the tc_analysis task
# Test model-vs-model using the same files as the reference
climo_subsection = "atm_monthly_180x360_aave"
diff_title = "Difference"
partition = "#expand partition_long#"
qos = "#expand qos_long#"
ref_name = "#expand case_name#"
reference_data_path = "#expand user_output#zppy_min_case_carryover_dependencies_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim"
run_type = "model_vs_model"
sets = "lat_lon",
short_ref_name = "same simulation"
swap_test_ref = False
tag = "model_vs_model"
ts_num_years_ref = 2
ts_subsection = "atm_monthly_180x360_aave"

[mpas_analysis]
# The second run should depend on the first run.
# NOTE(review): the "runs" presumably correspond to the two entries in each of
# ts_years / climo_years / enso_years below. The second ts_years range
# (1985-1995) starts in the same year as the first (1985-1989) -- confirm that
# this overlap is what exercises the carryover dependency being tested.
active = True
anomalyRefYear = 1985
climo_years = "1985-1989", "1990-1995",
enso_years = "1985-1989", "1990-1995",
mesh = "IcoswISC30E3r5"
parallelTaskCount = 6
partition = "#expand partition_long#"
qos = "#expand qos_long#"
shortTermArchive = True
ts_years = "1985-1989", "1985-1995",
walltime = "#expand mpas_analysis_walltime#"

[global_time_series]
active = True
climo_years = "1985-1989", "1990-1995",
experiment_name = "#expand case_name#"
figstr = "#expand case_name#"
moc_file=mocTimeSeries_1985-1995.nc
plots_lnd = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
ts_num_years = 5
ts_years = "1985-1989", "1985-1995",
walltime = "00:30:00"
years = "1985-1995",

[ilamb]
active = True
nodes = 8
partition = "#expand partition_long#"
short_name = "#expand case_name#"
ts_num_years = 2
walltime = "2:00:00"
years = "1985:1989:4"
1 change: 1 addition & 0 deletions tests/integration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ def generate_cfgs(unified_testing=False, dry_run=False):

cfg_names = [
"min_case_add_dependencies",
"min_case_carryover_dependencies",
"min_case_tc_analysis_simultaneous_1",
"min_case_tc_analysis_simultaneous_2",
"min_case_e3sm_diags_depend_on_climo_mvm_1",
Expand Down
4 changes: 3 additions & 1 deletion zppy/ilamb.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ def ilamb(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file):
return existing_bundles

# --- Generate and submit ilamb scripts ---
dependencies: List[str] = []
for c in tasks:

dependencies: List[str] = []

if "ts_num_years" in c.keys():
c["ts_num_years"] = int(c["ts_num_years"])
# Loop over year sets
Expand Down
Loading

0 comments on commit 5d388cf

Please sign in to comment.