From 5d388cf6addebdb147a7ef9a539e871e508540e6 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 23 Sep 2024 15:15:50 -0500 Subject: [PATCH] Improve carryover dependency handling --- ..._case_carryover_dependencies_chrysalis.cfg | 189 ++++++++++++++++++ ...mplate_min_case_carryover_dependencies.cfg | 189 ++++++++++++++++++ tests/integration/utils.py | 1 + zppy/ilamb.py | 4 +- zppy/mpas_analysis.py | 11 +- 5 files changed, 390 insertions(+), 4 deletions(-) create mode 100644 tests/integration/generated/test_min_case_carryover_dependencies_chrysalis.cfg create mode 100644 tests/integration/template_min_case_carryover_dependencies.cfg diff --git a/tests/integration/generated/test_min_case_carryover_dependencies_chrysalis.cfg b/tests/integration/generated/test_min_case_carryover_dependencies_chrysalis.cfg new file mode 100644 index 00000000..721faaf9 --- /dev/null +++ b/tests/integration/generated/test_min_case_carryover_dependencies_chrysalis.cfg @@ -0,0 +1,189 @@ +# Test carryover dependency handling + +# Things to check for in the output: +# 1. Check that `grep -v "OK" *status` returns nothing (i.e., all jobs worked) +# 2. Check that .settings files include `dependencies` listed out, +# using `grep -A 5 "'dependencies'" *settings`. +# (Change the -A number to include enough lines to see the entire dependencies list) +# Check that these look correct. + + +# These tasks can have carryover dependencies: +# 1. mpas_analysis.py + +# These tasks have dependencies, but no carryover: +# 1. e3sm_diags.py +# 2. global_time_series.py +# 3. ilamb.py +# 4. tc_analysis.py + +# These tasks don't have dependencies: +# 1. climo.py +# 2. 
ts.py + +[default] +case = "v3.LR.historical_0051" +constraint = "" +dry_run = "False" +environment_commands = "" +input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051 +input_subdir = archive/atm/hist +mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" +output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_carryover_dependencies_output/unique_id/v3.LR.historical_0051" +partition = "debug" +qos = "regular" +www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_carryover_dependencies_www/unique_id" +years = "1985:1989:2", + +[climo] +active = True +walltime = "00:30:00" + + [[ atm_monthly_180x360_aave ]] + frequency = "monthly" + input_files = "eam.h0" + input_subdir = "archive/atm/hist" + vars = "" + + [[ atm_monthly_diurnal_8xdaily_180x360_aave ]] + frequency = "diurnal_8xdaily" + input_files = "eam.h3" + input_subdir = "archive/atm/hist" + vars = "PRECT" + + [[ land_monthly_climo ]] + frequency = "monthly" + input_files = "elm.h0" + input_subdir = "archive/lnd/hist" + mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc" + vars = "" + +[ts] +active = True +e3sm_to_cmip_environment_commands = "" +walltime = "00:30:00" + + [[ atm_monthly_180x360_aave ]] + frequency = "monthly" + input_files = "eam.h0" + input_subdir = "archive/atm/hist" + ts_fmt = "cmip" + + [[ rof_monthly ]] + extra_vars = 'areatotal2' + frequency = "monthly" + input_files = "mosart.h0" + input_subdir = "archive/rof/hist" + mapping_file = "" + vars = "RIVER_DISCHARGE_OVER_LAND_LIQ" + + [[ atm_monthly_glb ]] + # Note global average won't work for 3D variables. 
+ frequency = "monthly" + input_files = "eam.h0" + input_subdir = "archive/atm/hist" + mapping_file = "glb" + years = "1985:1995:5", + + [[ lnd_monthly_glb ]] + frequency = "monthly" + input_files = "elm.h0" + input_subdir = "archive/lnd/hist" + mapping_file = "glb" + vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR" + years = "1985:1995:5", + + [[ land_monthly ]] + extra_vars = "landfrac" + frequency = "monthly" + input_files = "elm.h0" + input_subdir = "archive/lnd/hist" + mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc" + ts_fmt = "cmip" + vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR" + +# TODO: Add "tc_analysis" back in after empty dat is resolved. +# [tc_analysis] +# # The second run should run in parallel with the first run. +# active = True +# scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_carryover_dependencies_scratch/unique_id/v3.LR.historical_0051" +# walltime = "00:30:00" + +[e3sm_diags] +active = True +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" +grid = '180x360_aave' +multiprocessing = True +num_workers = 8 +partition = "compute" +qos = "regular" +ref_final_yr = 1986 +ref_start_yr = 1985 +ref_years = "1985-1986", +short_name = "v3.LR.historical_0051" +ts_num_years = 2 +walltime = "5:00:00" +years = "1987:1989:2" + + [[ atm_monthly_180x360_aave ]] + # TODO: Add "tc_analysis" back in after empty dat is resolved. 
+ # This task will depend on the tc_analysis task + climo_diurnal_frequency = "diurnal_8xdaily" + climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" + climo_subsection = "atm_monthly_180x360_aave" + dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology' + # TODO: Add "tc_analysis" back in after empty dat is resolved. + #sets = "lat_lon","tc_analysis" + sets = "lat_lon", + + [[ atm_monthly_180x360_aave_mvm ]] + # This task will not depend on the tc_analysis task + # Test model-vs-model using the same files as the reference + climo_subsection = "atm_monthly_180x360_aave" + diff_title = "Difference" + partition = "compute" + qos = "regular" + ref_name = "v3.LR.historical_0051" + reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_carryover_dependencies_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim" + run_type = "model_vs_model" + sets = "lat_lon", + short_ref_name = "same simulation" + swap_test_ref = False + tag = "model_vs_model" + ts_num_years_ref = 2 + ts_subsection = "atm_monthly_180x360_aave" + +[mpas_analysis] +# The second run should depend on the first run. 
+active = True +anomalyRefYear = 1985 +climo_years = "1985-1989", "1990-1995", +enso_years = "1985-1989", "1990-1995", +mesh = "IcoswISC30E3r5" +parallelTaskCount = 6 +partition = "compute" +qos = "regular" +shortTermArchive = True +ts_years = "1985-1989", "1985-1995", +walltime = "00:30:00" + +[global_time_series] +active = True +climo_years = "1985-1989", "1990-1995", +experiment_name = "v3.LR.historical_0051" +figstr = "v3.LR.historical_0051" +moc_file=mocTimeSeries_1985-1995.nc +plots_lnd = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR" +ts_num_years = 5 +ts_years = "1985-1989", "1985-1995", +walltime = "00:30:00" +years = "1985-1995", + +[ilamb] +active = True +nodes = 8 +partition = "compute" +short_name = "v3.LR.historical_0051" +ts_num_years = 2 +walltime = "2:00:00" +years = "1985:1989:4" diff --git a/tests/integration/template_min_case_carryover_dependencies.cfg b/tests/integration/template_min_case_carryover_dependencies.cfg new file mode 100644 index 00000000..4d0fc037 --- /dev/null +++ b/tests/integration/template_min_case_carryover_dependencies.cfg @@ -0,0 +1,189 @@ +# Test carryover dependency handling + +# Things to check for in the output: +# 1. Check that `grep -v "OK" *status` returns nothing (i.e., all jobs worked) +# 2. Check that .settings files include `dependencies` listed out, +# using `grep -A 5 "'dependencies'" *settings`. +# (Change the -A number to include enough lines to see the entire dependencies list) +# Check that these look correct. + + +# These tasks can have carryover dependencies: +# 1. mpas_analysis.py + +# These tasks have dependencies, but no carryover: +# 1. e3sm_diags.py +# 2. global_time_series.py +# 3. ilamb.py +# 4. tc_analysis.py + +# These tasks don't have dependencies: +# 1. climo.py +# 2. 
ts.py + +[default] +case = "#expand case_name#" +constraint = "#expand constraint#" +dry_run = "#expand dry_run#" +environment_commands = "#expand environment_commands#" +input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name# +input_subdir = archive/atm/hist +mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" +output = "#expand user_output#zppy_min_case_carryover_dependencies_output/#expand unique_id#/#expand case_name#" +partition = "#expand partition_short#" +qos = "#expand qos_short#" +www = "#expand user_www#zppy_min_case_carryover_dependencies_www/#expand unique_id#" +years = "1985:1989:2", + +[climo] +active = True +walltime = "00:30:00" + + [[ atm_monthly_180x360_aave ]] + frequency = "monthly" + input_files = "eam.h0" + input_subdir = "archive/atm/hist" + vars = "" + + [[ atm_monthly_diurnal_8xdaily_180x360_aave ]] + frequency = "diurnal_8xdaily" + input_files = "eam.h3" + input_subdir = "archive/atm/hist" + vars = "PRECT" + + [[ land_monthly_climo ]] + frequency = "monthly" + input_files = "elm.h0" + input_subdir = "archive/lnd/hist" + mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc" + vars = "" + +[ts] +active = True +e3sm_to_cmip_environment_commands = "#expand e3sm_to_cmip_environment_commands#" +walltime = "00:30:00" + + [[ atm_monthly_180x360_aave ]] + frequency = "monthly" + input_files = "eam.h0" + input_subdir = "archive/atm/hist" + ts_fmt = "cmip" + + [[ rof_monthly ]] + extra_vars = 'areatotal2' + frequency = "monthly" + input_files = "mosart.h0" + input_subdir = "archive/rof/hist" + mapping_file = "" + vars = "RIVER_DISCHARGE_OVER_LAND_LIQ" + + [[ atm_monthly_glb ]] + # Note global average won't work for 3D variables. 
+ frequency = "monthly" + input_files = "eam.h0" + input_subdir = "archive/atm/hist" + mapping_file = "glb" + years = "1985:1995:5", + + [[ lnd_monthly_glb ]] + frequency = "monthly" + input_files = "elm.h0" + input_subdir = "archive/lnd/hist" + mapping_file = "glb" + vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR" + years = "1985:1995:5", + + [[ land_monthly ]] + extra_vars = "landfrac" + frequency = "monthly" + input_files = "elm.h0" + input_subdir = "archive/lnd/hist" + mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc" + ts_fmt = "cmip" + vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR" + +# TODO: Add "tc_analysis" back in after empty dat is resolved. +# [tc_analysis] +# # The second run should run in parallel with the first run. +# active = True +# scratch = "#expand scratch#zppy_min_case_carryover_dependencies_scratch/#expand unique_id#/#expand case_name#" +# walltime = "00:30:00" + +[e3sm_diags] +active = True +environment_commands = "#expand diags_environment_commands#" +grid = '180x360_aave' +multiprocessing = True +num_workers = 8 +partition = "#expand partition_long#" +qos = "#expand qos_long#" +ref_final_yr = 1986 +ref_start_yr = 1985 +ref_years = "1985-1986", +short_name = "#expand case_name#" +ts_num_years = 2 +walltime = "#expand diags_walltime#" +years = "1987:1989:2" + + [[ atm_monthly_180x360_aave ]] + # TODO: Add "tc_analysis" back in after empty dat is resolved. 
+ # This task will depend on the tc_analysis task + climo_diurnal_frequency = "diurnal_8xdaily" + climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" + climo_subsection = "atm_monthly_180x360_aave" + dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology' + # TODO: Add "tc_analysis" back in after empty dat is resolved. + #sets = "lat_lon","tc_analysis" + sets = "lat_lon", + + [[ atm_monthly_180x360_aave_mvm ]] + # This task will not depend on the tc_analysis task + # Test model-vs-model using the same files as the reference + climo_subsection = "atm_monthly_180x360_aave" + diff_title = "Difference" + partition = "#expand partition_long#" + qos = "#expand qos_long#" + ref_name = "#expand case_name#" + reference_data_path = "#expand user_output#zppy_min_case_carryover_dependencies_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim" + run_type = "model_vs_model" + sets = "lat_lon", + short_ref_name = "same simulation" + swap_test_ref = False + tag = "model_vs_model" + ts_num_years_ref = 2 + ts_subsection = "atm_monthly_180x360_aave" + +[mpas_analysis] +# The second run should depend on the first run. 
+active = True +anomalyRefYear = 1985 +climo_years = "1985-1989", "1990-1995", +enso_years = "1985-1989", "1990-1995", +mesh = "IcoswISC30E3r5" +parallelTaskCount = 6 +partition = "#expand partition_long#" +qos = "#expand qos_long#" +shortTermArchive = True +ts_years = "1985-1989", "1985-1995", +walltime = "#expand mpas_analysis_walltime#" + +[global_time_series] +active = True +climo_years = "1985-1989", "1990-1995", +experiment_name = "#expand case_name#" +figstr = "#expand case_name#" +moc_file=mocTimeSeries_1985-1995.nc +plots_lnd = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR" +ts_num_years = 5 +ts_years = "1985-1989", "1985-1995", +walltime = "00:30:00" +years = "1985-1995", + +[ilamb] +active = True +nodes = 8 +partition = "#expand partition_long#" +short_name = "#expand case_name#" +ts_num_years = 2 +walltime = "2:00:00" +years = "1985:1989:4" diff --git a/tests/integration/utils.py b/tests/integration/utils.py index 913f3b75..f33a0815 100644 --- a/tests/integration/utils.py +++ b/tests/integration/utils.py @@ -271,6 +271,7 @@ def generate_cfgs(unified_testing=False, dry_run=False): cfg_names = [ "min_case_add_dependencies", + "min_case_carryover_dependencies", "min_case_tc_analysis_simultaneous_1", "min_case_tc_analysis_simultaneous_2", "min_case_e3sm_diags_depend_on_climo_mvm_1", diff --git a/zppy/ilamb.py b/zppy/ilamb.py index 3d214f48..f3a44a26 100644 --- a/zppy/ilamb.py +++ b/zppy/ilamb.py @@ -31,8 +31,10 @@ def ilamb(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file): return existing_bundles # --- Generate and submit ilamb scripts --- - dependencies: List[str] = [] for c in tasks: + + dependencies: List[str] = [] + if "ts_num_years" in c.keys(): c["ts_num_years"] = int(c["ts_num_years"]) # Loop over year sets diff --git a/zppy/mpas_analysis.py b/zppy/mpas_analysis.py index 17cbe507..8355a13c 100644 --- 
a/zppy/mpas_analysis.py +++ b/zppy/mpas_analysis.py @@ -31,8 +31,13 @@ def mpas_analysis(config: ConfigObj, script_dir: str, existing_bundles, job_ids_ # job should run at once. To gracefully handle this, we make each # MPAS-Analysis task dependant on all previous ones. This may not # be 100% fool-proof, but should be a reasonable start - dependencies = [] + + # Dependencies carried over from previous task. + carried_over_dependencies: List[str] = [] + for c in tasks: + + dependencies: List[str] = carried_over_dependencies set_subdirs(config, c) # Loop over year sets ts_year_sets: List[Tuple[int, int]] = get_years(c["ts_years"]) @@ -75,7 +80,7 @@ def mpas_analysis(config: ConfigObj, script_dir: str, existing_bundles, job_ids_ skip: bool = check_status(status_file) if skip: # Add to the dependency list - dependencies.append(status_file) + carried_over_dependencies.append(status_file) continue # Create script with open(bash_file, "w") as f: @@ -107,7 +112,7 @@ def mpas_analysis(config: ConfigObj, script_dir: str, existing_bundles, job_ids_ # Note that this line should still be executed even if jobid == -1 # The later MPAS-Analysis tasks still depend on this task (and thus will also fail). # Add to the dependency list - dependencies.append(status_file) + carried_over_dependencies.append(status_file) else: print(f"...adding to bundle {c['bundle']}")